Beispiel #1
0
class OWPivot(OWWidget):
    name = "Pivot Table"
    description = "Reshape data table based on column values."
    category = "Transform"
    icon = "icons/Pivot.svg"
    priority = 1000
    keywords = ["pivot", "group", "aggregate"]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        pivot_table = Output("Pivot Table", Table, default=True)
        filtered_data = Output("Filtered Data", Table)
        grouped_data = Output("Grouped Data", Table)

    class Warning(OWWidget.Warning):
        # TODO - inconsistent for different variable types
        no_col_feature = Msg("Column feature should be selected.")
        cannot_aggregate = Msg("Some aggregations ({}) cannot be computed.")
        renamed_vars = Msg("Some variables have been renamed in some tables"
                           "to avoid duplicates.\n{}")
        too_many_values = Msg("Selected variable has too many values.")
        no_variables = Msg("At least 1 primitive variable is required.")

    settingsHandler = DomainContextHandler()
    row_feature = ContextSetting(None)
    col_feature = ContextSetting(None)
    val_feature = ContextSetting(None)
    sel_agg_functions = Setting(set([Pivot.Count]))
    selection = Setting(set(), schema_only=True)
    auto_commit = Setting(True)

    AGGREGATIONS = (
        Pivot.Count,
        Pivot.Count_defined,
        None,  # separator
        Pivot.Sum,
        Pivot.Mean,
        Pivot.Var,
        Pivot.Median,
        2,  # column break
        Pivot.Mode,
        Pivot.Min,
        Pivot.Max,
        None,
        Pivot.Majority)

    MAX_VALUES = 100

    def __init__(self):
        super().__init__()
        self.data = None  # type: Table
        self.pivot = None  # type: Pivot
        self.__pending_selection = self.selection  # type: Set
        self._add_control_area_controls()
        self._add_main_area_controls()

    def _add_control_area_controls(self):
        gui.comboBox(gui.vBox(self.controlArea, box="Rows"),
                     self,
                     "row_feature",
                     contentsLength=14,
                     searchable=True,
                     model=DomainModel(valid_types=DomainModel.PRIMITIVE),
                     callback=self.__feature_changed,
                     orientation=Qt.Horizontal)
        gui.comboBox(gui.vBox(self.controlArea, box="Columns"),
                     self,
                     "col_feature",
                     contentsLength=14,
                     searchable=True,
                     model=DomainModel(placeholder="(Same as rows)",
                                       valid_types=DiscreteVariable),
                     callback=self.__feature_changed,
                     orientation=Qt.Horizontal)
        gui.comboBox(gui.vBox(self.controlArea, box="Values"),
                     self,
                     "val_feature",
                     contentsLength=14,
                     searchable=True,
                     model=DomainModel(placeholder="(None)"),
                     callback=self.__val_feature_changed,
                     orientation=Qt.Horizontal)
        self.__add_aggregation_controls()
        gui.rubber(self.controlArea)
        gui.auto_apply(self.buttonsArea, self, "auto_commit")

    def __add_aggregation_controls(self):
        def new_inbox():
            nonlocal row, col, inbox
            inbox = QWidget()
            layout = QGridLayout()
            inbox.setLayout(layout)
            layout.setContentsMargins(0, 0, 0, 0)
            box.layout().addWidget(inbox)
            row = col = 0

        box = gui.vBox(self.controlArea, "Aggregations")
        row = col = 0
        inbox = None
        new_inbox()
        self.aggregation_checkboxes = []  # for test purposes
        for agg in self.AGGREGATIONS:
            if agg is None:
                line = QFrame()
                line.setFrameShape(QFrame.HLine)
                line.setLineWidth(1)
                line.setFrameShadow(QFrame.Sunken)
                box.layout().addWidget(line)
                new_inbox()
                continue
            elif agg == 2:
                col += 1
                row = 0
                continue
            check_box = QCheckBox(str(agg), inbox)
            check_box.setChecked(agg in self.sel_agg_functions)
            check_box.clicked.connect(
                lambda *args, a=agg: self.__aggregation_cb_clicked(a, args[0]))
            inbox.layout().addWidget(check_box, row, col)
            self.aggregation_checkboxes.append(check_box)
            row += 1

    def _add_main_area_controls(self):
        self.table_view = PivotTableView()
        self.table_view.selection_changed.connect(self.__invalidate_filtered)
        self.mainArea.layout().addWidget(self.table_view)

    @property
    def no_col_feature(self):
        return self.col_feature is None and self.row_feature is not None \
            and self.row_feature.is_continuous

    @property
    def skipped_aggs(self):
        def add(fun):
            data, var = self.data, self.val_feature
            primitive_funcs = Pivot.ContVarFunctions + Pivot.DiscVarFunctions
            return data and not var and fun not in Pivot.AutonomousFunctions \
                or var and var.is_discrete and fun in Pivot.ContVarFunctions \
                or var and var.is_continuous and fun in Pivot.DiscVarFunctions \
                or var and not var.is_primitive() and fun in primitive_funcs

        skipped = [str(fun) for fun in self.sel_agg_functions if add(fun)]
        return ", ".join(sorted(skipped))

    @property
    def data_has_primitives(self):
        if not self.data:
            return False
        domain = self.data.domain
        return any(v.is_primitive() for v in domain.variables + domain.metas)

    def __feature_changed(self):
        self.selection = set()
        self.pivot = None
        self.commit.deferred()

    def __val_feature_changed(self):
        self.selection = set()
        if self.no_col_feature or not self.pivot:
            return
        self.pivot.update_pivot_table(self.val_feature)
        self.commit.deferred()

    def __aggregation_cb_clicked(self, agg_fun: Pivot.Functions,
                                 checked: bool):
        self.selection = set()
        if checked:
            self.sel_agg_functions.add(agg_fun)
        else:
            self.sel_agg_functions.remove(agg_fun)
        if self.no_col_feature or not self.pivot or not self.data:
            return
        self.pivot.update_group_table(self.sel_agg_functions, self.val_feature)
        self.commit.deferred()

    def __invalidate_filtered(self):
        self.selection = self.table_view.get_selection()
        self.commit.deferred()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.closeContext()
        self.selection = set()
        self.data = data
        self.pivot = None
        self.check_data()
        self.init_attr_values()
        if self.data_has_primitives:
            self.openContext(self.data)
        self.commit.now()

    def check_data(self):
        self.clear_messages()

    def init_attr_values(self):
        domain = self.data.domain if self.data and len(self.data) else None
        for attr in ("row_feature", "col_feature", "val_feature"):
            getattr(self.controls, attr).model().set_domain(domain)
            setattr(self, attr, None)
        model = self.controls.row_feature.model()
        if model:
            self.row_feature = model[0]
        model = self.controls.val_feature.model()
        if model and len(model) > 2:
            allvars = domain.variables + domain.metas
            self.val_feature = allvars[0] if allvars[0] in model else model[2]

    @gui.deferred
    def commit(self):
        def send_outputs(pivot_table, filtered_data, grouped_data):
            if self.data:
                if grouped_data:
                    grouped_data.name = self.data.name
                if pivot_table:
                    pivot_table.name = self.data.name
                if filtered_data:
                    filtered_data.name = self.data.name
            self.Outputs.grouped_data.send(grouped_data)
            self.Outputs.pivot_table.send(pivot_table)
            self.Outputs.filtered_data.send(filtered_data)

        self.Warning.renamed_vars.clear()
        self.Warning.too_many_values.clear()
        self.Warning.cannot_aggregate.clear()
        self.Warning.no_col_feature.clear()

        self.table_view.clear()

        if self.pivot is None:
            if self.data:
                if not self.data_has_primitives:
                    self.Warning.no_variables()
                    send_outputs(None, None, None)
                    return

            if self.no_col_feature:
                self.Warning.no_col_feature()
                send_outputs(None, None, None)
                return

            if self.data:
                col_var = self.col_feature or self.row_feature
                col = self.data.get_column_view(col_var)[0].astype(float)
                if len(nanunique(col)) >= self.MAX_VALUES:
                    self.table_view.clear()
                    self.Warning.too_many_values()
                    send_outputs(None, None, None)
                    return

            self.pivot = Pivot(self.data, self.sel_agg_functions,
                               self.row_feature, self.col_feature,
                               self.val_feature)

        if self.skipped_aggs:
            self.Warning.cannot_aggregate(self.skipped_aggs)
        self._update_graph()

        send_outputs(self.pivot.pivot_table, self.get_filtered_data(),
                     self.pivot.group_table)

        if self.pivot.renamed:
            self.Warning.renamed_vars(self.pivot.renamed)

    def _update_graph(self):
        if self.pivot.pivot_table:
            col_feature = self.col_feature or self.row_feature
            self.table_view.update_table(col_feature.name,
                                         self.row_feature.name,
                                         *self.pivot.pivot_tables)
            selection = self.__pending_selection or self.selection
            self.table_view.set_selection(selection)
            self.selection = self.table_view.get_selection()
            self.__pending_selection = set()

    def get_filtered_data(self):
        if not self.data or not self.selection or not self.pivot.pivot_table:
            return None

        cond = []
        for i, j in self.selection:
            f = []
            for at, val in [(self.row_feature, self.pivot.pivot_table.X[i, 0]),
                            (self.col_feature, j)]:
                if isinstance(at, DiscreteVariable):
                    f.append(FilterDiscrete(at, [val]))
                elif isinstance(at, ContinuousVariable):
                    f.append(FilterContinuous(at, FilterContinuous.Equal, val))
            cond.append(Values(f))
        return Values(cond, conjunction=False)(self.data)

    @staticmethod
    def sizeHint():
        return QSize(640, 525)

    def send_report(self):
        self.report_items((("Row feature", self.row_feature),
                           ("Column feature", self.col_feature),
                           ("Value feature", self.val_feature)))
        if self.data and self.val_feature is not None:
            self.report_table("", self.table_view)
        if not self.data:
            self.report_items((("Group by", self.row_feature), ))
            self.report_table(self.table_view)
Beispiel #2
0
class OWConcordance(OWWidget):
    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 520

    class Inputs:
        corpus = Input("Corpus", Corpus)
        query_word = Input("Query Word", Topic)

    class Outputs:
        selected_documents = Output("Selected Documents", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)
    autocommit = Setting(True)
    context_width = Setting(5)
    word = ContextSetting("", exclude_metas=False)
    selected_rows = Setting([], schema_only=True)

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        super().__init__()

        self.corpus = None  # Corpus
        self.n_matching = ''  # Info on docs matching the word
        self.n_tokens = ''  # Info on tokens
        self.n_types = ''  # Info on types (unique tokens)
        self.is_word_on_input = False

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea,
                 self,
                 'context_width',
                 3,
                 10,
                 box=True,
                 label="Number of words:",
                 callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(c_box,
                                  self,
                                  'word',
                                  orientation=Qt.Horizontal,
                                  sizePolicy=QSizePolicy(
                                      QSizePolicy.MinimumExpanding,
                                      QSizePolicy.Fixed),
                                  label='Query:',
                                  callback=self.set_word,
                                  callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        self.conc_view.selectionModel().selectionChanged.connect(
            self.selection_changed)
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        return QSize(600, 400)

    def set_width(self):
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def selection_changed(self):
        selection = self.conc_view.selectionModel().selection()
        self.selected_rows = sorted(
            set(cell.row() for cell in selection.indexes()))
        self.commit()

    def set_selection(self, selection):
        if selection:
            sel = QItemSelection()
            for row in selection:
                index = self.conc_view.model().index(row, 0)
                sel.select(index, index)
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    @Inputs.corpus
    def set_corpus(self, data=None):
        self.closeContext()
        self.corpus = data
        if data is None:  # data removed, clear selection
            self.selected_rows = []

        if not self.is_word_on_input:
            self.word = ""
            self.openContext(self.corpus)

        self.model.set_corpus(self.corpus)
        self.set_word()

    @Inputs.query_word
    def set_word_from_input(self, topic):
        self.Warning.multiple_words_on_input.clear()
        if self.is_word_on_input:  # word changed, clear selection
            self.selected_rows = []
        self.is_word_on_input = topic is not None and len(topic) > 0
        self.input.setEnabled(not self.is_word_on_input)
        if self.is_word_on_input:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        self.model.set_word(self.word)
        self.update_widget()
        self.commit()

    def handleNewSignals(self):
        self.set_selection(self.selected_rows)

    def resize_columns(self):
        col_width = (self.conc_view.width() -
                     self.conc_view.columnWidth(1)) / 2 - 12
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                len(self.corpus))
            self.n_tokens = self.model.n_tokens
            self.n_types = self.model.n_types
        else:
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        selected_docs = sorted(
            set(self.model.word_index[row][0] for row in self.selected_rows))
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.Outputs.selected_documents.send(selected)
        else:
            self.Outputs.selected_documents.send(None)

    def send_report(self):
        view = self.conc_view
        model = self.conc_view.model()
        self.report_items("Concordances", (
            ("Query", model.word),
            ("Tokens", model.n_tokens),
            ("Types", model.n_types),
            ("Matching", self.n_matching),
        ))
        self.report_table(view)
Beispiel #3
0
class OWLinePlot(OWWidget):
    name = "Line Plot"
    description = "Visualization of data profiles (e.g., time series)."
    icon = "icons/LinePlot.svg"
    priority = 180

    enable_selection = Signal(bool)

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    group_var = ContextSetting(None)
    show_profiles = Setting(False)
    show_range = Setting(True)
    show_mean = Setting(True)
    show_error = Setting(False)
    auto_commit = Setting(True)
    selection = Setting(None, schema_only=True)

    graph_name = "graph.plotItem"

    class Error(OWWidget.Error):
        not_enough_attrs = Msg("Need at least one continuous feature.")
        no_valid_data = Msg("No plot due to no valid data.")

    class Warning(OWWidget.Warning):
        no_display_option = Msg("No display option is selected.")

    class Information(OWWidget.Information):
        hidden_instances = Msg("Instances with unknown values are not shown.")
        too_many_features = Msg("Data has too many features. Only first {}"
                                " are shown.".format(MAX_FEATURES))

    def __init__(self, parent=None):
        super().__init__(parent)
        self.__groups = []
        self.data = None
        self.valid_data = None
        self.subset_data = None
        self.subset_indices = None
        self.__pending_selection = self.selection
        self.graph_variables = []
        self.setup_gui()

        self.graph.view_box.selection_changed.connect(self.selection_changed)
        self.enable_selection.connect(self.graph.view_box.enable_selection)

    def setup_gui(self):
        self._add_graph()
        self._add_controls()

    def _add_graph(self):
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = LinePlotGraph(self)
        box.layout().addWidget(self.graph)

    def _add_controls(self):
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(infobox, "No data on input.")
        displaybox = gui.widgetBox(self.controlArea, "Display")
        gui.checkBox(displaybox,
                     self,
                     "show_profiles",
                     "Lines",
                     callback=self.__show_profiles_changed,
                     tooltip="Plot lines")
        gui.checkBox(displaybox,
                     self,
                     "show_range",
                     "Range",
                     callback=self.__show_range_changed,
                     tooltip="Plot range between 10th and 90th percentile")
        gui.checkBox(displaybox,
                     self,
                     "show_mean",
                     "Mean",
                     callback=self.__show_mean_changed,
                     tooltip="Plot mean curve")
        gui.checkBox(displaybox,
                     self,
                     "show_error",
                     "Error bars",
                     callback=self.__show_error_changed,
                     tooltip="Show standard deviation")

        self.group_vars = DomainModel(placeholder="None",
                                      separators=False,
                                      valid_types=DiscreteVariable)
        self.group_view = gui.listView(self.controlArea,
                                       self,
                                       "group_var",
                                       box="Group by",
                                       model=self.group_vars,
                                       callback=self.__group_var_changed)
        self.group_view.setEnabled(False)
        self.group_view.setMinimumSize(QSize(30, 100))
        self.group_view.setSizePolicy(QSizePolicy.Expanding,
                                      QSizePolicy.Ignored)

        plot_gui = OWPlotGUI(self)
        plot_gui.box_zoom_select(self.controlArea)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

    def __show_profiles_changed(self):
        self.check_display_options()
        self._update_visibility("profiles")

    def __show_range_changed(self):
        self.check_display_options()
        self._update_visibility("range")

    def __show_mean_changed(self):
        self.check_display_options()
        self._update_visibility("mean")

    def __show_error_changed(self):
        self._update_visibility("error")

    def __group_var_changed(self):
        if self.data is None or not self.graph_variables:
            return
        self.plot_groups()
        self._update_profiles_color()
        self._update_sel_profiles_and_range()
        self._update_sel_profiles_color()
        self._update_sub_profiles()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.closeContext()
        self.data = data
        self.clear()
        self.check_data()
        self.check_display_options()

        if self.data is not None:
            self.group_vars.set_domain(self.data.domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.group_var = self.data.domain.class_var \
                if self.data.domain.has_discrete_class else None

        self.openContext(data)
        self.setup_plot()
        self.commit()

    def check_data(self):
        def error(err):
            err()
            self.data = None

        self.clear_messages()
        if self.data is not None:
            self.infoLabel.setText(
                "%i instances on input\n%i features" %
                (len(self.data), len(self.data.domain.attributes)))
            self.graph_variables = [
                var for var in self.data.domain.attributes if var.is_continuous
            ]
            self.valid_data = ~countnans(self.data.X, axis=1).astype(bool)
            if len(self.graph_variables) < 1:
                error(self.Error.not_enough_attrs)
            elif not np.sum(self.valid_data):
                error(self.Error.no_valid_data)
            else:
                if not np.all(self.valid_data):
                    self.Information.hidden_instances()
                if len(self.graph_variables) > MAX_FEATURES:
                    self.Information.too_many_features()
                    self.graph_variables = self.graph_variables[:MAX_FEATURES]

    def check_display_options(self):
        self.Warning.no_display_option.clear()
        if self.data is not None:
            if not (self.show_profiles or self.show_range or self.show_mean):
                self.Warning.no_display_option()
            enable = (self.show_profiles or self.show_range) and \
                len(self.data[self.valid_data]) < SEL_MAX_INSTANCES
            self.enable_selection.emit(enable)

    @Inputs.data_subset
    @check_sql_input
    def set_subset_data(self, subset):
        self.subset_data = subset

    def handleNewSignals(self):
        self.set_subset_ids()
        if self.data is not None:
            self._update_profiles_color()
            self._update_sel_profiles_color()
            self._update_sub_profiles()

    def set_subset_ids(self):
        sub_ids = {e.id for e in self.subset_data} \
            if self.subset_data is not None else {}
        self.subset_indices = None
        if self.data is not None and sub_ids:
            self.subset_indices = [
                x.id for x in self.data[self.valid_data] if x.id in sub_ids
            ]

    def setup_plot(self):
        if self.data is None:
            return

        ticks = [a.name for a in self.graph_variables]
        self.graph.getAxis("bottom").set_ticks(ticks)
        self.plot_groups()
        self.apply_selection()
        self.graph.view_box.enableAutoRange()
        self.graph.view_box.updateAutoRange()

    def plot_groups(self):
        self._remove_groups()
        data = self.data[self.valid_data, self.graph_variables]
        if self.group_var is None:
            self._plot_group(data, np.where(self.valid_data)[0])
        else:
            class_col_data, _ = self.data.get_column_view(self.group_var)
            for index in range(len(self.group_var.values)):
                mask = np.logical_and(class_col_data == index, self.valid_data)
                indices = np.flatnonzero(mask)
                if not len(indices):
                    continue
                group_data = self.data[indices, self.graph_variables]
                self._plot_group(group_data, indices, index)
        self.graph.update_legend(self.group_var)
        self.graph.view_box.add_profiles(data.X)

    def _remove_groups(self):
        for group in self.__groups:
            group.remove_items()
        self.graph.view_box.remove_profiles()
        self.__groups = []

    def _plot_group(self, data, indices, index=None):
        color = self.__get_group_color(index)
        group = ProfileGroup(data, indices, color, self.graph)
        kwargs = self.__get_visibility_flags()
        group.set_visible_error(**kwargs)
        group.set_visible_mean(**kwargs)
        group.set_visible_range(**kwargs)
        group.set_visible_profiles(**kwargs)
        self.__groups.append(group)

    def __get_group_color(self, index):
        if self.group_var is not None:
            return QColor(*self.group_var.colors[index])
        return QColor(LinePlotStyle.DEFAULT_COLOR)

    def __get_visibility_flags(self):
        return {
            "show_profiles": self.show_profiles,
            "show_range": self.show_range,
            "show_mean": self.show_mean,
            "show_error": self.show_error
        }

    def _update_profiles_color(self):
        # color alpha depends on subset and selection; with selection or
        # subset profiles color has more opacity
        if not self.show_profiles:
            return
        for group in self.__groups:
            has_sel = bool(self.subset_indices) or bool(self.selection)
            group.update_profiles_color(has_sel)

    def _update_sel_profiles_and_range(self):
        # mark selected instances and selected range
        if not (self.show_profiles or self.show_range):
            return
        for group in self.__groups:
            inds = [i for i in group.indices if self.__in(i, self.selection)]
            table = self.data[inds, self.graph_variables].X if inds else None
            if self.show_profiles:
                group.update_sel_profiles(table)
            if self.show_range:
                group.update_sel_range(table)

    def _update_sel_profiles_color(self):
        # color depends on subset; when subset is present,
        # selected profiles are black
        if not self.selection or not self.show_profiles:
            return
        for group in self.__groups:
            group.update_sel_profiles_color(bool(self.subset_indices))

    def _update_sub_profiles(self):
        # mark subset instances
        if not (self.show_profiles or self.show_range):
            return
        for group in self.__groups:
            inds = [
                i for i, _id in zip(group.indices, group.ids)
                if self.__in(_id, self.subset_indices)
            ]
            table = self.data[inds, self.graph_variables].X if inds else None
            group.update_sub_profiles(table)

    def _update_visibility(self, obj_name):
        if not len(self.__groups):
            return
        self._update_profiles_color()
        self._update_sel_profiles_and_range()
        self._update_sel_profiles_color()
        kwargs = self.__get_visibility_flags()
        for group in self.__groups:
            getattr(group, "set_visible_{}".format(obj_name))(**kwargs)
        self.graph.view_box.updateAutoRange()

    def apply_selection(self):
        if self.data is not None and self.__pending_selection is not None:
            sel = [i for i in self.__pending_selection if i < len(self.data)]
            mask = np.zeros(len(self.data), dtype=bool)
            mask[sel] = True
            mask = mask[self.valid_data]
            self.selection_changed(mask)
            self.__pending_selection = None

    def selection_changed(self, mask):
        if self.data is None:
            return
        # need indices for self.data: mask refers to self.data[self.valid_data]
        indices = np.arange(len(self.data))[self.valid_data][mask]
        self.graph.select(indices)
        old = self.selection
        self.selection = None if self.data and isinstance(self.data, SqlTable)\
            else list(self.graph.selection)
        if not old and self.selection or old and not self.selection:
            self._update_profiles_color()
        self._update_sel_profiles_and_range()
        self._update_sel_profiles_color()
        self.commit()

    def commit(self):
        selected = self.data[self.selection] \
            if self.data is not None and bool(self.selection) else None
        annotated = create_annotated_table(self.data, self.selection)
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)

    def send_report(self):
        if self.data is None:
            return

        caption = report.render_items_vert((("Group by", self.group_var), ))
        self.report_plot()
        if caption:
            self.report_caption(caption)

    def sizeHint(self):
        return QSize(1132, 708)

    def clear(self):
        self.valid_data = None
        self.selection = None
        self.__groups = []
        self.graph_variables = []
        self.graph.reset()
        self.infoLabel.setText("No data on input.")
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)

    @staticmethod
    def __in(obj, collection):
        return collection is not None and obj in collection
Beispiel #4
0
class OWProjectionWidgetBase(OWWidget, openclass=True):
    """
    Base widget for widgets that use attribute data to set the colors, labels,
    shapes and sizes of points.

    The widgets defines settings `attr_color`, `attr_label`, `attr_shape`
    and `attr_size`, but leaves defining the gui to the derived widgets.
    These are expected to have controls that manipulate these settings,
    and the controls are expected to use attribute models.

    The widgets also defines attributes `data` and `valid_data` and expects
    the derived widgets to use them to store an instances of `data.Table`
    and a bool `np.ndarray` with indicators of valid (that is, shown)
    data points.
    """
    attr_color = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_label = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_shape = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_size = ContextSetting(None, required=ContextSetting.OPTIONAL)

    class Information(OWWidget.Information):
        missing_size = Msg(
            "Points with undefined '{}' are shown in smaller size")
        missing_shape = Msg(
            "Points with undefined '{}' are shown as crossed circles")

    def __init__(self):
        super().__init__()
        self.data = None
        self.valid_data = None

    def init_attr_values(self):
        """
        Set the models for `attr_color`, `attr_shape`, `attr_size` and
        `attr_label`. All values are set to `None`, except `attr_color`
        which is set to the class variable if it exists.
        """
        data = self.data
        domain = data.domain if data and len(data) else None
        for attr in ("attr_color", "attr_shape", "attr_size", "attr_label"):
            getattr(self.controls, attr).model().set_domain(domain)
            setattr(self, attr, None)
        if domain is not None:
            self.attr_color = domain.class_var

    def get_coordinates_data(self):
        """A get coordinated method that returns no coordinates.

        Derived classes must override this method.
        """
        return None, None

    def get_subset_mask(self):
        """
        Return the bool array indicating the points in the subset

        The base method does nothing and would usually be overridden by
        a method that returns indicators from the subset signal.

        Do not confuse the subset with selection.

        Returns:
            (np.ndarray or `None`): a bool array of indicators
        """
        return None

    def get_column(self,
                   attr,
                   filter_valid=True,
                   max_categories=None,
                   return_labels=False):
        """
        Retrieve the data from the given column in the data table

        The method:
        - densifies sparse data,
        - converts arrays with dtype object to floats if the attribute is
          actually primitive,
        - filters out invalid data (if `filter_valid` is `True`),
        - merges infrequent (discrete) values into a single value
          (if `max_categories` is set).

        Tha latter feature is used for shapes and labels, where only a
        specified number of different values is shown, and others are
        merged into category 'Other'. In this case, the method may return
        either the data (e.g. color indices, shape indices) or the list
        of retained values, followed by `['Other']`.

        Args:
            attr (:obj:~Orange.data.Variable): the column to extract
            filter_valid (bool): filter out invalid data (default: `True`)
            max_categories (int): merge infrequent values (default: `None`);
                ignored for non-discrete attributes
            return_labels (bool): return a list of labels instead of data
                (default: `False`)

        Returns:
            (np.ndarray): (valid) data from the column, or a list of labels
        """
        if attr is None:
            return None

        needs_merging = attr.is_discrete \
                        and max_categories is not None \
                        and len(attr.values) >= max_categories
        if return_labels and not needs_merging:
            assert attr.is_discrete
            return attr.values

        all_data = self.data.get_column_view(attr)[0]
        if all_data.dtype == object and attr.is_primitive():
            all_data = all_data.astype(float)
        if filter_valid and self.valid_data is not None:
            all_data = all_data[self.valid_data]
        if not needs_merging:
            return all_data

        dist = bincount(all_data, max_val=len(attr.values) - 1)[0]
        infrequent = np.zeros(len(attr.values), dtype=bool)
        infrequent[np.argsort(dist)[:-(max_categories - 1)]] = True
        if return_labels:
            return [
                value
                for value, infreq in zip(attr.values, infrequent) if not infreq
            ] + ["Other"]
        else:
            result = all_data.copy()
            freq_vals = [i for i, f in enumerate(infrequent) if not f]
            for i, infreq in enumerate(infrequent):
                if infreq:
                    result[all_data == i] = max_categories - 1
                else:
                    result[all_data == i] = freq_vals.index(i)
            return result

    # Sizes
    def get_size_data(self):
        """Return the column corresponding to `attr_size`"""
        return self.get_column(self.attr_size)

    def impute_sizes(self, size_data):
        """
        Default imputation for size data

        Let the graph handle it, but add a warning if needed.

        Args:
            size_data (np.ndarray): scaled points sizes
        """
        if self.graph.default_impute_sizes(size_data):
            self.Information.missing_size(self.attr_size)
        else:
            self.Information.missing_size.clear()

    def sizes_changed(self):
        self.graph.update_sizes()

    # Colors
    def get_color_data(self):
        """Return the column corresponding to color data"""
        return self.get_column(self.attr_color, max_categories=MAX_COLORS)

    def get_color_labels(self):
        """
        Return labels for the color legend

        Returns:
            (list of str): labels
        """
        if self.attr_color is None:
            return None
        if not self.attr_color.is_discrete:
            return self.attr_color.str_val
        return self.get_column(self.attr_color,
                               max_categories=MAX_COLORS,
                               return_labels=True)

    def is_continuous_color(self):
        """
        Tells whether the color is continuous

        Returns:
            (bool):
        """
        return self.attr_color is not None and self.attr_color.is_continuous

    def get_palette(self):
        """
        Return a palette suitable for the current `attr_color`

        This method must be overridden if the widget offers coloring that is
        not based on attribute values.
        """
        attr = self.attr_color
        if not attr:
            return None
        palette = attr.palette
        if attr.is_discrete and len(attr.values) >= MAX_COLORS:
            values = self.get_color_labels()
            colors = [
                palette.palette[attr.to_val(value)] for value in values[:-1]
            ] + [[192, 192, 192]]

            palette = colorpalettes.DiscretePalette.from_colors(colors)
        return palette

    def can_draw_density(self):
        """
        Tells whether the current data and settings are suitable for drawing
        densities

        Returns:
            (bool):
        """
        return self.data is not None and self.data.domain is not None and \
            len(self.data) > 1 and self.attr_color is not None

    def colors_changed(self):
        self.graph.update_colors()
        self._update_opacity_warning()
        self.cb_class_density.setEnabled(self.can_draw_density())

    # Labels
    def get_label_data(self, formatter=None):
        """Return the column corresponding to label data"""
        if self.attr_label:
            label_data = self.get_column(self.attr_label)
            if formatter is None:
                formatter = self.attr_label.str_val
            return np.array([formatter(x) for x in label_data])
        return None

    def labels_changed(self):
        self.graph.update_labels()

    # Shapes
    def get_shape_data(self):
        """
        Return labels for the shape legend

        Returns:
            (list of str): labels
        """
        return self.get_column(self.attr_shape, max_categories=MAX_SHAPES)

    def get_shape_labels(self):
        return self.get_column(self.attr_shape,
                               max_categories=MAX_SHAPES,
                               return_labels=True)

    def impute_shapes(self, shape_data, default_symbol):
        """
        Default imputation for shape data

        Let the graph handle it, but add a warning if needed.

        Args:
            shape_data (np.ndarray): scaled points sizes
            default_symbol (str): a string representing the symbol
        """
        if self.graph.default_impute_shapes(shape_data, default_symbol):
            self.Information.missing_shape(self.attr_shape)
        else:
            self.Information.missing_shape.clear()

    def shapes_changed(self):
        self.graph.update_shapes()

    # Tooltip
    def _point_tooltip(self, point_id, skip_attrs=()):
        def show_part(_point_data, singular, plural, max_shown, _vars):
            cols = [
                escape('{} = {}'.format(var.name, _point_data[var]))
                for var in _vars[:max_shown + 2]
                if _vars == domain.class_vars or var not in skip_attrs
            ][:max_shown]
            if not cols:
                return ""
            n_vars = len(_vars)
            if n_vars > max_shown:
                cols[-1] = "... and {} others".format(n_vars - max_shown + 1)
            return \
                "<b>{}</b>:<br/>".format(singular if n_vars < 2 else plural) \
                + "<br/>".join(cols)

        domain = self.data.domain
        parts = (("Class", "Classes", 4, domain.class_vars),
                 ("Meta", "Metas", 4, domain.metas), ("Feature", "Features",
                                                      10, domain.attributes))

        point_data = self.data[point_id]
        return "<br/>".join(
            show_part(point_data, *columns) for columns in parts)

    def get_tooltip(self, point_ids):
        """
        Return the tooltip string for the given points

        The method is called by the plot on mouse hover

        Args:
            point_ids (list): indices into `data`

        Returns:
            (str):
        """
        point_ids = \
            np.flatnonzero(self.valid_data)[np.asarray(point_ids, dtype=int)]
        text = "<hr/>".join(
            self._point_tooltip(point_id)
            for point_id in point_ids[:MAX_POINTS_IN_TOOLTIP])
        if len(point_ids) > MAX_POINTS_IN_TOOLTIP:
            text = "{} instances<hr/>{}<hr/>...".format(len(point_ids), text)
        return text

    def keyPressEvent(self, event):
        """Update the tip about using the modifier keys when selecting"""
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        """Update the tip about using the modifier keys when selecting"""
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())
Beispiel #5
0
class OWCorrelations(OWWidget):
    name = "Correlations"
    description = "Compute all pairwise attribute correlations."
    icon = "icons/Correlations.svg"
    priority = 1106

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)
        features = Output("Features", AttributeList)
        correlations = Output("Correlations", Table)

    want_control_area = False

    correlation_type: int

    settings_version = 3
    settingsHandler = DomainContextHandler()
    selection = ContextSetting([])
    feature = ContextSetting(None)
    correlation_type = Setting(0)

    class Information(OWWidget.Information):
        removed_cons_feat = Msg("Constant features have been removed.")

    class Warning(OWWidget.Warning):
        not_enough_vars = Msg("At least two continuous features are needed.")
        not_enough_inst = Msg("At least two instances are needed.")

    def __init__(self):
        super().__init__()
        self.data = None  # type: Table
        self.cont_data = None  # type: Table

        # GUI
        box = gui.vBox(self.mainArea)
        self.correlation_combo = gui.comboBox(
            box,
            self,
            "correlation_type",
            items=CorrelationType.items(),
            orientation=Qt.Horizontal,
            callback=self._correlation_combo_changed)

        self.feature_model = DomainModel(order=DomainModel.ATTRIBUTES,
                                         separators=False,
                                         placeholder="(All combinations)",
                                         valid_types=ContinuousVariable)
        gui.comboBox(box,
                     self,
                     "feature",
                     callback=self._feature_combo_changed,
                     model=self.feature_model)

        self.vizrank, _ = CorrelationRank.add_vizrank(
            None, self, None, self._vizrank_selection_changed)
        self.vizrank.button.setEnabled(False)
        self.vizrank.threadStopped.connect(self._vizrank_stopped)

        gui.separator(box)
        box.layout().addWidget(self.vizrank.filter)
        box.layout().addWidget(self.vizrank.rank_table)

        button_box = gui.hBox(self.mainArea)
        button_box.layout().addWidget(self.vizrank.button)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    @staticmethod
    def sizeHint():
        return QSize(350, 400)

    def _correlation_combo_changed(self):
        self.apply()

    def _feature_combo_changed(self):
        self.apply()

    def _vizrank_selection_changed(self, *args):
        self.selection = list(args)
        self.commit()

    def _vizrank_stopped(self):
        self._vizrank_select()

    def _vizrank_select(self):
        model = self.vizrank.rank_table.model()
        if not model.rowCount():
            return
        selection = QItemSelection()

        # This flag is needed because data in the model could be
        # filtered by a feature and therefore selection could not be found
        selection_in_model = False
        if self.selection:
            sel_names = sorted(var.name for var in self.selection)
            for i in range(model.rowCount()):
                # pylint: disable=protected-access
                names = sorted(x.name for x in model.data(
                    model.index(i, 0), CorrelationRank._AttrRole))
                if names == sel_names:
                    selection.select(model.index(i, 0),
                                     model.index(i,
                                                 model.columnCount() - 1))
                    selection_in_model = True
                    break
        if not selection_in_model:
            selection.select(model.index(0, 0),
                             model.index(0,
                                         model.columnCount() - 1))
        self.vizrank.rank_table.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.clear_messages()
        self.data = data
        self.cont_data = None
        self.selection = []
        if data is not None:
            if len(data) < 2:
                self.Warning.not_enough_inst()
            else:
                domain = data.domain
                cont_vars = [
                    a for a in domain.class_vars + domain.metas +
                    domain.attributes if a.is_continuous
                ]
                cont_data = Table.from_table(Domain(cont_vars), data)
                remover = Remove(Remove.RemoveConstant)
                cont_data = remover(cont_data)
                if remover.attr_results["removed"]:
                    self.Information.removed_cons_feat()
                if len(cont_data.domain.attributes) < 2:
                    self.Warning.not_enough_vars()
                else:
                    self.cont_data = SklImpute()(cont_data)
            self.info.set_input_summary(len(data),
                                        format_summary_details(data))
        else:
            self.info.set_input_summary(self.info.NoInput)
        self.set_feature_model()
        self.openContext(self.cont_data)
        self.apply()
        self.vizrank.button.setEnabled(self.cont_data is not None)

    def set_feature_model(self):
        self.feature_model.set_domain(self.cont_data and self.cont_data.domain)
        data = self.data
        if self.cont_data and data.domain.has_continuous_class:
            self.feature = self.cont_data.domain[data.domain.class_var.name]
        else:
            self.feature = None

    def apply(self):
        self.vizrank.initialize()
        if self.cont_data is not None:
            # this triggers self.commit() by changing vizrank selection
            self.vizrank.toggle()
        else:
            self.commit()

    def commit(self):
        self.Outputs.data.send(self.data)
        summary = len(self.data) if self.data else self.info.NoOutput
        details = format_summary_details(self.data) if self.data else ""
        self.info.set_output_summary(summary, details)

        if self.data is None or self.cont_data is None:
            self.Outputs.features.send(None)
            self.Outputs.correlations.send(None)
            return

        attrs = [ContinuousVariable("Correlation"), ContinuousVariable("FDR")]
        metas = [StringVariable("Feature 1"), StringVariable("Feature 2")]
        domain = Domain(attrs, metas=metas)
        model = self.vizrank.rank_model
        x = np.array([[
            float(model.data(model.index(row, 0), role))
            for role in (Qt.DisplayRole, CorrelationRank.PValRole)
        ] for row in range(model.rowCount())])
        x[:, 1] = FDR(list(x[:, 1]))
        # pylint: disable=protected-access
        m = np.array([[
            a.name
            for a in model.data(model.index(row, 0), CorrelationRank._AttrRole)
        ] for row in range(model.rowCount())],
                     dtype=object)
        corr_table = Table(domain, x, metas=m)
        corr_table.name = "Correlations"

        # data has been imputed; send original attributes
        self.Outputs.features.send(
            AttributeList(
                [self.data.domain[var.name] for var in self.selection]))
        self.Outputs.correlations.send(corr_table)

    def send_report(self):
        self.report_table(CorrelationType.items()[self.correlation_type],
                          self.vizrank.rank_table)

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            sel = context.values["selection"]
            context.values["selection"] = [(var.name, vartype(var))
                                           for var in sel[0]]
        if version < 3:
            sel = context.values["selection"]
            context.values["selection"] = ([(name, vtype + 100)
                                            for name, vtype in sel], -3)
Beispiel #6
0
class OWSieveDiagram(OWWidget):
    name = "筛图"
    description = "可视化观察到的和期望的频率对于值的组合"
    icon = "icons/SieveDiagram.svg"
    priority = 200
    keywords = []

    class Inputs:
        data = Input("数据", Table, default=True)
        features = Input("特征", AttributeList)

    class Outputs:
        selected_data = Output("被选数据", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    graph_name = "画布"

    want_control_area = False

    settings_version = 1
    settingsHandler = DomainContextHandler()
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    selection = ContextSetting(set())

    xy_changed_manually = Signal(Variable, Variable)

    def __init__(self):
        # pylint: disable=missing-docstring
        super().__init__()

        self.data = self.discrete_data = None
        self.attrs = []
        self.input_features = None
        self.areas = []
        self.selection = set()

        self.attr_box = gui.hBox(self.mainArea)
        self.domain_model = DomainModel(valid_types=DomainModel.PRIMITIVE)
        combo_args = dict(widget=self.attr_box,
                          master=self,
                          contentsLength=12,
                          callback=self.attr_changed,
                          sendSelectedValue=True,
                          valueType=str,
                          model=self.domain_model)
        fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed)
        gui.comboBox(value="attr_x", **combo_args)
        gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size)
        gui.comboBox(value="attr_y", **combo_args)
        self.vizrank, self.vizrank_button = SieveRank.add_vizrank(
            self.attr_box, self, "分数组合", self.set_attr)
        self.vizrank_button.setSizePolicy(*fixed_size)

        self.canvas = QGraphicsScene()
        self.canvasView = ViewWithPress(self.canvas,
                                        self.mainArea,
                                        handler=self.reset_selection)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

    def sizeHint(self):
        return QSize(450, 550)

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self.update_graph()

    def showEvent(self, event):
        super().showEvent(event)
        self.update_graph()

    @classmethod
    def migrate_context(cls, context, version):
        if not version:
            settings.rename_setting(context, "attrX", "attr_x")
            settings.rename_setting(context, "attrY", "attr_y")
            settings.migrate_str_to_variable(context)

    @Inputs.data
    def set_data(self, data):
        """
        Discretize continuous attributes, and put all attributes and discrete
        metas into self.attrs.

        Select the first two attributes unless context overrides this.
        Method `resolve_shown_attributes` is called to use the attributes from
        the input, if it exists and matches the attributes in the data.

        Remove selection; again let the context override this.
        Initialize the vizrank dialog, but don't show it.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is None:
            self.attrs[:] = []
            self.domain_model.set_domain(None)
            self.discrete_data = None
        else:
            self.domain_model.set_domain(data.domain)
        self.attrs = [x for x in self.domain_model if isinstance(x, Variable)]
        if self.attrs:
            self.attr_x = self.attrs[0]
            self.attr_y = self.attrs[len(self.attrs) > 1]
        else:
            self.attr_x = self.attr_y = None
            self.areas = []
            self.selection = set()
        self.openContext(self.data)
        if self.data:
            self.discrete_data = self.sparse_to_dense(data, True)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        self.vizrank_button.setEnabled(self.data is not None
                                       and len(self.data) > 1
                                       and len(self.data.domain.attributes) > 1
                                       and not self.data.is_sparse())

    def set_attr(self, attr_x, attr_y):
        self.attr_x, self.attr_y = attr_x, attr_y
        self.update_attr()

    def attr_changed(self):
        self.update_attr()
        self.xy_changed_manually.emit(self.attr_x, self.attr_y)

    def update_attr(self):
        """Update the graph and selection."""
        self.selection = set()
        self.discrete_data = self.sparse_to_dense(self.data)
        self.update_graph()
        self.update_selection()

    def sparse_to_dense(self, data, init=False):
        """
        Extracts two selected columns from sparse matrix.
        GH-2260
        """
        def discretizer(data):
            if any(attr.is_continuous for attr in chain(
                    data.domain.variables, data.domain.metas)):
                discretize = Discretize(method=EqualFreq(n=4),
                                        remove_const=False,
                                        discretize_classes=True,
                                        discretize_metas=True)
                return discretize(data).to_dense()
            return data

        if not data.is_sparse() and not init:
            return self.discrete_data
        if data.is_sparse():
            attrs = {self.attr_x, self.attr_y}
            new_domain = data.domain.select_columns(attrs)
            data = Table.from_table(new_domain, data)
        return discretizer(data)

    @Inputs.features
    def set_input_features(self, attr_list):
        """
        Handler for the Features signal.

        The method stores the attributes and calls `resolve_shown_attributes`

        Args:
            attr_list (AttributeList): data from the signal
        """
        self.input_features = attr_list
        self.resolve_shown_attributes()
        self.update_selection()

    def resolve_shown_attributes(self):
        """
        Use the attributes from the input signal if the signal is present
        and at least two attributes appear in the domain. If there are
        multiple, use the first two. Combos are disabled if inputs are used.
        """
        self.warning()
        self.attr_box.setEnabled(True)
        if not self.input_features:  # None or empty
            return
        features = [f for f in self.input_features if f in self.domain_model]
        if not features:
            self.warning(
                "Features from the input signal are not present in the data")
            return
        old_attrs = self.attr_x, self.attr_y
        self.attr_x, self.attr_y = [f for f in (features * 2)[:2]]
        self.attr_box.setEnabled(False)
        if (self.attr_x, self.attr_y) != old_attrs:
            self.selection = set()
            self.update_graph()

    def reset_selection(self):
        self.selection = set()
        self.update_selection()

    def select_area(self, area, event):
        """
        Add or remove the clicked area from the selection

        Args:
            area (QRect): the area that is clicked
            event (QEvent): event description
        """
        if event.button() != Qt.LeftButton:
            return
        index = self.areas.index(area)
        if event.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection()

    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        if self.areas is None or not self.selection:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attr_x.name, [val_x]),
                        filter.FilterDiscrete(self.attr_y.name, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def update_graph(self):
        # Function uses weird names like r, g, b, but it does it with utmost
        # caution, hence
        # pylint: disable=invalid-name
        """Update the graph."""
        def text(txt, *args, **kwargs):
            return CanvasText(self.canvas,
                              "",
                              html_text=to_html(txt),
                              *args,
                              **kwargs)

        def width(txt):
            return text(txt, 0, 0, show=False).boundingRect().width()

        def height(txt):
            return text(txt, 0, 0, show=False).boundingRect().height()

        def fmt(val):
            return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val)

        def show_pearson(rect, pearson, pen_width):
            """
            Color the given rectangle according to its corresponding
            standardized Pearson residual.

            Args:
                rect (QRect): the rectangle being drawn
                pearson (float): signed standardized pearson residual
                pen_width (int): pen width (bolder pen is used for selection)
            """
            r = rect.rect()
            x, y, w, h = r.x(), r.y(), r.width(), r.height()
            if w == 0 or h == 0:
                return

            r = b = 255
            if pearson > 0:
                r = g = max(255 - 20 * pearson, 55)
            elif pearson < 0:
                b = g = max(255 + 20 * pearson, 55)
            else:
                r = g = b = 224
            rect.setBrush(QBrush(QColor(r, g, b)))
            pen_color = QColor(255 * (r == 255), 255 * (g == 255),
                               255 * (b == 255))
            pen = QPen(pen_color, pen_width)
            rect.setPen(pen)
            if pearson > 0:
                pearson = min(pearson, 10)
                dist = 20 - 1.6 * pearson
            else:
                pearson = max(pearson, -10)
                dist = 20 - 8 * pearson
            pen.setWidth(1)

            def _offseted_line(ax, ay):
                r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w),
                                      y + (ay or h))
                self.canvas.addItem(r)
                r.setPen(pen)

            ax = dist
            while ax < w:
                _offseted_line(ax, 0)
                ax += dist

            ay = dist
            while ay < h:
                _offseted_line(0, ay)
                ay += dist

        def make_tooltip():
            """Create the tooltip. The function uses local variables from
            the enclosing scope."""

            # pylint: disable=undefined-loop-variable
            def _oper(attr, txt):
                if self.data.domain[attr.name] is ddomain[attr.name]:
                    return "="
                return " " if txt[0] in "<≥" else " in "

            return (
                "<b>{attr_x}{xeq}{xval_name}</b>: {obs_x}/{n} ({p_x:.0f} %)".
                format(attr_x=to_html(attr_x.name),
                       xeq=_oper(attr_x, xval_name),
                       xval_name=to_html(xval_name),
                       obs_x=fmt(chi.probs_x[x] * n),
                       n=int(n),
                       p_x=100 * chi.probs_x[x]) + "<br/>" +
                "<b>{attr_y}{yeq}{yval_name}</b>: {obs_y}/{n} ({p_y:.0f} %)".
                format(attr_y=to_html(attr_y.name),
                       yeq=_oper(attr_y, yval_name),
                       yval_name=to_html(yval_name),
                       obs_y=fmt(chi.probs_y[y] * n),
                       n=int(n),
                       p_y=100 * chi.probs_y[y]) + "<hr/>" +
                """<b>combination of values: </b><br/>
                   &nbsp;&nbsp;&nbsp;expected {exp} ({p_exp:.0f} %)<br/>
                   &nbsp;&nbsp;&nbsp;observed {obs} ({p_obs:.0f} %)""".format(
                    exp=fmt(chi.expected[y, x]),
                    p_exp=100 * chi.expected[y, x] / n,
                    obs=fmt(chi.observed[y, x]),
                    p_obs=100 * chi.observed[y, x] / n))

        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attr_x is None or self.attr_y is None:
            return

        ddomain = self.discrete_data.domain
        attr_x, attr_y = self.attr_x, self.attr_y
        disc_x, disc_y = ddomain[attr_x.name], ddomain[attr_y.name]
        view = self.canvasView

        chi = ChiSqStats(self.discrete_data, disc_x, disc_y)
        max_ylabel_w = max((width(val) for val in disc_y.values), default=0)
        max_ylabel_w = min(max_ylabel_w, 200)
        x_off = height(attr_y.name) + max_ylabel_w
        y_off = 15
        square_size = min(view.width() - x_off - 35,
                          view.height() - y_off - 80)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, view.width(), view.height())
        if not disc_x.values or not disc_y.values:
            text_ = "特征 {} 和 {} 没有值".format(disc_x, disc_y) \
                if not disc_x.values and \
                   not disc_y.values and \
                          disc_x != disc_y \
                else \
                    "Feature {} has no values".format(
                        disc_x if not disc_x.values else disc_y)
            text(text_,
                 view.width() / 2 + 70,
                 view.height() / 2, Qt.AlignRight | Qt.AlignVCenter)
            return
        n = chi.n
        curr_x = x_off
        max_xlabel_h = 0
        self.areas = []
        for x, (px, xval_name) in enumerate(zip(chi.probs_x, disc_x.values)):
            if px == 0:
                continue
            width = square_size * px

            curr_y = y_off
            for y in range(len(chi.probs_y) - 1, -1, -1):  # bottom-up order
                py = chi.probs_y[y]
                yval_name = disc_y.values[y]
                if py == 0:
                    continue
                height = square_size * py

                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(self.canvas,
                                       curr_x + 2,
                                       curr_y + 2,
                                       width - 4,
                                       height - 4,
                                       z=-10,
                                       onclick=self.select_area)
                rect.value_pair = x, y
                self.areas.append(rect)
                show_pearson(rect, chi.residuals[y, x], 3 * selected)
                rect.setToolTip(make_tooltip())

                if x == 0:
                    text(yval_name, x_off, curr_y + height / 2,
                         Qt.AlignRight | Qt.AlignVCenter)
                curr_y += height

            xl = text(xval_name, curr_x + width / 2, y_off + square_size,
                      Qt.AlignHCenter | Qt.AlignTop)
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
            curr_x += width

        bottom = y_off + square_size + max_xlabel_h
        text(attr_y.name,
             0,
             y_off + square_size / 2,
             Qt.AlignLeft | Qt.AlignVCenter,
             bold=True,
             vertical=True)
        text(attr_x.name,
             x_off + square_size / 2,
             bottom,
             Qt.AlignHCenter | Qt.AlignTop,
             bold=True)
        bottom += 30
        xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p), 0, bottom)
        # Assume similar height for both lines
        text("N = " + fmt(chi.n), 0, bottom - xl.boundingRect().height())

    def get_widget_name_extension(self):
        if self.data is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)
        return None

    def send_report(self):
        self.report_plot()
Beispiel #7
0
class DomainEditor(QTableView):
    """Component for editing of variable types.

    Parameters
    ----------
    widget : parent widget
    """

    variables = ContextSetting([])

    def __init__(self, widget):
        super().__init__()
        widget.settingsHandler.initialize(self)
        widget.contextAboutToBeOpened.connect(lambda args: self.set_domain(args[0]))
        widget.contextOpened.connect(lambda: self.model().set_variables(self.variables))
        widget.contextClosed.connect(lambda: self.model().set_variables([]))

        self.setModel(VarTableModel(self.variables, self))

        self.setSelectionMode(QTableView.NoSelection)
        self.horizontalHeader().setStretchLastSection(True)
        self.setShowGrid(False)
        self.setEditTriggers(
            QTableView.SelectedClicked | QTableView.DoubleClicked)
        self.setSizePolicy(
            QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding)
        # setItemDelegate(ForColumn) apparently does not take ownership
        # of delegates, and coredumps if a a references is not stored somewhere.
        # We thus store delegates as attributes
        self.grid_delegate = HorizontalGridDelegate()
        self.setItemDelegate(self.grid_delegate)
        self.vartype_delegate = VarTypeDelegate(self, VarTableModel.typenames)
        self.setItemDelegateForColumn(Column.tpe, self.vartype_delegate)
        self.place_delegate = PlaceDelegate(self, VarTableModel.places)
        self.setItemDelegateForColumn(Column.place, self.place_delegate)

    @staticmethod
    def _is_missing(x):
        return str(x) in ("nan", "")

    @staticmethod
    def _iter_vals(x):
        """Iterate over values of sparse or dense arrays."""
        for i in range(x.shape[0]):
            yield x[i, 0]

    @staticmethod
    def _to_column(x, to_sparse, dtype=None):
        """Transform list of values to sparse/dense column array."""
        x = np.asarray(x, dtype=dtype).reshape(-1, 1)
        if to_sparse:
            x = sp.csc_matrix(x)
        return x

    @staticmethod
    def _merge(cols, force_dense=False):
        if len(cols) == 0:
            return None

        all_dense = not any(sp.issparse(c) for c in cols)
        if all_dense:
            return np.hstack(cols)
        if force_dense:
            return np.hstack([c.toarray() if sp.issparse(c) else c for c in cols])
        sparse_cols = [c if sp.issparse(c) else sp.csc_matrix(c) for c in cols]
        return sp.hstack(sparse_cols).tocsr()

    def get_domain(self, domain, data, deduplicate=False):
        """
        Create domain (and dataset) from changes made in the widget.

        Returns
        -------

        Args:
            domain (Domain): original domain
            data (Table): original data
            deduplicate (bool): if True, variable names are deduplicated and
               the result contains an additional list with names of renamed
               variables

        Returns:
            (new_domain, [attribute_columns, class_var_columns, meta_columns])
            or
            (new_domain, [attribute_columns, class_var_columns, meta_columns], renamed)
        """
        # Allow type-checking with type() instead of isinstance() for exact comparison
        # pylint: disable=unidiomatic-typecheck
        variables = self.model().variables
        places = [[], [], []]  # attributes, class_vars, metas
        cols = [[], [], []]  # Xcols, Ycols, Mcols

        def numbers_are_round(var, col_data):
            if type(var) == ContinuousVariable:
                data = np.asarray(col_data.data)  # Works for dense and sparse
                data = data[~np.isnan(data)]
                return (data == data.astype(int)).all()
            return False

        # Exit early with original domain if the user didn't actually change anything
        if all((name == orig_var.name and tpe == type(orig_var) and place == orig_plc)
               for (name, tpe, place, _, _), (orig_var, orig_plc) in
               zip(variables,
                   chain(((at, Place.feature) for at in domain.attributes),
                         ((cl, Place.class_var) for cl in domain.class_vars),
                         ((mt, Place.meta) for mt in domain.metas)))):
            if deduplicate:
                return domain, [data.X, data.Y, data.metas], []
            else:
                return domain, [data.X, data.Y, data.metas]

        relevant_names = [var[0] for var in variables if var[2] != Place.skip]
        if deduplicate:
            renamed_iter = iter(get_unique_names_duplicates(relevant_names))
        else:
            renamed_iter = iter(relevant_names)
        renamed = []
        for (name, tpe, place, _, may_be_numeric), (orig_var, orig_plc) in \
                zip(variables,
                        chain([(at, Place.feature) for at in domain.attributes],
                              [(cl, Place.class_var) for cl in domain.class_vars],
                              [(mt, Place.meta) for mt in domain.metas])):
            if place == Place.skip:
                continue

            new_name = next(renamed_iter)
            if new_name != name and name not in renamed:
                renamed.append(name)

            col_data = self._get_column(data, orig_var, orig_plc)
            is_sparse = sp.issparse(col_data)

            if new_name == orig_var.name and tpe == type(orig_var):
                var = orig_var
            elif tpe == type(orig_var):
                var = orig_var.copy(name=new_name)
            elif tpe == DiscreteVariable:
                values = natural_sorted(
                    list(str(i) for i in unique(col_data)
                         if not self._is_missing(i))
                )
                round_numbers = numbers_are_round(orig_var, col_data)
                col_data = [np.nan if self._is_missing(x) else values.index(str(x))
                            for x in self._iter_vals(col_data)]
                if round_numbers:
                    values = [str(int(float(v))) for v in values]
                var = tpe(new_name, values)
                col_data = self._to_column(col_data, is_sparse)
            elif tpe == StringVariable:
                var = tpe.make(new_name)
                if type(orig_var) in [DiscreteVariable, TimeVariable]:
                    col_data = [orig_var.repr_val(x) if not np.isnan(x) else ""
                                for x in self._iter_vals(col_data)]
                elif type(orig_var) == ContinuousVariable:
                    round_numbers = numbers_are_round(orig_var, col_data)
                    col_data = ['' if np.isnan(x) else
                                str(int(x)) if round_numbers else
                                orig_var.repr_val(x)
                                for x in self._iter_vals(col_data)]
                # don't obey sparsity for StringVariable since they are
                # in metas which are transformed to dense below
                col_data = self._to_column(col_data, False, dtype=object)
            elif tpe == ContinuousVariable and type(orig_var) == DiscreteVariable:
                var = tpe.make(new_name)
                if may_be_numeric:
                    col_data = [np.nan if self._is_missing(x) else float(orig_var.values[int(x)])
                                for x in self._iter_vals(col_data)]
                col_data = self._to_column(col_data, is_sparse)
            else:
                var = tpe(new_name)
            places[place].append(var)
            cols[place].append(col_data)

        # merge columns for X, Y and metas
        feats = cols[Place.feature]
        X = self._merge(feats) if feats else np.empty((len(data), 0))
        Y = self._merge(cols[Place.class_var], force_dense=True)
        m = self._merge(cols[Place.meta], force_dense=True)
        domain = Domain(*places)
        if deduplicate:
            return domain, [X, Y, m], renamed
        else:
            return domain, [X, Y, m]

    @staticmethod
    def _get_column(data, source_var, source_place):
        """ Extract column from data and preserve sparsity. """
        if source_place == Place.meta:
            col_data = data[:, source_var].metas
        elif source_place == Place.class_var:
            col_data = data[:, source_var].Y.reshape(-1, 1)
        else:
            col_data = data[:, source_var].X
        return col_data

    def set_domain(self, domain):
        self.variables = self.parse_domain(domain)
        self.model().set_orig_variables(self.variables)

    def reset_domain(self):
        self.model().reset_variables()
        self.variables = self.model().variables

    @staticmethod
    def parse_domain(domain):
        """Convert domain into variable representation used by
        the VarTableModel.

        Parameters
        ----------
        domain : the domain to convert

        Returns
        -------
        list of [variable_name, var_type, place, values, can_be_numeric] lists.

        """
        if domain is None:
            return []

        def may_be_numeric(var):
            if var.is_continuous:
                return True
            if var.is_discrete:
                try:
                    sum(float(x) for x in var.values)
                    return True
                except ValueError:
                    return False
            return False

        def discrete_value_display(value_list):
            result = ", ".join(str(v)
                               for v in value_list[:VarTableModel.DISCRETE_VALUE_DISPLAY_LIMIT])
            if len(value_list) > VarTableModel.DISCRETE_VALUE_DISPLAY_LIMIT:
                result += ", ..."
            return result

        return [
            [var.name, type(var), place,
             discrete_value_display(var.values) if var.is_discrete else "",
             may_be_numeric(var)]
            for place, vars in enumerate(
                (domain.attributes, domain.class_vars, domain.metas))
            for var in vars
        ]
Beispiel #8
0
class OWSelectRows(widget.OWWidget):
    name = "Select Rows"
    description = "Select rows from the data based on values of variables."
    icon = "icons/SelectRows.svg"
    priority = 100
    category = "Transform"
    keywords = ["filter"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        matching_data = Output("Matching Data", Table, default=True)
        unmatched_data = Output("Unmatched Data", Table)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    want_main_area = False

    settingsHandler = SelectRowsContextHandler()
    conditions = ContextSetting([])
    update_on_change = Setting(True)
    purge_attributes = Setting(False, schema_only=True)
    purge_classes = Setting(False, schema_only=True)
    auto_commit = Setting(True)

    settings_version = 2

    Operators = {
        ContinuousVariable: [
            (FilterContinuous.Equal, "equals"),
            (FilterContinuous.NotEqual, "is not"),
            (FilterContinuous.Less, "is below"),
            (FilterContinuous.LessEqual, "is at most"),
            (FilterContinuous.Greater, "is greater than"),
            (FilterContinuous.GreaterEqual, "is at least"),
            (FilterContinuous.Between, "is between"),
            (FilterContinuous.Outside, "is outside"),
            (FilterContinuous.IsDefined, "is defined"),
        ],
        DiscreteVariable: [(FilterDiscreteType.Equal, "is"),
                           (FilterDiscreteType.NotEqual, "is not"),
                           (FilterDiscreteType.In, "is one of"),
                           (FilterDiscreteType.IsDefined, "is defined")],
        StringVariable: [
            (FilterString.Equal, "equals"),
            (FilterString.NotEqual, "is not"),
            (FilterString.Less, "is before"),
            (FilterString.LessEqual, "is equal or before"),
            (FilterString.Greater, "is after"),
            (FilterString.GreaterEqual, "is equal or after"),
            (FilterString.Between, "is between"),
            (FilterString.Outside, "is outside"),
            (FilterString.Contains, "contains"),
            (FilterString.StartsWith, "begins with"),
            (FilterString.EndsWith, "ends with"),
            (FilterString.IsDefined, "is defined"),
        ]
    }

    Operators[TimeVariable] = Operators[ContinuousVariable]

    AllTypes = {}
    for _all_name, _all_type, _all_ops in (("All variables", 0, [
        (None, "are defined")
    ]), ("All numeric variables", 2, [
        (v, _plural(t)) for v, t in Operators[ContinuousVariable]
    ]), ("All string variables", 3, [(v, _plural(t))
                                     for v, t in Operators[StringVariable]])):
        Operators[_all_name] = _all_ops
        AllTypes[_all_name] = _all_type

    operator_names = {
        vtype: [name for _, name in filters]
        for vtype, filters in Operators.items()
    }

    class Error(widget.OWWidget.Error):
        parsing_error = Msg("{}")

    def __init__(self):
        super().__init__()

        self.old_purge_classes = True

        self.conditions = []
        self.last_output_conditions = None
        self.data = None
        self.data_desc = self.match_desc = self.nonmatch_desc = None
        self.variable_model = DomainModel([
            list(self.AllTypes), DomainModel.Separator, DomainModel.CLASSES,
            DomainModel.ATTRIBUTES, DomainModel.METAS
        ])

        box = gui.vBox(self.controlArea, 'Conditions', stretch=100)
        self.cond_list = QTableWidget(box,
                                      showGrid=False,
                                      selectionMode=QTableWidget.NoSelection)
        box.layout().addWidget(self.cond_list)
        self.cond_list.setColumnCount(4)
        self.cond_list.setRowCount(0)
        self.cond_list.verticalHeader().hide()
        self.cond_list.horizontalHeader().hide()
        for i in range(3):
            self.cond_list.horizontalHeader().setSectionResizeMode(
                i, QHeaderView.Stretch)
        self.cond_list.horizontalHeader().resizeSection(3, 30)
        self.cond_list.viewport().setBackgroundRole(QPalette.Window)

        box2 = gui.hBox(box)
        gui.rubber(box2)
        self.add_button = gui.button(box2,
                                     self,
                                     "Add Condition",
                                     callback=self.add_row)
        self.add_all_button = gui.button(box2,
                                         self,
                                         "Add All Variables",
                                         callback=self.add_all)
        self.remove_all_button = gui.button(box2,
                                            self,
                                            "Remove All",
                                            callback=self.remove_all)
        gui.rubber(box2)

        box_setting = gui.vBox(self.buttonsArea)
        self.cb_pa = gui.checkBox(box_setting,
                                  self,
                                  "purge_attributes",
                                  "Remove unused features",
                                  callback=self.conditions_changed)
        self.cb_pc = gui.checkBox(box_setting,
                                  self,
                                  "purge_classes",
                                  "Remove unused classes",
                                  callback=self.conditions_changed)

        self.report_button.setFixedWidth(120)
        gui.rubber(self.buttonsArea.layout())

        gui.auto_send(self.buttonsArea, self, "auto_commit")

        self.set_data(None)
        self.resize(600, 400)

    def add_row(self, attr=None, condition_type=None, condition_value=None):
        model = self.cond_list.model()
        row = model.rowCount()
        model.insertRow(row)

        attr_combo = ComboBoxSearch(
            minimumContentsLength=12,
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon)
        attr_combo.setModel(self.variable_model)
        attr_combo.row = row
        attr_combo.setCurrentIndex(
            self.variable_model.indexOf(attr) if attr else len(self.AllTypes) +
            1)
        self.cond_list.setCellWidget(row, 0, attr_combo)

        index = QPersistentModelIndex(model.index(row, 3))
        temp_button = QPushButton(
            '×',
            self,
            flat=True,
            styleSheet='* {font-size: 16pt; color: silver}'
            '*:hover {color: black}')
        temp_button.clicked.connect(lambda: self.remove_one(index.row()))
        self.cond_list.setCellWidget(row, 3, temp_button)

        self.remove_all_button.setDisabled(False)
        self.set_new_operators(attr_combo, attr is not None, condition_type,
                               condition_value)
        attr_combo.currentIndexChanged.connect(
            lambda _: self.set_new_operators(attr_combo, False))

        self.cond_list.resizeRowToContents(row)

    def add_all(self):
        if self.cond_list.rowCount():
            Mb = QMessageBox
            if Mb.question(
                    self, "Remove existing filters",
                    "This will replace the existing filters with "
                    "filters for all variables.", Mb.Ok | Mb.Cancel) != Mb.Ok:
                return
            self.remove_all()
        for attr in self.variable_model[len(self.AllTypes) + 1:]:
            self.add_row(attr)
        self.conditions_changed()

    def remove_one(self, rownum):
        self.remove_one_row(rownum)
        self.conditions_changed()

    def remove_all(self):
        self.remove_all_rows()
        self.conditions_changed()

    def remove_one_row(self, rownum):
        self.cond_list.removeRow(rownum)
        if self.cond_list.model().rowCount() == 0:
            self.remove_all_button.setDisabled(True)

    def remove_all_rows(self):
        # Disconnect signals to avoid stray emits when changing variable_model
        for row in range(self.cond_list.rowCount()):
            for col in (0, 1):
                widg = self.cond_list.cellWidget(row, col)
                if widg:
                    widg.currentIndexChanged.disconnect()
        self.cond_list.clear()
        self.cond_list.setRowCount(0)
        self.remove_all_button.setDisabled(True)

    def set_new_operators(self,
                          attr_combo,
                          adding_all,
                          selected_index=None,
                          selected_values=None):
        old_combo = self.cond_list.cellWidget(attr_combo.row, 1)
        prev_text = old_combo.currentText() if old_combo else ""
        oper_combo = QComboBox()
        oper_combo.row = attr_combo.row
        oper_combo.attr_combo = attr_combo
        attr_name = attr_combo.currentText()
        if attr_name in self.AllTypes:
            oper_combo.addItems(self.operator_names[attr_name])
        else:
            var = self.data.domain[attr_name]
            oper_combo.addItems(self.operator_names[type(var)])
        if selected_index is None:
            selected_index = oper_combo.findText(prev_text)
            if selected_index == -1:
                selected_index = 0
        oper_combo.setCurrentIndex(selected_index)
        self.cond_list.setCellWidget(oper_combo.row, 1, oper_combo)
        self.set_new_values(oper_combo, adding_all, selected_values)
        oper_combo.currentIndexChanged.connect(
            lambda _: self.set_new_values(oper_combo, False))

    @staticmethod
    def _get_lineedit_contents(box):
        contents = []
        for child in getattr(box, "controls", [box]):
            if isinstance(child, QLineEdit):
                contents.append(child.text())
            elif isinstance(child, DateTimeWidget):
                if child.format == (0, 1):
                    contents.append(child.time())
                elif child.format == (1, 0):
                    contents.append(child.date())
                elif child.format == (1, 1):
                    contents.append(child.dateTime())
        return contents

    @staticmethod
    def _get_value_contents(box):
        cont = []
        names = []
        for child in getattr(box, "controls", [box]):
            if isinstance(child, QLineEdit):
                cont.append(child.text())
            elif isinstance(child, QComboBox):
                cont.append(child.currentIndex())
            elif isinstance(child, QToolButton):
                if child.popup is not None:
                    model = child.popup.list_view.model()
                    for row in range(model.rowCount()):
                        item = model.item(row)
                        if item.checkState():
                            cont.append(row + 1)
                            names.append(item.text())
                    child.desc_text = ', '.join(names)
                    child.set_text()
            elif isinstance(child, DateTimeWidget):
                if child.format == (0, 1):
                    cont.append(child.time())
                elif child.format == (1, 0):
                    cont.append(child.date())
                elif child.format == (1, 1):
                    cont.append(child.dateTime())
            elif isinstance(child, QLabel) or child is None:
                pass
            else:
                raise TypeError('Type %s not supported.' % type(child))
        return tuple(cont)

    class QDoubleValidatorEmpty(QDoubleValidator):
        def validate(self, input_, pos):
            if not input_:
                return QDoubleValidator.Acceptable, input_, pos
            if self.locale().groupSeparator() in input_:
                return QDoubleValidator.Invalid, input_, pos
            return super().validate(input_, pos)

    def set_new_values(self, oper_combo, adding_all, selected_values=None):
        # def remove_children():
        #     for child in box.children()[1:]:
        #         box.layout().removeWidget(child)
        #         child.setParent(None)

        def add_textual(contents):
            le = gui.lineEdit(box,
                              self,
                              None,
                              sizePolicy=QSizePolicy(QSizePolicy.Expanding,
                                                     QSizePolicy.Expanding))
            if contents:
                le.setText(contents)
            le.setAlignment(Qt.AlignRight)
            le.editingFinished.connect(self.conditions_changed)
            return le

        def add_numeric(contents):
            le = add_textual(contents)
            le.setValidator(OWSelectRows.QDoubleValidatorEmpty())
            return le

        box = self.cond_list.cellWidget(oper_combo.row, 2)
        lc = ["", ""]
        oper = oper_combo.currentIndex()
        attr_name = oper_combo.attr_combo.currentText()
        if attr_name in self.AllTypes:
            vtype = self.AllTypes[attr_name]
            var = None
        else:
            var = self.data.domain[attr_name]
            var_idx = self.data.domain.index(attr_name)
            vtype = vartype(var)
            if selected_values is not None:
                lc = list(selected_values) + ["", ""]
                lc = [str(x) if vtype != 4 else x for x in lc[:2]]
        if box and vtype == box.var_type:
            lc = self._get_lineedit_contents(box) + lc

        if oper_combo.currentText().endswith(" defined"):
            label = QLabel()
            label.var_type = vtype
            self.cond_list.setCellWidget(oper_combo.row, 2, label)
        elif var is not None and var.is_discrete:
            if oper_combo.currentText().endswith(" one of"):
                if selected_values:
                    lc = list(selected_values)
                button = DropDownToolButton(self, var, lc)
                button.var_type = vtype
                self.cond_list.setCellWidget(oper_combo.row, 2, button)
            else:
                combo = ComboBoxSearch()
                combo.addItems(("", ) + var.values)
                if lc[0]:
                    combo.setCurrentIndex(int(lc[0]))
                else:
                    combo.setCurrentIndex(0)
                combo.var_type = vartype(var)
                self.cond_list.setCellWidget(oper_combo.row, 2, combo)
                combo.currentIndexChanged.connect(self.conditions_changed)
        else:
            box = gui.hBox(self, addToLayout=False)
            box.var_type = vtype
            self.cond_list.setCellWidget(oper_combo.row, 2, box)
            if vtype == 2:  # continuous:
                box.controls = [add_numeric(lc[0])]
                if oper > 5:
                    gui.widgetLabel(box, " and ")
                    box.controls.append(add_numeric(lc[1]))
            elif vtype == 3:  # string:
                box.controls = [add_textual(lc[0])]
                if oper in [6, 7]:
                    gui.widgetLabel(box, " and ")
                    box.controls.append(add_textual(lc[1]))
            elif vtype == 4:  # time:

                def invalidate_datetime():
                    if w_:
                        if w.dateTime() > w_.dateTime():
                            w_.setDateTime(w.dateTime())
                        if w.format == (1, 1):
                            w.calendarWidget.timeedit.setTime(w.time())
                            w_.calendarWidget.timeedit.setTime(w_.time())
                    elif w.format == (1, 1):
                        w.calendarWidget.timeedit.setTime(w.time())

                def datetime_changed():
                    self.conditions_changed()
                    invalidate_datetime()

                datetime_format = (var.have_date, var.have_time)
                column = self.data.get_column_view(var_idx)[0]
                w = DateTimeWidget(self, column, datetime_format)
                w.set_datetime(lc[0])
                box.controls = [w]
                box.layout().addWidget(w)
                w.dateTimeChanged.connect(datetime_changed)
                if oper > 5:
                    gui.widgetLabel(box, " and ")
                    w_ = DateTimeWidget(self, column, datetime_format)
                    w_.set_datetime(lc[1])
                    box.layout().addWidget(w_)
                    box.controls.append(w_)
                    invalidate_datetime()
                    w_.dateTimeChanged.connect(datetime_changed)
                else:
                    w_ = None
            else:
                box.controls = []
        if not adding_all:
            self.conditions_changed()

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.data = data
        self.cb_pa.setEnabled(not isinstance(data, SqlTable))
        self.cb_pc.setEnabled(not isinstance(data, SqlTable))
        self.remove_all_rows()
        self.add_button.setDisabled(data is None)
        self.add_all_button.setDisabled(
            data is None
            or len(data.domain.variables) + len(data.domain.metas) > 100)
        if not data:
            self.data_desc = None
            self.variable_model.set_domain(None)
            self.commit.deferred()
            return
        self.data_desc = report.describe_data_brief(data)
        self.variable_model.set_domain(data.domain)

        self.conditions = []
        self.openContext(data)
        for attr, cond_type, cond_value in self.conditions:
            if attr in self.variable_model:
                self.add_row(attr, cond_type, cond_value)
        if not self.cond_list.model().rowCount():
            self.add_row()

        self.commit.now()

    def conditions_changed(self):
        try:
            cells_by_rows = ([
                self.cond_list.cellWidget(row, col) for col in range(3)
            ] for row in range(self.cond_list.rowCount()))
            self.conditions = [
                (var_cell.currentData(gui.TableVariable)
                 or var_cell.currentText(), oper_cell.currentIndex(),
                 self._get_value_contents(val_cell))
                for var_cell, oper_cell, val_cell in cells_by_rows
            ]
            if self.update_on_change and (
                    self.last_output_conditions is None
                    or self.last_output_conditions != self.conditions):
                self.commit.deferred()
        except AttributeError:
            # Attribute error appears if the signal is triggered when the
            # controls are being constructed
            pass

    @staticmethod
    def _values_to_floats(attr, values):
        if len(values) == 0:
            return values
        if not all(values):
            return None
        if isinstance(attr, TimeVariable):
            values = (value.toString(format=Qt.ISODate) for value in values)
            parse = lambda x: (attr.parse(x), True)
        else:
            parse = QLocale().toDouble

        try:
            floats, ok = zip(*[parse(v) for v in values])
            if not all(ok):
                raise ValueError('Some values could not be parsed as floats'
                                 'in the current locale: {}'.format(values))
        except TypeError:
            floats = values  # values already floats
        assert all(isinstance(v, float) for v in floats)
        return floats

    @gui.deferred
    def commit(self):
        matching_output = self.data
        non_matching_output = None
        annotated_output = None

        self.Error.clear()
        if self.data:
            domain = self.data.domain
            conditions = []
            for attr_name, oper_idx, values in self.conditions:
                if attr_name in self.AllTypes:
                    attr_index = attr = None
                    attr_type = self.AllTypes[attr_name]
                    operators = self.Operators[attr_name]
                else:
                    attr_index = domain.index(attr_name)
                    attr = domain[attr_index]
                    attr_type = vartype(attr)
                    operators = self.Operators[type(attr)]
                opertype, _ = operators[oper_idx]
                if attr_type == 0:
                    filt = data_filter.IsDefined()
                elif attr_type in (2, 4):  # continuous, time
                    try:
                        floats = self._values_to_floats(attr, values)
                    except ValueError as e:
                        self.Error.parsing_error(e.args[0])
                        return
                    if floats is None:
                        continue
                    filt = data_filter.FilterContinuous(
                        attr_index, opertype, *floats)
                elif attr_type == 3:  # string
                    filt = data_filter.FilterString(attr_index, opertype,
                                                    *[str(v) for v in values])
                else:
                    if opertype == FilterDiscreteType.IsDefined:
                        f_values = None
                    else:
                        if not values or not values[0]:
                            continue
                        values = [attr.values[i - 1] for i in values]
                        if opertype == FilterDiscreteType.Equal:
                            f_values = {values[0]}
                        elif opertype == FilterDiscreteType.NotEqual:
                            f_values = set(attr.values)
                            f_values.remove(values[0])
                        elif opertype == FilterDiscreteType.In:
                            f_values = set(values)
                        else:
                            raise ValueError("invalid operand")
                    filt = data_filter.FilterDiscrete(attr_index, f_values)
                conditions.append(filt)

            if conditions:
                filters = data_filter.Values(conditions)
                matching_output = filters(self.data)
                filters.negate = True
                non_matching_output = filters(self.data)

                row_sel = np.in1d(self.data.ids, matching_output.ids)
                annotated_output = create_annotated_table(self.data, row_sel)

            # if hasattr(self.data, "name"):
            #     matching_output.name = self.data.name
            #     non_matching_output.name = self.data.name

            purge_attrs = self.purge_attributes
            purge_classes = self.purge_classes
            if (purge_attrs or purge_classes) and \
                    not isinstance(self.data, SqlTable):
                attr_flags = sum([
                    Remove.RemoveConstant * purge_attrs,
                    Remove.RemoveUnusedValues * purge_attrs
                ])
                class_flags = sum([
                    Remove.RemoveConstant * purge_classes,
                    Remove.RemoveUnusedValues * purge_classes
                ])
                # same settings used for attributes and meta features
                remover = Remove(attr_flags, class_flags, attr_flags)

                matching_output = remover(matching_output)
                non_matching_output = remover(non_matching_output)
                annotated_output = remover(annotated_output)

        if not matching_output:
            matching_output = None
        if not non_matching_output:
            non_matching_output = None
        if not annotated_output:
            annotated_output = None

        self.Outputs.matching_data.send(matching_output)
        self.Outputs.unmatched_data.send(non_matching_output)
        self.Outputs.annotated_data.send(annotated_output)

        self.match_desc = report.describe_data_brief(matching_output)
        self.nonmatch_desc = report.describe_data_brief(non_matching_output)

    def send_report(self):
        if not self.data:
            self.report_paragraph("No data.")
            return

        pdesc = None
        describe_domain = False
        for d in (self.data_desc, self.match_desc, self.nonmatch_desc):
            if not d or not d["Data instances"]:
                continue
            ndesc = d.copy()
            del ndesc["Data instances"]
            if pdesc is not None and pdesc != ndesc:
                describe_domain = True
            pdesc = ndesc

        conditions = []
        for attr, oper, values in self.conditions:
            if isinstance(attr, str):
                attr_name = attr
                var_type = self.AllTypes[attr]
                names = self.operator_names[attr_name]
            else:
                attr_name = attr.name
                var_type = vartype(attr)
                names = self.operator_names[type(attr)]
            name = names[oper]
            if oper == len(names) - 1:
                conditions.append("{} {}".format(attr_name, name))
            elif var_type == 1:  # discrete
                if name == "is one of":
                    valnames = [attr.values[v - 1] for v in values]
                    if not valnames:
                        continue
                    if len(valnames) == 1:
                        valstr = valnames[0]
                    else:
                        valstr = f"{', '.join(valnames[:-1])} or {valnames[-1]}"
                    conditions.append(f"{attr} is {valstr}")
                elif values and values[0]:
                    value = values[0] - 1
                    conditions.append(f"{attr} {name} {attr.values[value]}")
            elif var_type == 3:  # string variable
                conditions.append(
                    f"{attr} {name} {' and '.join(map(repr, values))}")
            elif var_type == 4:  # time
                values = (value.toString(format=Qt.ISODate)
                          for value in values)
                conditions.append(f"{attr} {name} {' and '.join(values)}")
            elif all(x for x in values):  # numeric variable
                conditions.append(f"{attr} {name} {' and '.join(values)}")
        items = OrderedDict()
        if describe_domain:
            items.update(self.data_desc)
        else:
            items["Instances"] = self.data_desc["Data instances"]
        items["Condition"] = " AND ".join(conditions) or "no conditions"
        self.report_items("Data", items)
        if describe_domain:
            self.report_items("Matching data", self.match_desc)
            self.report_items("Non-matching data", self.nonmatch_desc)
        else:
            match_inst = \
                bool(self.match_desc) and \
                self.match_desc["Data instances"]
            nonmatch_inst = \
                bool(self.nonmatch_desc) and \
                self.nonmatch_desc["Data instances"]
            self.report_items(
                "Output",
                (("Matching data",
                  "{} instances".format(match_inst) if match_inst else "None"),
                 ("Non-matching data", nonmatch_inst > 0
                  and "{} instances".format(nonmatch_inst))))
Beispiel #9
0
class OWMosaicDisplay(OWWidget):
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"

    inputs = [("Data", Table, "set_data", Default),
              ("Data Subset", Table, "set_subset_data")]
    outputs = [("Selected Data", Table)]

    settingsHandler = DomainContextHandler()
    use_boxes = Setting(True)
    variable1 = ContextSetting("")
    variable2 = ContextSetting("")
    variable3 = ContextSetting("")
    variable4 = ContextSetting("")
    selection = ContextSetting({})
    # interior_coloring is context setting to properly reset it
    # if the widget switches to regression and back (set setData)
    interior_coloring = ContextSetting(1)

    PEARSON, CLASS_DISTRIBUTION = 0, 1
    interior_coloring_opts = ["Pearson residuals", "Class distribution"]
    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    BLUE_COLORS = [
        QColor(255, 255, 255),
        QColor(210, 210, 255),
        QColor(110, 110, 255),
        QColor(0, 0, 255)
    ]
    RED_COLORS = [
        QColor(255, 255, 255),
        QColor(255, 200, 200),
        QColor(255, 100, 100),
        QColor(255, 0, 0)
    ]

    graph_name = "canvas"

    def __init__(self):
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.unprocessed_subset_data = None
        self.subset_data = None

        self.areas = []

        self.canvas = QGraphicsScene()
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.attr_combos = [
            gui.comboBox(box,
                         self,
                         value="variable{}".format(i),
                         orientation=Qt.Horizontal,
                         contentsLength=12,
                         callback=self.reset_graph,
                         sendSelectedValue=True,
                         valueType=str) for i in range(1, 5)
        ]
        self.rb_colors = gui.radioButtonsInBox(self.controlArea,
                                               self,
                                               "interior_coloring",
                                               self.interior_coloring_opts,
                                               box="Interior Coloring",
                                               callback=self.update_graph)
        self.bar_button = gui.checkBox(gui.indentedBox(self.rb_colors),
                                       self,
                                       'use_boxes',
                                       label='Compare with total',
                                       callback=self._compare_with_total)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        return QSize(530, 720)

    def _compare_with_total(self):
        if self.data and self.data.domain.has_discrete_class:
            self.interior_coloring = 1
            self.update_graph()

    def init_combos(self, data):
        for combo in self.attr_combos:
            combo.clear()
        if data is None:
            return
        for combo in self.attr_combos[1:]:
            combo.addItem("(None)")

        icons = gui.attributeIconDict
        for attr in chain(data.domain, data.domain.metas):
            if attr.is_discrete or attr.is_continuous:
                for combo in self.attr_combos:
                    combo.addItem(icons[attr], attr.name)

        if self.attr_combos[0].count() > 0:
            self.variable1 = self.attr_combos[0].itemText(0)
            self.variable2 = self.attr_combos[1].itemText(
                2 * (self.attr_combos[1].count() > 2))
        self.variable3 = self.attr_combos[2].itemText(0)
        self.variable4 = self.attr_combos[3].itemText(0)

    def get_attr_list(self):
        return [
            a for a in
            [self.variable1, self.variable2, self.variable3, self.variable4]
            if a and a != "(None)"
        ]

    def resizeEvent(self, e):
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        OWWidget.showEvent(self, ev)
        self.update_graph()

    def set_data(self, data):
        if type(data) == SqlTable and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.init_combos(self.data)
        self.information([0, 1, 2])
        if not self.data:
            self.discrete_data = None
            return
        """ TODO: check
        if data.has_missing_class():
            self.information(1, "Examples with missing classes were removed.")
        """
        if any(attr.is_continuous for attr in data.domain):
            self.discrete_data = Discretize(method=EqualFreq(n=4))(data)
        else:
            self.discrete_data = self.data

        if self.data.domain.class_var is None:
            self.rb_colors.setDisabled(True)
            disc_class = False
        else:
            self.rb_colors.setDisabled(False)
            disc_class = self.data.domain.has_discrete_class
            self.rb_colors.group.button(2).setDisabled(not disc_class)
            self.bar_button.setDisabled(not disc_class)
        self.interior_coloring = bool(disc_class)
        self.openContext(self.data)

        # if we first received subset we now call setSubsetData to process it
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None

    def set_subset_data(self, data):
        if self.data is None:
            self.unprocessed_subset_data = data
            self.warning(10)
            return
        try:
            self.subset_data = data.from_table(self.data.domain, data)
            self.warning(10)
        except:
            self.subset_data = None
            self.warning(
                10, "'Data' and 'Data Subset' are incompatible"
                if data is not None else "")

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        self.reset_graph()

    def clear_selection(self):
        self.selection = {}
        self.update_selection_rects()
        self.send_selection()

    def reset_graph(self):
        self.clear_selection()
        self.update_graph()

    def update_selection_rects(self):
        for i, (attr, vals, area) in enumerate(self.areas):
            if i in self.selection:
                area.setPen(QPen(Qt.black, 3, Qt.DotLine))
            else:
                area.setPen(QPen())

    def select_area(self, index, ev):
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        if not self.selection or self.data is None:
            self.send("Selected Data", None)
            return
        filters = []
        self.warning(6)
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.warning(
                    6,
                    "Selection of continuous variables on SQL is not supported"
                )
        for i in self.selection:
            cols, vals, area = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        if self.discrete_data is not self.data:
            idset = set(selection.ids)
            sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
            selection = self.data[sel_idx]
        self.send("Selected Data", selection)

    def send_report(self):
        self.report_plot(self.canvas)

    def update_graph(self):
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def draw_data(attr_list,
                      x0_x1,
                      y0_y1,
                      side,
                      condition,
                      total_attrs,
                      used_attrs=[],
                      used_vals=[],
                      attr_vals=""):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0,
                         x1,
                         y0,
                         y1,
                         "",
                         used_attrs,
                         used_vals,
                         attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (x1 - x0) / float(len(values) - 1)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (y1 - y0) / float(len(values) - 1)

            if attr_vals == "":
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [
                    conditionaldict[attr_vals + "-" + val] for val in values
                ]
            total = sum(counts)

            # if we are visualizing the third attribute and the first attribute
            # has the last value, we have to reverse the order in which the
            # boxes will be drawn otherwise, if the last cell, nearest to the
            # labels of the fourth attribute, is empty, we wouldn't be able to
            # position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(
                    data.domain[used_attrs[0]])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = getHtmlCompatibleString(val)
                if attr_vals != "":
                    newattrvals = attr_vals + "-" + val
                else:
                    newattrvals = val

                tooltip = condition + 4 * "&nbsp;" + attr + \
                    ": <b>" + htmlval + "</b><br>"
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                common_args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1, tooltip,
                                 *common_args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1, tooltip, total_attrs,
                                  *common_args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end, tooltip,
                                 *common_args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1, tooltip,
                                  total_attrs, *common_args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                      used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs,
                      used_vals, attr_vals):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = \
                    get_variable_values_sorted(data.domain[used_attrs[0]])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            if attr_vals == "":
                counts = [conditionaldict.get(val, 1) for val in values]
            else:
                counts = [
                    conditionaldict.get(attr_vals + "-" + val, 1)
                    for val in values
                ]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)

            aligns = [
                Qt.AlignTop | Qt.AlignHCenter, Qt.AlignRight | Qt.AlignVCenter,
                Qt.AlignBottom | Qt.AlignHCenter,
                Qt.AlignLeft | Qt.AlignVCenter
            ]
            align = aligns[side]
            for i in range(len(values)):
                val = values[i]
                perc = counts[i] / float(total)
                if distributiondict[val] != 0:
                    if side == 0:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * 0.5 * perc,
                                   y1 + self.ATTR_VAL_OFFSET, align)
                    elif side == 1:
                        CanvasText(self.canvas, str(val),
                                   x0 - self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)
                    elif side == 2:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * perc * 0.5,
                                   y0 - self.ATTR_VAL_OFFSET, align)
                    else:
                        CanvasText(self.canvas, str(val),
                                   x1 + self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)

                if side % 2 == 0:
                    currpos += perc * width + spacing * (total_attrs - side)
                else:
                    currpos += perc * height + spacing * (total_attrs - side)

            if side == 0:
                CanvasText(self.canvas,
                           attr,
                           x0 + (x1 - x0) / 2,
                           y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                           align,
                           bold=1)
            elif side == 1:
                CanvasText(self.canvas,
                           attr,
                           x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                           y0 + (y1 - y0) / 2,
                           align,
                           bold=1,
                           vertical=True)
            elif side == 2:
                CanvasText(self.canvas,
                           attr,
                           x0 + (x1 - x0) / 2,
                           y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                           align,
                           bold=1)
            else:
                CanvasText(self.canvas,
                           attr,
                           x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET,
                           y0 + (y1 - y0) / 2,
                           align,
                           bold=1,
                           vertical=True)

        def add_rect(x0,
                     x1,
                     y0,
                     y1,
                     condition="",
                     used_attrs=[],
                     used_vals=[],
                     attr_vals=""):
            area_index = len(self.areas)
            if x0 == x1:
                x1 += 1
            if y0 == y1:
                y1 += 1

            # rectangles of width and height 1 are not shown - increase
            if x1 - x0 + y1 - y0 == 2:
                y1 += 1

            if class_var and class_var.is_discrete:
                colors = [QColor(*col) for col in class_var.colors]
            else:
                colors = None

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(self.canvas,
                                           x,
                                           y,
                                           w,
                                           h,
                                           z=z,
                                           onclick=select_area,
                                           **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(self.canvas,
                                       x,
                                       y,
                                       w,
                                       h,
                                       pen_color,
                                       brush_color,
                                       z=z,
                                       onclick=select_area,
                                       **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.interior_coloring == self.PEARSON:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul, (apriori_dists[i][used_vals[i]] / float(s)
                          for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = (actual - expected) / sqrt(expected)
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" + "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and \
                        abs(y1 - y0) > bar_width:
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        counts = [
                            conditionalsubsetdict[attr_vals + "-" + val]
                            for val in cls_values
                        ]
                        if sum(counts) == 1:
                            rect(x0 - 2,
                                 y0 - 2,
                                 x1 - x0 + 5,
                                 y1 - y0 + 5,
                                 -550,
                                 colors[counts.index(1)],
                                 Qt.white,
                                 penWidth=2,
                                 penStyle=Qt.DashLine)
                        if self.subset_data is not None:
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[attr_vals +
                                                                "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total, bar_width,
                                         v, 15, color)
                                    total += v

                actual = [
                    conditionaldict[attr_vals + "-" + cls_values[i]]
                    for i in range(len(prior))
                ]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                outer_rect.setToolTip("{}<hr>Instances: {}<br><br>{}".format(
                    condition, n_actual, text[:-4]))

        def draw_legend(x0_x1, y0_y1):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if self.interior_coloring == self.PEARSON:
                names = [
                    "<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                    "Residuals:"
                ]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
            else:
                names = get_variable_values_sorted(class_var) + \
                        [class_var.name + ":"]
                colors = [QColor(*col) for col in class_var.colors]

            names = [
                CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
                for name in names
            ]
            totalwidth = sum(text.boundingRect().width() for text in names)

            # compute the x position of the center of the legend
            y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
            distance = 30
            startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2

            names[-1].setPos(startx + 15, y)
            names[-1].show()
            xoffset = names[-1].boundingRect().width() + distance

            size = 8

            for i in range(len(names) - 1):
                if self.interior_coloring == self.PEARSON:
                    edgecolor = Qt.black
                else:
                    edgecolor = colors[i]

                CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                                size, size, edgecolor, colors[i])
                names[i].setPos(startx + xoffset + 10, y)
                xoffset += distance + names[i].boundingRect().width()

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        subset = self.subset_data
        attr_list = self.get_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = type(data) == SqlTable
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.warning(5, "No valid data for current attributes.")
            return
        else:
            self.warning(5)

        if self.interior_coloring == self.PEARSON:
            apriori_dists = [
                get_distribution(data, attr) for attr in attr_list
            ]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(data.domain[attr])
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        # get the maximum width of rectangle
        xoff = 20
        width = 20
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1], bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3], bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        # get the maximum height of rectangle
        height = 100
        yoff = 45
        square_size = min(self.canvas_view.width() - width - 20,
                          self.canvas_view.height() - height - 20)

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(0, 0, self.canvas_view.width(),
                                      self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if subset:
            conditionalsubsetdict, _ = \
                get_conditional_distribution(subset, attr_list)

        # draw rectangles
        draw_data(attr_list, (xoff, xoff + square_size),
                  (yoff, yoff + square_size), 0, "", len(attr_list))
        draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size))
        self.update_selection_rects()
Beispiel #10
0
class OWAggregateColumns(widget.OWWidget):
    name = "Aggregate Columns"
    description = "Compute a sum, max, min ... of selected columns."
    icon = "icons/AggregateColumns.svg"
    priority = 100
    keywords = [
        "aggregate", "sum", "product", "max", "min", "mean", "median",
        "variance"
    ]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False

    settingsHandler = DomainContextHandler()
    variables: List[Variable] = ContextSetting([])
    operation = Setting("Sum")
    var_name = Setting("agg")
    auto_apply = Setting(True)

    Operations = {
        "Sum": np.nansum,
        "Product": np.nanprod,
        "Min": np.nanmin,
        "Max": np.nanmax,
        "Mean": np.nanmean,
        "Variance": np.nanvar,
        "Median": np.nanmedian
    }
    TimePreserving = ("Min", "Max", "Mean", "Median")

    def __init__(self):
        super().__init__()
        self.data = None

        box = gui.vBox(self.controlArea, box=True)

        self.variable_model = DomainModel(order=DomainModel.MIXED,
                                          valid_types=(ContinuousVariable, ))
        var_list = gui.listView(box,
                                self,
                                "variables",
                                model=self.variable_model,
                                callback=self.commit)
        var_list.setSelectionMode(var_list.ExtendedSelection)

        combo = gui.comboBox(box,
                             self,
                             "operation",
                             label="Operator: ",
                             orientation=Qt.Horizontal,
                             items=list(self.Operations),
                             sendSelectedValue=True,
                             callback=self.commit)
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)

        gui.lineEdit(box,
                     self,
                     "var_name",
                     label="Variable name: ",
                     orientation=Qt.Horizontal,
                     callback=self.commit)

        gui.auto_apply(self.controlArea, self)

    @Inputs.data
    def set_data(self, data: Table = None):
        self.closeContext()
        self.data = data
        if self.data:
            self.variable_model.set_domain(data.domain)
            self.info.set_input_summary(len(self.data),
                                        format_summary_details(self.data))
            self.variables.clear()
            self.openContext(data)
        else:
            self.variable_model.set_domain(None)
            self.variables.clear()
            self.info.set_input_summary(self.info.NoInput)
        self.unconditional_commit()

    def commit(self):
        augmented = self._compute_data()
        self.Outputs.data.send(augmented)
        if augmented is None:
            self.info.set_output_summary(self.info.NoOutput)
        else:
            self.info.set_output_summary(len(augmented),
                                         format_summary_details(augmented))

    def _compute_data(self):
        if not self.data or not self.variables:
            return self.data

        new_col = self._compute_column()
        new_var = self._new_var()
        return self.data.add_column(new_var, new_col)

    def _compute_column(self):
        arr = np.empty((len(self.data), len(self.variables)))
        for i, var in enumerate(self.variables):
            arr[:, i] = self.data.get_column_view(var)[0].astype(float)
        func = self.Operations[self.operation]
        return func(arr, axis=1)

    def _new_var_name(self):
        return get_unique_names(self.data.domain, self.var_name)

    def _new_var(self):
        name = self._new_var_name()
        if self.operation in self.TimePreserving \
                and all(isinstance(var, TimeVariable) for var in self.variables):
            return TimeVariable(name)
        return ContinuousVariable(name)

    def send_report(self):
        # fp for self.variables, pylint: disable=unsubscriptable-object
        if not self.data or not self.variables:
            return
        var_list = ", ".join(f"'{var.name}'"
                             for var in self.variables[:31][:-1])
        if len(self.variables) > 30:
            var_list += f" and {len(self.variables) - 30} others"
        else:
            var_list += f" and '{self.variables[-1].name}'"
        self.report_items(((
            "Output:",
            f"'{self._new_var_name()}' as {self.operation.lower()} of {var_list}"
        ), ))
Beispiel #11
0
class OWMosaicDisplay(OWWidget):
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    vizrank = SettingProvider(MosaicVizRank)
    settings_version = 2
    use_boxes = Setting(True)
    variable1 = ContextSetting(None)
    variable2 = ContextSetting(None)
    variable3 = ContextSetting(None)
    variable4 = ContextSetting(None)
    variable_color = ContextSetting(None)
    selection = Setting(set(), schema_only=True)

    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    BLUE_COLORS = [
        QColor(255, 255, 255),
        QColor(210, 210, 255),
        QColor(110, 110, 255),
        QColor(0, 0, 255)
    ]
    RED_COLORS = [
        QColor(255, 255, 255),
        QColor(255, 200, 200),
        QColor(255, 100, 100),
        QColor(255, 0, 0)
    ]
    graph_name = "canvas"

    attrs_changed_manually = Signal(list)

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of numeric features on SQL is not supported")

    def __init__(self):
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.subset_data = None
        self.subset_indices = None
        self.__pending_selection = self.selection
        self.selection = set()

        self.color_data = None

        self.areas = []

        self.canvas = QGraphicsScene(self)
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.model_1 = DomainModel(order=DomainModel.MIXED,
                                   valid_types=DomainModel.PRIMITIVE)
        self.model_234 = DomainModel(order=DomainModel.MIXED,
                                     valid_types=DomainModel.PRIMITIVE,
                                     placeholder="(None)")
        self.attr_combos = [
            gui.comboBox(box,
                         self,
                         value="variable{}".format(i),
                         orientation=Qt.Horizontal,
                         contentsLength=12,
                         searchable=True,
                         callback=self.attr_changed,
                         model=self.model_1 if i == 1 else self.model_234)
            for i in range(1, 5)
        ]
        self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank(
            box, self, "Find Informative Mosaics", self.set_attr)

        box2 = gui.vBox(self.controlArea, box="Interior Coloring")
        self.color_model = DomainModel(order=DomainModel.MIXED,
                                       valid_types=DomainModel.PRIMITIVE,
                                       placeholder="(Pearson residuals)")
        self.cb_attr_color = gui.comboBox(box2,
                                          self,
                                          value="variable_color",
                                          orientation=Qt.Horizontal,
                                          contentsLength=12,
                                          labelWidth=50,
                                          searchable=True,
                                          callback=self.set_color_data,
                                          model=self.color_model)
        self.bar_button = gui.checkBox(box2,
                                       self,
                                       'use_boxes',
                                       label='Compare with total',
                                       callback=self.update_graph)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        return QSize(720, 530)

    def _get_discrete_data(self, data):
        """
        Discretize continuous attributes.
        Return None when there is no data, no rows, or no primitive attributes.
        """
        if (data is None or not len(data) or not any(
                attr.is_discrete or attr.is_continuous
                for attr in chain(data.domain.variables, data.domain.metas))):
            return None
        elif any(attr.is_continuous for attr in data.domain.variables):
            return Discretize(method=EqualFreq(n=4),
                              remove_const=False,
                              discretize_classes=True,
                              discretize_metas=True)(data)
        else:
            return data

    def init_combos(self, data):
        def set_combos(value):
            self.model_1.set_domain(value)
            self.model_234.set_domain(value)
            self.color_model.set_domain(value)

        if data is None:
            set_combos(None)
            self.variable1 = self.variable2 = self.variable3 \
                = self.variable4 = self.variable_color = None
            return
        set_combos(self.data.domain)

        if len(self.model_1) > 0:
            self.variable1 = self.model_1[0]
            self.variable2 = self.model_1[min(1, len(self.model_1) - 1)]
        self.variable3 = self.variable4 = None
        self.variable_color = self.data.domain.class_var  # None is OK, too

    def get_disc_attr_list(self):
        return [
            self.discrete_data.domain[var.name]
            for var in (self.variable1, self.variable2, self.variable3,
                        self.variable4) if var
        ]

    def set_attr(self, *attrs):
        self.variable1, self.variable2, self.variable3, self.variable4 = [
            attr and self.data.domain[attr.name] for attr in attrs
        ]
        self.reset_graph()

    def attr_changed(self):
        self.attrs_changed_manually.emit(self.get_disc_attr_list())
        self.reset_graph()

    def resizeEvent(self, e):
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        OWWidget.showEvent(self, ev)
        self.update_graph()

    @Inputs.data
    def set_data(self, data):
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            self.discrete_data = None
            self.init_combos(None)
            return

        self.init_combos(self.data)
        self.openContext(self.data)

    @Inputs.data_subset
    def set_subset_data(self, data):
        self.subset_data = data

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        self.Warning.incompatible_subset.clear()
        self.subset_indices = None
        if self.data is not None and self.subset_data:
            transformed = self.subset_data.transform(self.data.domain)
            if np.all(np.isnan(transformed.X)) \
                    and np.all(np.isnan(transformed.Y)):
                self.Warning.incompatible_subset()
            else:
                indices = {e.id for e in transformed}
                self.subset_indices = [ex.id in indices for ex in self.data]
        if self.data is not None and self.__pending_selection is not None:
            self.selection = self.__pending_selection
            self.__pending_selection = None
        else:
            self.selection = set()
        self.set_color_data()
        self.update_graph()
        self.send_selection()

    def clear_selection(self):
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def coloring_changed(self):
        self.vizrank.coloring_changed()
        self.update_graph()

    def reset_graph(self):
        self.clear_selection()
        self.update_graph()

    def set_color_data(self):
        if self.data is None:
            return
        self.bar_button.setEnabled(self.variable_color is not None)
        attrs = [v for v in self.model_1 if v and v is not self.variable_color]
        domain = Domain(attrs, self.variable_color, None)
        self.color_data = self.data.from_table(domain, self.data)
        self.discrete_data = self._get_discrete_data(self.color_data)
        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(True)
        self.coloring_changed()

    def update_selection_rects(self):
        pens = (QPen(), QPen(Qt.black, 3, Qt.DotLine))
        for i, (_, _, area) in enumerate(self.areas):
            area.setPen(pens[i in self.selection])

    def select_area(self, index, ev):
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def send_report(self):
        self.report_plot(self.canvas)

    def update_graph(self):
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def get_counts(attr_vals, values):
            """Calculate rectangles' widths; if all are 0, they are set to 1."""
            if not attr_vals:
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [
                    conditionaldict[attr_vals + "-" + val] for val in values
                ]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)
            return total, counts

        def draw_data(attr_list,
                      x0_x1,
                      y0_y1,
                      side,
                      condition,
                      total_attrs,
                      used_attrs,
                      used_vals,
                      attr_vals=""):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0,
                         x1,
                         y0,
                         y1,
                         "",
                         used_attrs,
                         used_vals,
                         attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (x1 - x0) / float(len(values) - 1)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (y1 - y0) / float(len(values) - 1)

            total, counts = get_counts(attr_vals, values)

            # when visualizing the third attribute and the first attribute has
            # the last value, reverse the order in which the boxes are drawn;
            # otherwise, if the last cell, nearest to the labels of the fourth
            # attribute, is empty, we wouldn't be able to position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                newattrvals = attr_vals + "-" + val if attr_vals else val

                tooltip = "{}&nbsp;&nbsp;&nbsp;&nbsp;{}: <b>{}</b><br/>".format(
                    condition, attr.name, htmlval)
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1, tooltip, total_attrs,
                                  *args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1, tooltip,
                                  total_attrs, *args)
            draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                      used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs,
                      used_vals, attr_vals):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0
            total, counts = get_counts(attr_vals, values)
            aligns = [
                Qt.AlignTop | Qt.AlignHCenter, Qt.AlignRight | Qt.AlignVCenter,
                Qt.AlignBottom | Qt.AlignHCenter,
                Qt.AlignLeft | Qt.AlignVCenter
            ]
            align = aligns[side]
            for i, val in enumerate(values):
                if distributiondict[val] != 0:
                    perc = counts[i] / float(total)
                    rwidth = width * perc
                    xs = [
                        x0 + currpos + rwidth / 2, x0 - self.ATTR_VAL_OFFSET,
                        x0 + currpos + rwidth / 2, x1 + self.ATTR_VAL_OFFSET
                    ]
                    ys = [
                        y1 + self.ATTR_VAL_OFFSET,
                        y0 + currpos + height * 0.5 * perc,
                        y0 - self.ATTR_VAL_OFFSET,
                        y0 + currpos + height * 0.5 * perc
                    ]

                    CanvasText(self.canvas,
                               val,
                               xs[side],
                               ys[side],
                               align,
                               max_width=rwidth if side == 0 else None)
                    space = height if side % 2 else width
                    currpos += perc * space + spacing * (total_attrs - side)

            xs = [
                x0 + (x1 - x0) / 2, x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                x0 + (x1 - x0) / 2, x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET
            ]
            ys = [
                y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                y0 + (y1 - y0) / 2,
                y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                y0 + (y1 - y0) / 2
            ]
            CanvasText(self.canvas,
                       attr.name,
                       xs[side],
                       ys[side],
                       align,
                       bold=True,
                       vertical=side % 2)

        def add_rect(x0,
                     x1,
                     y0,
                     y1,
                     condition,
                     used_attrs,
                     used_vals,
                     attr_vals=""):
            area_index = len(self.areas)
            x1 += (x0 == x1)
            y1 += (y0 == y1)
            # rectangles of width and height 1 are not shown - increase
            y1 += (x1 - x0 + y1 - y0 == 2)
            colors = class_var and [QColor(*col) for col in class_var.colors]

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(self.canvas,
                                           x,
                                           y,
                                           w,
                                           h,
                                           z=z,
                                           onclick=select_area,
                                           **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(self.canvas,
                                       x,
                                       y,
                                       w,
                                       h,
                                       pen_color,
                                       brush_color,
                                       z=z,
                                       onclick=select_area,
                                       **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.variable_color is None:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul, (apriori_dists[i][used_vals[i]] / float(s)
                          for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = float((actual - expected) / sqrt(expected))
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" + "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and abs(y1 - y0) > bar_width:
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        if self.subset_indices is not None:
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[attr_vals +
                                                                "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total, bar_width,
                                         v, 15, color)
                                    total += v

                actual = [
                    conditionaldict[attr_vals + "-" + cls_values[i]]
                    for i in range(len(prior))
                ]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                outer_rect.setToolTip("{}<hr>Instances: {}<br><br>{}".format(
                    condition, n_actual, text[:-4]))

        def create_legend():
            if self.variable_color is None:
                names = [
                    "<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                    "Residuals:"
                ]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
                edges = repeat(Qt.black)
            else:
                names = get_variable_values_sorted(class_var)
                edges = colors = [QColor(*col) for col in class_var.colors]

            items = []
            size = 8
            for name, color, edgecolor in zip(names, colors, edges):
                item = QGraphicsItemGroup()
                item.addToGroup(
                    CanvasRectangle(None, -size / 2, -size / 2, size, size,
                                    edgecolor, color))
                item.addToGroup(
                    CanvasText(None, name, size, 0, Qt.AlignVCenter))
                items.append(item)
            return wrap_legend_items(items,
                                     hspacing=20,
                                     vspacing=16 + size,
                                     max_width=self.canvas_view.width() - xoff)

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        attr_list = self.get_disc_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = isinstance(data, SqlTable)
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        attrs = [attr for attr in attr_list if not attr.values]
        if attrs:
            CanvasText(self.canvas,
                       "Feature {} has no values".format(attrs[0]),
                       (self.canvas_view.width() - 120) / 2,
                       self.canvas_view.height() / 2)
            return
        if self.variable_color is None:
            apriori_dists = [
                get_distribution(data, attr) for attr in attr_list
            ]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(attr)
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        xoff = 20

        # get the maximum width of rectangle
        width = 20
        max_ylabel_w1 = max_ylabel_w2 = 0
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1].name, bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas,
                                  attr_list[3].name,
                                  bold=1,
                                  show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        legend = create_legend()

        # get the maximum height of rectangle
        yoff = 45
        legendoff = yoff + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
        square_size = min(
            self.canvas_view.width() - width - 20,
            self.canvas_view.height() - legendoff -
            legend.boundingRect().height())

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(0, 0, self.canvas_view.width(),
                                      self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if self.subset_indices:
            conditionalsubsetdict, _ = get_conditional_distribution(
                self.discrete_data[self.subset_indices], attr_list)

        # draw rectangles
        draw_data(attr_list, (xoff, xoff + square_size),
                  (yoff, yoff + square_size), 0, "", len(attr_list), [], [])

        self.canvas.addItem(legend)
        legend.setPos(
            xoff - legend.boundingRect().x() +
            max(0, (square_size - legend.boundingRect().width()) / 2),
            legendoff + square_size)
        self.update_selection_rects()

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            settings.migrate_str_to_variable(context,
                                             none_placeholder="(None)")
Beispiel #12
0
class OWSOM(OWWidget):
    name = "Self-Organizing Map"
    description = "Computation of self-organizing map."
    icon = "icons/SOM.svg"
    keywords = ["SOM"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    auto_dimension = Setting(True)
    size_x = Setting(10)
    size_y = Setting(10)
    hexagonal = Setting(1)
    initialization = Setting(0)

    attr_color = ContextSetting(None)
    size_by_instances = Setting(True)
    pie_charts = Setting(False)
    selection = Setting(None, schema_only=True)

    graph_name = "view"

    _grid_pen = QPen(QBrush(QColor(224, 224, 224)), 2)
    _grid_pen.setCosmetic(True)

    OptControls = namedtuple(
        "OptControls",
        ("shape", "auto_dim", "spin_x", "spin_y", "initialization", "start"))

    class Warning(OWWidget.Warning):
        ignoring_disc_variables = Msg("SOM ignores discrete variables.")
        missing_colors = \
            Msg("Some data instances have undefined value of '{}'.")
        missing_values = \
            Msg("{} data instance{} with undefined value(s) {} not shown.")
        single_attribute = Msg("Data contains a single numeric column.")

    class Error(OWWidget.Error):
        no_numeric_variables = Msg("Data contains no numeric columns.")
        no_defined_rows = Msg("All rows contain at least one undefined value.")

    def __init__(self):
        super().__init__()
        self.__pending_selection = self.selection
        self._optimizer = None
        self._optimizer_thread = None
        self.stop_optimization = False

        self.data = self.cont_x = None
        self.cells = self.member_data = None
        self.selection = None
        self.colors = self.thresholds = self.bin_labels = None

        box = gui.vBox(self.controlArea, box="SOM")
        shape = gui.comboBox(box,
                             self,
                             "",
                             items=("Hexagonal grid", "Square grid"))
        shape.setCurrentIndex(1 - self.hexagonal)

        box2 = gui.indentedBox(box, 10)
        auto_dim = gui.checkBox(box2,
                                self,
                                "auto_dimension",
                                "Set dimensions automatically",
                                callback=self.on_auto_dimension_changed)
        self.manual_box = box3 = gui.hBox(box2)
        spinargs = dict(value="",
                        widget=box3,
                        master=self,
                        minv=5,
                        maxv=100,
                        step=5,
                        alignment=Qt.AlignRight)
        spin_x = gui.spin(**spinargs)
        spin_x.setValue(self.size_x)
        gui.widgetLabel(box3, "×")
        spin_y = gui.spin(**spinargs)
        spin_y.setValue(self.size_y)
        gui.rubber(box3)
        self.manual_box.setEnabled(not self.auto_dimension)

        initialization = gui.comboBox(box,
                                      self,
                                      "initialization",
                                      items=("Initialize with PCA",
                                             "Random initialization",
                                             "Replicable random"))

        start = gui.button(box,
                           self,
                           "Restart",
                           callback=self.restart_som_pressed,
                           sizePolicy=(QSizePolicy.MinimumExpanding,
                                       QSizePolicy.Fixed))

        self.opt_controls = self.OptControls(shape, auto_dim, spin_x, spin_y,
                                             initialization, start)

        box = gui.vBox(self.controlArea, "Color")
        gui.comboBox(box,
                     self,
                     "attr_color",
                     maximumContentsLength=15,
                     callback=self.on_attr_color_change,
                     model=DomainModel(placeholder="(Same color)",
                                       valid_types=DomainModel.PRIMITIVE))
        gui.checkBox(box,
                     self,
                     "pie_charts",
                     label="Show pie charts",
                     callback=self.on_pie_chart_change)
        gui.checkBox(box,
                     self,
                     "size_by_instances",
                     label="Size by number of instances",
                     callback=self.on_attr_size_change)

        gui.rubber(self.controlArea)

        self.scene = QGraphicsScene(self)

        self.view = SomView(self.scene)
        self.view.setMinimumWidth(400)
        self.view.setMinimumHeight(400)
        self.view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setRenderHint(QPainter.Antialiasing)
        self.view.selection_changed.connect(self.on_selection_change)
        self.view.selection_moved.connect(self.on_selection_move)
        self.view.selection_mark_changed.connect(self.on_selection_mark_change)
        self.mainArea.layout().addWidget(self.view)

        self.elements = None
        self.grid = None
        self.grid_cells = None
        self.legend = None

    @Inputs.data
    def set_data(self, data):
        def prepare_data():
            if len(cont_attrs) < len(attrs):
                self.Warning.ignoring_disc_variables()
            if len(cont_attrs) == 1:
                self.Warning.single_attribute()
            x = Table.from_table(Domain(cont_attrs), data).X
            if sp.issparse(x):
                self.data = data
                self.cont_x = x.tocsr()
            else:
                mask = np.all(np.isfinite(x), axis=1)
                if not np.any(mask):
                    self.Error.no_defined_rows()
                else:
                    if np.all(mask):
                        self.data = data
                        self.cont_x = x.copy()
                    else:
                        self.data = data[mask]
                        self.cont_x = x[mask]
                    self.cont_x -= np.min(self.cont_x, axis=0)[None, :]
                    sums = np.sum(self.cont_x, axis=0)[None, :]
                    sums[sums == 0] = 1
                    self.cont_x /= sums

        def set_warnings():
            missing = len(data) - len(self.data)
            if missing == 1:
                self.Warning.missing_values(1, "", "is")
            elif missing > 1:
                self.Warning.missing_values(missing, "s", "are")

        self.stop_optimization_and_wait()

        self.closeContext()
        self.clear()
        self.Error.clear()
        self.Warning.clear()

        if data is not None:
            attrs = data.domain.attributes
            cont_attrs = [var for var in attrs if var.is_continuous]
            if not cont_attrs:
                self.Error.no_numeric_variables()
            else:
                prepare_data()

        if self.data is not None:
            self.controls.attr_color.model().set_domain(data.domain)
            self.attr_color = data.domain.class_var
            set_warnings()

        self.openContext(self.data)
        self.set_color_bins()
        self.create_legend()
        self.recompute_dimensions()
        self._set_input_summary(data and len(data))
        self.start_som()

    def _set_input_summary(self, n_tot):
        if self.data is None:
            self.info.set_input_summary(self.info.NoInput)
            return

        n = len(self.data)
        inst = str(n)
        nvars = f"{self.cont_x.shape[1]} numeric variables"
        if n < n_tot:
            inst += f" ({n_tot})"
            details = f"{n_tot - n} out of {n_tot} instances ignored " \
                      f"because of missing values;\n{nvars}"
        else:
            details = f"{n} instances; {nvars}"

        self.info.set_input_summary(inst, details)

    def clear(self):
        self.data = self.cont_x = None
        self.cells = self.member_data = None
        self.attr_color = None
        self.colors = self.thresholds = self.bin_labels = None
        if self.elements is not None:
            self.scene.removeItem(self.elements)
            self.elements = None
        self.clear_selection()
        self.controls.attr_color.model().set_domain(None)
        self.Warning.clear()
        self.Error.clear()

    def recompute_dimensions(self):
        if not self.auto_dimension or self.cont_x is None:
            return
        dim = max(5, int(np.ceil(np.sqrt(5 * np.sqrt(self.cont_x.shape[0])))))
        self.opt_controls.spin_x.setValue(dim)
        self.opt_controls.spin_y.setValue(dim)

    def on_auto_dimension_changed(self):
        self.manual_box.setEnabled(not self.auto_dimension)
        if self.auto_dimension:
            self.recompute_dimensions()
        else:
            spin_x = self.opt_controls.spin_x
            spin_y = self.opt_controls.spin_y
            dimx = int(5 * np.round(spin_x.value() / 5))
            dimy = int(5 * np.round(spin_y.value() / 5))
            spin_x.setValue(dimx)
            spin_y.setValue(dimy)

    def on_attr_color_change(self):
        self.controls.pie_charts.setEnabled(self.attr_color is not None)
        self.set_color_bins()
        self.create_legend()
        self.rescale()
        self._redraw()

    def on_attr_size_change(self):
        self._redraw()

    def on_pie_chart_change(self):
        self._redraw()

    def clear_selection(self):
        self.selection = None
        self.redraw_selection()

    def on_selection_change(self, selection, action=SomView.SelectionSet):
        if self.selection is None:
            self.selection = np.zeros(self.grid_cells.T.shape, dtype=np.int16)
        if action == SomView.SelectionSet:
            self.selection[:] = 0
            self.selection[selection] = 1
        elif action == SomView.SelectionAddToGroup:
            self.selection[selection] = max(1, np.max(self.selection))
        elif action == SomView.SelectionNewGroup:
            self.selection[selection] = 1 + np.max(self.selection)
        elif action & SomView.SelectionRemove:
            self.selection[selection] = 0
        self.redraw_selection()
        self.update_output()

    def on_selection_move(self, event: QKeyEvent):
        if self.selection is None or not np.any(self.selection):
            if event.key() in (Qt.Key_Right, Qt.Key_Down):
                x = y = 0
            else:
                x = self.size_x - 1
                y = self.size_y - 1
        else:
            x, y = np.nonzero(self.selection)
            if len(x) > 1:
                return
            if event.key() == Qt.Key_Up and y > 0:
                y -= 1
            if event.key() == Qt.Key_Down and y < self.size_y - 1:
                y += 1
            if event.key() == Qt.Key_Left and x:
                x -= 1
            if event.key() == Qt.Key_Right and x < self.size_x - 1:
                x += 1
            x -= self.hexagonal and x == self.size_x - 1 and y % 2

        if self.selection is not None and self.selection[x, y]:
            return
        selection = np.zeros(self.grid_cells.shape, dtype=bool)
        selection[x, y] = True
        self.on_selection_change(selection)

    def on_selection_mark_change(self, marks):
        self.redraw_selection(marks=marks)

    def redraw_selection(self, marks=None):
        if self.grid_cells is None:
            return

        sel_pen = QPen(QBrush(QColor(128, 128, 128)), 2)
        sel_pen.setCosmetic(True)
        mark_pen = QPen(QBrush(QColor(128, 128, 128)), 4)
        mark_pen.setCosmetic(True)
        pens = [self._grid_pen, sel_pen]

        mark_brush = QBrush(QColor(224, 255, 255))
        sels = self.selection is not None and np.max(self.selection)
        palette = ColorPaletteGenerator(number_of_colors=sels + 1)
        brushes = [QBrush(Qt.NoBrush)] + \
                  [QBrush(palette[i].lighter(165)) for i in range(sels)]

        for y in range(self.size_y):
            for x in range(self.size_x - (y % 2) * self.hexagonal):
                cell = self.grid_cells[y, x]
                marked = marks is not None and marks[x, y]
                sel_group = self.selection is not None and self.selection[x, y]
                if marked:
                    cell.setBrush(mark_brush)
                    cell.setPen(mark_pen)
                else:
                    cell.setBrush(brushes[sel_group])
                    cell.setPen(pens[bool(sel_group)])
                cell.setZValue(marked or sel_group)

    def restart_som_pressed(self):
        if self._optimizer_thread is not None:
            self.stop_optimization = True
        else:
            self.start_som()

    def start_som(self):
        self.read_controls()
        self.update_layout()
        self.clear_selection()
        if self.cont_x is not None:
            self.enable_controls(False)
            self._recompute_som()
        else:
            self.update_output()

    def read_controls(self):
        c = self.opt_controls
        self.hexagonal = c.shape.currentIndex() == 0
        self.size_x = c.spin_x.value()
        self.size_y = c.spin_y.value()

    def enable_controls(self, enable):
        c = self.opt_controls
        c.shape.setEnabled(enable)
        c.auto_dim.setEnabled(enable)
        c.start.setText("Start" if enable else "Stop")

    def update_layout(self):
        self.set_legend_pos()
        if self.elements:  # Prevent having redrawn grid but with old elements
            self.scene.removeItem(self.elements)
            self.elements = None
        self.redraw_grid()
        self.rescale()

    def _redraw(self):
        self.Warning.missing_colors.clear()
        if self.elements:
            self.scene.removeItem(self.elements)
            self.elements = None
        self.view.set_dimensions(self.size_x, self.size_y, self.hexagonal)

        if self.cells is None:
            return
        sizes = self.cells[:, :, 1] - self.cells[:, :, 0]
        sizes = sizes.astype(float)
        if not self.size_by_instances:
            sizes[sizes != 0] = 0.8
        else:
            sizes *= 0.8 / np.max(sizes)

        self.elements = QGraphicsItemGroup()
        self.scene.addItem(self.elements)
        if self.attr_color is None:
            self._draw_same_color(sizes)
        elif self.pie_charts:
            self._draw_pie_charts(sizes)
        else:
            self._draw_colored_circles(sizes)

    @property
    def _grid_factors(self):
        return (0.5, sqrt3_2) if self.hexagonal else (0, 1)

    def _draw_same_color(self, sizes):
        fx, fy = self._grid_factors
        color = QColor(64, 64, 64)
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                n = len(self.get_member_indices(x, y))
                if not r:
                    continue
                ellipse = ColoredCircle(r / 2, color, 0)
                ellipse.setPos(x + (y % 2) * fx, y * fy)
                ellipse.setToolTip(f"{n} instances")
                self.elements.addToGroup(ellipse)

    def _get_color_column(self):
        color_column = \
            self.data.get_column_view(self.attr_color)[0].astype(float,
                                                                 copy=False)
        if self.attr_color.is_discrete:
            with np.errstate(invalid="ignore"):
                int_col = color_column.astype(int)
            int_col[np.isnan(color_column)] = len(self.colors)
        else:
            int_col = np.zeros(len(color_column), dtype=int)
            # The following line is not necessary because rows with missing
            # numeric data are excluded. Uncomment it if you change SOM to
            # tolerate missing values.
            # int_col[np.isnan(color_column)] = len(self.colors)
            for i, thresh in enumerate(self.thresholds, start=1):
                int_col[color_column >= thresh] = i
        return int_col

    def _tooltip(self, colors, distribution):
        if self.attr_color.is_discrete:
            values = self.attr_color.values
        else:
            values = self._bin_names()
        tot = np.sum(distribution)
        nbhp = "\N{NON-BREAKING HYPHEN}"
        return '<table style="white-space: nowrap">' + "".join(f"""
            <tr>
                <td>
                    <font color={color.name()}>■</font>
                    <b>{escape(val).replace("-", nbhp)}</b>:
                </td>
                <td>
                    {n} ({n / tot * 100:.1f}&nbsp;%)
                </td>
            </tr>
            """ for color, val, n in zip(colors, values, distribution) if n) \
            + "</table>"

    def _draw_pie_charts(self, sizes):
        fx, fy = self._grid_factors
        color_column = self._get_color_column()
        colors = self.colors + [Qt.gray]
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                if not r:
                    self.grid_cells[y, x].setToolTip("")
                    continue
                members = self.get_member_indices(x, y)
                color_dist = np.bincount(color_column[members],
                                         minlength=len(colors))
                rel_color_dist = color_dist.astype(float) / len(members)
                pie = PieChart(rel_color_dist, r / 2, colors)
                pie.setToolTip(self._tooltip(colors, color_dist))
                self.elements.addToGroup(pie)
                pie.setPos(x + (y % 2) * fx, y * fy)

    def _draw_colored_circles(self, sizes):
        fx, fy = self._grid_factors
        color_column = self._get_color_column()
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                if not r:
                    continue
                members = self.get_member_indices(x, y)
                color_dist = color_column[members]
                color_dist = color_dist[color_dist < len(self.colors)]
                if len(color_dist) != len(members):
                    self.Warning.missing_colors(self.attr_color.name)
                bc = np.bincount(color_dist, minlength=len(self.colors))
                color = self.colors[np.argmax(bc)]
                ellipse = ColoredCircle(r / 2, color,
                                        np.max(bc) / len(members))
                ellipse.setPos(x + (y % 2) * fx, y * fy)
                ellipse.setToolTip(self._tooltip(self.colors, bc))
                self.elements.addToGroup(ellipse)

    def redraw_grid(self):
        if self.grid is not None:
            self.scene.removeItem(self.grid)
        self.grid = QGraphicsItemGroup()
        self.grid.setZValue(-200)
        self.grid_cells = np.full((self.size_y, self.size_x), None)
        for y in range(self.size_y):
            for x in range(self.size_x - (y % 2) * self.hexagonal):
                if self.hexagonal:
                    cell = QGraphicsPathItem(_hexagon_path)
                    cell.setPos(x + (y % 2) / 2, y * sqrt3_2)
                else:
                    cell = QGraphicsRectItem(x - 0.5, y - 0.5, 1, 1)
                self.grid_cells[y, x] = cell
                cell.setPen(self._grid_pen)
                self.grid.addToGroup(cell)
        self.scene.addItem(self.grid)

    def get_member_indices(self, x, y):
        i, j = self.cells[x, y]
        return self.member_data[i:j]

    def _recompute_som(self):
        if self.cont_x is None:
            return

        class Optimizer(QObject):
            update = Signal(float, np.ndarray, np.ndarray)
            done = Signal(SOM)
            stopped = Signal()

            def __init__(self, data, widget):
                super().__init__()
                self.som = SOM(
                    widget.size_x,
                    widget.size_y,
                    hexagonal=widget.hexagonal,
                    pca_init=widget.initialization == 0,
                    random_seed=0 if widget.initialization == 2 else None)
                self.data = data
                self.widget = widget

            def callback(self, progress):
                self.update.emit(progress, self.som.weights.copy(),
                                 self.som.ssum_weights.copy())
                return not self.widget.stop_optimization

            def run(self):
                try:
                    self.som.fit(self.data,
                                 N_ITERATIONS,
                                 callback=self.callback)
                    # Report an exception, but still remove the thread
                finally:
                    self.done.emit(self.som)
                    self.stopped.emit()

        def update(_progress, weights, ssum_weights):
            progressbar.advance()
            self._assign_instances(weights, ssum_weights)
            self._redraw()

        def done(som):
            self.enable_controls(True)
            progressbar.finish()
            self._assign_instances(som.weights, som.ssum_weights)
            self._redraw()
            # This is the first time we know what was selected (assuming that
            # initialization is not set to random)
            if self.__pending_selection is not None:
                self.on_selection_change(self.__pending_selection)
                self.__pending_selection = None
            self.update_output()

        def thread_finished():
            self._optimizer = None
            self._optimizer_thread = None

        progressbar = gui.ProgressBar(self, N_ITERATIONS)

        self._optimizer = Optimizer(self.cont_x, self)
        self._optimizer_thread = QThread()
        self._optimizer_thread.setStackSize(5 * 2**20)
        self._optimizer.update.connect(update)
        self._optimizer.done.connect(done)
        self._optimizer.stopped.connect(self._optimizer_thread.quit)
        self._optimizer.moveToThread(self._optimizer_thread)
        self._optimizer_thread.started.connect(self._optimizer.run)
        self._optimizer_thread.finished.connect(thread_finished)
        self.stop_optimization = False
        self._optimizer_thread.start()

    def stop_optimization_and_wait(self):
        if self._optimizer_thread is not None:
            self.stop_optimization = True
            self._optimizer_thread.quit()
            self._optimizer_thread.wait()
            self._optimizer_thread = None

    def onDeleteWidget(self):
        self.stop_optimization_and_wait()
        self.clear()
        super().onDeleteWidget()

    def _assign_instances(self, weights, ssum_weights):
        if self.cont_x is None:
            return  # the widget is shutting down while signals still processed
        assignments = SOM.winner_from_weights(self.cont_x, weights,
                                              ssum_weights, self.hexagonal)
        members = defaultdict(list)
        for i, (x, y) in enumerate(assignments):
            members[(x, y)].append(i)
        members.pop(None, None)
        self.cells = np.empty((self.size_x, self.size_y, 2), dtype=int)
        self.member_data = np.empty(self.cont_x.shape[0], dtype=int)
        index = 0
        for x in range(self.size_x):
            for y in range(self.size_y):
                nmembers = len(members[(x, y)])
                self.member_data[index:index + nmembers] = members[(x, y)]
                self.cells[x, y] = [index, index + nmembers]
                index += nmembers

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self.create_legend()  # re-wrap lines if necessary
        self.rescale()

    def rescale(self):
        if self.legend:
            leg_height = self.legend.boundingRect().height()
            leg_extra = 1.5
        else:
            leg_height = 0
            leg_extra = 1

        vw, vh = self.view.width(), self.view.height() - leg_height
        scale = min(vw / (self.size_x + 1),
                    vh / ((self.size_y + leg_extra) * self._grid_factors[1]))
        self.view.setTransform(QTransform.fromScale(scale, scale))
        if self.hexagonal:
            self.view.setSceneRect(0, -1, self.size_x - 1,
                                   (self.size_y + leg_extra) * sqrt3_2 +
                                   leg_height / scale)
        else:
            self.view.setSceneRect(-0.25, -0.25, self.size_x - 0.5,
                                   self.size_y - 0.5 + leg_height / scale)

    def update_output(self):
        if self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return

        indices = np.zeros(len(self.data), dtype=int)
        if self.selection is not None and np.any(self.selection):
            for y in range(self.size_y):
                for x in range(self.size_x):
                    rows = self.get_member_indices(x, y)
                    indices[rows] = self.selection[x, y]

        if np.any(indices):
            sel_data = create_groups_table(self.data, indices, False, "Group")
            self.Outputs.selected_data.send(sel_data)
            self.info.set_output_summary(str(len(sel_data)))
        else:
            self.Outputs.selected_data.send(None)
            self.info.set_output_summary(self.info.NoOutput)

        if np.max(indices) > 1:
            annotated = create_groups_table(self.data, indices)
        else:
            annotated = create_annotated_table(self.data,
                                               np.flatnonzero(indices))
        self.Outputs.annotated_data.send(annotated)

    def set_color_bins(self):
        if self.attr_color is None:
            self.thresholds = self.bin_labels = self.colors = None
        elif self.attr_color.is_discrete:
            self.thresholds = self.bin_labels = None
            self.colors = [QColor(*color) for color in self.attr_color.colors]
        else:
            col = self.data.get_column_view(self.attr_color)[0].astype(float)
            if self.attr_color.is_time:
                binning = time_binnings(col, min_bins=4)[-1]
            else:
                binning = decimal_binnings(col, min_bins=4)[-1]
            self.thresholds = binning.thresholds[1:-1]
            self.bin_labels = (binning.labels[1:-1],
                               binning.short_labels[1:-1])
            palette = ContinuousPaletteGenerator(*self.attr_color.colors)
            nbins = len(self.thresholds) + 1
            self.colors = [palette[i / (nbins - 1)] for i in range(nbins)]

    def create_legend(self):
        if self.legend is not None:
            self.scene.removeItem(self.legend)
            self.legend = None
        if self.attr_color is None:
            return

        if self.attr_color.is_discrete:
            names = self.attr_color.values
        else:
            names = self._bin_names()

        items = []
        size = 8
        for name, color in zip(names, self.colors):
            item = QGraphicsItemGroup()
            item.addToGroup(
                CanvasRectangle(None, -size / 2, -size / 2, size, size,
                                Qt.gray, color))
            item.addToGroup(CanvasText(None, name, size, 0, Qt.AlignVCenter))
            items.append(item)

        self.legend = wrap_legend_items(items,
                                        hspacing=20,
                                        vspacing=16 + size,
                                        max_width=self.view.width() - 25)
        self.legend.setFlags(self.legend.ItemIgnoresTransformations)
        self.legend.setTransform(
            QTransform.fromTranslate(-self.legend.boundingRect().width() / 2,
                                     0))
        self.scene.addItem(self.legend)
        self.set_legend_pos()

    def _bin_names(self):
        labels, short_labels = self.bin_labels
        return \
            [f"< {labels[0]}"] \
            + [f"{x} - {y}" for x, y in zip(labels, short_labels[1:])] \
            + [f"≥ {labels[-1]}"]

    def set_legend_pos(self):
        if self.legend is None:
            return
        self.legend.setPos(self.size_x / 2,
                           (self.size_y + 0.2 + 0.3 * self.hexagonal) *
                           self._grid_factors[1])

    def send_report(self):
        self.report_plot()
        if self.attr_color:
            self.report_caption(
                f"Self-organizing map colored by '{self.attr_color.name}'")
Beispiel #13
0
class OWRadviz(OWAnchorProjectionWidget):
    name = "Radviz"
    description = "显示RadViz投影"
    icon = "icons/Radviz.svg"
    priority = 241
    keywords = ["viz"]

    settings_version = 3

    selected_vars = ContextSetting([])
    vizrank = SettingProvider(RadvizVizRank)
    GRAPH_CLASS = OWRadvizGraph
    graph = SettingProvider(OWRadvizGraph)

    class Warning(OWAnchorProjectionWidget.Warning):
        invalid_embedding = widget.Msg("No projection for selected features")
        removed_vars = widget.Msg("Categorical variables with more than"
                                  " two values are not shown.")
        max_vars_selected = widget.Msg(
            "Maximum number of selected variables reached.")

    def _add_controls(self):
        self.model_selected = VariableSelectionModel(self.selected_vars,
                                                     max_vars=20)
        variables_selection(self.controlArea, self, self.model_selected)
        self.model_selected.selection_changed.connect(
            self.__model_selected_changed)
        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            None, self, "建议特征", self.vizrank_set_attrs)
        self.controlArea.layout().addWidget(self.btn_vizrank)
        super()._add_controls()
        self.controlArea.layout().removeWidget(self.control_area_stretch)
        self.control_area_stretch.setParent(None)

    @property
    def primitive_variables(self):
        if self.data is None or self.data.domain is None:
            return []
        dom = self.data.domain
        return [
            v for v in chain(dom.variables, dom.metas)
            if v.is_continuous or v.is_discrete and len(v.values) == 2
        ]

    @property
    def effective_variables(self):
        return self.selected_vars

    def vizrank_set_attrs(self, *attrs):
        if not attrs:
            return
        self.selected_vars[:] = attrs
        # Ugly, but the alternative is to have yet another signal to which
        # the view will have to connect
        self.model_selected.selection_changed.emit()

    def __model_selected_changed(self):
        if self.model_selected.is_full():
            self.Warning.max_vars_selected()
        else:
            self.Warning.max_vars_selected.clear()
        self.init_projection()
        self.setup_plot()
        self.commit()

    def colors_changed(self):
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        super().set_data(data)
        self._init_vizrank()
        self.init_projection()

    def _init_vizrank(self):
        is_enabled = self.data is not None and \
            len(self.primitive_variables) > 3 and \
            self.attr_color is not None and \
            not np.isnan(self.data.get_column_view(
                self.attr_color)[0].astype(float)).all() and \
            np.sum(np.all(np.isfinite(self.data.X), axis=1)) > 1 and \
            np.all(np.nan_to_num(np.nanstd(self.data.X, 0)) != 0)
        self.btn_vizrank.setEnabled(is_enabled)
        if is_enabled:
            self.vizrank.initialize()

    def check_data(self):
        super().check_data()
        if self.data is not None:
            domain = self.data.domain
            vars_ = chain(domain.variables, domain.metas)
            n_vars = sum(v.is_primitive() for v in vars_)
            if len(self.primitive_variables) < n_vars:
                self.Warning.removed_vars()

    def init_attr_values(self):
        super().init_attr_values()
        self.selected_vars[:] = self.primitive_variables[:5]
        self.model_selected[:] = self.primitive_variables

    def _manual_move(self, anchor_idx, x, y):
        angle = np.arctan2(y, x)
        super()._manual_move(anchor_idx, np.cos(angle), np.sin(angle))

    def _send_components_x(self):
        components_ = super()._send_components_x()
        angle = np.arctan2(*components_[::-1])
        return np.row_stack((components_, angle))

    def _send_components_metas(self):
        return np.vstack((super()._send_components_metas(), ["angle"]))

    def clear(self):
        super().clear()
        self.projector = RadViz()

    @classmethod
    def migrate_context(cls, context, version):
        values = context.values
        if version < 2:
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
        if version < 3 and "selected_vars" in values:
            values["selected_vars"] = (values["selected_vars"], -3)
class OWExplainModel(OWWidget, ConcurrentWidgetMixin):
    name = "Explain Model"
    description = "Model explanation widget."
    keywords = ["explain", "explain prediction", "explain model"]
    icon = "icons/ExplainModel.svg"
    priority = 100
    replaces = [
        "orangecontrib.prototypes.widgets.owexplainmodel.OWExplainModel"
    ]

    class Inputs:
        data = Input("Data", Table, default=True)
        model = Input("Model", Model)

    class Outputs:
        selected_data = Output("Selected Data", Table)
        scores = Output("Scores", Table)

    class Error(OWWidget.Error):
        domain_transform_err = Msg("{}")
        unknown_err = Msg("{}")

    class Information(OWWidget.Information):
        data_sampled = Msg("Data has been sampled.")

    settingsHandler = ClassValuesContextHandler()
    target_index = ContextSetting(0)
    n_attributes = Setting(10)
    show_legend = Setting(True)
    selection = Setting((), schema_only=True)  # type: Tuple[str, List[int]]
    auto_send = Setting(True)
    visual_settings = Setting({}, schema_only=True)

    graph_name = "scene"

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.__results = None  # type: Optional[Results]
        self.data = None  # type: Optional[Table]
        self.model = None  # type: Optional[Model]
        self._violin_plot = None  # type: Optional[ViolinPlot]
        self.setup_gui()
        self.__pending_selection = self.selection

        initial = ViolinPlot().parameter_setter.initial_settings
        VisualSettingsDialog(self, initial)

    def setup_gui(self):
        self._add_controls()
        self._add_plot()
        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    def _add_plot(self):
        self.scene = GraphicsScene()
        self.view = GraphicsView(self.scene)
        self.view.resized.connect(self.update_plot)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def _add_controls(self):
        box = gui.vBox(self.controlArea, "Target class")
        self._target_combo = gui.comboBox(
            box, self, "target_index",
            callback=self.__target_combo_changed,
            contentsLength=12)

        box = gui.hBox(self.controlArea, "Display features")
        gui.label(box, self, "Best ranked: ")
        gui.spin(box, self, "n_attributes", 1, ViolinPlot.MAX_N_ITEMS,
                 controlWidth=80, callback=self.__n_spin_changed)
        box = gui.hBox(self.controlArea, True)
        gui.checkBox(box, self, "show_legend", "Show legend",
                     callback=self.__show_check_changed)

        gui.rubber(self.controlArea)
        box = gui.vBox(self.controlArea, box=True)
        gui.auto_send(box, self, "auto_send", box=False)

    def __target_combo_changed(self):
        self.update_scene()
        self.clear_selection()

    def __n_spin_changed(self):
        if self._violin_plot is not None:
            self._violin_plot.set_n_visible(self.n_attributes)

    def __show_check_changed(self):
        if self._violin_plot is not None:
            self._violin_plot.show_legend(self.show_legend)

    @Inputs.data
    @check_sql_input
    def set_data(self, data: Optional[Table]):
        self.data = data
        summary = len(data) if data else self.info.NoInput
        details = format_summary_details(data) if data else ""
        self.info.set_input_summary(summary, details)

    @Inputs.model
    def set_model(self, model: Optional[Model]):
        self.closeContext()
        self.model = model
        self.setup_controls()
        self.openContext(self.model.domain.class_var if self.model else None)

    def setup_controls(self):
        self._target_combo.clear()
        self._target_combo.setEnabled(True)
        if self.model is not None:
            if self.model.domain.has_discrete_class:
                self._target_combo.addItems(self.model.domain.class_var.values)
                self.target_index = 0
            elif self.model.domain.has_continuous_class:
                self.target_index = -1
                self._target_combo.setEnabled(False)
            else:
                raise NotImplementedError

    def handleNewSignals(self):
        self.clear()
        self.start(run, self.data, self.model)

    def clear(self):
        self.__results = None
        self.cancel()
        self.clear_selection()
        self.clear_scene()
        self.clear_messages()

    def clear_selection(self):
        if self.selection:
            self.selection = ()
            self.commit()

    def clear_scene(self):
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self.view.setSceneRect(QRectF())
        self.view.setHeaderSceneRect(QRectF())
        self.view.setFooterSceneRect(QRectF())
        self._violin_plot = None

    def commit(self):
        if not self.selection or not self.selection[1]:
            self.info.set_output_summary(self.info.NoOutput)
            self.Outputs.selected_data.send(None)
        else:
            data = self.data[self.selection[1]]
            detail = format_summary_details(data)
            self.info.set_output_summary(len(data), detail)
            self.Outputs.selected_data.send(data)

    def update_scene(self):
        self.clear_scene()
        scores = None
        if self.__results is not None:
            assert isinstance(self.__results.x, list)
            x = self.__results.x[self.target_index]
            scores_x = np.mean(np.abs(x), axis=0)
            indices = np.argsort(scores_x)[::-1]
            colors = self.__results.colors
            names = [self.__results.names[i] for i in indices]
            if x.shape[1]:
                self.setup_plot(x[:, indices], colors[:, indices], names)
            scores = self.create_scores_table(scores_x, self.__results.names)
        self.Outputs.scores.send(scores)

    def setup_plot(self, x: np.ndarray, colors: np.ndarray, names: List[str]):
        width = int(self.view.viewport().rect().width())
        self._violin_plot = ViolinPlot()
        self._violin_plot.set_data(x, colors, names, self.n_attributes, width)
        self._violin_plot.show_legend(self.show_legend)
        self._violin_plot.apply_visual_settings(self.visual_settings)
        self._violin_plot.selection_cleared.connect(self.clear_selection)
        self._violin_plot.selection_changed.connect(self.update_selection)
        self._violin_plot.layout().activate()
        self._violin_plot.geometryChanged.connect(self.update_scene_rect)
        self._violin_plot.resized.connect(self.update_plot)
        self.scene.addItem(self._violin_plot)
        self.scene.mouse_clicked.connect(self._violin_plot.deselect)
        self.update_scene_rect()
        self.update_plot()

    def update_plot(self):
        if self._violin_plot is not None:
            width = int(self.view.viewport().rect().width())
            self._violin_plot.rescale(width)

    def update_selection(self, min_val: float, max_val: float, attr_name: str):
        assert self.__results is not None
        x = self.__results.x[self.target_index]
        column = self.__results.names.index(attr_name)
        mask = self.__results.mask.copy()
        mask[self.__results.mask] = np.logical_and(x[:, column] <= max_val,
                                                   x[:, column] >= min_val)
        if not self.selection and not any(mask):
            return
        self.selection = (attr_name, list(np.flatnonzero(mask)))
        self.commit()

    def update_scene_rect(self):
        def extend_horizontal(rect):
            rect = QRectF(rect)
            rect.setLeft(geom.left())
            rect.setRight(geom.right())
            return rect

        geom = self._violin_plot.geometry()
        self.scene.setSceneRect(geom)
        self.view.setSceneRect(geom)

        footer_geom = self._violin_plot.bottom_axis.geometry()
        footer = extend_horizontal(footer_geom.adjusted(0, -3, 0, 10))
        self.view.setFooterSceneRect(footer)

    @staticmethod
    def create_scores_table(scores: np.ndarray, names: List[str]):
        domain = Domain([ContinuousVariable("Score")],
                        metas=[StringVariable("Feature")])
        scores_table = Table(domain, scores[:, None],
                             metas=np.array(names)[:, None])
        scores_table.name = "Feature Scores"
        return scores_table

    def on_partial_result(self, _):
        pass

    def on_done(self, results: Optional[Results]):
        self.__results = results
        if self.data and results is not None and not all(results.mask):
            self.Information.data_sampled()
        self.update_scene()
        self.select_pending()

    def select_pending(self):
        if not self.__pending_selection or not self.__pending_selection[1] \
                or self.__results is None:
            return

        attr_name, row_indices = self.__pending_selection
        names = self.__results.names
        if not names or attr_name not in names:
            return
        col_index = names.index(attr_name)
        mask = np.zeros(self.__results.mask.shape, dtype=bool)
        mask[row_indices] = True
        mask = np.logical_and(self.__results.mask, mask)
        row_indices = np.flatnonzero(mask[self.__results.mask])
        column = self.__results.x[self.target_index][row_indices, col_index]
        x1, x2 = np.min(column), np.max(column)
        self._violin_plot.select_from_settings(x1, x2, attr_name)
        self.__pending_selection = ()
        self.unconditional_commit()

    def on_exception(self, ex: Exception):
        if isinstance(ex, DomainTransformationError):
            self.Error.domain_transform_err(ex)
        else:
            self.Error.unknown_err(ex)

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(800, 520))

    def send_report(self):
        if not self.data or not self.model:
            return
        items = {"Target class": "None"}
        if self.model.domain.has_discrete_class:
            class_var = self.model.domain.class_var
            items["Target class"] = class_var.values[self.target_index]
        self.report_items(items)
        self.report_plot()

    def set_visual_settings(self, key, value):
        self.visual_settings[key] = value
        if self._violin_plot is not None:
            self._violin_plot.parameter_setter.set_parameter(key, value)
Beispiel #15
0
class OWContingencyTable(widget.OWWidget):
    name = "Contingency Table"
    description = "Construct a contingency table from given data."
    icon = "icons/Contingency.svg"
    priority = 2010

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        contingency = Output("Contingency Table", Table, default=True)
        selected_data = Output("Selected Data", Table)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler(metas_in_res=True)
    rows = ContextSetting(None)
    columns = ContextSetting(None)
    selection = ContextSetting(set())
    auto_apply = Setting(True)

    want_main_area = True

    def __init__(self):
        super().__init__()

        self.data = None
        self.feature_model = DomainModel(valid_types=DiscreteVariable)
        self.table = None

        box = gui.vBox(self.controlArea, "Rows")
        gui.comboBox(box,
                     self,
                     'rows',
                     sendSelectedValue=True,
                     model=self.feature_model,
                     callback=self._attribute_changed)

        box = gui.vBox(self.controlArea, "Columns")
        gui.comboBox(box,
                     self,
                     'columns',
                     sendSelectedValue=True,
                     model=self.feature_model,
                     callback=self._attribute_changed)

        gui.rubber(self.controlArea)

        box = gui.vBox(self.controlArea, "Scores")
        self.scores = gui.widgetLabel(box, "\n\n")

        self.apply_button = gui.auto_commit(self.controlArea,
                                            self,
                                            "auto_apply",
                                            "&Apply",
                                            box=False)

        self.tableview = ContingencyTable(self)
        self.mainArea.layout().addWidget(self.tableview)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        if self.feature_model:
            self.closeContext()
        self.data = data
        self.feature_model.set_domain(None)
        self.rows = None
        self.columns = None
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.feature_model:
                self.rows = self.feature_model[0]
                self.columns = self.feature_model[0]
                self.openContext(data)
                self.tableview.set_variables(self.rows, self.columns)
                self.table = contingency_table(self.data, self.columns,
                                               self.rows)
                self.tableview.update_table(self.table.X, formatstr="{:.0f}")
        else:
            self.tableview.clear()

    def handleNewSignals(self):
        self._attribute_changed()

    def commit(self):
        if len(self.selection):
            cells = []
            for ir, r in enumerate(self.rows.values):
                for ic, c in enumerate(self.columns.values):
                    if (ir, ic) in self.selection:
                        cells.append(
                            Values([
                                FilterDiscrete(self.rows, [r]),
                                FilterDiscrete(self.columns, [c])
                            ]))
            selected_data = Values(cells, conjunction=False)(self.data)
            annotated_data = create_annotated_table(
                self.data,
                np.where(np.in1d(self.data.ids, selected_data.ids, True)))
        else:
            selected_data = None
            annotated_data = create_annotated_table(self.data, [])
        self.Outputs.contingency.send(self.table)
        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        self.selection = self.tableview.get_selection()
        self.commit()

    def _attribute_changed(self):
        self.tableview.set_selection(self.selection)
        self.table = None
        if self.data and self.rows and self.columns:
            self.tableview.set_variables(self.rows, self.columns)
            self.table = contingency_table(self.data, self.columns, self.rows)
            self.tableview.update_table(self.table.X, formatstr="{:.0f}")

            chi = ChiSqStats(self.data, self.rows, self.columns)
            vardata1 = self.data.get_column_view(self.rows.name)[0]
            vardata2 = self.data.get_column_view(self.columns.name)[0]
            self.scores.setText(
                "ARI: {:.3f}\nAMI: {:.3f}\nχ²={:.2f}, p={:.3f}".format(
                    adjusted_rand_score(vardata1, vardata2),
                    adjusted_mutual_info_score(vardata1, vardata2), chi.chisq,
                    chi.p))
        else:
            self.scores.setText("\n\n")
        self._invalidate()

    def send_report(self):
        rows = None
        columns = None
        if self.data is not None:
            rows = self.rows
            if rows in self.data.domain:
                rows = self.data.domain[rows]
            columns = self.columns
            if columns in self.data.domain:
                columns = self.data.domain[columns]
        self.report_items((
            ("Rows", rows),
            ("Columns", columns),
        ))
Beispiel #16
0
class OWDifference(widget.OWWidget):
    name = 'Difference'
    description = 'Make the time series stationary by replacing it with ' \
                  '1st or 2nd order discrete difference along its values. '
    icon = 'icons/Difference.svg'
    priority = 570
    keywords = ['difference', 'derivative', 'quotient', 'percent change']

    class Inputs:
        time_series = Input("Time series", Table)

    class Outputs:
        time_series = Output("Time series", Timeseries)

    settingsHandler = DomainContextHandler()
    selected = ContextSetting([], schema_only=True)

    class Operation(str, Enum):
        DIFF = 'Difference'
        QUOT = 'Quotient'
        PERC = 'Percentage change'

    want_main_area = False
    resizing_enabled = False

    chosen_operation = settings.Setting(Operation.DIFF)
    diff_order = settings.Setting(1)
    shift_period = settings.Setting(1)
    invert_direction = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message('Series can be differentiated up to the 2nd order. '
                       'However, if the series is shifted by other than 1 '
                       'step, a differencing order of 1 is always assumed.',
                       'diff-shift')
    ]

    def __init__(self):
        self.data = None

        box = gui.vBox(self.controlArea, 'Differencing')

        gui.comboBox(box, self, 'chosen_operation',
                     orientation=Qt.Horizontal,
                     items=[el.value for el in self.Operation],
                     label='Compute:',
                     callback=self.on_changed,
                     sendSelectedValue=True)

        self.order_spin = gui.spin(
            box, self, 'diff_order', 1, 2,
            label='Differencing order:',
            callback=self.on_changed,
            tooltip='The value corresponds to n-th order numerical '
                    'derivative of the series. \nThe order is fixed to 1 '
                    'if the shift period is other than 1.')
        gui.spin(box, self, 'shift_period', 1, 100,
                 label='Shift:',
                 callback=self.on_changed,
                 tooltip='Set this to other than 1 if you don\'t want to '
                         'compute differences for subsequent values but for '
                         'values shifted number of spaces apart. \n'
                         'If this value is different from 1, differencing '
                         'order is fixed to 1.')
        gui.checkBox(box, self, 'invert_direction',
                     label='Invert differencing direction',
                     callback=self.on_changed,
                     tooltip='Influences where the series is padded with nan '
                             'values — at the beginning or at the end.')
        self.view = view = QListView(self,
                                     selectionMode=QListView.ExtendedSelection)
        self.model = model = VariableListModel(parent=self)
        view.setModel(model)
        view.selectionModel().selectionChanged.connect(self.on_changed)
        box.layout().addWidget(view)
        gui.auto_commit(box, self, 'autocommit', '&Apply')

    @Inputs.time_series
    def set_data(self, data):
        self.closeContext()
        self.data = data = None if data is None else \
                           Timeseries.from_data_table(data, detect_time_variable=True)
        if data is not None:
            self.model[:] = [var for var in data.domain.variables
                             if var.is_continuous and var is not
                             data.time_variable]
            self.select_default_variable()
            self.openContext(self.data)
            self._restore_selection()
        else:
            self.reset_model()
        self.on_changed()

    def _restore_selection(self):
        def restore(view, selection):
            with signal_blocking(view.selectionModel()):
                # gymnastics for transforming variable names back to indices
                var_list = [var for var in self.data.domain.variables
                            if var.is_continuous and var is not
                            self.data.time_variable]
                indices = [var_list.index(i) for i in selection]
                select_rows(view, indices)
        restore(self.view, self.selected)

    def select_default_variable(self):
        self.selected = [0]
        select_rows(self.view, self.selected)

    def reset_model(self):
        self.model.wrap([])

    def on_changed(self):
        var_names = [i.row()
                     for i in self.view.selectionModel().selectedRows()]
        self.order_spin.setEnabled(
            self.shift_period == 1
            and self.chosen_operation == self.Operation.DIFF)
        self.selected = [self.model[v] for v in var_names]
        self.commit()

    def commit(self):
        data = self.data
        if not data or not len(self.selected):
            self.Outputs.time_series.send(None)
            return

        X = []
        attrs = []
        invert = self.invert_direction
        shift = self.shift_period
        order = self.diff_order
        op = self.chosen_operation

        for var in self.selected:
            col = np.ravel(data[:, var])

            if invert:
                col = col[::-1]

            out = np.empty(len(col))
            if op == self.Operation.DIFF and shift == 1:
                out[order:] = np.diff(col, order)
                out[:order] = np.nan
            else:
                if op == self.Operation.DIFF:
                    out[shift:] = col[shift:] - col[:-shift]
                else:
                    out[shift:] = np.divide(col[shift:], col[:-shift])
                    if op == self.Operation.PERC:
                        out = (out - 1) * 100
                out[:shift] = np.nan

            if invert:
                out = out[::-1]

            X.append(out)

            if op == self.Operation.DIFF and shift == 1:
                details = f'order={order}'
            else:
                details = f'shift={shift}'

            template = f'{var} ({op[:4].lower()}; {details})'
            name = available_name(data.domain, template)
            attrs.append(ContinuousVariable(name))

        ts = Timeseries.from_numpy(Domain(data.domain.attributes + tuple(attrs),
                                          data.domain.class_vars,
                                          data.domain.metas),
                                   np.column_stack((data.X, np.column_stack(X))),
                                   data.Y, data.metas)
        ts.time_variable = data.time_variable
        self.Outputs.time_series.send(ts)
Beispiel #17
0
class OWScatterPlot(OWDataProjectionWidget):
    """Scatterplot visualization with explorative analysis and intelligent
    data visualization enhancements."""

    name = 'Scatter Plot'
    description = "Interactive scatter plot visualization with " \
                  "intelligent data visualization enhancements."
    icon = "icons/ScatterPlot.svg"
    priority = 140
    keywords = []

    class Inputs(OWDataProjectionWidget.Inputs):
        features = Input("Features", AttributeList)

    class Outputs(OWDataProjectionWidget.Outputs):
        features = Output("Features", AttributeList, dynamic=False)

    settings_version = 4
    auto_sample = Setting(True)
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    tooltip_shows_all = Setting(True)

    GRAPH_CLASS = OWScatterPlotGraph
    graph = SettingProvider(OWScatterPlotGraph)
    embedding_variables_names = None

    xy_changed_manually = Signal(Variable, Variable)

    class Warning(OWDataProjectionWidget.Warning):
        missing_coords = Msg("Plot cannot be displayed because '{}' or '{}' "
                             "is missing for all data points")
        no_continuous_vars = Msg("Data has no continuous variables")

    class Information(OWDataProjectionWidget.Information):
        sampled_sql = Msg("Large SQL table; showing a sample.")
        missing_coords = Msg(
            "Points with missing '{}' or '{}' are not displayed")

    def __init__(self):
        self.sql_data = None  # Orange.data.sql.table.SqlTable
        self.attribute_selection_list = None  # list of Orange.data.Variable
        self.__timer = QTimer(self, interval=1200)
        self.__timer.timeout.connect(self.add_data)
        super().__init__()

        # manually register Matplotlib file writers
        self.graph_writers = self.graph_writers.copy()
        for w in [MatplotlibFormat, MatplotlibPDFFormat]:
            for ext in w.EXTENSIONS:
                self.graph_writers[ext] = w

    def _add_controls(self):
        self._add_controls_axis()
        self._add_controls_sampling()
        super()._add_controls()
        self.gui.add_widgets([
            self.gui.ShowGridLines, self.gui.ToolTipShowsAll,
            self.gui.RegressionLine
        ], self._plot_box)
        gui.checkBox(
            gui.indentedBox(self._plot_box),
            self,
            value="graph.orthonormal_regression",
            label="Treat variables as independent",
            callback=self.graph.update_regression_line,
            tooltip=
            "If checked, fit line to group (minimize distance from points);\n"
            "otherwise fit y as a function of x (minimize vertical distances)")

    def _add_controls_axis(self):
        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True,
                              valueType=str,
                              contentsLength=14)
        box = gui.vBox(self.controlArea, True)
        dmod = DomainModel
        self.xy_model = DomainModel(dmod.MIXED, valid_types=ContinuousVariable)
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.set_attr_from_combo,
                                      model=self.xy_model,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self.set_attr_from_combo,
                                      model=self.xy_model,
                                      **common_options)
        vizrank_box = gui.hBox(box)
        self.vizrank, self.vizrank_button = ScatterPlotVizRank.add_vizrank(
            vizrank_box, self, "Find Informative Projections", self.set_attr)

    def _add_controls_sampling(self):
        self.sampling = gui.auto_commit(self.controlArea,
                                        self,
                                        "auto_sample",
                                        "Sample",
                                        box="Sampling",
                                        callback=self.switch_sampling,
                                        commit=lambda: self.add_data(1))
        self.sampling.setVisible(False)

    @property
    def effective_variables(self):
        return [self.attr_x, self.attr_y]

    def _vizrank_color_change(self):
        self.vizrank.initialize()
        is_enabled = self.data is not None and not self.data.is_sparse() and \
            len(self.xy_model) > 2 and len(self.data[self.valid_data]) > 1 \
            and np.all(np.nan_to_num(np.nanstd(self.data.X, 0)) != 0)
        self.vizrank_button.setEnabled(
            is_enabled and self.attr_color is not None and not np.isnan(
                self.data.get_column_view(
                    self.attr_color)[0].astype(float)).all())
        text = "Color variable has to be selected." \
            if is_enabled and self.attr_color is None else ""
        self.vizrank_button.setToolTip(text)

    def set_data(self, data):
        if self.data and data and self.data.checksum() == data.checksum():
            return
        super().set_data(data)

        def findvar(name, iterable):
            """Find a Orange.data.Variable in `iterable` by name"""
            for el in iterable:
                if isinstance(el, Variable) and el.name == name:
                    return el
            return None

        # handle restored settings from  < 3.3.9 when attr_* were stored
        # by name
        if isinstance(self.attr_x, str):
            self.attr_x = findvar(self.attr_x, self.xy_model)
        if isinstance(self.attr_y, str):
            self.attr_y = findvar(self.attr_y, self.xy_model)
        if isinstance(self.attr_label, str):
            self.attr_label = findvar(self.attr_label, self.gui.label_model)
        if isinstance(self.attr_color, str):
            self.attr_color = findvar(self.attr_color, self.gui.color_model)
        if isinstance(self.attr_shape, str):
            self.attr_shape = findvar(self.attr_shape, self.gui.shape_model)
        if isinstance(self.attr_size, str):
            self.attr_size = findvar(self.attr_size, self.gui.size_model)

    def check_data(self):
        self.clear_messages()
        self.__timer.stop()
        self.sampling.setVisible(False)
        self.sql_data = None
        if isinstance(self.data, SqlTable):
            if self.data.approx_len() < 4000:
                self.data = Table(self.data)
            else:
                self.Information.sampled_sql()
                self.sql_data = self.data
                data_sample = self.data.sample_time(0.8, no_cache=True)
                data_sample.download_data(2000, partial=True)
                self.data = Table(data_sample)
                self.sampling.setVisible(True)
                if self.auto_sample:
                    self.__timer.start()

        if self.data is not None:
            if not self.data.domain.has_continuous_attributes(True, True):
                self.Warning.no_continuous_vars()
                self.data = None

        if self.data is not None and (len(self.data) == 0
                                      or len(self.data.domain) == 0):
            self.data = None

    def get_embedding(self):
        self.valid_data = None
        if self.data is None:
            return None

        x_data = self.get_column(self.attr_x, filter_valid=False)
        y_data = self.get_column(self.attr_y, filter_valid=False)
        if x_data is None or y_data is None:
            return None

        self.Warning.missing_coords.clear()
        self.Information.missing_coords.clear()
        self.valid_data = np.isfinite(x_data) & np.isfinite(y_data)
        if self.valid_data is not None and not np.all(self.valid_data):
            msg = self.Information if np.any(self.valid_data) else self.Warning
            msg.missing_coords(self.attr_x.name, self.attr_y.name)
        return np.vstack((x_data, y_data)).T

    # Tooltip
    def _point_tooltip(self, point_id, skip_attrs=()):
        point_data = self.data[point_id]
        xy_attrs = (self.attr_x, self.attr_y)
        text = "<br/>".join(
            escape('{} = {}'.format(var.name, point_data[var]))
            for var in xy_attrs)
        if self.tooltip_shows_all:
            others = super()._point_tooltip(point_id, skip_attrs=xy_attrs)
            if others:
                text = "<b>{}</b><br/><br/>{}".format(text, others)
        return text

    def add_data(self, time=0.4):
        if self.data and len(self.data) > 2000:
            self.__timer.stop()
            return
        data_sample = self.sql_data.sample_time(time, no_cache=True)
        if data_sample:
            data_sample.download_data(2000, partial=True)
            data = Table(data_sample)
            self.data = Table.concatenate((self.data, data), axis=0)
            self.handleNewSignals()

    def init_attr_values(self):
        super().init_attr_values()
        data = self.data
        domain = data.domain if data and len(data) else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x

    def switch_sampling(self):
        self.__timer.stop()
        if self.auto_sample and self.sql_data:
            self.add_data()
            self.__timer.start()

    def set_subset_data(self, subset_data):
        self.warning()
        if isinstance(subset_data, SqlTable):
            if subset_data.approx_len() < AUTO_DL_LIMIT:
                subset_data = Table(subset_data)
            else:
                self.warning("Data subset does not support large Sql tables")
                subset_data = None
        super().set_subset_data(subset_data)

    # called when all signals are received, so the graph is updated only once
    def handleNewSignals(self):
        if self.attribute_selection_list and self.data is not None and \
                self.data.domain is not None and \
                all(attr in self.data.domain for attr
                        in self.attribute_selection_list):
            self.attr_x, self.attr_y = self.attribute_selection_list[:2]
            self.attribute_selection_list = None
        super().handleNewSignals()
        self._vizrank_color_change()

    @Inputs.features
    def set_shown_attributes(self, attributes):
        if attributes and len(attributes) >= 2:
            self.attribute_selection_list = attributes[:2]
            self._invalidated = self._invalidated \
                or self.attr_x != attributes[0] \
                or self.attr_y != attributes[1]
        else:
            self.attribute_selection_list = None

    def set_attr(self, attr_x, attr_y):
        if attr_x != self.attr_x or attr_y != self.attr_y:
            self.attr_x, self.attr_y = attr_x, attr_y
            self.attr_changed()

    def set_attr_from_combo(self):
        self.attr_changed()
        self.xy_changed_manually.emit(self.attr_x, self.attr_y)

    def attr_changed(self):
        self.setup_plot()
        self.commit()

    def get_axes(self):
        return {"bottom": self.attr_x, "left": self.attr_y}

    def colors_changed(self):
        super().colors_changed()
        self._vizrank_color_change()

    def commit(self):
        super().commit()
        self.send_features()

    def send_features(self):
        features = [attr for attr in [self.attr_x, self.attr_y] if attr]
        self.Outputs.features.send(features or None)

    def get_widget_name_extension(self):
        if self.data is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)
        return None

    @classmethod
    def migrate_settings(cls, settings, version):
        if version < 2 and "selection" in settings and settings["selection"]:
            settings["selection_group"] = [(a, 1)
                                           for a in settings["selection"]]
        if version < 3:
            if "auto_send_selection" in settings:
                settings["auto_commit"] = settings["auto_send_selection"]
            if "selection_group" in settings:
                settings["selection"] = settings["selection_group"]

    @classmethod
    def migrate_context(cls, context, version):
        values = context.values
        if version < 3:
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
        if version < 4:
            if values["attr_x"][1] % 100 == 1 or values["attr_y"][1] % 100 == 1:
                raise IncompatibleContext()
class OWStackAlign(OWWidget):
    # Widget's name as displayed in the canvas
    name = "Align Stack"

    # Short widget description
    description = ("Aligns and crops a stack of images using various methods.")

    icon = "icons/stackalign.svg"

    # Define inputs and outputs
    class Inputs:
        data = Input("Stack of images", Table, default=True)

    class Outputs:
        newstack = Output("Aligned image stack", Table, default=True)

    class Error(OWWidget.Error):
        nan_in_image = Msg("Unknown values within images: {} unknowns")
        invalid_axis = Msg("Invalid axis: {}")

    autocommit = settings.Setting(True)

    want_main_area = True
    want_control_area = True
    resizing_enabled = False

    settingsHandler = DomainContextHandler()

    sobel_filter = settings.Setting(False)
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    ref_frame_num = settings.Setting(0)

    def __init__(self):
        super().__init__()

        # TODO: add input box for selecting which should be the reference frame
        box = gui.widgetBox(self.controlArea, "Axes")

        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True,
                              valueType=str)
        self.xy_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                    valid_types=ContinuousVariable)
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self._update_attr,
                                      model=self.xy_model,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self._update_attr,
                                      model=self.xy_model,
                                      **common_options)

        self.contextAboutToBeOpened.connect(self._init_interface_data)

        box = gui.widgetBox(self.controlArea, "Parameters")

        gui.checkBox(box,
                     self,
                     "sobel_filter",
                     label="Use sobel filter",
                     callback=self._sobel_changed)
        gui.separator(box)
        hbox = gui.hBox(box)
        self.le1 = lineEditIntRange(box,
                                    self,
                                    "ref_frame_num",
                                    bottom=1,
                                    default=1,
                                    callback=self._ref_frame_changed)
        hbox.layout().addWidget(QLabel("Reference frame:", self))
        hbox.layout().addWidget(self.le1)

        gui.rubber(self.controlArea)

        plot_box = gui.widgetBox(self.mainArea, "Shift curves")
        self.plotview = pg.PlotWidget(background="w")
        plot_box.layout().addWidget(self.plotview)
        # TODO:  resize widget to make it a bit smaller

        self.data = None

        gui.auto_commit(self.controlArea, self, "autocommit", "Send Data")

    def _sanitize_ref_frame(self):
        if self.ref_frame_num > self.data.X.shape[1]:
            self.ref_frame_num = self.data.X.shape[1]

    def _ref_frame_changed(self):
        self._sanitize_ref_frame()
        self.commit()

    def _sobel_changed(self):
        self.commit()

    def _init_attr_values(self, data):
        domain = data.domain if data is not None else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x

    def _init_interface_data(self, args):
        data = args[0]
        same_domain = (self.data and data and data.domain == self.data.domain)
        if not same_domain:
            self._init_attr_values(data)

    def _update_attr(self):
        self.commit()

    @Inputs.data
    def set_data(self, dataset):
        self.closeContext()
        self.openContext(dataset)
        if dataset is not None:
            self.data = dataset
            self._sanitize_ref_frame()
        else:
            self.data = None
        self.Error.nan_in_image.clear()
        self.Error.invalid_axis.clear()
        self.commit()

    def commit(self):
        new_stack = None

        self.Error.nan_in_image.clear()
        self.Error.invalid_axis.clear()

        self.plotview.plotItem.clear()

        if self.data and len(
                self.data.domain.attributes) and self.attr_x and self.attr_y:
            try:
                shifts, new_stack = process_stack(
                    self.data,
                    self.attr_x,
                    self.attr_y,
                    upsample_factor=100,
                    use_sobel=self.sobel_filter,
                    ref_frame_num=self.ref_frame_num - 1)
            except NanInsideHypercube as e:
                self.Error.nan_in_image(e.args[0])
            except InvalidAxisException as e:
                self.Error.invalid_axis(e.args[0])
            else:
                # TODO: label axes
                frames = np.linspace(1, shifts.shape[0], shifts.shape[0])
                self.plotview.plotItem.plot(frames,
                                            shifts[:, 0],
                                            pen=pg.mkPen(color=(255, 40, 0),
                                                         width=3),
                                            symbol='o',
                                            symbolBrush=(255, 40, 0),
                                            symbolPen='w',
                                            symbolSize=7)
                self.plotview.plotItem.plot(frames,
                                            shifts[:, 1],
                                            pen=pg.mkPen(color=(0, 139, 139),
                                                         width=3),
                                            symbol='o',
                                            symbolBrush=(0, 139, 139),
                                            symbolPen='w',
                                            symbolSize=7)
                self.plotview.getPlotItem().setLabel('bottom', 'Frame number')
                self.plotview.getPlotItem().setLabel('left', 'Shift / pixel')
                self.plotview.getPlotItem().addLine(
                    self.ref_frame_num,
                    pen=pg.mkPen(color=(150, 150, 150),
                                 width=3,
                                 style=Qt.DashDotDotLine))

        self.Outputs.newstack.send(new_stack)

    def send_report(self):
        self.report_items((("Use sobel filter", str(self.sobel_filter)), ))
Beispiel #19
0
class OWDistanceMatrix(widget.OWWidget):
    name = "Distance Matrix"
    description = "View distance matrix."
    icon = "icons/DistanceMatrix.svg"
    priority = 200

    inputs = [("Distances", DistMatrix, "set_distances")]
    outputs = [("Distances", DistMatrix), ("Table", Table)]

    settingsHandler = DistanceMatrixContextHandler()
    auto_commit = Setting(True)
    annotation_idx = ContextSetting(1)
    selection = ContextSetting([])

    want_control_area = False

    def __init__(self):
        super().__init__()
        self.distances = None
        self.items = None

        self.tablemodel = DistanceMatrixModel()
        view = self.tableview = QTableView()
        view.setEditTriggers(QTableView.NoEditTriggers)
        view.setItemDelegate(TableBorderItem())
        view.setModel(self.tablemodel)
        view.setShowGrid(False)
        for header in (view.horizontalHeader(), view.verticalHeader()):
            header.setResizeMode(QHeaderView.ResizeToContents)
            header.setHighlightSections(True)
            header.setClickable(False)
        view.verticalHeader().setDefaultAlignment(Qt.AlignRight
                                                  | Qt.AlignVCenter)
        selmodel = SymmetricSelectionModel(view.model(), view)
        view.setSelectionModel(selmodel)
        view.setSelectionBehavior(QTableView.SelectItems)
        self.mainArea.layout().addWidget(view)

        settings_box = gui.hBox(self.mainArea)

        self.annot_combo = gui.comboBox(settings_box,
                                        self,
                                        "annotation_idx",
                                        label="Labels: ",
                                        orientation=Qt.Horizontal,
                                        callback=self._invalidate_annotations,
                                        contentsLength=12)
        self.annot_combo.setModel(VariableListModel())
        self.annot_combo.model()[:] = ["None", "Enumeration"]
        gui.rubber(settings_box)
        settings_box.layout().addWidget(self.report_button)
        gui.separator(settings_box, 40)
        acb = gui.auto_commit(settings_box,
                              self,
                              "auto_commit",
                              "Send Selected",
                              "Send Automatically",
                              box=None)
        acb.setFixedWidth(200)
        # Signal must be connected after self.commit is redirected
        selmodel.selectionChanged.connect(self.commit)

    def sizeHint(self):
        return QSize(800, 500)

    def set_distances(self, distances):
        self.closeContext()
        self.distances = distances
        self.tablemodel.set_data(self.distances)
        self.selection = []
        self.tableview.selectionModel().set_selected_items([])

        self.items = items = distances is not None and distances.row_items
        annotations = ["None", "Enumerate"]
        self.annotation_idx = 1
        if items and not distances.axis:
            annotations.append("Attribute names")
            self.annotation_idx = 2
        elif isinstance(items, list) and \
                all(isinstance(item, Variable) for item in items):
            annotations.append("Name")
            self.annotation_idx = 2
        elif isinstance(items, Table):
            annotations.extend(
                itertools.chain(items.domain, items.domain.metas))
            if items.domain.class_var:
                self.annotation_idx = 2 + len(items.domain.attributes)
        self.annot_combo.model()[:] = annotations

        if items:
            self.openContext(distances, annotations)
            self._update_labels()
            self.tableview.resizeColumnsToContents()
        self.commit()

    def _invalidate_annotations(self):
        if self.distances is not None:
            self._update_labels()

    def _update_labels(self):
        var = column = None
        if self.annotation_idx == 0:
            labels = None
        elif self.annotation_idx == 1:
            labels = [str(i + 1) for i in range(self.distances.shape[0])]
        elif self.annot_combo.model()[
                self.annotation_idx] == "Attribute names":
            attr = self.distances.row_items.domain.attributes
            labels = [str(attr[i]) for i in range(self.distances.shape[0])]
        elif self.annotation_idx == 2 and \
                isinstance(self.items, widget.AttributeList):
            labels = [v.name for v in self.items]
        elif isinstance(self.items, Table):
            var = self.annot_combo.model()[self.annotation_idx]
            column, _ = self.items.get_column_view(var)
            labels = [var.repr_val(value) for value in column]
        saved_selection = self.tableview.selectionModel().selected_items()
        self.tablemodel.set_labels(labels, var, column)
        if labels:
            self.tableview.horizontalHeader().show()
            self.tableview.verticalHeader().show()
        else:
            self.tableview.horizontalHeader().hide()
            self.tableview.verticalHeader().hide()
        self.tableview.resizeColumnsToContents()
        self.tableview.selectionModel().set_selected_items(saved_selection)

    def commit(self):
        sub_table = sub_distances = None
        if self.distances is not None:
            inds = self.tableview.selectionModel().selected_items()
            if inds:
                sub_distances = self.distances.submatrix(inds)
                if self.distances.axis and isinstance(self.items, Table):
                    sub_table = self.items[inds]
        self.send("Distances", sub_distances)
        self.send("Table", sub_table)

    def send_report(self):
        if self.distances is None:
            return
        model = self.tablemodel
        dim = self.distances.shape[0]
        col_cell = model.color_for_cell

        def _rgb(brush):
            return "rgb({}, {}, {})".format(*brush.color().getRgb())

        if model.labels:
            col_label = model.color_for_label
            label_colors = [_rgb(col_label(i)) for i in range(dim)]
            self.report_raw('<table style="border-collapse:collapse">')
            self.report_raw("<tr><td></td>")
            self.report_raw("".join(
                '<td style="background-color: {}">{}</td>'.format(*cv)
                for cv in zip(label_colors, model.labels)))
            self.report_raw("</tr>")
            for i in range(dim):
                self.report_raw("<tr>")
                self.report_raw(
                    '<td style="background-color: {}">{}</td>'.format(
                        label_colors[i], model.labels[i]))
                self.report_raw("".join(
                    '<td style="background-color: {};'
                    'border-top:1px solid {}; border-left:1px solid {};">'
                    '{:.3f}</td>'.format(_rgb(col_cell(i, j)), label_colors[i],
                                         label_colors[j], self.distances[i, j])
                    for j in range(dim)))
                self.report_raw("</tr>")
            self.report_raw("</table>")
        else:
            self.report_raw('<table>')
            for i in range(dim):
                self.report_raw("<tr>" + "".join(
                    '<td style="background-color: {}">{:.3f}</td>'.format(
                        _rgb(col_cell(i, j)), self.distances[i, j])
                    for j in range(dim)) + "</tr>")
            self.report_raw("</table>")
Beispiel #20
0
class OWLoadCorpus(OWWidget):
    name = "Corpus"
    description = "Load a corpus of text documents, (optionally) tagged with categories."
    icon = "icons/TextFile.svg"
    priority = 10

    outputs = [(Output.CORPUS, Corpus)]
    want_main_area = False
    resizing_enabled = False

    dlgFormats = ("All readable files ({});;".format(
        '*' + ' *'.join(FileFormat.readers.keys())) + ";;".join(
            "{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
            for f in sorted(set(FileFormat.readers.values()),
                            key=list(FileFormat.readers.values()).index)))

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    recent_files = Setting([])
    used_attrs = ContextSetting([])

    class Error(OWWidget.Error):
        read_file = Msg("Can't read file {} ({})")

    def __init__(self):
        super().__init__()

        self.corpus = None

        # Browse file box
        fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)
        widget = widgets.FileWidget(recent_files=self.recent_files,
                                    icon_size=(16, 16),
                                    on_open=self.open_file,
                                    directory_aliases={
                                        "Browse documentation corpora ...":
                                        get_sample_corpora_dir()
                                    },
                                    dialog_format=self.dlgFormats,
                                    dialog_title='Open Orange Document Corpus',
                                    allow_empty=False,
                                    reload_label='Reload',
                                    browse_label='Browse')
        fbox.layout().addWidget(widget)

        # Corpus info
        ibox = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        corp_info = "Corpus of 0 documents."
        self.info_label = gui.label(ibox, self, corp_info)

        # Used Text Features
        fbox = gui.widgetBox(self.controlArea, orientation=0)
        ubox = gui.widgetBox(fbox, "Used text features", addSpace=True)
        self.used_attrs_model = VariableListModel(enable_dnd=True)
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs_model)
        ubox.layout().addWidget(self.used_attrs_view)

        aa = self.used_attrs_model
        aa.dataChanged.connect(self.update_feature_selection)
        aa.rowsInserted.connect(self.update_feature_selection)
        aa.rowsRemoved.connect(self.update_feature_selection)

        # Ignored Text Features
        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=True)
        self.unused_attrs_model = VariableListModel(enable_dnd=True)
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs_model)
        ibox.layout().addWidget(self.unused_attrs_view)

        # load first file
        widget.select(0)

    def open_file(self, path):
        self.closeContext()
        self.Error.read_file.clear()
        self.used_attrs_model[:] = []
        self.unused_attrs_model[:] = []
        if path:
            try:
                self.corpus = Corpus.from_file(path)
                self.corpus.name = os.path.splitext(os.path.basename(path))[0]
                self.info_label.setText("Corpus of {} documents.".format(
                    len(self.corpus)))
                self.used_attrs = list(self.corpus.text_features)
                self.openContext(self.corpus)
                self.used_attrs_model.extend(self.used_attrs)
                self.unused_attrs_model.extend([
                    f for f in self.corpus.domain.metas
                    if f.is_string and f not in self.used_attrs_model
                ])
            except BaseException as err:
                self.Error.read_file(path, str(err))

    def update_feature_selection(self):
        # TODO fix VariablesListItemView so it does not emit
        # duplicated data when reordering inside a single window
        def remove_duplicates(l):
            unique = []
            for i in l:
                if i not in unique:
                    unique.append(i)
            return unique

        if self.corpus is not None:
            self.corpus.set_text_features(
                remove_duplicates(self.used_attrs_model))
            self.send(Output.CORPUS, self.corpus)
            self.used_attrs = list(self.used_attrs_model)
Beispiel #21
0
class OWFile(widget.OWWidget, RecentPathsWComboMixin):
    name = "读取文件"
    id = "orange.widgets.data.file"
    description = "从输入文件或网络加载数据并且" \
                  "发送一个数据表到输出端口。"
    icon = "icons/File.svg"
    priority = 10
    category = "Data"
    keywords = ["file", "load", "read", "open"]

    class Outputs:
        data = Output("Data", Table, doc="Attribute-valued dataset read from the input file.")

    want_main_area = False

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    SIZE_LIMIT = 1e7
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "iris.tab"),
        RecentPath("", "sample-datasets", "titanic.tab"),
        RecentPath("", "sample-datasets", "housing.tab"),
        RecentPath("", "sample-datasets", "heart_disease.tab"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    xls_sheet = ContextSetting("")
    sheet_names = Setting({})
    url = Setting("")

    variables = ContextSetting([])

    domain_editor = SettingProvider(DomainEditor)

    class Warning(widget.OWWidget.Warning):
        file_too_big = widget.Msg("The file is too large to load automatically."
                                  " Press Reload to load.")
        load_warning = widget.Msg("Read warning:\n{}")

    class Error(widget.OWWidget.Error):
        file_not_found = widget.Msg("File not found.")
        missing_reader = widget.Msg("Missing reader.")
        sheet_error = widget.Msg("Error listing available sheets.")
        unknown = widget.Msg("Read error:\n{}")

    def __init__(self):
        super().__init__()
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)
        vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True,
                                callback=self.load_data, addToLayout=False)

        rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        file_button = gui.button(
            None, self, '...', callback=self.browse_file, autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(
            None, self, "Reload", callback=self.load_data, autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = gui.comboBox(None, self, "xls_sheet",
                                        callback=self.select_sheet,
                                        sendSelectedValue=True,)
        self.sheet_combo.setSizePolicy(
            Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(
            Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(
            self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(
            self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 3, 3)
        url_combo.activated.connect(self._url_set)

        box = gui.vBox(self.controlArea, "Info")
        self.info = gui.widgetLabel(box, 'No data loaded.')
        self.warnings = gui.widgetLabel(box, '')

        box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)")
        self.domain_editor = DomainEditor(self)
        self.editor_model = self.domain_editor.model()
        box.layout().addWidget(self.domain_editor)

        box = gui.hBox(self.controlArea)
        gui.button(
            box, self, "Browse documentation datasets",
            callback=lambda: self.browse_file(True), autoDefault=False)
        gui.rubber(box)

        self.apply_button = gui.button(
            box, self, "Apply", callback=self.apply_domain_edit)
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)

        if self.source == self.LOCAL_FILE:
            last_path = self.last_path()
            if last_path and os.path.exists(last_path) and \
                    os.path.getsize(last_path) > self.SIZE_LIMIT:
                self.Warning.file_too_big()
                return

        QTimer.singleShot(0, self.load_data)

    def sizeHint(self):
        return QSize(600, 550)

    def select_file(self, n):
        assert n < len(self.recent_paths)
        super().select_file(n)
        if self.recent_paths:
            self.source = self.LOCAL_FILE
            self.load_data()
            self.set_file_list()

    def select_sheet(self):
        self.recent_paths[0].sheet = self.sheet_combo.currentText()
        self.load_data()

    def _url_set(self):
        url = self.url_combo.currentText()
        pos = self.recent_urls.index(url)
        url = url.strip()

        if not urlparse(url).scheme:
            url = 'http://' + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url

        self.source = self.URL
        self.load_data()

    def browse_file(self, in_demos=False):
        if in_demos:
            start_file = get_sample_datasets_dir()
            if not os.path.exists(start_file):
                QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with documentation datasets")
                return
        else:
            start_file = self.last_path() or os.path.expanduser("~/")

        readers = [f for f in FileFormat.formats
                   if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None)]
        filename, reader, _ = open_filename_dialog(start_file, None, readers)
        if not filename:
            return
        self.add_path(filename)
        if reader is not None:
            self.recent_paths[0].file_format = reader.qualified_name()

        self.source = self.LOCAL_FILE
        self.load_data()

    # Open a file, create data from it and send it over the data channel
    def load_data(self):
        # We need to catch any exception type since anything can happen in
        # file readers
        self.closeContext()
        self.domain_editor.set_domain(None)
        self.apply_button.setEnabled(False)
        self.clear_messages()
        self.set_file_list()

        error = self._try_load()
        if error:
            error()
            self.data = None
            self.sheet_box.hide()
            self.Outputs.data.send(None)
            self.info.setText("No data.")

    def _try_load(self):
        # pylint: disable=broad-except
        if self.last_path() and not os.path.exists(self.last_path()):
            return self.Error.file_not_found

        try:
            self.reader = self._get_reader()
            assert self.reader is not None
        except Exception:
            return self.Error.missing_reader

        try:
            self._update_sheet_combo()
        except Exception:
            return self.Error.sheet_error

        with catch_warnings(record=True) as warnings:
            try:
                data = self.reader.read()
            except Exception as ex:
                log.exception(ex)
                return lambda x=ex: self.Error.unknown(str(x))
            if warnings:
                self.Warning.load_warning(warnings[-1].message.args[0])

        self.info.setText(self._describe(data))

        self.loaded_file = self.last_path()
        add_origin(data, self.loaded_file)
        self.data = data
        self.openContext(data.domain)
        self.apply_domain_edit()  # sends data

    def _get_reader(self):
        """

        Returns
        -------
        FileFormat
        """
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            if self.recent_paths and self.recent_paths[0].file_format:
                qname = self.recent_paths[0].file_format
                reader_class = class_from_qualified_name(qname)
                reader = reader_class(path)
            else:
                reader = FileFormat.get_reader(path)
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        elif self.source == self.URL:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)

    def _update_sheet_combo(self):
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        if self.reader.sheet:
            try:
                idx = self.reader.sheets.index(self.reader.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.reader.select_sheet(None)
        else:
            self.sheet_combo.setCurrentIndex(0)

    def _describe(self, table):
        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [attrs[desc]
                 for desc in ("Name", "Description") if desc in attrs]
        if len(descs) == 2:
            descs[0] = "<b>{}</b>".format(descs[0])
        if descs:
            text += "<p>{}</p>".format("<br/>".join(descs))

        text += "<p>{} instance(s), {} feature(s), {} meta attribute(s)".\
            format(len(table), len(domain.attributes), len(domain.metas))
        if domain.has_continuous_class:
            text += "<br/>Regression; numerical class."
        elif domain.has_discrete_class:
            text += "<br/>Classification; categorical class with {} values.".\
                format(len(domain.class_var.values))
        elif table.domain.class_vars:
            text += "<br/>Multi-target; {} target variables.".format(
                len(table.domain.class_vars))
        else:
            text += "<br/>Data has no target variable."
        text += "</p>"

        if 'Timestamp' in table.domain:
            # Google Forms uses this header to timestamp responses
            text += '<p>First entry: {}<br/>Last entry: {}</p>'.format(
                table[0, 'Timestamp'], table[-1, 'Timestamp'])
        return text

    def storeSpecificSettings(self):
        self.current_context.modified_variables = self.variables[:]

    def retrieveSpecificSettings(self):
        if hasattr(self.current_context, "modified_variables"):
            self.variables[:] = self.current_context.modified_variables

    def apply_domain_edit(self):
        if self.data is None:
            table = None
        else:
            domain, cols = self.domain_editor.get_domain(self.data.domain, self.data)
            if not (domain.variables or domain.metas):
                table = None
            else:
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, self.data.W)
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                table.attributes = getattr(self.data, 'attributes', {})

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        _, name = os.path.split(self.loaded_file)
        return os.path.splitext(name)[0]

    def send_report(self):
        def get_ext_name(filename):
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~" + os.path.sep + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            if self.sheet_combo.isVisible():
                name += " ({})".format(self.sheet_combo.currentText())
            self.report_items("File", [("File name", name),
                                       ("Format", get_ext_name(name))])
        else:
            self.report_items("Data", [("Resource", self.url),
                                       ("Format", get_ext_name(self.url))])

        self.report_data("Data", self.data)

    def dragEnterEvent(self, event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                FileFormat.get_reader(OSX_NSURL_toLocalFile(urls[0]) or
                                      urls[0].toLocalFile())
                event.acceptProposedAction()
            except IOError:
                pass

    def dropEvent(self, event):
        """Handle file drops"""
        urls = event.mimeData().urls()
        if urls:
            self.add_path(OSX_NSURL_toLocalFile(urls[0]) or
                          urls[0].toLocalFile())  # add first file
            self.source = self.LOCAL_FILE
            self.load_data()

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        Function called when environment changes (e.g. while saving the scheme)
        It make sure that all environment connected values are modified
        (e.g. relative file paths are changed)
        """
        self.update_file_list(key, value, oldvalue)
Beispiel #22
0
class OWTranspose(OWWidget):
    name = "矩阵转置"
    description = "转置数据表。"
    icon = "icons/Transpose.svg"
    priority = 2000
    keywords = []

    class Inputs:
        data = Input("数据", Table)

    class Outputs:
        data = Output("数据", Table, dynamic=False)

    GENERIC, FROM_META_ATTR = range(2)

    resizing_enabled = False
    want_main_area = False

    DEFAULT_PREFIX = "Feature"

    settingsHandler = DomainContextHandler()
    feature_type = ContextSetting(GENERIC)
    feature_name = ContextSetting("")
    feature_names_column = ContextSetting(None)
    auto_apply = Setting(True)

    class Error(OWWidget.Error):
        value_error = Msg("{}")

    def __init__(self):
        super().__init__()
        self.data = None

        box = gui.radioButtons(self.controlArea,
                               self,
                               "feature_type",
                               box="特征名称",
                               callback=lambda: self.apply())

        button = gui.appendRadioButton(box, "通用")
        edit = gui.lineEdit(gui.indentedBox(box,
                                            gui.checkButtonOffsetHint(button)),
                            self,
                            "feature_name",
                            placeholderText="输入前缀......",
                            toolTip="自定义特征名称")
        edit.editingFinished.connect(self._apply_editing)

        self.meta_button = gui.appendRadioButton(box, "来自元特征:")
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=StringVariable,
                                         alphabetical=True)
        self.feature_combo = gui.comboBox(gui.indentedBox(
            box, gui.checkButtonOffsetHint(button)),
                                          self,
                                          "feature_names_column",
                                          contentsLength=12,
                                          callback=self._feature_combo_changed,
                                          model=self.feature_model)

        self.apply_button = gui.auto_commit(self.controlArea,
                                            self,
                                            "auto_apply",
                                            "应用",
                                            box=False,
                                            commit=self.apply)
        self.apply_button.button.setAutoDefault(False)

        self.set_controls()

    def _apply_editing(self):
        self.feature_type = self.GENERIC
        self.feature_name = self.feature_name.strip()
        self.apply()

    def _feature_combo_changed(self):
        self.feature_type = self.FROM_META_ATTR
        self.apply()

    @Inputs.data
    def set_data(self, data):
        # Skip the context if the combo is empty: a context with
        # feature_model == None would then match all domains
        if self.feature_model:
            self.closeContext()
        self.data = data
        self.set_controls()
        if self.feature_model:
            self.openContext(data)
        self.apply()

    def set_controls(self):
        self.feature_model.set_domain(self.data and self.data.domain)
        self.meta_button.setEnabled(bool(self.feature_model))
        if self.feature_model:
            self.feature_names_column = self.feature_model[0]
            self.feature_type = self.FROM_META_ATTR
        else:
            self.feature_names_column = None

    def apply(self):
        self.clear_messages()
        transposed = None
        if self.data:
            try:
                transposed = Table.transpose(
                    self.data,
                    self.feature_type == self.FROM_META_ATTR
                    and self.feature_names_column,
                    feature_name=self.feature_name or self.DEFAULT_PREFIX)
            except ValueError as e:
                self.Error.value_error(e)
        self.Outputs.data.send(transposed)

    def send_report(self):
        if self.feature_type == self.GENERIC:
            names = self.feature_name or self.DEFAULT_PREFIX
        else:
            names = "from meta attribute"
            if self.feature_names_column:
                names += "  '{}'".format(self.feature_names_column.name)
        self.report_items("", [("Feature names", names)])
        if self.data:
            self.report_data("Data", self.data)
Beispiel #23
0
class OWPivot(OWWidget):
    name = "Pivot Table"
    description = "Reshape data table based on column values."
    icon = "icons/Pivot.svg"
    priority = 1000
    keywords = ["pivot", "group", "aggregate"]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        pivot_table = Output("Pivot Table", Table, default=True)
        filtered_data = Output("Filtered Data", Table)
        grouped_data = Output("Grouped Data", Table)

    class Warning(OWWidget.Warning):
        # TODO - inconsistent for different variable types
        no_col_feature = Msg("Column feature should be selected.")
        cannot_aggregate = Msg("Some aggregations ({}) cannot be performed.")

    settingsHandler = DomainContextHandler()
    row_feature = ContextSetting(None)
    col_feature = ContextSetting(None)
    val_feature = ContextSetting(None)
    sel_agg_functions = Setting(set([Pivot.Count]))
    selection = ContextSetting(set())
    auto_commit = Setting(True)

    AGGREGATIONS = (Pivot.Count, Pivot.Count_defined, None, Pivot.Sum,
                    Pivot.Mean, Pivot.Mode, Pivot.Min, Pivot.Max, Pivot.Median,
                    Pivot.Var, None, Pivot.Majority)

    def __init__(self):
        super().__init__()
        self.data = None  # type: Table
        self.pivot = None  # type: Pivot
        self._add_control_area_controls()
        self._add_main_area_controls()

    def _add_control_area_controls(self):
        box = gui.vBox(self.controlArea, "Rows")
        gui.comboBox(box,
                     self,
                     "row_feature",
                     contentsLength=12,
                     searchable=True,
                     model=DomainModel(valid_types=DomainModel.PRIMITIVE),
                     callback=self.__feature_changed)
        box = gui.vBox(self.controlArea, "Columns")
        gui.comboBox(
            box,
            self,
            "col_feature",
            contentsLength=12,
            searchable=True,
            model=DomainModel(placeholder="(Same as rows)",
                              valid_types=DiscreteVariable),
            callback=self.__feature_changed,
        )
        box = gui.vBox(self.controlArea, "Values")
        gui.comboBox(box,
                     self,
                     "val_feature",
                     contentsLength=12,
                     searchable=True,
                     orientation=Qt.Horizontal,
                     model=DomainModel(placeholder="(None)"),
                     callback=self.__val_feature_changed)
        self.__add_aggregation_controls()
        gui.rubber(self.controlArea)
        gui.auto_apply(self.controlArea, self, "auto_commit")

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    def __add_aggregation_controls(self):
        box = gui.vBox(self.controlArea, "Aggregations")
        for agg in self.AGGREGATIONS:
            if agg is None:
                gui.separator(box, height=1)
                line = QFrame()
                line.setFrameShape(QFrame.HLine)
                line.setLineWidth(1)
                line.setFrameShadow(QFrame.Sunken)
                box.layout().addWidget(line)
                continue
            check_box = QCheckBox(str(agg), box)
            check_box.setChecked(agg in self.sel_agg_functions)
            check_box.clicked.connect(
                lambda *args, a=agg: self.__aggregation_cb_clicked(a, args[0]))
            box.layout().addWidget(check_box)

    def _add_main_area_controls(self):
        self.table_view = PivotTableView()
        self.table_view.selection_changed.connect(self.__invalidate_filtered)
        self.mainArea.layout().addWidget(self.table_view)

    @property
    def no_col_feature(self):
        return self.col_feature is None and self.row_feature is not None \
            and self.row_feature.is_continuous

    @property
    def skipped_aggs(self):
        def add(fun):
            data, var = self.data, self.val_feature
            return data and not var and fun not in Pivot.AutonomousFunctions \
                or var and var.is_discrete and fun in Pivot.ContVarFunctions \
                or var and var.is_continuous and fun in Pivot.DiscVarFunctions

        skipped = [str(fun) for fun in self.sel_agg_functions if add(fun)]
        return ", ".join(sorted(skipped))

    def __feature_changed(self):
        self.selection = set()
        self.pivot = None
        self.commit()

    def __val_feature_changed(self):
        self.selection = set()
        if self.no_col_feature:
            return
        self.pivot.update_pivot_table(self.val_feature)
        self.commit()

    def __aggregation_cb_clicked(self, agg_fun: Pivot.Functions,
                                 checked: bool):
        self.selection = set()
        if checked:
            self.sel_agg_functions.add(agg_fun)
        else:
            self.sel_agg_functions.remove(agg_fun)
        if self.no_col_feature or not self.pivot or not self.data:
            return
        self.pivot.update_group_table(self.sel_agg_functions, self.val_feature)
        self.commit()

    def __invalidate_filtered(self):
        self.selection = self.table_view.get_selection()
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.closeContext()
        self.data = data
        self.pivot = None
        self.check_data()
        self.init_attr_values()
        self.openContext(self.data)
        self.unconditional_commit()

    def check_data(self):
        self.clear_messages()
        if not self.data:
            self.table_view.clear()
            self.info.set_input_summary(self.info.NoInput)
        else:
            self.info.set_input_summary(len(self.data),
                                        format_summary_details(self.data))

    def init_attr_values(self):
        domain = self.data.domain if self.data and len(self.data) else None
        for attr in ("row_feature", "col_feature", "val_feature"):
            getattr(self.controls, attr).model().set_domain(domain)
            setattr(self, attr, None)
        model = self.controls.row_feature.model()
        if model:
            self.row_feature = model[0]
        model = self.controls.val_feature.model()
        if model and len(model) > 2:
            self.val_feature = domain.variables[0] \
                if domain.variables[0] in model else model[2]

    def commit(self):
        if self.pivot is None:
            self.Warning.no_col_feature.clear()
            if self.no_col_feature:
                self.Warning.no_col_feature()
                return
            self.pivot = Pivot(self.data, self.sel_agg_functions,
                               self.row_feature, self.col_feature,
                               self.val_feature)
        self.Warning.cannot_aggregate.clear()
        if self.skipped_aggs:
            self.Warning.cannot_aggregate(self.skipped_aggs)
        self._update_graph()
        filtered_data = self.get_filtered_data()
        self.Outputs.grouped_data.send(self.pivot.group_table)
        self.Outputs.pivot_table.send(self.pivot.pivot_table)
        self.Outputs.filtered_data.send(filtered_data)

        summary = len(filtered_data) if filtered_data else self.info.NoOutput
        details = format_summary_details(
            filtered_data) if filtered_data else ""
        self.info.set_output_summary(summary, details)

    def _update_graph(self):
        self.table_view.clear()
        if self.pivot.pivot_table:
            col_feature = self.col_feature or self.row_feature
            self.table_view.update_table(col_feature.name,
                                         self.row_feature.name,
                                         *self.pivot.pivot_tables)
            self.table_view.set_selection(self.selection)

    def get_filtered_data(self):
        if not self.data or not self.selection or not self.pivot.pivot_table:
            return None

        cond = []
        for i, j in self.selection:
            f = []
            for at, val in [(self.row_feature, self.pivot.pivot_table.X[i, 0]),
                            (self.col_feature, j)]:
                if isinstance(at, DiscreteVariable):
                    f.append(FilterDiscrete(at, [val]))
                elif isinstance(at, ContinuousVariable):
                    f.append(FilterContinuous(at, FilterContinuous.Equal, val))
            cond.append(Values(f))
        return Values([f for f in cond], conjunction=False)(self.data)

    def sizeHint(self):
        return QSize(640, 525)

    def send_report(self):
        self.report_items((("Row feature", self.row_feature),
                           ("Column feature", self.col_feature),
                           ("Value feature", self.val_feature)))
        if self.data and self.val_feature is not None:
            self.report_table("", self.table_view)
        if not self.data:
            self.report_items((("Group by", self.row_feature), ))
            self.report_table(self.table_view)
Beispiel #24
0
class OWAlignDatasets(widget.OWWidget):
    name = "Align Datasets"
    description = "Alignment of multiple datasets with a diagram of correlation visualization."
    icon = "icons/AlignDatasets.svg"
    priority = 240

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        transformed_data = Output("Transformed Data", Table)
        genes_components = Output("Genes per n. Components", Table)

    settingsHandler = PerfectDomainContextHandler()
    axis_labels = ContextSetting(10)
    source_id = ContextSetting(None)
    ncomponents = ContextSetting(20)
    ngenes = ContextSetting(30)
    scoring = ContextSetting(list(SCORINGS.keys())[0])
    quantile_normalization = ContextSetting(False)
    quantile_normalization_perc = ContextSetting(2.5)
    dynamic_time_warping = ContextSetting(False)

    auto_update = Setting(True)
    auto_commit = Setting(True)

    graph_name = "plot.plotItem"

    class Error(widget.OWWidget.Error):
        no_features = widget.Msg("At least 1 feature is required")
        no_instances = widget.Msg(
            "At least 2 data instances are required for each class")
        no_class = widget.Msg("At least 1 Discrete class variable is required")
        nan_class = widget.Msg(
            "Data contains undefined instances for the selected Data source indicator"
        )
        nan_input = widget.Msg("Input data contains non numeric values")
        sparse_data = widget.Msg("Sparse data is not supported")
        only_one_dataset = widget.Msg(
            "Data source indicator attribute column must indicate at least two datasets."
        )

    def __init__(self):
        super().__init__()
        self.data = None
        self.source_id = None
        self._mas = None
        self._Ws = None
        self._transformed = None
        self._components = None
        self._use_genes = None
        self._shared_correlations = None
        self._transformed_table = None
        self._line = False
        self._feature_model = DomainModel(valid_types=DiscreteVariable,
                                          separators=False)
        self._feature_model.set_domain(None)
        self._init_mas()
        self._legend = None
        form = QFormLayout(labelAlignment=Qt.AlignLeft,
                           formAlignment=Qt.AlignLeft,
                           fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow,
                           verticalSpacing=10)
        # Data source indicator
        box = gui.vBox(self.controlArea, "Data source indicator")

        gui.comboBox(
            box,
            self,
            "source_id",
            sendSelectedValue=True,
            callback=self._update_combo_source_id,
            model=self._feature_model,
        )

        # Canonical correlation analysis
        box = gui.vBox(self.controlArea, "Canonical correlation analysis")
        gui.spin(box,
                 self,
                 "ncomponents",
                 1,
                 MAX_COMPONENTS,
                 callback=self._update_selection_component_spin,
                 keyboardTracking=False,
                 label="Num. of components")

        # Shared genes
        box = gui.vBox(self.controlArea, "Shared genes")
        gui.spin(
            box,
            self,
            "ngenes",
            1,
            MAX_GENES,
            callback=self._update_ngenes_spin,
            keyboardTracking=False,
        )
        form.addRow("Num. of genes", self.controls.ngenes)

        gui.comboBox(
            box,
            self,
            "scoring",
            callback=self._update_scoring_combo,
            items=list(SCORINGS.keys()),
            sendSelectedValue=True,
            editable=False,
        )
        form.addRow("Scoring:", self.controls.scoring)

        box.layout().addLayout(form)

        # Post-processing
        box = gui.vBox(self.controlArea, "Post-processing")
        gui.doubleSpin(
            box,
            self,
            "quantile_normalization_perc",
            minv=0,
            maxv=49,
            step=5e-1,
            callback=self._update_quantile_normalization,
            checkCallback=self._update_quantile_normalization,
            controlWidth=80,
            alignment=Qt.AlignRight,
            label="Quantile normalization",
            checked="quantile_normalization",
        )
        self.controls.quantile_normalization_perc.setSuffix("%")
        b = gui.vBox(box)
        gui.checkBox(b,
                     self,
                     "dynamic_time_warping",
                     callback=self._update_dynamic_time_warping,
                     label="Dynamic time warping")

        self.controlArea.layout().addStretch()

        gui.auto_commit(self.controlArea,
                        self,
                        "auto_commit",
                        "Apply",
                        callback=self._invalidate_selection(),
                        checkbox_label="Apply automatically")

        self.plot = pg.PlotWidget(background="w")

        axis = self.plot.getAxis("bottom")
        axis.setLabel("Correlation components")
        axis = self.plot.getAxis("left")
        axis.setLabel("Correlation strength")
        self.plot_horlabels = []
        self.plot_horlines = []

        self.plot.getViewBox().setMenuEnabled(False)
        self.plot.getViewBox().setMouseEnabled(False, False)
        self.plot.showGrid(True, True, alpha=0.5)
        self.plot.setRange(xRange=(0.0, 1.0), yRange=(0.0, 1.0))

        self.mainArea.layout().addWidget(self.plot)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.closeContext()
        self.clear_messages()
        self.clear()
        self.information()
        self.clear_outputs()
        self._feature_model.set_domain(None)
        self.data = data

        if self.data:
            self._feature_model.set_domain(self.data.domain)
            if self._feature_model:
                # if source id is available we assume that it is the feature that describes a dataset source
                if "Source ID" in self.data.domain:
                    self.source_id = self.data.domain["Source ID"]
                self.openContext(self.data.domain)
                if self.source_id is None or self.source_id == '':
                    for model in self._feature_model:
                        y = np.array(self.data.get_column_view(model)[0],
                                     dtype=np.float64)
                        _, counts = np.unique(y, return_counts=True)
                        if np.isfinite(y).all() and min(counts) > 1:
                            self.source_id = model
                            self._reset_max_components()
                            break

                if not self.source_id:
                    self.Error.nan_class()
                    return
                if len(self.data.domain.attributes) == 0:
                    self.Error.no_features()
                    return
                if len(self.data) == 0:
                    self.Error.no_instances()
                    return
                if np.isnan(self.data.X).any():
                    self.Error.nan_input()
                    return
                y = np.array(self.data.get_column_view(self.source_id)[0],
                             dtype=np.float64)
                _, counts = np.unique(y, return_counts=True)
                if min(counts) < 2:
                    self.Error.no_instances()
                    return
                self._reset_max_components()
                self.fit()

            else:
                self.Error.no_class()
                self.clear()
                return

    def fit(self):
        if self.data is None:
            return
        global MAX_COMPONENTS
        if self.ncomponents > MAX_COMPONENTS:
            self.ncomponents = MAX_COMPONENTS

        X = self.data.X
        y = self.data.get_column_view(self.source_id)[0]

        if len(set(y)) < 2:
            self.Error.only_one_dataset()
            return
        self._init_mas()

        self._Ws = self._mas.fit(X, y)
        self._shared_correlations = self._mas.shared_correlations
        if np.isnan(np.sum(self._shared_correlations)):
            self._shared_correlations = np.array(
                [interpolate_nans(x) for x in self._shared_correlations])
        self._use_genes = self._mas.use_genes

        self._setup_plot()
        if self.auto_commit:
            self.commit()

    def clear(self):
        self.data = None
        self.source_id = None
        self._mas = None
        self._Ws = None
        self._transformed = None
        self._transformed_table = None
        self._components = None
        self._use_genes = None
        self._shared_correlations = None
        self._feature_model.set_domain(None)
        self.clear_plot()

    def clear_legend(self):
        if self._legend is None:
            return

        scene = self._legend.scene()
        if scene is None:
            return

        scene.removeItem(self._legend)
        self._legend = None

    def clear_plot(self):
        self.clear_legend()
        self._line = False
        self.plot_horlabels = []
        self.plot_horlines = []
        self._mas = None
        self._setup_plot()

    def clear_outputs(self):
        self.Outputs.transformed_data.send(None)
        self.Outputs.genes_components.send(None)

    def _reset_max_components(self):
        y = np.array(self.data.get_column_view(self.source_id)[0],
                     dtype=np.float64)
        _, counts = np.unique(y, return_counts=True)
        global MAX_COMPONENTS
        if min(counts) < MAX_COMPONENTS_DEFAULT or len(
                self.data.domain.attributes) < MAX_COMPONENTS_DEFAULT:
            MAX_COMPONENTS = min(min(counts), len(
                self.data.domain.attributes)) - 1
            if self.ncomponents > MAX_COMPONENTS:
                self.ncomponents = MAX_COMPONENTS // 2
            self.controls.ncomponents.setMaximum(MAX_COMPONENTS)
        else:
            MAX_COMPONENTS = MAX_COMPONENTS_DEFAULT
            self.ncomponents = 20
            self.controls.ncomponents.setMaximum(MAX_COMPONENTS)

    def _init_mas(self):
        self._mas = SeuratAlignmentModel(
            n_components=MAX_COMPONENTS,
            n_metagenes=self.ngenes,
            gene_scoring=SCORINGS[self.scoring],
        )

    def get_model(self):
        if self.data is None:
            return

        self.fit()
        self._setup_plot()
        self.commit()

    def _setup_plot(self):
        self.plot.clear()
        if self._mas is None:
            return

        shared_correlations = self._shared_correlations
        p = MAX_COMPONENTS

        # Colors chosen based on: http://colorbrewer2.org/?type=qualitative&scheme=Set1&n=9
        colors = [
            '#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33',
            '#a65628', '#f781bf', '#999999'
        ]

        self.clear_legend()
        self._legend = self.plot.addLegend(offset=(-1, 1))
        # correlation lines
        offset = 2
        if MAX_COMPONENTS > 2 * offset + 1:
            smoothed_correlations = smooth_correlations(shared_correlations,
                                                        offset=offset)
        else:
            smoothed_correlations = shared_correlations
        plotitem = dict()
        for i, corr in enumerate(smoothed_correlations):
            plotitem[i] = self.plot.plot(
                np.arange(p),
                corr,
                pen=pg.mkPen(QColor(colors[i]), width=2),
                antialias=True)  # name=self.source_id.values[i]
        # self.plot.plotItem.legend.addItem(3, "maximum value")

        for i in range(len(plotitem)):
            self._legend.addItem(
                MyLegendItem(pg.ScatterPlotItem(pen=colors[i])),
                self.source_id.values[i])

        # vertical movable line
        cutpos = self.ncomponents - 1
        self._line = pg.InfiniteLine(angle=90,
                                     pos=cutpos,
                                     movable=True,
                                     bounds=(0, p - 1))
        self._line.setCursor(Qt.SizeHorCursor)
        self._line.setPen(pg.mkPen(QColor(Qt.black), width=2))
        self._line.sigPositionChanged.connect(self._on_cut_changed)
        self.plot.addItem(self._line)

        # horizontal lines
        self.plot_horlines = tuple(
            pg.PlotCurveItem(
                pen=pg.mkPen(QColor(colors[i]), style=Qt.DashLine))
            for i in range(len(shared_correlations)))
        self.plot_horlabels = tuple(
            pg.TextItem(color=QColor('k'), anchor=(0, 1))
            for _ in range(len(shared_correlations)))

        for item in self.plot_horlabels + self.plot_horlines:
            self.plot.addItem(item)
        self._set_horline_pos()

        # self.plot.setRange(xRange=(0.0, p - 1), yRange=(0.0, 1.0))
        self.plot.setXRange(0.0, p - 1, padding=0)
        self.plot.setYRange(0.0, 1.0, padding=0)
        self._update_axis()

    def _set_horline_pos(self):
        cutidx = self.ncomponents - 1
        for line, label, curve in zip(self.plot_horlines, self.plot_horlabels,
                                      self._shared_correlations):
            y = curve[cutidx]
            line.setData([-1, cutidx], 2 * [y])
            label.setPos(cutidx, y)
            label.setPlainText("{:.3f}".format(y))

    def _on_cut_changed(self, line):
        # cut changed by means of a cut line over the scree plot.
        value = int(round(line.value()))
        components = value + 1

        if not (self.ncomponents == 0 and components == len(self._components)):
            self.ncomponents = components

        self._line.setValue(value)
        self._set_horline_pos()
        self.commit()

    def _update_selection_component_spin(self):
        # cut changed by "ncomponents" spin.
        if self._mas is None:
            self._invalidate_selection()
            return

        if np.floor(self._line.value()) + 1 != self.ncomponents:
            self._line.setValue(self.ncomponents - 1)

        self.commit()

    def _invalidate_selection(self):
        if self.data is not None:
            self._transformed = None
            self.commit()

    def _update_scoring_combo(self):
        self.fit()
        self._invalidate_selection()

    def _update_dynamic_time_warping(self):
        self._invalidate_selection()

    def _update_quantile_normalization(self):
        self._invalidate_selection()

    def _update_ngenes_spin(self):
        self.clear_plot()
        if self.data is None:
            return
        if self._has_nan_classes():
            self.Error.nan_class()
            return
        self.clear_messages()
        self.fit()
        self._invalidate_selection()

    def _update_combo_source_id(self):
        self.clear_plot()
        if self.data is None:
            return
        y = np.array(self.data.get_column_view(self.source_id)[0],
                     dtype=np.float64)
        _, counts = np.unique(y, return_counts=True)
        if min(counts) < 2:
            self.Error.no_instances()
            return
        self._reset_max_components()
        if self._has_nan_classes():
            self.Error.nan_class()
            return
        self.clear_messages()
        self.fit()
        self._invalidate_selection()

    def _update_axis(self):
        p = MAX_COMPONENTS
        axis = self.plot.getAxis("bottom")
        d = max((p - 1) // (self.axis_labels - 1), 1)
        axis.setTicks([[(i, str(i + 1)) for i in range(0, p, d)]])

    def _has_nan_classes(self):
        y = np.array(self.data.get_column_view(self.source_id)[0],
                     dtype=np.float64)
        return not np.isfinite(y).all()

    def commit(self):
        transformed_table = meta_genes = None
        if self._mas is not None:
            # Compute the full transform (MAX_COMPONENTS components) only once.
            if self._transformed is None:
                X = self.data.X
                y = self.data.get_column_view(self.source_id)[0]
                self._transformed = self._mas.transform(
                    X,
                    y,
                    normalize=self.quantile_normalization,
                    quantile=self.quantile_normalization_perc,
                    dtw=self.dynamic_time_warping)

                attributes = tuple(
                    ContinuousVariable.make("CCA{}".format(x + 1))
                    for x in range(MAX_COMPONENTS))
                dom = Domain(attributes, self.data.domain.class_vars,
                             self.data.domain.metas)

                # Meta-genes
                meta_genes = self.data.transform(dom)
                genes_components = np.zeros(
                    (self.data.X.shape[1], MAX_COMPONENTS))
                for key, genes in self._mas.use_genes.items():
                    for gene in genes:
                        genes_components[gene - 1, key] = genes.index(gene) + 1
                genes_components[genes_components == 0] = np.NaN
                meta_genes.X = genes_components
                self.meta_genes = Table.from_numpy(Domain(attributes),
                                                   genes_components)

                # Transformed data
                transformed = self._transformed
                new_domain = add_columns(self.data.domain,
                                         attributes=attributes)
                transformed_table_temp = self.data.transform(new_domain)
                transformed_table_temp.X[:, -MAX_COMPONENTS:] = transformed
                self.transformed_table = Table.from_table(
                    dom, transformed_table_temp)

            ncomponents_attributes = tuple(
                ContinuousVariable.make("CCA{}".format(x + 1))
                for x in range(self.ncomponents))
            ncomponents_domain = Domain(ncomponents_attributes,
                                        self.data.domain.class_vars,
                                        self.data.domain.metas)

            meta_genes = self.meta_genes.transform(
                Domain(ncomponents_attributes))
            transformed_table = self.transformed_table.transform(
                ncomponents_domain)

        self.Outputs.transformed_data.send(transformed_table)
        self.Outputs.genes_components.send(meta_genes)

    def send_report(self):
        if self.data is None:
            return
        self.report_items(
            (("Source ID", self.source_id), ("Selected num. of components",
                                             self.ncomponents),
             ("Selected num. of genes", self.ngenes), ("Scoring",
                                                       self.scoring),
             ("Quantile normalization",
              True if self.quantile_normalization else "False"),
             ("Quantile normalization percentage",
              self.quantile_normalization_perc if self.quantile_normalization
              else False), ("Dynamic time warping",
                            True if self.dynamic_time_warping else "False")))
        self.report_plot()

    """
Beispiel #25
0
class OWScatterPlotGraph(gui.OWComponent, ScaleScatterPlotData):
    attr_color = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_label = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_shape = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_size = ContextSetting(None, required=ContextSetting.OPTIONAL)
    label_only_selected = Setting(False)

    point_width = Setting(10)
    alpha_value = Setting(128)
    show_grid = Setting(False)
    show_legend = Setting(True)
    tooltip_shows_all = Setting(False)
    class_density = Setting(False)
    show_reg_line = Setting(False)
    resolution = 256

    CurveSymbols = np.array("o x t + d s ?".split())
    MinShapeSize = 6
    DarkerValue = 120
    UnknownColor = (168, 50, 168)

    def __init__(self,
                 scatter_widget,
                 parent=None,
                 _="None",
                 view_box=InteractiveViewBox):
        gui.OWComponent.__init__(self, scatter_widget)
        self.view_box = view_box(self)
        self.plot_widget = pg.PlotWidget(viewBox=self.view_box,
                                         parent=parent,
                                         background="w")
        self.plot_widget.getPlotItem().buttonsHidden = True
        self.plot_widget.setAntialiasing(True)
        self.plot_widget.sizeHint = lambda: QSize(500, 500)
        scene = self.plot_widget.scene()
        self._create_drag_tooltip(scene)
        self._data = None  # Original Table as passed from widget to new_data before transformations
        self.attr_x = None
        self.attr_y = None

        self.replot = self.plot_widget.replot
        ScaleScatterPlotData.__init__(self)
        self.density_img = None
        self.scatterplot_item = None
        self.scatterplot_item_sel = None
        self.reg_line_item = None

        self.labels = []

        self.master = scatter_widget
        self.master.Warning.add_message(
            "missing_coords",
            "Plot cannot be displayed because '{}' or '{}' is missing for "
            "all data points")
        self.master.Information.add_message(
            "missing_coords",
            "Points with missing '{}' or '{}' are not displayed")
        self.master.Information.add_message(
            "missing_size",
            "Points with undefined '{}' are shown in smaller size")
        self.master.Information.add_message(
            "missing_shape",
            "Points with undefined '{}' are shown as crossed circles")
        self.shown_attribute_indices = []
        self.shown_x = self.shown_y = None
        self.pen_colors = self.brush_colors = None

        self.valid_data = None  # np.ndarray
        self.selection = None  # np.ndarray
        self.n_points = 0

        self.gui = OWPlotGUI(self)
        self.continuous_palette = ContinuousPaletteGenerator(
            QColor(255, 255, 0), QColor(0, 0, 255), True)
        self.discrete_palette = ColorPaletteGenerator()

        self.selection_behavior = 0

        self.legend = self.color_legend = None
        self.__legend_anchor = (1, 0), (1, 0)
        self.__color_legend_anchor = (1, 1), (1, 1)

        self.scale = None  # DiscretizedScale

        self.subset_indices = None

        # self.setMouseTracking(True)
        # self.grabGesture(QPinchGesture)
        # self.grabGesture(QPanGesture)

        self.update_grid()

        self._tooltip_delegate = HelpEventDelegate(self.help_event)
        self.plot_widget.scene().installEventFilter(self._tooltip_delegate)

    def _create_drag_tooltip(self, scene):
        tip_parts = [(Qt.ShiftModifier, "Shift: Add group"),
                     (Qt.ShiftModifier + Qt.ControlModifier,
                      "Shift-{}: Append to group".format(
                          "Cmd" if sys.platform == "darwin" else "Ctrl")),
                     (Qt.AltModifier, "Alt: Remove")]
        all_parts = ", ".join(part for _, part in tip_parts)
        self.tiptexts = {
            int(modifier): all_parts.replace(part, "<b>{}</b>".format(part))
            for modifier, part in tip_parts
        }
        self.tiptexts[0] = all_parts

        self.tip_textitem = text = QGraphicsTextItem()
        # Set to the longest text
        text.setHtml(self.tiptexts[Qt.ShiftModifier + Qt.ControlModifier])
        text.setPos(4, 2)
        r = text.boundingRect()
        rect = QGraphicsRectItem(0, 0, r.width() + 8, r.height() + 4)
        rect.setBrush(QColor(224, 224, 224, 212))
        rect.setPen(QPen(Qt.NoPen))
        self.update_tooltip(Qt.NoModifier)

        scene.drag_tooltip = scene.createItemGroup([rect, text])
        scene.drag_tooltip.hide()

    def update_tooltip(self, modifiers):
        modifiers &= Qt.ShiftModifier + Qt.ControlModifier + Qt.AltModifier
        text = self.tiptexts.get(int(modifiers), self.tiptexts[0])
        self.tip_textitem.setHtml(text)

    def new_data(self, data, subset_data=None, new=True, **args):
        if new:
            self.plot_widget.clear()
            self.remove_legend()

            self.density_img = None
            self.scatterplot_item = None
            self.scatterplot_item_sel = None
            self.reg_line_item = None
            self.labels = []
            self.selection = None
            self.valid_data = None

        self.subset_indices = set(
            e.id for e in subset_data) if subset_data else None

        self._data = data
        data = self.sparse_to_dense()
        self.set_data(data, **args)

    def sparse_to_dense(self):
        data = self._data
        if data is None or not data.is_sparse():
            return data

        attrs = {
            self.attr_x, self.attr_y, self.attr_color, self.attr_shape,
            self.attr_size, self.attr_label
        }
        domain = data.domain
        all_attrs = domain.variables + domain.metas
        attrs = list(set(all_attrs) & attrs)
        selected_data = data[:, attrs]
        if sp.issparse(selected_data.X):
            selected_data.X = selected_data.X.toarray()
        if sp.issparse(selected_data.Y):
            selected_data.Y = selected_data.Y.toarray()
        if sp.issparse(selected_data.metas):
            selected_data.metas = selected_data.metas.toarray()
        return selected_data

    def _clear_plot_widget(self):
        self.remove_legend()
        if self.density_img:
            self.plot_widget.removeItem(self.density_img)
            self.density_img = None
        if self.scatterplot_item:
            self.plot_widget.removeItem(self.scatterplot_item)
            self.scatterplot_item = None
        if self.scatterplot_item_sel:
            self.plot_widget.removeItem(self.scatterplot_item_sel)
            self.scatterplot_item_sel = None
        if self.reg_line_item:
            self.plot_widget.removeItem(self.reg_line_item)
            self.reg_line_item = None
        for label in self.labels:
            self.plot_widget.removeItem(label)
        self.labels = []
        self.set_axis_title("bottom", "")
        self.set_axis_title("left", "")

    def update_data(self, attr_x, attr_y, reset_view=True):
        self.attr_x = attr_x
        self.attr_y = attr_y
        if attr_x not in self.data.domain or attr_y not in self.data.domain:
            data = self.sparse_to_dense()
            self.set_data(data)

        self.master.Warning.missing_coords.clear()
        self.master.Information.missing_coords.clear()
        self._clear_plot_widget()

        if self.shown_y != attr_y:
            # 'reset' the axis text width estimation. Without this the left
            # axis tick labels space only ever expands
            yaxis = self.plot_widget.getAxis("left")
            yaxis.textWidth = 30

        self.shown_x, self.shown_y = attr_x, attr_y

        if self.jittered_data is None or not len(self.jittered_data):
            self.valid_data = None
        else:
            index_x = self.domain.index(attr_x)
            index_y = self.domain.index(attr_y)
            self.valid_data = self.get_valid_list([index_x, index_y])
            if not np.any(self.valid_data):
                self.valid_data = None
        if self.valid_data is None:
            self.selection = None
            self.n_points = 0
            self.master.Warning.missing_coords(self.shown_x.name,
                                               self.shown_y.name)
            return

        x_data, y_data = self.get_xy_data_positions(attr_x, attr_y,
                                                    self.valid_data)
        self.n_points = len(x_data)

        if reset_view:
            min_x, max_x = np.nanmin(x_data), np.nanmax(x_data)
            min_y, max_y = np.nanmin(y_data), np.nanmax(y_data)
            self.view_box.setRange(QRectF(min_x, min_y, max_x - min_x,
                                          max_y - min_y),
                                   padding=0.025)
            self.view_box.init_history()
            self.view_box.tag_history()
        [min_x, max_x], [min_y, max_y] = self.view_box.viewRange()

        for axis, name, index in (("bottom", attr_x, index_x), ("left", attr_y,
                                                                index_y)):
            self.set_axis_title(axis, name)
            var = self.domain[index]
            if var.is_discrete:
                self.set_labels(axis, get_variable_values_sorted(var))
            else:
                self.set_labels(axis, None)

        color_data, brush_data = self.compute_colors()
        color_data_sel, brush_data_sel = self.compute_colors_sel()
        size_data = self.compute_sizes()
        shape_data = self.compute_symbols()

        if self.should_draw_density():
            rgb_data = [pen.color().getRgb()[:3] for pen in color_data]
            self.density_img = classdensity.class_density_image(
                min_x, max_x, min_y, max_y, self.resolution, x_data, y_data,
                rgb_data)
            self.plot_widget.addItem(self.density_img)

        self.data_indices = np.flatnonzero(self.valid_data)
        if len(self.data_indices) != self.original_data.shape[1]:
            self.master.Information.missing_coords(self.shown_x.name,
                                                   self.shown_y.name)

        self.scatterplot_item = ScatterPlotItem(x=x_data,
                                                y=y_data,
                                                data=self.data_indices,
                                                symbol=shape_data,
                                                size=size_data,
                                                pen=color_data,
                                                brush=brush_data)
        self.scatterplot_item_sel = ScatterPlotItem(x=x_data,
                                                    y=y_data,
                                                    data=self.data_indices,
                                                    symbol=shape_data,
                                                    size=size_data +
                                                    SELECTION_WIDTH,
                                                    pen=color_data_sel,
                                                    brush=brush_data_sel)
        self.plot_widget.addItem(self.scatterplot_item_sel)
        self.plot_widget.addItem(self.scatterplot_item)

        self.scatterplot_item.selected_points = []
        self.scatterplot_item.sigClicked.connect(self.select_by_click)

        self.draw_regression_line(x_data, y_data, min_x, max_x)
        self.update_labels()
        self.make_legend()
        self.plot_widget.replot()

    def draw_regression_line(self, x_data, y_data, min_x, max_x):
        if self.show_reg_line and self.can_draw_regresssion_line():
            slope, intercept, _, _, _ = linregress(x_data, y_data)
            start_y = min_x * slope + intercept
            end_y = max_x * slope + intercept
            angle = np.degrees(np.arctan((end_y - start_y) / (max_x - min_x)))
            rotate = ((angle + 45) % 180) - 45 > 90
            color = QColor("#505050")
            l_opts = dict(color=color,
                          position=abs(int(rotate) - 0.85),
                          rotateAxis=(1, 0),
                          movable=True)
            self.reg_line_item = InfiniteLine(pos=QPointF(min_x, start_y),
                                              pen=pg.mkPen(color=color,
                                                           width=1),
                                              angle=angle,
                                              label="r = {:.2f}".format(slope),
                                              labelOpts=l_opts)
            if rotate:
                self.reg_line_item.label.angle = 180
                self.reg_line_item.label.updateTransform()
            self.plot_widget.addItem(self.reg_line_item)

    def can_draw_density(self):
        return self.domain is not None and \
            self.attr_color is not None and \
            self.attr_color.is_discrete and \
            self.shown_x.is_continuous and \
            self.shown_y.is_continuous

    def should_draw_density(self):
        return self.class_density and self.n_points > 1 and self.can_draw_density(
        )

    def can_draw_regresssion_line(self):
        return self.domain is not None and \
               self.shown_x.is_continuous and \
               self.shown_y.is_continuous

    def set_labels(self, axis, labels):
        axis = self.plot_widget.getAxis(axis)
        if labels:
            ticks = [[(i, labels[i]) for i in range(len(labels))]]
            axis.setTicks(ticks)
        else:
            axis.setTicks(None)

    def set_axis_title(self, axis, title):
        self.plot_widget.setLabel(axis=axis, text=title)

    def get_size_index(self):
        if self.attr_size is None:
            return -1
        return self.domain.index(self.attr_size)

    def compute_sizes(self):
        self.master.Information.missing_size.clear()
        size_index = self.get_size_index()
        if size_index == -1:
            size_data = np.full((self.n_points, ),
                                self.point_width,
                                dtype=float)
        else:
            size_data = \
                self.MinShapeSize + \
                self.scaled_data[size_index, self.valid_data] * \
                self.point_width
        nans = np.isnan(size_data)
        if np.any(nans):
            size_data[nans] = self.MinShapeSize - 2
            self.master.Information.missing_size(self.attr_size)
        return size_data

    def update_sizes(self):
        self.set_data(self.sparse_to_dense())
        self.update_point_size()

    def update_point_size(self):
        if self.scatterplot_item:
            size_data = self.compute_sizes()
            self.scatterplot_item.setSize(size_data)
            self.scatterplot_item_sel.setSize(size_data + SELECTION_WIDTH)

    def get_color_index(self):
        if self.attr_color is None:
            return -1
        colors = self.attr_color.colors
        if self.attr_color.is_discrete:
            self.discrete_palette = ColorPaletteGenerator(
                number_of_colors=len(colors), rgb_colors=colors)
        else:
            self.continuous_palette = ContinuousPaletteGenerator(*colors)
        return self.domain.index(self.attr_color)

    def compute_colors_sel(self, keep_colors=False):
        if not keep_colors:
            self.pen_colors_sel = self.brush_colors_sel = None

        def make_pen(color, width):
            p = QPen(color, width)
            p.setCosmetic(True)
            return p

        nopen = QPen(Qt.NoPen)
        if self.selection is not None:
            sels = np.max(self.selection)
            if sels == 1:
                pens = [
                    nopen,
                    make_pen(QColor(255, 190, 0, 255), SELECTION_WIDTH + 1.)
                ]
            else:
                # Start with the first color so that the colors of the
                # additional attribute in annotation (which start with 0,
                # unselected) will match these colors
                palette = ColorPaletteGenerator(number_of_colors=sels + 1)
                pens = [nopen] + \
                       [make_pen(palette[i + 1], SELECTION_WIDTH + 1.)
                        for i in range(sels)]
            pen = [pens[a] for a in self.selection[self.valid_data]]
        else:
            pen = [nopen] * self.n_points
        brush = [QBrush(QColor(255, 255, 255, 0))] * self.n_points
        return pen, brush

    def compute_colors(self, keep_colors=False):
        if not keep_colors:
            self.pen_colors = self.brush_colors = None
        color_index = self.get_color_index()

        def make_pen(color, width):
            p = QPen(color, width)
            p.setCosmetic(True)
            return p

        subset = None
        if self.subset_indices:
            subset = np.array([
                ex.id in self.subset_indices
                for ex in self.data[self.valid_data]
            ])

        if color_index == -1:  # same color
            color = self.plot_widget.palette().color(OWPalette.Data)
            pen = [make_pen(color, 1.5)] * self.n_points
            if subset is not None:
                brush = [(QBrush(QColor(128, 128, 128,
                                        0)), QBrush(QColor(128, 128, 128,
                                                           255)))[s]
                         for s in subset]
            else:
                brush = [QBrush(QColor(128, 128, 128, self.alpha_value))] \
                        * self.n_points
            return pen, brush

        c_data = self.original_data[color_index, self.valid_data]
        if self.domain[color_index].is_continuous:
            if self.pen_colors is None:
                self.scale = DiscretizedScale(np.nanmin(c_data),
                                              np.nanmax(c_data))
                c_data -= self.scale.offset
                c_data /= self.scale.width
                c_data = np.floor(c_data) + 0.5
                c_data /= self.scale.bins
                c_data = np.clip(c_data, 0, 1)
                palette = self.continuous_palette
                self.pen_colors = palette.getRGB(c_data)
                self.brush_colors = np.hstack([
                    self.pen_colors,
                    np.full((self.n_points, 1), self.alpha_value, dtype=int)
                ])
                self.pen_colors *= 100
                self.pen_colors //= self.DarkerValue
                self.pen_colors = [
                    make_pen(QColor(*col), 1.5)
                    for col in self.pen_colors.tolist()
                ]
            if subset is not None:
                self.brush_colors[:, 3] = 0
                self.brush_colors[subset, 3] = 255
            else:
                self.brush_colors[:, 3] = self.alpha_value
            pen = self.pen_colors
            brush = np.array(
                [QBrush(QColor(*col)) for col in self.brush_colors.tolist()])
        else:
            if self.pen_colors is None:
                palette = self.discrete_palette
                n_colors = palette.number_of_colors
                c_data = c_data.copy()
                c_data[np.isnan(c_data)] = n_colors
                c_data = c_data.astype(int)
                colors = np.r_[palette.getRGB(np.arange(n_colors)),
                               [[128, 128, 128]]]
                pens = np.array([
                    make_pen(QColor(*col).darker(self.DarkerValue), 1.5)
                    for col in colors
                ])
                self.pen_colors = pens[c_data]
                alpha = self.alpha_value if subset is None else 255
                self.brush_colors = np.array([[
                    QBrush(QColor(0, 0, 0, 0)),
                    QBrush(QColor(col[0], col[1], col[2], alpha))
                ] for col in colors])
                self.brush_colors = self.brush_colors[c_data]
            if subset is not None:
                brush = np.where(subset, self.brush_colors[:, 1],
                                 self.brush_colors[:, 0])
            else:
                brush = self.brush_colors[:, 1]
            pen = self.pen_colors
        return pen, brush

    def update_colors(self, keep_colors=False):
        self.master.update_colors()
        self.set_data(self.sparse_to_dense())
        self.update_alpha_value(keep_colors)

    def update_alpha_value(self, keep_colors=False):
        if self.scatterplot_item:
            pen_data, brush_data = self.compute_colors(keep_colors)
            pen_data_sel, brush_data_sel = self.compute_colors_sel(keep_colors)
            self.scatterplot_item.setPen(pen_data, update=False, mask=None)
            self.scatterplot_item.setBrush(brush_data, mask=None)
            self.scatterplot_item_sel.setPen(pen_data_sel,
                                             update=False,
                                             mask=None)
            self.scatterplot_item_sel.setBrush(brush_data_sel, mask=None)
            if not keep_colors:
                self.make_legend()

                if self.should_draw_density():
                    self.update_data(self.shown_x, self.shown_y)
                elif self.density_img:
                    self.plot_widget.removeItem(self.density_img)

    def create_labels(self):
        for x, y in zip(*self.scatterplot_item.getData()):
            ti = TextItem()
            self.plot_widget.addItem(ti)
            ti.setPos(x, y)
            self.labels.append(ti)

    def _create_label_column(self):
        if self.attr_label in self.data.domain:
            label_column = self.data.get_column_view(self.attr_label)[0]
        else:
            label_column = self.master.data.get_column_view(self.attr_label)[0]
        return label_column[self.data_indices]

    def update_labels(self):
        if self.attr_label is None or \
                self.label_only_selected and self.selection is None:
            for label in self.labels:
                label.setText("")
            return
        self.assure_attribute_present(self.attr_label)
        if not self.labels:
            self.create_labels()
        label_column = self._create_label_column()
        formatter = self.attr_label.str_val
        label_data = map(formatter, label_column)
        black = pg.mkColor(0, 0, 0)
        selection = self.selection[
            self.valid_data] if self.selection is not None else []
        if self.label_only_selected:
            for label, text, selected \
                    in zip(self.labels, label_data, selection):
                label.setText(text if selected else "", black)
        else:
            for label, text in zip(self.labels, label_data):
                label.setText(text, black)

    def get_shape_index(self):
        if self.attr_shape is None or \
                len(self.attr_shape.values) > len(self.CurveSymbols):
            return -1
        return self.domain.index(self.attr_shape)

    def compute_symbols(self):
        self.master.Information.missing_shape.clear()
        shape_index = self.get_shape_index()
        if shape_index == -1:
            shape_data = self.CurveSymbols[np.zeros(self.n_points, dtype=int)]
        else:
            shape_data = self.original_data[shape_index, self.valid_data]
            nans = np.isnan(shape_data)
            if np.any(nans):
                shape_data[nans] = len(self.CurveSymbols) - 1
                self.master.Information.missing_shape(self.attr_shape)
            shape_data = self.CurveSymbols[shape_data.astype(int)]
        return shape_data

    def update_shapes(self):
        self.assure_attribute_present(self.attr_shape)
        if self.scatterplot_item:
            shape_data = self.compute_symbols()
            self.scatterplot_item.setSymbol(shape_data)
        self.make_legend()

    def assure_attribute_present(self, attr):
        if self.data is not None and attr not in self.data.domain:
            self.set_data(self.sparse_to_dense())

    def update_grid(self):
        self.plot_widget.showGrid(x=self.show_grid, y=self.show_grid)

    def update_legend(self):
        if self.legend:
            self.legend.setVisible(self.show_legend)

    def create_legend(self):
        self.legend = LegendItem()
        self.legend.setParentItem(self.plot_widget.getViewBox())
        self.legend.restoreAnchor(self.__legend_anchor)

    def remove_legend(self):
        if self.legend:
            anchor = legend_anchor_pos(self.legend)
            if anchor is not None:
                self.__legend_anchor = anchor
            self.legend.setParent(None)
            self.legend = None
        if self.color_legend:
            anchor = legend_anchor_pos(self.color_legend)
            if anchor is not None:
                self.__color_legend_anchor = anchor
            self.color_legend.setParent(None)
            self.color_legend = None

    def make_legend(self):
        self.remove_legend()
        self.make_color_legend()
        self.make_shape_legend()
        self.update_legend()

    def make_color_legend(self):
        color_index = self.get_color_index()
        if color_index == -1:
            return
        color_var = self.domain[color_index]
        use_shape = self.get_shape_index() == color_index
        if color_var.is_discrete:
            if not self.legend:
                self.create_legend()
            palette = self.discrete_palette
            for i, value in enumerate(color_var.values):
                color = QColor(*palette.getRGB(i))
                brush = color.lighter(self.DarkerValue)
                self.legend.addItem(
                    ScatterPlotItem(
                        pen=color,
                        brush=brush,
                        size=10,
                        symbol=self.CurveSymbols[i] if use_shape else "o"),
                    escape(value))
        else:
            legend = self.color_legend = LegendItem()
            legend.setParentItem(self.plot_widget.getViewBox())
            legend.restoreAnchor(self.__color_legend_anchor)

            label = PaletteItemSample(self.continuous_palette, self.scale)
            legend.addItem(label, "")
            legend.setGeometry(label.boundingRect())

    def make_shape_legend(self):
        shape_index = self.get_shape_index()
        if shape_index == -1 or shape_index == self.get_color_index():
            return
        if not self.legend:
            self.create_legend()
        shape_var = self.domain[shape_index]
        color = QColor(0, 0, 0)
        color.setAlpha(self.alpha_value)
        for i, value in enumerate(shape_var.values):
            self.legend.addItem(
                ScatterPlotItem(pen=color,
                                brush=color,
                                size=10,
                                symbol=self.CurveSymbols[i]), escape(value))

    def zoom_button_clicked(self):
        self.plot_widget.getViewBox().setMouseMode(
            self.plot_widget.getViewBox().RectMode)

    def pan_button_clicked(self):
        self.plot_widget.getViewBox().setMouseMode(
            self.plot_widget.getViewBox().PanMode)

    def select_button_clicked(self):
        self.plot_widget.getViewBox().setMouseMode(
            self.plot_widget.getViewBox().RectMode)

    def reset_button_clicked(self):
        self.update_data(self.shown_x, self.shown_y,
                         reset_view=True)  # also redraw density image
        # self.view_box.autoRange()

    def select_by_click(self, _, points):
        if self.scatterplot_item is not None:
            self.select(points)

    def select_by_rectangle(self, value_rect):
        if self.scatterplot_item is not None:
            points = [
                point for point in self.scatterplot_item.points()
                if value_rect.contains(QPointF(point.pos()))
            ]
            self.select(points)

    def unselect_all(self):
        self.selection = None
        self.update_colors(keep_colors=True)
        if self.label_only_selected:
            self.update_labels()
        self.master.selection_changed()

    def select(self, points):
        # noinspection PyArgumentList
        if self.data is None:
            return
        if self.selection is None:
            self.selection = np.zeros(len(self.data), dtype=np.uint8)
        indices = [p.data() for p in points]
        keys = QApplication.keyboardModifiers()
        # Remove from selection
        if keys & Qt.AltModifier:
            self.selection[indices] = 0
        # Append to the last group
        elif keys & Qt.ShiftModifier and keys & Qt.ControlModifier:
            self.selection[indices] = np.max(self.selection)
        # Create a new group
        elif keys & Qt.ShiftModifier:
            self.selection[indices] = np.max(self.selection) + 1
        # No modifiers: new selection
        else:
            self.selection = np.zeros(len(self.data), dtype=np.uint8)
            self.selection[indices] = 1
        self.update_colors(keep_colors=True)
        if self.label_only_selected:
            self.update_labels()
        self.master.selection_changed()

    def get_selection(self):
        if self.selection is None:
            return np.array([], dtype=np.uint8)
        else:
            return np.flatnonzero(self.selection)

    def set_palette(self, p):
        self.plot_widget.setPalette(p)

    def save_to_file(self, size):
        pass

    def help_event(self, event):
        if self.scatterplot_item is None:
            return False

        act_pos = self.scatterplot_item.mapFromScene(event.scenePos())
        points = self.scatterplot_item.pointsAt(act_pos)
        text = ""
        if len(points):
            for i, p in enumerate(points):
                index = p.data()
                text += "Attributes:\n"
                if self.tooltip_shows_all and \
                        len(self.domain.attributes) < 30:
                    text += "".join('   {} = {}\n'.format(
                        attr.name, self.data[index][attr])
                                    for attr in self.domain.attributes)
                else:
                    text += '   {} = {}\n   {} = {}\n'.format(
                        self.shown_x, self.data[index][self.shown_x],
                        self.shown_y, self.data[index][self.shown_y])
                    if self.tooltip_shows_all:
                        text += "   ... and {} others\n\n".format(
                            len(self.domain.attributes) - 2)
                if self.domain.class_var:
                    text += 'Class:\n   {} = {}\n'.format(
                        self.domain.class_var.name,
                        self.data[index][self.data.domain.class_var])
                if i < len(points) - 1:
                    text += '------------------\n'

            text = ('<span style="white-space:pre">{}</span>'.format(
                escape(text)))

            QToolTip.showText(event.screenPos(), text, widget=self.plot_widget)
            return True
        else:
            return False

    def box_zoom_select(self, parent):
        g = self.gui
        box_zoom_select = gui.vBox(parent, "Zoom/Select")
        zoom_select_toolbar = g.zoom_select_toolbar(box_zoom_select,
                                                    nomargin=True,
                                                    buttons=[
                                                        g.StateButtonsBegin,
                                                        g.SimpleSelect, g.Pan,
                                                        g.Zoom,
                                                        g.StateButtonsEnd,
                                                        g.ZoomReset
                                                    ])
        buttons = zoom_select_toolbar.buttons
        buttons[g.Zoom].clicked.connect(self.zoom_button_clicked)
        buttons[g.Pan].clicked.connect(self.pan_button_clicked)
        buttons[g.SimpleSelect].clicked.connect(self.select_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.reset_button_clicked)
        return box_zoom_select

    def zoom_actions(self, parent):
        def zoom(s):
            """Zoom in/out by factor `s`."""
            viewbox = self.plot.getViewBox()
            # scaleBy scales the view's bounds (the axis range)
            viewbox.scaleBy((1 / s, 1 / s))

        def fit_to_view():
            viewbox = self.plot.getViewBox()
            viewbox.autoRange()

        zoom_in = QAction("Zoom in", parent, triggered=lambda: zoom(1.25))
        zoom_in.setShortcuts([
            QKeySequence(QKeySequence.ZoomIn),
            QKeySequence(parent.tr("Ctrl+="))
        ])
        zoom_out = QAction("Zoom out",
                           parent,
                           shortcut=QKeySequence.ZoomOut,
                           triggered=lambda: zoom(1 / 1.25))
        zoom_fit = QAction("Fit in view",
                           parent,
                           shortcut=QKeySequence(Qt.ControlModifier
                                                 | Qt.Key_0),
                           triggered=fit_to_view)
        parent.addActions([zoom_in, zoom_out, zoom_fit])
Beispiel #26
0
class OWCreateClass(widget.OWWidget):
    name = "Create Class"
    description = "Create class attribute from a string attribute"
    icon = "icons/CreateClass.svg"
    category = "Data"
    keywords = []

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False

    settingsHandler = DomainContextHandler()
    attribute = ContextSetting(None)
    class_name = ContextSetting("class")
    rules = ContextSetting({})
    match_beginning = ContextSetting(False)
    case_sensitive = ContextSetting(False)

    TRANSFORMERS = {StringVariable: ValueFromStringSubstring,
                    DiscreteVariable: ValueFromDiscreteSubstring}

    # Cached variables are used so that two instances of the widget with the
    # same settings will create the same variable. The usual `make` wouldn't
    # work here because variables with `compute_value` are not reused.
    cached_variables = {}

    class Warning(widget.OWWidget.Warning):
        no_nonnumeric_vars = Msg("Data contains only numeric variables.")

    class Error(widget.OWWidget.Error):
        class_name_duplicated = Msg("Class name duplicated.")
        class_name_empty = Msg("Class name should not be empty.")

    def __init__(self):
        super().__init__()
        self.data = None

        # The following lists are of the same length as self.active_rules

        #: list of pairs with counts of matches for each patter when the
        #     patterns are applied in order and when applied on the entire set,
        #     disregarding the preceding patterns
        self.match_counts = []

        #: list of list of QLineEdit: line edit pairs for each pattern
        self.line_edits = []
        #: list of QPushButton: list of remove buttons
        self.remove_buttons = []
        #: list of list of QLabel: pairs of labels with counts
        self.counts = []

        combo = gui.comboBox(
            self.controlArea, self, "attribute", label="From column: ",
            box=True, orientation=Qt.Horizontal, searchable=True,
            callback=self.update_rules,
            model=DomainModel(valid_types=(StringVariable, DiscreteVariable)))
        # Don't use setSizePolicy keyword argument here: it applies to box,
        # not the combo
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)

        patternbox = gui.vBox(self.controlArea, box=True)
        #: QWidget: the box that contains the remove buttons, line edits and
        #    count labels. The lines are added and removed dynamically.
        self.rules_box = rules_box = QGridLayout()
        patternbox.layout().addLayout(self.rules_box)
        box = gui.hBox(patternbox)
        gui.button(
            box, self, "+", callback=self.add_row, autoDefault=False, flat=True,
            minimumSize=(QSize(20, 20)))
        gui.rubber(box)
        self.rules_box.setColumnMinimumWidth(1, 70)
        self.rules_box.setColumnMinimumWidth(0, 10)
        self.rules_box.setColumnStretch(0, 1)
        self.rules_box.setColumnStretch(1, 1)
        self.rules_box.setColumnStretch(2, 100)
        rules_box.addWidget(QLabel("Name"), 0, 1)
        rules_box.addWidget(QLabel("Substring"), 0, 2)
        rules_box.addWidget(QLabel("#Instances"), 0, 3, 1, 2)
        self.update_rules()

        gui.lineEdit(
            self.controlArea, self, "class_name",
            label="Name for the new class:",
            box=True, orientation=Qt.Horizontal)

        optionsbox = gui.vBox(self.controlArea, box=True)
        gui.checkBox(
            optionsbox, self, "match_beginning", "Match only at the beginning",
            callback=self.options_changed)
        gui.checkBox(
            optionsbox, self, "case_sensitive", "Case sensitive",
            callback=self.options_changed)

        box = gui.hBox(self.controlArea)
        gui.rubber(box)
        gui.button(box, self, "Apply", autoDefault=False, width=180,
                   callback=self.apply)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        # TODO: Resizing upon changing the number of rules does not work
        self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum)

    @property
    def active_rules(self):
        """
        Returns the class names and patterns corresponding to the currently
            selected attribute. If the attribute is not yet in the dictionary,
            set the default.
        """
        return self.rules.setdefault(self.attribute and self.attribute.name,
                                     [["", ""], ["", ""]])

    def rules_to_edits(self):
        """Fill the line edites with the rules from the current settings."""
        for editr, textr in zip(self.line_edits, self.active_rules):
            for edit, text in zip(editr, textr):
                edit.setText(text)

    @Inputs.data
    def set_data(self, data):
        """Input data signal handler."""
        self.closeContext()
        self.rules = {}
        self.data = data
        summary = len(data) if data else self.info.NoInput
        details = format_summary_details(data) if data else ""
        self.info.set_input_summary(summary, details)
        model = self.controls.attribute.model()
        model.set_domain(data and data.domain)
        self.Warning.no_nonnumeric_vars(shown=data is not None and not model)
        if not model:
            self.attribute = None
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        self.attribute = model[0]
        self.openContext(data)
        self.update_rules()
        self.apply()

    def update_rules(self):
        """Called when the rules are changed: adjust the number of lines in
        the form and fill them, update the counts. The widget does not have
        auto-apply."""
        self.adjust_n_rule_rows()
        self.rules_to_edits()
        self.update_counts()
        # TODO: Indicator that changes need to be applied

    def options_changed(self):
        self.update_counts()

    def adjust_n_rule_rows(self):
        """Add or remove lines if needed and fix the tab order."""
        def _add_line():
            self.line_edits.append([])
            n_lines = len(self.line_edits)
            for coli in range(1, 3):
                edit = QLineEdit()
                self.line_edits[-1].append(edit)
                self.rules_box.addWidget(edit, n_lines, coli)
                edit.textChanged.connect(self.sync_edit)
            button = gui.button(
                None, self, label='×', flat=True, height=20,
                styleSheet='* {font-size: 16pt; color: silver}'
                           '*:hover {color: black}',
                autoDefault=False, callback=self.remove_row)
            button.setMinimumSize(QSize(12, 20))
            self.remove_buttons.append(button)
            self.rules_box.addWidget(button, n_lines, 0)
            self.counts.append([])
            for coli, kwargs in enumerate(
                    (dict(alignment=Qt.AlignRight),
                     dict(alignment=Qt.AlignLeft, styleSheet="color: gray"))):
                label = QLabel(**kwargs)
                self.counts[-1].append(label)
                self.rules_box.addWidget(label, n_lines, 3 + coli)

        def _remove_line():
            for edit in self.line_edits.pop():
                edit.deleteLater()
            self.remove_buttons.pop().deleteLater()
            for label in self.counts.pop():
                label.deleteLater()

        def _fix_tab_order():
            prev = None
            for row, rule in zip(self.line_edits, self.active_rules):
                for col_idx, edit in enumerate(row):
                    edit.row, edit.col_idx = rule, col_idx
                    if prev is not None:
                        self.setTabOrder(prev, edit)
                    prev = edit

        n = len(self.active_rules)
        while n > len(self.line_edits):
            _add_line()
        while len(self.line_edits) > n:
            _remove_line()
        _fix_tab_order()

    def add_row(self):
        """Append a new row at the end."""
        self.active_rules.append(["", ""])
        self.adjust_n_rule_rows()
        self.update_counts()

    def remove_row(self):
        """Remove a row."""
        remove_idx = self.remove_buttons.index(self.sender())
        del self.active_rules[remove_idx]
        self.update_rules()
        self.update_counts()

    def sync_edit(self, text):
        """Handle changes in line edits: update the active rules and counts"""
        edit = self.sender()
        edit.row[edit.col_idx] = text
        self.update_counts()

    def class_labels(self):
        """Construct a list of class labels. Empty labels are replaced with
        C1, C2, C3. If C<n> already appears in the list of values given by
        the user, the labels start at C<n+1> instead.
        """
        largest_c = max((int(label[1:]) for label, _ in self.active_rules
                         if re.match("^C\\d+", label)),
                        default=0)
        class_count = count(largest_c + 1)
        return [label_edit.text() or "C{}".format(next(class_count))
                for label_edit, _ in self.line_edits]

    def update_counts(self):
        """Recompute and update the counts of matches."""
        def _matcher(strings, pattern):
            """Return indices of strings into patterns; consider case
            sensitivity and matching at the beginning. The given strings are
            assumed to be in lower case if match is case insensitive. Patterns
            are fixed on the fly."""
            if not self.case_sensitive:
                pattern = pattern.lower()
            indices = np.char.find(strings, pattern.strip())
            return indices == 0 if self.match_beginning else indices != -1

        def _lower_if_needed(strings):
            return strings if self.case_sensitive else np.char.lower(strings)

        def _string_counts():
            """
            Generate pairs of arrays for each rule until running out of data
            instances. np.sum over the two arrays in each pair gives the
            number of matches of the remaining instances (considering the
            order of patterns) and of the original data.

            For _string_counts, the arrays contain bool masks referring to the
            original data
            """
            nonlocal data
            data = data.astype(str)
            data = data[~np.char.equal(data, "")]
            data = _lower_if_needed(data)
            remaining = np.array(data)
            for _, pattern in self.active_rules:
                matching = _matcher(remaining, pattern)
                total_matching = _matcher(data, pattern)
                yield matching, total_matching
                remaining = remaining[~matching]
                if not remaining.size:
                    break

        def _discrete_counts():
            """
            Generate pairs similar to _string_counts, except that the arrays
            contain bin counts for the attribute's values matching the pattern.
            """
            attr_vals = np.array(attr.values)
            attr_vals = _lower_if_needed(attr_vals)
            bins = bincount(data, max_val=len(attr.values) - 1)[0]
            remaining = np.array(bins)
            for _, pattern in self.active_rules:
                matching = _matcher(attr_vals, pattern)
                yield remaining[matching], bins[matching]
                remaining[matching] = 0
                if not np.any(remaining):
                    break

        def _clear_labels():
            """Clear all labels"""
            for lab_matched, lab_total in self.counts:
                lab_matched.setText("")
                lab_total.setText("")

        def _set_labels():
            """Set the labels to show the counts"""
            for (n_matched, n_total), (lab_matched, lab_total), (lab, patt) in \
                    zip(self.match_counts, self.counts, self.active_rules):
                n_before = n_total - n_matched
                lab_matched.setText("{}".format(n_matched))
                if n_before and (lab or patt):
                    lab_total.setText("+ {}".format(n_before))
                    if n_matched:
                        tip = "{} of the {} matching instances are already " \
                              "covered above".format(n_before, n_total)
                    else:
                        tip = "All matching instances are already covered above"
                    lab_total.setToolTip(tip)
                    lab_matched.setToolTip(tip)

        def _set_placeholders():
            """Set placeholders for empty edit lines"""
            matches = [n for n, _ in self.match_counts] + \
                      [0] * len(self.line_edits)
            for n_matched, (_, patt) in zip(matches, self.line_edits):
                if not patt.text():
                    patt.setPlaceholderText(
                        "(remaining instances)" if n_matched else "(unused)")

            labels = self.class_labels()
            for label, (lab_edit, _) in zip(labels, self.line_edits):
                if not lab_edit.text():
                    lab_edit.setPlaceholderText(label)

        _clear_labels()
        attr = self.attribute
        if attr is None:
            return
        counters = {StringVariable: _string_counts,
                    DiscreteVariable: _discrete_counts}
        data = self.data.get_column_view(attr)[0]
        self.match_counts = [[int(np.sum(x)) for x in matches]
                             for matches in counters[type(attr)]()]
        _set_labels()
        _set_placeholders()

    def apply(self):
        """Output the transformed data."""
        self.Error.clear()
        self.class_name = self.class_name.strip()
        if not self.attribute:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        domain = self.data.domain
        if not self.class_name:
            self.Error.class_name_empty()
        if self.class_name in domain:
            self.Error.class_name_duplicated()
        if not self.class_name or self.class_name in domain:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        new_class = self._create_variable()
        new_domain = Domain(
            domain.attributes, new_class, domain.metas + domain.class_vars)
        new_data = self.data.transform(new_domain)
        summary = len(new_data) if new_data is not None else self.info.NoOutput
        details = format_summary_details(new_data) if new_data is not None else ""
        self.info.set_output_summary(summary, details)
        self.Outputs.data.send(new_data)

    def _create_variable(self):
        rules = self.active_rules
        # Transposition + stripping
        valid_rules = [label or pattern or n_matches
                       for (label, pattern), n_matches in
                       zip(rules, self.match_counts)]
        patterns = tuple(
            pattern for (_, pattern), valid in zip(rules, valid_rules) if valid)
        names = tuple(
            name for name, valid in zip(self.class_labels(), valid_rules)
            if valid)
        transformer = self.TRANSFORMERS[type(self.attribute)]

        var_key = (self.attribute, self.class_name, names,
                   patterns, self.case_sensitive, self.match_beginning)
        if var_key in self.cached_variables:
            return self.cached_variables[var_key]

        compute_value = transformer(
            self.attribute, patterns, self.case_sensitive, self.match_beginning)
        new_var = DiscreteVariable(
            self.class_name, names, compute_value=compute_value)
        self.cached_variables[var_key] = new_var
        return new_var

    def send_report(self):
        # Pylint gives false positives: these functions are always called from
        # within the loop
        # pylint: disable=undefined-loop-variable
        def _cond_part():
            rule = "<b>{}</b> ".format(class_name)
            if patt:
                rule += "if <b>{}</b> contains <b>{}</b>".format(
                    self.attribute.name, patt)
            else:
                rule += "otherwise"
            return rule

        def _count_part():
            if not n_matched:
                return "all {} matching instances are already covered " \
                       "above".format(n_total)
            elif n_matched < n_total and patt:
                return "{} matching instances (+ {} that are already " \
                       "covered above".format(n_matched, n_total - n_matched)
            else:
                return "{} matching instances".format(n_matched)

        if not self.attribute:
            return
        self.report_items("Input", [("Source attribute", self.attribute.name)])
        output = ""
        names = self.class_labels()
        for (n_matched, n_total), class_name, (lab, patt) in \
                zip(self.match_counts, names, self.active_rules):
            if lab or patt or n_total:
                output += "<li>{}; {}</li>".format(_cond_part(), _count_part())
        if output:
            self.report_items("Output", [("Class name", self.class_name)])
            self.report_raw("<ol>{}</ol>".format(output))
class OWLouvainClustering(widget.OWWidget):
    name = 'Louvain Clustering'
    description = 'Detects communities in a network of nearest neighbours.'
    icon = 'icons/LouvainClustering.svg'
    priority = 2150

    want_main_area = False

    settingsHandler = DomainContextHandler()

    class Inputs:
        data = Input('Data', Table, default=True)

    if Graph is not None:

        class Outputs:
            annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME,
                                    Table,
                                    default=True)
            graph = Output('Network', Graph)
    else:

        class Outputs:
            annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME,
                                    Table,
                                    default=True)

    apply_pca = ContextSetting(True)
    pca_components = ContextSetting(_DEFAULT_PCA_COMPONENTS)
    metric_idx = ContextSetting(0)
    k_neighbours = ContextSetting(_DEFAULT_K_NEIGHBOURS)
    resolution = ContextSetting(1.)
    auto_commit = Setting(True)

    class Error(widget.OWWidget.Error):
        data_has_nans = Msg(
            'Data has missing values. Please impute the missing values before '
            'continuing\n')
        empty_dataset = Msg('No features in data')
        general_error = Msg('Error occured during clustering\n{}')

    class State(Enum):
        Pending, Running = range(2)

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]
        self.graph = None  # type: Optional[nx.Graph]
        self.partition = None  # type: Optional[np.array]

        self.__executor = ThreadExecutor(parent=self)
        self.__future = None  # type: Optional[Future]
        self.__state = self.State.Pending

        pca_box = gui.vBox(self.controlArea, 'PCA Preprocessing')
        self.apply_pca_cbx = gui.checkBox(
            pca_box,
            self,
            'apply_pca',
            label='Apply PCA preprocessing',
            callback=self._invalidate_graph,
        )  # type: QCheckBox
        self.pca_components_slider = gui.hSlider(
            pca_box,
            self,
            'pca_components',
            label='Components: ',
            minValue=2,
            maxValue=_MAX_PCA_COMPONENTS,
            callback=self._invalidate_pca_projection,
        )  # type: QSlider

        graph_box = gui.vBox(self.controlArea, 'Graph parameters')
        self.metric_combo = gui.comboBox(
            graph_box,
            self,
            'metric_idx',
            label='Distance metric',
            items=[m[0] for m in METRICS],
            callback=self._invalidate_graph,
            orientation=Qt.Horizontal,
        )  # type: gui.OrangeComboBox
        self.k_neighbours_spin = gui.spin(
            graph_box,
            self,
            'k_neighbours',
            minv=1,
            maxv=_MAX_K_NEIGBOURS,
            label='k neighbours',
            controlWidth=80,
            alignment=Qt.AlignRight,
            callback=self._invalidate_graph,
        )  # type: gui.SpinBoxWFocusOut
        self.cls_epsilon_spin = gui.spin(
            graph_box,
            self,
            'resolution',
            0,
            5.,
            1e-2,
            spinType=float,
            label='Resolution',
            controlWidth=80,
            alignment=Qt.AlignRight,
            callback=self._invalidate_partition,
        )  # type: gui.SpinBoxWFocusOut

        self.apply_button = gui.auto_commit(
            self.controlArea,
            self,
            'auto_commit',
            'Apply',
            box=False,
            commit=lambda: self.commit(force=True),
            callback=self.commit,
        )  # type: QWidget

    def _compute_pca_projection(self):
        if self.pca_projection is None and self.apply_pca:
            self.setStatusMessage('Computing PCA...')

            pca = PCA(n_components=self.pca_components, random_state=0)
            model = pca(self.data)
            self.pca_projection = model(self.data)

    def _compute_graph(self, progress_callback=None):
        if self.graph is None:
            self.setStatusMessage('Building graph...')

            data = self.pca_projection if self.apply_pca else self.data

            self.graph = table_to_graph(
                data,
                k_neighbours=self.k_neighbours,
                metric=METRICS[self.metric_idx][1],
                progress_callback=progress_callback,
            )

    def _compute_partition(self):
        if self.partition is None:
            self.setStatusMessage('Detecting communities...')
            self.setBlocking(True)

            partition = best_partition(self.graph, resolution=self.resolution)
            self.partition = np.fromiter(list(
                zip(*sorted(partition.items())))[1],
                                         dtype=int)

    def _processing_complete(self):
        self.setStatusMessage('')
        self.setBlocking(False)
        self.progressBarFinished()

    def _handle_exceptions(self, ex):
        self.Error.general_error(str(ex))

    def cancel(self):
        """Cancel any running jobs."""
        if self.__state == self.State.Running:
            assert self.__future is not None
            self.__future.cancel()
            self.__future = None

        self.__state = self.State.Pending

    def commit(self, force=False):
        self.Error.clear()
        # Kill any running jobs
        self.cancel()
        assert self.__state == self.State.Pending

        if self.data is None:
            return

        # We commit if auto_commit is on or when we force commit
        if not self.auto_commit and not force:
            return

        # Make sure the dataset is ok
        if np.any(np.isnan(self.data.X)):
            self.Error.data_has_nans()
            return

        if len(self.data.domain.attributes) < 1:
            self.Error.empty_dataset()
            return

        # Prepare the tasks to run
        queue = TaskQueue(parent=self)

        if self.pca_projection is None and self.apply_pca:
            queue.push(namespace(task=self._compute_pca_projection))

        if self.graph is None:
            queue.push(
                namespace(task=self._compute_graph, progress_callback=True))

        if self.partition is None:
            queue.push(namespace(task=self._compute_partition))

        # Prepare callbacks
        queue.on_progress.connect(lambda val: self.progressBarSet(100 * val))
        queue.on_complete.connect(self._processing_complete)
        queue.on_complete.connect(self._send_data)
        queue.on_exception.connect(self._handle_exceptions)

        # Run the task queue
        self.progressBarInit()
        self.setBlocking(True)
        self.__future = self.__executor.submit(queue.start)
        self.__state = self.State.Running

    def _send_data(self):
        domain = self.data.domain
        # Compute the frequency of each cluster index
        counts = np.bincount(self.partition)
        indices = np.argsort(counts)[::-1]
        index_map = {n: o for n, o in zip(indices, range(len(indices)))}
        new_partition = list(map(index_map.get, self.partition))

        cluster_var = DiscreteVariable(
            get_next_name(domain, 'Cluster'),
            values=[
                'C%d' % (i + 1) for i, _ in enumerate(np.unique(new_partition))
            ])

        new_domain = add_columns(domain, metas=[cluster_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = new_partition
        self.Outputs.annotated_data.send(new_table)

        if Graph is not None:
            graph = Graph(self.graph)
            graph.set_items(new_table)
            self.Outputs.graph.send(graph)

    def _invalidate_pca_projection(self):
        self.pca_projection = None
        self._invalidate_graph()

    def _invalidate_graph(self):
        self.graph = None
        self._invalidate_partition()

    def _invalidate_partition(self):
        self.partition = None
        self.commit()

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.Error.clear()
        self.data = data
        self._invalidate_pca_projection()
        self.Outputs.annotated_data.send(None)
        if Graph is not None:
            self.Outputs.graph.send(None)
        self.openContext(self.data)

        if self.data is None:
            return

        # Can't have more PCA components than the number of attributes
        n_attrs = len(data.domain.attributes)
        self.pca_components_slider.setMaximum(min(_MAX_PCA_COMPONENTS,
                                                  n_attrs))
        self.pca_components_slider.setValue(
            min(_DEFAULT_PCA_COMPONENTS, n_attrs))
        # Can't have more k neighbours than there are data points
        self.k_neighbours_spin.setMaximum(min(_MAX_K_NEIGBOURS, len(data) - 1))
        self.k_neighbours_spin.setValue(
            min(_DEFAULT_K_NEIGHBOURS,
                len(data) - 1))

        self.commit()

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
Beispiel #28
0
class OWCorpusViewer(OWWidget):
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    search_indices = ContextSetting(
        [], exclude_metas=False)  # features included in search
    display_indices = ContextSetting(
        [], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    regexp_filter = ContextSetting("")

    selection = [0]  # TODO: DataHashContextHandler

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None  # Corpus
        self.corpus_docs = None  # Documents generated from Corpus
        self.output_mask = []  # Output corpus indices
        self.doc_webview = None  # WebView for showing content
        self.search_features = [
        ]  # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea,
            self,
            'search_indices',
            'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features',
            callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box,
            self,
            'display_list_indices',
            'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs,
            enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box,
                                                 self,
                                                 'show_tokens',
                                                 'Show Tokens && Tags',
                                                 callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data',
                        'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea,
                                         self,
                                         'regexp_filter',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(
                                             QSizePolicy.MinimumExpanding,
                                             QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selection = [0]
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """ List documents into the left scrolling area """
        if self.corpus_docs is None:
            return
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except sre_constants.error:
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(
                zip(self.corpus, self.corpus.titles, self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

    def reset_selection(self):
        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')

    def set_selection(self):
        view = self.doc_list
        if len(self.selection):
            selection = QItemSelection()

            for row in self.selection:
                selection.append(
                    QItemSelectionRange(view.model().index(row, 0),
                                        view.model().index(row, 0)))
            view.selectionModel().select(selection,
                                         QItemSelectionModel.ClearAndSelect)

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}
        
        .content {{
            /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */
        
            /* These are technically the same, but use both */
            overflow-wrap: break-word;
            word-wrap: break-word;
        
            -ms-word-break: break-all;
            /* This is the dangerous one in WebKit, as it breaks things wherever */
            word-break: break-all;
            /* Instead use this non-standard one: */
            word-break: break-word;
        
            /* Adds a hyphen where the word breaks, if supported (No Blink) */
            -ms-hyphens: auto;
            -moz-hyphens: auto;
            -webkit-hyphens: auto;
            hyphens: auto;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices
        ]

        html = '<table>'
        selection = [
            i.row() for i in self.doc_list.selectionModel().selectedRows()
        ]
        if selection != []:
            self.selection = selection
        for doc_count, index in enumerate(
                self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:  # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(index.data(Qt.UserRole)[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td class="content">{}</td></tr>'.format(
                    feature.name, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def __mark_text(self, text):
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except sre_constants.error:
            return text

        matches = list(reg.finditer(text))
        if not matches:
            return text

        text = list(text)
        for m in matches[::-1]:
            text[m.start():m.end()] = list('<mark data-markjs="true">{}</mark>'\
                .format("".join(text[m.start():m.end()])))

        return "".join(text)

    def search_features_changed(self):
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        if self.corpus is not None:
            self.list_docs()
            self.reset_selection()
            self.update_info()
            self.commit()

    def update_info(self):
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(),
                                             self.n_documents)
            self.n_tokens = sum(
                map(len,
                    self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(
                self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [
                i for i in range(len(self.corpus)) if i not in output_mask
            ]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)

    def send_report(self):
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
        ))
Beispiel #29
0
class OWScatterPlotGraph(gui.OWComponent, ScaleScatterPlotData):
    attr_color = ContextSetting("", ContextSetting.OPTIONAL)
    attr_label = ContextSetting("", ContextSetting.OPTIONAL)
    attr_shape = ContextSetting("", ContextSetting.OPTIONAL)
    attr_size = ContextSetting("", ContextSetting.OPTIONAL)

    point_width = Setting(10)
    alpha_value = Setting(128)
    show_grid = Setting(False)
    show_legend = Setting(True)
    tooltip_shows_all = Setting(False)
    class_density = Setting(False)
    resolution = 256

    CurveSymbols = np.array("o x t + d s ?".split())
    MinShapeSize = 6
    DarkerValue = 120
    UnknownColor = (168, 50, 168)

    def __init__(self, scatter_widget, parent=None, _="None"):
        gui.OWComponent.__init__(self, scatter_widget)
        self.view_box = InteractiveViewBox(self)
        self.plot_widget = pg.PlotWidget(viewBox=self.view_box,
                                         parent=parent,
                                         background="w")
        self.plot_widget.getPlotItem().buttonsHidden = True
        self.plot_widget.setAntialiasing(True)
        self.plot_widget.sizeHint = lambda: QtCore.QSize(500, 500)

        self.replot = self.plot_widget.replot
        ScaleScatterPlotData.__init__(self)
        self.density_img = None
        self.scatterplot_item = None
        self.scatterplot_item_sel = None

        self.labels = []

        self.master = scatter_widget
        self.shown_attribute_indices = []
        self.shown_x = ""
        self.shown_y = ""
        self.pen_colors = self.brush_colors = None

        self.valid_data = None  # np.ndarray
        self.selection = None  # np.ndarray
        self.n_points = 0

        self.gui = OWPlotGUI(self)
        self.continuous_palette = ContinuousPaletteGenerator(
            QColor(255, 255, 0), QColor(0, 0, 255), True)
        self.discrete_palette = ColorPaletteGenerator()

        self.selection_behavior = 0

        self.legend = self.color_legend = None
        self.__legend_anchor = (1, 0), (1, 0)
        self.__color_legend_anchor = (1, 1), (1, 1)

        self.scale = None  # DiscretizedScale

        self.subset_indices = None

        # self.setMouseTracking(True)
        # self.grabGesture(QPinchGesture)
        # self.grabGesture(QPanGesture)

        self.update_grid()

        self._tooltip_delegate = HelpEventDelegate(self.help_event)
        self.plot_widget.scene().installEventFilter(self._tooltip_delegate)

    def new_data(self, data, subset_data=None, **args):
        self.plot_widget.clear()

        self.density_img = None
        self.scatterplot_item = None
        self.scatterplot_item_sel = None
        self.labels = []
        self.selection = None
        self.valid_data = None

        self.subset_indices = set(
            e.id for e in subset_data) if subset_data else None

        self.set_data(data, **args)

    def update_data(self, attr_x, attr_y, reset_view=True):
        self.shown_x = attr_x
        self.shown_y = attr_y

        self.remove_legend()
        if self.density_img:
            self.plot_widget.removeItem(self.density_img)
            self.density_img = None
        if self.scatterplot_item:
            self.plot_widget.removeItem(self.scatterplot_item)
            self.scatterplot_item = None
        if self.scatterplot_item_sel:
            self.plot_widget.removeItem(self.scatterplot_item_sel)
            self.scatterplot_item_sel = None
        for label in self.labels:
            self.plot_widget.removeItem(label)
        self.labels = []
        self.set_axis_title("bottom", "")
        self.set_axis_title("left", "")

        if self.scaled_data is None or not len(self.scaled_data):
            self.valid_data = None
            self.selection = None
            self.n_points = 0
            return

        index_x = self.attribute_name_index[attr_x]
        index_y = self.attribute_name_index[attr_y]
        self.valid_data = self.get_valid_list([index_x, index_y],
                                              also_class_if_exists=False)
        x_data, y_data = self.get_xy_data_positions(attr_x, attr_y,
                                                    self.valid_data)
        self.n_points = len(x_data)

        if reset_view:
            min_x, max_x = np.nanmin(x_data), np.nanmax(x_data)
            min_y, max_y = np.nanmin(y_data), np.nanmax(y_data)
            self.view_box.setRange(QRectF(min_x, min_y, max_x - min_x,
                                          max_y - min_y),
                                   padding=0.025)
            self.view_box.init_history()
            self.view_box.tag_history()
        [min_x, max_x], [min_y, max_y] = self.view_box.viewRange()

        for axis, name, index in (("bottom", attr_x, index_x), ("left", attr_y,
                                                                index_y)):
            self.set_axis_title(axis, name)
            var = self.data_domain[index]
            if var.is_discrete:
                self.set_labels(axis, get_variable_values_sorted(var))
            else:
                self.set_labels(axis, None)

        color_data, brush_data = self.compute_colors()
        color_data_sel, brush_data_sel = self.compute_colors_sel()
        size_data = self.compute_sizes()
        shape_data = self.compute_symbols()

        if self.should_draw_density():
            rgb_data = [pen.color().getRgb()[:3] for pen in color_data]
            self.density_img = classdensity.class_density_image(
                min_x, max_x, min_y, max_y, self.resolution, x_data, y_data,
                rgb_data)
            self.plot_widget.addItem(self.density_img)

        data_indices = np.flatnonzero(self.valid_data)
        self.scatterplot_item = ScatterPlotItem(x=x_data,
                                                y=y_data,
                                                data=data_indices,
                                                symbol=shape_data,
                                                size=size_data,
                                                pen=color_data,
                                                brush=brush_data)
        self.scatterplot_item_sel = ScatterPlotItem(x=x_data,
                                                    y=y_data,
                                                    data=data_indices,
                                                    symbol=shape_data,
                                                    size=size_data +
                                                    SELECTION_WIDTH,
                                                    pen=color_data_sel,
                                                    brush=brush_data_sel)
        self.plot_widget.addItem(self.scatterplot_item_sel)
        self.plot_widget.addItem(self.scatterplot_item)

        self.scatterplot_item.selected_points = []
        self.scatterplot_item.sigClicked.connect(self.select_by_click)

        self.update_labels()
        self.make_legend()
        self.plot_widget.replot()

    def can_draw_density(self):
        if self.data_domain is None:
            return False
        discrete_color = False
        attr_color = self.attr_color
        if attr_color != "" and attr_color != "(Same color)":
            color_var = self.data_domain[attr_color]
            discrete_color = color_var.is_discrete
        continuous_x = False
        continuous_y = False
        if self.shown_x and self.shown_y:
            continuous_x = self.data_domain[self.shown_x].is_continuous
            continuous_y = self.data_domain[self.shown_y].is_continuous
        return discrete_color and continuous_x and continuous_y

    def should_draw_density(self):
        return self.class_density and self.n_points > 1 and self.can_draw_density(
        )

    def set_labels(self, axis, labels):
        axis = self.plot_widget.getAxis(axis)
        if labels:
            ticks = [[(i, labels[i]) for i in range(len(labels))]]
            axis.setTicks(ticks)
        else:
            axis.setTicks(None)

    def set_axis_title(self, axis, title):
        self.plot_widget.setLabel(axis=axis, text=title)

    def get_size_index(self):
        size_index = -1
        attr_size = self.attr_size
        if attr_size != "" and attr_size != "(Same size)":
            size_index = self.attribute_name_index[attr_size]
        return size_index

    def compute_sizes(self):
        size_index = self.get_size_index()
        if size_index == -1:
            size_data = np.full((self.n_points, ), self.point_width)
        else:
            size_data = \
                self.MinShapeSize + \
                self.no_jittering_scaled_data[size_index, self.valid_data] * self.point_width
        size_data[np.isnan(size_data)] = self.MinShapeSize - 2
        return size_data

    def update_sizes(self):
        if self.scatterplot_item:
            size_data = self.compute_sizes()
            self.scatterplot_item.setSize(size_data)
            self.scatterplot_item_sel.setSize(size_data + SELECTION_WIDTH)

    update_point_size = update_sizes

    def get_color_index(self):
        color_index = -1
        attr_color = self.attr_color
        if attr_color != "" and attr_color != "(Same color)":
            color_index = self.attribute_name_index[attr_color]
            color_var = self.data_domain[attr_color]
            if color_var.is_discrete:
                self.discrete_palette.set_number_of_colors(
                    len(color_var.values))
        return color_index

    def compute_colors_sel(self, keep_colors=False):
        if not keep_colors:
            self.pen_colors_sel = self.brush_colors_sel = None

        def make_pen(color, width):
            p = QPen(color, width)
            p.setCosmetic(True)
            return p

        pens = [
            QPen(Qt.NoPen),
            make_pen(QColor(255, 190, 0, 255), SELECTION_WIDTH + 1.)
        ]
        if self.selection is not None:
            pen = [pens[a] for a in self.selection[self.valid_data]]
        else:
            pen = [pens[0]] * self.n_points
        brush = [QBrush(QColor(255, 255, 255, 0))] * self.n_points
        return pen, brush

    def compute_colors(self, keep_colors=False):
        if not keep_colors:
            self.pen_colors = self.brush_colors = None
        color_index = self.get_color_index()

        def make_pen(color, width):
            p = QPen(color, width)
            p.setCosmetic(True)
            return p

        subset = None
        if self.subset_indices:
            subset = np.array([
                ex.id in self.subset_indices
                for ex in self.raw_data[self.valid_data]
            ])

        if color_index == -1:  #color = "Same color"
            color = self.plot_widget.palette().color(OWPalette.Data)
            pen = [make_pen(color, 1.5)] * self.n_points
            if subset is not None:
                brush = [(QBrush(QColor(128, 128, 128, 0)),
                          QBrush(QColor(128, 128, 128, self.alpha_value)))[s]
                         for s in subset]
            else:
                brush = [QBrush(QColor(128, 128, 128))] * self.n_points
            return pen, brush

        c_data = self.original_data[color_index, self.valid_data]
        if self.data_domain[color_index].is_continuous:
            if self.pen_colors is None:
                self.scale = DiscretizedScale(np.nanmin(c_data),
                                              np.nanmax(c_data))
                c_data -= self.scale.offset
                c_data /= self.scale.width
                c_data = np.floor(c_data) + 0.5
                c_data /= self.scale.bins
                c_data = np.clip(c_data, 0, 1)
                palette = self.continuous_palette
                self.pen_colors = palette.getRGB(c_data)
                self.brush_colors = np.hstack([
                    self.pen_colors,
                    np.full((self.n_points, 1), self.alpha_value)
                ])
                self.pen_colors *= 100 / self.DarkerValue
                self.pen_colors = [
                    make_pen(QColor(*col), 1.5)
                    for col in self.pen_colors.tolist()
                ]
            if subset is not None:
                self.brush_colors[:, 3] = 0
                self.brush_colors[subset, 3] = self.alpha_value
            else:
                self.brush_colors[:, 3] = self.alpha_value
            pen = self.pen_colors
            brush = np.array(
                [QBrush(QColor(*col)) for col in self.brush_colors.tolist()])
        else:
            if self.pen_colors is None:
                palette = self.discrete_palette
                n_colors = palette.number_of_colors
                c_data = c_data.copy()
                c_data[np.isnan(c_data)] = n_colors
                c_data = c_data.astype(int)
                colors = np.r_[palette.getRGB(np.arange(n_colors)),
                               [[128, 128, 128]]]
                pens = np.array([
                    make_pen(QColor(*col).darker(self.DarkerValue), 1.5)
                    for col in colors
                ])
                self.pen_colors = pens[c_data]
                self.brush_colors = np.array([[
                    QBrush(QColor(0, 0, 0, 0)),
                    QBrush(QColor(col[0], col[1], col[2], self.alpha_value))
                ] for col in colors])
                self.brush_colors = self.brush_colors[c_data]
            if subset is not None:
                brush = np.where(subset, self.brush_colors[:, 1],
                                 self.brush_colors[:, 0])
            else:
                brush = self.brush_colors[:, 1]
            pen = self.pen_colors
        return pen, brush

    def update_colors(self, keep_colors=False):
        if self.scatterplot_item:
            pen_data, brush_data = self.compute_colors(keep_colors)
            pen_data_sel, brush_data_sel = self.compute_colors_sel(keep_colors)
            self.scatterplot_item.setPen(pen_data, update=False, mask=None)
            self.scatterplot_item.setBrush(brush_data, mask=None)
            self.scatterplot_item_sel.setPen(pen_data_sel,
                                             update=False,
                                             mask=None)
            self.scatterplot_item_sel.setBrush(brush_data_sel, mask=None)
            if not keep_colors:
                self.make_legend()

                if self.should_draw_density():
                    self.update_data(self.shown_x, self.shown_y)
                elif self.density_img:
                    self.plot_widget.removeItem(self.density_img)

    update_alpha_value = update_colors

    def create_labels(self):
        for x, y in zip(*self.scatterplot_item.getData()):
            ti = TextItem()
            self.plot_widget.addItem(ti)
            ti.setPos(x, y)
            self.labels.append(ti)

    def update_labels(self):
        if not self.attr_label:
            for label in self.labels:
                label.setText("")
            return
        if not self.labels:
            self.create_labels()
        label_column = self.raw_data.get_column_view(self.attr_label)[0]
        formatter = self.raw_data.domain[self.attr_label].str_val
        label_data = map(formatter, label_column)
        black = pg.mkColor(0, 0, 0)
        for label, text in zip(self.labels, label_data):
            label.setText(text, black)

    def get_shape_index(self):
        shape_index = -1
        attr_shape = self.attr_shape
        if attr_shape and attr_shape != "(Same shape)" and \
                len(self.data_domain[attr_shape].values) <= \
                len(self.CurveSymbols):
            shape_index = self.attribute_name_index[attr_shape]
        return shape_index

    def compute_symbols(self):
        shape_index = self.get_shape_index()
        if shape_index == -1:
            shape_data = self.CurveSymbols[np.zeros(self.n_points, dtype=int)]
        else:
            shape_data = self.original_data[shape_index, self.valid_data]
            shape_data[np.isnan(shape_data)] = len(self.CurveSymbols) - 1
            shape_data = self.CurveSymbols[shape_data.astype(int)]
        return shape_data

    def update_shapes(self):
        if self.scatterplot_item:
            shape_data = self.compute_symbols()
            self.scatterplot_item.setSymbol(shape_data)
        self.make_legend()

    def update_grid(self):
        self.plot_widget.showGrid(x=self.show_grid, y=self.show_grid)

    def update_legend(self):
        if self.legend:
            self.legend.setVisible(self.show_legend)

    def create_legend(self):
        self.legend = LegendItem()
        self.legend.setParentItem(self.plot_widget.getViewBox())
        self.legend.anchor(*self.__legend_anchor)

    def remove_legend(self):
        if self.legend:
            anchor = legend_anchor_pos(self.legend)
            if anchor is not None:
                self.__legend_anchor = anchor
            self.legend.setParent(None)
            self.legend = None
        if self.color_legend:
            anchor = legend_anchor_pos(self.color_legend)
            if anchor is not None:
                self.__color_legend_anchor = anchor
            self.color_legend.setParent(None)
            self.color_legend = None

    def make_legend(self):
        self.remove_legend()
        self.make_color_legend()
        self.make_shape_legend()
        self.update_legend()

    def make_color_legend(self):
        color_index = self.get_color_index()
        if color_index == -1:
            return
        color_var = self.data_domain[color_index]
        use_shape = self.get_shape_index() == color_index
        if color_var.is_discrete:
            if not self.legend:
                self.create_legend()
            palette = self.discrete_palette
            for i, value in enumerate(color_var.values):
                color = QColor(*palette.getRGB(i))
                brush = color.lighter(self.DarkerValue)
                self.legend.addItem(
                    ScatterPlotItem(
                        pen=color,
                        brush=brush,
                        size=10,
                        symbol=self.CurveSymbols[i] if use_shape else "o"),
                    escape(value))
        else:
            legend = self.color_legend = LegendItem()
            legend.setParentItem(self.plot_widget.getViewBox())
            legend.anchor(*self.__color_legend_anchor)

            label = PaletteItemSample(self.continuous_palette, self.scale)
            legend.addItem(label, "")
            legend.setGeometry(label.boundingRect())

    def make_shape_legend(self):
        shape_index = self.get_shape_index()
        if shape_index == -1 or shape_index == self.get_color_index():
            return
        if not self.legend:
            self.create_legend()
        shape_var = self.data_domain[shape_index]
        color = self.plot_widget.palette().color(OWPalette.Data)
        pen = QPen(color.darker(self.DarkerValue))
        color.setAlpha(self.alpha_value)
        for i, value in enumerate(shape_var.values):
            self.legend.addItem(
                ScatterPlotItem(pen=pen,
                                brush=color,
                                size=10,
                                symbol=self.CurveSymbols[i]), escape(value))

    def zoom_button_clicked(self):
        self.plot_widget.getViewBox().setMouseMode(
            self.plot_widget.getViewBox().RectMode)

    def pan_button_clicked(self):
        self.plot_widget.getViewBox().setMouseMode(
            self.plot_widget.getViewBox().PanMode)

    def select_button_clicked(self):
        self.plot_widget.getViewBox().setMouseMode(
            self.plot_widget.getViewBox().RectMode)

    def reset_button_clicked(self):
        self.update_data(self.shown_x, self.shown_y,
                         reset_view=True)  # also redraw density image
        # self.view_box.autoRange()

    def select_by_click(self, _, points):
        if self.scatterplot_item is not None:
            self.select(points)

    def select_by_rectangle(self, value_rect):
        if self.scatterplot_item is not None:
            points = [
                point for point in self.scatterplot_item.points()
                if value_rect.contains(QPointF(point.pos()))
            ]
            self.select(points)

    def unselect_all(self):
        self.selection = None
        self.update_colors(keep_colors=True)
        self.master.selection_changed()

    def select(self, points):
        # noinspection PyArgumentList
        if self.raw_data is None:
            return
        keys = QApplication.keyboardModifiers()
        if self.selection is None or not keys & (
                Qt.ShiftModifier + Qt.ControlModifier + Qt.AltModifier):
            self.selection = np.full(len(self.raw_data), False, dtype=np.bool)
        indices = [p.data() for p in points]
        if keys & Qt.AltModifier:
            self.selection[indices] = False
        elif keys & Qt.ControlModifier:
            self.selection[indices] = ~self.selection[indices]
        else:  # Handle shift and no modifiers
            self.selection[indices] = True
        self.update_colors(keep_colors=True)
        self.master.selection_changed()

    def get_selection(self):
        if self.selection is None:
            return np.array([], dtype=int)
        else:
            return np.flatnonzero(self.selection)

    def set_palette(self, p):
        self.plot_widget.setPalette(p)

    def save_to_file(self, size):
        pass

    def help_event(self, event):
        if self.scatterplot_item is None:
            return False

        act_pos = self.scatterplot_item.mapFromScene(event.scenePos())
        points = self.scatterplot_item.pointsAt(act_pos)
        text = ""
        if len(points):
            for i, p in enumerate(points):
                index = p.data()
                text += "Attributes:\n"
                if self.tooltip_shows_all and \
                        len(self.data_domain.attributes) < 30:
                    text += "".join('   {} = {}\n'.format(
                        attr.name, self.raw_data[index][attr])
                                    for attr in self.data_domain.attributes)
                else:
                    text += '   {} = {}\n   {} = {}\n'.format(
                        self.shown_x, self.raw_data[index][self.shown_x],
                        self.shown_y, self.raw_data[index][self.shown_y])
                    if self.tooltip_shows_all:
                        text += "   ... and {} others\n\n".format(
                            len(self.data_domain.attributes) - 2)
                if self.data_domain.class_var:
                    text += 'Class:\n   {} = {}\n'.format(
                        self.data_domain.class_var.name,
                        self.raw_data[index][self.raw_data.domain.class_var])
                if i < len(points) - 1:
                    text += '------------------\n'

            text = ('<span style="white-space:pre">{}</span>'.format(
                escape(text)))

            QToolTip.showText(event.screenPos(), text, widget=self.plot_widget)
            return True
        else:
            return False
Beispiel #30
0
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler fills the list boxes and calls
    `grouping_changed`.

    - `grouping_changed` handles changes of grouping attribute: it enables or
    disables the box for ordering, orders attributes and calls `attr_changed`.

    - `attr_changed` handles changes of attribute. It recomputes box data by
    calling `compute_box_data`, shows the appropriate display box
    (discrete/continuous) and then calls`layout_changed`

    - `layout_changed` constructs all the elements for the scene (as lists of
    QGraphicsItemGroup) and calls `display_changed`. It is called when the
    attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
    settings on the scene. It is called when the elements are reconstructed
    (layout is changed due to selection of attributes or resize event), or
    when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100
    keywords = ["whisker"]

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()
    conditions = ContextSetting([])

    attribute = ContextSetting(None)
    order_by_importance = Setting(False)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMeans)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)
    show_labels = Setting(True)
    auto_commit = Setting(True)

    _sorting_criteria_attrs = {
        CompareNone: "",
        CompareMedians: "median",
        CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted, _pen_axis,
                _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        self.label_txts = self.mean_labels = self.boxes = self.labels = \
            self.label_txts_all = self.attr_labels = self.order = []
        self.p = -1.0
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        self.attrs = VariableListModel()
        view = gui.listView(self.controlArea,
                            self,
                            "attribute",
                            box="Variable",
                            model=self.attrs,
                            callback=self.attr_changed)
        view.setMinimumSize(QSize(30, 30))
        # Any other policy than Ignored will let the QListBox's scrollbar
        # set the minimal height (see the penultimate paragraph of
        # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
        gui.separator(view.box, 6, 6)
        self.cb_order = gui.checkBox(
            view.box,
            self,
            "order_by_importance",
            "Order by relevance",
            tooltip="Order by 𝜒² or ANOVA over the subgroups",
            callback=self.apply_sorting)
        self.group_vars = DomainModel(placeholder="None",
                                      separators=False,
                                      valid_types=Orange.data.DiscreteVariable)
        self.group_view = view = gui.listView(self.controlArea,
                                              self,
                                              "group_var",
                                              box="Subgroups",
                                              model=self.group_vars,
                                              callback=self.grouping_changed)
        view.setEnabled(False)
        view.setMinimumSize(QSize(30, 30))
        # See the comment above
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)

        # TODO: move Compare median/mean to grouping box
        # The vertical size policy is needed to let only the list views expand
        self.display_box = gui.vBox(self.controlArea,
                                    "Display",
                                    sizePolicy=(QSizePolicy.Minimum,
                                                QSizePolicy.Maximum),
                                    addSpace=False)

        gui.checkBox(self.display_box,
                     self,
                     "show_annotations",
                     "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box,
            self,
            'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.layout_changed)

        # The vertical size policy is needed to let only the list views expand
        self.stretching_box = box = gui.vBox(self.controlArea,
                                             box="Display",
                                             sizePolicy=(QSizePolicy.Minimum,
                                                         QSizePolicy.Fixed))
        self.stretching_box.sizeHint = self.display_box.sizeHint
        gui.checkBox(box,
                     self,
                     'stretched',
                     "Stretch bars",
                     callback=self.display_changed)
        gui.checkBox(box,
                     self,
                     'show_labels',
                     "Show box labels",
                     callback=self.display_changed)
        gui.rubber(box)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        self.box_scene.selectionChanged.connect(self.commit)
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing
                                     | QPainter.TextAntialiasing
                                     | QPainter.SmoothPixmapTransform)
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(600)

        self.stats = self.dist = self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def sizeHint(self):
        return QSize(100, 500)  # Vertical size is regulated by mainArea

    def eventFilter(self, obj, event):
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.layout_changed()

        return super().eventFilter(obj, event)

    def reset_attrs(self, domain):
        self.attrs[:] = [
            var for var in chain(domain.class_vars, domain.metas,
                                 domain.attributes) if var.is_primitive()
        ]

    # noinspection PyTypeChecker
    @Inputs.data
    def set_data(self, dataset):
        if dataset is not None and (not bool(dataset)
                                    or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = self.stats = self.conts = []
        self.group_var = None
        self.attribute = None
        if dataset:
            domain = dataset.domain
            self.group_vars.set_domain(domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.reset_attrs(domain)
            self.select_default_variables(domain)
            self.openContext(self.dataset)
            self.grouping_changed()
        else:
            self.reset_all_data()
        self.commit()

    def select_default_variables(self, domain):
        # visualize first non-class variable, group by class (if present)
        if len(self.attrs) > len(domain.class_vars):
            self.attribute = self.attrs[len(domain.class_vars)]
        elif self.attrs:
            self.attribute = self.attrs[0]

        if domain.class_var and domain.class_var.is_discrete:
            self.group_var = domain.class_var
        else:
            self.group_var = None  # Reset to trigger selection via callback

    def apply_sorting(self):
        def compute_score(attr):
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                col = data.get_column_view(attr)[0].astype(float)
                groups = (col[group_col == i] for i in range(n_groups))
                groups = (col[~np.isnan(col)] for col in groups)
                groups = [group for group in groups if len(group)]
                p = f_oneway(*groups)[1] if len(groups) > 1 else 2
            else:
                # Chi-square with the given distribution into groups
                # (see degrees of freedom in computation of the p-value)
                if not attr.values or not group_var.values:
                    return 2
                observed = np.array(
                    contingency.get_contingency(data, group_var, attr))
                observed = observed[observed.sum(axis=1) != 0, :]
                observed = observed[:, observed.sum(axis=0) != 0]
                if min(observed.shape) < 2:
                    return 2
                expected = \
                    np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \
                    np.sum(observed)
                p = chisquare(observed.ravel(),
                              f_exp=expected.ravel(),
                              ddof=n_groups - 1)[1]
            if math.isnan(p):
                return 2
            return p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        attribute = self.attribute
        group_var = self.group_var
        if self.order_by_importance and group_var is not None:
            n_groups = len(group_var.values)
            group_col = data.get_column_view(group_var)[0] if \
                domain.has_continuous_attributes(
                    include_class=True, include_metas=True) else None
            self.attrs.sort(key=compute_score)
        else:
            self.reset_attrs(domain)
        self.attribute = attribute

    def reset_all_data(self):
        self.clear_scene()
        self.infot1.setText("")
        self.attrs.clear()
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)
        self.is_continuous = False
        self.update_display_box()

    def grouping_changed(self):
        self.cb_order.setEnabled(self.group_var is not None)
        self.apply_sorting()
        self.attr_changed()

    def select_box_items(self):
        temp_cond = self.conditions.copy()
        for box in self.box_scene.items():
            if isinstance(box, FilterGraphicsRectItem):
                box.setSelected(
                    box.filter.conditions in [c.conditions for c in temp_cond])

    def attr_changed(self):
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            heights = 90 if self.show_annotations else 60
            self.box_view.centerOn(self.scene_min_x + self.scene_width / 2,
                                   -30 - len(self.stats) * heights / 2 + 45)
        else:
            self.box_view.centerOn(self.scene_width / 2,
                                   -30 - len(self.boxes) * 40 / 2 + 45)

    def compute_box_data(self):
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        self.is_continuous = attr.is_continuous
        if dataset is None or not self.is_continuous and not attr.values or \
                        self.group_var and not self.group_var.values:
            self.stats = self.dist = self.conts = []
            return
        if self.group_var:
            self.dist = []
            self.conts = contingency.get_contingency(dataset, attr,
                                                     self.group_var)
            if self.is_continuous:
                self.stats = [
                    BoxData(cont, attr, i, self.group_var)
                    for i, cont in enumerate(self.conts) if np.sum(cont) > 0
                ]
            self.label_txts_all = \
                [v for v, c in zip(self.group_var.values, self.conts)
                 if np.sum(c) > 0]
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist, attr, None)]
            self.label_txts_all = [""]
        self.label_txts = [
            txts for stat, txts in zip(self.stats, self.label_txts_all)
            if stat.n > 0
        ]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        if self.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()

    def clear_scene(self):
        self.closeContext()
        self.box_scene.clearSelection()
        self.box_scene.clear()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []
        self.openContext(self.dataset)

    def layout_changed(self):
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None or len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        self.mean_labels = [
            self.mean_label(stat, attr, lab)
            for stat, lab in zip(self.stats, self.label_txts)
        ]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [
            self.label_group(stat, attr, mean_lab)
            for stat, mean_lab in zip(self.stats, self.mean_labels)
        ]
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts
        ]
        for it in chain(self.labels, self.attr_labels):
            self.box_scene.addItem(it)
        self.display_changed()

    def display_changed(self):
        if self.dataset is None:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            vals = [getattr(stat, criterion) for stat in self.stats]
            overmax = max((val for val in vals if val is not None), default=0) \
                      + 1
            vals = [val if val is not None else overmax for val in vals]
            self.order = sorted(self.order, key=vals.__getitem__)

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            for item in self.boxes[box_index]:
                self.box_scene.addItem(item)
                item.setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians and \
                        stat.median is not None:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width,
                   len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()
        self.select_box_items()

    def display_changed_disc(self):
        self.clear_scene()
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all
        ]

        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts if np.sum(cont) > 0
                ]
            else:
                self.labels = [QGraphicsTextItem(str(int(sum(self.dist))))]

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = \
                [self.strudel(cont, i) for i, cont in enumerate(self.conts)
                 if np.sum(cont) > 0]
            self.conts = self.conts[np.sum(np.array(self.conts), axis=1) > 0]
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box in enumerate(self.boxes):
            y = (-len(self.boxes) + row) * 40 + 10
            bars, labels = box[::2], box[1::2]

            self.__draw_group_labels(y, row)
            if not self.stretched:
                self.__draw_row_counts(y, row)
            if self.show_labels and self.attribute is not self.group_var:
                self.__draw_bar_labels(y, bars, labels)
            self.__draw_bars(y, bars)

        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width,
                                    len(self.boxes * 40) + 90)
        self.infot1.setText("")
        self.select_box_items()

    def __draw_group_labels(self, y, row):
        """Draw group labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        label = self.attr_labels[row]
        b = label.boundingRect()
        label.setPos(-b.width() - 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_row_counts(self, y, row):
        """Draw row counts

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        label = self.labels[row]
        b = label.boundingRect()
        if self.group_var:
            right = self.scale_x * sum(self.conts[row])
        else:
            right = self.scale_x * sum(self.dist)
        label.setPos(right + 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_bar_labels(self, y, bars, labels):
        """Draw bar labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        bars: List[FilterGraphicsRectItem]
            list of bars being drawn
        labels: List[QGraphicsTextItem]
            list of labels for corresponding bars
        """
        label = bar_part = None
        for text_item, bar_part in zip(labels, bars):
            label = self.Label(text_item.toPlainText())
            label.setPos(bar_part.boundingRect().x(),
                         y - label.boundingRect().height() - 8)
            label.setMaxWidth(bar_part.boundingRect().width())
            self.box_scene.addItem(label)

    def __draw_bars(self, y, bars):
        """Draw bars

        Parameters
        ----------
        y: int
            vertical offset of bars

        bars: List[FilterGraphicsRectItem]
            list of bars to draw
        """
        for item in bars:
            item.setPos(0, y)
            self.box_scene.addItem(item)

    # noinspection PyPep8Naming
    def compute_tests(self):
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests
        def stat_ttest():
            d1, d2 = self.stats
            if d1.n == 0 or d2.n == 0:
                return np.nan, np.nan
            pooled_var = d1.var / d1.n + d2.var / d2.n
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            if pooled_var == 0:
                return np.nan, np.nan
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            if any(stat.n == 0 for stat in self.stats):
                return np.nan, np.nan
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg)**2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, self.p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p)
            else:
                t, self.p = stat_ttest()
                t = "Student's t: %.3f (p=%.3f)" % (t, self.p)
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, self.p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p)
            else:
                F, self.p = stat_ANOVA()
                t = "ANOVA: %.3f (p=%.3f)" % (F, self.p)
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(
            "%.*f" % (attr.number_of_decimals + 1, stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        misssing_stats = not self.stats
        stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)]
        mean_labels = self.mean_labels or [
            self.mean_label(stats[0], self.attribute, "")
        ]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        decimals = max(3, 4 - int(math.log10(step)))
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(
                repr(round(val, decimals)) if not misssing_stats else "?",
                self._axis_font)
            t.setFlags(t.flags() | QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        self.box_scene.addLine(bottom * scale_x - 4, 0, top * scale_x + 4, 0,
                               self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        if self.stretched:
            if not self.attr_labels:
                return
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(
                sum(dist) * scale_x + 10 + lbl.boundingRect().width()
                for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            label.setX(to)
            to += t_box.width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        if stat.median is not None:
            msc = stat.median * self.scale_x
            med_t = centered_text(stat.median, msc)
            med_box_width2 = med_t.boundingRect().width() / 2
            line(msc)

        if stat.q25 is not None:
            x = stat.q25 * self.scale_x
            t = centered_text(stat.q25, x)
            t_box = t.boundingRect()
            med_left = msc - med_box_width2
            if x + t_box.width() / 2 >= med_left - 5:
                move_label(t, x, med_left - t_box.width() - 5)
            else:
                line(x)

        if stat.q75 is not None:
            x = stat.q75 * self.scale_x
            t = centered_text(stat.q75, x)
            t_box = t.boundingRect()
            med_right = msc + med_box_width2
            if x - t_box.width() / 2 <= med_right + 5:
                move_label(t, x, med_right + 5)
            else:
                line(x)

        return labels

    def box_group(self, stat, height=20):
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = []
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5)
        vert_line = line(stat.a_min, 0, stat.a_max, 0)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0)
        var_line.setPen(self._pen_paramet)
        box.extend([whisker1, whisker2, vert_line, mean_line, var_line])
        if stat.q25 is not None and stat.q75 is not None:
            mbox = FilterGraphicsRectItem(stat.conditions, stat.q25 * scale_x,
                                          -height / 2,
                                          (stat.q75 - stat.q25) * scale_x,
                                          height)
            mbox.setBrush(self._box_brush)
            mbox.setPen(QPen(Qt.NoPen))
            mbox.setZValue(-200)
            box.append(mbox)

        if stat.median is not None:
            median_line = line(stat.median, -height / 2, stat.median,
                               height / 2)
            median_line.setPen(self._pen_median)
            median_line.setZValue(-150)
            box.append(median_line)

        return box

    def strudel(self, dist, group_val_index=None):
        attr = self.attribute
        ss = np.sum(dist)
        box = []
        if ss < 1e-6:
            cond = [FilterDiscrete(attr, None)]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10))
        cum = 0
        for i, v in enumerate(dist):
            if v < 1e-6:
                continue
            if self.stretched:
                v /= ss
            v *= self.scale_x
            cond = [FilterDiscrete(attr, [i])]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / sum(dist))
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(attr.values[i])
            box.append(rect)
            box.append(text)
            cum += v
        return box

    def commit(self):
        self.conditions = [
            item.filter for item in self.box_scene.selectedItems()
            if item.filter
        ]
        selected, selection = None, []
        if self.conditions:
            selected = Values(self.conditions, conjunction=False)(self.dataset)
            selection = np.in1d(self.dataset.ids,
                                selected.ids,
                                assume_unique=True).nonzero()[0]
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, selection))

    def show_posthoc(self):
        def line(y0, y1):
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line)
            if x is None:
                continue
            x *= self.scale_x
            xs.append(x * self.scale_x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if last_to == to or frm == to:
                continue
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = -6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        if self.attribute:
            return self.attribute.name

    def send_report(self):
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)

    class Label(QGraphicsSimpleTextItem):
        """Boxplot Label with settable maxWidth"""
        # Minimum width to display label text
        MIN_LABEL_WIDTH = 25

        # padding bellow the text
        PADDING = 3

        __max_width = None

        def maxWidth(self):
            return self.__max_width

        def setMaxWidth(self, max_width):
            self.__max_width = max_width

        def paint(self, painter, option, widget):
            """Overrides QGraphicsSimpleTextItem.paint

            If label text is too long, it is elided
            to fit into the allowed region
            """
            if self.__max_width is None:
                width = option.rect.width()
            else:
                width = self.__max_width

            if width < self.MIN_LABEL_WIDTH:
                # if space is too narrow, no label
                return

            fm = painter.fontMetrics()
            text = fm.elidedText(self.text(), Qt.ElideRight, width)
            painter.drawText(
                option.rect.x(),
                option.rect.y() + self.boundingRect().height() - self.PADDING,
                text)