class OWSieveDiagram(OWWidget):
    """
    Widget that draws a sieve diagram for a pair of attributes.

    Each cell of the diagram corresponds to a combination of values of the
    two chosen attributes; its area reflects the product of marginal
    probabilities and its colour/hatching the standardized Pearson residual.
    Clicking cells selects the corresponding data instances.
    """
    # NOTE(review): identifiers such as ``self.data`` and ``<var>.name`` and
    # the ``@Inputs.data`` decorator were missing from the reviewed text and
    # were restored from context -- confirm against the repository copy.

    name = "Sieve Diagram"
    description = "Visualize the observed and expected frequencies " \
                  "for a combination of values."
    icon = "icons/SieveDiagram.svg"
    priority = 200
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        features = Input("Features", AttributeList)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    graph_name = "canvas"

    want_control_area = False

    settings_version = 1
    settingsHandler = DomainContextHandler()
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    selection = ContextSetting(set())

    # Emitted (with the new x and y attributes) only when the user changes
    # a combo box; programmatic changes through `set_attr` do not emit it.
    xy_changed_manually = Signal(Variable, Variable)

    def __init__(self):
        # pylint: disable=missing-docstring
        super().__init__()

        self.data = self.discrete_data = None
        self.attrs = []
        self.input_features = None
        self.areas = []
        self.selection = set()

        self.attr_box = gui.hBox(self.mainArea)
        self.domain_model = DomainModel(valid_types=DomainModel.PRIMITIVE)
        combo_args = dict(
            widget=self.attr_box, master=self, contentsLength=12,
            callback=self.attr_changed, sendSelectedValue=True, valueType=str,
            model=self.domain_model)
        fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed)
        gui.comboBox(value="attr_x", **combo_args)
        gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size)
        gui.comboBox(value="attr_y", **combo_args)
        self.vizrank, self.vizrank_button = SieveRank.add_vizrank(
            self.attr_box, self, "Score Combinations", self.set_attr)
        self.vizrank_button.setSizePolicy(*fixed_size)

        self.canvas = QGraphicsScene()
        self.canvasView = ViewWithPress(
            self.canvas, self.mainArea, handler=self.reset_selection)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(450, 550)

    def resizeEvent(self, event):
        """Redraw the diagram so that it fits the new size."""
        super().resizeEvent(event)
        self.update_graph()

    def showEvent(self, event):
        """Redraw the diagram when the widget becomes visible."""
        super().showEvent(event)
        self.update_graph()

    @classmethod
    def migrate_context(cls, context, version):
        """
        Migrate contexts stored without a version: rename `attrX`/`attrY`
        to `attr_x`/`attr_y` and convert stored names to variables.
        """
        if not version:
            settings.rename_setting(context, "attrX", "attr_x")
            settings.rename_setting(context, "attrY", "attr_y")
            settings.migrate_str_to_variable(context)

    @Inputs.data
    def set_data(self, data):
        """
        Discretize continuous attributes, and put all attributes and discrete
        metas into self.attrs.

        Select the first two attributes unless context overrides this.
        Method `resolve_shown_attributes` is called to use the attributes from
        the input, if it exists and matches the attributes in the data.

        Remove selection; again let the context override this.
        Initialize the vizrank dialog, but don't show it.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is None:
            self.attrs[:] = []
            self.domain_model.set_domain(None)
            self.discrete_data = None
        else:
            self.domain_model.set_domain(data.domain)
        self.attrs = [x for x in self.domain_model if isinstance(x, Variable)]
        if self.attrs:
            self.attr_x = self.attrs[0]
            # bool index: the second attribute if there is one, else the first
            self.attr_y = self.attrs[len(self.attrs) > 1]
        else:
            self.attr_x = self.attr_y = None
            self.areas = []
            self.selection = set()
        self.openContext(self.data)
        if self.data:
            self.discrete_data = self.sparse_to_dense(data, True)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) > 1
            and not self.data.is_sparse())

    def set_attr(self, attr_x, attr_y):
        """Set both attributes programmatically and refresh the diagram."""
        self.attr_x, self.attr_y = attr_x, attr_y
        self.update_attr()

    def attr_changed(self):
        """Combo callback: refresh and announce the manual change."""
        self.update_attr()
        self.xy_changed_manually.emit(self.attr_x, self.attr_y)

    def update_attr(self):
        """Update the graph and selection."""
        self.selection = set()
        self.discrete_data = self.sparse_to_dense(self.data)
        self.update_graph()
        self.update_selection()

    def sparse_to_dense(self, data, init=False):
        """
        Extracts two selected columns from sparse matrix.
        GH-2260

        For dense data the (already discretized) `self.discrete_data` is
        returned unchanged unless `init` is true. For sparse data only the
        columns of `attr_x` and `attr_y` are extracted. Whenever any variable
        or meta of the resulting table is continuous, the table is
        discretized into four equal-frequency bins and made dense.

        Args:
            data (Table): the table to convert
            init (bool): force (re)discretization of dense data

        Returns:
            Table: data suitable for computing the diagram
        """
        def discretizer(data):
            if any(attr.is_continuous for attr in chain(
                    data.domain.variables, data.domain.metas)):
                discretize = Discretize(
                    method=EqualFreq(n=4), remove_const=False,
                    discretize_classes=True, discretize_metas=True)
                return discretize(data).to_dense()
            return data

        if not data.is_sparse() and not init:
            return self.discrete_data
        if data.is_sparse():
            attrs = {self.attr_x, self.attr_y}
            new_domain = data.domain.select_columns(attrs)
            data = Table.from_table(new_domain, data)
        return discretizer(data)

    @Inputs.features
    def set_input_features(self, attr_list):
        """
        Handler for the Features signal.

        The method stores the attributes and calls `resolve_shown_attributes`

        Args:
            attr_list (AttributeList): data from the signal
        """
        self.input_features = attr_list
        self.resolve_shown_attributes()
        self.update_selection()

    def resolve_shown_attributes(self):
        """
        Use the attributes from the input signal if the signal is present
        and at least two attributes appear in the domain. If there are
        multiple, use the first two. Combos are disabled if inputs are used.
        """
        self.warning()
        self.attr_box.setEnabled(True)
        self.vizrank.setEnabled(True)
        if not self.input_features:  # None or empty
            return
        features = [f for f in self.input_features if f in self.domain_model]
        if not features:
            self.warning(
                "Features from the input signal are not present in the data")
            return
        old_attrs = self.attr_x, self.attr_y
        # doubling the list makes a single feature serve as both x and y
        self.attr_x, self.attr_y = (features * 2)[:2]
        self.attr_box.setEnabled(False)
        self.vizrank.setEnabled(False)
        if (self.attr_x, self.attr_y) != old_attrs:
            self.selection = set()
            self.update_graph()

    def reset_selection(self):
        """Clear the selection and update the outputs."""
        self.selection = set()
        self.update_selection()

    def select_area(self, area, event):
        """
        Add or remove the clicked area from the selection

        Args:
            area (QRect): the area that is clicked
            event (QEvent): event description
        """
        if event.button() != Qt.LeftButton:
            return
        index = self.areas.index(area)
        if event.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection()

    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        if self.areas is None or not self.selection:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attr_x.name, [val_x]),
                        filter.FilterDiscrete(self.attr_y.name, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id_ in enumerate(self.data.ids) if id_ in idset]
        # output rows of the original table, not of the discretized copy
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def update_graph(self):
        # Function uses weird names like r, g, b, but it does it with utmost
        # caution, hence
        # pylint: disable=invalid-name
        """Update the graph."""

        def text(txt, *args, **kwargs):
            # plain text is used when the width is limited, HTML otherwise
            text = html_text = None
            if "max_width" in kwargs:
                text = txt
            else:
                html_text = to_html(txt)
            return CanvasText(self.canvas, text, html_text=html_text,
                              *args, **kwargs)

        def width(txt):
            return text(txt, 0, 0, show=False).boundingRect().width()

        def height(txt):
            return text(txt, 0, 0, show=False).boundingRect().height()

        def fmt(val):
            return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val)

        def show_pearson(rect, pearson, pen_width):
            """
            Color the given rectangle according to its corresponding
            standardized Pearson residual.

            Args:
                rect (QRect): the rectangle being drawn
                pearson (float): signed standardized pearson residual
                pen_width (int): pen width (bolder pen is used for selection)
            """
            r = rect.rect()
            x, y, w, h = r.x(), r.y(), r.width(), r.height()
            if w == 0 or h == 0:
                return

            r = b = 255
            # QColor accepts only integer channels; floats raise TypeError
            # on Python >= 3.10, hence the explicit truncation.
            if pearson > 0:
                r = g = max(int(255 - 20 * pearson), 55)
            elif pearson < 0:
                b = g = max(int(255 + 20 * pearson), 55)
            else:
                r = g = b = 224
            rect.setBrush(QBrush(QColor(r, g, b)))
            pen_color = QColor(255 * (r == 255), 255 * (g == 255),
                               255 * (b == 255))
            pen = QPen(pen_color, pen_width)
            rect.setPen(pen)
            # distance between hatch lines depends on the clipped residual
            if pearson > 0:
                pearson = min(pearson, 10)
                dist = 20 - 1.6 * pearson
            else:
                pearson = max(pearson, -10)
                dist = 20 - 8 * pearson
            # hatch lines are always thin, whatever the rectangle's pen
            pen.setWidth(1)

            def _offseted_line(ax, ay):
                r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w),
                                      y + (ay or h))
                self.canvas.addItem(r)
                r.setPen(pen)

            ax = dist
            while ax < w:
                _offseted_line(ax, 0)
                ax += dist

            ay = dist
            while ay < h:
                _offseted_line(0, ay)
                ay += dist

        def make_tooltip():
            """Create the tooltip. The function uses local variables from
            the enclosing scope."""
            # pylint: disable=undefined-loop-variable
            def _oper(attr, txt):
                if self.data.domain[attr.name] == ddomain[attr.name]:
                    return " = "
                return " " if txt[0] in "<≥" else " in "

            xt, yt = [
                "<b>{attr}{eq}{val_name}</b>: {obs}/{n} ({p:.0f} %)".format(
                    attr=to_html(attr.name),
                    eq=_oper(attr, val_name),
                    val_name=to_html(val_name),
                    obs=fmt(prob * n),
                    n=int(n),
                    p=100 * prob)
                for attr, val_name, prob in [
                    (attr_x, xval_name, chi.probs_x[x]),
                    (attr_y, yval_name, chi.probs_y[y])]
            ]

            ct = """<b>combination of values: </b><br/>
                   &nbsp;&nbsp;&nbsp;expected {exp} ({p_exp:.0f} %)<br/>
                   &nbsp;&nbsp;&nbsp;observed {obs} ({p_obs:.0f} %)""".format(
                       exp=fmt(chi.expected[y, x]),
                       p_exp=100 * chi.expected[y, x] / n,
                       obs=fmt(chi.observed[y, x]),
                       p_obs=100 * chi.observed[y, x] / n)

            return f"{xt}<br/>{yt}<hr/>{ct}"

        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attr_x is None or self.attr_y is None:
            return

        ddomain = self.discrete_data.domain
        attr_x, attr_y = self.attr_x, self.attr_y
        disc_x, disc_y = ddomain[attr_x.name], ddomain[attr_y.name]
        view = self.canvasView

        chi = ChiSqStats(self.discrete_data, disc_x, disc_y)
        max_ylabel_w = max((width(val) for val in disc_y.values), default=0)
        max_ylabel_w = min(max_ylabel_w, 200)
        x_off = height(attr_y.name) + max_ylabel_w
        y_off = 15
        square_size = min(view.width() - x_off - 35, view.height() - y_off - 80)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, view.width(), view.height())
        if not disc_x.values or not disc_y.values:
            if not disc_x.values and not disc_y.values and disc_x != disc_y:
                text_ = "Features {} and {} have no values".format(
                    disc_x, disc_y)
            else:
                text_ = "Feature {} has no values".format(
                    disc_x if not disc_x.values else disc_y)
            text(text_, view.width() / 2 + 70, view.height() / 2,
                 Qt.AlignRight | Qt.AlignVCenter)
            return
        n = chi.n
        curr_x = x_off
        max_xlabel_h = 0
        self.areas = []
        for x, (px, xval_name) in enumerate(zip(chi.probs_x, disc_x.values)):
            if px == 0:
                continue
            rect_w = square_size * px

            curr_y = y_off
            for y in range(len(chi.probs_y) - 1, -1, -1):  # bottom-up order
                py = chi.probs_y[y]
                yval_name = disc_y.values[y]
                if py == 0:
                    continue
                rect_h = square_size * py

                # index the new area will get in self.areas
                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(
                    self.canvas, curr_x + 2, curr_y + 2,
                    rect_w - 4, rect_h - 4,
                    z=-10, onclick=self.select_area)
                rect.value_pair = x, y
                self.areas.append(rect)
                show_pearson(rect, chi.residuals[y, x], 3 * selected)
                rect.setToolTip(make_tooltip())

                if x == 0:
                    text(yval_name, x_off, curr_y + rect_h / 2,
                         Qt.AlignRight | Qt.AlignVCenter)
                curr_y += rect_h

            xl = text(xval_name, curr_x + rect_w / 2, y_off + square_size,
                      Qt.AlignHCenter | Qt.AlignTop, max_width=rect_w)
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
            curr_x += rect_w

        bottom = y_off + square_size + max_xlabel_h
        text(attr_y.name, 0, y_off + square_size / 2,
             Qt.AlignLeft | Qt.AlignVCenter, bold=True, vertical=True)
        text(attr_x.name, x_off + square_size / 2, bottom,
             Qt.AlignHCenter | Qt.AlignTop, bold=True)
        bottom += 30
        xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p),
                  0, bottom)
        # Assume similar height for both lines
        text("N = " + fmt(chi.n), 0, bottom - xl.boundingRect().height())

    def get_widget_name_extension(self):
        """Return "<x> vs <y>" when data is present, otherwise None."""
        if self.data is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)
        return None

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot()
class OWMosaicDisplay(OWWidget):
    """
    Widget that draws a mosaic plot for up to four attributes.

    Rectangles are split recursively, alternating between the horizontal
    and vertical direction, in proportion to the conditional frequencies of
    attribute values. The interior shows either Pearson residuals or the
    distribution of the chosen colour variable. Clicking rectangles selects
    the corresponding data instances.
    """
    # NOTE(review): identifiers such as ``self.data``, ``<var>.name``,
    # ``Qt.black``, ``e.id`` and the ``@Inputs.data`` decorator were missing
    # from the reviewed text and were restored from context -- confirm
    # against the repository copy.

    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    vizrank = SettingProvider(MosaicVizRank)
    settings_version = 2
    use_boxes = Setting(True)
    variable1: Variable = ContextSetting(None)
    variable2: Variable = ContextSetting(None)
    variable3: Variable = ContextSetting(None)
    variable4: Variable = ContextSetting(None)
    variable_color: DiscreteVariable = ContextSetting(None)
    selection = Setting(set(), schema_only=True)

    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    BLUE_COLORS = [
        QColor(255, 255, 255), QColor(210, 210, 255),
        QColor(110, 110, 255), QColor(0, 0, 255)
    ]
    RED_COLORS = [
        QColor(255, 255, 255), QColor(255, 200, 200),
        QColor(255, 100, 100), QColor(255, 0, 0)
    ]

    graph_name = "canvas"

    # Emitted with the list of shown (discretized) attributes when the user
    # changes one of the combo boxes.
    attrs_changed_manually = Signal(list)

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of numeric features on SQL is not supported")

    def __init__(self):
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.subset_data = None
        self.subset_indices = None
        # the stored selection is applied in handleNewSignals, once data
        # has arrived
        self.__pending_selection = self.selection
        self.selection = set()

        self.color_data = None

        self.areas = []

        self.canvas = QGraphicsScene(self)
        self.canvas_view = ViewWithPress(
            self.canvas, handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.model_1 = DomainModel(
            order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE)
        self.model_234 = DomainModel(
            order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE,
            placeholder="(None)")
        self.attr_combos = [
            gui.comboBox(
                box, self, value="variable{}".format(i),
                orientation=Qt.Horizontal, contentsLength=12,
                searchable=True,
                callback=self.attr_changed,
                model=self.model_1 if i == 1 else self.model_234)
            for i in range(1, 5)
        ]
        self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank(
            box, self, "Find Informative Mosaics", self.set_attr)

        box2 = gui.vBox(self.controlArea, box="Interior Coloring")
        self.color_model = DomainModel(
            order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE,
            placeholder="(Pearson residuals)")
        self.cb_attr_color = gui.comboBox(
            box2, self, value="variable_color",
            orientation=Qt.Horizontal, contentsLength=12, labelWidth=50,
            searchable=True,
            callback=self.set_color_data, model=self.color_model)
        self.bar_button = gui.checkBox(
            box2, self, 'use_boxes', label='Compare with total',
            callback=self.update_graph)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(720, 530)

    def _get_discrete_data(self, data):
        """
        Discretize continuous attributes.
        Return None when there is no data, no rows, or no primitive attributes.
        """
        if (data is None
                or not len(data)
                or not any(attr.is_discrete or attr.is_continuous
                           for attr in chain(data.domain.variables,
                                             data.domain.metas))):
            return None
        elif any(attr.is_continuous for attr in data.domain.variables):
            return Discretize(
                method=EqualFreq(n=4), remove_const=False,
                discretize_classes=True, discretize_metas=True)(data)
        else:
            return data

    def init_combos(self, data):
        """
        Fill the combo models from the data domain and pick defaults: the
        first two variables of the model and the class as colour variable.
        With `data` None, the models and all variables are reset.
        """
        def set_combos(value):
            self.model_1.set_domain(value)
            self.model_234.set_domain(value)
            self.color_model.set_domain(value)

        if data is None:
            set_combos(None)
            self.variable1 = self.variable2 = self.variable3 \
                = self.variable4 = self.variable_color = None
            return
        set_combos(self.data.domain)

        if len(self.model_1) > 0:
            self.variable1 = self.model_1[0]
            self.variable2 = self.model_1[min(1, len(self.model_1) - 1)]
        self.variable3 = self.variable4 = None
        self.variable_color = self.data.domain.class_var  # None is OK, too

    def get_disc_attr_list(self):
        """Return the chosen variables as they appear in discretized data."""
        return [
            self.discrete_data.domain[var.name]
            for var in (self.variable1, self.variable2,
                        self.variable3, self.variable4)
            if var
        ]

    def set_attr(self, *attrs):
        """Set the four variables (by name lookup in data) and redraw."""
        self.variable1, self.variable2, self.variable3, self.variable4 = [
            attr and self.data.domain[attr.name] for attr in attrs
        ]
        self.reset_graph()

    def attr_changed(self):
        """Combo callback: announce the manual change and redraw."""
        self.attrs_changed_manually.emit(self.get_disc_attr_list())
        self.reset_graph()

    def resizeEvent(self, e):
        """Redraw the plot so that it fits the new size."""
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        """Redraw the plot when the widget becomes visible."""
        OWWidget.showEvent(self, ev)
        self.update_graph()

    @Inputs.data
    def set_data(self, data):
        """
        Handler for the Data signal: store the (possibly sampled) data,
        reset vizrank, initialize combos and open the context. Drawing is
        deferred to `handleNewSignals`.
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            self.discrete_data = None
            self.init_combos(None)
            return

        self.init_combos(self.data)
        self.openContext(self.data)

    @Inputs.data_subset
    def set_subset_data(self, data):
        """Handler for the Data Subset signal; only stores the table."""
        self.subset_data = data

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        """
        Compute the subset mask, apply a pending stored selection (if any)
        and refresh colouring, plot and outputs.
        """
        self.Warning.incompatible_subset.clear()
        self.subset_indices = None
        if self.data is not None and self.subset_data:
            transformed = self.subset_data.transform(self.data.domain)
            if np.all(np.isnan(transformed.X)) \
                    and np.all(np.isnan(transformed.Y)):
                self.Warning.incompatible_subset()
            else:
                indices = {e.id for e in transformed}
                # boolean mask over rows of self.data
                self.subset_indices = [ex.id in indices for ex in self.data]
        if self.data is not None and self.__pending_selection is not None:
            self.selection = self.__pending_selection
            self.__pending_selection = None
        else:
            self.selection = set()
        self.set_color_data()
        self.update_graph()
        self.send_selection()

    def clear_selection(self):
        """Remove the selection, update the rectangles and the outputs."""
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def coloring_changed(self):
        """Notify vizrank about the changed colouring and redraw."""
        self.vizrank.coloring_changed()
        self.update_graph()

    def reset_graph(self):
        """Clear the selection and redraw."""
        self.clear_selection()
        self.update_graph()

    def set_color_data(self):
        """
        Build `color_data` -- the data with the colour variable as class --
        and its discretized version, then reset vizrank and redraw.
        Does nothing without data.
        """
        if self.data is None:
            return
        self.bar_button.setEnabled(self.variable_color is not None)
        attrs = [v for v in self.model_1 if v and v is not self.variable_color]
        domain = Domain(attrs, self.variable_color, None)
        self.color_data = self.data.from_table(domain, self.data)
        self.discrete_data = self._get_discrete_data(self.color_data)
        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(True)
        self.coloring_changed()

    def update_selection_rects(self):
        """Draw selected rectangles with a thick dotted pen."""
        pens = (QPen(), QPen(Qt.black, 3, Qt.DotLine))
        for i, (_, _, area) in enumerate(self.areas):
            area.setPen(pens[i in self.selection])

    def select_area(self, index, ev):
        """
        Handle a click on the rectangle with the given index: a left click
        selects it, Ctrl + left click toggles it in the selection.
        """
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        """Filter the data by the selected rectangles and send the outputs."""
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id_ in enumerate(self.data.ids) if id_ in idset]
        # output rows of the original table, not of the discretized copy
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot(self.canvas)

    def update_graph(self):
        """Clear the scene and redraw the whole mosaic and its legend."""
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def get_counts(attr_vals, values):
            """Calculate rectangles' widths; if all are 0, they are set to 1."""
            if not attr_vals:
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [conditionaldict[attr_vals + "-" + val]
                          for val in values]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)
            return total, counts

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs, used_vals, attr_vals=""):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (x1 - x0) / float(len(values) - 1)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (y1 - y0) / float(len(values) - 1)

            total, counts = get_counts(attr_vals, values)

            # when visualizing the third attribute and the first attribute has
            # the last value, reverse the order in which the boxes are drawn;
            # otherwise, if the last cell, nearest to the labels of the fourth
            # attribute, is empty, we wouldn't be able to position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                newattrvals = attr_vals + "-" + val if attr_vals else val

                tooltip = "{} {}: <b>{}</b><br/>".format(
                    condition, attr.name, htmlval)
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1,
                                  tooltip, total_attrs, *args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1,
                                  tooltip, total_attrs, *args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            total, counts = get_counts(attr_vals, values)

            # indexed by side: 0 bottom, 1 left, 2 top, 3 right
            aligns = [
                Qt.AlignTop | Qt.AlignHCenter,
                Qt.AlignRight | Qt.AlignVCenter,
                Qt.AlignBottom | Qt.AlignHCenter,
                Qt.AlignLeft | Qt.AlignVCenter
            ]
            align = aligns[side]
            for i, val in enumerate(values):
                if distributiondict[val] != 0:
                    perc = counts[i] / float(total)
                    rwidth = width * perc
                    xs = [
                        x0 + currpos + rwidth / 2,
                        x0 - self.ATTR_VAL_OFFSET,
                        x0 + currpos + rwidth / 2,
                        x1 + self.ATTR_VAL_OFFSET
                    ]
                    ys = [
                        y1 + self.ATTR_VAL_OFFSET,
                        y0 + currpos + height * 0.5 * perc,
                        y0 - self.ATTR_VAL_OFFSET,
                        y0 + currpos + height * 0.5 * perc
                    ]

                    CanvasText(self.canvas, val, xs[side], ys[side], align,
                               max_width=rwidth if side == 0 else None)
                    space = height if side % 2 else width
                    currpos += perc * space + spacing * (total_attrs - side)

            xs = [
                x0 + (x1 - x0) / 2,
                x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                x0 + (x1 - x0) / 2,
                x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET
            ]
            ys = [
                y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                y0 + (y1 - y0) / 2,
                y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                y0 + (y1 - y0) / 2
            ]
            CanvasText(
                self.canvas, attr.name, xs[side], ys[side], align,
                bold=True, vertical=side % 2)

        def add_rect(x0, x1, y0, y1, condition,
                     used_attrs, used_vals, attr_vals=""):
            area_index = len(self.areas)
            x1 += (x0 == x1)
            y1 += (y0 == y1)
            # rectangles of width and height 1 are not shown - increase
            y1 += (x1 - x0 + y1 - y0 == 2)
            colors = class_var and [QColor(*col) for col in class_var.colors]

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                        self.canvas, x, y, w, h, z=z, onclick=select_area,
                        **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                    self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                    onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.variable_color is None:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul,
                    (apriori_dists[i][used_vals[i]] / float(s)
                     for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = float((actual - expected) / sqrt(expected))
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" +
                    "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    # the last stripe takes the remainder to avoid gaps
                    # caused by rounding
                    if i == len(cls_values) - 1:
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and abs(y1 - y0) > bar_width:
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        if self.subset_indices is not None:
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[
                                        attr_vals + "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total,
                                         bar_width, v, 15, color)
                                    total += v

                actual = [
                    conditionaldict[attr_vals + "-" + cls_values[i]]
                    for i in range(len(prior))
                ]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                # The lines are joined (no trailing separator), so the text
                # is used whole; slicing it would cut the last entry short.
                outer_rect.setToolTip(
                    "{}<hr>Instances: {}<br><br>{}".format(
                        condition, n_actual, text))

        def create_legend():
            if self.variable_color is None:
                names = [
                    "<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                    "Residuals:"
                ]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
                edges = repeat(Qt.black)
            else:
                names = get_variable_values_sorted(class_var)
                edges = colors = [QColor(*col) for col in class_var.colors]

            items = []
            size = 8
            for name, color, edgecolor in zip(names, colors, edges):
                item = QGraphicsItemGroup()
                item.addToGroup(
                    CanvasRectangle(None, -size / 2, -size / 2, size, size,
                                    edgecolor, color))
                item.addToGroup(
                    CanvasText(None, name, size, 0, Qt.AlignVCenter))
                items.append(item)
            return wrap_legend_items(
                items, hspacing=20, vspacing=16 + size,
                max_width=self.canvas_view.width() - xoff)

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        attr_list = self.get_disc_attr_list()
        class_var = data.domain.class_var
        # TODO: check this
        # data = Preprocessor_dropMissing(data)

        unique = [v.name for v in set(attr_list + [class_var]) if v]
        if len(data[:, unique]) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        attrs = [attr for attr in attr_list if not attr.values]
        if attrs:
            CanvasText(self.canvas,
                       "Feature {} has no values".format(attrs[0]),
                       (self.canvas_view.width() - 120) / 2,
                       self.canvas_view.height() / 2)
            return
        if self.variable_color is None:
            apriori_dists = [
                get_distribution(data, attr) for attr in attr_list
            ]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(attr)
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        xoff = 20

        # get the maximum width of rectangle
        width = 20
        max_ylabel_w1 = max_ylabel_w2 = 0
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1].name, bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3].name,
                                  bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        legend = create_legend()

        # get the maximum height of rectangle
        yoff = 45
        legendoff = yoff + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
        square_size = min(
            self.canvas_view.width() - width - 20,
            self.canvas_view.height() - legendoff
            - legend.boundingRect().height())

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
            0, 0, self.canvas_view.width(), self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if self.subset_indices:
            conditionalsubsetdict, _ = get_conditional_distribution(
                self.discrete_data[self.subset_indices], attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list), [], [])

        self.canvas.addItem(legend)
        legend.setPos(
            xoff - legend.boundingRect().x()
            + max(0, (square_size - legend.boundingRect().width()) / 2),
            legendoff + square_size)
        self.update_selection_rects()

    @classmethod
    def migrate_context(cls, context, version):
        """Convert variable names stored by versions below 2 to variables."""
        if version < 2:
            settings.migrate_str_to_variable(context, none_placeholder="(None)")
class OWMosaicDisplay(OWWidget):
    """Widget that draws a mosaic plot for up to four variables.

    Continuous variables are discretized into four equal-frequency bins
    before plotting. Cells are coloured either by Pearson residuals or by
    the class distribution, and clicked cells are sent to the outputs.

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    extract in which dotted tokens (``self.data``, ``attr.name``,
    ``Qt.black``, ...) had been stripped. Every restored token is an
    inference from the surrounding code; confirm against the upstream file.
    """
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220

    inputs = [("Data", Table, "set_data", Default),
              ("Data Subset", Table, "set_subset_data")]
    outputs = [("Selected Data", Table, widget.Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Table)]

    settingsHandler = DomainContextHandler()
    use_boxes = Setting(True)
    variable1 = ContextSetting("", exclude_metas=False)
    variable2 = ContextSetting("", exclude_metas=False)
    variable3 = ContextSetting("", exclude_metas=False)
    variable4 = ContextSetting("", exclude_metas=False)
    selection = ContextSetting(set())
    # interior_coloring is a context setting so that it is properly reset
    # if the widget switches to regression and back (see set_data)
    interior_coloring = ContextSetting(1)

    PEARSON, CLASS_DISTRIBUTION = 0, 1
    interior_coloring_opts = ["Pearson residuals",
                              "Class distribution"]
    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3

    BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255),
                   QColor(110, 110, 255), QColor(0, 0, 255)]
    RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200),
                  QColor(255, 100, 100), QColor(255, 0, 0)]

    graph_name = "canvas"

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of continuous variables on SQL is not supported")

    def __init__(self):
        """Create the canvas, the four variable combos and coloring controls."""
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.unprocessed_subset_data = None
        self.subset_data = None

        # list of (attribute names, values, outer rectangle), one per cell
        self.areas = []

        self.canvas = QGraphicsScene()
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.attr_combos = [
            gui.comboBox(
                box, self, value="variable{}".format(i),
                orientation=Qt.Horizontal, contentsLength=12,
                callback=self.reset_graph,
                sendSelectedValue=True, valueType=str)
            for i in range(1, 5)]
        self.rb_colors = gui.radioButtonsInBox(
            self.controlArea, self, "interior_coloring",
            self.interior_coloring_opts, box="Interior Coloring",
            callback=self.update_graph)
        self.bar_button = gui.checkBox(
            gui.indentedBox(self.rb_colors),
            self, 'use_boxes', label='Compare with total',
            callback=self._compare_with_total)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        return QSize(530, 720)

    def _compare_with_total(self):
        """Switch to class-distribution coloring when the box is toggled."""
        # NOTE(review): condition and nesting restored from a damaged
        # extract; confirm that the redraw belongs inside the `if`.
        if self.data:
            class_var = self.data.domain.class_var
            if class_var is not None and class_var.is_discrete:
                self.interior_coloring = 1
                self.update_graph()

    def init_combos(self, data):
        """Fill the variable combos from `data` and pick initial variables.

        Args:
            data (Table): input data, or None to just clear the combos
        """
        for combo in self.attr_combos:
            combo.clear()
        if data is None:
            return
        # every combo except the first offers an explicit "(None)" choice
        for combo in self.attr_combos[1:]:
            combo.addItem("(None)")

        icons = gui.attributeIconDict
        for attr in chain(data.domain, data.domain.metas):
            if attr.is_discrete or attr.is_continuous:
                for combo in self.attr_combos:
                    combo.addItem(icons[attr], attr.name)

        if self.attr_combos[0].count() > 0:
            self.variable1 = self.attr_combos[0].itemText(0)
            # index 0 of the second combo is "(None)"; index 2 is the second
            # variable when there are at least two variables
            self.variable2 = self.attr_combos[1].itemText(
                2 * (self.attr_combos[1].count() > 2))
        self.variable3 = self.attr_combos[2].itemText(0)
        self.variable4 = self.attr_combos[3].itemText(0)

    def get_attr_list(self):
        """Return names of the chosen variables, skipping empty and "(None)"."""
        return [
            a for a in [self.variable1, self.variable2,
                        self.variable3, self.variable4]
            if a and a != "(None)"]

    def resizeEvent(self, e):
        super().resizeEvent(e)
        self.update_graph()

    def showEvent(self, ev):
        super().showEvent(ev)
        self.update_graph()

    def set_data(self, data):
        """Handler for the Data input.

        Samples large SQL tables, refills the combos, discretizes continuous
        variables and enables the coloring controls according to the class.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.init_combos(self.data)
        if not self.data:
            self.discrete_data = None
            return
        if any(attr.is_continuous for attr in data.domain):
            self.discrete_data = Discretize(method=EqualFreq(n=4))(data)
        else:
            self.discrete_data = self.data

        class_var = self.data.domain.class_var
        if class_var is None:
            self.rb_colors.setDisabled(True)
            disc_class = False
        else:
            self.rb_colors.setDisabled(False)
            disc_class = class_var.is_discrete
            # NOTE(review): this statement was almost entirely stripped in
            # the extract (only "disc_class)" survived); confirm the button
            # index against the upstream source.
            self.rb_colors.group.button(2).setDisabled(not disc_class)
            self.bar_button.setDisabled(not disc_class)
        self.interior_coloring = bool(disc_class)
        self.openContext(self.data)

        # if the subset arrived before the data, process it now
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None

    def set_subset_data(self, data):
        """Handler for the Data Subset input.

        The subset is converted to the domain of the main data; if the
        conversion fails, the subset is dropped and a warning is shown.

        Args:
            data (Table): data subset
        """
        self.Warning.incompatible_subset.clear()
        if self.data is None:
            self.unprocessed_subset_data = data
            return
        if data is None:
            self.subset_data = None
            return
        try:
            self.subset_data = data.from_table(self.data.domain, data)
        except Exception:  # any conversion failure means "incompatible"
            self.subset_data = None
            self.Warning.incompatible_subset()

    # Called by the framework after set_data and set_subset_data,
    # so the graph is updated only once
    def handleNewSignals(self):
        self.reset_graph()

    def clear_selection(self):
        """Deselect all cells and send the (empty) selection."""
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def reset_graph(self):
        self.clear_selection()
        self.update_graph()

    def update_selection_rects(self):
        """Outline selected cells with a thick dotted pen, reset the others."""
        for i, (_, _, area) in enumerate(self.areas):
            if i in self.selection:
                area.setPen(QPen(Qt.black, 3, Qt.DotLine))
            else:
                area.setPen(QPen())

    def select_area(self, index, ev):
        """Select the clicked cell; Ctrl-click toggles it in the selection.

        Args:
            index (int): index of the cell in `self.areas`
            ev (QEvent): mouse event
        """
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        """Filter the data by the selected cells and send both outputs."""
        if not self.selection or self.data is None:
            self.send("Selected Data", None)
            self.send(ANNOTATED_DATA_SIGNAL_NAME,
                      create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, row_id in enumerate(self.data.ids)
                   if row_id in idset]
        # output rows of the original (non-discretized) data
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.send("Selected Data", selection)
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(self.data, sel_idx))

    def send_report(self):
        self.report_plot(self.canvas)

    def update_graph(self):
        """Clear the canvas and redraw the mosaic, labels and legend."""
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs=None, used_vals=None,
                      attr_vals=""):
            """Recursively split the given area by values of attr_list[0]."""
            used_attrs = [] if used_attrs is None else used_attrs
            used_vals = [] if used_vals is None else used_vals
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            # even sides split along the x axis, odd sides along the y axis
            extent = (x1 - x0) if side % 2 == 0 else (y1 - y0)
            gaps = len(values) - 1
            # remove the space needed for separating different attr. values
            whole = max(0, extent - edge * gaps)
            # guard: a single-valued variable has no gaps to shrink
            if whole == 0 and gaps > 0:
                edge = extent / float(gaps)

            if attr_vals == "":
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [conditionaldict[attr_vals + "-" + val]
                          for val in values]
            total = sum(counts)

            # if we are visualizing the third attribute and the first
            # attribute has the last value, we have to reverse the order in
            # which the boxes will be drawn; otherwise, if the last cell,
            # nearest to the labels of the fourth attribute, is empty, we
            # wouldn't be able to position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(
                    data.domain[used_attrs[0]])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                if attr_vals != "":
                    newattrvals = attr_vals + "-" + val
                else:
                    newattrvals = val

                # NOTE(review): the extract shows four plain spaces here,
                # which collapse in rich text; confirm whether an HTML
                # non-breaking-space entity was intended.
                tooltip = condition + 4 * " " + attr + \
                    ": <b>" + htmlval + "</b><br>"
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                common_args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1,
                                  tooltip, total_attrs, *common_args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1,
                                  tooltip, total_attrs, *common_args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            """Draw value labels and the variable name on one side (0-3)."""
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = \
                    get_variable_values_sorted(data.domain[used_attrs[0]])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            if side in draw_positions:
                # restore the positions of attribute values and name
                (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            if attr_vals == "":
                counts = [conditionaldict.get(val, 1) for val in values]
            else:
                counts = [conditionaldict.get(attr_vals + "-" + val, 1)
                          for val in values]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)

            aligns = [Qt.AlignTop | Qt.AlignHCenter,
                      Qt.AlignRight | Qt.AlignVCenter,
                      Qt.AlignBottom | Qt.AlignHCenter,
                      Qt.AlignLeft | Qt.AlignVCenter]
            align = aligns[side]
            for val, count in zip(values, counts):
                perc = count / float(total)
                if distributiondict[val] != 0:
                    if side == 0:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * 0.5 * perc,
                                   y1 + self.ATTR_VAL_OFFSET, align)
                    elif side == 1:
                        CanvasText(self.canvas, str(val),
                                   x0 - self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)
                    elif side == 2:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * perc * 0.5,
                                   y0 - self.ATTR_VAL_OFFSET, align)
                    else:
                        CanvasText(self.canvas, str(val),
                                   x1 + self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)

                # NOTE(review): nesting is ambiguous in the collapsed
                # extract; the position is advanced for every value here,
                # whether or not its label was drawn.
                if side % 2 == 0:
                    currpos += perc * width + spacing * (total_attrs - side)
                else:
                    currpos += perc * height + spacing * (total_attrs - side)

            if side == 0:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                    align, bold=1)
            elif side == 1:
                CanvasText(
                    self.canvas, attr,
                    x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)
            elif side == 2:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                    align, bold=1)
            else:
                CanvasText(
                    self.canvas, attr,
                    x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)

        def add_rect(x0, x1, y0, y1, condition="",
                     used_attrs=None, used_vals=None, attr_vals=""):
            """Draw one cell, register it in self.areas and set its tooltip."""
            used_attrs = [] if used_attrs is None else used_attrs
            used_vals = [] if used_vals is None else used_vals
            area_index = len(self.areas)
            if x0 == x1:
                x1 += 1
            if y0 == y1:
                y1 += 1

            # rectangles of width and height 1 are not shown - increase
            if x1 - x0 + y1 - y0 == 2:
                y1 += 1

            if class_var and class_var.is_discrete:
                colors = [QColor(*col) for col in class_var.colors]
            else:
                colors = None

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                        self.canvas, x, y, w, h, z=z, onclick=select_area,
                        **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                    self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                    onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.interior_coloring == self.PEARSON:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul,
                    (apriori_dists[i][used_vals[i]] / float(s)
                     for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = (actual - expected) / sqrt(expected)
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" +
                    "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    # the last stripe takes the remainder of the height
                    if i == len(cls_values) - 1:
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and \
                        abs(y1 - y0) > bar_width:
                    # bar on the left showing the prior class distribution
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        counts = [conditionalsubsetdict[attr_vals + "-" + val]
                                  for val in cls_values]
                        # a single subset instance gets a dashed frame
                        if sum(counts) == 1:
                            rect(x0 - 2, y0 - 2, x1 - x0 + 5, y1 - y0 + 5,
                                 -550, colors[counts.index(1)], Qt.white,
                                 penWidth=2, penStyle=Qt.DashLine)
                    if self.subset_data is not None:
                        # bar on the right showing the subset distribution
                        line(x1 - bar_width, y0, x1 - bar_width, y1)
                        total = 0
                        n = conditionalsubsetdict[attr_vals]
                        if n:
                            for i, (cls, color) in \
                                    enumerate(zip(cls_values, colors)):
                                val = conditionalsubsetdict[
                                    attr_vals + "-" + cls]
                                if val == 0:
                                    continue
                                if i == len(prior) - 1:
                                    v = y1 - y0 - total
                                else:
                                    v = ((y1 - y0) * val) / n
                                rect(x1 - bar_width, y0 + total,
                                     bar_width, v, 15, color)
                                total += v

                actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                          for i in range(len(prior))]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                # join() leaves no trailing separator, so the text is used
                # whole; slicing off four characters truncated the last line
                outer_rect.setToolTip(
                    "{}<hr>Instances: {}<br><br>{}".format(
                        condition, n_actual, text))

        def draw_legend(x0_x1, y0_y1):
            """Draw the color legend centered below the plot."""
            x0, x1 = x0_x1
            _, y1 = y0_y1
            if self.interior_coloring == self.PEARSON:
                names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                         "Residuals:"]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
            else:
                names = get_variable_values_sorted(class_var) + \
                        [class_var.name + ":"]
                colors = [QColor(*col) for col in class_var.colors]

            names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
                     for name in names]
            totalwidth = sum(text.boundingRect().width() for text in names)

            # compute the x position of the center of the legend
            y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
            distance = 30
            startx = (x0 + x1) / 2 - (totalwidth + len(names) * distance) / 2

            # the last name is the legend title
            names[-1].setPos(startx + 15, y)
            names[-1].show()
            xoffset = names[-1].boundingRect().width() + distance

            size = 8

            for i in range(len(names) - 1):
                if self.interior_coloring == self.PEARSON:
                    edgecolor = Qt.black
                else:
                    edgecolor = colors[i]

                CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                                size, size, edgecolor, colors[i])
                names[i].setPos(startx + xoffset + 10, y)
                xoffset += distance + names[i].boundingRect().width()

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        subset = self.subset_data
        attr_list = self.get_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = isinstance(data, SqlTable)
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.Warning.no_valid_data()
            return
        self.Warning.no_valid_data.clear()

        if self.interior_coloring == self.PEARSON:
            apriori_dists = [get_distribution(data, attr)
                             for attr in attr_list]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(data.domain[attr])
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        # get the maximum width of rectangle
        xoff = 20
        width = 20
        # bound even when fewer than two / four variables are shown
        max_ylabel_w1 = max_ylabel_w2 = 0
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1], bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3], bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        # get the maximum height of rectangle
        height = 100
        yoff = 45
        square_size = min(self.canvas_view.width() - width - 20,
                          self.canvas_view.height() - height - 20)

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
            0, 0, self.canvas_view.width(), self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if subset:
            conditionalsubsetdict, _ = \
                get_conditional_distribution(subset, attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list))
        draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size))
        self.update_selection_rects()
class OWSieveDiagram(OWWidget):
    """Widget that draws a sieve diagram for a pair of variables.

    Each cell's area corresponds to the product of the marginal
    probabilities of a pair of values; its color and hatching density show
    the standardized Pearson residual. Clicked cells are sent to the outputs.

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    extract in which dotted tokens (``self.data``, ``attr_x.name``,
    ``@Inputs.data``, ...) had been stripped. Every restored token is an
    inference from the surrounding code; confirm against the upstream file.
    """
    name = "Sieve Diagram"
    description = "Visualize the observed and expected frequencies " \
                  "for a combination of values."
    icon = "icons/SieveDiagram.svg"
    priority = 200

    class Inputs:
        data = Input("Data", Table, default=True)
        features = Input("Features", AttributeList)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    graph_name = "canvas"

    want_control_area = False

    settings_version = 1
    settingsHandler = DomainContextHandler()
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    selection = ContextSetting(set())

    def __init__(self):
        # pylint: disable=missing-docstring
        super().__init__()

        self.data = self.discrete_data = None
        self.attrs = []
        self.input_features = None
        # rectangles of the drawn cells; indices are used in self.selection
        self.areas = []
        self.selection = set()

        self.attr_box = gui.hBox(self.mainArea)
        self.domain_model = DomainModel(valid_types=DomainModel.PRIMITIVE)
        combo_args = dict(
            widget=self.attr_box, master=self, contentsLength=12,
            callback=self.update_attr, sendSelectedValue=True, valueType=str,
            model=self.domain_model)
        fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed)
        gui.comboBox(value="attr_x", **combo_args)
        gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size)
        gui.comboBox(value="attr_y", **combo_args)
        self.vizrank, self.vizrank_button = SieveRank.add_vizrank(
            self.attr_box, self, "Score Combinations", self.set_attr)
        self.vizrank_button.setSizePolicy(*fixed_size)

        self.canvas = QGraphicsScene()
        self.canvasView = ViewWithPress(
            self.canvas, self.mainArea, handler=self.reset_selection)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

    def sizeHint(self):
        return QSize(450, 550)

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self.update_graph()

    def showEvent(self, event):
        super().showEvent(event)
        self.update_graph()

    @classmethod
    def migrate_context(cls, context, version):
        """Rename the legacy `attrX`/`attrY` settings of unversioned contexts."""
        if not version:
            settings.rename_setting(context, "attrX", "attr_x")
            settings.rename_setting(context, "attrY", "attr_y")
            settings.migrate_str_to_variable(context)

    @Inputs.data
    def set_data(self, data):
        """
        Discretize continuous attributes, and put all attributes and discrete
        metas into self.attrs.

        Select the first two attributes unless context overrides this.
        Method `resolve_shown_attributes` is called to use the attributes from
        the input, if it exists and matches the attributes in the data.

        Remove selection; again let the context override this.
        Initialize the vizrank dialog, but don't show it.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is None:
            self.attrs[:] = []
            self.domain_model.set_domain(None)
            self.discrete_data = None
        else:
            self.domain_model.set_domain(data.domain)
        self.attrs = [x for x in self.domain_model if isinstance(x, Variable)]
        if self.attrs:
            self.attr_x = self.attrs[0]
            # the second attribute, or the first again if there is only one
            self.attr_y = self.attrs[len(self.attrs) > 1]
        else:
            self.attr_x = self.attr_y = None
            self.areas = []
            self.selection = set()
        self.openContext(self.data)
        if self.data:
            self.discrete_data = self.sparse_to_dense(data, True)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1 and
            len(self.data.domain.attributes) > 1 and
            not self.data.is_sparse())

    def set_attr(self, attr_x, attr_y):
        """Set both attributes (callback used by the vizrank dialog)."""
        self.attr_x, self.attr_y = attr_x, attr_y
        self.update_attr()

    def update_attr(self):
        """Update the graph and selection."""
        self.selection = set()
        self.discrete_data = self.sparse_to_dense(self.data)
        self.update_graph()
        self.update_selection()

    def sparse_to_dense(self, data, init=False):
        """
        Extracts two selected columns from sparse matrix.
        GH-2260

        Args:
            data (Table): data to convert; None is returned unchanged
            init (bool): if False and the data is dense, the already
                discretized `self.discrete_data` is reused

        Returns:
            Table: dense data with continuous columns discretized
        """
        def discretizer(data):
            if any(attr.is_continuous for attr in
                   chain(data.domain.variables, data.domain.metas)):
                discretize = Discretize(
                    method=EqualFreq(n=4), remove_const=False,
                    discretize_classes=True, discretize_metas=True)
                return discretize(data).to_dense()
            return data

        # guard: attribute callbacks may fire while no data is present
        if data is None:
            return None
        if not data.is_sparse() and not init:
            return self.discrete_data
        if data.is_sparse():
            # keep only the two shown columns before densifying
            attrs = {self.attr_x, self.attr_y}
            new_domain = data.domain.select_columns(attrs)
            data = Table.from_table(new_domain, data)
        return discretizer(data)

    @Inputs.features
    def set_input_features(self, attr_list):
        """
        Handler for the Features signal.

        The method stores the attributes and calls `resolve_shown_attributes`

        Args:
            attr_list (AttributeList): data from the signal
        """
        self.input_features = attr_list
        self.resolve_shown_attributes()
        self.update_selection()

    def resolve_shown_attributes(self):
        """
        Use the attributes from the input signal if the signal is present
        and at least one of them appears in the domain. If there are
        multiple, use the first two; a single attribute is used for both
        axes. Combos are disabled if inputs are used.
        """
        self.warning()
        self.attr_box.setEnabled(True)
        if not self.input_features:  # None or empty
            return
        features = [f for f in self.input_features if f in self.domain_model]
        if not features:
            self.warning(
                "Features from the input signal are not present in the data")
            return
        old_attrs = self.attr_x, self.attr_y
        # doubling the list repeats a lone feature on both axes
        self.attr_x, self.attr_y = (features * 2)[:2]
        self.attr_box.setEnabled(False)
        if (self.attr_x, self.attr_y) != old_attrs:
            self.selection = set()
            self.update_graph()

    def reset_selection(self):
        """Deselect all cells and update the outputs."""
        self.selection = set()
        self.update_selection()

    def select_area(self, area, event):
        """
        Add or remove the clicked area from the selection

        Args:
            area (QRect): the area that is clicked
            event (QEvent): event description
        """
        if event.button() != Qt.LeftButton:
            return
        index = self.areas.index(area)
        if event.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection()

    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        # nothing is drawn or nothing is selected: send empty outputs
        if not self.areas or not self.selection:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attr_x.name, [val_x]),
                        filter.FilterDiscrete(self.attr_y.name, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, row_id in enumerate(self.data.ids)
                   if row_id in idset]
        # output rows of the original (non-discretized) data
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def update_graph(self):
        # Function uses weird names like r, g, b, but it does it with utmost
        # caution, hence
        # pylint: disable=invalid-name
        """Update the graph."""

        def text(txt, *args, **kwargs):
            return CanvasText(self.canvas, "", html_text=to_html(txt),
                              *args, **kwargs)

        def width(txt):
            return text(txt, 0, 0, show=False).boundingRect().width()

        def fmt(val):
            return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val)

        def show_pearson(rect, pearson, pen_width):
            """
            Color the given rectangle according to its corresponding
            standardized Pearson residual.

            Args:
                rect (QRect): the rectangle being drawn
                pearson (float): signed standardized pearson residual
                pen_width (int): pen width (bolder pen is used for selection)
            """
            r = rect.rect()
            x, y, w, h = r.x(), r.y(), r.width(), r.height()
            if w == 0 or h == 0:
                return

            # channels are cast to int because they are passed to QColor
            r = b = 255
            if pearson > 0:
                r = g = max(int(255 - 20 * pearson), 55)
            elif pearson < 0:
                b = g = max(int(255 + 20 * pearson), 55)
            else:
                r = g = b = 224
            rect.setBrush(QBrush(QColor(r, g, b)))
            pen_color = QColor(255 * (r == 255), 255 * (g == 255),
                               255 * (b == 255))
            pen = QPen(pen_color, pen_width)
            rect.setPen(pen)
            # spacing of the hatching depends on the clamped residual
            if pearson > 0:
                pearson = min(pearson, 10)
                dist = 20 - 1.6 * pearson
            else:
                pearson = max(pearson, -10)
                dist = 20 - 8 * pearson
            # the rectangle keeps its own copy of the pen; the hatching
            # lines are always one pixel wide
            pen.setWidth(1)

            def _offseted_line(ax, ay):
                r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w),
                                      y + (ay or h))
                self.canvas.addItem(r)
                r.setPen(pen)

            ax = dist
            while ax < w:
                _offseted_line(ax, 0)
                ax += dist

            ay = dist
            while ay < h:
                _offseted_line(0, ay)
                ay += dist

        def make_tooltip():
            """Create the tooltip. The function uses local variables from
            the enclosing scope."""
            # pylint: disable=undefined-loop-variable
            def _oper(attr, txt):
                # "=" when the variable was not replaced by discretization
                if self.data.domain[attr.name] is ddomain[attr.name]:
                    return "="
                return " " if txt[0] in "<≥" else " in "

            return (
                "<b>{attr_x}{xeq}{xval_name}</b>: {obs_x}/{n} ({p_x:.0f} %)".
                format(attr_x=to_html(attr_x.name),
                       xeq=_oper(attr_x, xval_name),
                       xval_name=to_html(xval_name),
                       obs_x=fmt(chi.probs_x[x] * n),
                       n=int(n),
                       p_x=100 * chi.probs_x[x]) +
                "<br/>" +
                "<b>{attr_y}{yeq}{yval_name}</b>: {obs_y}/{n} ({p_y:.0f} %)".
                format(attr_y=to_html(attr_y.name),
                       yeq=_oper(attr_y, yval_name),
                       yval_name=to_html(yval_name),
                       obs_y=fmt(chi.probs_y[y] * n),
                       n=int(n),
                       p_y=100 * chi.probs_y[y]) +
                "<hr/>" +
                ("<b>combination of values: </b><br/> "
                 "expected {exp} ({p_exp:.0f} %)<br/> "
                 "observed {obs} ({p_obs:.0f} %)").
                format(exp=fmt(chi.expected[y, x]),
                       p_exp=100 * chi.expected[y, x] / n,
                       obs=fmt(chi.observed[y, x]),
                       p_obs=100 * chi.observed[y, x] / n))

        for item in self.canvas.items():
            self.canvas.removeItem(item)
        # the rectangles were just removed from the scene; drop them so an
        # early return below does not leave stale items behind
        self.areas = []
        if self.data is None or len(self.data) == 0 or \
                self.attr_x is None or self.attr_y is None:
            return

        ddomain = self.discrete_data.domain
        attr_x, attr_y = self.attr_x, self.attr_y
        disc_x, disc_y = ddomain[attr_x.name], ddomain[attr_y.name]
        view = self.canvasView

        chi = ChiSqStats(self.discrete_data, disc_x, disc_y)
        max_ylabel_w = max((width(val) for val in disc_y.values), default=0)
        max_ylabel_w = min(max_ylabel_w, 200)
        # NOTE(review): the argument was stripped in the extract; restored
        # as the x attribute's name - confirm against upstream.
        x_off = width(attr_x.name) + max_ylabel_w
        y_off = 15
        square_size = min(view.width() - x_off - 35, view.height() - y_off - 80)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, view.width(), view.height())
        if not disc_x.values or not disc_y.values:
            text_ = "Features {} and {} have no values".format(disc_x, disc_y) \
                if not disc_x.values and \
                   not disc_y.values and \
                          disc_x != disc_y \
                else \
                    "Feature {} has no values".format(
                        disc_x if not disc_x.values else disc_y)
            text(text_, view.width() / 2 + 70, view.height() / 2,
                 Qt.AlignRight | Qt.AlignVCenter)
            return
        n = chi.n
        curr_x = x_off
        max_xlabel_h = 0
        # y labels go beside the first column that is actually drawn
        ylabels_drawn = False
        for x, (px, xval_name) in enumerate(zip(chi.probs_x, disc_x.values)):
            if px == 0:
                continue
            rect_w = square_size * px

            curr_y = y_off
            for y in range(len(chi.probs_y) - 1, -1, -1):  # bottom-up order
                py = chi.probs_y[y]
                yval_name = disc_y.values[y]
                if py == 0:
                    continue
                rect_h = square_size * py

                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(
                    self.canvas, curr_x + 2, curr_y + 2,
                    rect_w - 4, rect_h - 4,
                    z=-10, onclick=self.select_area)
                rect.value_pair = x, y
                self.areas.append(rect)
                show_pearson(rect, chi.residuals[y, x], 3 * selected)
                rect.setToolTip(make_tooltip())

                if not ylabels_drawn:
                    text(yval_name, x_off, curr_y + rect_h / 2,
                         Qt.AlignRight | Qt.AlignVCenter)
                curr_y += rect_h
            ylabels_drawn = True

            xl = text(xval_name, curr_x + rect_w / 2, y_off + square_size,
                      Qt.AlignHCenter | Qt.AlignTop)
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
            curr_x += rect_w

        bottom = y_off + square_size + max_xlabel_h
        text(attr_y.name, 0, y_off + square_size / 2,
             Qt.AlignLeft | Qt.AlignVCenter, bold=True, vertical=True)
        text(attr_x.name, x_off + square_size / 2, bottom,
             Qt.AlignHCenter | Qt.AlignTop, bold=True)
        bottom += 30
        xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p),
                  0, bottom)
        # Assume similar height for both lines
        text("N = " + fmt(chi.n), 0, bottom - xl.boundingRect().height())

    def get_widget_name_extension(self):
        """Return "<x> vs <y>" when data and both attributes are set."""
        if self.data is None or self.attr_x is None or self.attr_y is None:
            return None
        return "{} vs {}".format(self.attr_x.name, self.attr_y.name)

    def send_report(self):
        self.report_plot()
class OWMosaicDisplay(OWWidget):
    """
    Widget that draws a mosaic display for up to four variables.

    The plot is drawn on a `QGraphicsScene` (`self.canvas`).  Cells are
    coloured either by Pearson residuals or by the distribution of the
    variable chosen in the "Interior Coloring" combo.  Clicking cells
    selects the matching rows, which are sent to the outputs.
    """
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    # Values of `interior_coloring`
    PEARSON, CLASS_DISTRIBUTION = 0, 1

    settingsHandler = DomainContextHandler()
    use_boxes = Setting(True)
    interior_coloring = Setting(CLASS_DISTRIBUTION)
    # Names of the (up to four) shown variables; "" or "(None)" means unused
    variable1 = ContextSetting("", exclude_metas=False)
    variable2 = ContextSetting("", exclude_metas=False)
    variable3 = ContextSetting("", exclude_metas=False)
    variable4 = ContextSetting("", exclude_metas=False)
    variable_color = ContextSetting("", exclude_metas=False)
    # Indices into `self.areas` of the selected cells
    selection = ContextSetting(set())

    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3

    # Colour ramps for positive (blue) and negative (red) Pearson residuals
    BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255),
                   QColor(110, 110, 255), QColor(0, 0, 255)]
    RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200),
                  QColor(255, 100, 100), QColor(255, 0, 0)]

    vizrank = SettingProvider(MosaicVizRank)

    graph_name = "canvas"

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of continuous variables on SQL is not supported")

    def __init__(self):
        """Build the canvas, the four variable combos and colouring box."""
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.unprocessed_subset_data = None
        self.subset_data = None
        self.color_data = None

        # List of (used_attrs, used_vals, outer_rect), one per drawn cell
        self.areas = []

        self.canvas = QGraphicsScene()
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.attr_combos = [
            gui.comboBox(
                box, self, value="variable{}".format(i),
                orientation=Qt.Horizontal, contentsLength=12,
                callback=self.reset_graph,
                sendSelectedValue=True, valueType=str, emptyString="(None)")
            for i in range(1, 5)]
        self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank(
            box, self, "Find Informative Mosaics", self.set_attr)

        box2 = gui.vBox(self.controlArea, box="Interior Coloring")
        dmod = DomainModel
        self.color_model = DomainModel(order=dmod.MIXED,
                                       valid_types=dmod.PRIMITIVE,
                                       placeholder="(Pearson residuals)")
        self.cb_attr_color = gui.comboBox(
            box2, self, value="variable_color",
            orientation=Qt.Horizontal, contentsLength=12, labelWidth=50,
            callback=self.set_color_data,
            sendSelectedValue=True, model=self.color_model, valueType=str)
        self.bar_button = gui.checkBox(
            box2, self, 'use_boxes', label='Compare with total',
            callback=self._compare_with_total)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(720, 530)

    def _compare_with_total(self):
        """
        Callback of the 'Compare with total' check box.

        If the data has a class variable but the interior is not coloured
        by class distribution, switch to class distribution first.
        """
        if self.data is not None and \
                self.data.domain.class_var is not None and \
                self.interior_coloring != self.CLASS_DISTRIBUTION:
            self.interior_coloring = self.CLASS_DISTRIBUTION
            self.coloring_changed()  # This also calls self.update_graph
        else:
            self.update_graph()

    def _get_discrete_data(self, data):
        """
        Discretize continuous attributes.

        Return None when there is no data, no rows, or no discrete or
        continuous attributes.  Data without continuous variables in the
        domain is returned unchanged.
        """
        if (data is None or
                not len(data) or
                not any(attr.is_discrete or attr.is_continuous
                        for attr in chain(data.domain, data.domain.metas))):
            return None
        elif any(attr.is_continuous for attr in data.domain):
            return Discretize(
                method=EqualFreq(n=4), remove_const=False,
                discretize_classes=True, discretize_metas=True)(data)
        else:
            return data

    def init_combos(self, data):
        """
        Fill the variable combos from `data` and pick default variables.

        All combos but the first get a leading "(None)" item.  The colour
        combo is set to the class variable when the data has one.
        """
        for combo in self.attr_combos:
            combo.clear()
        if data is None:
            return
        for combo in self.attr_combos[1:]:
            combo.addItem("(None)")

        icons = gui.attributeIconDict
        for attr in chain(data.domain, data.domain.metas):
            if attr.is_primitive:
                for combo in self.attr_combos:
                    combo.addItem(icons[attr], attr.name)

        if self.attr_combos[0].count() > 0:
            self.variable1 = self.attr_combos[0].itemText(0)
            # Index 0 is "(None)" and 1 repeats variable1; take index 2
            # (the second variable) when the combo has that many items
            self.variable2 = self.attr_combos[1].itemText(
                2 * (self.attr_combos[1].count() > 2))
        self.variable3 = self.attr_combos[2].itemText(0)
        self.variable4 = self.attr_combos[3].itemText(0)
        if data.domain.class_var:
            self.variable_color = data.domain.class_var.name
            idx = self.cb_attr_color.findText(self.variable_color)
        else:
            idx = 0
        self.cb_attr_color.setCurrentIndex(idx)

    def get_attr_list(self):
        """Return the names of the chosen variables, skipping empty ones."""
        return [
            a for a in [self.variable1, self.variable2,
                        self.variable3, self.variable4]
            if a and a != "(None)"]

    def set_attr(self, *attrs):
        """Set the four shown variables (None for unused) and redraw."""
        self.variable1, self.variable2, self.variable3, self.variable4 = \
            [a.name if a else "" for a in attrs]
        self.reset_graph()

    def resizeEvent(self, e):
        """Redraw the plot after the widget is resized."""
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        """Redraw the plot when the widget is shown."""
        OWWidget.showEvent(self, ev)
        self.update_graph()

    @Inputs.data
    def set_data(self, data):
        """
        Handle new input data.

        Large SQL tables are sampled.  The combos are re-initialised, the
        context is opened, and a subset that arrived before the data is
        processed now.

        Args:
            data (Table): input data or None
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            return

        self.color_model.set_domain(self.data.domain)
        self.init_combos(self.data)

        self.openContext(self.data)

        # if we first received subset we now call setSubsetData to process it
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None

        self.set_color_data()

    @Inputs.data_subset
    def set_subset_data(self, data):
        """
        Handle a new data subset.

        Without data, the subset is stored and processed later in
        `set_data`.  Otherwise it is transformed into the data's domain;
        if that fails, the subset is dropped and a warning is shown.

        Args:
            data (Table): subset of the input data or None
        """
        self.Warning.incompatible_subset.clear()
        if self.data is None:
            self.unprocessed_subset_data = data
            return
        if data is None:
            self.subset_data = None
            return
        try:
            self.subset_data = data.transform(self.data.domain)
        except Exception:  # best effort: any failure means "incompatible"
            self.subset_data = None
            self.Warning.incompatible_subset()

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        """Clear the selection and redraw once all inputs are set."""
        self.reset_graph()

    def clear_selection(self):
        """Deselect all cells and send the (empty) selection."""
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def coloring_changed(self):
        """Notify vizrank about the new colouring and redraw."""
        self.vizrank.coloring_changed()
        self.update_graph()

    def reset_graph(self):
        """Clear the selection and redraw the plot."""
        self.clear_selection()
        self.update_graph()

    def set_color_data(self):
        """
        Prepare `color_data` and `discrete_data` for the chosen colouring.

        The variable selected in the colour combo (if any) becomes the
        class variable of the new domain; with no variable selected, the
        interior is coloured by Pearson residuals.
        """
        if self.data is None or len(self.data) < 2 or \
                len(self.data.domain.attributes) < 1:
            return
        if self.cb_attr_color.currentIndex() <= 0:
            color_var = None
            self.interior_coloring = self.PEARSON
            self.bar_button.setEnabled(False)
        else:
            color_var = self.data.domain[self.cb_attr_color.currentText()]
            self.interior_coloring = self.CLASS_DISTRIBUTION
            self.bar_button.setEnabled(True)
        attributes = [v for v in self.data.domain if v != color_var]
        metas = [v for v in self.data.domain.metas if v != color_var]
        domain = Domain(attributes, color_var, metas)
        self.color_data = color_data = \
            self.data.from_table(domain, self.data)
        self.discrete_data = self._get_discrete_data(color_data)
        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(True)
        self.coloring_changed()

    def update_selection_rects(self):
        """Outline the selected cells with a dotted pen; reset the rest."""
        for i, (_, _, area) in enumerate(self.areas):
            if i in self.selection:
                area.setPen(QPen(Qt.black, 3, Qt.DotLine))
            else:
                area.setPen(QPen())

    def select_area(self, index, ev):
        """
        Handle a mouse click on the cell with the given index.

        Only the left button is handled.  With Ctrl pressed the cell is
        toggled in the selection; otherwise it replaces the selection.
        """
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        """
        Send the rows matching the selected cells to the outputs.

        Rows are filtered on the discretized data and mapped back to the
        original data through their ids.
        """
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, row_id in enumerate(self.data.ids)
                   if row_id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot(self.canvas)

    def update_graph(self):
        """
        Redraw the whole mosaic: cells, value/variable labels and legend.

        The nested helpers share state through closure variables that are
        assigned in the main body below them (`data`, `class_var`,
        `conditionaldict`, `distributiondict`, `drawn_sides`, ...).
        """
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def get_counts(attr_vals, values):
            """This function calculates rectangles' widths.
            If all widths are zero then all widths are set to 1."""
            if attr_vals == "":
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [conditionaldict[attr_vals + "-" + val]
                          for val in values]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)
            return total, counts

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs, used_vals, attr_vals=""):
            """Recursively split the given area by `attr_list[0]`."""
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            # even sides split along the x axis, odd sides along the y axis
            extent = (x1 - x0) if side % 2 == 0 else (y1 - y0)
            n_gaps = len(values) - 1
            # remove the space needed for separating different attr. values
            whole = max(0, extent - edge * n_gaps)
            if whole == 0:
                # no room for the regular gaps: spread the values evenly;
                # a single value has no gaps, so avoid dividing by zero
                edge = extent / float(n_gaps) if n_gaps else 0

            total, counts = get_counts(attr_vals, values)

            # if we are visualizing the third attribute and the first attribute
            # has the last value, we have to reverse the order in which the
            # boxes will be drawn otherwise, if the last cell, nearest to the
            # labels of the fourth attribute, is empty, we wouldn't be able to
            # position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(
                    data.domain[used_attrs[0]])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                if attr_vals != "":
                    newattrvals = attr_vals + "-" + val
                else:
                    newattrvals = val

                tooltip = condition + 4 * " " + attr + \
                    ": <b>" + htmlval + "</b><br>"
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                common_args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1,
                                  tooltip, total_attrs, *common_args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1,
                                  tooltip, total_attrs, *common_args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            """Draw value labels and the variable name on the given side."""
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = \
                    get_variable_values_sorted(data.domain[used_attrs[0]])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            total, counts = get_counts(attr_vals, values)

            aligns = [Qt.AlignTop | Qt.AlignHCenter,
                      Qt.AlignRight | Qt.AlignVCenter,
                      Qt.AlignBottom | Qt.AlignHCenter,
                      Qt.AlignLeft | Qt.AlignVCenter]
            align = aligns[side]
            for i, val in enumerate(values):
                perc = counts[i] / float(total)
                if distributiondict[val] != 0:
                    if side == 0:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * 0.5 * perc,
                                   y1 + self.ATTR_VAL_OFFSET, align)
                    elif side == 1:
                        CanvasText(self.canvas, str(val),
                                   x0 - self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)
                    elif side == 2:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * perc * 0.5,
                                   y0 - self.ATTR_VAL_OFFSET, align)
                    else:
                        CanvasText(self.canvas, str(val),
                                   x1 + self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)

                if side % 2 == 0:
                    currpos += perc * width + spacing * (total_attrs - side)
                else:
                    currpos += perc * height + spacing * (total_attrs - side)

            if side == 0:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                    align, bold=1)
            elif side == 1:
                CanvasText(
                    self.canvas, attr,
                    x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)
            elif side == 2:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                    align, bold=1)
            else:
                CanvasText(
                    self.canvas, attr,
                    x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)

        def add_rect(x0, x1, y0, y1, condition,
                     used_attrs, used_vals, attr_vals=""):
            """Draw one cell, its interior colouring and its tooltip."""
            area_index = len(self.areas)
            if x0 == x1:
                x1 += 1
            if y0 == y1:
                y1 += 1

            # rectangles of width and height 1 are not shown - increase
            if x1 - x0 + y1 - y0 == 2:
                y1 += 1

            if class_var:
                colors = [QColor(*col) for col in class_var.colors]
            else:
                colors = None

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                        self.canvas, x, y, w, h, z=z, onclick=select_area,
                        **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                    self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                    onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.interior_coloring == self.PEARSON:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul,
                    (apriori_dists[i][used_vals[i]] / float(s)
                     for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = (actual - expected) / sqrt(expected)
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" +
                    "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        # the last stripe takes whatever height remains
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and \
                        abs(y1 - y0) > bar_width:
                    # bar on the left edge showing the prior distribution
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        counts = [conditionalsubsetdict[attr_vals + "-" + val]
                                  for val in cls_values]
                        if sum(counts) == 1:
                            rect(x0 - 2, y0 - 2, x1 - x0 + 5, y1 - y0 + 5, -550,
                                 colors[counts.index(1)], Qt.white,
                                 penWidth=2, penStyle=Qt.DashLine)
                    if self.subset_data is not None:
                        # bar on the right edge showing the subset
                        line(x1 - bar_width, y0, x1 - bar_width, y1)
                        total = 0
                        n = conditionalsubsetdict[attr_vals]
                        if n:
                            for i, (cls, color) in \
                                    enumerate(zip(cls_values, colors)):
                                val = conditionalsubsetdict[
                                    attr_vals + "-" + cls]
                                if val == 0:
                                    continue
                                if i == len(prior) - 1:
                                    v = y1 - y0 - total
                                else:
                                    v = ((y1 - y0) * val) / n
                                rect(x1 - bar_width, y0 + total,
                                     bar_width, v, 15, color)
                                total += v

                actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                          for i in range(len(prior))]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                # `text` is joined without a trailing separator, so it is
                # used whole; slicing it would cut off the last line
                outer_rect.setToolTip(
                    "{}<hr>Instances: {}<br><br>{}".format(
                        condition, n_actual, text))

        def draw_legend(x0_x1, y0_y1):
            """Draw the colour legend below the plot."""
            x0, x1 = x0_x1
            _, y1 = y0_y1
            if self.interior_coloring == self.PEARSON:
                names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                         "Residuals:"]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
            else:
                names = get_variable_values_sorted(class_var) + \
                    [class_var.name + ":"]
                colors = [QColor(*col) for col in class_var.colors]

            names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
                     for name in names]
            totalwidth = sum(text.boundingRect().width() for text in names)

            # compute the x position of the center of the legend
            y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
            distance = 30
            startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2

            # the last name is the legend title; it is placed first
            names[-1].setPos(startx + 15, y)
            names[-1].show()
            xoffset = names[-1].boundingRect().width() + distance

            size = 8

            for i in range(len(names) - 1):
                if self.interior_coloring == self.PEARSON:
                    edgecolor = Qt.black
                else:
                    edgecolor = colors[i]

                CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                                size, size, edgecolor, colors[i])
                names[i].setPos(startx + xoffset + 10, y)
                xoffset += distance + names[i].boundingRect().width()

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        subset = self.subset_data
        attr_list = self.get_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = isinstance(data, SqlTable)
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        attrs = [attr for attr in attr_list if not data.domain[attr].values]
        if attrs:
            CanvasText(self.canvas,
                       "Feature {} has no values".format(attrs[0]),
                       (self.canvas_view.width() - 120) / 2,
                       self.canvas_view.height() / 2)
            return
        if self.interior_coloring == self.PEARSON:
            apriori_dists = [get_distribution(data, attr)
                             for attr in attr_list]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            """Return the width of the widest value label of `attr`."""
            values = get_variable_values_sorted(data.domain[attr])
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        # get the maximum width of rectangle
        xoff = 20
        width = 20
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1], bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3], bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        # get the maximum height of rectangle
        height = 100
        yoff = 45
        square_size = min(self.canvas_view.width() - width - 20,
                          self.canvas_view.height() - height - 20)

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
            0, 0, self.canvas_view.width(), self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if subset:
            conditionalsubsetdict, _ = \
                get_conditional_distribution(subset, attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list), [], [])
        draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size))
        self.update_selection_rects()
class OWMosaicDisplay(OWWidget): name = "Mosaic Display" description = "Display data in a mosaic plot." icon = "icons/MosaicDisplay.svg" priority = 220 keywords = [] class Inputs: data = Input("Data", Table, default=True) data_subset = Input("Data Subset", Table) class Outputs: selected_data = Output("Selected Data", Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table) settingsHandler = DomainContextHandler() vizrank = SettingProvider(MosaicVizRank) settings_version = 2 use_boxes = Setting(True) variable1 = ContextSetting(None) variable2 = ContextSetting(None) variable3 = ContextSetting(None) variable4 = ContextSetting(None) variable_color = ContextSetting(None) selection = ContextSetting(set()) BAR_WIDTH = 5 SPACING = 4 ATTR_NAME_OFFSET = 20 ATTR_VAL_OFFSET = 3 BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255), QColor(110, 110, 255), QColor(0, 0, 255)] RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200), QColor(255, 100, 100), QColor(255, 0, 0)] graph_name = "canvas" attrs_changed_manually = Signal(list) class Warning(OWWidget.Warning): incompatible_subset = Msg("Data subset is incompatible with Data") no_valid_data = Msg("No valid data") no_cont_selection_sql = \ Msg("Selection of numeric features on SQL is not supported") def __init__(self): super().__init__() = None self.discrete_data = None self.subset_data = None self.subset_indices = None self.color_data = None self.areas = [] self.canvas = QGraphicsScene() self.canvas_view = ViewWithPress( self.canvas, handler=self.clear_selection) self.mainArea.layout().addWidget(self.canvas_view) self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setRenderHint(QPainter.Antialiasing) box = gui.vBox(self.controlArea, box=True) self.model_1 = DomainModel( order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE) self.model_234 = DomainModel( order=DomainModel.MIXED, 
valid_types=DomainModel.PRIMITIVE, placeholder="(None)") self.attr_combos = [ gui.comboBox( box, self, value="variable{}".format(i), orientation=Qt.Horizontal, contentsLength=12, callback=self.attr_changed, model=self.model_1 if i == 1 else self.model_234) for i in range(1, 5)] self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank( box, self, "Find Informative Mosaics", self.set_attr) box2 = gui.vBox(self.controlArea, box="Interior Coloring") self.color_model = DomainModel( order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE, placeholder="(Pearson residuals)") self.cb_attr_color = gui.comboBox( box2, self, value="variable_color", orientation=Qt.Horizontal, contentsLength=12, labelWidth=50, callback=self.set_color_data, model=self.color_model) self.bar_button = gui.checkBox( box2, self, 'use_boxes', label='Compare with total', callback=self.update_graph) gui.rubber(self.controlArea) def sizeHint(self): return QSize(720, 530) def _get_discrete_data(self, data): """ Discretize continuous attributes. Return None when there is no data, no rows, or no primitive attributes. 
""" if (data is None or not len(data) or not any(attr.is_discrete or attr.is_continuous for attr in chain(data.domain.variables, data.domain.metas))): return None elif any(attr.is_continuous for attr in data.domain.variables): return Discretize( method=EqualFreq(n=4), remove_const=False, discretize_classes=True, discretize_metas=True)(data) else: return data def init_combos(self, data): def set_combos(value): self.model_1.set_domain(value) self.model_234.set_domain(value) self.color_model.set_domain(value) if data is None: set_combos(None) self.variable1 = self.variable2 = self.variable3 \ = self.variable4 = self.variable_color = None return set_combos( if len(self.model_1) > 0: self.variable1 = self.model_1[0] self.variable2 = self.model_1[min(1, len(self.model_1) - 1)] self.variable3 = self.variable4 = None self.variable_color = # None is OK, too def get_disc_attr_list(self): return [self.discrete_data.domain[] for var in (self.variable1, self.variable2, self.variable3, self.variable4) if var] def set_attr(self, *attrs): self.variable1, self.variable2, self.variable3, self.variable4 = [ attr and[] for attr in attrs] self.reset_graph() def attr_changed(self): self.attrs_changed_manually.emit(self.get_disc_attr_list()) self.reset_graph() def resizeEvent(self, e): OWWidget.resizeEvent(self, e) self.update_graph() def showEvent(self, ev): OWWidget.showEvent(self, ev) self.update_graph() def set_data(self, data): if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE: data = data.sample_time(DEFAULT_SAMPLE_TIME) self.closeContext() = data self.vizrank.stop_and_reset() self.vizrank_button.setEnabled( is not None and len( > 1 and len( >= 1) if is None: self.discrete_data = None self.init_combos(None) return self.init_combos( self.openContext( @Inputs.data_subset def set_subset_data(self, data): self.subset_data = data # this is called by widget after setData and setSubsetData are called. 
# this way the graph is updated only once def handleNewSignals(self): self.Warning.incompatible_subset.clear() self.subset_indices = None if is not None and self.subset_data: transformed = self.subset_data.transform( if np.all(np.isnan(transformed.X)) \ and np.all(np.isnan(transformed.Y)): self.Warning.incompatible_subset() else: indices = { for e in transformed} self.subset_indices = [ in indices for ex in] self.set_color_data() self.reset_graph() def clear_selection(self): self.selection = set() self.update_selection_rects() self.send_selection() def coloring_changed(self): self.vizrank.coloring_changed() self.update_graph() def reset_graph(self): self.clear_selection() self.update_graph() def set_color_data(self): if is None: return self.bar_button.setEnabled(self.variable_color is not None) attrs = [v for v in self.model_1 if v and v is not self.variable_color] domain = Domain(attrs, self.variable_color, None) self.color_data =, self.discrete_data = self._get_discrete_data(self.color_data) self.vizrank.stop_and_reset() self.vizrank_button.setEnabled(True) self.coloring_changed() def update_selection_rects(self): pens = (QPen(), QPen(, 3, Qt.DotLine)) for i, (_, _, area) in enumerate(self.areas): area.setPen(pens[i in self.selection]) def select_area(self, index, ev): if ev.button() != Qt.LeftButton: return if ev.modifiers() & Qt.ControlModifier: self.selection ^= {index} else: self.selection = {index} self.update_selection_rects() self.send_selection() def send_selection(self): if not self.selection or is None: self.Outputs.selected_data.send(None) self.Outputs.annotated_data.send( create_annotated_table(, [])) return filters = [] self.Warning.no_cont_selection_sql.clear() if self.discrete_data is not if isinstance(, SqlTable): self.Warning.no_cont_selection_sql() for i in self.selection: cols, vals, _ = self.areas[i] filters.append( filter.Values( filter.FilterDiscrete(col, [val]) for col, val in zip(cols, vals))) if len(filters) > 1: filters = 
filter.Values(filters, conjunction=False) else: filters = filters[0] selection = filters(self.discrete_data) idset = set(selection.ids) sel_idx = [i for i, id in enumerate( if id in idset] if self.discrete_data is not selection =[sel_idx] self.Outputs.selected_data.send(selection) self.Outputs.annotated_data.send( create_annotated_table(, sel_idx)) def send_report(self): self.report_plot(self.canvas) def update_graph(self): spacing = self.SPACING bar_width = self.BAR_WIDTH def get_counts(attr_vals, values): """Calculate rectangles' widths; if all are 0, they are set to 1.""" if not attr_vals: counts = [conditionaldict[val] for val in values] else: counts = [conditionaldict[attr_vals + "-" + val] for val in values] total = sum(counts) if total == 0: counts = [1] * len(values) total = sum(counts) return total, counts def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs, used_attrs, used_vals, attr_vals=""): x0, x1 = x0_x1 y0, y1 = y0_y1 if conditionaldict[attr_vals] == 0: add_rect(x0, x1, y0, y1, "", used_attrs, used_vals, attr_vals=attr_vals) # store coordinates for later drawing of labels draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) return attr = attr_list[0] # how much smaller rectangles do we draw edge = len(attr_list) * spacing values = get_variable_values_sorted(attr) if side % 2: values = values[::-1] # reverse names if necessary if side % 2 == 0: # we are drawing on the x axis # remove the space needed for separating different attr. 
values whole = max(0, (x1 - x0) - edge * (len(values) - 1)) if whole == 0: edge = (x1 - x0) / float(len(values) - 1) else: # we are drawing on the y axis whole = max(0, (y1 - y0) - edge * (len(values) - 1)) if whole == 0: edge = (y1 - y0) / float(len(values) - 1) total, counts = get_counts(attr_vals, values) # when visualizing the third attribute and the first attribute has # the last value, reverse the order in which the boxes are drawn; # otherwise, if the last cell, nearest to the labels of the fourth # attribute, is empty, we wouldn't be able to position the labels valrange = list(range(len(values))) if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2: attr1values = get_variable_values_sorted(used_attrs[0]) if used_vals[0] == attr1values[-1]: valrange = valrange[::-1] for i in valrange: start = i * edge + whole * float(sum(counts[:i]) / total) end = i * edge + whole * float(sum(counts[:i + 1]) / total) val = values[i] htmlval = to_html(val) newattrvals = attr_vals + "-" + val if attr_vals else val tooltip = "{} {}: <b>{}</b><br/>".format( condition,, htmlval) attrs = used_attrs + [attr] vals = used_vals + [val] args = attrs, vals, newattrvals if side % 2 == 0: # if we are moving horizontally if len(attr_list) == 1: add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args) else: draw_data( attr_list[1:], (x0 + start, x0 + end), (y0, y1), side + 1, tooltip, total_attrs, *args) else: if len(attr_list) == 1: add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args) else: draw_data( attr_list[1:], (x0, x1), (y0 + start, y0 + end), side + 1, tooltip, total_attrs, *args) draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs, used_vals, attr_vals): x0, x1 = x0_x1 y0, y1 = y0_y1 if side in drawn_sides: return # the text on the right will be drawn when we are processing # visualization of the last value of the first attribute if side == 3: attr1values = 
get_variable_values_sorted(used_attrs[0]) if used_vals[0] != attr1values[-1]: return if not conditionaldict[attr_vals]: if side not in draw_positions: draw_positions[side] = (x0, x1, y0, y1) return else: if side in draw_positions: # restore the positions of attribute values and name (x0, x1, y0, y1) = draw_positions[side] drawn_sides.add(side) values = get_variable_values_sorted(attr) if side % 2: values = values[::-1] spaces = spacing * (total_attrs - side) * (len(values) - 1) width = x1 - x0 - spaces * (side % 2 == 0) height = y1 - y0 - spaces * (side % 2 == 1) # calculate position of first attribute currpos = 0 total, counts = get_counts(attr_vals, values) aligns = [Qt.AlignTop | Qt.AlignHCenter, Qt.AlignRight | Qt.AlignVCenter, Qt.AlignBottom | Qt.AlignHCenter, Qt.AlignLeft | Qt.AlignVCenter] align = aligns[side] for i, val in enumerate(values): if distributiondict[val] != 0: perc = counts[i] / float(total) xs = [x0 + currpos + width * 0.5 * perc, x0 - self.ATTR_VAL_OFFSET, x0 + currpos + width * perc * 0.5, x1 + self.ATTR_VAL_OFFSET] ys = [y1 + self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc, y0 - self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc] CanvasText(self.canvas, val, xs[side], ys[side], align) space = height if side % 2 else width currpos += perc * space + spacing * (total_attrs - side) xs = [x0 + (x1 - x0) / 2, x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET, x0 + (x1 - x0) / 2, x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET] ys = [y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET, y0 + (y1 - y0) / 2, y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET, y0 + (y1 - y0) / 2] CanvasText( self.canvas,, xs[side], ys[side], align, bold=True, vertical=side % 2) def add_rect(x0, x1, y0, y1, condition, used_attrs, used_vals, attr_vals=""): area_index = len(self.areas) x1 += (x0 == x1) y1 += (y0 == y1) # rectangles of width and height 1 are not shown - increase y1 += (x1 - x0 + y1 - y0 == 2) colors = class_var and [QColor(*col) for col in class_var.colors] 
def select_area(_, ev): self.select_area(area_index, ev) def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args): if pen_color is None: return CanvasRectangle( self.canvas, x, y, w, h, z=z, onclick=select_area, **args) if brush_color is None: brush_color = pen_color return CanvasRectangle( self.canvas, x, y, w, h, pen_color, brush_color, z=z, onclick=select_area, **args) def line(x1, y1, x2, y2): r = QGraphicsLineItem(x1, y1, x2, y2, None) self.canvas.addItem(r) r.setPen(QPen(Qt.white, 2)) r.setZValue(30) outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30) self.areas.append((used_attrs, used_vals, outer_rect)) if not conditionaldict[attr_vals]: return if self.variable_color is None: s = sum(apriori_dists[0]) expected = s * reduce( mul, (apriori_dists[i][used_vals[i]] / float(s) for i in range(len(used_vals)))) actual = conditionaldict[attr_vals] pearson = float((actual - expected) / sqrt(expected)) if pearson == 0: ind = 0 else: ind = max(0, min(int(log(abs(pearson), 2)), 3)) color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind] rect(x0, y0, x1 - x0, y1 - y0, -20, color) outer_rect.setToolTip( condition + "<hr/>" + "Expected instances: %.1f<br>" "Actual instances: %d<br>" "Standardized (Pearson) residual: %.1f" % (expected, conditionaldict[attr_vals], pearson)) else: cls_values = get_variable_values_sorted(class_var) prior = get_distribution(data, total = 0 for i, value in enumerate(cls_values): val = conditionaldict[attr_vals + "-" + value] if val == 0: continue if i == len(cls_values) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / conditionaldict[attr_vals] rect(x0, y0 + total, x1 - x0, v, -20, colors[i]) total += v if self.use_boxes and \ abs(x1 - x0) > bar_width and abs(y1 - y0) > bar_width: total = 0 line(x0 + bar_width, y0, x0 + bar_width, y1) n = sum(prior) for i, (val, color) in enumerate(zip(prior, colors)): if i == len(prior) - 1: h = y1 - y0 - total else: h = (y1 - y0) * val / n rect(x0, y0 + total, bar_width, h, 20, color) total += 
h if conditionalsubsetdict: if conditionalsubsetdict[attr_vals]: if self.subset_indices is not None: line(x1 - bar_width, y0, x1 - bar_width, y1) total = 0 n = conditionalsubsetdict[attr_vals] if n: for i, (cls, color) in \ enumerate(zip(cls_values, colors)): val = conditionalsubsetdict[ attr_vals + "-" + cls] if val == 0: continue if i == len(prior) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / n rect(x1 - bar_width, y0 + total, bar_width, v, 15, color) total += v actual = [conditionaldict[attr_vals + "-" + cls_values[i]] for i in range(len(prior))] n_actual = sum(actual) if n_actual > 0: apriori = [prior[key] for key in cls_values] n_apriori = sum(apriori) text = "<br/>".join( "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" % (cls, act, 100.0 * act / n_actual, apr / n_apriori * n_actual, 100.0 * apr / n_apriori) for cls, act, apr in zip(cls_values, actual, apriori)) else: text = "" outer_rect.setToolTip( "{}<hr>Instances: {}<br><br>{}".format( condition, n_actual, text[:-4])) def draw_legend(x0_x1, y0_y1): x0, x1 = x0_x1 _, y1 = y0_y1 if self.variable_color is None: names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8", "Residuals:"] colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:] else: names = get_variable_values_sorted(class_var) + \ [ + ":"] colors = [QColor(*col) for col in class_var.colors] names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter) for name in names] totalwidth = sum(text.boundingRect().width() for text in names) # compute the x position of the center of the legend y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35 distance = 30 startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2 names[-1].setPos(startx + 15, y) names[-1].show() xoffset = names[-1].boundingRect().width() + distance size = 8 for i in range(len(names) - 1): if self.variable_color is None: edgecolor = else: edgecolor = colors[i] CanvasRectangle(self.canvas, startx + xoffset, y - size / 2, size, size, edgecolor, colors[i]) 
names[i].setPos(startx + xoffset + 10, y) xoffset += distance + names[i].boundingRect().width() self.canvas.clear() self.areas = [] data = self.discrete_data if data is None: return attr_list = self.get_disc_attr_list() class_var = data.domain.class_var if class_var: sql = isinstance(data, SqlTable) name = not sql and # save class_var because it is removed in the next line data = data[:, attr_list + [class_var]] data.domain.class_var = class_var if not sql: = name else: data = data[:, attr_list] # TODO: check this # data = Preprocessor_dropMissing(data) if len(data) == 0: self.Warning.no_valid_data() return else: self.Warning.no_valid_data.clear() attrs = [attr for attr in attr_list if not attr.values] if attrs: CanvasText(self.canvas, "Feature {} has no values".format(attrs[0]), (self.canvas_view.width() - 120) / 2, self.canvas_view.height() / 2) return if self.variable_color is None: apriori_dists = [get_distribution(data, attr) for attr in attr_list] else: apriori_dists = [] def get_max_label_width(attr): values = get_variable_values_sorted(attr) maxw = 0 for val in values: t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False) maxw = max(int(t.boundingRect().width()), maxw) return maxw # get the maximum width of rectangle xoff = 20 width = 20 max_ylabel_w1 = max_ylabel_w2 = 0 if len(attr_list) > 1: text = CanvasText(self.canvas, attr_list[1].name, bold=1, show=0) max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150) width = 5 + text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w1 xoff = width if len(attr_list) == 4: text = CanvasText(self.canvas, attr_list[3].name, bold=1, show=0) max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150) width += text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10 # get the maximum height of rectangle height = 100 yoff = 45 square_size = min(self.canvas_view.width() - width - 20, self.canvas_view.height() - height - 20) if square_size < 0: return # canvas is too small to draw 
rectangles self.canvas_view.setSceneRect( 0, 0, self.canvas_view.width(), self.canvas_view.height()) drawn_sides = set() draw_positions = {} conditionaldict, distributiondict = \ get_conditional_distribution(data, attr_list) conditionalsubsetdict = None if self.subset_indices: conditionalsubsetdict, _ = get_conditional_distribution( self.discrete_data[self.subset_indices], attr_list) # draw rectangles draw_data( attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size), 0, "", len(attr_list), [], []) draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size)) self.update_selection_rects() @classmethod def migrate_context(cls, context, version): if version < 2: settings.migrate_str_to_variable(context, none_placeholder="(None)")
class OWSieveDiagram(OWWidget):
    """
    Widget that draws a sieve diagram for a pair of attributes.

    A square is split into rectangles whose sides are proportional to the
    marginal probabilities of the values of the two chosen attributes; each
    rectangle is shaded and hatched according to its standardized Pearson
    residual. Clicking rectangles selects the matching rows, which are sent
    to the "Selection" output.
    """
    name = "Sieve Diagram"
    description = "Visualize the observed and expected frequencies " \
                  "for a combination of values."
    icon = "icons/SieveDiagram.svg"
    priority = 310

    inputs = [("Data", Table, "set_data", Default),
              ("Features", AttributeList, "set_input_features")]
    outputs = [("Selection", Table)]

    graph_name = "canvas"

    want_control_area = False

    settingsHandler = DomainContextHandler()
    attrX = ContextSetting("")
    attrY = ContextSetting("")
    selection = ContextSetting(set())

    def __init__(self):
        # pylint: disable=missing-docstring
        super().__init__()

        self.data = self.discrete_data = None
        self.attrs = []
        self.input_features = None
        self.areas = []
        self.selection = set()

        self.attr_box = gui.hBox(self.mainArea)
        # The model wraps self.attrs, so in-place changes of that list
        # (self.attrs[:] = ...) are reflected in both combos.
        model = VariableListModel()
        model.wrap(self.attrs)
        combo_args = dict(
            widget=self.attr_box, master=self, contentsLength=12,
            callback=self.update_attr, sendSelectedValue=True, valueType=str,
            model=model)
        fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed)
        self.attrXCombo = gui.comboBox(value="attrX", **combo_args)
        gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size)
        self.attrYCombo = gui.comboBox(value="attrY", **combo_args)
        self.vizrank = SieveRank(self)
        self.vizrank_button = gui.button(
            self.attr_box, self, "Score Combinations", sizePolicy=fixed_size,
            callback=self.vizrank.reshow, enabled=False)
        self.vizrank.pairSelected.connect(self.set_attr)

        self.canvas = QGraphicsScene()
        self.canvasView = ViewWithPress(
            self.canvas, self.mainArea, handler=self.reset_selection)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

        box = gui.hBox(self.mainArea)
        box.layout().addWidget(self.graphButton)
        box.layout().addWidget(self.report_button)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(450, 550)

    def resizeEvent(self, event):
        """Redraw the diagram so that it fits the new view size."""
        super().resizeEvent(event)
        self.update_graph()

    def showEvent(self, event):
        """Redraw the diagram when the widget becomes visible."""
        super().showEvent(event)
        self.update_graph()

    def closeEvent(self, event):
        """Close the vizrank dialog together with the widget."""
        self.vizrank.close()
        super().closeEvent(event)

    def hideEvent(self, event):
        """Hide the vizrank dialog together with the widget."""
        self.vizrank.hide()
        super().hideEvent(event)

    def set_data(self, data):
        """
        Discretize continuous attributes, and put all attributes and discrete
        metas into self.attrs, which is used as a model for combos.

        Select the first two attributes unless context overrides this.
        Method `resolve_shown_attributes` is called to use the attributes from
        the input, if it exists and matches the attributes in the data.

        Remove selection; again let the context override this.
        Initialize the vizrank dialog, but don't show it.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is None:
            self.attrs[:] = []
            # Do not keep the previous (possibly large) table alive
            self.discrete_data = None
        else:
            if any(attr.is_continuous for attr in data.domain):
                discretizer = Discretize(
                    method=EqualFreq(n=4),
                    discretize_classes=True, discretize_metas=True)
                self.discrete_data = discretizer(data)
            else:
                self.discrete_data = self.data
            # In-place assignment keeps the list wrapped by the combo model
            self.attrs[:] = list(chain(
                self.discrete_data.domain,
                (var for var in self.data.domain.metas if var.is_discrete)))
        if self.attrs:
            self.attrX = self.attrs[0].name
            # Index is 1 when there are at least two attributes, else 0
            self.attrY = self.attrs[len(self.attrs) > 1].name
        else:
            self.attrX = self.attrY = None
        self.openContext(self.data)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1 and
            len(self.data.domain.attributes) > 1)

    def set_attr(self, attr_x, attr_y):
        """
        Show the given pair of attributes.

        Connected to the `pairSelected` signal of the vizrank dialog.

        Args:
            attr_x: attribute for the horizontal axis; its `name` is stored
            attr_y: attribute for the vertical axis; its `name` is stored
        """
        self.attrX, self.attrY = attr_x.name, attr_y.name
        self.update_attr()

    def update_attr(self):
        """Update the graph and selection."""
        self.selection = set()
        self.update_graph()
        self.update_selection()

    def set_input_features(self, attr_list):
        """
        Handler for the Features signal.

        The method stores the attributes and calls `resolve_shown_attributes`

        Args:
            attr_list (AttributeList): data from the signal
        """
        self.input_features = attr_list
        self.resolve_shown_attributes()
        self.update_selection()

    def resolve_shown_attributes(self):
        """
        Use the attributes from the input signal if the signal is present
        and at least one of them appears in the domain. If there are
        multiple, use the first two; a single one is used for both axes.
        Combos are disabled if inputs are used.
        """
        self.warning()
        self.attr_box.setEnabled(True)
        if not self.input_features:  # None or empty
            return
        features = [f for f in self.input_features if f in self.attrs]
        if not features:
            self.warning(
                "Features from the input signal are not present in the data")
            return
        old_attrs = self.attrX, self.attrY
        # Doubling the list duplicates a single feature onto both axes
        self.attrX, self.attrY = [f.name for f in (features * 2)[:2]]
        self.attr_box.setEnabled(False)
        if (self.attrX, self.attrY) != old_attrs:
            self.selection = set()
            self.update_graph()

    def reset_selection(self):
        """Clear the selection and update the output."""
        self.selection = set()
        self.update_selection()

    def select_area(self, area, event):
        """
        Add or remove the clicked area from the selection

        Args:
            area: the rectangle item that was clicked; an element of
                `self.areas`
            event (QEvent): event description
        """
        if event.button() != Qt.LeftButton:
            return
        index = self.areas.index(area)
        if event.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection()

    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        if self.areas is None or not self.selection:
            self.send("Selection", None)
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                pen_width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attrX, [val_x]),
                        filter.FilterDiscrete(self.attrY, [val_y])
                    ]))
            else:
                pen_width = 1
            pen = area.pen()
            pen.setWidth(pen_width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        if self.discrete_data is not self.data:
            # The filter ran on the discretized copy; map the chosen rows
            # back to the original table through their ids.
            idset = set(selection.ids)
            sel_idx = [i for i, row_id in enumerate(self.data.ids)
                       if row_id in idset]
            selection = self.data[sel_idx]
        self.send("Selection", selection)

    def update_graph(self):
        """Update the graph."""
        # Function uses weird names like r, g, b, but it does it with utmost
        # caution, hence
        # pylint: disable=invalid-name

        def text(txt, *args, **kwargs):
            """Add `txt` to the canvas as an html-escaped text item."""
            return CanvasText(self.canvas, "", *args,
                              html_text=to_html(txt), **kwargs)

        def width(txt):
            """Return the width of `txt` when rendered on the canvas."""
            return text(txt, 0, 0, show=False).boundingRect().width()

        def fmt(val):
            """Format whole numbers without decimals, others with two."""
            return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val)

        def show_pearson(rect, pearson, pen_width):
            """
            Color the given rectangle according to its corresponding
            standardized Pearson residual.

            Args:
                rect: the rectangle item being drawn
                pearson (float): signed standardized pearson residual
                pen_width (int): pen width (bolder pen is used for selection)
            """
            r = rect.rect()
            x, y, w, h = r.x(), r.y(), r.width(), r.height()
            if w == 0 or h == 0:
                return

            # Channels are truncated to int: QColor(int, int, int) does not
            # accept floats on Python 3.10 and later.
            r = b = 255
            if pearson > 0:
                r = g = max(int(255 - 20 * pearson), 55)
            elif pearson < 0:
                b = g = max(int(255 + 20 * pearson), 55)
            else:
                r = g = b = 224
            rect.setBrush(QBrush(QColor(r, g, b)))
            pen_color = QColor(255 * (r == 255), 255 * (g == 255),
                               255 * (b == 255))
            pen = QPen(pen_color, pen_width)
            rect.setPen(pen)
            # Hatching: lines get denser for positive residuals and sparser
            # for negative ones; residuals are clamped to [-10, 10].
            if pearson > 0:
                pearson = min(pearson, 10)
                dist = 20 - 1.6 * pearson
            else:
                pearson = max(pearson, -10)
                dist = 20 - 8 * pearson
            # The rectangle already holds its own copy of the pen; the thin
            # pen below is used for the hatching lines only.
            pen.setWidth(1)

            def _offseted_line(ax, ay):
                """Draw a line across the rectangle at offset ax or ay."""
                line = QGraphicsLineItem(x + ax, y + ay,
                                         x + (ax or w), y + (ay or h))
                self.canvas.addItem(line)
                line.setPen(pen)

            ax = dist
            while ax < w:
                _offseted_line(ax, 0)
                ax += dist

            ay = dist
            while ay < h:
                _offseted_line(0, ay)
                ay += dist

        def make_tooltip():
            """Create the tooltip. The function uses local variables from
            the enclosing scope."""
            # pylint: disable=undefined-loop-variable
            def _oper(attr_name, txt):
                """Return the operator shown between attribute and value."""
                if self.data.domain[attr_name] is ddomain[attr_name]:
                    return "="
                return " " if txt.startswith(("<", "≥")) else " in "

            return (
                "<b>{attrX}{xeq}{xval_name}</b>: {obs_x}/{n} ({p_x:.0f} %)".
                format(attrX=to_html(attr_x),
                       xeq=_oper(attr_x, xval_name),
                       xval_name=to_html(xval_name),
                       obs_x=fmt(chi.probs_x[x] * n),
                       n=int(n),
                       p_x=100 * chi.probs_x[x]) +
                "<br/>" +
                "<b>{attrY}{yeq}{yval_name}</b>: {obs_y}/{n} ({p_y:.0f} %)".
                format(attrY=to_html(attr_y),
                       yeq=_oper(attr_y, yval_name),
                       yval_name=to_html(yval_name),
                       obs_y=fmt(chi.probs_y[y] * n),
                       n=int(n),
                       p_y=100 * chi.probs_y[y]) +
                "<hr/>" +
                ("<b>combination of values: </b><br/>"
                 " expected {exp} ({p_exp:.0f} %)<br/>"
                 " observed {obs} ({p_obs:.0f} %)").
                format(exp=fmt(chi.expected[y, x]),
                       p_exp=100 * chi.expected[y, x] / n,
                       obs=fmt(chi.observed[y, x]),
                       p_obs=100 * chi.observed[y, x] / n))

        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attrX is None or self.attrY is None:
            return

        ddomain = self.discrete_data.domain
        attr_x, attr_y = self.attrX, self.attrY
        disc_x, disc_y = ddomain[attr_x], ddomain[attr_y]
        view = self.canvasView

        chi = ChiSqStats(self.discrete_data, attr_x, attr_y)
        n = chi.n
        max_ylabel_w = max((width(val) for val in disc_y.values), default=0)
        max_ylabel_w = min(max_ylabel_w, 200)
        x_off = width(attr_x) + max_ylabel_w
        y_off = 15
        square_size = min(view.width() - x_off - 35,
                          view.height() - y_off - 50)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, view.width(), view.height())

        curr_x = x_off
        max_xlabel_h = 0
        self.areas = []
        # Value labels of the vertical axis are drawn along the first column
        # that is actually shown (columns with zero probability are skipped).
        first_column = True
        for x, (px, xval_name) in enumerate(zip(chi.probs_x, disc_x.values)):
            if px == 0:
                continue
            rect_w = square_size * px

            curr_y = y_off
            for y in range(len(chi.probs_y) - 1, -1, -1):  # bottom-up order
                py = chi.probs_y[y]
                yval_name = disc_y.values[y]
                if py == 0:
                    continue
                rect_h = square_size * py

                # The index the rectangle is about to get in self.areas
                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(
                    self.canvas, curr_x + 2, curr_y + 2,
                    rect_w - 4, rect_h - 4,
                    z=-10, onclick=self.select_area)
                rect.value_pair = x, y
                self.areas.append(rect)
                show_pearson(rect, chi.residuals[y, x], 3 * selected)
                rect.setToolTip(make_tooltip())

                if first_column:
                    text(yval_name, x_off, curr_y + rect_h / 2,
                         Qt.AlignRight | Qt.AlignVCenter)
                curr_y += rect_h

            xl = text(xval_name, curr_x + rect_w / 2, y_off + square_size,
                      Qt.AlignHCenter | Qt.AlignTop)
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
            curr_x += rect_w
            first_column = False

        bottom = y_off + square_size + max_xlabel_h
        text(attr_y, 0, y_off + square_size / 2,
             Qt.AlignLeft | Qt.AlignVCenter, bold=True, vertical=True)
        text(attr_x, x_off + square_size / 2, bottom,
             Qt.AlignHCenter | Qt.AlignTop, bold=True)
        xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p),
                  0, bottom)
        # Assume similar height for both lines
        text("N = " + fmt(chi.n), 0, bottom - xl.boundingRect().height())

    def get_widget_name_extension(self):
        """Return "<x> vs <y>" for the shown pair, or None without data."""
        if self.data is not None:
            return "{} vs {}".format(self.attrX, self.attrY)
        return None

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot()