class OWMosaicDisplay(OWWidget): name = "Mosaic Display" description = "Display data in a mosaic plot." icon = "icons/MosaicDisplay.svg" priority = 220 keywords = [] class Inputs: data = Input("Data", Table, default=True) data_subset = Input("Data Subset", Table) class Outputs: selected_data = Output("Selected Data", Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table) settingsHandler = DomainContextHandler() vizrank = SettingProvider(MosaicVizRank) settings_version = 2 use_boxes = Setting(True) variable1: Variable = ContextSetting(None) variable2: Variable = ContextSetting(None) variable3: Variable = ContextSetting(None) variable4: Variable = ContextSetting(None) variable_color: DiscreteVariable = ContextSetting(None) selection = Setting(set(), schema_only=True) BAR_WIDTH = 5 SPACING = 4 ATTR_NAME_OFFSET = 20 ATTR_VAL_OFFSET = 3 BLUE_COLORS = [ QColor(255, 255, 255), QColor(210, 210, 255), QColor(110, 110, 255), QColor(0, 0, 255) ] RED_COLORS = [ QColor(255, 255, 255), QColor(255, 200, 200), QColor(255, 100, 100), QColor(255, 0, 0) ] graph_name = "canvas" attrs_changed_manually = Signal(list) class Warning(OWWidget.Warning): incompatible_subset = Msg("Data subset is incompatible with Data") no_valid_data = Msg("No valid data") no_cont_selection_sql = \ Msg("Selection of numeric features on SQL is not supported") def __init__(self): super().__init__() self.data = None self.discrete_data = None self.subset_data = None self.subset_indices = None self.__pending_selection = self.selection self.selection = set() self.color_data = None self.areas = [] self.canvas = QGraphicsScene(self) self.canvas_view = ViewWithPress(self.canvas, handler=self.clear_selection) self.mainArea.layout().addWidget(self.canvas_view) self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setRenderHint(QPainter.Antialiasing) box = gui.vBox(self.controlArea, box=True) self.model_1 = DomainModel(order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE) self.model_234 = DomainModel(order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE, placeholder="(None)") self.attr_combos = [ gui.comboBox(box, self, value="variable{}".format(i), orientation=Qt.Horizontal, contentsLength=12, searchable=True, callback=self.attr_changed, model=self.model_1 if i == 1 else self.model_234) for i in range(1, 5) ] self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank( box, self, "Find Informative Mosaics", self.set_attr) box2 = gui.vBox(self.controlArea, box="Interior Coloring") self.color_model = DomainModel(order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE, placeholder="(Pearson residuals)") self.cb_attr_color = gui.comboBox(box2, self, value="variable_color", orientation=Qt.Horizontal, contentsLength=12, labelWidth=50, searchable=True, callback=self.set_color_data, model=self.color_model) self.bar_button = gui.checkBox(box2, self, 'use_boxes', label='Compare with total', callback=self.update_graph) gui.rubber(self.controlArea) def sizeHint(self): return QSize(720, 530) def _get_discrete_data(self, data): """ Discretize continuous attributes. Return None when there is no data, no rows, or no primitive attributes. """ if (data is None or not len(data) or not any( attr.is_discrete or attr.is_continuous for attr in chain(data.domain.variables, data.domain.metas))): return None elif any(attr.is_continuous for attr in data.domain.variables): return Discretize(method=EqualFreq(n=4), remove_const=False, discretize_classes=True, discretize_metas=True)(data) else: return data def init_combos(self, data): def set_combos(value): self.model_1.set_domain(value) self.model_234.set_domain(value) self.color_model.set_domain(value) if data is None: set_combos(None) self.variable1 = self.variable2 = self.variable3 \ = self.variable4 = self.variable_color = None return set_combos(self.data.domain) if len(self.model_1) > 0: self.variable1 = self.model_1[0] self.variable2 = self.model_1[min(1, len(self.model_1) - 1)] self.variable3 = self.variable4 = None self.variable_color = self.data.domain.class_var # None is OK, too def get_disc_attr_list(self): return [ self.discrete_data.domain[var.name] for var in (self.variable1, self.variable2, self.variable3, self.variable4) if var ] def set_attr(self, *attrs): self.variable1, self.variable2, self.variable3, self.variable4 = [ attr and self.data.domain[attr.name] for attr in attrs ] self.reset_graph() def attr_changed(self): self.attrs_changed_manually.emit(self.get_disc_attr_list()) self.reset_graph() def resizeEvent(self, e): OWWidget.resizeEvent(self, e) self.update_graph() def showEvent(self, ev): OWWidget.showEvent(self, ev) self.update_graph() @Inputs.data def set_data(self, data): if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE: data = data.sample_time(DEFAULT_SAMPLE_TIME) self.closeContext() self.data = data self.vizrank.stop_and_reset() self.vizrank_button.setEnabled( self.data is not None and len(self.data) > 1 and len(self.data.domain.attributes) >= 1) if self.data is None: self.discrete_data = None self.init_combos(None) return self.init_combos(self.data) self.openContext(self.data) @Inputs.data_subset def set_subset_data(self, data): self.subset_data = data # this is called by widget after setData and setSubsetData are called. # this way the graph is updated only once def handleNewSignals(self): self.Warning.incompatible_subset.clear() self.subset_indices = None if self.data is not None and self.subset_data: transformed = self.subset_data.transform(self.data.domain) if np.all(np.isnan(transformed.X)) \ and np.all(np.isnan(transformed.Y)): self.Warning.incompatible_subset() else: indices = {e.id for e in transformed} self.subset_indices = [ex.id in indices for ex in self.data] if self.data is not None and self.__pending_selection is not None: self.selection = self.__pending_selection self.__pending_selection = None else: self.selection = set() self.set_color_data() self.update_graph() self.send_selection() def clear_selection(self): self.selection = set() self.update_selection_rects() self.send_selection() def coloring_changed(self): self.vizrank.coloring_changed() self.update_graph() def reset_graph(self): self.clear_selection() self.update_graph() def set_color_data(self): if self.data is None: return self.bar_button.setEnabled(self.variable_color is not None) attrs = [v for v in self.model_1 if v and v is not self.variable_color] domain = Domain(attrs, self.variable_color, None) self.color_data = self.data.from_table(domain, self.data) self.discrete_data = self._get_discrete_data(self.color_data) self.vizrank.stop_and_reset() self.vizrank_button.setEnabled(True) self.coloring_changed() def update_selection_rects(self): pens = (QPen(), QPen(Qt.black, 3, Qt.DotLine)) for i, (_, _, area) in enumerate(self.areas): area.setPen(pens[i in self.selection]) def select_area(self, index, ev): if ev.button() != Qt.LeftButton: return if ev.modifiers() & Qt.ControlModifier: self.selection ^= {index} else: self.selection = {index} self.update_selection_rects() self.send_selection() def send_selection(self): if not self.selection or self.data is None: self.Outputs.selected_data.send(None) self.Outputs.annotated_data.send( create_annotated_table(self.data, [])) return filters = [] self.Warning.no_cont_selection_sql.clear() if self.discrete_data is not self.data: if isinstance(self.data, SqlTable): self.Warning.no_cont_selection_sql() for i in self.selection: cols, vals, _ = self.areas[i] filters.append( filter.Values( filter.FilterDiscrete(col, [val]) for col, val in zip(cols, vals))) if len(filters) > 1: filters = filter.Values(filters, conjunction=False) else: filters = filters[0] selection = filters(self.discrete_data) idset = set(selection.ids) sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset] if self.discrete_data is not self.data: selection = self.data[sel_idx] self.Outputs.selected_data.send(selection) self.Outputs.annotated_data.send( create_annotated_table(self.data, sel_idx)) def send_report(self): self.report_plot(self.canvas) def update_graph(self): spacing = self.SPACING bar_width = self.BAR_WIDTH def get_counts(attr_vals, values): """Calculate rectangles' widths; if all are 0, they are set to 1.""" if not attr_vals: counts = [conditionaldict[val] for val in values] else: counts = [ conditionaldict[attr_vals + "-" + val] for val in values ] total = sum(counts) if total == 0: counts = [1] * len(values) total = sum(counts) return total, counts def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs, used_attrs, used_vals, attr_vals=""): x0, x1 = x0_x1 y0, y1 = y0_y1 if conditionaldict[attr_vals] == 0: add_rect(x0, x1, y0, y1, "", used_attrs, used_vals, attr_vals=attr_vals) # store coordinates for later drawing of labels draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) return attr = attr_list[0] # how much smaller rectangles do we draw edge = len(attr_list) * spacing values = get_variable_values_sorted(attr) if side % 2: values = values[::-1] # reverse names if necessary if side % 2 == 0: # we are drawing on the x axis # remove the space needed for separating different attr. values whole = max(0, (x1 - x0) - edge * (len(values) - 1)) if whole == 0: edge = (x1 - x0) / float(len(values) - 1) else: # we are drawing on the y axis whole = max(0, (y1 - y0) - edge * (len(values) - 1)) if whole == 0: edge = (y1 - y0) / float(len(values) - 1) total, counts = get_counts(attr_vals, values) # when visualizing the third attribute and the first attribute has # the last value, reverse the order in which the boxes are drawn; # otherwise, if the last cell, nearest to the labels of the fourth # attribute, is empty, we wouldn't be able to position the labels valrange = list(range(len(values))) if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2: attr1values = get_variable_values_sorted(used_attrs[0]) if used_vals[0] == attr1values[-1]: valrange = valrange[::-1] for i in valrange: start = i * edge + whole * float(sum(counts[:i]) / total) end = i * edge + whole * float(sum(counts[:i + 1]) / total) val = values[i] htmlval = to_html(val) newattrvals = attr_vals + "-" + val if attr_vals else val tooltip = "{} {}: <b>{}</b><br/>".format( condition, attr.name, htmlval) attrs = used_attrs + [attr] vals = used_vals + [val] args = attrs, vals, newattrvals if side % 2 == 0: # if we are moving horizontally if len(attr_list) == 1: add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args) else: draw_data(attr_list[1:], (x0 + start, x0 + end), (y0, y1), side + 1, tooltip, total_attrs, *args) else: if len(attr_list) == 1: add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args) else: draw_data(attr_list[1:], (x0, x1), (y0 + start, y0 + end), side + 1, tooltip, total_attrs, *args) draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs, used_vals, attr_vals): x0, x1 = x0_x1 y0, y1 = y0_y1 if side in drawn_sides: return # the text on the right will be drawn when we are processing # visualization of the last value of the first attribute if side == 3: attr1values = get_variable_values_sorted(used_attrs[0]) if used_vals[0] != attr1values[-1]: return if not conditionaldict[attr_vals]: if side not in draw_positions: draw_positions[side] = (x0, x1, y0, y1) return else: if side in draw_positions: # restore the positions of attribute values and name (x0, x1, y0, y1) = draw_positions[side] drawn_sides.add(side) values = get_variable_values_sorted(attr) if side % 2: values = values[::-1] spaces = spacing * (total_attrs - side) * (len(values) - 1) width = x1 - x0 - spaces * (side % 2 == 0) height = y1 - y0 - spaces * (side % 2 == 1) # calculate position of first attribute currpos = 0 total, counts = get_counts(attr_vals, values) aligns = [ Qt.AlignTop | Qt.AlignHCenter, Qt.AlignRight | Qt.AlignVCenter, Qt.AlignBottom | Qt.AlignHCenter, Qt.AlignLeft | Qt.AlignVCenter ] align = aligns[side] for i, val in enumerate(values): if distributiondict[val] != 0: perc = counts[i] / float(total) rwidth = width * perc xs = [ x0 + currpos + rwidth / 2, x0 - self.ATTR_VAL_OFFSET, x0 + currpos + rwidth / 2, x1 + self.ATTR_VAL_OFFSET ] ys = [ y1 + self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc, y0 - self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc ] CanvasText(self.canvas, val, xs[side], ys[side], align, max_width=rwidth if side == 0 else None) space = height if side % 2 else width currpos += perc * space + spacing * (total_attrs - side) xs = [ x0 + (x1 - x0) / 2, x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET, x0 + (x1 - x0) / 2, x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET ] ys = [ y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET, y0 + (y1 - y0) / 2, y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET, y0 + (y1 - y0) / 2 ] CanvasText(self.canvas, attr.name, xs[side], ys[side], align, bold=True, vertical=side % 2) def add_rect(x0, x1, y0, y1, condition, used_attrs, used_vals, attr_vals=""): area_index = len(self.areas) x1 += (x0 == x1) y1 += (y0 == y1) # rectangles of width and height 1 are not shown - increase y1 += (x1 - x0 + y1 - y0 == 2) colors = class_var and [QColor(*col) for col in class_var.colors] def select_area(_, ev): self.select_area(area_index, ev) def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args): if pen_color is None: return CanvasRectangle(self.canvas, x, y, w, h, z=z, onclick=select_area, **args) if brush_color is None: brush_color = pen_color return CanvasRectangle(self.canvas, x, y, w, h, pen_color, brush_color, z=z, onclick=select_area, **args) def line(x1, y1, x2, y2): r = QGraphicsLineItem(x1, y1, x2, y2, None) self.canvas.addItem(r) r.setPen(QPen(Qt.white, 2)) r.setZValue(30) outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30) self.areas.append((used_attrs, used_vals, outer_rect)) if not conditionaldict[attr_vals]: return if self.variable_color is None: s = sum(apriori_dists[0]) expected = s * reduce( mul, (apriori_dists[i][used_vals[i]] / float(s) for i in range(len(used_vals)))) actual = conditionaldict[attr_vals] pearson = float((actual - expected) / sqrt(expected)) if pearson == 0: ind = 0 else: ind = max(0, min(int(log(abs(pearson), 2)), 3)) color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind] rect(x0, y0, x1 - x0, y1 - y0, -20, color) outer_rect.setToolTip( condition + "<hr/>" + "Expected instances: %.1f<br>" "Actual instances: %d<br>" "Standardized (Pearson) residual: %.1f" % (expected, conditionaldict[attr_vals], pearson)) else: cls_values = get_variable_values_sorted(class_var) prior = get_distribution(data, class_var.name) total = 0 for i, value in enumerate(cls_values): val = conditionaldict[attr_vals + "-" + value] if val == 0: continue if i == len(cls_values) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / conditionaldict[attr_vals] rect(x0, y0 + total, x1 - x0, v, -20, colors[i]) total += v if self.use_boxes and \ abs(x1 - x0) > bar_width and abs(y1 - y0) > bar_width: total = 0 line(x0 + bar_width, y0, x0 + bar_width, y1) n = sum(prior) for i, (val, color) in enumerate(zip(prior, colors)): if i == len(prior) - 1: h = y1 - y0 - total else: h = (y1 - y0) * val / n rect(x0, y0 + total, bar_width, h, 20, color) total += h if conditionalsubsetdict: if conditionalsubsetdict[attr_vals]: if self.subset_indices is not None: line(x1 - bar_width, y0, x1 - bar_width, y1) total = 0 n = conditionalsubsetdict[attr_vals] if n: for i, (cls, color) in \ enumerate(zip(cls_values, colors)): val = conditionalsubsetdict[attr_vals + "-" + cls] if val == 0: continue if i == len(prior) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / n rect(x1 - bar_width, y0 + total, bar_width, v, 15, color) total += v actual = [ conditionaldict[attr_vals + "-" + cls_values[i]] for i in range(len(prior)) ] n_actual = sum(actual) if n_actual > 0: apriori = [prior[key] for key in cls_values] n_apriori = sum(apriori) text = "<br/>".join( "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" % (cls, act, 100.0 * act / n_actual, apr / n_apriori * n_actual, 100.0 * apr / n_apriori) for cls, act, apr in zip(cls_values, actual, apriori)) else: text = "" outer_rect.setToolTip("{}<hr>Instances: {}<br><br>{}".format( condition, n_actual, text[:-4])) def create_legend(): if self.variable_color is None: names = [ "<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8", "Residuals:" ] colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:] edges = repeat(Qt.black) else: names = get_variable_values_sorted(class_var) edges = colors = [QColor(*col) for col in class_var.colors] items = [] size = 8 for name, color, edgecolor in zip(names, colors, edges): item = QGraphicsItemGroup() item.addToGroup( CanvasRectangle(None, -size / 2, -size / 2, size, size, edgecolor, color)) item.addToGroup( CanvasText(None, name, size, 0, Qt.AlignVCenter)) items.append(item) return wrap_legend_items(items, hspacing=20, vspacing=16 + size, max_width=self.canvas_view.width() - xoff) self.canvas.clear() self.areas = [] data = self.discrete_data if data is None: return attr_list = self.get_disc_attr_list() class_var = data.domain.class_var # TODO: check this # data = Preprocessor_dropMissing(data) unique = [v.name for v in set(attr_list + [class_var]) if v] if len(data[:, unique]) == 0: self.Warning.no_valid_data() return else: self.Warning.no_valid_data.clear() attrs = [attr for attr in attr_list if not attr.values] if attrs: CanvasText(self.canvas, "Feature {} has no values".format(attrs[0]), (self.canvas_view.width() - 120) / 2, self.canvas_view.height() / 2) return if self.variable_color is None: apriori_dists = [ get_distribution(data, attr) for attr in attr_list ] else: apriori_dists = [] def get_max_label_width(attr): values = get_variable_values_sorted(attr) maxw = 0 for val in values: t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False) maxw = max(int(t.boundingRect().width()), maxw) return maxw xoff = 20 # get the maximum width of rectangle width = 20 max_ylabel_w1 = max_ylabel_w2 = 0 if len(attr_list) > 1: text = CanvasText(self.canvas, attr_list[1].name, bold=1, show=0) max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150) width = 5 + text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w1 xoff = width if len(attr_list) == 4: text = CanvasText(self.canvas, attr_list[3].name, bold=1, show=0) max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150) width += text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10 legend = create_legend() # get the maximum height of rectangle yoff = 45 legendoff = yoff + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35 square_size = min( self.canvas_view.width() - width - 20, self.canvas_view.height() - legendoff - legend.boundingRect().height()) if square_size < 0: return # canvas is too small to draw rectangles self.canvas_view.setSceneRect(0, 0, self.canvas_view.width(), self.canvas_view.height()) drawn_sides = set() draw_positions = {} conditionaldict, distributiondict = \ get_conditional_distribution(data, attr_list) conditionalsubsetdict = None if self.subset_indices: conditionalsubsetdict, _ = get_conditional_distribution( self.discrete_data[self.subset_indices], attr_list) # draw rectangles draw_data(attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size), 0, "", len(attr_list), [], []) self.canvas.addItem(legend) legend.setPos( xoff - legend.boundingRect().x() + max(0, (square_size - legend.boundingRect().width()) / 2), legendoff + square_size) self.update_selection_rects() @classmethod def migrate_context(cls, context, version): if version < 2: settings.migrate_str_to_variable(context, none_placeholder="(None)")
class OWMosaicDisplay(OWWidget): name = "Mosaic Display" description = "Display data in a mosaic plot." icon = "icons/MosaicDisplay.svg" priority = 220 class Inputs: data = Input("Data", Table, default=True) data_subset = Input("Data Subset", Table) class Outputs: selected_data = Output("Selected Data", Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table) PEARSON, CLASS_DISTRIBUTION = 0, 1 settingsHandler = DomainContextHandler() use_boxes = Setting(True) interior_coloring = Setting(CLASS_DISTRIBUTION) variable1 = ContextSetting("", exclude_metas=False) variable2 = ContextSetting("", exclude_metas=False) variable3 = ContextSetting("", exclude_metas=False) variable4 = ContextSetting("", exclude_metas=False) variable_color = ContextSetting("", exclude_metas=False) selection = ContextSetting(set()) BAR_WIDTH = 5 SPACING = 4 ATTR_NAME_OFFSET = 20 ATTR_VAL_OFFSET = 3 BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255), QColor(110, 110, 255), QColor(0, 0, 255)] RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200), QColor(255, 100, 100), QColor(255, 0, 0)] vizrank = SettingProvider(MosaicVizRank) graph_name = "canvas" class Warning(OWWidget.Warning): incompatible_subset = Msg("Data subset is incompatible with Data") no_valid_data = Msg("No valid data") no_cont_selection_sql = \ Msg("Selection of continuous variables on SQL is not supported") def __init__(self): super().__init__() self.data = None self.discrete_data = None self.unprocessed_subset_data = None self.subset_data = None self.color_data = None self.areas = [] self.canvas = QGraphicsScene() self.canvas_view = ViewWithPress(self.canvas, handler=self.clear_selection) self.mainArea.layout().addWidget(self.canvas_view) self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setRenderHint(QPainter.Antialiasing) box = gui.vBox(self.controlArea, box=True) self.attr_combos = [ gui.comboBox( box, self, value="variable{}".format(i), orientation=Qt.Horizontal, contentsLength=12, callback=self.reset_graph, sendSelectedValue=True, valueType=str, emptyString="(None)") for i in range(1, 5)] self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank( box, self, "Find Informative Mosaics", self.set_attr) box2 = gui.vBox(self.controlArea, box="Interior Coloring") dmod = DomainModel self.color_model = DomainModel(order=dmod.MIXED, valid_types=dmod.PRIMITIVE, placeholder="(Pearson residuals)") self.cb_attr_color = gui.comboBox( box2, self, value="variable_color", orientation=Qt.Horizontal, contentsLength=12, labelWidth=50, callback=self.set_color_data, sendSelectedValue=True, model=self.color_model, valueType=str) self.bar_button = gui.checkBox( box2, self, 'use_boxes', label='Compare with total', callback=self._compare_with_total) gui.rubber(self.controlArea) def sizeHint(self): return QSize(720, 530) def _compare_with_total(self): if self.data is not None and \ self.data.domain.class_var is not None and \ self.interior_coloring != self.CLASS_DISTRIBUTION: self.interior_coloring = self.CLASS_DISTRIBUTION self.coloring_changed() # This also calls self.update_graph else: self.update_graph() def _get_discrete_data(self, data): """ Discretizes continuous attributes. Returns None when there is no data, no rows, or no discrete or continuous attributes. """ if (data is None or not len(data) or not any(attr.is_discrete or attr.is_continuous for attr in chain(data.domain, data.domain.metas))): return None elif any(attr.is_continuous for attr in data.domain): return Discretize( method=EqualFreq(n=4), remove_const=False, discretize_classes=True, discretize_metas=True)(data) else: return data def init_combos(self, data): for combo in self.attr_combos: combo.clear() if data is None: return for combo in self.attr_combos[1:]: combo.addItem("(None)") icons = gui.attributeIconDict for attr in chain(data.domain, data.domain.metas): if attr.is_primitive: for combo in self.attr_combos: combo.addItem(icons[attr], attr.name) if self.attr_combos[0].count() > 0: self.variable1 = self.attr_combos[0].itemText(0) self.variable2 = self.attr_combos[1].itemText( 2 * (self.attr_combos[1].count() > 2)) self.variable3 = self.attr_combos[2].itemText(0) self.variable4 = self.attr_combos[3].itemText(0) if self.data.domain.class_var: self.variable_color = self.data.domain.class_var.name idx = self.cb_attr_color.findText(self.variable_color) else: idx = 0 self.cb_attr_color.setCurrentIndex(idx) def get_attr_list(self): return [ a for a in [self.variable1, self.variable2, self.variable3, self.variable4] if a and a != "(None)"] def set_attr(self, *attrs): self.variable1, self.variable2, self.variable3, self.variable4 = \ [a.name if a else "" for a in attrs] self.reset_graph() def resizeEvent(self, e): OWWidget.resizeEvent(self, e) self.update_graph() def showEvent(self, ev): OWWidget.showEvent(self, ev) self.update_graph() @Inputs.data def set_data(self, data): if type(data) == SqlTable and data.approx_len() > LARGE_TABLE: data = data.sample_time(DEFAULT_SAMPLE_TIME) self.closeContext() self.data = data self.vizrank.stop_and_reset() self.vizrank_button.setEnabled( self.data is not None and len(self.data) > 1 \ and len(self.data.domain.attributes) >= 1) if self.data is None: return self.color_model.set_domain(self.data.domain) self.init_combos(self.data) self.openContext(self.data) # if we first received subset we now call setSubsetData to process it if self.unprocessed_subset_data: self.set_subset_data(self.unprocessed_subset_data) self.unprocessed_subset_data = None self.set_color_data() @Inputs.data_subset def set_subset_data(self, data): self.Warning.incompatible_subset.clear() if self.data is None: self.unprocessed_subset_data = data return try: self.subset_data = data.transform(self.data.domain) except: self.subset_data = None self.Warning.incompatible_subset(shown=data is not None) # this is called by widget after setData and setSubsetData are called. # this way the graph is updated only once def handleNewSignals(self): self.reset_graph() def clear_selection(self): self.selection = set() self.update_selection_rects() self.send_selection() def coloring_changed(self): self.vizrank.coloring_changed() self.update_graph() def reset_graph(self): self.clear_selection() self.update_graph() def set_color_data(self): if self.data is None or len(self.data) < 2 or len(self.data.domain.attributes) < 1: return if self.cb_attr_color.currentIndex() <= 0: color_var = None self.interior_coloring = self.PEARSON self.bar_button.setEnabled(False) else: color_var = self.data.domain[self.cb_attr_color.currentText()] self.interior_coloring = self.CLASS_DISTRIBUTION self.bar_button.setEnabled(True) attributes = [v for v in self.data.domain if v != color_var] metas = [v for v in self.data.domain.metas if v != color_var] domain = Domain(attributes, color_var, metas) self.color_data = color_data = self.data.from_table(domain, self.data) self.discrete_data = self._get_discrete_data(color_data) self.vizrank.stop_and_reset() self.vizrank_button.setEnabled(True) self.coloring_changed() def update_selection_rects(self): for i, (_, _, area) in enumerate(self.areas): if i in self.selection: area.setPen(QPen(Qt.black, 3, Qt.DotLine)) else: area.setPen(QPen()) def select_area(self, index, ev): if ev.button() != Qt.LeftButton: return if ev.modifiers() & Qt.ControlModifier: self.selection ^= {index} else: self.selection = {index} self.update_selection_rects() self.send_selection() def send_selection(self): if not self.selection or self.data is None: self.Outputs.selected_data.send(None) self.Outputs.annotated_data.send(create_annotated_table(self.data, [])) return filters = [] self.Warning.no_cont_selection_sql.clear() if self.discrete_data is not self.data: if isinstance(self.data, SqlTable): self.Warning.no_cont_selection_sql() for i in self.selection: cols, vals, _ = self.areas[i] filters.append( filter.Values( filter.FilterDiscrete(col, [val]) for col, val in zip(cols, vals))) if len(filters) > 1: filters = filter.Values(filters, conjunction=False) else: filters = filters[0] selection = filters(self.discrete_data) idset = set(selection.ids) sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset] if self.discrete_data is not self.data: selection = self.data[sel_idx] self.Outputs.selected_data.send(selection) self.Outputs.annotated_data.send(create_annotated_table(self.data, sel_idx)) def send_report(self): self.report_plot(self.canvas) def update_graph(self): spacing = self.SPACING bar_width = self.BAR_WIDTH def get_counts(attr_vals, values): """This function calculates rectangles' widths. If all widths are zero then all widths are set to 1.""" if attr_vals == "": counts = [conditionaldict[val] for val in values] else: counts = [conditionaldict[attr_vals + "-" + val] for val in values] total = sum(counts) if total == 0: counts = [1] * len(values) total = sum(counts) return total, counts def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs, used_attrs, used_vals, attr_vals=""): x0, x1 = x0_x1 y0, y1 = y0_y1 if conditionaldict[attr_vals] == 0: add_rect(x0, x1, y0, y1, "", used_attrs, used_vals, attr_vals=attr_vals) # store coordinates for later drawing of labels draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) return attr = attr_list[0] # how much smaller rectangles do we draw edge = len(attr_list) * spacing values = get_variable_values_sorted(data.domain[attr]) if side % 2: values = values[::-1] # reverse names if necessary if side % 2 == 0: # we are drawing on the x axis # remove the space needed for separating different attr. values whole = max(0, (x1 - x0) - edge * ( len(values) - 1)) if whole == 0: edge = (x1 - x0) / float(len(values) - 1) else: # we are drawing on the y axis whole = max(0, (y1 - y0) - edge * (len(values) - 1)) if whole == 0: edge = (y1 - y0) / float(len(values) - 1) total, counts = get_counts(attr_vals, values) # if we are visualizing the third attribute and the first attribute # has the last value, we have to reverse the order in which the # boxes will be drawn otherwise, if the last cell, nearest to the # labels of the fourth attribute, is empty, we wouldn't be able to # position the labels valrange = list(range(len(values))) if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2: attr1values = get_variable_values_sorted( data.domain[used_attrs[0]]) if used_vals[0] == attr1values[-1]: valrange = valrange[::-1] for i in valrange: start = i * edge + whole * float(sum(counts[:i]) / total) end = i * edge + whole * float(sum(counts[:i + 1]) / total) val = values[i] htmlval = to_html(val) if attr_vals != "": newattrvals = attr_vals + "-" + val else: newattrvals = val tooltip = condition + 4 * " " + attr + \ ": <b>" + htmlval + "</b><br>" attrs = used_attrs + [attr] vals = used_vals + [val] common_args = attrs, vals, newattrvals if side % 2 == 0: # if we are moving horizontally if len(attr_list) == 1: add_rect(x0 + start, x0 + end, y0, y1, tooltip, *common_args) else: draw_data(attr_list[1:], (x0 + start, x0 + end), (y0, y1), side + 1, tooltip, total_attrs, *common_args) else: if len(attr_list) == 1: add_rect(x0, x1, y0 + start, y0 + end, tooltip, *common_args) else: draw_data(attr_list[1:], (x0, x1), (y0 + start, y0 + end), side + 1, tooltip, total_attrs, *common_args) draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs, used_vals, attr_vals): x0, x1 = x0_x1 y0, y1 = y0_y1 if side in drawn_sides: return # the text on the right will be drawn when we are processing # visualization of the last value of the first attribute if side == 3: attr1values = \ get_variable_values_sorted(data.domain[used_attrs[0]]) if used_vals[0] != attr1values[-1]: return if not conditionaldict[attr_vals]: if side not in draw_positions: draw_positions[side] = (x0, x1, y0, y1) return else: if side in draw_positions: # restore the positions of attribute values and name (x0, x1, y0, y1) = draw_positions[side] drawn_sides.add(side) values = get_variable_values_sorted(data.domain[attr]) if side % 2: values = values[::-1] spaces = spacing * (total_attrs - side) * (len(values) - 1) width = x1 - x0 - spaces * (side % 2 == 0) height = y1 - y0 - spaces * (side % 2 == 1) # calculate position of first attribute currpos = 0 total, counts = get_counts(attr_vals, values) aligns = [Qt.AlignTop | Qt.AlignHCenter, Qt.AlignRight | Qt.AlignVCenter, Qt.AlignBottom | Qt.AlignHCenter, Qt.AlignLeft | Qt.AlignVCenter] align = aligns[side] for i, val in enumerate(values): perc = counts[i] / float(total) if distributiondict[val] != 0: if side == 0: CanvasText(self.canvas, str(val), x0 + currpos + width * 0.5 * perc, y1 + self.ATTR_VAL_OFFSET, align) elif side == 1: CanvasText(self.canvas, str(val), x0 - self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc, align) elif side == 2: CanvasText(self.canvas, str(val), x0 + currpos + width * perc * 0.5, y0 - self.ATTR_VAL_OFFSET, align) else: CanvasText(self.canvas, str(val), x1 + self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc, align) if side % 2 == 0: currpos += perc * width + spacing * (total_attrs - side) else: currpos += perc * height + spacing * (total_attrs - side) if side == 0: CanvasText( self.canvas, attr, x0 + (x1 - x0) / 2, y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET, align, bold=1) elif side == 1: CanvasText( self.canvas, attr, x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET, y0 + (y1 - y0) / 2, align, bold=1, vertical=True) elif side == 2: CanvasText( self.canvas, attr, x0 + (x1 - x0) / 2, y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET, align, bold=1) else: CanvasText( self.canvas, attr, x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET, y0 + (y1 - y0) / 2, align, bold=1, vertical=True) def add_rect(x0, x1, y0, y1, condition, used_attrs, used_vals, attr_vals=""): area_index = len(self.areas) if x0 == x1: x1 += 1 if y0 == y1: y1 += 1 # rectangles of width and height 1 are not shown - increase if x1 - x0 + y1 - y0 == 2: y1 += 1 if class_var: colors = [QColor(*col) for col in class_var.colors] else: colors = None def select_area(_, ev): self.select_area(area_index, ev) def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args): if pen_color is None: return CanvasRectangle( self.canvas, x, y, w, h, z=z, onclick=select_area, **args) if brush_color is None: brush_color = pen_color return CanvasRectangle( self.canvas, x, y, w, h, pen_color, brush_color, z=z, onclick=select_area, **args) def line(x1, y1, x2, y2): r = QGraphicsLineItem(x1, y1, x2, y2, None) self.canvas.addItem(r) r.setPen(QPen(Qt.white, 2)) r.setZValue(30) outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30) self.areas.append((used_attrs, used_vals, outer_rect)) if not conditionaldict[attr_vals]: return if self.interior_coloring == self.PEARSON: s = sum(apriori_dists[0]) expected = s * reduce( mul, (apriori_dists[i][used_vals[i]] / float(s) for i in range(len(used_vals)))) actual = conditionaldict[attr_vals] pearson = (actual - expected) / sqrt(expected) if pearson == 0: ind = 0 else: ind = max(0, min(int(log(abs(pearson), 2)), 3)) color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind] rect(x0, y0, x1 - x0, y1 - y0, -20, color) outer_rect.setToolTip( condition + "<hr/>" + "Expected instances: %.1f<br>" "Actual instances: %d<br>" "Standardized (Pearson) residual: %.1f" % (expected, conditionaldict[attr_vals], pearson)) else: cls_values = get_variable_values_sorted(class_var) prior = get_distribution(data, class_var.name) total = 0 for i, value in enumerate(cls_values): val = conditionaldict[attr_vals + "-" + value] if val == 0: continue if i == len(cls_values) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / conditionaldict[attr_vals] rect(x0, y0 + total, x1 - x0, v, -20, colors[i]) total += v if self.use_boxes and \ abs(x1 - x0) > bar_width and \ abs(y1 - y0) > bar_width: total = 0 line(x0 + bar_width, y0, x0 + bar_width, y1) n = sum(prior) for i, (val, color) in enumerate(zip(prior, colors)): if i == len(prior) - 1: h = y1 - y0 - total else: h = (y1 - y0) * val / n rect(x0, y0 + total, bar_width, h, 20, color) total += h if conditionalsubsetdict: if conditionalsubsetdict[attr_vals]: counts = [conditionalsubsetdict[attr_vals + "-" + val] for val in cls_values] if sum(counts) == 1: rect(x0 - 2, y0 - 2, x1 - x0 + 5, y1 - y0 + 5, -550, colors[counts.index(1)], Qt.white, penWidth=2, penStyle=Qt.DashLine) if self.subset_data is not None: line(x1 - bar_width, y0, x1 - bar_width, y1) total = 0 n = conditionalsubsetdict[attr_vals] if n: for i, (cls, color) in \ enumerate(zip(cls_values, colors)): val = conditionalsubsetdict[ attr_vals + "-" + cls] if val == 0: continue if i == len(prior) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / n rect(x1 - bar_width, y0 + total, bar_width, v, 15, color) total += v actual = [conditionaldict[attr_vals + "-" + cls_values[i]] for i in range(len(prior))] n_actual = sum(actual) if n_actual > 0: apriori = [prior[key] for key in cls_values] n_apriori = sum(apriori) text = "<br/>".join( "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" % (cls, act, 100.0 * act / n_actual, apr / n_apriori * n_actual, 100.0 * apr / n_apriori) for cls, act, apr in zip(cls_values, actual, apriori)) else: text = "" outer_rect.setToolTip( "{}<hr>Instances: {}<br><br>{}".format( condition, n_actual, text[:-4])) def draw_legend(x0_x1, y0_y1): x0, x1 = x0_x1 _, y1 = y0_y1 if self.interior_coloring == self.PEARSON: names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8", "Residuals:"] colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:] else: names = get_variable_values_sorted(class_var) + \ [class_var.name + ":"] colors = [QColor(*col) for col in class_var.colors] names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter) for name in names] totalwidth = sum(text.boundingRect().width() for text in names) # compute the x position of the center of the legend y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35 distance = 30 startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2 names[-1].setPos(startx + 15, y) names[-1].show() xoffset = names[-1].boundingRect().width() + distance size = 8 for i in range(len(names) - 1): if self.interior_coloring == self.PEARSON: edgecolor = Qt.black else: edgecolor = colors[i] CanvasRectangle(self.canvas, startx + xoffset, y - size / 2, size, size, edgecolor, colors[i]) names[i].setPos(startx + xoffset + 10, y) xoffset += distance + names[i].boundingRect().width() self.canvas.clear() self.areas = [] data = self.discrete_data if data is None: return subset = self.subset_data attr_list = self.get_attr_list() class_var = data.domain.class_var if class_var: sql = type(data) == SqlTable name = not sql and data.name # save class_var because it is removed in the next line data = data[:, attr_list + [class_var]] data.domain.class_var = class_var if not sql: data.name = name else: data = data[:, attr_list] # TODO: check this # data = Preprocessor_dropMissing(data) if len(data) == 0: self.Warning.no_valid_data() return else: self.Warning.no_valid_data.clear() attrs = [attr for attr in attr_list if not data.domain[attr].values] if attrs: CanvasText(self.canvas, "Feature {} has no values".format(attrs[0]), (self.canvas_view.width() - 120) / 2, self.canvas_view.height() / 2) return if self.interior_coloring == self.PEARSON: apriori_dists = [get_distribution(data, attr) for attr in attr_list] else: apriori_dists = [] def get_max_label_width(attr): values = get_variable_values_sorted(data.domain[attr]) maxw = 0 for val in values: t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False) maxw = max(int(t.boundingRect().width()), maxw) return maxw # get the maximum width of rectangle xoff = 20 width = 20 if len(attr_list) > 1: text = CanvasText(self.canvas, attr_list[1], bold=1, show=0) max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150) width = 5 + text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w1 xoff = width if len(attr_list) == 4: text = CanvasText(self.canvas, attr_list[3], bold=1, show=0) max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150) width += text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10 # get the maximum height of rectangle height = 100 yoff = 45 square_size = min(self.canvas_view.width() - width - 20, self.canvas_view.height() - height - 20) if square_size < 0: return # canvas is too small to draw rectangles self.canvas_view.setSceneRect( 0, 0, self.canvas_view.width(), self.canvas_view.height()) drawn_sides = set() draw_positions = {} conditionaldict, distributiondict = \ get_conditional_distribution(data, attr_list) conditionalsubsetdict = None if subset: conditionalsubsetdict, _ = \ get_conditional_distribution(subset, attr_list) # draw rectangles draw_data( attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size), 0, "", len(attr_list), [], []) draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size)) self.update_selection_rects()
class OWMosaicDisplay(OWWidget): name = "Mosaic Display" description = "Display data in a mosaic plot." icon = "icons/MosaicDisplay.svg" priority = 220 inputs = [("Data", Table, "set_data", Default), ("Data Subset", Table, "set_subset_data")] outputs = [("Selected Data", Table, widget.Default), (ANNOTATED_DATA_SIGNAL_NAME, Table)] settingsHandler = DomainContextHandler() use_boxes = Setting(True) variable1 = ContextSetting("", exclude_metas=False) variable2 = ContextSetting("", exclude_metas=False) variable3 = ContextSetting("", exclude_metas=False) variable4 = ContextSetting("", exclude_metas=False) selection = ContextSetting(set()) # interior_coloring is context setting to properly reset it # if the widget switches to regression and back (set setData) interior_coloring = ContextSetting(1) PEARSON, CLASS_DISTRIBUTION = 0, 1 interior_coloring_opts = ["Pearson residuals", "Class distribution"] BAR_WIDTH = 5 SPACING = 4 ATTR_NAME_OFFSET = 20 ATTR_VAL_OFFSET = 3 BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255), QColor(110, 110, 255), QColor(0, 0, 255)] RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200), QColor(255, 100, 100), QColor(255, 0, 0)] graph_name = "canvas" class Warning(OWWidget.Warning): incompatible_subset = Msg("Data subset is incompatible with Data") no_valid_data = Msg("No valid data") no_cont_selection_sql = \ Msg("Selection of continuous variables on SQL is not supported") def __init__(self): super().__init__() self.data = None self.discrete_data = None self.unprocessed_subset_data = None self.subset_data = None self.areas = [] self.canvas = QGraphicsScene() self.canvas_view = ViewWithPress(self.canvas, handler=self.clear_selection) self.mainArea.layout().addWidget(self.canvas_view) self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setRenderHint(QPainter.Antialiasing) box = gui.vBox(self.controlArea, box=True) self.attr_combos = [ gui.comboBox( box, self, value="variable{}".format(i), orientation=Qt.Horizontal, contentsLength=12, callback=self.reset_graph, sendSelectedValue=True, valueType=str) for i in range(1, 5)] self.rb_colors = gui.radioButtonsInBox( self.controlArea, self, "interior_coloring", self.interior_coloring_opts, box="Interior Coloring", callback=self.update_graph) self.bar_button = gui.checkBox( gui.indentedBox(self.rb_colors), self, 'use_boxes', label='Compare with total', callback=self._compare_with_total) gui.rubber(self.controlArea) def sizeHint(self): return QSize(530, 720) def _compare_with_total(self): if self.data and self.data.domain.has_discrete_class: self.interior_coloring = 1 self.update_graph() def init_combos(self, data): for combo in self.attr_combos: combo.clear() if data is None: return for combo in self.attr_combos[1:]: combo.addItem("(None)") icons = gui.attributeIconDict for attr in chain(data.domain, data.domain.metas): if attr.is_discrete or attr.is_continuous: for combo in self.attr_combos: combo.addItem(icons[attr], attr.name) if self.attr_combos[0].count() > 0: self.variable1 = self.attr_combos[0].itemText(0) self.variable2 = self.attr_combos[1].itemText( 2 * (self.attr_combos[1].count() > 2)) self.variable3 = self.attr_combos[2].itemText(0) self.variable4 = self.attr_combos[3].itemText(0) def get_attr_list(self): return [ a for a in [self.variable1, self.variable2, self.variable3, self.variable4] if a and a != "(None)"] def resizeEvent(self, e): OWWidget.resizeEvent(self, e) self.update_graph() def showEvent(self, ev): OWWidget.showEvent(self, ev) self.update_graph() def set_data(self, data): if type(data) == SqlTable and data.approx_len() > LARGE_TABLE: data = data.sample_time(DEFAULT_SAMPLE_TIME) self.closeContext() self.data = data self.init_combos(self.data) if not self.data: self.discrete_data = None return if any(attr.is_continuous for attr in data.domain): self.discrete_data = Discretize(method=EqualFreq(n=4))(data) else: self.discrete_data = self.data if self.data.domain.class_var is None: self.rb_colors.setDisabled(True) disc_class = False else: self.rb_colors.setDisabled(False) disc_class = self.data.domain.has_discrete_class self.rb_colors.group.button(2).setDisabled(not disc_class) self.bar_button.setDisabled(not disc_class) self.interior_coloring = bool(disc_class) self.openContext(self.data) # if we first received subset we now call setSubsetData to process it if self.unprocessed_subset_data: self.set_subset_data(self.unprocessed_subset_data) self.unprocessed_subset_data = None def set_subset_data(self, data): self.Warning.incompatible_subset.clear() if self.data is None: self.unprocessed_subset_data = data return try: self.subset_data = data.from_table(self.data.domain, data) except: self.subset_data = None self.Warning.incompatible_subset(shown=data is not None) # this is called by widget after setData and setSubsetData are called. # this way the graph is updated only once def handleNewSignals(self): self.reset_graph() def clear_selection(self): self.selection = set() self.update_selection_rects() self.send_selection() def reset_graph(self): self.clear_selection() self.update_graph() def update_selection_rects(self): for i, (attr, vals, area) in enumerate(self.areas): if i in self.selection: area.setPen(QPen(Qt.black, 3, Qt.DotLine)) else: area.setPen(QPen()) def select_area(self, index, ev): if ev.button() != Qt.LeftButton: return if ev.modifiers() & Qt.ControlModifier: self.selection ^= {index} else: self.selection = {index} self.update_selection_rects() self.send_selection() def send_selection(self): if not self.selection or self.data is None: self.send("Selected Data", None) self.send(ANNOTATED_DATA_SIGNAL_NAME, create_annotated_table(self.data, [])) return filters = [] self.Warning.no_cont_selection_sql.clear() if self.discrete_data is not self.data: if isinstance(self.data, SqlTable): self.Warning.no_cont_selection_sql() for i in self.selection: cols, vals, area = self.areas[i] filters.append( filter.Values( filter.FilterDiscrete(col, [val]) for col, val in zip(cols, vals))) if len(filters) > 1: filters = filter.Values(filters, conjunction=False) else: filters = filters[0] selection = filters(self.discrete_data) idset = set(selection.ids) sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset] if self.discrete_data is not self.data: selection = self.data[sel_idx] self.send("Selected Data", selection) self.send(ANNOTATED_DATA_SIGNAL_NAME, create_annotated_table(self.data, sel_idx)) def send_report(self): self.report_plot(self.canvas) def update_graph(self): spacing = self.SPACING bar_width = self.BAR_WIDTH def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs, used_attrs=[], used_vals=[], attr_vals=""): x0, x1 = x0_x1 y0, y1 = y0_y1 if conditionaldict[attr_vals] == 0: add_rect(x0, x1, y0, y1, "", used_attrs, used_vals, attr_vals=attr_vals) # store coordinates for later drawing of labels draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) return attr = attr_list[0] # how much smaller rectangles do we draw edge = len(attr_list) * spacing values = get_variable_values_sorted(data.domain[attr]) if side % 2: values = values[::-1] # reverse names if necessary if side % 2 == 0: # we are drawing on the x axis # remove the space needed for separating different attr. values whole = max(0, (x1 - x0) - edge * ( len(values) - 1)) if whole == 0: edge = (x1 - x0) / float(len(values) - 1) else: # we are drawing on the y axis whole = max(0, (y1 - y0) - edge * (len(values) - 1)) if whole == 0: edge = (y1 - y0) / float(len(values) - 1) if attr_vals == "": counts = [conditionaldict[val] for val in values] else: counts = [conditionaldict[attr_vals + "-" + val] for val in values] total = sum(counts) # if we are visualizing the third attribute and the first attribute # has the last value, we have to reverse the order in which the # boxes will be drawn otherwise, if the last cell, nearest to the # labels of the fourth attribute, is empty, we wouldn't be able to # position the labels valrange = list(range(len(values))) if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2: attr1values = get_variable_values_sorted( data.domain[used_attrs[0]]) if used_vals[0] == attr1values[-1]: valrange = valrange[::-1] for i in valrange: start = i * edge + whole * float(sum(counts[:i]) / total) end = i * edge + whole * float(sum(counts[:i + 1]) / total) val = values[i] htmlval = to_html(val) if attr_vals != "": newattrvals = attr_vals + "-" + val else: newattrvals = val tooltip = condition + 4 * " " + attr + \ ": <b>" + htmlval + "</b><br>" attrs = used_attrs + [attr] vals = used_vals + [val] common_args = attrs, vals, newattrvals if side % 2 == 0: # if we are moving horizontally if len(attr_list) == 1: add_rect(x0 + start, x0 + end, y0, y1, tooltip, *common_args) else: draw_data(attr_list[1:], (x0 + start, x0 + end), (y0, y1), side + 1, tooltip, total_attrs, *common_args) else: if len(attr_list) == 1: add_rect(x0, x1, y0 + start, y0 + end, tooltip, *common_args) else: draw_data(attr_list[1:], (x0, x1), (y0 + start, y0 + end), side + 1, tooltip, total_attrs, *common_args) draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs, used_vals, attr_vals): x0, x1 = x0_x1 y0, y1 = y0_y1 if side in drawn_sides: return # the text on the right will be drawn when we are processing # visualization of the last value of the first attribute if side == 3: attr1values = \ get_variable_values_sorted(data.domain[used_attrs[0]]) if used_vals[0] != attr1values[-1]: return if not conditionaldict[attr_vals]: if side not in draw_positions: draw_positions[side] = (x0, x1, y0, y1) return else: if side in draw_positions: # restore the positions of attribute values and name (x0, x1, y0, y1) = draw_positions[side] drawn_sides.add(side) values = get_variable_values_sorted(data.domain[attr]) if side % 2: values = values[::-1] spaces = spacing * (total_attrs - side) * (len(values) - 1) width = x1 - x0 - spaces * (side % 2 == 0) height = y1 - y0 - spaces * (side % 2 == 1) # calculate position of first attribute currpos = 0 if attr_vals == "": counts = [conditionaldict.get(val, 1) for val in values] else: counts = [conditionaldict.get(attr_vals + "-" + val, 1) for val in values] total = sum(counts) if total == 0: counts = [1] * len(values) total = sum(counts) aligns = [Qt.AlignTop | Qt.AlignHCenter, Qt.AlignRight | Qt.AlignVCenter, Qt.AlignBottom | Qt.AlignHCenter, Qt.AlignLeft | Qt.AlignVCenter] align = aligns[side] for i in range(len(values)): val = values[i] perc = counts[i] / float(total) if distributiondict[val] != 0: if side == 0: CanvasText(self.canvas, str(val), x0 + currpos + width * 0.5 * perc, y1 + self.ATTR_VAL_OFFSET, align) elif side == 1: CanvasText(self.canvas, str(val), x0 - self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc, align) elif side == 2: CanvasText(self.canvas, str(val), x0 + currpos + width * perc * 0.5, y0 - self.ATTR_VAL_OFFSET, align) else: CanvasText(self.canvas, str(val), x1 + self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc, align) if side % 2 == 0: currpos += perc * width + spacing * (total_attrs - side) else: currpos += perc * height + spacing * (total_attrs - side) if side == 0: CanvasText( self.canvas, attr, x0 + (x1 - x0) / 2, y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET, align, bold=1) elif side == 1: CanvasText( self.canvas, attr, x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET, y0 + (y1 - y0) / 2, align, bold=1, vertical=True) elif side == 2: CanvasText( self.canvas, attr, x0 + (x1 - x0) / 2, y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET, align, bold=1) else: CanvasText( self.canvas, attr, x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET, y0 + (y1 - y0) / 2, align, bold=1, vertical=True) def add_rect(x0, x1, y0, y1, condition="", used_attrs=[], used_vals=[], attr_vals=""): area_index = len(self.areas) if x0 == x1: x1 += 1 if y0 == y1: y1 += 1 # rectangles of width and height 1 are not shown - increase if x1 - x0 + y1 - y0 == 2: y1 += 1 if class_var and class_var.is_discrete: colors = [QColor(*col) for col in class_var.colors] else: colors = None def select_area(_, ev): self.select_area(area_index, ev) def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args): if pen_color is None: return CanvasRectangle( self.canvas, x, y, w, h, z=z, onclick=select_area, **args) if brush_color is None: brush_color = pen_color return CanvasRectangle( self.canvas, x, y, w, h, pen_color, brush_color, z=z, onclick=select_area, **args) def line(x1, y1, x2, y2): r = QGraphicsLineItem(x1, y1, x2, y2, None) self.canvas.addItem(r) r.setPen(QPen(Qt.white, 2)) r.setZValue(30) outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30) self.areas.append((used_attrs, used_vals, outer_rect)) if not conditionaldict[attr_vals]: return if self.interior_coloring == self.PEARSON: s = sum(apriori_dists[0]) expected = s * reduce( mul, (apriori_dists[i][used_vals[i]] / float(s) for i in range(len(used_vals)))) actual = conditionaldict[attr_vals] pearson = (actual - expected) / sqrt(expected) if pearson == 0: ind = 0 else: ind = max(0, min(int(log(abs(pearson), 2)), 3)) color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind] rect(x0, y0, x1 - x0, y1 - y0, -20, color) outer_rect.setToolTip( condition + "<hr/>" + "Expected instances: %.1f<br>" "Actual instances: %d<br>" "Standardized (Pearson) residual: %.1f" % (expected, conditionaldict[attr_vals], pearson)) else: cls_values = get_variable_values_sorted(class_var) prior = get_distribution(data, class_var.name) total = 0 for i, value in enumerate(cls_values): val = conditionaldict[attr_vals + "-" + value] if val == 0: continue if i == len(cls_values) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / conditionaldict[attr_vals] rect(x0, y0 + total, x1 - x0, v, -20, colors[i]) total += v if self.use_boxes and \ abs(x1 - x0) > bar_width and \ abs(y1 - y0) > bar_width: total = 0 line(x0 + bar_width, y0, x0 + bar_width, y1) n = sum(prior) for i, (val, color) in enumerate(zip(prior, colors)): if i == len(prior) - 1: h = y1 - y0 - total else: h = (y1 - y0) * val / n rect(x0, y0 + total, bar_width, h, 20, color) total += h if conditionalsubsetdict: if conditionalsubsetdict[attr_vals]: counts = [conditionalsubsetdict[attr_vals + "-" + val] for val in cls_values] if sum(counts) == 1: rect(x0 - 2, y0 - 2, x1 - x0 + 5, y1 - y0 + 5, -550, colors[counts.index(1)], Qt.white, penWidth=2, penStyle=Qt.DashLine) if self.subset_data is not None: line(x1 - bar_width, y0, x1 - bar_width, y1) total = 0 n = conditionalsubsetdict[attr_vals] if n: for i, (cls, color) in \ enumerate(zip(cls_values, colors)): val = conditionalsubsetdict[ attr_vals + "-" + cls] if val == 0: continue if i == len(prior) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / n rect(x1 - bar_width, y0 + total, bar_width, v, 15, color) total += v actual = [conditionaldict[attr_vals + "-" + cls_values[i]] for i in range(len(prior))] n_actual = sum(actual) if n_actual > 0: apriori = [prior[key] for key in cls_values] n_apriori = sum(apriori) text = "<br/>".join( "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" % (cls, act, 100.0 * act / n_actual, apr / n_apriori * n_actual, 100.0 * apr / n_apriori ) for cls, act, apr in zip(cls_values, actual, apriori )) else: text = "" outer_rect.setToolTip( "{}<hr>Instances: {}<br><br>{}".format( condition, n_actual, text[:-4])) def draw_legend(x0_x1, y0_y1): x0, x1 = x0_x1 y0, y1 = y0_y1 if self.interior_coloring == self.PEARSON: names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8", "Residuals:"] colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:] else: names = get_variable_values_sorted(class_var) + \ [class_var.name + ":"] colors = [QColor(*col) for col in class_var.colors] names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter) for name in names] totalwidth = sum(text.boundingRect().width() for text in names) # compute the x position of the center of the legend y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35 distance = 30 startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2 names[-1].setPos(startx + 15, y) names[-1].show() xoffset = names[-1].boundingRect().width() + distance size = 8 for i in range(len(names) - 1): if self.interior_coloring == self.PEARSON: edgecolor = Qt.black else: edgecolor = colors[i] CanvasRectangle(self.canvas, startx + xoffset, y - size / 2, size, size, edgecolor, colors[i]) names[i].setPos(startx + xoffset + 10, y) xoffset += distance + names[i].boundingRect().width() self.canvas.clear() self.areas = [] data = self.discrete_data if data is None: return subset = self.subset_data attr_list = self.get_attr_list() class_var = data.domain.class_var if class_var: sql = type(data) == SqlTable name = not sql and data.name # save class_var because it is removed in the next line data = data[:, attr_list + [class_var]] data.domain.class_var = class_var if not sql: data.name = name else: data = data[:, attr_list] # TODO: check this # data = Preprocessor_dropMissing(data) if len(data) == 0: self.Warning.no_valid_data() return else: self.Warning.no_valid_data.clear() if self.interior_coloring == self.PEARSON: apriori_dists = [get_distribution(data, attr) for attr in attr_list] else: apriori_dists = [] def get_max_label_width(attr): values = get_variable_values_sorted(data.domain[attr]) maxw = 0 for val in values: t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False) maxw = max(int(t.boundingRect().width()), maxw) return maxw # get the maximum width of rectangle xoff = 20 width = 20 if len(attr_list) > 1: text = CanvasText(self.canvas, attr_list[1], bold=1, show=0) max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150) width = 5 + text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w1 xoff = width if len(attr_list) == 4: text = CanvasText(self.canvas, attr_list[3], bold=1, show=0) max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150) width += text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10 # get the maximum height of rectangle height = 100 yoff = 45 square_size = min(self.canvas_view.width() - width - 20, self.canvas_view.height() - height - 20) if square_size < 0: return # canvas is too small to draw rectangles self.canvas_view.setSceneRect( 0, 0, self.canvas_view.width(), self.canvas_view.height()) drawn_sides = set() draw_positions = {} conditionaldict, distributiondict = \ get_conditional_distribution(data, attr_list) conditionalsubsetdict = None if subset: conditionalsubsetdict, _ = \ get_conditional_distribution(subset, attr_list) # draw rectangles draw_data( attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size), 0, "", len(attr_list)) draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size)) self.update_selection_rects()
class OWMosaicDisplay(OWWidget): name = "Mosaic Display" description = "Display data in a mosaic plot." icon = "icons/MosaicDisplay.svg" priority = 220 keywords = [] class Inputs: data = Input("Data", Table, default=True) data_subset = Input("Data Subset", Table) class Outputs: selected_data = Output("Selected Data", Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table) settingsHandler = DomainContextHandler() vizrank = SettingProvider(MosaicVizRank) settings_version = 2 use_boxes = Setting(True) variable1 = ContextSetting(None) variable2 = ContextSetting(None) variable3 = ContextSetting(None) variable4 = ContextSetting(None) variable_color = ContextSetting(None) selection = ContextSetting(set()) BAR_WIDTH = 5 SPACING = 4 ATTR_NAME_OFFSET = 20 ATTR_VAL_OFFSET = 3 BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255), QColor(110, 110, 255), QColor(0, 0, 255)] RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200), QColor(255, 100, 100), QColor(255, 0, 0)] graph_name = "canvas" attrs_changed_manually = Signal(list) class Warning(OWWidget.Warning): incompatible_subset = Msg("Data subset is incompatible with Data") no_valid_data = Msg("No valid data") no_cont_selection_sql = \ Msg("Selection of numeric features on SQL is not supported") def __init__(self): super().__init__() self.data = None self.discrete_data = None self.subset_data = None self.subset_indices = None self.color_data = None self.areas = [] self.canvas = QGraphicsScene() self.canvas_view = ViewWithPress( self.canvas, handler=self.clear_selection) self.mainArea.layout().addWidget(self.canvas_view) self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvas_view.setRenderHint(QPainter.Antialiasing) box = gui.vBox(self.controlArea, box=True) self.model_1 = DomainModel( order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE) self.model_234 = DomainModel( order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE, placeholder="(None)") self.attr_combos = [ gui.comboBox( box, self, value="variable{}".format(i), orientation=Qt.Horizontal, contentsLength=12, callback=self.attr_changed, model=self.model_1 if i == 1 else self.model_234) for i in range(1, 5)] self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank( box, self, "Find Informative Mosaics", self.set_attr) box2 = gui.vBox(self.controlArea, box="Interior Coloring") self.color_model = DomainModel( order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE, placeholder="(Pearson residuals)") self.cb_attr_color = gui.comboBox( box2, self, value="variable_color", orientation=Qt.Horizontal, contentsLength=12, labelWidth=50, callback=self.set_color_data, model=self.color_model) self.bar_button = gui.checkBox( box2, self, 'use_boxes', label='Compare with total', callback=self.update_graph) gui.rubber(self.controlArea) def sizeHint(self): return QSize(720, 530) def _get_discrete_data(self, data): """ Discretize continuous attributes. Return None when there is no data, no rows, or no primitive attributes. """ if (data is None or not len(data) or not any(attr.is_discrete or attr.is_continuous for attr in chain(data.domain.variables, data.domain.metas))): return None elif any(attr.is_continuous for attr in data.domain.variables): return Discretize( method=EqualFreq(n=4), remove_const=False, discretize_classes=True, discretize_metas=True)(data) else: return data def init_combos(self, data): def set_combos(value): self.model_1.set_domain(value) self.model_234.set_domain(value) self.color_model.set_domain(value) if data is None: set_combos(None) self.variable1 = self.variable2 = self.variable3 \ = self.variable4 = self.variable_color = None return set_combos(self.data.domain) if len(self.model_1) > 0: self.variable1 = self.model_1[0] self.variable2 = self.model_1[min(1, len(self.model_1) - 1)] self.variable3 = self.variable4 = None self.variable_color = self.data.domain.class_var # None is OK, too def get_disc_attr_list(self): return [self.discrete_data.domain[var.name] for var in (self.variable1, self.variable2, self.variable3, self.variable4) if var] def set_attr(self, *attrs): self.variable1, self.variable2, self.variable3, self.variable4 = [ attr and self.data.domain[attr.name] for attr in attrs] self.reset_graph() def attr_changed(self): self.attrs_changed_manually.emit(self.get_disc_attr_list()) self.reset_graph() def resizeEvent(self, e): OWWidget.resizeEvent(self, e) self.update_graph() def showEvent(self, ev): OWWidget.showEvent(self, ev) self.update_graph() @Inputs.data def set_data(self, data): if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE: data = data.sample_time(DEFAULT_SAMPLE_TIME) self.closeContext() self.data = data self.vizrank.stop_and_reset() self.vizrank_button.setEnabled( self.data is not None and len(self.data) > 1 and len(self.data.domain.attributes) >= 1) if self.data is None: self.discrete_data = None self.init_combos(None) return self.init_combos(self.data) self.openContext(self.data) @Inputs.data_subset def set_subset_data(self, data): self.subset_data = data # this is called by widget after setData and setSubsetData are called. # this way the graph is updated only once def handleNewSignals(self): self.Warning.incompatible_subset.clear() self.subset_indices = None if self.data is not None and self.subset_data: transformed = self.subset_data.transform(self.data.domain) if np.all(np.isnan(transformed.X)) \ and np.all(np.isnan(transformed.Y)): self.Warning.incompatible_subset() else: indices = {e.id for e in transformed} self.subset_indices = [ex.id in indices for ex in self.data] self.set_color_data() self.reset_graph() def clear_selection(self): self.selection = set() self.update_selection_rects() self.send_selection() def coloring_changed(self): self.vizrank.coloring_changed() self.update_graph() def reset_graph(self): self.clear_selection() self.update_graph() def set_color_data(self): if self.data is None: return self.bar_button.setEnabled(self.variable_color is not None) attrs = [v for v in self.model_1 if v and v is not self.variable_color] domain = Domain(attrs, self.variable_color, None) self.color_data = self.data.from_table(domain, self.data) self.discrete_data = self._get_discrete_data(self.color_data) self.vizrank.stop_and_reset() self.vizrank_button.setEnabled(True) self.coloring_changed() def update_selection_rects(self): pens = (QPen(), QPen(Qt.black, 3, Qt.DotLine)) for i, (_, _, area) in enumerate(self.areas): area.setPen(pens[i in self.selection]) def select_area(self, index, ev): if ev.button() != Qt.LeftButton: return if ev.modifiers() & Qt.ControlModifier: self.selection ^= {index} else: self.selection = {index} self.update_selection_rects() self.send_selection() def send_selection(self): if not self.selection or self.data is None: self.Outputs.selected_data.send(None) self.Outputs.annotated_data.send( create_annotated_table(self.data, [])) return filters = [] self.Warning.no_cont_selection_sql.clear() if self.discrete_data is not self.data: if isinstance(self.data, SqlTable): self.Warning.no_cont_selection_sql() for i in self.selection: cols, vals, _ = self.areas[i] filters.append( filter.Values( filter.FilterDiscrete(col, [val]) for col, val in zip(cols, vals))) if len(filters) > 1: filters = filter.Values(filters, conjunction=False) else: filters = filters[0] selection = filters(self.discrete_data) idset = set(selection.ids) sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset] if self.discrete_data is not self.data: selection = self.data[sel_idx] self.Outputs.selected_data.send(selection) self.Outputs.annotated_data.send( create_annotated_table(self.data, sel_idx)) def send_report(self): self.report_plot(self.canvas) def update_graph(self): spacing = self.SPACING bar_width = self.BAR_WIDTH def get_counts(attr_vals, values): """Calculate rectangles' widths; if all are 0, they are set to 1.""" if not attr_vals: counts = [conditionaldict[val] for val in values] else: counts = [conditionaldict[attr_vals + "-" + val] for val in values] total = sum(counts) if total == 0: counts = [1] * len(values) total = sum(counts) return total, counts def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs, used_attrs, used_vals, attr_vals=""): x0, x1 = x0_x1 y0, y1 = y0_y1 if conditionaldict[attr_vals] == 0: add_rect(x0, x1, y0, y1, "", used_attrs, used_vals, attr_vals=attr_vals) # store coordinates for later drawing of labels draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) return attr = attr_list[0] # how much smaller rectangles do we draw edge = len(attr_list) * spacing values = get_variable_values_sorted(attr) if side % 2: values = values[::-1] # reverse names if necessary if side % 2 == 0: # we are drawing on the x axis # remove the space needed for separating different attr. values whole = max(0, (x1 - x0) - edge * (len(values) - 1)) if whole == 0: edge = (x1 - x0) / float(len(values) - 1) else: # we are drawing on the y axis whole = max(0, (y1 - y0) - edge * (len(values) - 1)) if whole == 0: edge = (y1 - y0) / float(len(values) - 1) total, counts = get_counts(attr_vals, values) # when visualizing the third attribute and the first attribute has # the last value, reverse the order in which the boxes are drawn; # otherwise, if the last cell, nearest to the labels of the fourth # attribute, is empty, we wouldn't be able to position the labels valrange = list(range(len(values))) if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2: attr1values = get_variable_values_sorted(used_attrs[0]) if used_vals[0] == attr1values[-1]: valrange = valrange[::-1] for i in valrange: start = i * edge + whole * float(sum(counts[:i]) / total) end = i * edge + whole * float(sum(counts[:i + 1]) / total) val = values[i] htmlval = to_html(val) newattrvals = attr_vals + "-" + val if attr_vals else val tooltip = "{} {}: <b>{}</b><br/>".format( condition, attr.name, htmlval) attrs = used_attrs + [attr] vals = used_vals + [val] args = attrs, vals, newattrvals if side % 2 == 0: # if we are moving horizontally if len(attr_list) == 1: add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args) else: draw_data( attr_list[1:], (x0 + start, x0 + end), (y0, y1), side + 1, tooltip, total_attrs, *args) else: if len(attr_list) == 1: add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args) else: draw_data( attr_list[1:], (x0, x1), (y0 + start, y0 + end), side + 1, tooltip, total_attrs, *args) draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs, used_attrs, used_vals, attr_vals) def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs, used_vals, attr_vals): x0, x1 = x0_x1 y0, y1 = y0_y1 if side in drawn_sides: return # the text on the right will be drawn when we are processing # visualization of the last value of the first attribute if side == 3: attr1values = get_variable_values_sorted(used_attrs[0]) if used_vals[0] != attr1values[-1]: return if not conditionaldict[attr_vals]: if side not in draw_positions: draw_positions[side] = (x0, x1, y0, y1) return else: if side in draw_positions: # restore the positions of attribute values and name (x0, x1, y0, y1) = draw_positions[side] drawn_sides.add(side) values = get_variable_values_sorted(attr) if side % 2: values = values[::-1] spaces = spacing * (total_attrs - side) * (len(values) - 1) width = x1 - x0 - spaces * (side % 2 == 0) height = y1 - y0 - spaces * (side % 2 == 1) # calculate position of first attribute currpos = 0 total, counts = get_counts(attr_vals, values) aligns = [Qt.AlignTop | Qt.AlignHCenter, Qt.AlignRight | Qt.AlignVCenter, Qt.AlignBottom | Qt.AlignHCenter, Qt.AlignLeft | Qt.AlignVCenter] align = aligns[side] for i, val in enumerate(values): if distributiondict[val] != 0: perc = counts[i] / float(total) xs = [x0 + currpos + width * 0.5 * perc, x0 - self.ATTR_VAL_OFFSET, x0 + currpos + width * perc * 0.5, x1 + self.ATTR_VAL_OFFSET] ys = [y1 + self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc, y0 - self.ATTR_VAL_OFFSET, y0 + currpos + height * 0.5 * perc] CanvasText(self.canvas, val, xs[side], ys[side], align) space = height if side % 2 else width currpos += perc * space + spacing * (total_attrs - side) xs = [x0 + (x1 - x0) / 2, x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET, x0 + (x1 - x0) / 2, x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET] ys = [y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET, y0 + (y1 - y0) / 2, y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET, y0 + (y1 - y0) / 2] CanvasText( self.canvas, attr.name, xs[side], ys[side], align, bold=True, vertical=side % 2) def add_rect(x0, x1, y0, y1, condition, used_attrs, used_vals, attr_vals=""): area_index = len(self.areas) x1 += (x0 == x1) y1 += (y0 == y1) # rectangles of width and height 1 are not shown - increase y1 += (x1 - x0 + y1 - y0 == 2) colors = class_var and [QColor(*col) for col in class_var.colors] def select_area(_, ev): self.select_area(area_index, ev) def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args): if pen_color is None: return CanvasRectangle( self.canvas, x, y, w, h, z=z, onclick=select_area, **args) if brush_color is None: brush_color = pen_color return CanvasRectangle( self.canvas, x, y, w, h, pen_color, brush_color, z=z, onclick=select_area, **args) def line(x1, y1, x2, y2): r = QGraphicsLineItem(x1, y1, x2, y2, None) self.canvas.addItem(r) r.setPen(QPen(Qt.white, 2)) r.setZValue(30) outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30) self.areas.append((used_attrs, used_vals, outer_rect)) if not conditionaldict[attr_vals]: return if self.variable_color is None: s = sum(apriori_dists[0]) expected = s * reduce( mul, (apriori_dists[i][used_vals[i]] / float(s) for i in range(len(used_vals)))) actual = conditionaldict[attr_vals] pearson = float((actual - expected) / sqrt(expected)) if pearson == 0: ind = 0 else: ind = max(0, min(int(log(abs(pearson), 2)), 3)) color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind] rect(x0, y0, x1 - x0, y1 - y0, -20, color) outer_rect.setToolTip( condition + "<hr/>" + "Expected instances: %.1f<br>" "Actual instances: %d<br>" "Standardized (Pearson) residual: %.1f" % (expected, conditionaldict[attr_vals], pearson)) else: cls_values = get_variable_values_sorted(class_var) prior = get_distribution(data, class_var.name) total = 0 for i, value in enumerate(cls_values): val = conditionaldict[attr_vals + "-" + value] if val == 0: continue if i == len(cls_values) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / conditionaldict[attr_vals] rect(x0, y0 + total, x1 - x0, v, -20, colors[i]) total += v if self.use_boxes and \ abs(x1 - x0) > bar_width and abs(y1 - y0) > bar_width: total = 0 line(x0 + bar_width, y0, x0 + bar_width, y1) n = sum(prior) for i, (val, color) in enumerate(zip(prior, colors)): if i == len(prior) - 1: h = y1 - y0 - total else: h = (y1 - y0) * val / n rect(x0, y0 + total, bar_width, h, 20, color) total += h if conditionalsubsetdict: if conditionalsubsetdict[attr_vals]: if self.subset_indices is not None: line(x1 - bar_width, y0, x1 - bar_width, y1) total = 0 n = conditionalsubsetdict[attr_vals] if n: for i, (cls, color) in \ enumerate(zip(cls_values, colors)): val = conditionalsubsetdict[ attr_vals + "-" + cls] if val == 0: continue if i == len(prior) - 1: v = y1 - y0 - total else: v = ((y1 - y0) * val) / n rect(x1 - bar_width, y0 + total, bar_width, v, 15, color) total += v actual = [conditionaldict[attr_vals + "-" + cls_values[i]] for i in range(len(prior))] n_actual = sum(actual) if n_actual > 0: apriori = [prior[key] for key in cls_values] n_apriori = sum(apriori) text = "<br/>".join( "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" % (cls, act, 100.0 * act / n_actual, apr / n_apriori * n_actual, 100.0 * apr / n_apriori) for cls, act, apr in zip(cls_values, actual, apriori)) else: text = "" outer_rect.setToolTip( "{}<hr>Instances: {}<br><br>{}".format( condition, n_actual, text[:-4])) def draw_legend(x0_x1, y0_y1): x0, x1 = x0_x1 _, y1 = y0_y1 if self.variable_color is None: names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8", "Residuals:"] colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:] else: names = get_variable_values_sorted(class_var) + \ [class_var.name + ":"] colors = [QColor(*col) for col in class_var.colors] names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter) for name in names] totalwidth = sum(text.boundingRect().width() for text in names) # compute the x position of the center of the legend y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35 distance = 30 startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2 names[-1].setPos(startx + 15, y) names[-1].show() xoffset = names[-1].boundingRect().width() + distance size = 8 for i in range(len(names) - 1): if self.variable_color is None: edgecolor = Qt.black else: edgecolor = colors[i] CanvasRectangle(self.canvas, startx + xoffset, y - size / 2, size, size, edgecolor, colors[i]) names[i].setPos(startx + xoffset + 10, y) xoffset += distance + names[i].boundingRect().width() self.canvas.clear() self.areas = [] data = self.discrete_data if data is None: return attr_list = self.get_disc_attr_list() class_var = data.domain.class_var if class_var: sql = isinstance(data, SqlTable) name = not sql and data.name # save class_var because it is removed in the next line data = data[:, attr_list + [class_var]] data.domain.class_var = class_var if not sql: data.name = name else: data = data[:, attr_list] # TODO: check this # data = Preprocessor_dropMissing(data) if len(data) == 0: self.Warning.no_valid_data() return else: self.Warning.no_valid_data.clear() attrs = [attr for attr in attr_list if not attr.values] if attrs: CanvasText(self.canvas, "Feature {} has no values".format(attrs[0]), (self.canvas_view.width() - 120) / 2, self.canvas_view.height() / 2) return if self.variable_color is None: apriori_dists = [get_distribution(data, attr) for attr in attr_list] else: apriori_dists = [] def get_max_label_width(attr): values = get_variable_values_sorted(attr) maxw = 0 for val in values: t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False) maxw = max(int(t.boundingRect().width()), maxw) return maxw # get the maximum width of rectangle xoff = 20 width = 20 max_ylabel_w1 = max_ylabel_w2 = 0 if len(attr_list) > 1: text = CanvasText(self.canvas, attr_list[1].name, bold=1, show=0) max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150) width = 5 + text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w1 xoff = width if len(attr_list) == 4: text = CanvasText(self.canvas, attr_list[3].name, bold=1, show=0) max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150) width += text.boundingRect().height() + \ self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10 # get the maximum height of rectangle height = 100 yoff = 45 square_size = min(self.canvas_view.width() - width - 20, self.canvas_view.height() - height - 20) if square_size < 0: return # canvas is too small to draw rectangles self.canvas_view.setSceneRect( 0, 0, self.canvas_view.width(), self.canvas_view.height()) drawn_sides = set() draw_positions = {} conditionaldict, distributiondict = \ get_conditional_distribution(data, attr_list) conditionalsubsetdict = None if self.subset_indices: conditionalsubsetdict, _ = get_conditional_distribution( self.discrete_data[self.subset_indices], attr_list) # draw rectangles draw_data( attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size), 0, "", len(attr_list), [], []) draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size)) self.update_selection_rects() @classmethod def migrate_context(cls, context, version): if version < 2: settings.migrate_str_to_variable(context, none_placeholder="(None)")