class TestGraphicsPixmapWidget(GuiTest):
    """GUI tests for ``GraphicsPixmapWidget`` placed in a graphics scene."""

    def setUp(self) -> None:
        """Create a fresh scene and a view showing it for every test."""
        super().setUp()
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)

    def tearDown(self) -> None:
        """Release the scene and view, then let the base class clean up."""
        self.scene.clear()
        self.scene.deleteLater()
        self.view.deleteLater()
        del self.scene
        del self.view
        # setUp chains to the base class, so tearDown must do the same;
        # otherwise whatever the base fixture set up is never undone.
        super().tearDown()

    def test_graphicspixmapwidget(self):
        """Check pixmap round-trip and aspect-preserving size hints."""
        w = GraphicsPixmapWidget()
        self.scene.addItem(w)
        w.setPixmap(QPixmap(100, 100))
        p = w.pixmap()
        self.assertEqual(p.size(), QSize(100, 100))
        # Grab the view once to exercise painting with the default settings.
        self.view.grab()

        w.setScaleContents(True)
        w.setAspectRatioMode(Qt.KeepAspectRatio)
        s = w.sizeHint(Qt.PreferredSize)
        self.assertEqual(s, QSizeF(100., 100.))
        # Constraining only one dimension (the other is -1) is expected to
        # yield a hint with the pixmap's 1:1 aspect ratio.
        s = w.sizeHint(Qt.PreferredSize, QSizeF(200., -1.))
        self.assertEqual(s, QSizeF(200., 200.))
        s = w.sizeHint(Qt.PreferredSize, QSizeF(-1., 200.))
        self.assertEqual(s, QSizeF(200., 200.))
        # Grab again to exercise painting with scaled contents.
        self.view.grab()
class TestDendrogramWidget(GuiTest):
    """GUI tests for ``DendrogramWidget`` height/position mapping."""

    def setUp(self) -> None:
        """Create a scene, a view on it, and a dendrogram widget in the scene."""
        super().setUp()
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.widget = DendrogramWidget()
        self.scene.addItem(self.widget)

    def tearDown(self) -> None:
        """Drop every Qt object created in setUp, then chain to the base."""
        self.scene.clear()
        del self.widget
        del self.view
        # The scene is created in setUp as well; release it too so it does
        # not stay referenced by the test instance after the test finishes.
        del self.scene
        super().tearDown()

    def test_widget(self):
        """Check height_at / pos_at_height for zero and tiny root heights."""
        w = self.widget
        T = hierarchical.Tree
        C = hierarchical.ClusterData
        S = hierarchical.SingletonData

        def t(h: float, left: T, right: T):
            # Internal node spanning from left's first to right's last leaf.
            return T(C((left.value.first, right.value.last), h), (left, right))

        def leaf(r, index):
            # Leaf covering the range (r, r + 1) at height 0.
            return T(S((r, r + 1), 0.0, index))

        # Root with height 0: every position maps to height 0.
        w.set_root(t(0.0, leaf(0, 0), leaf(1, 1)))
        w.resize(w.effectiveSizeHint(Qt.PreferredSize))
        h = w.height_at(QPoint())
        self.assertEqual(h, 0)
        h = w.height_at(QPoint(10, 0))
        self.assertEqual(h, 0)

        self.assertEqual(w.pos_at_height(0).x(), w.rect().x())
        self.assertEqual(w.pos_at_height(1).x(), w.rect().x())

        # Smallest positive root height: the left edge maps to the root
        # height and the right edge maps to 0.
        height = np.finfo(float).eps
        w.set_root(t(height, leaf(0, 0), leaf(1, 1)))

        h = w.height_at(QPoint())
        self.assertEqual(h, height)
        h = w.height_at(QPoint(w.size().width(), 0))
        self.assertEqual(h, 0)

        self.assertEqual(w.pos_at_height(0).x(), w.rect().right())
        self.assertEqual(w.pos_at_height(height).x(), w.rect().left())
class _GraphicsGuiTest(GuiTest):
    """Base fixture providing a graphics view and a scene parented to it."""

    scene: QGraphicsScene
    view: QGraphicsView

    def setUp(self) -> None:
        """Build the view first, then a scene owned by that view."""
        super().setUp()
        self.view = view = QGraphicsView()
        self.scene = scene = QGraphicsScene(view)
        view.setScene(scene)

    def tearDown(self) -> None:
        """Clear and schedule deletion of the scene, then of the view."""
        # The scene is handled before the view, same order as creation
        # is undone: contents, scene, view.
        self.scene.clear()
        self.scene.deleteLater()
        self.scene = None

        self.view.deleteLater()
        self.view = None

        super().tearDown()
class TestItems(QAppTestCase):
    """Fixture with a visible, antialiased 500x300 view over a new scene."""

    def setUp(self):
        """Create the scene and show a view configured for smooth rendering."""
        super().setUp()
        self.scene = scene = QGraphicsScene()
        self.view = view = QGraphicsView(scene)

        hints = (QPainter.Antialiasing
                 | QPainter.SmoothPixmapTransform
                 | QPainter.TextAntialiasing)
        view.setRenderHints(hints)
        view.resize(500, 300)
        view.show()

    def tearDown(self):
        """Empty the scene, schedule both Qt objects for deletion, unbind."""
        self.scene.clear()
        self.scene.deleteLater()
        self.view.deleteLater()
        del self.scene, self.view
        super().tearDown()
def render_drop_shadow_frame(pixmap, shadow_rect, shadow_color,
                             offset, radius, rect_fill_color):
    """Render a rectangle with a drop shadow effect into ``pixmap``.

    The pixmap is first cleared to fully transparent. A temporary scene
    containing a single pen-less rectangle (``shadow_rect`` filled with
    ``rect_fill_color``) carrying a ``QGraphicsDropShadowEffect`` is then
    rendered onto it. The scene rect is set to the pixmap's size with its
    origin at (0, 0).

    Parameters
    ----------
    pixmap
        Target pixmap; it is painted in place.
    shadow_rect
        Geometry of the rectangle that casts the shadow.
    shadow_color
        Passed as ``color`` to the drop shadow effect.
    offset
        Passed as ``offset`` to the drop shadow effect.
    radius
        Passed as ``blurRadius`` to the drop shadow effect.
    rect_fill_color
        Brush color of the rectangle (wrapped in ``QColor``).

    Returns
    -------
    The same ``pixmap`` object that was passed in.
    """
    pixmap.fill(QColor(0, 0, 0, 0))
    scene = QGraphicsScene()
    try:
        rect = QGraphicsRectItem(shadow_rect)
        rect.setBrush(QColor(rect_fill_color))
        rect.setPen(QPen(Qt.NoPen))
        scene.addItem(rect)
        effect = QGraphicsDropShadowEffect(color=shadow_color,
                                           blurRadius=radius,
                                           offset=offset)
        rect.setGraphicsEffect(effect)
        scene.setSceneRect(QRectF(QPointF(0, 0), QSizeF(pixmap.size())))
        painter = QPainter(pixmap)
        try:
            scene.render(painter)
        finally:
            # Always finish painting, even if rendering raised, so the
            # pixmap is not left with an active painter attached.
            painter.end()
    finally:
        # The scene is a throw-away helper; release it on every exit path.
        scene.clear()
        scene.deleteLater()
    return pixmap
class TestItems(QAppTestCase):
    """Test fixture exposing ``self.scene`` and a shown ``self.view``."""

    def setUp(self):
        """Prepare a scene and a 500x300 view with smooth render hints."""
        super().setUp()
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)

        view = self.view
        render_hints = (
            QPainter.Antialiasing
            | QPainter.SmoothPixmapTransform
            | QPainter.TextAntialiasing
        )
        view.setRenderHints(render_hints)
        view.resize(500, 300)
        view.show()

    def tearDown(self):
        """Dispose of the scene and view created in ``setUp``."""
        # Remove the items first, then schedule the Qt objects themselves.
        self.scene.clear()
        self.scene.deleteLater()
        self.view.deleteLater()
        del self.scene, self.view
        super().tearDown()
class TestItems(unittest.TestCase):
    """Fixture that owns a Qt application, a scene and a visible view.

    ``setUp`` also installs a temporary ``sys.excepthook`` and schedules the
    application to exit after 10 seconds; ``tearDown`` restores the hook.
    """

    def setUp(self):
        """Create the application (if needed), the scene and the view."""
        import logging
        from AnyQt.QtWidgets import \
            QApplication, QGraphicsScene, QGraphicsView
        from AnyQt.QtGui import QPainter
        from AnyQt.QtCore import QTimer

        logging.basicConfig()

        # Qt supports only a single application object per process. Reuse
        # the existing one (e.g. left over from a previous test) instead of
        # unconditionally constructing another.
        self.app = QApplication.instance() or QApplication([])
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHints(
            QPainter.Antialiasing |
            QPainter.SmoothPixmapTransform |
            QPainter.TextAntialiasing
        )
        self.view.resize(500, 300)
        self.view.show()
        # Safety net: leave the event loop after 10 s if a test started it.
        QTimer.singleShot(10000, self.app.exit)

        def my_excepthook(*args):
            sys.setrecursionlimit(1010)
            traceback.print_exc(limit=4)

        self._orig_excepthook = sys.excepthook
        sys.excepthook = my_excepthook
        self.singleShot = QTimer.singleShot

    def tearDown(self):
        """Release the Qt objects and restore the original excepthook."""
        try:
            self.scene.clear()
            self.scene.deleteLater()
            self.view.deleteLater()
            del self.scene
            del self.view
            # Let the pending deleteLater requests be delivered.
            self.app.processEvents()
            del self.app
        finally:
            # Restore the hook even if the Qt cleanup above failed, so the
            # temporary hook never leaks into later tests.
            sys.excepthook = self._orig_excepthook
class OWMosaicDisplay(OWWidget):
    """Widget that draws a mosaic plot of up to four attributes.

    Rectangles are colored either by Pearson residuals (when no color
    variable is chosen) or by the distribution of the color variable.
    Clicking rectangles selects the corresponding instances, which are sent
    to the ``Selected Data`` and annotated-data outputs.
    """
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    vizrank = SettingProvider(MosaicVizRank)
    settings_version = 2
    use_boxes = Setting(True)
    variable1 = ContextSetting(None)
    variable2 = ContextSetting(None)
    variable3 = ContextSetting(None)
    variable4 = ContextSetting(None)
    variable_color = ContextSetting(None)
    selection = ContextSetting(set())

    # Layout constants (scene units) used by update_graph.
    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3

    # Four-step palettes indexed by the clipped log2 of the absolute Pearson
    # residual; BLUE is used for positive residuals, RED otherwise.
    BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255),
                   QColor(110, 110, 255), QColor(0, 0, 255)]
    RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200),
                  QColor(255, 100, 100), QColor(255, 0, 0)]

    graph_name = "canvas"

    # Emitted from attr_changed with the current list of discretized
    # attributes.
    attrs_changed_manually = Signal(list)

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of numeric features on SQL is not supported")

    def __init__(self):
        """Build the canvas, the attribute combos and the coloring controls."""
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.subset_data = None
        self.subset_indices = None
        self.color_data = None

        # List of (attributes, values, outer rectangle) per drawn area;
        # indices into this list are what ``selection`` stores.
        self.areas = []

        self.canvas = QGraphicsScene()
        self.canvas_view = ViewWithPress(
            self.canvas, handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        # The first combo has no placeholder; combos 2-4 share a model that
        # offers "(None)".
        self.model_1 = DomainModel(
            order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE)
        self.model_234 = DomainModel(
            order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE,
            placeholder="(None)")
        self.attr_combos = [
            gui.comboBox(
                box, self, value="variable{}".format(i),
                orientation=Qt.Horizontal, contentsLength=12,
                callback=self.attr_changed,
                model=self.model_1 if i == 1 else self.model_234)
            for i in range(1, 5)]
        self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank(
            box, self, "Find Informative Mosaics", self.set_attr)

        box2 = gui.vBox(self.controlArea, box="Interior Coloring")
        self.color_model = DomainModel(
            order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE,
            placeholder="(Pearson residuals)")
        self.cb_attr_color = gui.comboBox(
            box2, self, value="variable_color",
            orientation=Qt.Horizontal, contentsLength=12, labelWidth=50,
            callback=self.set_color_data, model=self.color_model)
        self.bar_button = gui.checkBox(
            box2, self, 'use_boxes', label='Compare with total',
            callback=self.update_graph)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        """Return the preferred widget size (720 x 530)."""
        return QSize(720, 530)

    def _get_discrete_data(self, data):
        """
        Discretize continuous attributes.

        Return None when there is no data, no rows, or no primitive
        attributes. Data is discretized (equal frequency, 4 bins, including
        class and metas) only if some variable in ``data.domain.variables``
        is continuous; otherwise ``data`` is returned unchanged.
        """
        if (data is None or
                not len(data) or
                not any(attr.is_discrete or attr.is_continuous
                        for attr in chain(data.domain.variables,
                                          data.domain.metas))):
            return None
        elif any(attr.is_continuous for attr in data.domain.variables):
            return Discretize(
                method=EqualFreq(n=4), remove_const=False,
                discretize_classes=True, discretize_metas=True)(data)
        else:
            return data

    def init_combos(self, data):
        """Reset combo models and default variable choices for ``data``.

        With ``data`` None, all models are cleared and all variables are set
        to None. Otherwise the models are filled from ``self.data.domain``,
        the first two combos get the first (and second, if present) model
        entries, and the color variable defaults to the class variable.
        """
        def set_combos(value):
            self.model_1.set_domain(value)
            self.model_234.set_domain(value)
            self.color_model.set_domain(value)

        if data is None:
            set_combos(None)
            self.variable1 = self.variable2 = self.variable3 \
                = self.variable4 = self.variable_color = None
            return
        set_combos(self.data.domain)

        if len(self.model_1) > 0:
            self.variable1 = self.model_1[0]
            self.variable2 = self.model_1[min(1, len(self.model_1) - 1)]
        self.variable3 = self.variable4 = None
        self.variable_color = self.data.domain.class_var  # None is OK, too

    def get_disc_attr_list(self):
        """Return the chosen (non-empty) variables as found in
        ``self.discrete_data.domain``, looked up by name."""
        return [self.discrete_data.domain[var.name]
                for var in (self.variable1, self.variable2,
                            self.variable3, self.variable4)
                if var]

    def set_attr(self, *attrs):
        """Set the four plotted variables from ``attrs`` and redraw.

        Each truthy attribute is mapped by name to ``self.data.domain``;
        falsy entries are stored as-is.
        """
        self.variable1, self.variable2, self.variable3, self.variable4 = [
            attr and self.data.domain[attr.name] for attr in attrs]
        self.reset_graph()

    def attr_changed(self):
        """Combo callback: announce the new attribute list and redraw."""
        self.attrs_changed_manually.emit(self.get_disc_attr_list())
        self.reset_graph()

    def resizeEvent(self, e):
        """Redraw the plot after the widget has been resized."""
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        """Redraw the plot when the widget is shown."""
        OWWidget.showEvent(self, ev)
        self.update_graph()

    @Inputs.data
    def set_data(self, data):
        """Handle the ``Data`` input.

        Large SQL tables are sampled first. The context is closed, the
        vizrank is reset, combos are re-initialized and, for non-None data,
        the context is reopened.
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            self.discrete_data = None
            self.init_combos(None)
            return

        self.init_combos(self.data)
        self.openContext(self.data)

    @Inputs.data_subset
    def set_subset_data(self, data):
        """Handle the ``Data Subset`` input; it is only stored here."""
        self.subset_data = data

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        """Recompute subset membership, then refresh coloring and the plot.

        The subset is transformed to the data's domain; if all of its X and
        Y values are NaN it is reported as incompatible, otherwise
        ``subset_indices`` becomes a per-row boolean list marking data rows
        whose id occurs in the subset.
        """
        self.Warning.incompatible_subset.clear()
        self.subset_indices = None
        if self.data is not None and self.subset_data:
            transformed = self.subset_data.transform(self.data.domain)
            if np.all(np.isnan(transformed.X)) \
                    and np.all(np.isnan(transformed.Y)):
                self.Warning.incompatible_subset()
            else:
                indices = {e.id for e in transformed}
                self.subset_indices = [ex.id in indices for ex in self.data]
        self.set_color_data()
        self.reset_graph()

    def clear_selection(self):
        """Empty the selection, update the outlines and resend outputs."""
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def coloring_changed(self):
        """Notify the vizrank about the coloring change and redraw."""
        self.vizrank.coloring_changed()
        self.update_graph()

    def reset_graph(self):
        """Clear the selection and redraw the plot."""
        self.clear_selection()
        self.update_graph()

    def set_color_data(self):
        """Rebuild ``color_data`` / ``discrete_data`` for the color variable.

        The new domain has all model variables except the color variable as
        attributes, and the color variable as the class. Does nothing when
        there is no data.
        """
        if self.data is None:
            return
        self.bar_button.setEnabled(self.variable_color is not None)
        attrs = [v for v in self.model_1 if v and v is not self.variable_color]
        domain = Domain(attrs, self.variable_color, None)
        self.color_data = self.data.from_table(domain, self.data)
        self.discrete_data = self._get_discrete_data(self.color_data)
        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(True)
        self.coloring_changed()

    def update_selection_rects(self):
        """Give selected areas a thick dotted outline, others a default pen."""
        pens = (QPen(), QPen(Qt.black, 3, Qt.DotLine))
        for i, (_, _, area) in enumerate(self.areas):
            area.setPen(pens[i in self.selection])

    def select_area(self, index, ev):
        """Handle a click on area ``index``.

        Only the left button is handled. With Control held the area is
        toggled in the selection; otherwise it becomes the sole selection.
        """
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        """Send the instances falling into the selected areas.

        Each selected area becomes a conjunction of discrete-value filters;
        multiple areas are combined as a disjunction. The filter is applied
        to ``discrete_data`` and mapped back to rows of ``data`` by id.
        """
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id_ in enumerate(self.data.ids) if id_ in idset]
        if self.discrete_data is not self.data:
            # Output rows of the original data, not of the discretized copy.
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot(self.canvas)

    def update_graph(self):
        """Clear the canvas and redraw the whole mosaic plot.

        Returns early (leaving an empty or message-only canvas) when there
        is no discretized data, no rows, an attribute without values, or
        when the view is too small.
        """
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def get_counts(attr_vals, values):
            """Calculate rectangles' widths; if all are 0, they are set to 1."""
            if not attr_vals:
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [conditionaldict[attr_vals + "-" + val]
                          for val in values]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)
            return total, counts

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs, used_vals, attr_vals=""):
            """Recursively split the given rectangle by ``attr_list[0]``.

            Even ``side`` values split along x, odd ones along y. The
            recursion ends with ``add_rect`` for the last attribute.
            """
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            # Number of gaps between consecutive values. With a single value
            # there is no gap, so ``edge`` is never used with a non-zero
            # multiplier and must not be recomputed (division by zero).
            n_gaps = len(values) - 1
            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * n_gaps)
                if whole == 0 and n_gaps:
                    edge = (x1 - x0) / float(n_gaps)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * n_gaps)
                if whole == 0 and n_gaps:
                    edge = (y1 - y0) / float(n_gaps)

            total, counts = get_counts(attr_vals, values)

            # when visualizing the third attribute and the first attribute has
            # the last value, reverse the order in which the boxes are drawn;
            # otherwise, if the last cell, nearest to the labels of the fourth
            # attribute, is empty, we wouldn't be able to position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                newattrvals = attr_vals + "-" + val if attr_vals else val

                tooltip = "{} {}: <b>{}</b><br/>".format(
                    condition, attr.name, htmlval)
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args)
                    else:
                        draw_data(
                            attr_list[1:], (x0 + start, x0 + end), (y0, y1),
                            side + 1, tooltip, total_attrs, *args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args)
                    else:
                        draw_data(
                            attr_list[1:], (x0, x1), (y0 + start, y0 + end),
                            side + 1, tooltip, total_attrs, *args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            """Draw value labels and the attribute name for one plot side.

            Each side is labelled at most once (tracked in ``drawn_sides``).
            """
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            total, counts = get_counts(attr_vals, values)

            # Alignment and coordinates are indexed by ``side`` (0..3).
            aligns = [Qt.AlignTop | Qt.AlignHCenter,
                      Qt.AlignRight | Qt.AlignVCenter,
                      Qt.AlignBottom | Qt.AlignHCenter,
                      Qt.AlignLeft | Qt.AlignVCenter]
            align = aligns[side]
            for i, val in enumerate(values):
                if distributiondict[val] != 0:
                    perc = counts[i] / float(total)
                    xs = [x0 + currpos + width * 0.5 * perc,
                          x0 - self.ATTR_VAL_OFFSET,
                          x0 + currpos + width * perc * 0.5,
                          x1 + self.ATTR_VAL_OFFSET]
                    ys = [y1 + self.ATTR_VAL_OFFSET,
                          y0 + currpos + height * 0.5 * perc,
                          y0 - self.ATTR_VAL_OFFSET,
                          y0 + currpos + height * 0.5 * perc]

                    CanvasText(self.canvas, val, xs[side], ys[side], align)
                    space = height if side % 2 else width
                    currpos += perc * space + spacing * (total_attrs - side)

            xs = [x0 + (x1 - x0) / 2,
                  x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                  x0 + (x1 - x0) / 2,
                  x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET]
            ys = [y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                  y0 + (y1 - y0) / 2,
                  y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                  y0 + (y1 - y0) / 2]
            CanvasText(
                self.canvas, attr.name, xs[side], ys[side], align,
                bold=True, vertical=side % 2)

        def add_rect(x0, x1, y0, y1, condition,
                     used_attrs, used_vals, attr_vals=""):
            """Add one mosaic cell, its interior coloring and its tooltip."""
            area_index = len(self.areas)
            x1 += (x0 == x1)
            y1 += (y0 == y1)
            # rectangles of width and height 1 are not shown - increase
            y1 += (x1 - x0 + y1 - y0 == 2)
            colors = class_var and [QColor(*col) for col in class_var.colors]

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                        self.canvas, x, y, w, h, z=z, onclick=select_area,
                        **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                    self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                    onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.variable_color is None:
                # Color by the standardized (Pearson) residual.
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul,
                    (apriori_dists[i][used_vals[i]] / float(s)
                     for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = float((actual - expected) / sqrt(expected))
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" +
                    "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                # Stack one colored band per class value, proportional to
                # the class distribution within this cell.
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        # The last band takes whatever height is left.
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and abs(y1 - y0) > bar_width:
                    # Left bar: the overall (prior) class distribution.
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        if self.subset_indices is not None:
                            # Right bar: class distribution in the subset.
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[
                                        attr_vals + "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total,
                                         bar_width, v, 15, color)
                                    total += v

                actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                          for i in range(len(prior))]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                # ``text`` is joined with "<br/>" and has no trailing
                # separator, so it is used whole; slicing off four
                # characters would truncate the last class line.
                outer_rect.setToolTip(
                    "{}<hr>Instances: {}<br><br>{}".format(
                        condition, n_actual, text))

        def draw_legend(x0_x1, y0_y1):
            """Draw a horizontally centered legend below the plot."""
            x0, x1 = x0_x1
            _, y1 = y0_y1
            if self.variable_color is None:
                names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                         "Residuals:"]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
            else:
                names = get_variable_values_sorted(class_var) + \
                        [class_var.name + ":"]
                colors = [QColor(*col) for col in class_var.colors]

            names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
                     for name in names]
            totalwidth = sum(text.boundingRect().width() for text in names)

            # compute the x position of the center of the legend
            y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
            distance = 30
            startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2

            # The last entry is the legend title; it is placed first.
            names[-1].setPos(startx + 15, y)
            names[-1].show()
            xoffset = names[-1].boundingRect().width() + distance

            size = 8

            for i in range(len(names) - 1):
                if self.variable_color is None:
                    edgecolor = Qt.black
                else:
                    edgecolor = colors[i]

                CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                                size, size, edgecolor, colors[i])
                names[i].setPos(startx + xoffset + 10, y)
                xoffset += distance + names[i].boundingRect().width()

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        attr_list = self.get_disc_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = isinstance(data, SqlTable)
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        attrs = [attr for attr in attr_list if not attr.values]
        if attrs:
            CanvasText(self.canvas,
                       "Feature {} has no values".format(attrs[0]),
                       (self.canvas_view.width() - 120) / 2,
                       self.canvas_view.height() / 2)
            return
        if self.variable_color is None:
            apriori_dists = [get_distribution(data, attr)
                             for attr in attr_list]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(attr)
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        # get the maximum width of rectangle
        xoff = 20
        width = 20
        max_ylabel_w1 = max_ylabel_w2 = 0
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1].name, bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3].name,
                                  bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        # get the maximum height of rectangle
        height = 100
        yoff = 45
        square_size = min(self.canvas_view.width() - width - 20,
                          self.canvas_view.height() - height - 20)

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
            0, 0, self.canvas_view.width(), self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if self.subset_indices:
            conditionalsubsetdict, _ = get_conditional_distribution(
                self.discrete_data[self.subset_indices], attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list), [], [])
        draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size))
        self.update_selection_rects()

    @classmethod
    def migrate_context(cls, context, version):
        """Migrate contexts older than version 2, which stored variables
        as strings with "(None)" as the placeholder for no variable."""
        if version < 2:
            settings.migrate_str_to_variable(context, none_placeholder="(None)")
class OWSilhouettePlot(widget.OWWidget): name = "Silhouette Plot" description = "Visually assess cluster quality and " \ "the degree of cluster membership." icon = "icons/SilhouettePlot.svg" priority = 300 keywords = [] class Inputs: data = Input("Data", (Orange.data.Table, Orange.misc.DistMatrix)) class Outputs: selected_data = Output("Selected Data", Orange.data.Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table) replaces = [ "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot", "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot" ] settingsHandler = settings.PerfectDomainContextHandler() #: Distance metric index distance_idx = settings.Setting(0) #: Group/cluster variable index cluster_var_idx = settings.ContextSetting(0) #: Annotation variable index annotation_var_idx = settings.ContextSetting(0) #: Group the (displayed) silhouettes by cluster group_by_cluster = settings.Setting(True) #: A fixed size for an instance bar bar_size = settings.Setting(3) #: Add silhouette scores to output data auto_commit = settings.Setting(True) pending_selection = settings.Setting(None, schema_only=True) Distances = [("Euclidean", Orange.distance.Euclidean), ("Manhattan", Orange.distance.Manhattan), ("Cosine", Orange.distance.Cosine)] graph_name = "scene" class Error(widget.OWWidget.Error): need_two_clusters = Msg("Need at least two non-empty clusters") singleton_clusters_all = Msg("All clusters are singletons") memory_error = Msg("Not enough memory") value_error = Msg("Distances could not be computed: '{}'") input_validation_error = Msg("{}") class Warning(widget.OWWidget.Warning): missing_cluster_assignment = Msg( "{} instance{s} omitted (missing cluster assignment)") nan_distances = Msg("{} instance{s} omitted (undefined distances)") ignoring_categorical = Msg("Ignoring categorical features") def __init__(self): super().__init__() #: The input data self.data = None # type: Optional[Orange.data.Table] #: The 
input distance matrix (if present) self.distances = None # type: Optional[Orange.misc.DistMatrix] #: The effective distance matrix (is self.distances or computed from #: self.data depending on input) self._matrix = None # type: Optional[Orange.misc.DistMatrix] #: An bool mask (size == len(data)) indicating missing group/cluster #: assignments self._mask = None # type: Optional[np.ndarray] #: An array of cluster/group labels for instances with valid group #: assignment self._labels = None # type: Optional[np.ndarray] #: An array of silhouette scores for instances with valid group #: assignment self._silhouette = None # type: Optional[np.ndarray] self._silplot = None # type: Optional[SilhouettePlot] self.info.set_input_summary(self.info.NoInput) self.info.set_output_summary(self.info.NoOutput) controllayout = self.controlArea.layout() assert isinstance(controllayout, QVBoxLayout) self._distances_gui_box = distbox = gui.widgetBox(None, "Distance") self._distances_gui_cb = gui.comboBox( distbox, self, "distance_idx", items=[name for name, _ in OWSilhouettePlot.Distances], orientation=Qt.Horizontal, callback=self._invalidate_distances) controllayout.addWidget(distbox) box = gui.vBox(self.controlArea, "Cluster Label") self.cluster_var_cb = gui.comboBox(box, self, "cluster_var_idx", contentsLength=14, searchable=True, callback=self._invalidate_scores) gui.checkBox(box, self, "group_by_cluster", "Group by cluster", callback=self._replot) self.cluster_var_model = itemmodels.VariableListModel(parent=self) self.cluster_var_cb.setModel(self.cluster_var_model) box = gui.vBox(self.controlArea, "Bars") gui.widgetLabel(box, "Bar width:") gui.hSlider(box, self, "bar_size", minValue=1, maxValue=10, step=1, callback=self._update_bar_size) gui.widgetLabel(box, "Annotations:") self.annotation_cb = gui.comboBox(box, self, "annotation_var_idx", contentsLength=14, callback=self._update_annotations) self.annotation_var_model = itemmodels.VariableListModel(parent=self) 
self.annotation_var_model[:] = ["None"] self.annotation_cb.setModel(self.annotation_var_model) ibox = gui.indentedBox(box, 5) self.ann_hidden_warning = warning = gui.widgetLabel( ibox, "(increase the width to show)") ibox.setFixedWidth(ibox.sizeHint().width()) warning.setVisible(False) gui.rubber(self.controlArea) gui.auto_send(self.buttonsArea, self, "auto_commit") self.scene = QGraphicsScene(self) self.view = StickyGraphicsView(self.scene) self.view.setRenderHint(QPainter.Antialiasing, True) self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft) self.mainArea.layout().addWidget(self.view) self.settingsAboutToBePacked.connect(self.pack_settings) def sizeHint(self): sh = self.controlArea.sizeHint() return sh.expandedTo(QSize(600, 720)) def pack_settings(self): if self.data and self._silplot is not None: self.pending_selection = list(self._silplot.selection()) else: self.pending_selection = None @Inputs.data @check_sql_input def set_data(self, data: Union[Table, DistMatrix, None]): """ Set the input dataset or distance matrix. 
""" self.closeContext() self.clear() try: if isinstance(data, Orange.misc.DistMatrix): self._set_distances(data) elif isinstance(data, Orange.data.Table): self._set_table(data) else: self.distances = None self.data = None except InputValidationError as err: self.Error.input_validation_error(err.message) self.distances = None self.data = None def _set_table(self, data: Table): self._setup_control_models(data.domain) self.data = data self.distances = None def _set_distances(self, distances: DistMatrix): if isinstance(distances.row_items, Orange.data.Table) and \ distances.axis == 1: data = distances.row_items else: raise ValidationError("Input matrix does not have associated data") if data is not None: self._setup_control_models(data.domain) self.distances = distances self.data = data def handleNewSignals(self): summary = len(self.data) if self.data else self.info.NoInput details = format_summary_details(self.data) if self.data else "" self.info.set_input_summary(summary, details) if not self._is_empty(): self._update() self._replot() if self.pending_selection is not None and self._silplot is not None: # If selection contains indices that are too large, the data # file must had been modified, so we ignore selection if max(self.pending_selection, default=-1) < len(self.data): self._silplot.setSelection(np.array( self.pending_selection)) self.pending_selection = None # Disable/enable the Distances GUI controls if applicable self._distances_gui_box.setEnabled(self.distances is None) self.unconditional_commit() def _setup_control_models(self, domain: Domain): groupvars = [ v for v in domain.variables + domain.metas if v.is_discrete and len(v.values) >= 2 ] if not groupvars: raise NoGroupVariable() self.cluster_var_model[:] = groupvars if domain.class_var in groupvars: self.cluster_var_idx = groupvars.index(domain.class_var) else: self.cluster_var_idx = 0 annotvars = [var for var in domain.metas if var.is_string] self.annotation_var_model[:] = ["None"] + annotvars 
self.annotation_var_idx = 1 if annotvars else 0 self.openContext(Orange.data.Domain(groupvars)) def _is_empty(self) -> bool: # Is empty (does not have any input). return (self.data is None or len(self.data) == 0) \ and self.distances is None def clear(self): """ Clear the widget state. """ self.data = None self.distances = None self._matrix = None self._mask = None self._silhouette = None self._labels = None self.cluster_var_model[:] = [] self.annotation_var_model[:] = ["None"] self._clear_scene() self.Error.clear() self.Warning.clear() def _clear_scene(self): # Clear the graphics scene and associated objects self.scene.clear() self.scene.setSceneRect(QRectF()) self.view.setSceneRect(QRectF()) self.view.setHeaderSceneRect(QRectF()) self.view.setFooterSceneRect(QRectF()) self._silplot = None def _invalidate_distances(self): # Invalidate the computed distance matrix and recompute the silhouette. self._matrix = None self._invalidate_scores() def _invalidate_scores(self): # Invalidate and recompute the current silhouette scores. self._labels = self._silhouette = self._mask = None self._update() self._replot() if self.data is not None: self.commit() def _ensure_matrix(self): # ensure self._matrix is computed if necessary if self._is_empty(): return if self._matrix is None: if self.distances is not None: self._matrix = np.asarray(self.distances) elif self.data is not None: data = self.data _, metric = self.Distances[self.distance_idx] if not metric.supports_discrete and any( a.is_discrete for a in data.domain.attributes): self.Warning.ignoring_categorical() data = Orange.distance.remove_discrete_features(data) try: self._matrix = np.asarray(metric(data)) except MemoryError: self.Error.memory_error() return except ValueError as err: self.Error.value_error(str(err)) return else: assert False, "invalid state" def _update(self): # Update/recompute the effective distances and scores as required. 
self._clear_messages() if self._is_empty(): self._reset_all() return self._ensure_matrix() if self._matrix is None: return labelvar = self.cluster_var_model[self.cluster_var_idx] labels, _ = self.data.get_column_view(labelvar) labels = np.asarray(labels, dtype=float) cluster_mask = np.isnan(labels) dist_mask = np.isnan(self._matrix).all(axis=0) mask = cluster_mask | dist_mask labels = labels.astype(int) labels = labels[~mask] labels_unq = np.unique(labels) if len(labels_unq) < 2: self.Error.need_two_clusters() labels = silhouette = mask = None elif len(labels_unq) == len(labels): self.Error.singleton_clusters_all() labels = silhouette = mask = None else: silhouette = sklearn.metrics.silhouette_samples( self._matrix[~mask, :][:, ~mask], labels, metric="precomputed") self._mask = mask self._labels = labels self._silhouette = silhouette if mask is not None: count_missing = np.count_nonzero(cluster_mask) if count_missing: self.Warning.missing_cluster_assignment( count_missing, s="s" if count_missing > 1 else "") count_nandist = np.count_nonzero(dist_mask) if count_nandist: self.Warning.nan_distances(count_nandist, s="s" if count_nandist > 1 else "") def _reset_all(self): self._mask = None self._silhouette = None self._labels = None self._matrix = None self._clear_scene() def _clear_messages(self): self.Error.clear() self.Warning.clear() def _set_bar_height(self): visible = self.bar_size >= 5 self._silplot.setBarHeight(self.bar_size) self._silplot.setRowNamesVisible(visible) self.ann_hidden_warning.setVisible(not visible and self.annotation_var_idx > 0) def _replot(self): # Clear and replot/initialize the scene self._clear_scene() if self._silhouette is not None and self._labels is not None: var = self.cluster_var_model[self.cluster_var_idx] self._silplot = silplot = SilhouettePlot() self._set_bar_height() if self.group_by_cluster: silplot.setScores(self._silhouette, self._labels, var.values, var.colors) else: silplot.setScores(self._silhouette, 
np.zeros(len(self._silhouette), dtype=int), [""], np.array([[63, 207, 207]])) self.scene.addItem(silplot) self._update_annotations() silplot.selectionChanged.connect(self.commit) silplot.layout().activate() self._update_scene_rect() silplot.geometryChanged.connect(self._update_scene_rect) def _update_bar_size(self): if self._silplot is not None: self._set_bar_height() def _update_annotations(self): if 0 < self.annotation_var_idx < len(self.annotation_var_model): annot_var = self.annotation_var_model[self.annotation_var_idx] else: annot_var = None self.ann_hidden_warning.setVisible(self.bar_size < 5 and annot_var is not None) if self._silplot is not None: if annot_var is not None: column, _ = self.data.get_column_view(annot_var) if self._mask is not None: assert column.shape == self._mask.shape # pylint: disable=invalid-unary-operand-type column = column[~self._mask] self._silplot.setRowNames( [annot_var.str_val(value) for value in column]) else: self._silplot.setRowNames(None) def _update_scene_rect(self): geom = self._silplot.geometry() self.scene.setSceneRect(geom) self.view.setSceneRect(geom) header = self._silplot.topScaleItem() footer = self._silplot.bottomScaleItem() def extend_horizontal(rect): # type: (QRectF) -> QRectF rect = QRectF(rect) rect.setLeft(geom.left()) rect.setRight(geom.right()) return rect margin = 3 if header is not None: self.view.setHeaderSceneRect( extend_horizontal(header.geometry().adjusted(0, 0, 0, margin))) if footer is not None: self.view.setFooterSceneRect( extend_horizontal(footer.geometry().adjusted(0, -margin, 0, 0))) def commit(self): """ Commit/send the current selection to the output. 
""" selected = indices = data = None if self.data is not None: selectedmask = np.full(len(self.data), False, dtype=bool) if self._silplot is not None: indices = self._silplot.selection() assert (np.diff(indices) > 0).all(), "strictly increasing" if self._mask is not None: # pylint: disable=invalid-unary-operand-type indices = np.flatnonzero(~self._mask)[indices] selectedmask[indices] = True if self._mask is not None: scores = np.full(shape=selectedmask.shape, fill_value=np.nan) # pylint: disable=invalid-unary-operand-type scores[~self._mask] = self._silhouette else: scores = self._silhouette var = self.cluster_var_model[self.cluster_var_idx] domain = self.data.domain proposed = "Silhouette ({})".format(escape(var.name)) names = [ var.name for var in itertools.chain( domain.attributes, domain.class_vars, domain.metas) ] unique = get_unique_names(names, proposed) silhouette_var = Orange.data.ContinuousVariable(unique) domain = Orange.data.Domain(domain.attributes, domain.class_vars, domain.metas + (silhouette_var, )) data = self.data.transform(domain) if np.count_nonzero(selectedmask): selected = self.data.from_table(domain, self.data, np.flatnonzero(selectedmask)) if selected is not None: selected[:, silhouette_var] = np.c_[scores[selectedmask]] data[:, silhouette_var] = np.c_[scores] summary = len(selected) if selected else self.info.NoOutput details = format_summary_details(selected) if selected else "" self.info.set_output_summary(summary, details) self.Outputs.selected_data.send(selected) self.Outputs.annotated_data.send(create_annotated_table(data, indices)) def send_report(self): if not len(self.cluster_var_model): return self.report_plot() caption = "Silhouette plot ({} distance), clustered by '{}'".format( self.Distances[self.distance_idx][0], self.cluster_var_model[self.cluster_var_idx]) if self.annotation_var_idx and self._silplot.rowNamesVisible(): caption += ", annotated with '{}'".format( self.annotation_var_model[self.annotation_var_idx]) 
self.report_caption(caption) def onDeleteWidget(self): self.clear() super().onDeleteWidget()
class OWExplainPredictions(OWWidget):
    """Widget explaining a model's prediction for a single sample.

    The explanation is computed by ``ExplainPredictions.anytime_explain``
    submitted to a ``ThreadExecutor``. Progress, intermediate explanation
    tables and the model prediction are delivered back to the widget through
    queued ``QMetaObject.invokeMethod`` calls, and the best-so-far
    explanations are sent to the ``Explanations`` output.
    """
    name = "Explain Predictions"
    description = ("Computes attribute contributions to the final prediction "
                   "with an approximation algorithm for shapely value")
    icon = "icons/ExplainPredictions.svg"
    priority = 200
    gui_error = settings.Setting(0.05)
    gui_p_val = settings.Setting(0.05)
    gui_num_atr = settings.Setting(20)
    sort_index = settings.Setting(SortBy.ABSOLUTE)

    class Inputs:
        data = Input("Data", Table, default=True)
        model = Input("Model", Model, multiple=False)
        sample = Input("Sample", Table)

    class Outputs:
        explanations = Output("Explanations", Table)

    class Error(OWWidget.Error):
        sample_too_big = widget.Msg("Can only explain one sample at the time.")

    class Warning(OWWidget.Warning):
        unknowns_increased = widget.Msg(
            "Number of unknown values increased, "
            "Data and Sample domains mismatch.")

    def __init__(self):
        """Initialise widget state and build the control and main areas."""
        super().__init__()
        self.data = None
        self.model = None
        self.to_explain = None
        self.explanations = None
        # True while the button reads "Stop Computation" (see toggle_button).
        self.stop = True
        # ExplainPredictions instance, created lazily in commit_calc.
        self.e = None

        self._task = None
        self._executor = ThreadExecutor()
        # Read by _task_finished; initialised here so the attribute always
        # exists, not only after the first commit_calc()/cancel().
        self.was_canceled = False

        info_box = gui.vBox(self.controlArea, "Info")
        self.data_info = gui.widgetLabel(info_box, "Data: N/A")
        self.model_info = gui.widgetLabel(info_box, "Model: N/A")
        self.sample_info = gui.widgetLabel(info_box, "Sample: N/A")

        criteria_box = gui.vBox(self.controlArea, "Stopping criteria")
        self.error_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_error",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error < ",
                                   spinType=float,
                                   callback=self._update_error_spin,
                                   controlWidth=80,
                                   keyboardTracking=False)

        self.p_val_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_p_val",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error p-value < ",
                                   spinType=float,
                                   callback=self._update_p_val_spin,
                                   controlWidth=80,
                                   keyboardTracking=False)

        plot_properties_box = gui.vBox(self.controlArea, "Display features")
        self.num_atr_spin = gui.spin(plot_properties_box,
                                     self,
                                     "gui_num_atr",
                                     1,
                                     100,
                                     step=1,
                                     label="Show attributes",
                                     callback=self._update_num_atr_spin,
                                     controlWidth=80,
                                     keyboardTracking=False)

        self.sort_combo = gui.comboBox(plot_properties_box,
                                       self,
                                       "sort_index",
                                       label="Rank by",
                                       items=SortBy.items(),
                                       orientation=Qt.Horizontal,
                                       callback=self._update_combo)

        gui.rubber(self.controlArea)

        self.cancel_button = gui.button(
            self.controlArea,
            self,
            "Stop Computation",
            callback=self.toggle_button,
            autoDefault=True,
            tooltip="Stops and restarts computation")
        self.cancel_button.setDisabled(True)

        predictions_box = gui.vBox(self.mainArea, "Model prediction")
        self.predict_info = gui.widgetLabel(predictions_box, "")

        self.mainArea.setMinimumWidth(700)
        self.resize(700, 400)

        class _GraphicsView(QGraphicsView):
            # Base view: fills in default Qt properties unless the caller
            # supplied its own value for them in **kwargs.
            def __init__(self, scene, parent, **kwargs):
                for k, v in dict(
                        verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                        horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                        viewportUpdateMode=(
                            QGraphicsView.BoundingRectViewportUpdate),
                        renderHints=(QPainter.Antialiasing |
                                     QPainter.TextAntialiasing |
                                     QPainter.SmoothPixmapTransform),
                        alignment=(Qt.AlignTop | Qt.AlignLeft),
                        sizePolicy=QSizePolicy(
                            QSizePolicy.MinimumExpanding,
                            QSizePolicy.MinimumExpanding)).items():
                    kwargs.setdefault(k, v)

                super().__init__(scene, parent, **kwargs)

        class GraphicsView(_GraphicsView):
            # Main (scrollable) view; redraws the plot whenever it is resized.
            def __init__(self, scene, parent):
                super().__init__(
                    scene, parent,
                    verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                    styleSheet='QGraphicsView {background: white}')
                self.viewport().setMinimumWidth(500)
                self._is_resizing = False

            # Class-level reference to the enclosing widget (`self` of the
            # surrounding OWExplainPredictions.__init__), used by resizeEvent.
            w = self

            def resizeEvent(self, resizeEvent):
                self._is_resizing = True
                self.w.draw()
                self._is_resizing = False
                return super().resizeEvent(resizeEvent)

            def is_resizing(self):
                return self._is_resizing

            def sizeHint(self):
                return QSize(600, 300)

        class FixedSizeGraphicsView(_GraphicsView):
            # View used for the header and footer strips.
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 sizePolicy=QSizePolicy(
                                     QSizePolicy.MinimumExpanding,
                                     QSizePolicy.Minimum))

            def sizeHint(self):
                return QSize(600, 30)

        # All views share the same scene, but show different parts of it.
        self.box_scene = QGraphicsScene(self)

        self.box_view = GraphicsView(self.box_scene, self)
        self.header_view = FixedSizeGraphicsView(self.box_scene, self)
        self.footer_view = FixedSizeGraphicsView(self.box_scene, self)

        self.mainArea.layout().addWidget(self.header_view)
        self.mainArea.layout().addWidget(self.box_view)
        self.mainArea.layout().addWidget(self.footer_view)

        self.painter = None

    def draw(self):
        """Redraw the explanations and lay out the header/body/footer views.

        Clears the shared scene, paints the current explanations (if any)
        with ``GraphAttributes`` and then assigns each of the three views the
        part of the scene it should display.
        """
        self.box_scene.clear()
        wp = self.box_view.viewport().rect()
        header_height = 30
        if self.explanations is not None:
            self.painter = GraphAttributes(
                self.box_scene,
                min(self.gui_num_atr, self.explanations.Y.shape[0]))
            self.painter.paint(wp, self.explanations, header_h=header_height)

        # Set appropriate scene rects for the different views. The bounding
        # rect is queried once instead of once per coordinate.
        bounds = self.box_scene.itemsBoundingRect()
        rect = QRectF(bounds.x(), bounds.y(), bounds.width(), bounds.height())

        self.box_scene.setSceneRect(rect)
        self.box_view.setSceneRect(
            rect.x(), rect.y() + header_height + 2,
            rect.width(), rect.height() - 80)
        self.header_view.setSceneRect(
            rect.x(), rect.y(), rect.width(), 10)
        self.header_view.setFixedHeight(header_height)
        self.footer_view.setSceneRect(
            rect.x(), rect.y() + rect.height() - 50, rect.width(), 35)

    def sort_explanations(self):
        """Sort ``self.explanations`` according to the "Rank by" combo box.

        Sorting uses the first column of ``X`` (descending for POSITIVE,
        ascending for NEGATIVE, descending by absolute value for ABSOLUTE)
        or the lower-cased first meta column for BY_NAME. Any other
        ``sort_index`` leaves the table untouched.
        """
        if self.sort_index == SortBy.POSITIVE:
            self.explanations = self.explanations[np.argsort(
                self.explanations.X[:, 0])][::-1]
        elif self.sort_index == SortBy.NEGATIVE:
            self.explanations = self.explanations[np.argsort(
                self.explanations.X[:, 0])]
        elif self.sort_index == SortBy.ABSOLUTE:
            self.explanations = self.explanations[np.argsort(
                np.abs(self.explanations.X[:, 0]))][::-1]
        elif self.sort_index == SortBy.BY_NAME:
            lowered = np.array(
                list(map(np.chararray.lower, self.explanations.metas[:, 0])))
            self.explanations = self.explanations[np.argsort(lowered)]

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set input 'Data' and update the info label."""
        self.data = data
        self.explanations = None
        self.data_info.setText("Data: N/A")
        # A new data set invalidates the cached explainer.
        self.e = None
        if data is not None:
            if data.X.shape[0] == 1:
                inst = "1 instance and "
            else:
                inst = str(data.X.shape[0]) + " instances and "
            if data.X.shape[1] == 1:
                feat = "1 feature "
            else:
                feat = str(data.X.shape[1]) + " features"
            self.data_info.setText("Data: " + inst + feat)

    @Inputs.model
    def set_predictor(self, model):
        """Set input 'Model' and update the info label."""
        self.model = model
        self.model_info.setText("Model: N/A")
        self.explanations = None
        # A new model invalidates the cached explainer.
        self.e = None
        if model is not None:
            self.model_info.setText("Model: " + str(model.name))

    @Inputs.sample
    @check_sql_input
    def set_sample(self, sample):
        """Set input 'Sample'; only a single-row table is accepted."""
        self.to_explain = sample
        self.explanations = None
        self.Error.sample_too_big.clear()
        self.sample_info.setText("Sample: N/A")
        if sample is not None:
            if len(sample.X) != 1:
                self.to_explain = None
                self.Error.sample_too_big()
            else:
                if sample.X.shape[1] == 1:
                    feat = "1 feature"
                else:
                    feat = str(sample.X.shape[1]) + " features"
                self.sample_info.setText("Sample: " + feat)
        if self.e is not None:
            self.e.saved = False

    def handleNewSignals(self):
        """Cancel any running task and restart with the current inputs."""
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self.predict_info.setText("")
        self.Warning.unknowns_increased.clear()
        self.stop = True
        self.cancel_button.setText("Stop Computation")
        self.commit_calc_or_output()

    def commit_calc_or_output(self):
        """Start a computation if data and sample are set, else just output."""
        if self.data is not None and self.to_explain is not None:
            self.commit_calc()
        else:
            self.commit_output()

    def commit_calc(self):
        """Transform the sample into the data domain and start explaining.

        Warns when the transformation increased the number of unknown values
        in the sample. If a model is present, submits
        ``ExplainPredictions.anytime_explain`` to the executor and wires up
        the callbacks that report back to this widget.
        """
        num_nan = np.count_nonzero(np.isnan(self.to_explain.X[0]))

        self.to_explain = self.to_explain.transform(self.data.domain)
        if num_nan != np.count_nonzero(np.isnan(self.to_explain.X[0])):
            self.Warning.unknowns_increased()

        if self.model is None:
            return

        # calculate contributions
        if self.e is None:
            self.e = ExplainPredictions(self.data,
                                        self.model,
                                        batch_size=min(
                                            len(self.data.X), 500),
                                        p_val=self.gui_p_val,
                                        error=self.gui_error)
        self._task = task = Task()

        def callback(progress):
            # Update the progress bar on the widget's thread; the returned
            # flag reports whether the task was cancelled.
            QMetaObject.invokeMethod(
                self, "set_progress_value", Qt.QueuedConnection,
                Q_ARG(int, progress))
            if task.canceled:
                return True
            return False

        def callback_update(table):
            QMetaObject.invokeMethod(
                self, "update_view", Qt.QueuedConnection,
                Q_ARG(Orange.data.Table, table))

        def callback_prediction(class_value):
            QMetaObject.invokeMethod(
                self, "update_model_prediction", Qt.QueuedConnection,
                Q_ARG(float, class_value))

        self.was_canceled = False
        explain_func = partial(
            self.e.anytime_explain, self.to_explain[0],
            callback=callback,
            update_func=callback_update,
            update_prediction=callback_prediction)

        self.progressBarInit(processEvents=None)
        task.future = self._executor.submit(explain_func)
        task.watcher = FutureWatcher(task.future)
        task.watcher.done.connect(self._task_finished)
        self.cancel_button.setDisabled(False)

    @pyqtSlot(Orange.data.Table)
    def update_view(self, table):
        """Store, sort, draw and output a (possibly intermediate) result."""
        self.explanations = table
        self.sort_explanations()
        self.draw()
        self.commit_output()

    @pyqtSlot(float)
    def update_model_prediction(self, value):
        """Show the model prediction reported by the explainer."""
        self._print_prediction(value)

    @pyqtSlot(int)
    def set_progress_value(self, value):
        """Set the progress bar to `value`."""
        self.progressBarSet(value, processEvents=False)

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """Handle completion of the explanation task.

        Parameters
        ----------
        f : concurrent.futures.Future
            Future instance holding the result of the explanation; on
            success ``f.result()[1]`` is passed to `update_view`.
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        if not self.was_canceled:
            self.cancel_button.setDisabled(True)

        try:
            results = f.result()
        except Exception as ex:  # pylint: disable=broad-except
            # Report the failure. The previous code additionally iterated a
            # non-existent `self.results` attribute here, which raised
            # AttributeError and left the progress bar unfinished.
            logging.getLogger(__name__).exception(
                "Exception occurred during explanation")
            self.error("Exception occured during evaluation: {!r}".format(ex))
        else:
            self.update_view(results[1])

        self.progressBarFinished(processEvents=False)

    def commit_output(self):
        """Send the best-so-far results forward."""
        self.Outputs.explanations.send(self.explanations)

    def toggle_button(self):
        """Stop the running computation, or restart it if already stopped."""
        if self.stop:
            self.stop = False
            self.cancel_button.setText("Restart Computation")
            self.cancel()
        else:
            self.stop = True
            self.cancel_button.setText("Stop Computation")
            self.commit_calc_or_output()

    def cancel(self):
        """Cancel the current task (if any)."""
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self.was_canceled = True
            # Finish synchronously since the slot was just disconnected.
            self._task_finished(self._task.future)

    def _print_prediction(self, class_value):
        """Display the model prediction in the "Model prediction" box.

        Parameters
        ----------
        class_value : float
            Number representing either index of predicted class value,
            looked up in domain, or predicted value (regression).
        """
        class_var = self.data.domain.class_vars[0]
        name = class_var.name
        if isinstance(class_var, ContinuousVariable):
            self.predict_info.setText(name + ": " + str(class_value))
        else:
            self.predict_info.setText(
                name + ": " + class_var.values[int(class_value)])

    def _update_error_spin(self):
        """Apply the new error threshold and restart the computation."""
        self.cancel()
        if self.e is not None:
            self.e.error = self.gui_error
        self.handleNewSignals()

    def _update_p_val_spin(self):
        """Apply the new p-value threshold and restart the computation."""
        self.cancel()
        if self.e is not None:
            self.e.p_val = self.gui_p_val
        self.handleNewSignals()

    def _update_num_atr_spin(self):
        """Restart the computation after the attribute count changed."""
        self.cancel()
        self.handleNewSignals()

    def _update_combo(self):
        """Re-sort, redraw and re-send explanations after a ranking change."""
        if self.explanations is not None:
            self.sort_explanations()
            self.draw()
            self.commit_output()

    def onDeleteWidget(self):
        """Cancel any running task before the widget is deleted."""
        self.cancel()
        super().onDeleteWidget()
class OWExplainPrediction(OWWidget, ConcurrentWidgetMixin):
    """Widget showing a stripe (force) plot for a single prediction.

    The explanation is computed by ``run`` through ``ConcurrentWidgetMixin``
    for the first instance of 'Data'; the per-feature scores of the selected
    target are sent to the ``Scores`` output.
    """
    name = "Explain Prediction"
    description = "Prediction explanation widget."
    icon = "icons/ExplainPred.svg"
    priority = 110

    class Inputs:
        model = Input("Model", Model)
        background_data = Input("Background Data", Table)
        data = Input("Data", Table)

    class Outputs:
        scores = Output("Scores", Table)

    class Error(OWWidget.Error):
        domain_transform_err = Msg("{}")
        unknown_err = Msg("{}")

    class Information(OWWidget.Information):
        multiple_instances = Msg("Explaining prediction for the first "
                                 "instance in 'Data'.")

    settingsHandler = ClassValuesContextHandler()
    target_index = ContextSetting(0)
    stripe_len = Setting(10)

    graph_name = "scene"

    def __init__(self):
        """Initialise state and build the GUI."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.__results = None  # type: Optional[Results]
        self.model = None  # type: Optional[Model]
        self.background_data = None  # type: Optional[Table]
        self.data = None  # type: Optional[Table]
        self._stripe_plot = None  # type: Optional[StripePlot]
        # Texts shown in the "Prediction info" box (bound via gui.label).
        self.mo_info = ""
        self.bv_info = ""
        self.setup_gui()

    def setup_gui(self):
        """Create the control area, the plot and the initial input summary."""
        self._add_controls()
        self._add_plot()
        self.info.set_input_summary(self.info.NoInput)

    def _add_plot(self):
        """Create the graphics scene/view and add the view to the main area."""
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignVCenter | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def _add_controls(self):
        """Create the target combo, the zoom slider and the info labels."""
        box = gui.vBox(self.controlArea, "Target class")
        self._target_combo = gui.comboBox(box, self, "target_index",
                                          callback=self.__target_combo_changed,
                                          contentsLength=12)

        box = gui.hBox(self.controlArea, "Zoom")
        gui.hSlider(box, self, "stripe_len", None, minValue=1, maxValue=500,
                    createLabel=False, callback=self.__size_slider_changed)

        gui.rubber(self.controlArea)

        box = gui.vBox(self.controlArea, "Prediction info")
        gui.label(box, self, "%(mo_info)s")  # type: QLabel
        bv_label = gui.label(box, self, "%(bv_info)s")  # type: QLabel
        bv_label.setToolTip("The average prediction for selected class.")

    def __target_combo_changed(self):
        self.update_scene()

    def __size_slider_changed(self):
        if self._stripe_plot is not None:
            self._stripe_plot.set_height(self.stripe_len)

    @Inputs.data
    @check_sql_input
    def set_data(self, data: Optional[Table]):
        """Store the 'Data' input."""
        self.data = data

    @Inputs.background_data
    @check_sql_input
    def set_background_data(self, data: Optional[Table]):
        """Store the 'Background Data' input."""
        self.background_data = data

    @Inputs.model
    def set_model(self, model: Optional[Model]):
        """Store the 'Model' input and re-open the class-value context."""
        self.closeContext()
        self.model = model
        self.setup_controls()
        self.openContext(self.model.domain.class_var if self.model else None)

    def setup_controls(self):
        """Populate the target combo from the model's class variable.

        Raises
        ------
        NotImplementedError
            If the model's domain has neither a discrete nor a continuous
            class.
        """
        self._target_combo.clear()
        self._target_combo.setEnabled(True)
        if self.model is not None:
            if self.model.domain.has_discrete_class:
                self._target_combo.addItems(self.model.domain.class_var.values)
                self.target_index = 0
            elif self.model.domain.has_continuous_class:
                # No class values to choose from for a continuous target.
                self.target_index = -1
                self._target_combo.setEnabled(False)
            else:
                raise NotImplementedError

    def handleNewSignals(self):
        """Reset the widget and start the explanation task."""
        self.clear()
        self.check_inputs()
        # Only the first instance of 'Data' is explained.
        data = self.data and self.data[:1]
        self.start(run, data, self.background_data, self.model)

    def clear(self):
        """Drop results, cancel the running task and clear scene/messages."""
        self.mo_info = ""
        self.bv_info = ""
        self.__results = None
        self.cancel()
        self.clear_scene()
        self.clear_messages()

    def check_inputs(self):
        """Show the multiple-instances note and update the input summary."""
        if self.data and len(self.data) > 1:
            self.Information.multiple_instances()

        summary, details, kwargs = self.info.NoInput, "", {}
        if self.data or self.background_data:
            n_data = len(self.data) if self.data else 0
            n_background_data = len(self.background_data) \
                if self.background_data else 0
            summary = f"{self.info.format_number(n_background_data)}, " \
                      f"{self.info.format_number(n_data)}"
            kwargs = {"format": Qt.RichText}
            details = format_multiple_summaries([
                ("Background data", self.background_data),
                ("Data", self.data)
            ])
        self.info.set_input_summary(summary, details, **kwargs)

    def clear_scene(self):
        """Remove all items from the scene and reset the scene rects."""
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self.view.setSceneRect(QRectF())
        self._stripe_plot = None

    def update_scene(self):
        """Rebuild the plot from the current results and send the scores.

        Sends ``None`` on the ``Scores`` output when there are no results.
        """
        self.clear_scene()
        self.mo_info = ""
        self.bv_info = ""
        scores = None
        if self.__results is not None:
            data = self.__results.transformed_data
            pred = self.__results.predictions
            base = self.__results.base_value
            values, _, labels, ranges = prepare_force_plot_data(
                self.__results.values, data, pred, self.target_index)

            # Only a single instance is explained, hence index 0.
            index = 0
            HIGH, LOW = 0, 1
            plot_data = PlotData(high_values=values[index][HIGH],
                                 low_values=values[index][LOW][::-1],
                                 high_labels=labels[index][HIGH],
                                 low_labels=labels[index][LOW][::-1],
                                 value_range=ranges[index],
                                 model_output=pred[index][self.target_index],
                                 base_value=base[self.target_index])
            self.setup_plot(plot_data)

            self.mo_info = f"Model prediction: {_str(plot_data.model_output)}"
            self.bv_info = f"Base value: {_str(plot_data.base_value)}"

            assert isinstance(self.__results.values, list)
            scores = self.__results.values[self.target_index][0, :]
            names = [a.name for a in data.domain.attributes]
            scores = self.create_scores_table(scores, names)
        self.Outputs.scores.send(scores)

    def setup_plot(self, plot_data: PlotData):
        """Create the stripe plot for `plot_data` and add it to the scene."""
        self._stripe_plot = StripePlot()
        self._stripe_plot.set_data(plot_data, self.stripe_len)
        self._stripe_plot.layout().activate()
        self._stripe_plot.geometryChanged.connect(self.update_scene_rect)
        self.scene.addItem(self._stripe_plot)
        self.update_scene_rect()

    def update_scene_rect(self):
        """Fit the scene and view rects to the stripe plot's geometry."""
        # Guard against being invoked when no plot exists, mirroring the
        # check in __size_slider_changed.
        if self._stripe_plot is None:
            return
        geom = self._stripe_plot.geometry()
        self.scene.setSceneRect(geom)
        self.view.setSceneRect(geom)

    @staticmethod
    def create_scores_table(scores: np.ndarray, names: List[str]) -> Table:
        """Build a table with one "Score" column and a "Feature" meta column.

        Parameters
        ----------
        scores : np.ndarray
            One-dimensional array of feature scores.
        names : List[str]
            Feature names, one per score.
        """
        domain = Domain([ContinuousVariable("Score")],
                        metas=[StringVariable("Feature")])
        scores_table = Table(domain, scores[:, None],
                             metas=np.array(names)[:, None])
        scores_table.name = "Feature Scores"
        return scores_table

    def on_partial_result(self, _):
        """Partial results are not used."""
        pass

    def on_done(self, results: Optional[RunnerResults]):
        """Store the finished results and redraw the plot."""
        self.__results = results
        self.update_scene()

    def on_exception(self, ex: Exception):
        """Show the task's exception as a widget error message."""
        if isinstance(ex, DomainTransformationError):
            self.Error.domain_transform_err(ex)
        else:
            self.Error.unknown_err(ex)

    def onDeleteWidget(self):
        """Shut down the task machinery before the widget is deleted."""
        self.shutdown()
        super().onDeleteWidget()

    def sizeHint(self) -> QSize:
        """Return the control area's size hint expanded to at least 700x700."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(700, 700))

    def send_report(self):
        """Report the target class and the plot; needs all three inputs."""
        if not self.data or not self.background_data or not self.model:
            return
        items = {"Target class": "None"}
        if self.model.domain.has_discrete_class:
            class_var = self.model.domain.class_var
            items["Target class"] = class_var.values[self.target_index]
        self.report_items(items)
        self.report_plot()
class TestHeatmapGridWidget(GuiTest):
    """GUI smoke tests for ``HeatmapGridWidget`` placed in a graphics scene."""
    scene: QGraphicsScene
    view: QGraphicsView

    def setUp(self) -> None:
        super().setUp()
        self.view = QGraphicsView()
        self.scene = QGraphicsScene(self.view)
        self.view.setScene(self.scene)

    def tearDown(self) -> None:
        self.scene.clear()
        self.scene.deleteLater()
        self.scene = None
        self.view.deleteLater()
        self.view = None
        super().tearDown()

    # Two-leaf cluster tree shared by the "2-2-cl" row and column items.
    _c2 = Tree(
        ClusterData((0, 1), 0.5),
        (
            Tree(SingletonData((0, 0), 0, 0), ()),
            Tree(SingletonData((1, 1), 0, 1), ()),
        ),
    )

    # Heatmap configurations keyed by a short shape description.
    _Data = {
        "0-0": HeatmapGridWidget.Parts(
            rows=[],
            columns=[],
            data=np.zeros(shape=(0, 0)),
            span=(0, 0),
        ),
        "1-0": HeatmapGridWidget.Parts(
            rows=[],
            columns=[],
            data=np.zeros(shape=(0, 0)),
            span=(0, 0),
        ),
        "0-1": HeatmapGridWidget.Parts(
            rows=[],
            columns=[HeatmapGridWidget.ColumnItem("a", [0])],
            data=np.zeros(shape=(0, 1)),
            span=(0, 1),
        ),
        "1-1": HeatmapGridWidget.Parts(
            rows=[HeatmapGridWidget.RowItem("a", [0])],
            columns=[HeatmapGridWidget.ColumnItem("a", [0])],
            data=np.zeros(shape=(1, 1)),
            span=(0, 1),
            row_names=["a"],
            col_names=["b"],
        ),
        "2-2-split": HeatmapGridWidget.Parts(
            rows=[
                HeatmapGridWidget.RowItem("a", [0]),
                HeatmapGridWidget.RowItem("b", [1]),
            ],
            columns=[
                HeatmapGridWidget.ColumnItem("a", [0]),
                HeatmapGridWidget.ColumnItem("a", [1]),
            ],
            data=np.zeros(shape=(2, 2)),
            span=(-1, 1),
            row_names=["a", "b"],
            col_names=["b", "b"],
        ),
        "2-2-cl": HeatmapGridWidget.Parts(
            rows=[HeatmapGridWidget.RowItem("", [0, 1], _c2)],
            columns=[HeatmapGridWidget.ColumnItem("", [0, 1], _c2)],
            data=np.zeros(shape=(2, 2)),
            span=(-1, 1),
            row_names=["a", "b"],
            col_names=["b", "b"],
        ),
        "2-2": HeatmapGridWidget.Parts(
            rows=[HeatmapGridWidget.RowItem("", [0, 1])],
            columns=[HeatmapGridWidget.ColumnItem("", [0, 1])],
            data=np.zeros(shape=(2, 2)),
            span=(-1, 1),
            row_names=["a", "b"],
            col_names=["b", "b"],
        ),
    }

    def test_widget(self):
        """Every configuration can be set and its header/footer queried."""
        grid = HeatmapGridWidget()
        self.scene.addItem(grid)

        for parts in self._Data.values():
            grid.setHeatmaps(parts)
            grid.headerGeometry()
            grid.footerGeometry()

    def test_widget_annotations(self):
        """Exercise the legend, averages, label and annotation setters."""
        grid = HeatmapGridWidget()
        self.scene.addItem(grid)
        grid.setHeatmaps(self._Data["2-2"])
        # Coverage. The game.
        for shown in (True, False):
            grid.setLegendVisible(shown)
        for shown in (True, False):
            grid.setShowAverages(shown)
        for row_labels in (None, ["1", "2"]):
            grid.setRowLabels(row_labels)
        for shown in (False, True):
            grid.setRowLabelsVisible(shown)
        for column_labels in (None, ["1", "2"]):
            grid.setColumnLabels(column_labels)
        for mode in (Qt.IgnoreAspectRatio,
                     Qt.KeepAspectRatio,
                     Qt.KeepAspectRatioByExpanding):
            grid.setAspectRatioMode(mode)

        label_positions = (
            HeatmapGridWidget.NoPosition,
            HeatmapGridWidget.PositionTop,
            HeatmapGridWidget.PositionBottom,
            HeatmapGridWidget.PositionTop | HeatmapGridWidget.PositionBottom,
        )
        for position in label_positions:
            grid.setColumnLabelsPosition(position)

        side_colors = CategoricalColorMap(
            np.array([[255] * 3, [0] * 3]), names=["a", "b"])
        grid.setRowSideColorAnnotations(np.array([0, 1]), side_colors)
        grid.setRowSideColorAnnotations(None)

    def test_selection(self):
        """Mouse and programmatic selection emit the expected signals."""
        grid = HeatmapGridWidget()
        self.scene.addItem(grid)
        grid.setHeatmaps(self._Data["2-2"])
        grid.resize(grid.effectiveSizeHint(Qt.PreferredSize))

        heatmap = grid.layout().itemAt(grid.Row0, grid.Col0 + 1)
        origin = self.view.mapFromScene(heatmap.scenePos())
        viewport = self.view.viewport()
        click_at = origin + QPoint(1, 1)

        # A plain click selects the first row.
        finished = QSignalSpy(grid.selectionFinished)
        QTest.mouseClick(viewport, Qt.LeftButton, pos=click_at)
        self.assertSequenceEqual(list(finished), [[]])
        self.assertSequenceEqual(grid.selectedRows(), [0])

        # A Ctrl+click on the same spot toggles the row off again.
        finished = QSignalSpy(grid.selectionFinished)
        QTest.mouseClick(viewport, Qt.LeftButton, Qt.ControlModifier,
                         pos=click_at)
        self.assertSequenceEqual(list(finished), [[]])
        self.assertSequenceEqual(grid.selectedRows(), [])

        # Press, drag and release finishes exactly one selection.
        finished = QSignalSpy(grid.selectionFinished)
        QTest.mousePress(viewport, Qt.LeftButton, pos=click_at)
        mouseMove(viewport, Qt.LeftButton, pos=origin + QPoint(20, 20))
        QTest.mouseRelease(viewport, Qt.LeftButton,
                           pos=origin + QPoint(30, 40))
        self.assertSequenceEqual(list(finished), [[]])

        # Programmatic selection emits `changed` but not `finished`.
        finished = QSignalSpy(grid.selectionFinished)
        changed = QSignalSpy(grid.selectionChanged)
        grid.selectRows([1])
        self.assertSequenceEqual(list(finished), [])
        self.assertSequenceEqual(list(changed), [[]])

    def test_colormap(self):
        """Gradient color maps with and without a center can be applied."""
        grid = HeatmapGridWidget()
        self.scene.addItem(grid)
        grid.setHeatmaps(self._Data["2-2"])
        gradient = [[255] * 3, [0] * 3]
        grid.setColorMap(GradientColorMap(gradient))
        grid.setColorMap(GradientColorMap(gradient, center=0))
class OWMosaicDisplay(OWWidget):
    """Widget that draws a mosaic plot of up to four variables.

    Continuous variables are discretized (equal frequency, four bins) before
    drawing.  Cells can be selected with the mouse; the selected rows are
    sent on the "Selected Data" output and an annotated copy of the input on
    the annotated-data output.
    """
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220

    inputs = [("Data", Table, "set_data", Default),
              ("Data Subset", Table, "set_subset_data")]
    outputs = [("Selected Data", Table, widget.Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Table)]

    settingsHandler = DomainContextHandler()
    use_boxes = Setting(True)
    variable1 = ContextSetting("", exclude_metas=False)
    variable2 = ContextSetting("", exclude_metas=False)
    variable3 = ContextSetting("", exclude_metas=False)
    variable4 = ContextSetting("", exclude_metas=False)
    selection = ContextSetting(set())
    # interior_coloring is context setting to properly reset it
    # if the widget switches to regression and back (set setData)
    interior_coloring = ContextSetting(1)

    PEARSON, CLASS_DISTRIBUTION = 0, 1
    interior_coloring_opts = ["Pearson residuals",
                              "Class distribution"]
    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255),
                   QColor(110, 110, 255), QColor(0, 0, 255)]
    RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200),
                  QColor(255, 100, 100), QColor(255, 0, 0)]

    graph_name = "canvas"

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of continuous variables on SQL is not supported")

    def __init__(self):
        """Create the scene, the view and the controls of the widget."""
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.unprocessed_subset_data = None
        self.subset_data = None

        # (attributes, values, rectangle item) for every drawn cell
        self.areas = []

        self.canvas = QGraphicsScene()
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.attr_combos = [
            gui.comboBox(
                box, self, value="variable{}".format(i),
                orientation=Qt.Horizontal, contentsLength=12,
                callback=self.reset_graph,
                sendSelectedValue=True, valueType=str)
            for i in range(1, 5)]
        self.rb_colors = gui.radioButtonsInBox(
            self.controlArea, self, "interior_coloring",
            self.interior_coloring_opts, box="Interior Coloring",
            callback=self.update_graph)
        self.bar_button = gui.checkBox(
            gui.indentedBox(self.rb_colors),
            self, 'use_boxes', label='Compare with total',
            callback=self._compare_with_total)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        """Return the preferred size of the widget."""
        return QSize(530, 720)

    def _compare_with_total(self):
        """Switch to class-distribution coloring and redraw.

        Does nothing unless there is data with a discrete class.
        """
        if self.data and self.data.domain.has_discrete_class:
            self.interior_coloring = self.CLASS_DISTRIBUTION
            self.update_graph()

    def init_combos(self, data):
        """Refill the four variable combos from `data` and pick defaults.

        All combos are cleared first; with `data` set to None they stay
        empty.  Every combo but the first gets a leading "(None)" entry.
        """
        for combo in self.attr_combos:
            combo.clear()
        if data is None:
            return
        for combo in self.attr_combos[1:]:
            combo.addItem("(None)")

        icons = gui.attributeIconDict
        for attr in chain(data.domain, data.domain.metas):
            if attr.is_discrete or attr.is_continuous:
                for combo in self.attr_combos:
                    combo.addItem(icons[attr], attr.name)

        if self.attr_combos[0].count() > 0:
            self.variable1 = self.attr_combos[0].itemText(0)
            # index 2 is the second variable (index 0 is "(None)"); fall
            # back to "(None)" when there is only a single variable
            self.variable2 = self.attr_combos[1].itemText(
                2 * (self.attr_combos[1].count() > 2))
        self.variable3 = self.attr_combos[2].itemText(0)
        self.variable4 = self.attr_combos[3].itemText(0)

    def get_attr_list(self):
        """Return the names of the chosen variables, skipping "(None)"."""
        return [
            a for a in [self.variable1, self.variable2,
                        self.variable3, self.variable4]
            if a and a != "(None)"]

    def resizeEvent(self, e):
        """Redraw the plot after the widget is resized."""
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        """Redraw the plot when the widget is shown."""
        OWWidget.showEvent(self, ev)
        self.update_graph()

    def set_data(self, data):
        """Handle the "Data" input.

        Large SQL tables are sampled, combos are re-initialized, continuous
        variables are discretized into `self.discrete_data` and the coloring
        controls are enabled according to the class variable.
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.init_combos(self.data)
        if not self.data:
            self.discrete_data = None
            return
        if any(attr.is_continuous for attr in data.domain):
            self.discrete_data = Discretize(method=EqualFreq(n=4))(data)
        else:
            self.discrete_data = self.data

        if self.data.domain.class_var is None:
            self.rb_colors.setDisabled(True)
            disc_class = False
        else:
            self.rb_colors.setDisabled(False)
            disc_class = self.data.domain.has_discrete_class
            self.rb_colors.group.button(2).setDisabled(not disc_class)
            self.bar_button.setDisabled(not disc_class)
        self.interior_coloring = bool(disc_class)
        self.openContext(self.data)

        # if we first received subset we now call setSubsetData to process it
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None

    def set_subset_data(self, data):
        """Handle the "Data Subset" input.

        If the main data has not arrived yet, the subset is stored and
        processed later by `set_data`.  A subset that cannot be converted to
        the domain of the main data is dropped and a warning is shown.
        """
        self.Warning.incompatible_subset.clear()
        if self.data is None:
            self.unprocessed_subset_data = data
            return
        if data is None:
            self.subset_data = None
            return
        try:
            self.subset_data = data.from_table(self.data.domain, data)
        except Exception:  # conversion failure of any kind => incompatible
            self.subset_data = None
            self.Warning.incompatible_subset(shown=True)

    def handleNewSignals(self):
        """Redraw once after all inputs were set.

        This is called by the widget after set_data and set_subset_data, so
        the graph is updated only once.
        """
        self.reset_graph()

    def clear_selection(self):
        """Deselect all cells and update the outputs."""
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def reset_graph(self):
        """Clear the selection and redraw the plot."""
        self.clear_selection()
        self.update_graph()

    def update_selection_rects(self):
        """Draw a dotted black outline around selected cells."""
        for i, (_, _, area) in enumerate(self.areas):
            if i in self.selection:
                area.setPen(QPen(Qt.black, 3, Qt.DotLine))
            else:
                area.setPen(QPen())

    def select_area(self, index, ev):
        """React to a mouse click on the cell with the given index.

        Only left-button clicks count; with Ctrl held the cell is toggled,
        otherwise it replaces the current selection.
        """
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        """Send the rows in the selected cells and the annotated table."""
        if not self.selection or self.data is None:
            self.send("Selected Data", None)
            self.send(ANNOTATED_DATA_SIGNAL_NAME,
                      create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, row_id in enumerate(self.data.ids)
                   if row_id in idset]
        if self.discrete_data is not self.data:
            # rows were matched on the discretized copy; output originals
            selection = self.data[sel_idx]
        self.send("Selected Data", selection)
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(self.data, sel_idx))

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot(self.canvas)

    def update_graph(self):
        """Clear the scene and draw the mosaic, its labels and the legend."""
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs=None, used_vals=None,
                      attr_vals=""):
            """Recursively split the given rectangle by `attr_list[0]`.

            `used_attrs`/`used_vals` are the variables and values that
            already condition this rectangle; `attr_vals` is the same
            condition as a "-"-joined key into `conditionaldict`.
            """
            used_attrs = [] if used_attrs is None else used_attrs
            used_vals = [] if used_vals is None else used_vals
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            gaps = len(values) - 1
            if side % 2 == 0:  # we are drawing on the x axis
                extent = x1 - x0
            else:  # we are drawing on the y axis
                extent = y1 - y0
            # remove the space needed for separating different attr. values
            whole = max(0, extent - edge * gaps)
            # with a single value there are no gaps and `edge` is only ever
            # multiplied by index 0, so skip the division (it would be 0/0)
            if whole == 0 and gaps:
                edge = extent / float(gaps)

            if attr_vals == "":
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [conditionaldict[attr_vals + "-" + val]
                          for val in values]
            total = sum(counts)

            # if we are visualizing the third attribute and the first attribute
            # has the last value, we have to reverse the order in which the
            # boxes will be drawn otherwise, if the last cell, nearest to the
            # labels of the fourth attribute, is empty, we wouldn't be able to
            # position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(
                    data.domain[used_attrs[0]])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                if attr_vals != "":
                    newattrvals = attr_vals + "-" + val
                else:
                    newattrvals = val

                tooltip = condition + 4 * " " + attr + \
                    ": <b>" + htmlval + "</b><br>"
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                common_args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1,
                                  tooltip, total_attrs, *common_args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1,
                                  tooltip, total_attrs, *common_args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            """Draw value labels and the variable name on the given side.

            Each side is labelled at most once (tracked in `drawn_sides`).
            """
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = \
                    get_variable_values_sorted(data.domain[used_attrs[0]])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                # empty cell: remember where the labels would go, draw later
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            if attr_vals == "":
                counts = [conditionaldict.get(val, 1) for val in values]
            else:
                counts = [conditionaldict.get(attr_vals + "-" + val, 1)
                          for val in values]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)

            aligns = [Qt.AlignTop | Qt.AlignHCenter,
                      Qt.AlignRight | Qt.AlignVCenter,
                      Qt.AlignBottom | Qt.AlignHCenter,
                      Qt.AlignLeft | Qt.AlignVCenter]
            align = aligns[side]
            for val, count in zip(values, counts):
                perc = count / float(total)
                if distributiondict[val] != 0:
                    if side == 0:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * 0.5 * perc,
                                   y1 + self.ATTR_VAL_OFFSET, align)
                    elif side == 1:
                        CanvasText(self.canvas, str(val),
                                   x0 - self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)
                    elif side == 2:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * perc * 0.5,
                                   y0 - self.ATTR_VAL_OFFSET, align)
                    else:
                        CanvasText(self.canvas, str(val),
                                   x1 + self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)

                if side % 2 == 0:
                    currpos += perc * width + spacing * (total_attrs - side)
                else:
                    currpos += perc * height + spacing * (total_attrs - side)

            if side == 0:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                    align, bold=1)
            elif side == 1:
                CanvasText(
                    self.canvas, attr,
                    x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)
            elif side == 2:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                    align, bold=1)
            else:
                CanvasText(
                    self.canvas, attr,
                    x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)

        def add_rect(x0, x1, y0, y1, condition="",
                     used_attrs=None, used_vals=None, attr_vals=""):
            """Draw one cell, register it in `self.areas`, set its tooltip."""
            # fresh lists: the pair is stored in self.areas, so a shared
            # default object must not be used
            used_attrs = [] if used_attrs is None else used_attrs
            used_vals = [] if used_vals is None else used_vals
            area_index = len(self.areas)
            if x0 == x1:
                x1 += 1
            if y0 == y1:
                y1 += 1

            # rectangles of width and height 1 are not shown - increase
            if x1 - x0 + y1 - y0 == 2:
                y1 += 1

            if class_var and class_var.is_discrete:
                colors = [QColor(*col) for col in class_var.colors]
            else:
                colors = None

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                        self.canvas, x, y, w, h, z=z, onclick=select_area,
                        **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                    self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                    onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.interior_coloring == self.PEARSON:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul,
                    (apriori_dists[i][used_vals[i]] / float(s)
                     for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = (actual - expected) / sqrt(expected)
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" +
                    "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        # last stripe takes the remainder (no rounding gap)
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and \
                        abs(y1 - y0) > bar_width:
                    # bar with the prior class distribution on the left
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        counts = [conditionalsubsetdict[attr_vals + "-" + val]
                                  for val in cls_values]
                        if sum(counts) == 1:
                            rect(x0 - 2, y0 - 2, x1 - x0 + 5, y1 - y0 + 5,
                                 -550, colors[counts.index(1)], Qt.white,
                                 penWidth=2, penStyle=Qt.DashLine)
                        if self.subset_data is not None:
                            # bar with the subset distribution on the right
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[
                                        attr_vals + "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total,
                                         bar_width, v, 15, color)
                                    total += v

                actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                          for i in range(len(prior))]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                # `text` is joined without a trailing separator, so it is
                # used whole; slicing it would cut the last entry short
                outer_rect.setToolTip(
                    "{}<hr>Instances: {}<br><br>{}".format(
                        condition, n_actual, text))

        def draw_legend(x0_x1, y0_y1):
            """Draw the color legend centered below the plot."""
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if self.interior_coloring == self.PEARSON:
                names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                         "Residuals:"]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
            else:
                names = get_variable_values_sorted(class_var) + \
                        [class_var.name + ":"]
                colors = [QColor(*col) for col in class_var.colors]

            names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
                     for name in names]
            totalwidth = sum(text.boundingRect().width() for text in names)

            # compute the x position of the center of the legend
            y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
            distance = 30
            startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2

            # the last name is the legend title; it is placed first
            names[-1].setPos(startx + 15, y)
            names[-1].show()
            xoffset = names[-1].boundingRect().width() + distance

            size = 8

            for i in range(len(names) - 1):
                if self.interior_coloring == self.PEARSON:
                    edgecolor = Qt.black
                else:
                    edgecolor = colors[i]

                CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                                size, size, edgecolor, colors[i])
                names[i].setPos(startx + xoffset + 10, y)
                xoffset += distance + names[i].boundingRect().width()

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        subset = self.subset_data
        attr_list = self.get_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = isinstance(data, SqlTable)
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        if self.interior_coloring == self.PEARSON:
            apriori_dists = [get_distribution(data, attr)
                             for attr in attr_list]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            """Return the width of the widest value label of `attr`."""
            values = get_variable_values_sorted(data.domain[attr])
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        # get the maximum width of rectangle
        xoff = 20
        width = 20
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1], bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3], bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        # get the maximum height of rectangle
        height = 100
        yoff = 45
        square_size = min(self.canvas_view.width() - width - 20,
                          self.canvas_view.height() - height - 20)

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
            0, 0, self.canvas_view.width(), self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if subset:
            conditionalsubsetdict, _ = \
                get_conditional_distribution(subset, attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list))
        draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size))
        self.update_selection_rects()
class OWNomogram(OWWidget):
    """Widget that shows a nomogram of a classifier.

    Accepts Naive Bayes and logistic regression models (see `ACCEPTABLE`);
    an optional data instance sets the initial positions of the markers.
    """
    name = "Nomogram"
    description = " Nomograms for Visualization of Naive Bayesian" \
                  " and Logistic Regression Classifiers."
    icon = "icons/Nomogram.svg"
    priority = 2000

    inputs = [("Classifier", Model, "set_classifier"),
              ("Data", Table, "set_instance")]

    MAX_N_ATTRS = 1000
    POINT_SCALE = 0
    ALIGN_LEFT = 0
    ALIGN_ZERO = 1
    ACCEPTABLE = (NaiveBayesModel, LogisticRegressionClassifier)
    settingsHandler = ClassValuesContextHandler()
    target_class_index = ContextSetting(0)
    normalize_probabilities = Setting(False)
    scale = Setting(1)
    display_index = Setting(1)
    n_attributes = Setting(10)
    sort_index = Setting(SortBy.ABSOLUTE)
    cont_feature_dim_index = Setting(0)

    graph_name = "scene"

    class Error(OWWidget.Error):
        invalid_classifier = Msg("Nomogram accepts only Naive Bayes and "
                                 "Logistic Regression classifiers.")

    def __init__(self):
        """Initialize the state and build the controls and the view."""
        super().__init__()
        self.instances = None
        self.domain = None
        self.data = None
        self.classifier = None
        self.align = OWNomogram.ALIGN_ZERO
        self.log_odds_ratios = []
        self.log_reg_coeffs = []
        self.log_reg_coeffs_orig = []
        self.log_reg_cont_data_extremes = []
        self.p = None
        self.b0 = None
        self.points = []
        self.feature_items = []
        self.feature_marker_values = []
        self.scale_back = lambda x: x
        self.scale_forth = lambda x: x
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.old_target_class_index = self.target_class_index
        self.markers_set = False
        # set on the first viewport resize; update_scene is a no-op before
        self.repaint = False

        # GUI
        box = gui.vBox(self.controlArea, "Target class")
        self.class_combo = gui.comboBox(box, self, "target_class_index",
                                        callback=self._class_combo_changed,
                                        contentsLength=12)
        self.norm_check = gui.checkBox(
            box, self, "normalize_probabilities", "Normalize probabilities",
            hidden=True, callback=self._norm_check_changed,
            tooltip="For multiclass data 1 vs. all probabilities do not"
                    " sum to 1 and therefore could be normalized.")

        self.scale_radio = gui.radioButtons(
            self.controlArea, self, "scale",
            ["Point scale", "Log odds ratios"],
            box="Scale", callback=self._radio_button_changed)

        box = gui.vBox(self.controlArea, "Display features")
        grid = QGridLayout()
        self.display_radio = gui.radioButtonsInBox(
            box, self, "display_index", [], orientation=grid,
            callback=self._display_radio_button_changed)
        radio_all = gui.appendRadioButton(self.display_radio, "All:",
                                          addToLayout=False)
        radio_best = gui.appendRadioButton(self.display_radio, "Best ranked:",
                                           addToLayout=False)
        spin_box = gui.hBox(None, margin=0)
        self.n_spin = gui.spin(spin_box, self, "n_attributes", 1,
                               self.MAX_N_ATTRS, label=" ", controlWidth=60,
                               callback=self._n_spin_changed)
        grid.addWidget(radio_all, 1, 1)
        grid.addWidget(radio_best, 2, 1)
        grid.addWidget(spin_box, 2, 2)

        self.sort_combo = gui.comboBox(box, self, "sort_index",
                                       label="Sort by: ",
                                       items=SortBy.items(),
                                       orientation=Qt.Horizontal,
                                       callback=self._sort_combo_changed)

        self.cont_feature_dim_combo = gui.comboBox(
            box, self, "cont_feature_dim_index",
            label="Continuous features: ",
            items=["1D projection", "2D curve"], orientation=Qt.Horizontal,
            callback=self._cont_feature_dim_combo_changed)

        gui.rubber(self.controlArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(
            self.scene,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            renderHints=QPainter.Antialiasing | QPainter.TextAntialiasing |
            QPainter.SmoothPixmapTransform, alignment=Qt.AlignLeft)
        self.view.viewport().installEventFilter(self)
        self.view.viewport().setMinimumWidth(300)
        self.view.sizeHint = lambda: QSize(600, 500)
        self.mainArea.layout().addWidget(self.view)

    def _class_combo_changed(self):
        """Convert marker values to the new target class and redraw."""
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        coeffs = [np.nan_to_num(p[self.target_class_index] /
                                p[self.old_target_class_index])
                  for p in self.points]
        points = [p[self.old_target_class_index] for p in self.points]
        self.feature_marker_values = [
            self.get_points_from_coeffs(v, c, p) for (v, c, p) in
            zip(self.feature_marker_values, coeffs, points)]
        self.update_scene()
        self.old_target_class_index = self.target_class_index

    def _norm_check_changed(self):
        """Remember unscaled marker values and redraw."""
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        self.update_scene()

    def _radio_button_changed(self):
        """Remember unscaled marker values and redraw with the new scale."""
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        self.update_scene()

    def _display_radio_button_changed(self):
        """Show all features or only the chosen number of them."""
        self.__hide_attrs(self.n_attributes if self.display_index else None)

    def _n_spin_changed(self):
        """Switch to "Best ranked" and show `n_attributes` features."""
        self.display_index = 1
        self.__hide_attrs(self.n_attributes)

    def __hide_attrs(self, n_show):
        """Pass `n_show` to the main nomogram's `hide` and refit the scene.

        The vertical lines and the scene rectangle are resized to the new
        height.  Does nothing if the nomogram has not been created yet.
        """
        if self.nomogram_main is None:
            return
        self.nomogram_main.hide(n_show)
        if self.vertical_line:
            x = self.vertical_line.line().x1()
            y = self.nomogram_main.layout.preferredHeight() + 30
            self.vertical_line.setLine(x, -6, x, y)
            self.hidden_vertical_line.setLine(x, -6, x, y)
        rect = QRectF(self.scene.sceneRect().x(),
                      self.scene.sceneRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height())
        self.scene.setSceneRect(rect.adjusted(0, 0, 70, 70))

    def _sort_combo_changed(self):
        """Re-sort the features and re-apply the display limit."""
        if self.nomogram_main is None:
            return
        self.nomogram_main.hide(None)
        self.nomogram_main.sort(self.sort_index)
        self.__hide_attrs(self.n_attributes if self.display_index else None)

    def _cont_feature_dim_combo_changed(self):
        """Remember unscaled marker values and redraw."""
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        self.update_scene()

    def eventFilter(self, obj, event):
        """Redraw the scene whenever the view's viewport is resized."""
        if obj is self.view.viewport() and event.type() == QEvent.Resize:
            self.repaint = True
            values = [item.dot.value for item in self.feature_items]
            self.feature_marker_values = self.scale_back(values)
            self.update_scene()
        return super().eventFilter(obj, event)

    def update_controls(self):
        """Adapt the controls to the current domain and classifier."""
        self.class_combo.clear()
        self.norm_check.setHidden(True)
        self.cont_feature_dim_combo.setEnabled(True)
        if self.domain:
            self.class_combo.addItems(self.domain.class_vars[0].values)
            if len(self.domain.attributes) > self.MAX_N_ATTRS:
                self.display_index = 1
            if len(self.domain.class_vars[0].values) > 2:
                self.norm_check.setHidden(False)
            if not self.domain.has_continuous_attributes():
                self.cont_feature_dim_combo.setEnabled(False)
                self.cont_feature_dim_index = 0
        model = self.sort_combo.model()
        item = model.item(SortBy.POSITIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        item = model.item(SortBy.NEGATIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        self.align = OWNomogram.ALIGN_ZERO
        if self.classifier and isinstance(self.classifier,
                                          LogisticRegressionClassifier):
            self.align = OWNomogram.ALIGN_LEFT
            item = model.item(SortBy.POSITIVE)
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            item = model.item(SortBy.NEGATIVE)
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            # both options were just disabled, so neither may stay selected
            if self.sort_index in (SortBy.POSITIVE, SortBy.NEGATIVE):
                self.sort_index = SortBy.NO_SORTING

    def set_instance(self, data):
        """Handle the "Data" input and reposition the markers."""
        self.instances = data
        self.feature_marker_values = []
        self.set_feature_marker_values()

    def set_classifier(self, classifier):
        """Handle the "Classifier" input.

        A classifier of an unsupported type raises the `invalid_classifier`
        error and is treated as no classifier.
        """
        self.closeContext()
        self.classifier = classifier
        self.Error.clear()
        if self.classifier and not isinstance(self.classifier,
                                              self.ACCEPTABLE):
            self.Error.invalid_classifier()
            self.classifier = None
        self.domain = self.classifier.domain if self.classifier else None
        self.data = None
        self.calculate_log_odds_ratios()
        self.calculate_log_reg_coefficients()
        self.update_controls()
        self.target_class_index = 0
        self.openContext(self.domain and self.domain.class_var)
        self.points = self.log_odds_ratios or self.log_reg_coeffs
        self.feature_marker_values = []
        self.old_target_class_index = self.target_class_index
        self.update_scene()

    def calculate_log_odds_ratios(self):
        """Compute per-attribute log odds ratios for a Naive Bayes model.

        Leaves `log_odds_ratios` empty and `p` None for other classifiers.
        """
        self.log_odds_ratios = []
        self.p = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, NaiveBayesModel):
            return

        log_cont_prob = self.classifier.log_cont_prob
        class_prob = self.classifier.class_prob
        for i in range(len(self.domain.attributes)):
            ca = np.exp(log_cont_prob[i]) * class_prob[:, None]
            _or = (ca / (1 - ca)) / (class_prob / (1 - class_prob))[:, None]
            self.log_odds_ratios.append(np.log(_or))
        self.p = class_prob

    def calculate_log_reg_coefficients(self):
        """Split logistic regression coefficients by attribute.

        Leaves `log_reg_coeffs` empty and `b0` None for other classifiers.
        For a single-column (continuous) attribute the coefficient is
        replaced by its products with the minimal and maximal data value.
        """
        self.log_reg_coeffs = []
        self.log_reg_cont_data_extremes = []
        self.b0 = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, LogisticRegressionClassifier):
            return

        self.domain = self.reconstruct_domain(self.classifier.original_domain,
                                              self.domain)
        self.data = self.classifier.original_data.transform(self.domain)
        # column slice of the coefficient matrix for every attribute
        attrs, ranges, start = self.domain.attributes, [], 0
        for attr in attrs:
            stop = start + len(attr.values) if attr.is_discrete else start + 1
            ranges.append(slice(start, stop))
            start = stop

        self.b0 = self.classifier.intercept
        coeffs = self.classifier.coefficients
        if len(self.domain.class_var.values) == 2:
            # add a negated row so that both classes have coefficients
            self.b0 = np.hstack((self.b0 * (-1), self.b0))
            coeffs = np.vstack((coeffs * (-1), coeffs))
        self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
        self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

        min_values = nanmin(self.data.X, axis=0)
        max_values = nanmax(self.data.X, axis=0)

        for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)),
                                   min_values, max_values):
            if self.log_reg_coeffs[i].shape[1] == 1:
                coef = self.log_reg_coeffs[i]
                self.log_reg_coeffs[i] = np.hstack(
                    (coef * min_t, coef * max_t))
                self.log_reg_cont_data_extremes.append(
                    [sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
            else:
                self.log_reg_cont_data_extremes.append([None])

    def update_scene(self):
        """Rebuild the whole nomogram in the scene.

        Does nothing until the viewport has been resized at least once
        (`self.repaint`); only clears the scene when there is nothing to
        show.
        """
        if not self.repaint:
            return
        self.clear_scene()
        # `points` is empty for a domain without attributes; test it before
        # indexing
        if self.domain is None or not len(self.points) \
                or not len(self.points[0]):
            return

        name_items = [QGraphicsTextItem(a.name)
                      for a in self.domain.attributes]
        point_text = QGraphicsTextItem("Points")
        probs_text = QGraphicsTextItem("Probabilities (%)")
        all_items = name_items + [point_text, probs_text]
        name_offset = -max(t.boundingRect().width() for t in all_items) - 50
        w = self.view.viewport().rect().width()
        max_width = w + name_offset - 100

        points = [pts[self.target_class_index] for pts in self.points]
        minimums = [min(p) for p in points]
        if self.align == OWNomogram.ALIGN_LEFT:
            points = [p - m for m, p in zip(minimums, points)]
        max_ = np.nan_to_num(max(max(abs(p)) for p in points))
        d = 100 / max_ if max_ else 1
        if self.scale == OWNomogram.POINT_SCALE:
            points = [p * d for p in points]

        # conversions between raw values and what is shown on the rulers
        point_scale = self.scale == OWNomogram.POINT_SCALE
        left_aligned = self.align == OWNomogram.ALIGN_LEFT
        if point_scale and left_aligned:
            self.scale_back = lambda x: [p / d + m for m, p
                                         in zip(minimums, x)]
            self.scale_forth = lambda x: [(p - m) * d for m, p
                                          in zip(minimums, x)]
        elif point_scale:
            self.scale_back = lambda x: [p / d for p in x]
            self.scale_forth = lambda x: [p * d for p in x]
        elif left_aligned:
            self.scale_back = lambda x: [p + m for m, p in zip(minimums, x)]
            self.scale_forth = lambda x: [p - m for m, p in zip(minimums, x)]
        else:
            self.scale_back = lambda x: x
            self.scale_forth = lambda x: x

        point_item, nomogram_head = self.create_main_nomogram(
            name_items, points, max_width, point_text, name_offset)
        probs_item, nomogram_foot = self.create_footer_nomogram(
            probs_text, d, minimums, max_width, name_offset)
        for item in self.feature_items:
            item.dot.point_dot = point_item.dot
            item.dot.probs_dot = probs_item.dot
            item.dot.vertical_line = self.hidden_vertical_line

        self.nomogram = nomogram = NomogramItem()
        nomogram.add_items([nomogram_head, self.nomogram_main, nomogram_foot])
        self.scene.addItem(nomogram)
        self.set_feature_marker_values()
        rect = QRectF(self.scene.itemsBoundingRect().x(),
                      self.scene.itemsBoundingRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height())
        self.scene.setSceneRect(rect.adjusted(0, 0, 70, 70))

    def create_main_nomogram(self, name_items, points, max_width, point_text,
                             name_offset):
        """Create the points ruler and one item per feature.

        Returns the ruler item and the header nomogram that contains it;
        the feature items are stored in `self.feature_items` and laid out
        in `self.nomogram_main`.
        """
        cls_index = self.target_class_index
        min_p = min(min(p) for p in points)
        max_p = max(max(p) for p in points)
        values = self.get_ruler_values(min_p, max_p, max_width)
        min_p, max_p = min(values), max(values)
        diff_ = np.nan_to_num(max_p - min_p)
        scale_x = max_width / diff_ if diff_ else max_width

        nomogram_header = NomogramItem()
        point_item = RulerItem(point_text, values, scale_x, name_offset,
                               -scale_x * min_p)
        point_item.setPreferredSize(point_item.preferredWidth(), 35)
        nomogram_header.add_items([point_item])

        self.nomogram_main = SortableNomogramItem()
        cont_feature_item_class = ContinuousFeature2DItem if \
            self.cont_feature_dim_index else ContinuousFeatureItem
        self.feature_items = [
            DiscreteFeatureItem(
                name_items[i], list(att.values), points[i],
                scale_x, name_offset, -scale_x * min_p,
                self.points[i][cls_index])
            if att.is_discrete else
            cont_feature_item_class(
                name_items[i], self.log_reg_cont_data_extremes[i][cls_index],
                self.get_ruler_values(
                    np.min(points[i]), np.max(points[i]),
                    scale_x * (np.max(points[i]) - np.min(points[i])), False),
                scale_x, name_offset, -scale_x * min_p,
                self.log_reg_coeffs_orig[i][cls_index][0])
            for i, att in enumerate(self.domain.attributes)]
        self.nomogram_main.add_items(
            self.feature_items, self.sort_index,
            self.n_attributes if self.display_index else None)

        x = -scale_x * min_p
        y = self.nomogram_main.layout.preferredHeight() + 30
        self.vertical_line = QGraphicsLineItem(x, -6, x, y)
        self.vertical_line.setPen(QPen(Qt.DotLine))
        self.vertical_line.setParentItem(point_item)
        self.hidden_vertical_line = QGraphicsLineItem(x, -6, x, y)
        pen = QPen(Qt.DashLine)
        pen.setBrush(QColor(Qt.red))
        self.hidden_vertical_line.setPen(pen)
        self.hidden_vertical_line.setParentItem(point_item)
        return point_item, nomogram_header

    def create_footer_nomogram(self, probs_text, d, minimums,
                               max_width, name_offset):
        """Create the probabilities ruler.

        Returns the ruler item and the footer nomogram that contains it.
        `d` is the point-scale factor and `minimums` the per-feature
        minimal points, both as computed in `update_scene`.
        """
        eps, d_ = 0.05, 1
        k = -np.log(self.p / (1 - self.p)) if self.p is not None else -self.b0
        min_sum = k[self.target_class_index] - np.log((1 - eps) / eps)
        max_sum = k[self.target_class_index] - np.log(eps / (1 - eps))
        if self.align == OWNomogram.ALIGN_LEFT:
            max_sum = max_sum - sum(minimums)
            min_sum = min_sum - sum(minimums)
            for i in range(len(k)):
                k[i] = k[i] - sum(min(p[i]) for p in self.points)
        if self.scale == OWNomogram.POINT_SCALE:
            min_sum *= d
            max_sum *= d
            d_ = d

        values = self.get_ruler_values(min_sum, max_sum, max_width)
        min_sum, max_sum = min(values), max(values)
        diff_ = np.nan_to_num(max_sum - min_sum)
        scale_x = max_width / diff_ if diff_ else max_width
        cls_var, cls_index = self.domain.class_var, self.target_class_index
        nomogram_footer = NomogramItem()

        def get_normalized_probabilities(val):
            if not self.normalize_probabilities:
                return 1 / (1 + np.exp(k[cls_index] - val / d_))
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return 1 / (1 + np.exp(k[cls_index] - val / d_)) / p_sum

        def get_points(prob):
            if not self.normalize_probabilities:
                return (k[cls_index] - np.log(1 / prob - 1)) * d_
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return (k[cls_index] - np.log(1 / (prob * p_sum) - 1)) * d_

        # __get_totals_for_class_values rescales marker values while this
        # flag is False, i.e. while the ruler is being constructed
        self.markers_set = False
        probs_item = ProbabilitiesRulerItem(
            probs_text, values, scale_x, name_offset, -scale_x * min_sum,
            get_points=get_points,
            title="{}='{}'".format(cls_var.name, cls_var.values[cls_index]),
            get_probabilities=get_normalized_probabilities)
        self.markers_set = True
        nomogram_footer.add_items([probs_item])
        return probs_item, nomogram_footer

    def __get_totals_for_class_values(self, minimums):
        """Return the total of marker points for every class value."""
        cls_index = self.target_class_index
        marker_values = [item.dot.value for item in self.feature_items]
        if not self.markers_set:
            marker_values = self.scale_forth(marker_values)
        totals = np.empty(len(self.domain.class_var.values))
        totals[cls_index] = sum(marker_values)
        marker_values = self.scale_back(marker_values)
        for i in range(len(self.domain.class_var.values)):
            if i == cls_index:
                continue
            coeffs = [np.nan_to_num(p[i] / p[cls_index]) for p in self.points]
            points = [p[cls_index] for p in self.points]
            total = sum(self.get_points_from_coeffs(v, c, p) for (v, c, p)
                        in zip(marker_values, coeffs, points))
            if self.align == OWNomogram.ALIGN_LEFT:
                points = [p - m for m, p in zip(minimums, points)]
                total -= sum(min(p[i]) for p in self.points)
            # same guarded scale factor as in update_scene
            max_ = np.nan_to_num(max(max(abs(p)) for p in points))
            d = 100 / max_ if max_ else 1
            if self.scale == OWNomogram.POINT_SCALE:
                total *= d
            totals[i] = total
        return totals

    def set_feature_marker_values(self):
        """Move every feature marker to its stored (rescaled) value."""
        if not (len(self.points) and len(self.feature_items)):
            return
        if not len(self.feature_marker_values):
            self._init_feature_marker_values()
        self.feature_marker_values = self.scale_forth(
            self.feature_marker_values)
        for i, item in enumerate(self.feature_items):
            item.dot.move_to_val(self.feature_marker_values[i])
        # feature_items is non-empty here; update the sum via the last item
        self.feature_items[-1].dot.probs_dot.move_to_sum()

    def _init_feature_marker_values(self):
        """Compute initial marker values.

        The value of the first input instance is used when it is known;
        otherwise, for logistic regression, the most frequent value
        (discrete) or the mean (continuous) of the data, and 0 if neither
        is available.
        """
        self.feature_marker_values = []
        cls_index = self.target_class_index
        instances = Table(self.domain, self.instances) \
            if self.instances else None
        for i, attr in enumerate(self.domain.attributes):
            value, feature_val = 0, None
            if len(self.log_reg_coeffs):
                if attr.is_discrete:
                    ind, n = unique(self.data.X[:, i], return_counts=True)
                    feature_val = np.nan_to_num(ind[np.argmax(n)])
                else:
                    feature_val = mean(self.data.X[:, i])
            inst_in_dom = instances and attr in instances.domain
            if inst_in_dom and not np.isnan(instances[0][attr]):
                feature_val = instances[0][attr]
            if feature_val is not None:
                value = self.points[i][cls_index][int(feature_val)] \
                    if attr.is_discrete else \
                    self.log_reg_coeffs_orig[i][cls_index][0] * feature_val
            self.feature_marker_values.append(value)

    def clear_scene(self):
        self.feature_items = []
        self.scale_back = lambda x: x
self.scale_forth = lambda x: x self.nomogram = None self.nomogram_main = None self.vertical_line = None self.hidden_vertical_line = None self.scene.clear() def send_report(self): self.report_plot() @staticmethod def reconstruct_domain(original, preprocessed): # abuse dict to make "in" comparisons faster attrs = OrderedDict() for attr in preprocessed.attributes: cv = attr._compute_value.variable._compute_value var = cv.variable if cv else original[attr.name] if var in attrs: # the reason for OrderedDict continue attrs[var] = None # we only need keys attrs = list(attrs.keys()) return Domain(attrs, original.class_var, original.metas) @staticmethod def get_ruler_values(start, stop, max_width, round_to_nearest=True): if max_width == 0: return [0] diff = np.nan_to_num((stop - start) / max_width) if diff <= 0: return [0] decimals = int(np.floor(np.log10(diff))) if diff > 4 * pow(10, decimals): step = 5 * pow(10, decimals + 2) elif diff > 2 * pow(10, decimals): step = 2 * pow(10, decimals + 2) elif diff > 1 * pow(10, decimals): step = 1 * pow(10, decimals + 2) else: step = 5 * pow(10, decimals + 1) round_by = int(-np.floor(np.log10(step))) r = start % step if not round_to_nearest: _range = np.arange(start + step, stop + r, step) - r start, stop = np.floor(start * 100) / 100, np.ceil( stop * 100) / 100 return np.round(np.hstack((start, _range, stop)), 2) return np.round(np.arange(start, stop + r + step, step) - r, round_by) @staticmethod def get_points_from_coeffs(current_value, coefficients, possible_values): if any(np.isnan(possible_values)): return 0 indices = np.argsort(possible_values) sorted_values = possible_values[indices] sorted_coefficients = coefficients[indices] for i, val in enumerate(sorted_values): if current_value < val: break diff = sorted_values[i] - sorted_values[i - 1] k = 0 if diff < 1e-6 else (sorted_values[i] - current_value) / \ (sorted_values[i] - sorted_values[i - 1]) return sorted_coefficients[i - 1] * sorted_values[i - 1] * k + \ 
sorted_coefficients[i] * sorted_values[i] * (1 - k)
class OWSilhouettePlot(widget.OWWidget):
    """Widget that plots per-instance silhouette scores for a clustering.

    The cluster labels are taken from a discrete variable of the input data;
    distances are computed with the metric selected in ``Distances``.
    """

    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)]

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(False)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")

    def __init__(self):
        super().__init__()

        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self._silplot = None

        gui.comboBox(
            self.controlArea, self, "distance_idx", box="Distance",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", addSpace=4,
            callback=self._invalidate_scores)
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size, addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx",
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(
            box, self, "auto_commit", "Commit",
            auto_label="Auto commit", box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the control area's size hint, at least 600 x 720."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @check_sql_input
    def set_data(self, data):
        """
        Set the input data set.

        The data is rejected (treated as ``None`` with an error shown) when
        it has no discrete variable with at least two values or no
        continuous attribute.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable cluster labels."
                data = None

        if data is not None:
            ncont = sum(v.is_continuous for v in data.domain.attributes)
            ndiscrete = len(data.domain.attributes) - ncont
            if ncont == 0:
                data = None
                error_msg = "No continuous columns"
            elif ncont < len(data.domain.attributes):
                warning_msg = "{0} discrete columns will not be used for " \
                              "distance computation".format(ndiscrete)

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            # Prefer the class variable as the default cluster label.
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self._effective_data = Orange.distance._preprocess(data)
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        """Recompute and redraw for the new input, then commit."""
        if self._effective_data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        if self.data is None:
            self._silhouette = None
            self._labels = None
            self._matrix = None
            self._clear_scene()
            return

        if self._matrix is None and self._effective_data is not None:
            _, metric = self.Distances[self.distance_idx]
            self._matrix = numpy.asarray(metric(self._effective_data))

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = labels.astype(int)
        _, counts = numpy.unique(labels, return_counts=True)
        if numpy.count_nonzero(counts) >= 2:
            self.Error.need_two_clusters.clear()
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix, labels, metric="precomputed")
        else:
            self.Error.need_two_clusters()
            labels = silhouette = None

        self._labels = labels
        self._silhouette = silhouette

    def _set_bar_height(self):
        # Row names are shown only for bars at least 5 units high.
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values)
            else:
                # A single unnamed group containing every instance.
                silplot.setScores(
                    self._silhouette,
                    numpy.zeros(len(self._silhouette), dtype=int),
                    [""])

            self.scene.addItem(silplot)
            self._update_annotations()

            silplot.resize(silplot.effectiveSizeHint(Qt.PreferredSize))
            silplot.selectionChanged.connect(self.commit)

            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()
            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_annotations(self):
        # Index 0 of the annotation model is the "None" placeholder.
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = numpy.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                selectedmask[indices] = True
            scores = self._silhouette
            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.from_table(domain, self.data)
            else:
                domain = self.data.domain
                data = self.data

            if numpy.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(selectedmask))

            # The scores are None when fewer than two clusters were found;
            # in that case the new column is left as created by from_table
            # instead of being assigned `numpy.c_[None]`.
            if self.add_scores and scores is not None:
                if selected is not None:
                    selected[:, silhouette_var] = \
                        numpy.c_[scores[selectedmask]]
                data[:, silhouette_var] = numpy.c_[scores]

        self.send("Selected Data", selected)
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(data, indices))

    def send_report(self):
        """Report the plot with a caption describing the current settings."""
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        # There is no plot item when the scores could not be computed.
        if self.annotation_var_idx and self._silplot is not None \
                and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
class OWExplainPredictions(OWWidget):
    """Widget that explains a model's prediction for a single sample.

    The explanation is computed in a background thread by
    ``ExplainPredictions.anytime_explain``; intermediate results are drawn
    and sent to the output as they arrive.
    """

    name = "Explain Predictions"
    description = "Computes attribute contributions to the final prediction " \
                  "with an approximation algorithm for shapely value"
    icon = "icons/ExplainPredictions.svg"
    priority = 200
    gui_error = settings.Setting(0.05)
    gui_p_val = settings.Setting(0.05)
    gui_num_atr = settings.Setting(20)
    sort_index = settings.Setting(SortBy.ABSOLUTE)

    class Inputs:
        data = Input("Data", Table, default=True)
        model = Input("Model", Model, multiple=False)
        sample = Input("Sample", Table)

    class Outputs:
        explanations = Output("Explanations", Table)

    class Error(OWWidget.Error):
        sample_too_big = widget.Msg("Can only explain one sample at the time.")

    class Warning(OWWidget.Warning):
        unknowns_increased = widget.Msg(
            "Number of unknown values increased, Data and Sample domains mismatch."
        )

    def __init__(self):
        super().__init__()
        self.data = None
        self.model = None
        self.to_explain = None
        self.explanations = None
        self.stop = True
        self.e = None
        # Set to True by cancel(); read by _task_finished().
        self.was_canceled = False

        self._task = None
        self._executor = ThreadExecutor()

        info_box = gui.vBox(self.controlArea, "Info")
        self.data_info = gui.widgetLabel(info_box, "Data: N/A")
        self.model_info = gui.widgetLabel(info_box, "Model: N/A")
        self.sample_info = gui.widgetLabel(info_box, "Sample: N/A")

        criteria_box = gui.vBox(self.controlArea, "Stopping criteria")
        self.error_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_error",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error < ",
                                   spinType=float,
                                   callback=self._update_error_spin,
                                   controlWidth=80,
                                   keyboardTracking=False)

        self.p_val_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_p_val",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error p-value < ",
                                   spinType=float,
                                   callback=self._update_p_val_spin,
                                   controlWidth=80,
                                   keyboardTracking=False)

        plot_properties_box = gui.vBox(self.controlArea, "Display features")
        self.num_atr_spin = gui.spin(plot_properties_box,
                                     self,
                                     "gui_num_atr",
                                     1,
                                     100,
                                     step=1,
                                     label="Show attributes",
                                     callback=self._update_num_atr_spin,
                                     controlWidth=80,
                                     keyboardTracking=False)

        self.sort_combo = gui.comboBox(plot_properties_box,
                                       self,
                                       "sort_index",
                                       label="Rank by",
                                       items=SortBy.items(),
                                       orientation=Qt.Horizontal,
                                       callback=self._update_combo)

        gui.rubber(self.controlArea)

        self.cancel_button = gui.button(
            self.controlArea,
            self,
            "Stop Computation",
            callback=self.toggle_button,
            autoDefault=True,
            tooltip="Stops and restarts computation")
        self.cancel_button.setDisabled(True)

        predictions_box = gui.vBox(self.mainArea, "Model prediction")
        self.predict_info = gui.widgetLabel(predictions_box, "")

        self.mainArea.setMinimumWidth(700)
        self.resize(700, 400)

        class _GraphicsView(QGraphicsView):
            def __init__(self, scene, parent, **kwargs):
                # Defaults that the caller may override through kwargs.
                for k, v in dict(
                        verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                        horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                        viewportUpdateMode=QGraphicsView.
                        BoundingRectViewportUpdate,
                        renderHints=(QPainter.Antialiasing
                                     | QPainter.TextAntialiasing
                                     | QPainter.SmoothPixmapTransform),
                        alignment=(Qt.AlignTop | Qt.AlignLeft),
                        sizePolicy=QSizePolicy(
                            QSizePolicy.MinimumExpanding,
                            QSizePolicy.MinimumExpanding)).items():
                    kwargs.setdefault(k, v)

                super().__init__(scene, parent, **kwargs)

        class GraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(
                    scene,
                    parent,
                    verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                    styleSheet='QGraphicsView {background: white}')
                self.viewport().setMinimumWidth(500)
                self._is_resizing = False

            # Class-level attribute: `self` here is the enclosing widget
            # (the local of OWExplainPredictions.__init__), which
            # resizeEvent redraws through `self.w`.
            w = self

            def resizeEvent(self, resizeEvent):
                self._is_resizing = True
                self.w.draw()
                self._is_resizing = False
                return super().resizeEvent(resizeEvent)

            def is_resizing(self):
                return self._is_resizing

            def sizeHint(self):
                return QSize(600, 300)

        class FixedSizeGraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene,
                                 parent,
                                 sizePolicy=QSizePolicy(
                                     QSizePolicy.MinimumExpanding,
                                     QSizePolicy.Minimum))

            def sizeHint(self):
                return QSize(600, 30)

        # All views share the same scene, but show different parts of it.
        self.box_scene = QGraphicsScene(self)

        self.box_view = GraphicsView(self.box_scene, self)
        self.header_view = FixedSizeGraphicsView(self.box_scene, self)
        self.footer_view = FixedSizeGraphicsView(self.box_scene, self)

        self.mainArea.layout().addWidget(self.header_view)
        self.mainArea.layout().addWidget(self.box_view)
        self.mainArea.layout().addWidget(self.footer_view)

        self.painter = None

    def draw(self):
        """Redraw the explanations with GraphAttributes and lay out the views."""
        self.box_scene.clear()
        wp = self.box_view.viewport().rect()
        header_height = 30
        if self.explanations is not None:
            self.painter = GraphAttributes(
                self.box_scene,
                min(self.gui_num_atr, self.explanations.Y.shape[0]))
            self.painter.paint(wp, self.explanations, header_h=header_height)

        # Set the part of the shared scene that each view shows.
        rect = QRectF(self.box_scene.itemsBoundingRect().x(),
                      self.box_scene.itemsBoundingRect().y(),
                      self.box_scene.itemsBoundingRect().width(),
                      self.box_scene.itemsBoundingRect().height())

        self.box_scene.setSceneRect(rect)
        self.box_view.setSceneRect(rect.x(),
                                   rect.y() + header_height + 2,
                                   rect.width(),
                                   rect.height() - 80)
        self.header_view.setSceneRect(rect.x(), rect.y(), rect.width(), 10)
        self.header_view.setFixedHeight(header_height)
        self.footer_view.setSceneRect(rect.x(),
                                      rect.y() + rect.height() - 50,
                                      rect.width(), 35)

    def sort_explanations(self):
        """Sort the explanations according to the "Rank by" combo box."""
        if self.sort_index == SortBy.POSITIVE:
            self.explanations = self.explanations[np.argsort(
                self.explanations.X[:, 0])][::-1]
        elif self.sort_index == SortBy.NEGATIVE:
            self.explanations = self.explanations[np.argsort(
                self.explanations.X[:, 0])]
        elif self.sort_index == SortBy.ABSOLUTE:
            self.explanations = self.explanations[np.argsort(
                np.abs(self.explanations.X[:, 0]))][::-1]
        elif self.sort_index == SortBy.BY_NAME:
            names = np.array(
                list(map(np.chararray.lower, self.explanations.metas[:, 0])))
            self.explanations = self.explanations[np.argsort(names)]
        else:
            return

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set input 'Data' and update the info label."""
        self.data = data
        self.explanations = None
        self.data_info.setText("Data: N/A")
        self.e = None
        if data is not None:
            if data.X.shape[0] == 1:
                inst = "1 instance and "
            else:
                inst = str(data.X.shape[0]) + " instances and "
            if data.X.shape[1] == 1:
                feat = "1 feature "
            else:
                feat = str(data.X.shape[1]) + " features"
            self.data_info.setText("Data: " + inst + feat)

    @Inputs.model
    def set_predictor(self, model):
        """Set input 'Model' and update the info label."""
        self.model = model
        self.model_info.setText("Model: N/A")
        self.explanations = None
        self.e = None
        if model is not None:
            self.model_info.setText("Model: " + str(model.name))

    @Inputs.sample
    @check_sql_input
    def set_sample(self, sample):
        """Set input 'Sample'; only a table with exactly one row is accepted."""
        self.to_explain = sample
        self.explanations = None
        self.Error.sample_too_big.clear()
        self.sample_info.setText("Sample: N/A")
        if sample is not None:
            if len(sample.X) != 1:
                self.to_explain = None
                self.Error.sample_too_big()
            else:
                if sample.X.shape[1] == 1:
                    feat = "1 feature"
                else:
                    feat = str(sample.X.shape[1]) + " features"
                self.sample_info.setText("Sample: " + feat)
        if self.e is not None:
            self.e.saved = False

    def handleNewSignals(self):
        """Cancel any running task and restart with the current inputs."""
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self.predict_info.setText("")
        self.Warning.unknowns_increased.clear()
        self.stop = True
        self.cancel_button.setText("Stop Computation")
        self.commit_calc_or_output()

    def commit_calc_or_output(self):
        """Start a computation when data and sample are set, else just send."""
        if self.data is not None and self.to_explain is not None:
            self.commit_calc()
        else:
            self.commit_output()

    def commit_calc(self):
        """Transform the sample to the data domain and start the explainer."""
        num_nan = np.count_nonzero(np.isnan(self.to_explain.X[0]))

        self.to_explain = self.to_explain.transform(self.data.domain)
        # More unknowns after the transform means the domains do not match.
        if num_nan != np.count_nonzero(np.isnan(self.to_explain.X[0])):
            self.Warning.unknowns_increased()
        if self.model is not None:
            # calculate contributions
            if self.e is None:
                self.e = ExplainPredictions(self.data,
                                            self.model,
                                            batch_size=min(
                                                len(self.data.X), 500),
                                            p_val=self.gui_p_val,
                                            error=self.gui_error)
            self._task = task = Task()

            # The callbacks below are passed to the explainer, which is
            # submitted to the executor; they hand results to the widget
            # through queued method invocations.
            def callback(progress):
                nonlocal task
                # update progress bar
                QMetaObject.invokeMethod(self, "set_progress_value",
                                         Qt.QueuedConnection,
                                         Q_ARG(int, progress))
                if task.canceled:
                    return True
                return False

            def callback_update(table):
                QMetaObject.invokeMethod(self, "update_view",
                                         Qt.QueuedConnection,
                                         Q_ARG(Orange.data.Table, table))

            def callback_prediction(class_value):
                QMetaObject.invokeMethod(self, "update_model_prediction",
                                         Qt.QueuedConnection,
                                         Q_ARG(float, class_value))

            self.was_canceled = False
            explain_func = partial(self.e.anytime_explain,
                                   self.to_explain[0],
                                   callback=callback,
                                   update_func=callback_update,
                                   update_prediction=callback_prediction)

            self.progressBarInit(processEvents=None)
            task.future = self._executor.submit(explain_func)
            task.watcher = FutureWatcher(task.future)
            task.watcher.done.connect(self._task_finished)
            self.cancel_button.setDisabled(False)

    @pyqtSlot(Orange.data.Table)
    def update_view(self, table):
        """Store, sort, draw and send a new table of explanations."""
        self.explanations = table
        self.sort_explanations()
        self.draw()
        self.commit_output()

    @pyqtSlot(float)
    def update_model_prediction(self, value):
        self._print_prediction(value)

    @pyqtSlot(int)
    def set_progress_value(self, value):
        self.progressBarSet(value, processEvents=False)

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Handle the completion (or cancellation) of the explanation task.

        Parameters
        ----------
        f : concurrent.futures.Future
            The finished future of the current task.
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        if not self.was_canceled:
            self.cancel_button.setDisabled(True)

        try:
            results = f.result()
        except Exception as ex:
            # Report the failure. The widget has no `results` attribute, so
            # there is nothing further to reset here.
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.error("Exception occured during evaluation: {!r}".format(ex))
        else:
            self.update_view(results[1])

        self.progressBarFinished(processEvents=False)

    def commit_output(self):
        """
        Sends best-so-far results forward
        """
        self.Outputs.explanations.send(self.explanations)

    def toggle_button(self):
        """Stop the running computation, or restart it if already stopped."""
        if self.stop:
            self.stop = False
            self.cancel_button.setText("Restart Computation")
            self.cancel()
        else:
            self.stop = True
            self.cancel_button.setText("Stop Computation")
            self.commit_calc_or_output()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self.was_canceled = True
            self._task_finished(self._task.future)

    def _print_prediction(self, class_value):
        """
        Show the model prediction in the "Model prediction" box.

        Parameters
        ----------
        class_value : float
            Number representing either index of predicted class value,
            looked up in domain, or predicted value (regression)
        """
        name = self.data.domain.class_vars[0].name
        if isinstance(self.data.domain.class_vars[0], ContinuousVariable):
            self.predict_info.setText(name + ": " + str(class_value))
        else:
            self.predict_info.setText(
                name + ": " +
                self.data.domain.class_vars[0].values[int(class_value)])

    def _update_error_spin(self):
        self.cancel()
        if self.e is not None:
            self.e.error = self.gui_error
        self.handleNewSignals()

    def _update_p_val_spin(self):
        self.cancel()
        if self.e is not None:
            self.e.p_val = self.gui_p_val
        self.handleNewSignals()

    def _update_num_atr_spin(self):
        self.cancel()
        self.handleNewSignals()

    def _update_combo(self):
        # Identity test: comparing a table to None with `!=` is not a
        # reliable "is set" check.
        if self.explanations is not None:
            self.sort_explanations()
            self.draw()
            self.commit_output()

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
class OWSilhouettePlot(widget.OWWidget):
    """
    Widget that computes per-row silhouette scores for the input table
    with respect to a chosen discrete (cluster) variable, draws them as a
    `SilhouettePlot`, and outputs the rows selected in the plot.
    """
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)]

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(False)

    #: (display name, distance constructor) pairs indexed by `distance_idx`
    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")

    def __init__(self):
        super().__init__()

        #: The input data (None if missing or rejected by `set_data`)
        self.data = None
        #: Preprocessed copy of `data` used for distance computation
        self._effective_data = None
        #: Cached distance matrix (numpy array) or None when invalidated
        self._matrix = None
        #: Cached silhouette scores or None
        self._silhouette = None
        #: Integer cluster labels matching `_silhouette`, or None
        self._labels = None
        #: The SilhouettePlot item currently in the scene, or None
        self._silplot = None

        gui.comboBox(
            self.controlArea, self, "distance_idx", box="Distance",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", addSpace=4,
            callback=self._invalidate_scores)
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size, addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx",
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(
            box, self, "auto_commit", "Commit",
            auto_label="Auto commit", box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the control area's size hint, at least 600 x 720."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @check_sql_input
    def set_data(self, data):
        """
        Set the input data set.

        The data is rejected (treated as None, with an error message) when
        it has no discrete variable with at least two values or no
        continuous attribute. A warning is shown when some attributes are
        discrete and will therefore be ignored by the distance computation.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2]
            if not candidatevars:
                error_msg = "Input does not have any suitable cluster labels."
                data = None

        if data is not None:
            ncont = sum(v.is_continuous for v in data.domain.attributes)
            ndiscrete = len(data.domain.attributes) - ncont
            if ncont == 0:
                data = None
                error_msg = "No continuous columns"
            elif ncont < len(data.domain.attributes):
                warning_msg = "{0} discrete columns will not be used for " \
                              "distance computation".format(ndiscrete)

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            # Prefer the class variable as the default cluster label
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self._effective_data = Orange.distance._preprocess(data)
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        """Recompute and redraw for the new input, then commit the output."""
        if self._effective_data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        if self.data is None:
            self._silhouette = None
            self._labels = None
            self._matrix = None
            self._clear_scene()
            return

        if self._matrix is None and self._effective_data is not None:
            _, metric = self.Distances[self.distance_idx]
            self._matrix = numpy.asarray(metric(self._effective_data))

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = labels.astype(int)
        _, counts = numpy.unique(labels, return_counts=True)
        if numpy.count_nonzero(counts) >= 2:
            self.Error.need_two_clusters.clear()
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix, labels, metric="precomputed")
        else:
            # Scores are undefined for fewer than two clusters; leave both
            # labels and scores unset so nothing is plotted.
            self.Error.need_two_clusters()
            labels = silhouette = None

        self._labels = labels
        self._silhouette = silhouette

    def _set_bar_height(self):
        # Apply `bar_size` to the plot; row names are shown only for bars
        # of size 5 or more, otherwise the "increase the width" hint is
        # shown when an annotation variable is selected.
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values)
            else:
                # A single unnamed group containing every row
                silplot.setScores(
                    self._silhouette,
                    numpy.zeros(len(self._silhouette), dtype=int),
                    [""]
                )

            self.scene.addItem(silplot)
            self._update_annotations()

            silplot.resize(silplot.effectiveSizeHint(Qt.PreferredSize))
            silplot.selectionChanged.connect(self.commit)

            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_bar_size(self):
        # Apply a changed bar size to the existing plot and refit the scene
        if self._silplot is not None:
            self._set_bar_height()
            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_annotations(self):
        # Push the row names of the selected annotation variable (index 0
        # is the "None" placeholder) to the plot.
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def commit(self):
        """
        Commit/send the current selection to the output.

        Sends the selected rows on "Selected Data" and the whole table,
        annotated with the selection, on the annotated-data output. With
        `add_scores` enabled both tables get an extra "Silhouette (...)"
        meta column.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = numpy.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                selectedmask[indices] = True
            scores = self._silhouette
            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.from_table(
                    domain, self.data)
            else:
                domain = self.data.domain
                data = self.data

            if numpy.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(selectedmask))

            # `_update` leaves `_silhouette` as None when fewer than two
            # clusters are present; there is nothing to write then, so the
            # score column stays as `from_table` created it.
            if self.add_scores and scores is not None:
                if selected is not None:
                    selected[:, silhouette_var] = numpy.c_[scores[selectedmask]]
                data[:, silhouette_var] = numpy.c_[scores]

        self.send("Selected Data", selected)
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(data, indices))

    def send_report(self):
        """Add the plot and a descriptive caption to the report."""
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        # `_silplot` is None when no scores could be computed (e.g. fewer
        # than two clusters) even though the cluster model is populated.
        if self.annotation_var_idx and self._silplot is not None \
                and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        """Release the widget state before the widget is deleted."""
        self.clear()
        super().onDeleteWidget()
class OWMosaicDisplay(OWWidget):
    """
    Widget that draws a mosaic plot of up to four variables on a
    QGraphicsScene, colored either by Pearson residuals or by the
    distribution of a chosen color variable, and outputs the rows that
    belong to the selected rectangles.
    """
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    # Values of `interior_coloring`
    PEARSON, CLASS_DISTRIBUTION = 0, 1

    settingsHandler = DomainContextHandler()
    use_boxes = Setting(True)
    interior_coloring = Setting(CLASS_DISTRIBUTION)
    variable1 = ContextSetting("", exclude_metas=False)
    variable2 = ContextSetting("", exclude_metas=False)
    variable3 = ContextSetting("", exclude_metas=False)
    variable4 = ContextSetting("", exclude_metas=False)
    variable_color = ContextSetting("", exclude_metas=False)
    selection = ContextSetting(set())

    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255),
                   QColor(110, 110, 255), QColor(0, 0, 255)]
    RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200),
                  QColor(255, 100, 100), QColor(255, 0, 0)]

    vizrank = SettingProvider(MosaicVizRank)

    graph_name = "canvas"

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of continuous variables on SQL is not supported")

    def __init__(self):
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.unprocessed_subset_data = None
        self.subset_data = None
        self.color_data = None

        # One (used_attrs, used_vals, rectangle item) triple per drawn
        # rectangle; indices into this list are what `selection` stores.
        self.areas = []

        self.canvas = QGraphicsScene()
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.attr_combos = [
            gui.comboBox(
                box, self, value="variable{}".format(i),
                orientation=Qt.Horizontal, contentsLength=12,
                callback=self.reset_graph,
                sendSelectedValue=True, valueType=str, emptyString="(None)")
            for i in range(1, 5)]
        self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank(
            box, self, "Find Informative Mosaics", self.set_attr)

        box2 = gui.vBox(self.controlArea, box="Interior Coloring")
        dmod = DomainModel
        self.color_model = DomainModel(order=dmod.MIXED,
                                       valid_types=dmod.PRIMITIVE,
                                       placeholder="(Pearson residuals)")
        self.cb_attr_color = gui.comboBox(
            box2, self, value="variable_color",
            orientation=Qt.Horizontal, contentsLength=12, labelWidth=50,
            callback=self.set_color_data,
            sendSelectedValue=True, model=self.color_model, valueType=str)
        self.bar_button = gui.checkBox(
            box2, self, 'use_boxes', label='Compare with total',
            callback=self._compare_with_total)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        """Return the preferred widget size."""
        return QSize(720, 530)

    def _compare_with_total(self):
        """Callback of the 'Compare with total' check box."""
        if self.data is not None and \
                self.data.domain.class_var is not None and \
                self.interior_coloring != self.CLASS_DISTRIBUTION:
            self.interior_coloring = self.CLASS_DISTRIBUTION
            self.coloring_changed()  # This also calls self.update_graph
        else:
            self.update_graph()

    def _get_discrete_data(self, data):
        """
        Discretizes continuous attributes.
        Returns None when there is no data, no rows, or no discrete or
        continuous attributes.
        """
        if (data is None or
                not len(data) or
                not any(attr.is_discrete or attr.is_continuous
                        for attr in chain(data.domain, data.domain.metas))):
            return None
        elif any(attr.is_continuous for attr in data.domain):
            return Discretize(
                method=EqualFreq(n=4), remove_const=False,
                discretize_classes=True, discretize_metas=True)(data)
        else:
            return data

    def init_combos(self, data):
        """
        Refill the four variable combo boxes from `data` and choose the
        default variables and the default color variable.
        """
        for combo in self.attr_combos:
            combo.clear()
        if data is None:
            return
        # Every combo but the first can be left unset
        for combo in self.attr_combos[1:]:
            combo.addItem("(None)")

        icons = gui.attributeIconDict
        for attr in chain(data.domain, data.domain.metas):
            if attr.is_primitive:
                for combo in self.attr_combos:
                    combo.addItem(icons[attr], attr.name)

        if self.attr_combos[0].count() > 0:
            self.variable1 = self.attr_combos[0].itemText(0)
            # Index 0 of the second combo is "(None)" and index 1 repeats
            # variable1, so take index 2 when it exists.
            self.variable2 = self.attr_combos[1].itemText(
                2 * (self.attr_combos[1].count() > 2))
        self.variable3 = self.attr_combos[2].itemText(0)
        self.variable4 = self.attr_combos[3].itemText(0)
        if self.data.domain.class_var:
            self.variable_color = self.data.domain.class_var.name
            idx = self.cb_attr_color.findText(self.variable_color)
        else:
            idx = 0
        self.cb_attr_color.setCurrentIndex(idx)

    def get_attr_list(self):
        """Return the names of the selected variables, skipping unset ones."""
        return [
            a for a in [self.variable1, self.variable2,
                        self.variable3, self.variable4]
            if a and a != "(None)"]

    def set_attr(self, *attrs):
        """Set the four variables from variable objects (or None) and redraw."""
        self.variable1, self.variable2, self.variable3, self.variable4 = \
            [a.name if a else "" for a in attrs]
        self.reset_graph()

    def resizeEvent(self, e):
        """Redraw the plot to fit the new size."""
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        """Redraw the plot when the widget is shown."""
        OWWidget.showEvent(self, ev)
        self.update_graph()

    @Inputs.data
    def set_data(self, data):
        """Handle the "Data" input: store it and reinitialize the controls."""
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            return

        self.color_model.set_domain(self.data.domain)
        self.init_combos(self.data)

        self.openContext(self.data)

        # if we first received subset we now call setSubsetData to process it
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None

        self.set_color_data()

    @Inputs.data_subset
    def set_subset_data(self, data):
        """
        Handle the "Data Subset" input.

        When no data has been received yet the subset is stored and
        processed later by `set_data`.
        """
        self.Warning.incompatible_subset.clear()
        if self.data is None:
            self.unprocessed_subset_data = data
            return
        try:
            self.subset_data = data.transform(self.data.domain)
        except Exception:
            # `data` may be None (no subset) or not convertible to the
            # data's domain; either way there is no usable subset. The
            # warning is shown only when a subset was actually given.
            self.subset_data = None
            self.Warning.incompatible_subset(shown=data is not None)

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        self.reset_graph()

    def clear_selection(self):
        """Empty the selection, restyle the rectangles and resend outputs."""
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def coloring_changed(self):
        """Notify VizRank about the changed coloring and redraw."""
        self.vizrank.coloring_changed()
        self.update_graph()

    def reset_graph(self):
        """Clear the selection and redraw the plot."""
        self.clear_selection()
        self.update_graph()

    def set_color_data(self):
        """
        Rebuild `color_data` and `discrete_data` with the selected color
        variable as the class variable, and set `interior_coloring`
        accordingly (Pearson residuals when no color variable is chosen).
        """
        if self.data is None or len(self.data) < 2 or \
                len(self.data.domain.attributes) < 1:
            return
        if self.cb_attr_color.currentIndex() <= 0:
            color_var = None
            self.interior_coloring = self.PEARSON
            self.bar_button.setEnabled(False)
        else:
            color_var = self.data.domain[self.cb_attr_color.currentText()]
            self.interior_coloring = self.CLASS_DISTRIBUTION
            self.bar_button.setEnabled(True)
        attributes = [v for v in self.data.domain if v != color_var]
        metas = [v for v in self.data.domain.metas if v != color_var]
        domain = Domain(attributes, color_var, metas)
        self.color_data = color_data = self.data.from_table(domain, self.data)
        self.discrete_data = self._get_discrete_data(color_data)
        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(True)
        self.coloring_changed()

    def update_selection_rects(self):
        """Draw selected rectangles with a thick dotted pen, others plain."""
        for i, (_, _, area) in enumerate(self.areas):
            if i in self.selection:
                area.setPen(QPen(Qt.black, 3, Qt.DotLine))
            else:
                area.setPen(QPen())

    def select_area(self, index, ev):
        """
        Handle a mouse click on rectangle `index`: a left click selects it,
        a left click with Control toggles it in the current selection.
        """
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        """
        Send the rows that fall into the selected rectangles and the
        annotated table; sends None / an empty annotation when nothing is
        selected or there is no data.
        """
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        # One conjunction of value filters per selected rectangle ...
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        # ... combined into a disjunction over the rectangles
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, row_id in enumerate(self.data.ids)
                   if row_id in idset]
        # Output the original rows rather than their derived counterparts
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot(self.canvas)

    def update_graph(self):
        """
        Clear the scene and redraw the whole mosaic (rectangles, value and
        variable labels, legend) for the current data and settings.

        The nested helpers share state through the enclosing scope
        (`data`, `class_var`, `conditionaldict`, `distributiondict`,
        `conditionalsubsetdict`, `apriori_dists`, `drawn_sides`,
        `draw_positions`, `max_ylabel_w1`, `max_ylabel_w2`), all of which
        are assigned further down in this method before `draw_data` is
        called.
        """
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def get_counts(attr_vals, values):
            """This function calculates rectangles' widths.
            If all widths are zero then all widths are set to 1."""
            if attr_vals == "":
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [conditionaldict[attr_vals + "-" + val]
                          for val in values]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)
            return total, counts

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs, used_vals, attr_vals=""):
            # Recursively split the given rectangle by the values of
            # attr_list[0], alternating between the x and y axis.
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * (
                    len(values) - 1))
                if whole == 0:
                    edge = (x1 - x0) / float(len(values) - 1)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (y1 - y0) / float(len(values) - 1)

            total, counts = get_counts(attr_vals, values)

            # if we are visualizing the third attribute and the first attribute
            # has the last value, we have to reverse the order in which the
            # boxes will be drawn otherwise, if the last cell, nearest to the
            # labels of the fourth attribute, is empty, we wouldn't be able to
            # position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(
                    data.domain[used_attrs[0]])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                if attr_vals != "":
                    newattrvals = attr_vals + "-" + val
                else:
                    newattrvals = val

                tooltip = condition + 4 * " " + attr + \
                    ": <b>" + htmlval + "</b><br>"
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                common_args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1,
                                  tooltip, total_attrs, *common_args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1,
                                  tooltip, total_attrs, *common_args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            # Draw the value labels and the variable name for one side of
            # the plot; each side is labelled at most once.
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = \
                    get_variable_values_sorted(data.domain[used_attrs[0]])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            total, counts = get_counts(attr_vals, values)

            aligns = [Qt.AlignTop | Qt.AlignHCenter,
                      Qt.AlignRight | Qt.AlignVCenter,
                      Qt.AlignBottom | Qt.AlignHCenter,
                      Qt.AlignLeft | Qt.AlignVCenter]
            align = aligns[side]
            for i, val in enumerate(values):
                perc = counts[i] / float(total)
                if distributiondict[val] != 0:
                    if side == 0:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * 0.5 * perc,
                                   y1 + self.ATTR_VAL_OFFSET, align)
                    elif side == 1:
                        CanvasText(self.canvas, str(val),
                                   x0 - self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)
                    elif side == 2:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * perc * 0.5,
                                   y0 - self.ATTR_VAL_OFFSET, align)
                    else:
                        CanvasText(self.canvas, str(val),
                                   x1 + self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)

                if side % 2 == 0:
                    currpos += perc * width + spacing * (total_attrs - side)
                else:
                    currpos += perc * height + spacing * (total_attrs - side)

            if side == 0:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                    align, bold=1)
            elif side == 1:
                CanvasText(
                    self.canvas, attr,
                    x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)
            elif side == 2:
                CanvasText(
                    self.canvas, attr,
                    x0 + (x1 - x0) / 2,
                    y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                    align, bold=1)
            else:
                CanvasText(
                    self.canvas, attr,
                    x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET,
                    y0 + (y1 - y0) / 2,
                    align, bold=1, vertical=True)

        def add_rect(x0, x1, y0, y1, condition,
                     used_attrs, used_vals, attr_vals=""):
            # Draw one cell of the mosaic, register it in `self.areas`,
            # fill its interior according to `interior_coloring` and set
            # its tooltip.
            area_index = len(self.areas)
            if x0 == x1:
                x1 += 1
            if y0 == y1:
                y1 += 1

            # rectangles of width and height 1 are not shown - increase
            if x1 - x0 + y1 - y0 == 2:
                y1 += 1

            if class_var:
                colors = [QColor(*col) for col in class_var.colors]
            else:
                colors = None

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                        self.canvas, x, y, w, h, z=z, onclick=select_area,
                        **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                    self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                    onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.interior_coloring == self.PEARSON:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul,
                    (apriori_dists[i][used_vals[i]] / float(s)
                     for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = (actual - expected) / sqrt(expected)
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" +
                    "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        # the last stripe takes whatever height is left
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and \
                        abs(y1 - y0) > bar_width:
                    # bar on the left edge showing the overall distribution
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        counts = [conditionalsubsetdict[attr_vals + "-" + val]
                                  for val in cls_values]
                        if sum(counts) == 1:
                            rect(x0 - 2, y0 - 2, x1 - x0 + 5, y1 - y0 + 5, -550,
                                 colors[counts.index(1)], Qt.white,
                                 penWidth=2, penStyle=Qt.DashLine)
                        if self.subset_data is not None:
                            # bar on the right edge showing the subset
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[
                                        attr_vals + "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total,
                                         bar_width, v, 15, color)
                                    total += v

                actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                          for i in range(len(prior))]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                # `text` is joined with "<br/>" and has no trailing
                # separator, so it is used whole (slicing it would cut off
                # the end of the last class's line).
                outer_rect.setToolTip(
                    "{}<hr>Instances: {}<br><br>{}".format(
                        condition, n_actual, text))

        def draw_legend(x0_x1, y0_y1):
            # Draw the color legend centered below the plot
            x0, x1 = x0_x1
            _, y1 = y0_y1
            if self.interior_coloring == self.PEARSON:
                names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                         "Residuals:"]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
            else:
                names = get_variable_values_sorted(class_var) + \
                        [class_var.name + ":"]
                colors = [QColor(*col) for col in class_var.colors]

            names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
                     for name in names]
            totalwidth = sum(text.boundingRect().width() for text in names)

            # compute the x position of the center of the legend
            y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
            distance = 30
            startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2

            # the last entry is the legend title; it is placed first
            names[-1].setPos(startx + 15, y)
            names[-1].show()
            xoffset = names[-1].boundingRect().width() + distance

            size = 8

            for i in range(len(names) - 1):
                if self.interior_coloring == self.PEARSON:
                    edgecolor = Qt.black
                else:
                    edgecolor = colors[i]

                CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                                size, size, edgecolor, colors[i])
                names[i].setPos(startx + xoffset + 10, y)
                xoffset += distance + names[i].boundingRect().width()

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        subset = self.subset_data
        attr_list = self.get_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = isinstance(data, SqlTable)
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        attrs = [attr for attr in attr_list if not data.domain[attr].values]
        if attrs:
            CanvasText(self.canvas,
                       "Feature {} has no values".format(attrs[0]),
                       (self.canvas_view.width() - 120) / 2,
                       self.canvas_view.height() / 2)
            return
        if self.interior_coloring == self.PEARSON:
            apriori_dists = [get_distribution(data, attr)
                             for attr in attr_list]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(data.domain[attr])
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        # get the maximum width of rectangle
        xoff = 20
        width = 20
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1], bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3], bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        # get the maximum height of rectangle
        height = 100
        yoff = 45
        square_size = min(self.canvas_view.width() - width - 20,
                          self.canvas_view.height() - height - 20)

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
            0, 0, self.canvas_view.width(), self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if subset:
            conditionalsubsetdict, _ = \
                get_conditional_distribution(subset, attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list), [], [])
        draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size))
        self.update_selection_rects()
class TestGraphicsTextItem(QAppTestCase):
    """Tests for the context-menu actions of an editable GraphicsTextItem."""

    def setUp(self):
        super().setUp()
        scene = QGraphicsScene()
        view = QGraphicsView(scene)
        text_item = GraphicsTextItem()
        text_item.setPlainText("AAA")
        text_item.setTextInteractionFlags(Qt.TextEditable)
        scene.addItem(text_item)
        view.setFocus()
        self.scene = scene
        self.view = view
        self.item = text_item

    def tearDown(self):
        self.scene.clear()
        self.view.deleteLater()
        del self.scene, self.view
        super().tearDown()

    def test_item_context_menu(self):
        """'select-all' is enabled and selects the item's text."""
        text_item = self.item
        popup = self._context_menu()
        self.assertFalse(text_item.textCursor().hasSelection())
        select_all = find_action(popup, "select-all")
        self.assertTrue(select_all.isEnabled())
        select_all.trigger()
        self.assertTrue(text_item.textCursor().hasSelection())

    def test_copy_cut_paste(self):
        """Copy and cut update the clipboard; paste restores the text."""
        text_item = self.item
        clipboard = QApplication.clipboard()
        self._select_document()

        popup = self._context_menu()
        # Both actions must result in a clipboard `dataChanged` signal
        for action_name in ("edit-copy", "edit-cut"):
            action = find_action(popup, action_name)
            changed = QSignalSpy(clipboard.dataChanged)
            action.trigger()
            self.assertTrue(len(changed) or changed.wait())

        self.assertEqual(text_item.toPlainText(), "")
        paste = find_action(popup, "edit-paste")
        paste.trigger()
        self.assertEqual(text_item.toPlainText(), "AAA")

    def test_context_menu_delete(self):
        """'edit-delete' removes the selected text."""
        self._select_document()

        popup = self._context_menu()
        delete = find_action(popup, "edit-delete")
        delete.trigger()
        self.assertEqual(self.item.toPlainText(), "")

    def _select_document(self):
        # Select the whole document of the item under test
        cursor = self.item.textCursor()
        cursor.select(cursor.Document)
        self.item.setTextCursor(cursor)

    def _context_menu(self):
        # Request a context menu at the item's center and return it
        center = self.item.boundingRect().center()
        viewport_pos = map_to_viewport(self.view, self.item, center)
        contextMenu(self.view.viewport(), viewport_pos)
        return self._get_menu()

    def _get_menu(self) -> QMenu:
        # Find the top-level QMenu that is parented to the view's viewport
        def is_viewport_menu(candidate):
            return isinstance(candidate, QMenu) \
                and candidate.parent() is self.view.viewport()

        found = findf(self.app.topLevelWidgets(), is_viewport_menu)
        assert found is not None
        return found
class OWPieChart(widget.OWWidget):
    """
    Widget that draws one pie chart of a discrete variable, or one pie per
    value of an optional discrete "split by" variable.
    """
    name = "Pie Chart"
    description = "Make fun of Pie Charts."
    keywords = ["pie chart", "chart", "visualisation"]
    icon = "icons/PieChart.svg"
    priority = 700

    class Inputs:
        data = Input("Data", Orange.data.Table)

    settingsHandler = DomainContextHandler()
    #: The variable whose distribution is drawn
    attribute = ContextSetting(None)
    #: Optional variable; one pie is drawn for each of its values
    split_var = ContextSetting(None)
    #: Offset each slice slightly away from the pie's center
    explode = Setting(False)

    graph_name = "scene"

    def __init__(self):
        super().__init__()
        self.dataset = None

        self.attrs = DomainModel(
            valid_types=Orange.data.DiscreteVariable, separators=False)
        cb = gui.comboBox(
            self.controlArea, self, "attribute", box=True,
            model=self.attrs, callback=self.update_scene, contentsLength=12)
        # The legend lives in a grid (icon, label) below the combo box
        grid = QGridLayout()
        self.legend = gui.widgetBox(gui.indentedBox(cb.box), orientation=grid)
        grid.setColumnStretch(1, 1)
        grid.setHorizontalSpacing(6)
        self.legend_items = []

        self.split_vars = DomainModel(
            valid_types=Orange.data.DiscreteVariable, separators=False,
            placeholder="None",
        )
        self.split_combobox = gui.comboBox(
            self.controlArea, self, "split_var", box="Split by",
            model=self.split_vars, callback=self.update_scene)
        self.explode_checkbox = gui.checkBox(
            self.controlArea, self, "explode", "Explode pies", box=True,
            callback=self.update_scene)
        gui.rubber(self.controlArea)
        gui.widgetLabel(
            gui.hBox(self.controlArea, box=True),
            "The aim of this widget is to\n"
            "demonstrate that pie charts are\n"
            "a terrible visualization. Please\n"
            "don't use it for any other purpose.")

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHints(
            QPainter.Antialiasing | QPainter.TextAntialiasing |
            QPainter.SmoothPixmapTransform)
        self.mainArea.layout().addWidget(self.view)
        self.mainArea.setMinimumWidth(500)

    def sizeHint(self):
        return QSize(200, 150)  # Horizontal size is regulated by mainArea

    @Inputs.data
    def set_data(self, dataset):
        """
        Set the input data, reset both variable selections and redraw.

        An empty table, or one with an empty domain, is treated as no data.
        """
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.attribute = None
        self.split_var = None
        domain = dataset.domain if dataset is not None else None
        self.attrs.set_domain(domain)
        self.split_vars.set_domain(domain)
        if dataset is not None:
            self.select_default_variables(domain)
            self.openContext(self.dataset)
        self.update_scene()

    def select_default_variables(self, domain):
        """Pick the initial `attribute` and `split_var` for a new domain."""
        if len(self.attrs) > len(domain.class_vars):
            first_attr = self.split_vars[len(domain.class_vars)]
        else:
            first_attr = None
        if len(self.attrs):
            self.attribute, self.split_var = self.attrs[0], first_attr
        else:
            # NOTE(review): `split_var` was reset to None by `set_data`, so
            # this presumably just leaves both selections empty -- confirm.
            self.attribute, self.split_var = self.split_var, None

    def update_scene(self):
        """Clear the scene and redraw the pies, their labels and the legend."""
        self.scene.clear()
        if self.dataset is None or self.attribute is None:
            # Without this the legend of the previous data stays visible
            self.update_legend([], [])
            return
        dists, labels = self.compute_box_data()
        colors = self.attribute.colors
        for x, (dist, label) in enumerate(zip(dists, labels)):
            self.pie_chart(SCALE * x, 0, 0.8 * SCALE, dist, colors)
            self.pie_label(SCALE * x, 0, label)
        self.update_legend(
            [QColor(*col) for col in colors], self.attribute.values)
        self.view.centerOn(SCALE * len(dists) / 2, 0)

    def update_legend(self, colors, labels):
        """
        Replace the legend with one (color swatch, text) row per label.

        Passing empty sequences removes all legend rows.
        """
        layout = self.legend.layout()
        while self.legend_items:
            w = self.legend_items.pop()
            layout.removeWidget(w)
            w.deleteLater()
        for row, (color, text) in enumerate(zip(colors, labels)):
            icon = QLabel()
            p = QPixmap(12, 12)
            p.fill(color)
            icon.setPixmap(p)
            label = QLabel(text)
            layout.addWidget(icon, row, 0)
            layout.addWidget(label, row, 1, alignment=Qt.AlignLeft)
            self.legend_items += (icon, label)

    def pie_chart(self, x, y, r, dist, colors):
        """
        Add a pie centered at (x, y) to the scene.

        `r` is used as the width and height of each ellipse item, `dist`
        holds the slice weights and `colors` the matching RGB(A) tuples.
        Zero-weight slices are skipped.
        """
        start_angle = 0
        dist = np.asarray(dist)
        # Spans are expressed in 1/16 of a degree; `or 1` avoids division
        # by zero when the distribution is empty
        spans = dist / (float(np.sum(dist)) or 1) * 360 * 16
        for span, color in zip(spans, colors):
            if not span:
                continue
            if self.explode:
                # Shift the slice along the bisector of its angle
                mid_ang = (start_angle + span / 2) / 360 / 16 * 2 * pi
                dx = r / 30 * cos(mid_ang)
                dy = r / 30 * sin(mid_ang)
            else:
                dx = dy = 0
            ellipse = QGraphicsEllipseItem(
                x - r / 2 + dx, y - r / 2 - dy, r, r)
            if len(spans) > 1:
                ellipse.setStartAngle(start_angle)
                ellipse.setSpanAngle(span)
            ellipse.setBrush(QColor(*color))
            self.scene.addItem(ellipse)
            start_angle += span

    def pie_label(self, x, y, label):
        """
        Add `label` centered horizontally at `x`, below the pie at `y`.

        The text is shortened (with a trailing "...") until it is narrower
        than 0.95 * SCALE. An empty label adds nothing.
        """
        if not label:
            return
        text = QGraphicsSimpleTextItem(label)
        for cut in range(1, len(label)):
            if text.boundingRect().width() < 0.95 * SCALE:
                break
            text = QGraphicsSimpleTextItem(label[:-cut] + "...")
        text.setPos(x - text.boundingRect().width() / 2, y + 0.5 * SCALE)
        self.scene.addItem(text)

    def compute_box_data(self):
        """
        Return `(distributions, labels)` for the pies to draw.

        With a split variable this is its contingency with `attribute` and
        the split variable's values; otherwise a single distribution with
        an empty label.
        """
        if self.split_var:
            return (
                contingency.get_contingency(
                    self.dataset, self.attribute, self.split_var),
                self.split_var.values)
        else:
            return [
                distribution.get_distribution(self.dataset, self.attribute)
            ], [""]

    def send_report(self):
        """Add the plot and a caption naming the shown variables to the report."""
        self.report_plot()
        text = ""
        if self.attribute is not None:
            text += "Pie chart for '{}' ".format(self.attribute.name)
        if self.split_var is not None:
            text += "split by '{}'".format(self.split_var.name)
        if text:
            self.report_caption(text)
class OWSilhouettePlot(widget.OWWidget):
    """
    Widget that computes per-instance silhouette scores for a chosen
    cluster (discrete) variable and shows them as a silhouette plot.
    """
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the (displayed) silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(True)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan),
                 ("Cosine", Orange.distance.Cosine)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")
        singleton_clusters_all = Msg("All clusters are singletons")
        memory_error = Msg("Not enough memory")
        value_error = Msg("Distances could not be computed: '{}'")

    class Warning(widget.OWWidget.Warning):
        missing_cluster_assignment = Msg(
            "{} instance{s} omitted (missing cluster assignment)")
        nan_distances = Msg("{} instance{s} omitted (undefined distances)")
        ignoring_categorical = Msg("Ignoring categorical features")

    def __init__(self):
        super().__init__()
        #: The input data
        self.data = None  # type: Optional[Orange.data.Table]
        #: Distance matrix computed from data
        self._matrix = None  # type: Optional[Orange.misc.DistMatrix]
        #: An bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None  # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None  # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        #: The plot item currently in the scene (None when nothing is shown)
        self._silplot = None  # type: Optional[SilhouettePlot]

        gui.comboBox(
            self.controlArea, self, "distance_idx", box="Distance",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", contentsLength=14, addSpace=4,
            callback=self._invalidate_scores
        )
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size, addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", contentsLength=14,
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(
            box, self, "auto_commit", "Commit",
            auto_label="Auto commit", box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """
        Set the input dataset.

        Data without any discrete variable with at least two values is
        rejected with an error message and treated as no data.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2]
            if not candidatevars:
                error_msg = "Input does not have any suitable labels."
                data = None

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            # Prefer the class variable as the default cluster label
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        if self.data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._matrix = None
        self._mask = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()
        self.Error.clear()
        self.Warning.clear()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = self._mask = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        self._clear_messages()

        if self.data is None or not len(self.data):
            self._reset_all()
            return

        if self._matrix is None and self.data is not None:
            _, metric = self.Distances[self.distance_idx]
            data = self.data
            if not metric.supports_discrete and any(
                    a.is_discrete for a in data.domain.attributes):
                self.Warning.ignoring_categorical()
                data = Orange.distance.remove_discrete_features(data)
            try:
                self._matrix = np.asarray(metric(data))
            except MemoryError:
                self.Error.memory_error()
                return
            except ValueError as err:
                self.Error.value_error(str(err))
                return

        self._update_labels()

    def _reset_all(self):
        self._mask = None
        self._silhouette = None
        self._labels = None
        self._matrix = None
        self._clear_scene()

    def _clear_messages(self):
        self.Error.clear()
        self.Warning.clear()

    def _update_labels(self):
        # Recompute the labels, the omission mask and the silhouette scores
        # for the currently selected cluster variable.
        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        cluster_mask = np.isnan(labels)
        dist_mask = np.isnan(self._matrix).all(axis=0)
        mask = cluster_mask | dist_mask
        # Drop the masked (NaN) entries before the integer cast; casting NaN
        # to int is an invalid conversion.
        labels = labels[~mask].astype(int)

        labels_unq, _ = np.unique(labels, return_counts=True)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels,
                metric="precomputed")
        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if mask is not None:
            count_missing = np.count_nonzero(cluster_mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")
            count_nandist = np.count_nonzero(dist_mask)
            if count_nandist:
                self.Warning.nan_distances(
                    count_nandist, s="s" if count_nandist > 1 else "")

    def _set_bar_height(self):
        # Row names are only shown for bars of size 5 or more
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(
                    self._silhouette, self._labels, var.values, var.colors)
            else:
                # Show everything as a single unnamed group
                silplot.setScores(
                    self._silhouette,
                    np.zeros(len(self._silhouette), dtype=int),
                    [""], np.array([[63, 207, 207]])
                )

            self.scene.addItem(silplot)
            self._update_annotations()
            silplot.selectionChanged.connect(self.commit)
            silplot.layout().activate()
            self._update_scene_rect()
            silplot.geometryChanged.connect(self._update_scene_rect)

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()

    def _update_annotations(self):
        # Index 0 of the annotation model is the "None" placeholder
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                if self._mask is not None:
                    assert column.shape == self._mask.shape
                    # pylint: disable=invalid-unary-operand-type
                    column = column[~self._mask]
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def _update_scene_rect(self):
        self.scene.setSceneRect(self._silplot.geometry())

    def commit(self):
        """
        Commit/send the current selection to the output.

        When `add_scores` is set, a "Silhouette (...)" meta column is added
        to both outputs; instances without a score get NaN.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    # Map plot rows back to rows of the input data
                    # pylint: disable=invalid-unary-operand-type
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._silhouette is None:
                # No scores could be computed; emit an all-NaN column rather
                # than passing None on to the table assignment below.
                scores = np.full(shape=selectedmask.shape, fill_value=np.nan)
            elif self._mask is not None:
                scores = np.full(shape=selectedmask.shape, fill_value=np.nan)
                # pylint: disable=invalid-unary-operand-type
                scores[~self._mask] = self._silhouette
            else:
                scores = self._silhouette

            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.transform(domain)
            else:
                domain = self.data.domain
                data = self.data

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, np.flatnonzero(selectedmask))

            if self.add_scores:
                if selected is not None:
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(create_annotated_table(data, indices))

    def send_report(self):
        """Add the plot and a descriptive caption to the report."""
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        # The plot does not exist when the scores could not be computed
        if self.annotation_var_idx and self._silplot is not None \
                and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
class OWBoxPlot(widget.OWWidget): """ Here's how the widget's functions call each other: - `set_data` is a signal handler fills the list boxes and calls `grouping_changed`. - `grouping_changed` handles changes of grouping attribute: it enables or disables the box for ordering, orders attributes and calls `attr_changed`. - `attr_changed` handles changes of attribute. It recomputes box data by calling `compute_box_data`, shows the appropriate display box (discrete/continuous) and then calls`layout_changed` - `layout_changed` constructs all the elements for the scene (as lists of QGraphicsItemGroup) and calls `display_changed`. It is called when the attribute or grouping is changed (by attr_changed) and on resize event. - `display_changed` puts the elements corresponding to the current display settings on the scene. It is called when the elements are reconstructed (layout is changed due to selection of attributes or resize event), or when the user changes display settings or colors. For discrete attributes, the flow is a bit simpler: the elements are not constructed in advance (by layout_changed). Instead, layout_changed and display_changed call display_changed_disc that draws everything. 
""" name = "箱形图" description = "在方框图中可视化特征值的分布。" icon = "icons/BoxPlot.svg" priority = 100 keywords = ["whisker"] class Inputs: data = Input("数据", Orange.data.Table) class Outputs: selected_data = Output("所选数据", Orange.data.Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table) #: Comparison types for continuous variables CompareNone, CompareMedians, CompareMeans = 0, 1, 2 settingsHandler = DomainContextHandler() conditions = ContextSetting([]) attribute = ContextSetting(None) order_by_importance = Setting(False) group_var = ContextSetting(None) show_annotations = Setting(True) compare = Setting(CompareMeans) stattest = Setting(0) sig_threshold = Setting(0.05) stretched = Setting(True) show_labels = Setting(True) sort_freqs = Setting(False) auto_commit = Setting(True) _sorting_criteria_attrs = { CompareNone: "", CompareMedians: "median", CompareMeans: "mean" } _pen_axis_tick = QPen(Qt.white, 5) _pen_axis = QPen(Qt.darkGray, 3) _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2) _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2) _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1) _pen_dotted.setStyle(Qt.DotLine) _post_line_pen = QPen(Qt.lightGray, 2) _post_grp_pen = QPen(Qt.lightGray, 4) for pen in (_pen_paramet, _pen_median, _pen_dotted, _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen): pen.setCosmetic(True) pen.setCapStyle(Qt.RoundCap) pen.setJoinStyle(Qt.RoundJoin) _pen_axis_tick.setCapStyle(Qt.FlatCap) _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0)) _axis_font = QFont() _axis_font.setPixelSize(12) _label_font = QFont() _label_font.setPixelSize(11) _attr_brush = QBrush(QColor(0x33, 0x00, 0xff)) graph_name = "盒式布景" def __init__(self): super().__init__() self.stats = [] self.dataset = None self.posthoc_lines = [] self.label_txts = self.mean_labels = self.boxes = self.labels = \ self.label_txts_all = self.attr_labels = self.order = [] self.p = -1.0 self.scale_x = self.scene_min_x = self.scene_width = 0 
self.label_width = 0 self.attrs = VariableListModel() view = gui.listView(self.controlArea, self, "attribute", box="变量", model=self.attrs, callback=self.attr_changed) view.setMinimumSize(QSize(30, 30)) # Any other policy than Ignored will let the QListBox's scrollbar # set the minimal height (see the penultimate paragraph of # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget) view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored) gui.separator(view.box, 6, 6) self.cb_order = gui.checkBox(view.box, self, "order_by_importance", "按相关性排序", tooltip="由𝜒²或方差对子群排序", callback=self.apply_sorting) self.group_vars = DomainModel(placeholder="无", separators=False, valid_types=Orange.data.DiscreteVariable) self.group_view = view = gui.listView(self.controlArea, self, "group_var", box="子群", model=self.group_vars, callback=self.grouping_changed) view.setEnabled(False) view.setMinimumSize(QSize(30, 30)) # See the comment above view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored) # TODO: move Compare median/mean to grouping box # The vertical size policy is needed to let only the list views expand self.display_box = gui.vBox(self.controlArea, "Display", sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum), addSpace=False) gui.checkBox(self.display_box, self, "show_annotations", "Annotate", callback=self.display_changed) self.compare_rb = gui.radioButtonsInBox( self.display_box, self, 'compare', btnLabels=["无比较", "中位数比较", "均值比较"], callback=self.layout_changed) # The vertical size policy is needed to let only the list views expand self.stretching_box = box = gui.vBox(self.controlArea, box="显示", sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Fixed)) self.stretching_box.sizeHint = self.display_box.sizeHint gui.checkBox(box, self, 'stretched', "拉杆", callback=self.display_changed) gui.checkBox(box, self, 'show_labels', "显示框标签", callback=self.display_changed) self.sort_cb = gui.checkBox(box, self, 'sort_freqs', "按子组频率排序", callback=self.display_changed) 
gui.rubber(box) gui.auto_commit(self.controlArea, self, "auto_commit", "选中发送", "自动发送") gui.vBox(self.mainArea, addSpace=True) self.box_scene = QGraphicsScene() self.box_scene.selectionChanged.connect(self.commit) self.box_view = QGraphicsView(self.box_scene) self.box_view.setRenderHints(QPainter.Antialiasing | QPainter.TextAntialiasing | QPainter.SmoothPixmapTransform) self.box_view.viewport().installEventFilter(self) self.mainArea.layout().addWidget(self.box_view) e = gui.hBox(self.mainArea, addSpace=False) self.infot1 = gui.widgetLabel(e, "<center>没有测试结果。</center>") self.mainArea.setMinimumWidth(600) self.stats = self.dist = self.conts = [] self.is_continuous = False self.update_display_box() def sizeHint(self): return QSize(100, 500) # Vertical size is regulated by mainArea def eventFilter(self, obj, event): if obj is self.box_view.viewport() and \ event.type() == QEvent.Resize: self.layout_changed() return super().eventFilter(obj, event) def reset_attrs(self, domain): self.attrs[:] = [ var for var in chain(domain.class_vars, domain.metas, domain.attributes) if var.is_primitive() ] # noinspection PyTypeChecker @Inputs.data def set_data(self, dataset): if dataset is not None and (not bool(dataset) or not len(dataset.domain) and not any(var.is_primitive() for var in dataset.domain.metas)): dataset = None self.closeContext() self.dataset = dataset self.dist = self.stats = self.conts = [] self.group_var = None self.attribute = None if dataset: domain = dataset.domain self.group_vars.set_domain(domain) self.group_view.setEnabled(len(self.group_vars) > 1) self.reset_attrs(domain) self.select_default_variables(domain) self.openContext(self.dataset) self.grouping_changed() else: self.reset_all_data() self.commit() def select_default_variables(self, domain): # visualize first non-class variable, group by class (if present) if len(self.attrs) > len(domain.class_vars): self.attribute = self.attrs[len(domain.class_vars)] elif self.attrs: self.attribute = self.attrs[0] if 
domain.class_var and domain.class_var.is_discrete: self.group_var = domain.class_var else: self.group_var = None # Reset to trigger selection via callback def apply_sorting(self): def compute_score(attr): if attr is group_var: return 3 if attr.is_continuous: # One-way ANOVA col = data.get_column_view(attr)[0].astype(float) groups = (col[group_col == i] for i in range(n_groups)) groups = (col[~np.isnan(col)] for col in groups) groups = [group for group in groups if len(group)] p = f_oneway(*groups)[1] if len(groups) > 1 else 2 else: # Chi-square with the given distribution into groups # (see degrees of freedom in computation of the p-value) if not attr.values or not group_var.values: return 2 observed = np.array( contingency.get_contingency(data, group_var, attr)) observed = observed[observed.sum(axis=1) != 0, :] observed = observed[:, observed.sum(axis=0) != 0] if min(observed.shape) < 2: return 2 expected = \ np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \ np.sum(observed) p = chisquare(observed.ravel(), f_exp=expected.ravel(), ddof=n_groups - 1)[1] if math.isnan(p): return 2 return p data = self.dataset if data is None: return domain = data.domain attribute = self.attribute group_var = self.group_var if self.order_by_importance and group_var is not None: n_groups = len(group_var.values) group_col = data.get_column_view(group_var)[0] if \ domain.has_continuous_attributes( include_class=True, include_metas=True) else None self.attrs.sort(key=compute_score) else: self.reset_attrs(domain) self.attribute = attribute def reset_all_data(self): self.clear_scene() self.infot1.setText("") self.attrs.clear() self.group_vars.set_domain(None) self.group_view.setEnabled(False) self.is_continuous = False self.update_display_box() def grouping_changed(self): self.cb_order.setEnabled(self.group_var is not None) self.apply_sorting() self.attr_changed() def select_box_items(self): temp_cond = self.conditions.copy() for box in self.box_scene.items(): if isinstance(box, 
FilterGraphicsRectItem): box.setSelected( box.filter.conditions in [c.conditions for c in temp_cond]) def attr_changed(self): self.compute_box_data() self.update_display_box() self.layout_changed() if self.is_continuous: heights = 90 if self.show_annotations else 60 self.box_view.centerOn(self.scene_min_x + self.scene_width / 2, -30 - len(self.stats) * heights / 2 + 45) else: self.box_view.centerOn(self.scene_width / 2, -30 - len(self.boxes) * 40 / 2 + 45) def compute_box_data(self): attr = self.attribute if not attr: return dataset = self.dataset self.is_continuous = attr.is_continuous if dataset is None or not self.is_continuous and not attr.values or \ self.group_var and not self.group_var.values: self.stats = self.dist = self.conts = [] return if self.group_var: self.dist = [] self.conts = contingency.get_contingency(dataset, attr, self.group_var) if self.is_continuous: stats, label_texts = [], [] for i, cont in enumerate(self.conts): if np.sum(cont[1]): stats.append(BoxData(cont, attr, i, self.group_var)) label_texts.append(self.group_var.values[i]) self.stats = stats self.label_txts_all = label_texts else: self.label_txts_all = \ [v for v, c in zip(self.group_var.values, self.conts) if np.sum(c) > 0] else: self.dist = distribution.get_distribution(dataset, attr) self.conts = [] if self.is_continuous: self.stats = [BoxData(self.dist, attr, None)] self.label_txts_all = [""] self.label_txts = [ txts for stat, txts in zip(self.stats, self.label_txts_all) if stat.n > 0 ] self.stats = [stat for stat in self.stats if stat.n > 0] def update_display_box(self): if self.is_continuous: self.stretching_box.hide() self.display_box.show() self.compare_rb.setEnabled(self.group_var is not None) else: self.stretching_box.show() self.display_box.hide() self.sort_cb.setEnabled(self.group_var is not None) def clear_scene(self): self.closeContext() self.box_scene.clearSelection() self.box_scene.clear() self.box_view.viewport().update() self.attr_labels = [] self.labels = [] 
self.boxes = [] self.mean_labels = [] self.posthoc_lines = [] self.openContext(self.dataset) def layout_changed(self): attr = self.attribute if not attr: return self.clear_scene() if self.dataset is None or len(self.conts) == len(self.dist) == 0: return if not self.is_continuous: self.display_changed_disc() return self.mean_labels = [ self.mean_label(stat, attr, lab) for stat, lab in zip(self.stats, self.label_txts) ] self.draw_axis() self.boxes = [self.box_group(stat) for stat in self.stats] self.labels = [ self.label_group(stat, attr, mean_lab) for stat, mean_lab in zip(self.stats, self.mean_labels) ] self.attr_labels = [ QGraphicsSimpleTextItem(lab) for lab in self.label_txts ] for it in chain(self.labels, self.attr_labels): self.box_scene.addItem(it) self.display_changed() def display_changed(self): if self.dataset is None: return if not self.is_continuous: self.display_changed_disc() return self.order = list(range(len(self.stats))) criterion = self._sorting_criteria_attrs[self.compare] if criterion: vals = [getattr(stat, criterion) for stat in self.stats] overmax = max((val for val in vals if val is not None), default=0) \ + 1 vals = [val if val is not None else overmax for val in vals] self.order = sorted(self.order, key=vals.__getitem__) heights = 90 if self.show_annotations else 60 for row, box_index in enumerate(self.order): y = (-len(self.stats) + row) * heights + 10 for item in self.boxes[box_index]: self.box_scene.addItem(item) item.setY(y) labels = self.labels[box_index] if self.show_annotations: labels.show() labels.setY(y) else: labels.hide() label = self.attr_labels[box_index] label.setY(y - 15 - label.boundingRect().height()) if self.show_annotations: label.hide() else: stat = self.stats[box_index] if self.compare == OWBoxPlot.CompareMedians and \ stat.median is not None: pos = stat.median + 5 / self.scale_x elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None: pos = stat.mean + 5 / self.scale_x else: pos = stat.q25 label.setX(pos * 
self.scale_x) label.show() r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights, self.scene_width, len(self.stats) * heights + 90) self.box_scene.setSceneRect(r) self.compute_tests() self.show_posthoc() self.select_box_items() def display_changed_disc(self): assert not self.is_continuous self.clear_scene() self.attr_labels = [ QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all ] if not self.stretched: if self.group_var: self.labels = [ QGraphicsTextItem("{}".format(int(sum(cont)))) for cont in self.conts if np.sum(cont) > 0 ] else: self.labels = [QGraphicsTextItem(str(int(sum(self.dist))))] self.order = list(range(len(self.attr_labels))) self.draw_axis_disc() if self.group_var: self.boxes = \ [self.strudel(cont, i) for i, cont in enumerate(self.conts) if np.sum(cont) > 0] self.conts = self.conts[np.sum(np.array(self.conts), axis=1) > 0] if self.sort_freqs: # pylint: disable=invalid-unary-operand-type self.order = sorted( self.order, key=(-np.sum(self.conts, axis=1)).__getitem__) else: self.boxes = [self.strudel(self.dist)] for row, box_index in enumerate(self.order): y = (-len(self.boxes) + row) * 40 + 10 box = self.boxes[box_index] bars, labels = box[::2], box[1::2] self.__draw_group_labels(y, box_index) if not self.stretched: self.__draw_row_counts(y, box_index) if self.show_labels and self.attribute is not self.group_var: self.__draw_bar_labels(y, bars, labels) self.__draw_bars(y, bars) self.box_scene.setSceneRect(-self.label_width - 5, -30 - len(self.boxes) * 40, self.scene_width, len(self.boxes * 40) + 90) self.infot1.setText("") self.select_box_items() def __draw_group_labels(self, y, row): """Draw group labels Parameters ---------- y: int vertical offset of bars row: int row index """ label = self.attr_labels[row] b = label.boundingRect() label.setPos(-b.width() - 10, y - b.height() / 2) self.box_scene.addItem(label) def __draw_row_counts(self, y, row): """Draw row counts Parameters ---------- y: int vertical offset of bars row: int row 
index """ assert not self.is_continuous label = self.labels[row] b = label.boundingRect() if self.group_var: right = self.scale_x * sum(self.conts[row]) else: right = self.scale_x * sum(self.dist) label.setPos(right + 10, y - b.height() / 2) self.box_scene.addItem(label) def __draw_bar_labels(self, y, bars, labels): """Draw bar labels Parameters ---------- y: int vertical offset of bars bars: List[FilterGraphicsRectItem] list of bars being drawn labels: List[QGraphicsTextItem] list of labels for corresponding bars """ label = bar_part = None for text_item, bar_part in zip(labels, bars): label = self.Label(text_item.toPlainText()) label.setPos(bar_part.boundingRect().x(), y - label.boundingRect().height() - 8) label.setMaxWidth(bar_part.boundingRect().width()) self.box_scene.addItem(label) def __draw_bars(self, y, bars): """Draw bars Parameters ---------- y: int vertical offset of bars bars: List[FilterGraphicsRectItem] list of bars to draw """ for item in bars: item.setPos(0, y) self.box_scene.addItem(item) # noinspection PyPep8Naming def compute_tests(self): # The t-test and ANOVA are implemented here since they efficiently use # the widget-specific data in self.stats. 
# The non-parametric tests can't do this, so we use statistics.tests def stat_ttest(): d1, d2 = self.stats if d1.n == 0 or d2.n == 0: return np.nan, np.nan pooled_var = d1.var / d1.n + d2.var / d2.n df = pooled_var ** 2 / \ ((d1.var / d1.n) ** 2 / (d1.n - 1) + (d2.var / d2.n) ** 2 / (d2.n - 1)) if pooled_var == 0: return np.nan, np.nan t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var) p = 2 * (1 - scipy.special.stdtr(df, t)) return t, p # TODO: Check this function # noinspection PyPep8Naming def stat_ANOVA(): if any(stat.n == 0 for stat in self.stats): return np.nan, np.nan n = sum(stat.n for stat in self.stats) grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n var_between = sum(stat.n * (stat.mean - grand_avg)**2 for stat in self.stats) df_between = len(self.stats) - 1 var_within = sum(stat.n * stat.var for stat in self.stats) df_within = n - len(self.stats) F = (var_between / df_between) / (var_within / df_within) p = 1 - scipy.special.fdtr(df_between, df_within, F) return F, p if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2: t = "" elif any(s.n <= 1 for s in self.stats): t = "At least one group has just one instance," \ "cannot compute significance" elif len(self.stats) == 2: if self.compare == OWBoxPlot.CompareMedians: t = "" # z, self.p = tests.wilcoxon_rank_sum( # self.stats[0].dist, self.stats[1].dist) # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p) else: t, self.p = stat_ttest() t = "Student's t: %.3f (p=%.3f)" % (t, self.p) else: if self.compare == OWBoxPlot.CompareMedians: t = "" # U, self.p = -1, -1 # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p) else: F, self.p = stat_ANOVA() t = "ANOVA: %.3f (p=%.3f)" % (F, self.p) self.infot1.setText("<center>%s</center>" % t) def mean_label(self, stat, attr, val_name): label = QGraphicsItemGroup() t = QGraphicsSimpleTextItem( "%.*f" % (attr.number_of_decimals + 1, stat.mean), label) t.setFont(self._label_font) bbox = t.boundingRect() w2, h = bbox.width() / 2, bbox.height() 
t.setPos(-w2, -h) tpm = QGraphicsSimpleTextItem( " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev), label) tpm.setFont(self._label_font) tpm.setPos(w2, -h) if val_name: vnm = QGraphicsSimpleTextItem(val_name + ": ", label) vnm.setFont(self._label_font) vnm.setBrush(self._attr_brush) vb = vnm.boundingRect() label.min_x = -w2 - vb.width() vnm.setPos(label.min_x, -h) else: label.min_x = -w2 return label def draw_axis(self): """Draw the horizontal axis and sets self.scale_x""" misssing_stats = not self.stats stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)] mean_labels = self.mean_labels or [ self.mean_label(stats[0], self.attribute, "") ] bottom = min(stat.a_min for stat in stats) top = max(stat.a_max for stat in stats) first_val, step = compute_scale(bottom, top) while bottom <= first_val: first_val -= step bottom = first_val no_ticks = math.ceil((top - first_val) / step) + 1 top = max(top, first_val + no_ticks * step) gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats)) gtop = max(top, max(stat.mean + stat.dev for stat in stats)) bv = self.box_view viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30) self.scale_x = scale_x = viewrect.width() / (gtop - gbottom) # In principle we should repeat this until convergence since the new # scaling is too conservative. (No chance am I doing this.) 
mlb = min(stat.mean + mean_lab.min_x / scale_x for stat, mean_lab in zip(stats, mean_labels)) if mlb < gbottom: gbottom = mlb self.scale_x = scale_x = viewrect.width() / (gtop - gbottom) self.scene_min_x = gbottom * scale_x self.scene_width = (gtop - gbottom) * scale_x val = first_val decimals = max(3, 4 - int(math.log10(step))) while True: l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1, self._pen_axis_tick) l.setZValue(100) t = self.box_scene.addSimpleText( repr(round(val, decimals)) if not misssing_stats else "?", self._axis_font) t.setFlags(t.flags() | QGraphicsItem.ItemIgnoresTransformations) r = t.boundingRect() t.setPos(val * scale_x - r.width() / 2, 8) if val >= top: break val += step self.box_scene.addLine(bottom * scale_x - 4, 0, top * scale_x + 4, 0, self._pen_axis) def draw_axis_disc(self): """ Draw the horizontal axis and sets self.scale_x for discrete attributes """ assert not self.is_continuous if self.stretched: if not self.attr_labels: return step = steps = 10 else: if self.group_var: max_box = max(float(np.sum(dist)) for dist in self.conts) else: max_box = float(np.sum(self.dist)) if max_box == 0: self.scale_x = 1 return _, step = compute_scale(0, max_box) step = int(step) if step > 1 else 1 steps = int(math.ceil(max_box / step)) max_box = step * steps bv = self.box_view viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30) self.scene_width = viewrect.width() lab_width = max(lab.boundingRect().width() for lab in self.attr_labels) lab_width = max(lab_width, 40) lab_width = min(lab_width, self.scene_width / 3) self.label_width = lab_width right_offset = 0 # offset for the right label if not self.stretched and self.labels: if self.group_var: rows = list(zip(self.conts, self.labels)) else: rows = [(self.dist, self.labels[0])] # available space left of the 'group labels' available = self.scene_width - lab_width - 10 scale_x = (available - right_offset) / max_box max_right = max( sum(dist) * scale_x + 10 + lbl.boundingRect().width() 
for dist, lbl in rows) right_offset = max(0, max_right - max_box * scale_x) self.scale_x = scale_x = \ (self.scene_width - lab_width - 10 - right_offset) / max_box self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis) for val in range(0, step * steps + 1, step): l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1, self._pen_axis_tick) l.setZValue(100) t = self.box_scene.addSimpleText(str(val), self._axis_font) t.setPos(val * scale_x - t.boundingRect().width() / 2, 8) if self.stretched: self.scale_x *= 100 def label_group(self, stat, attr, mean_lab): def centered_text(val, pos): t = QGraphicsSimpleTextItem( "%.*f" % (attr.number_of_decimals + 1, val), labels) t.setFont(self._label_font) bbox = t.boundingRect() t.setPos(pos - bbox.width() / 2, 22) return t def line(x, down=1): QGraphicsLineItem(x, 12 * down, x, 20 * down, labels) def move_label(label, frm, to): label.setX(to) to += t_box.width() / 2 path = QPainterPath() path.lineTo(0, 4) path.lineTo(to - frm, 4) path.lineTo(to - frm, 8) p = QGraphicsPathItem(path) p.setPos(frm, 12) labels.addToGroup(p) labels = QGraphicsItemGroup() labels.addToGroup(mean_lab) m = stat.mean * self.scale_x mean_lab.setPos(m, -22) line(m, -1) if stat.median is not None: msc = stat.median * self.scale_x med_t = centered_text(stat.median, msc) med_box_width2 = med_t.boundingRect().width() / 2 line(msc) if stat.q25 is not None: x = stat.q25 * self.scale_x t = centered_text(stat.q25, x) t_box = t.boundingRect() med_left = msc - med_box_width2 if x + t_box.width() / 2 >= med_left - 5: move_label(t, x, med_left - t_box.width() - 5) else: line(x) if stat.q75 is not None: x = stat.q75 * self.scale_x t = centered_text(stat.q75, x) t_box = t.boundingRect() med_right = msc + med_box_width2 if x - t_box.width() / 2 <= med_right + 5: move_label(t, x, med_right + 5) else: line(x) return labels def box_group(self, stat, height=20): def line(x0, y0, x1, y1, *args): return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, 
*args) scale_x = self.scale_x box = [] whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5) whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5) vert_line = line(stat.a_min, 0, stat.a_max, 0) mean_line = line(stat.mean, -height / 3, stat.mean, height / 3) for it in (whisker1, whisker2, mean_line): it.setPen(self._pen_paramet) vert_line.setPen(self._pen_dotted) var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0) var_line.setPen(self._pen_paramet) box.extend([whisker1, whisker2, vert_line, mean_line, var_line]) if stat.q25 is not None and stat.q75 is not None: mbox = FilterGraphicsRectItem(stat.conditions, stat.q25 * scale_x, -height / 2, (stat.q75 - stat.q25) * scale_x, height) mbox.setBrush(self._box_brush) mbox.setPen(QPen(Qt.NoPen)) mbox.setZValue(-200) box.append(mbox) if stat.median is not None: median_line = line(stat.median, -height / 2, stat.median, height / 2) median_line.setPen(self._pen_median) median_line.setZValue(-150) box.append(median_line) return box def strudel(self, dist, group_val_index=None): attr = self.attribute ss = np.sum(dist) box = [] if ss < 1e-6: cond = [FilterDiscrete(attr, None)] if group_val_index is not None: cond.append(FilterDiscrete(self.group_var, [group_val_index])) box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10)) cum = 0 for i, v in enumerate(dist): if v < 1e-6: continue if self.stretched: v /= ss v *= self.scale_x cond = [FilterDiscrete(attr, [i])] if group_val_index is not None: cond.append(FilterDiscrete(self.group_var, [group_val_index])) rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12) rect.setBrush(QBrush(QColor(*attr.colors[i]))) rect.setPen(QPen(Qt.NoPen)) if self.stretched: tooltip = "{}: {:.2f}%".format(attr.values[i], 100 * dist[i] / sum(dist)) else: tooltip = "{}: {}".format(attr.values[i], int(dist[i])) rect.setToolTip(tooltip) text = QGraphicsTextItem(attr.values[i]) box.append(rect) box.append(text) cum += v return box def commit(self): self.conditions = [ item.filter for item in 
self.box_scene.selectedItems() if item.filter ] selected, selection = None, [] if self.conditions: selected = Values(self.conditions, conjunction=False)(self.dataset) selection = np.in1d(self.dataset.ids, selected.ids, assume_unique=True).nonzero()[0] self.Outputs.selected_data.send(selected) self.Outputs.annotated_data.send( create_annotated_table(self.dataset, selection)) def show_posthoc(self): def line(y0, y1): it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen) it.setZValue(-100) self.posthoc_lines.append(it) while self.posthoc_lines: self.box_scene.removeItem(self.posthoc_lines.pop()) if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2: return if self.compare == OWBoxPlot.CompareMedians: crit_line = "median" else: crit_line = "mean" xs = [] height = 90 if self.show_annotations else 60 y_up = -len(self.stats) * height + 10 for pos, box_index in enumerate(self.order): stat = self.stats[box_index] x = getattr(stat, crit_line) if x is None: continue x *= self.scale_x xs.append(x * self.scale_x) by = y_up + pos * height line(by + 12, 3) line(by - 12, by - 25) used_to = [] last_to = to = 0 for frm, frm_x in enumerate(xs[:-1]): for to in range(frm + 1, len(xs)): if xs[to] - frm_x > 1.5: to -= 1 break if to in (last_to, frm): continue for rowi, used in enumerate(used_to): if used < frm: used_to[rowi] = to break else: rowi = len(used_to) used_to.append(to) y = -6 - rowi * 6 it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y, self._post_grp_pen) self.posthoc_lines.append(it) last_to = to def get_widget_name_extension(self): return self.attribute.name if self.attribute else None def send_report(self): self.report_plot() text = "" if self.attribute: text += "Box plot for attribute '{}' ".format(self.attribute.name) if self.group_var: text += "grouped by '{}'".format(self.group_var.name) if text: self.report_caption(text) class Label(QGraphicsSimpleTextItem): """Boxplot Label with settable maxWidth""" # Minimum width to display label text 
MIN_LABEL_WIDTH = 25 # padding bellow the text PADDING = 3 __max_width = None def maxWidth(self): return self.__max_width def setMaxWidth(self, max_width): self.__max_width = max_width def paint(self, painter, option, widget): """Overrides QGraphicsSimpleTextItem.paint If label text is too long, it is elided to fit into the allowed region """ if self.__max_width is None: width = option.rect.width() else: width = self.__max_width if width < self.MIN_LABEL_WIDTH: # if space is too narrow, no label return fm = painter.fontMetrics() text = fm.elidedText(self.text(), Qt.ElideRight, width) painter.drawText( option.rect.x(), option.rect.y() + self.boundingRect().height() - self.PADDING, text)
class OWPieChart(widget.OWWidget):
    """Widget drawing one pie chart of a discrete variable, or one pie per
    value of an optional second ("split by") discrete variable."""
    name = "Pie Chart"
    description = "Make fun of Pie Charts."
    icon = "icons/PieChart.svg"
    priority = 100

    class Inputs:
        data = Input("Data", Orange.data.Table)

    settingsHandler = DomainContextHandler()
    attribute = ContextSetting(None)
    split_var = ContextSetting(None)
    explode = Setting(False)
    graph_name = "scene"

    def __init__(self):
        super().__init__()
        self.dataset = None

        self.attrs = DomainModel(
            valid_types=Orange.data.DiscreteVariable, separators=False)
        cb = gui.comboBox(
            self.controlArea, self, "attribute", box=True,
            model=self.attrs, callback=self.update_scene, contentsLength=12)
        # the legend lives inside the attribute combo's box
        grid = QGridLayout()
        self.legend = gui.widgetBox(gui.indentedBox(cb.box), orientation=grid)
        grid.setColumnStretch(1, 1)
        grid.setHorizontalSpacing(6)
        self.legend_items = []
        self.split_vars = DomainModel(
            valid_types=Orange.data.DiscreteVariable, separators=False,
            placeholder="None",
        )
        gui.comboBox(
            self.controlArea, self, "split_var", box="Split by",
            model=self.split_vars, callback=self.update_scene)
        gui.checkBox(
            self.controlArea, self, "explode", "Explode pies", box=True,
            callback=self.update_scene)
        gui.rubber(self.controlArea)
        gui.widgetLabel(
            gui.hBox(self.controlArea, box=True),
            "The aim of this widget is to\n"
            "demonstrate that pie charts are\n"
            "a terrible visualization. Please\n"
            "don't use it for any other purpose.")

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHints(
            QPainter.Antialiasing | QPainter.TextAntialiasing |
            QPainter.SmoothPixmapTransform)
        self.mainArea.layout().addWidget(self.view)
        self.mainArea.setMinimumWidth(600)

    def sizeHint(self):
        return QSize(200, 150)  # Horizontal size is regulated by mainArea

    @Inputs.data
    def set_data(self, dataset):
        """Accept new input data; empty tables and empty domains are
        treated as no data."""
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.attribute = None
        self.split_var = None
        domain = dataset.domain if dataset is not None else None
        self.attrs.set_domain(domain)
        self.split_vars.set_domain(domain)
        if dataset is not None:
            self.select_default_variables(domain)
            self.openContext(self.dataset)
        self.update_scene()

    def select_default_variables(self, domain):
        """Pick the initial `attribute` and `split_var` for a new domain."""
        # NOTE(review): split_vars starts with the "None" placeholder, so
        # this index is shifted by one relative to self.attrs - confirm
        # that the chosen split variable is the intended one.
        if len(self.attrs) > len(domain.class_vars):
            first_attr = self.split_vars[len(domain.class_vars)]
        else:
            first_attr = None
        if len(self.attrs):
            self.attribute, self.split_var = self.attrs[0], first_attr
        else:
            self.attribute, self.split_var = self.split_var, None

    def update_scene(self):
        """Clear the scene and redraw all pies, their labels and the legend."""
        self.scene.clear()
        if self.dataset is None or self.attribute is None:
            return
        dists, labels = self.compute_box_data()
        colors = self.attribute.colors
        for x, (dist, label) in enumerate(zip(dists, labels)):
            self.pie_chart(SCALE * x, 0, 0.8 * SCALE, dist, colors)
            self.pie_label(SCALE * x, 0, label)
        self.update_legend(
            [QColor(*col) for col in colors], self.attribute.values)
        self.view.centerOn(SCALE * len(dists) / 2, 0)

    def update_legend(self, colors, labels):
        """Replace the legend rows with one color swatch + name per value."""
        layout = self.legend.layout()
        while self.legend_items:
            w = self.legend_items.pop()
            layout.removeWidget(w)
            w.deleteLater()
        for row, (color, label) in enumerate(zip(colors, labels)):
            icon = QLabel()
            p = QPixmap(12, 12)
            p.fill(color)
            icon.setPixmap(p)
            label = QLabel(label)
            layout.addWidget(icon, row, 0)
            layout.addWidget(label, row, 1, alignment=Qt.AlignLeft)
            self.legend_items += (icon, label)

    def pie_chart(self, x, y, r, dist, colors):
        """Draw one pie centered at (x, y) with diameter r.

        `dist` gives the slice sizes, `colors` the matching colors.
        """
        start_angle = 0
        dist = np.asarray(dist)
        # Qt measures angles in 1/16 of a degree; `or 1` avoids 0/0
        spans = dist / (float(np.sum(dist)) or 1) * 360 * 16
        for span, color in zip(spans, colors):
            if not span:
                continue
            if self.explode:
                mid_ang = (start_angle + span / 2) / 360 / 16 * 2 * pi
                dx = r / 30 * cos(mid_ang)
                dy = r / 30 * sin(mid_ang)
            else:
                dx = dy = 0
            ellipse = QGraphicsEllipseItem(x - r / 2 + dx, y - r / 2 - dy, r, r)
            if len(spans) > 1:
                # setStartAngle/setSpanAngle take ints; the float values
                # computed above must be converted explicitly.
                ellipse.setStartAngle(int(start_angle))
                ellipse.setSpanAngle(int(span))
            ellipse.setBrush(QColor(*color))
            self.scene.addItem(ellipse)
            start_angle += span

    def pie_label(self, x, y, label):
        """Draw `label` centered below the pie at (x, y), shortened with
        "..." until it is narrower than 0.95 * SCALE."""
        if not label:
            return
        text = QGraphicsSimpleTextItem(label)
        for cut in range(1, len(label)):
            if text.boundingRect().width() < 0.95 * SCALE:
                break
            text = QGraphicsSimpleTextItem(label[:-cut] + "...")
        text.setPos(x - text.boundingRect().width() / 2, y + 0.5 * SCALE)
        self.scene.addItem(text)

    def compute_box_data(self):
        """Return (distributions, labels): one entry per split value, or a
        single distribution with an empty label when there is no split."""
        if self.split_var:
            return (
                contingency.get_contingency(
                    self.dataset, self.attribute, self.split_var),
                self.split_var.values)
        else:
            return [
                distribution.get_distribution(
                    self.dataset, self.attribute)], [""]

    def send_report(self):
        """Add the plot and a caption naming the variables to the report."""
        self.report_plot()
        text = ""
        if self.attribute is not None:
            # the caption used to say "Box plot", copied from another widget
            text += "Pie chart for '{}' ".format(self.attribute.name)
        if self.split_var is not None:
            text += "split by '{}'".format(self.split_var.name)
        if text:
            self.report_caption(text)
class OWMosaicDisplay(OWWidget):
    """Widget showing a mosaic plot of up to four variables, colored either
    by Pearson residuals or by the distribution of a chosen variable."""
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    vizrank = SettingProvider(MosaicVizRank)
    settings_version = 2
    use_boxes = Setting(True)
    variable1: Variable = ContextSetting(None)
    variable2: Variable = ContextSetting(None)
    variable3: Variable = ContextSetting(None)
    variable4: Variable = ContextSetting(None)
    variable_color: DiscreteVariable = ContextSetting(None)
    selection = Setting(set(), schema_only=True)

    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    BLUE_COLORS = [
        QColor(255, 255, 255), QColor(210, 210, 255),
        QColor(110, 110, 255), QColor(0, 0, 255)
    ]
    RED_COLORS = [
        QColor(255, 255, 255), QColor(255, 200, 200),
        QColor(255, 100, 100), QColor(255, 0, 0)
    ]

    graph_name = "canvas"

    attrs_changed_manually = Signal(list)

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of numeric features on SQL is not supported")

    def __init__(self):
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.subset_data = None
        self.subset_indices = None
        # the stored selection is applied in handleNewSignals
        self.__pending_selection = self.selection
        self.selection = set()

        self.color_data = None

        # list of (attributes, values, rectangle item), one per drawn cell
        self.areas = []

        self.canvas = QGraphicsScene(self)
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.model_1 = DomainModel(order=DomainModel.MIXED,
                                   valid_types=DomainModel.PRIMITIVE)
        self.model_234 = DomainModel(order=DomainModel.MIXED,
                                     valid_types=DomainModel.PRIMITIVE,
                                     placeholder="(None)")
        self.attr_combos = [
            gui.comboBox(box, self, value="variable{}".format(i),
                         orientation=Qt.Horizontal, contentsLength=12,
                         searchable=True,
                         callback=self.attr_changed,
                         model=self.model_1 if i == 1 else self.model_234)
            for i in range(1, 5)
        ]
        self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank(
            box, self, "Find Informative Mosaics", self.set_attr)

        box2 = gui.vBox(self.controlArea, box="Interior Coloring")
        self.color_model = DomainModel(order=DomainModel.MIXED,
                                       valid_types=DomainModel.PRIMITIVE,
                                       placeholder="(Pearson residuals)")
        self.cb_attr_color = gui.comboBox(
            box2, self, value="variable_color",
            orientation=Qt.Horizontal, contentsLength=12, labelWidth=50,
            searchable=True,
            callback=self.set_color_data, model=self.color_model)
        self.bar_button = gui.checkBox(
            box2, self, 'use_boxes', label='Compare with total',
            callback=self.update_graph)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        return QSize(720, 530)

    def _get_discrete_data(self, data):
        """
        Discretize continuous attributes.
        Return None when there is no data, no rows, or no primitive attributes.
        """
        if (data is None or
                not len(data) or
                not any(attr.is_discrete or attr.is_continuous
                        for attr in chain(data.domain.variables,
                                          data.domain.metas))):
            return None
        elif any(attr.is_continuous for attr in data.domain.variables):
            return Discretize(
                method=EqualFreq(n=4), remove_const=False,
                discretize_classes=True, discretize_metas=True)(data)
        else:
            return data

    def init_combos(self, data):
        """Fill the variable models for `data` and choose default variables;
        with `data` None, empty the models and reset all variables."""
        def set_combos(value):
            self.model_1.set_domain(value)
            self.model_234.set_domain(value)
            self.color_model.set_domain(value)

        if data is None:
            set_combos(None)
            self.variable1 = self.variable2 = self.variable3 \
                = self.variable4 = self.variable_color = None
            return
        set_combos(self.data.domain)

        if len(self.model_1) > 0:
            self.variable1 = self.model_1[0]
            self.variable2 = self.model_1[min(1, len(self.model_1) - 1)]
        self.variable3 = self.variable4 = None
        self.variable_color = self.data.domain.class_var  # None is OK, too

    def get_disc_attr_list(self):
        """Return the selected variables as found in the discretized data."""
        return [
            self.discrete_data.domain[var.name]
            for var in (self.variable1, self.variable2,
                        self.variable3, self.variable4)
            if var
        ]

    def set_attr(self, *attrs):
        """Set the four variables (looked up by name in the input data's
        domain; falsy entries are kept as they are) and redraw."""
        self.variable1, self.variable2, self.variable3, self.variable4 = [
            attr and self.data.domain[attr.name] for attr in attrs
        ]
        self.reset_graph()

    def attr_changed(self):
        self.attrs_changed_manually.emit(self.get_disc_attr_list())
        self.reset_graph()

    def resizeEvent(self, e):
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        OWWidget.showEvent(self, ev)
        self.update_graph()

    @Inputs.data
    def set_data(self, data):
        """Accept new input data (large SQL tables are sampled)."""
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            self.discrete_data = None
            self.init_combos(None)
            return

        self.init_combos(self.data)
        self.openContext(self.data)

    @Inputs.data_subset
    def set_subset_data(self, data):
        self.subset_data = data

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        self.Warning.incompatible_subset.clear()
        self.subset_indices = None
        if self.data is not None and self.subset_data:
            transformed = self.subset_data.transform(self.data.domain)
            if np.all(np.isnan(transformed.X)) \
                    and np.all(np.isnan(transformed.Y)):
                self.Warning.incompatible_subset()
            else:
                indices = {e.id for e in transformed}
                self.subset_indices = [ex.id in indices for ex in self.data]
        if self.data is not None and self.__pending_selection is not None:
            self.selection = self.__pending_selection
            self.__pending_selection = None
        else:
            self.selection = set()
        self.set_color_data()
        self.update_graph()
        self.send_selection()

    def clear_selection(self):
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def coloring_changed(self):
        self.vizrank.coloring_changed()
        self.update_graph()

    def reset_graph(self):
        self.clear_selection()
        self.update_graph()

    def set_color_data(self):
        """Rebuild `color_data` / `discrete_data` with the coloring variable
        as the class, then restart vizrank and redraw."""
        if self.data is None:
            return
        self.bar_button.setEnabled(self.variable_color is not None)
        attrs = [v for v in self.model_1 if v and v is not self.variable_color]
        domain = Domain(attrs, self.variable_color, None)
        self.color_data = self.data.from_table(domain, self.data)
        self.discrete_data = self._get_discrete_data(self.color_data)
        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(True)
        self.coloring_changed()

    def update_selection_rects(self):
        """Outline the selected cells with a dotted pen, others by default."""
        pens = (QPen(), QPen(Qt.black, 3, Qt.DotLine))
        for i, (_, _, area) in enumerate(self.areas):
            area.setPen(pens[i in self.selection])

    def select_area(self, index, ev):
        """Handle a click on cell `index`; Ctrl toggles it in the selection,
        a plain left click selects only that cell."""
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        """Send the rows in the selected cells and the annotated table."""
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        # `row_id` rather than `id`, which shadowed the builtin
        sel_idx = [i for i, row_id in enumerate(self.data.ids)
                   if row_id in idset]
        if self.discrete_data is not self.data:
            # map the selection back to the original (undiscretized) rows
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def send_report(self):
        self.report_plot(self.canvas)

    def update_graph(self):
        """Clear the canvas and redraw the whole mosaic and its legend."""
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def get_counts(attr_vals, values):
            """Calculate rectangles' widths; if all are 0, they are set to 1."""
            if not attr_vals:
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [
                    conditionaldict[attr_vals + "-" + val] for val in values
                ]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)
            return total, counts

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs, used_vals, attr_vals=""):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            # number of gaps between values; at least 1 so that a
            # single-valued attribute in a zero-size cell cannot divide by 0
            n_gaps = max(1, len(values) - 1)
            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (x1 - x0) / float(n_gaps)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (y1 - y0) / float(n_gaps)

            total, counts = get_counts(attr_vals, values)

            # when visualizing the third attribute and the first attribute has
            # the last value, reverse the order in which the boxes are drawn;
            # otherwise, if the last cell, nearest to the labels of the fourth
            # attribute, is empty, we wouldn't be able to position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                newattrvals = attr_vals + "-" + val if attr_vals else val

                tooltip = "{} {}: <b>{}</b><br/>".format(
                    condition, attr.name, htmlval)
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1,
                                  tooltip, total_attrs, *args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1,
                                  tooltip, total_attrs, *args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            total, counts = get_counts(attr_vals, values)

            aligns = [
                Qt.AlignTop | Qt.AlignHCenter,
                Qt.AlignRight | Qt.AlignVCenter,
                Qt.AlignBottom | Qt.AlignHCenter,
                Qt.AlignLeft | Qt.AlignVCenter
            ]
            align = aligns[side]
            for i, val in enumerate(values):
                if distributiondict[val] != 0:
                    perc = counts[i] / float(total)
                    rwidth = width * perc
                    xs = [
                        x0 + currpos + rwidth / 2,
                        x0 - self.ATTR_VAL_OFFSET,
                        x0 + currpos + rwidth / 2,
                        x1 + self.ATTR_VAL_OFFSET
                    ]
                    ys = [
                        y1 + self.ATTR_VAL_OFFSET,
                        y0 + currpos + height * 0.5 * perc,
                        y0 - self.ATTR_VAL_OFFSET,
                        y0 + currpos + height * 0.5 * perc
                    ]

                    CanvasText(self.canvas, val, xs[side], ys[side], align,
                               max_width=rwidth if side == 0 else None)
                    space = height if side % 2 else width
                    currpos += perc * space + spacing * (total_attrs - side)

            xs = [
                x0 + (x1 - x0) / 2,
                x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                x0 + (x1 - x0) / 2,
                x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET
            ]
            ys = [
                y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                y0 + (y1 - y0) / 2,
                y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                y0 + (y1 - y0) / 2
            ]
            CanvasText(
                self.canvas, attr.name, xs[side], ys[side], align,
                bold=True, vertical=side % 2)

        def add_rect(x0, x1, y0, y1, condition,
                     used_attrs, used_vals, attr_vals=""):
            area_index = len(self.areas)
            x1 += (x0 == x1)
            y1 += (y0 == y1)
            # rectangles of width and height 1 are not shown - increase
            y1 += (x1 - x0 + y1 - y0 == 2)
            colors = class_var and [QColor(*col) for col in class_var.colors]

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                        self.canvas, x, y, w, h, z=z, onclick=select_area,
                        **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                    self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                    onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.variable_color is None:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul,
                    (apriori_dists[i][used_vals[i]] / float(s)
                     for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = float((actual - expected) / sqrt(expected))
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" +
                    "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        # the last stripe takes the remainder to avoid gaps
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and \
                        abs(y1 - y0) > bar_width:
                    # bar on the left showing the prior class distribution
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        if self.subset_indices is not None:
                            # bar on the right showing the subset distribution
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[
                                        attr_vals + "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total,
                                         bar_width, v, 15, color)
                                    total += v

                actual = [
                    conditionaldict[attr_vals + "-" + cls_values[i]]
                    for i in range(len(prior))
                ]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                # `text` is built with join() and has no trailing "<br>";
                # the former `text[:-4]` cut off the end of the last line.
                outer_rect.setToolTip(
                    "{}<hr>Instances: {}<br><br>{}".format(
                        condition, n_actual, text))

        def create_legend():
            if self.variable_color is None:
                names = [
                    "<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                    "Residuals:"
                ]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
                edges = repeat(Qt.black)
            else:
                names = get_variable_values_sorted(class_var)
                edges = colors = [QColor(*col) for col in class_var.colors]

            items = []
            size = 8
            for name, color, edgecolor in zip(names, colors, edges):
                item = QGraphicsItemGroup()
                item.addToGroup(
                    CanvasRectangle(None, -size / 2, -size / 2, size, size,
                                    edgecolor, color))
                item.addToGroup(
                    CanvasText(None, name, size, 0, Qt.AlignVCenter))
                items.append(item)
            return wrap_legend_items(
                items, hspacing=20, vspacing=16 + size,
                max_width=self.canvas_view.width() - xoff)

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        attr_list = self.get_disc_attr_list()
        class_var = data.domain.class_var
        # TODO: check this
        # data = Preprocessor_dropMissing(data)

        unique = [v.name for v in set(attr_list + [class_var]) if v]
        if len(data[:, unique]) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        attrs = [attr for attr in attr_list if not attr.values]
        if attrs:
            CanvasText(self.canvas,
                       "Feature {} has no values".format(attrs[0]),
                       (self.canvas_view.width() - 120) / 2,
                       self.canvas_view.height() / 2)
            return
        if self.variable_color is None:
            apriori_dists = [
                get_distribution(data, attr) for attr in attr_list
            ]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(attr)
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        xoff = 20

        # get the maximum width of rectangle
        width = 20
        max_ylabel_w1 = max_ylabel_w2 = 0
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1].name, bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3].name,
                                  bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        legend = create_legend()

        # get the maximum height of rectangle
        yoff = 45
        legendoff = yoff + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
        square_size = min(
            self.canvas_view.width() - width - 20,
            self.canvas_view.height() - legendoff
            - legend.boundingRect().height())

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
            0, 0, self.canvas_view.width(), self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if self.subset_indices:
            conditionalsubsetdict, _ = get_conditional_distribution(
                self.discrete_data[self.subset_indices], attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list), [], [])

        self.canvas.addItem(legend)
        legend.setPos(
            xoff - legend.boundingRect().x()
            + max(0, (square_size - legend.boundingRect().width()) / 2),
            legendoff + square_size)
        self.update_selection_rects()

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            settings.migrate_str_to_variable(context, none_placeholder="(None)")
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler that fills the list boxes and calls
      `attr_changed`.

    - `attr_changed` handles changes of attribute or grouping (callbacks for
      list boxes). It recomputes box data by calling `compute_box_data`, shows
      the appropriate display box (discrete/continuous) and then calls
      `layout_changed`.

    - `layout_changed` constructs all the elements for the scene (as lists of
      QGraphicsItemGroup) and calls `display_changed`. It is called when the
      attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
      settings on the scene. It is called when the elements are reconstructed
      (layout is changed due to selection of attributes or resize event), or
      when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100
    inputs = [("Data", Orange.data.Table, "set_data")]

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()

    attribute = ContextSetting(None)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMedians)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)

    # Name of the BoxData attribute used to order the boxes for each
    # comparison type ("" means: keep the original order)
    _sorting_criteria_attrs = {
        CompareNone: "", CompareMedians: "median", CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted,
                _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    # Ticks are the only pens that keep a flat cap
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        """Initialize widget state and build the control and main areas."""
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        # Every attribute gets its own list; a chained `a = b = []` would make
        # them all aliases of a single object.
        self.label_txts = []
        self.mean_labels = []
        self.boxes = []
        self.labels = []
        self.label_txts_all = []
        self.attr_labels = []
        self.order = []
        self.p = -1.0
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        common_options = dict(
            callback=self.attr_changed, sizeHint=(200, 100))
        self.attrs = VariableListModel()
        gui.listView(
            self.controlArea, self, "attribute", box="Variable",
            model=self.attrs, **common_options)
        self.group_vars = VariableListModel()
        gui.listView(
            self.controlArea, self, "group_var", box="Grouping",
            model=self.group_vars, **common_options)

        # TODO: move Compare median/mean to grouping box
        self.display_box = gui.vBox(self.controlArea, "Display")

        gui.checkBox(self.display_box, self, "show_annotations", "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box, self, 'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.display_changed)

        self.stretching_box = gui.checkBox(
            self.controlArea, self, 'stretched', "Stretch bars", box='Display',
            callback=self.display_changed).box

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing |
                                     QPainter.TextAntialiasing |
                                     QPainter.SmoothPixmapTransform)
        # Resize events of the viewport trigger a re-layout (see eventFilter)
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(650)

        self.stats = []
        self.dist = []
        self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def eventFilter(self, obj, event):
        """Re-layout the scene whenever the view's viewport is resized."""
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.layout_changed()

        return super().eventFilter(obj, event)

    # noinspection PyTypeChecker
    def set_data(self, dataset):
        """
        Signal handler for the "Data" input.

        An empty data set or one with an empty domain is treated as no data.
        Fills the variable and grouping lists, picks defaults, opens the
        context and redraws via `attr_changed`.
        """
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = []
        self.stats = []
        self.conts = []
        self.group_var = None
        self.attribute = None
        if dataset:
            domain = dataset.domain
            self.group_vars[:] = \
                [None] + \
                [a for a in chain(domain.variables, domain.metas)
                 if a.is_discrete]
            self.attrs[:] = chain(
                domain.variables,
                (a for a in domain.metas if a.is_primitive()))
            if self.attrs:
                self.attribute = self.attrs[0]
            if domain.class_var and domain.class_var.is_discrete:
                self.group_var = domain.class_var
            else:
                self.group_var = None  # Reset to trigger selection via callback
            self.openContext(self.dataset)
            self.attr_changed()
        else:
            self.reset_all_data()

    def reset_all_data(self):
        """Clear the scene, the test label and both variable lists."""
        self.clear_scene()
        self.infot1.setText("")
        self.attrs[:] = []
        self.group_vars[:] = []
        self.is_continuous = False
        self.update_display_box()

    def attr_changed(self):
        """Recompute the box data, redraw and center the view on the plot."""
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            heights = 90 if self.show_annotations else 60
            self.box_view.centerOn(self.scene_min_x + self.scene_width / 2,
                                   -30 - len(self.stats) * heights / 2 + 45)
        else:
            self.box_view.centerOn(self.scene_width / 2,
                                   -30 - len(self.boxes) * 40 / 2 + 45)

    def compute_box_data(self):
        """
        Compute distributions/contingencies and box statistics for the
        selected attribute, optionally split by the grouping variable.

        Groups without instances (`stat.n == 0`) are dropped from
        `self.stats` and `self.label_txts`.
        """
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        if dataset is None:
            self.stats = []
            self.dist = []
            self.conts = []
            return
        self.is_continuous = attr.is_continuous
        if self.group_var:
            self.dist = []
            self.conts = contingency.get_contingency(
                dataset, attr, self.group_var)
            if self.is_continuous:
                self.stats = [BoxData(cont) for cont in self.conts]
            self.label_txts_all = self.group_var.values
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist)]
            self.label_txts_all = [""]
        self.label_txts = [txts for stat, txts in zip(self.stats,
                                                      self.label_txts_all)
                           if stat.n > 0]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        """Show the option box that matches the attribute type."""
        if self.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()

    def clear_scene(self):
        """Remove all items from the scene and forget references to them."""
        self.box_scene.clear()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []

    def layout_changed(self):
        """
        Rebuild all scene elements for the current attribute.

        For discrete attributes the drawing is delegated to
        `display_changed_disc`; for continuous ones the items are constructed
        here and positioned by `display_changed`.
        """
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None or len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        self.mean_labels = [self.mean_label(stat, attr, lab)
                            for stat, lab in zip(self.stats, self.label_txts)]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [self.label_group(stat, attr, mean_lab)
                       for stat, mean_lab in zip(self.stats, self.mean_labels)]
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts]
        for it in chain(self.labels, self.boxes, self.attr_labels):
            self.box_scene.addItem(it)
        self.display_changed()

    def display_changed(self):
        """
        Position the already constructed items according to the display
        settings (ordering, annotations), then update tests and post-hoc
        lines.
        """
        if self.dataset is None:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            self.order = sorted(
                self.order, key=lambda i: getattr(self.stats[i], criterion))

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            self.boxes[box_index].setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            # The plain group label is shown only when annotations are off
            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width, len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()

    def display_changed_disc(self):
        """Draw the complete scene for a discrete attribute."""
        self.clear_scene()
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts_all]

        if not self.stretched:
            # Absolute counts shown to the right of non-stretched bars
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts]
            else:
                self.labels = [
                    QGraphicsTextItem(str(int(sum(self.dist))))]

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = [self.strudel(cont) for cont in self.conts]
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box in enumerate(self.boxes):
            y = (-len(self.boxes) + row) * 40 + 10

            label = self.attr_labels[row]
            b = label.boundingRect()
            label.setPos(-b.width() - 10, y - b.height() / 2)
            self.box_scene.addItem(label)
            if not self.stretched:
                label = self.labels[row]
                b = label.boundingRect()
                if self.group_var:
                    right = self.scale_x * sum(self.conts[row])
                else:
                    right = self.scale_x * sum(self.dist)
                label.setPos(right + 10, y - b.height() / 2)
                self.box_scene.addItem(label)

            if self.attribute is not self.group_var:
                # `strudel` adds children as (bar, text) pairs; put a label
                # with the text above each bar part
                for text_item, bar_part in zip(box.childItems()[1::2],
                                               box.childItems()[::2]):
                    label = QGraphicsSimpleTextItem(
                        text_item.toPlainText())
                    label.setPos(bar_part.boundingRect().x(),
                                 y - label.boundingRect().height() - 8)
                    self.box_scene.addItem(label)
            for text_item in box.childItems()[1::2]:
                box.removeFromGroup(text_item)
            self.box_scene.addItem(box)
            box.setPos(0, y)
        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width,
                                    len(self.boxes) * 40 + 90)
        self.infot1.setText("")

    # noinspection PyPep8Naming
    def compute_tests(self):
        """
        Compute the significance test for the current comparison and show
        the result in the info label.

        Only the comparison of means is tested (t-test for two groups, ANOVA
        for more); the label is left empty for medians. When the statistic is
        undefined because the groups have zero variance, NaN is reported
        instead of dividing by zero.
        """
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests
        nan = float("nan")

        def stat_ttest():
            d1, d2 = self.stats
            pooled_var = d1.var / d1.n + d2.var / d2.n
            if pooled_var == 0:
                # Both groups are constant: t and df are undefined
                return nan, nan
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg) ** 2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            if var_within == 0 or df_within <= 0 or df_between <= 0:
                # F is undefined without within-group variance
                return nan, nan
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                # The Mann-Whitney test is currently disabled
                t = ""
            else:
                t, self.p = stat_ttest()
                t = "Student's t: %.3f (p=%.3f)" % (t, self.p)
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                # The Kruskal-Wallis test is currently disabled
                t = ""
            else:
                F, self.p = stat_ANOVA()
                t = "ANOVA: %.3f (p=%.3f)" % (F, self.p)
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        """
        Return an item group showing "mean ± dev", prefixed with `val_name`
        (if non-empty). The group's `min_x` attribute stores the x offset of
        its left edge relative to the centre of the mean text.
        """
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(
            "%.*f" % (attr.number_of_decimals + 1, stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        missing_stats = not self.stats
        # Without any statistics draw a placeholder axis with "?" tick labels
        stats = self.stats or [BoxData(np.array([[0.], [1.]]))]
        mean_labels = self.mean_labels or [
            self.mean_label(stats[0], self.attribute, "")]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        # Make sure the first tick lies strictly below the smallest value
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(
                self.attribute.repr_val(val) if not missing_stats else "?",
                self._axis_font)
            t.setFlags(
                t.flags() | QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        self.box_scene.addLine(
            bottom * scale_x - 4, 0, top * scale_x + 4, 0, self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        if self.stretched:
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(sum(dist) * scale_x + 10 +
                            lbl.boundingRect().width()
                            for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            # The axis runs from 0 to 100 while `strudel` works with
            # proportions in [0, 1]
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        """
        Return an item group with the annotations of one box: the mean label
        above and the median and quartile values below, connected to their
        positions with short lines.
        """
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            # Shift the label aside and connect it to `frm` with a bent line
            label.setX(to)
            to += t_box.width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        msc = stat.median * self.scale_x
        med_t = centered_text(stat.median, msc)
        # NOTE(review): despite the name this is the full width of the median
        # label, not half of it, so the overlap checks below are conservative.
        # Confirm whether `/ 2` was intended.
        med_box_width2 = med_t.boundingRect().width()
        line(msc)

        x = stat.q25 * self.scale_x
        t = centered_text(stat.q25, x)
        t_box = t.boundingRect()
        med_left = msc - med_box_width2
        if x + t_box.width() / 2 >= med_left - 5:
            move_label(t, x, med_left - t_box.width() - 5)
        else:
            line(x)

        x = stat.q75 * self.scale_x
        t = centered_text(stat.q75, x)
        t_box = t.boundingRect()
        med_right = msc + med_box_width2
        if x - t_box.width() / 2 <= med_right + 5:
            move_label(t, x, med_right + 5)
        else:
            line(x)

        return labels

    def box_group(self, stat, height=20):
        """
        Return an item group with the box (q25-q75), whiskers, median and
        mean lines and the mean ± dev line for one `stat`.
        """
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = QGraphicsItemGroup()
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5, box)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5, box)
        vert_line = line(stat.a_min, 0, stat.a_max, 0, box)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3, box)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0, box)
        var_line.setPen(self._pen_paramet)

        mbox = QGraphicsRectItem(stat.q25 * scale_x, -height / 2,
                                 (stat.q75 - stat.q25) * scale_x, height,
                                 box)
        mbox.setBrush(self._box_brush)
        mbox.setPen(QPen(Qt.NoPen))
        mbox.setZValue(-200)

        median_line = line(stat.median, -height / 2,
                           stat.median, height / 2, box)
        median_line.setPen(self._pen_median)
        median_line.setZValue(-150)

        return box

    def strudel(self, dist):
        """
        Return an item group with a stacked bar for distribution `dist`.

        For every non-empty value the group gets a colored rectangle followed
        by a text item with the value name.
        """
        attr = self.attribute
        ss = np.sum(dist)
        box = QGraphicsItemGroup()
        if ss < 1e-6:
            # Placeholder for an empty distribution
            QGraphicsRectItem(0, -10, 1, 10, box)
        cum = 0
        for i, v in enumerate(dist):
            if v < 1e-6:
                continue
            if self.stretched:
                v /= ss
            v *= self.scale_x
            rect = QGraphicsRectItem(cum + 1, -6, v - 2, 12, box)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / sum(dist))
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(attr.values[i])
            box.addToGroup(text)
            cum += v
        return box

    def show_posthoc(self):
        """
        Redraw the vertical guide lines through the compared statistic of
        each box and the horizontal lines that join boxes whose statistics
        lie within 1.5 scene units of each other.
        """
        def line(y0, y1):
            # `x` is taken from the enclosing loop below
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line) * self.scale_x
            xs.append(x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if last_to == to or frm == to:
                continue
            # Reuse the first row whose last line ended before `frm`
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = - 6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        """Return the name of the selected attribute, if any."""
        if self.attribute:
            return self.attribute.name

    def send_report(self):
        """Add the plot and a caption naming the variables to the report."""
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)
class OWSilhouettePlot(widget.OWWidget):
    """
    Widget that computes per-instance silhouette scores for a chosen
    discrete cluster/group variable and displays them as a silhouette plot.

    The selected instances and the annotated input data (optionally with
    the silhouette scores added as a meta column) are sent to the outputs.
    """
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the (displayed) silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(True)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")
        singleton_clusters_all = Msg("All clusters are singletons")
        memory_error = Msg("Not enough memory")
        value_error = Msg("Distances could not be computed: '{}'")

    class Warning(widget.OWWidget.Warning):
        missing_cluster_assignment = Msg(
            "{} instance{s} omitted (missing cluster assignment)")

    def __init__(self):
        """Initialize the widget state and build the GUI."""
        super().__init__()
        #: The input data
        self.data = None         # type: Optional[Orange.data.Table]
        #: Distance matrix computed from data
        self._matrix = None      # type: Optional[Orange.misc.DistMatrix]
        #: An bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None        # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None      # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        self._silplot = None     # type: Optional[SilhouettePlot]

        gui.comboBox(
            self.controlArea, self, "distance_idx", box="Distance",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", contentsLength=14, addSpace=4,
            callback=self._invalidate_scores)
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size, addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", contentsLength=14,
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(
            box, self, "auto_commit", "Commit",
            auto_label="Auto commit", box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the control area's size hint expanded to at least 600x720."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """
        Set the input dataset.

        Only discrete variables with at least two values are offered as
        cluster labels; data without any such variable is rejected with an
        error message.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable labels."
                data = None

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        """Recompute and redraw for the new input, then commit the outputs."""
        if self.data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._matrix = None
        self._mask = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()
        self.Error.clear()
        self.Warning.clear()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = self._mask = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        self._clear_messages()

        if self.data is None or not len(self.data):
            self._reset_all()
            return

        if self._matrix is None and self.data is not None:
            _, metric = self.Distances[self.distance_idx]
            try:
                self._matrix = np.asarray(metric(self.data))
            except MemoryError:
                self.Error.memory_error()
                return
            except ValueError as err:
                self.Error.value_error(str(err))
                return

        self._update_labels()

    def _reset_all(self):
        # Drop all computed state and clear the scene
        self._mask = None
        self._silhouette = None
        self._labels = None
        self._matrix = None
        self._clear_scene()

    def _clear_messages(self):
        self.Error.clear()
        self.Warning.missing_cluster_assignment.clear()

    def _update_labels(self):
        # Extract the cluster labels of the instances with a known cluster
        # assignment and compute their silhouette scores from the
        # precomputed distance matrix.
        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        mask = np.isnan(labels)
        # Drop the missing values *before* the integer cast; casting NaN to
        # int is undefined (and warns in newer NumPy versions)
        labels = labels[~mask].astype(int)

        labels_unq = np.unique(labels)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels,
                metric="precomputed")

        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if labels is not None:
            count_missing = np.count_nonzero(mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")

    def _set_bar_height(self):
        # Apply the bar size to the plot; row names are shown only for bars
        # of size 5 or more
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values,
                                  var.colors)
            else:
                # A single unnamed group containing all instances
                silplot.setScores(
                    self._silhouette,
                    np.zeros(len(self._silhouette), dtype=int),
                    [""], np.array([[63, 207, 207]]))

            self.scene.addItem(silplot)
            self._update_annotations()
            silplot.selectionChanged.connect(self.commit)
            silplot.layout().activate()
            self._update_scene_rect()
            silplot.geometryChanged.connect(self._update_scene_rect)

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()

    def _update_annotations(self):
        # Set the row names of the plot from the selected annotation variable
        # (index 0 is the "None" placeholder)
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                if self._mask is not None:
                    assert column.shape == self._mask.shape
                    column = column[~self._mask]
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def _update_scene_rect(self):
        # The plot may already have been removed by `_clear_scene`
        if self._silplot is not None:
            self.scene.setSceneRect(self._silplot.geometry())

    def commit(self):
        """
        Commit/send the current selection to the output.

        When `add_scores` is set, the silhouette scores are added as a meta
        column; instances without a cluster assignment get NaN. If no scores
        are available the column is left unset.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    # Map plot indices back to indices into the input data
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._mask is not None:
                scores = np.full(shape=selectedmask.shape,
                                 fill_value=np.nan)
                scores[~self._mask] = self._silhouette
            else:
                scores = self._silhouette

            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.transform(domain)
            else:
                domain = self.data.domain
                data = self.data

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, np.flatnonzero(selectedmask))

            # `scores` is None when the silhouette could not be computed;
            # there is nothing to write into the new column then
            if self.add_scores and scores is not None:
                if selected is not None:
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(create_annotated_table(data, indices))

    def send_report(self):
        """Add the plot and a descriptive caption to the report."""
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        # There is no plot (and thus no row names) when scoring failed
        if self.annotation_var_idx and self._silplot is not None \
                and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        """Release the data and scene items when the widget is deleted."""
        self.clear()
        super().onDeleteWidget()
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler that fills the list boxes and calls
      `grouping_changed`.

    - `grouping_changed` handles changes of grouping attribute: it enables or
      disables the box for ordering, orders attributes and calls
      `attr_changed`.

    - `attr_changed` handles changes of attribute. It recomputes box data by
      calling `compute_box_data`, shows the appropriate display box
      (discrete/continuous) and then calls `layout_changed`.

    - `layout_changed` constructs all the elements for the scene (as lists of
      QGraphicsItemGroup) and calls `display_changed`. It is called when the
      attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
      settings on the scene. It is called when the elements are reconstructed
      (layout is changed due to selection of attributes or resize event), or
      when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100
    keywords = ["whisker"]

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()
    conditions = ContextSetting([])

    attribute = ContextSetting(None)
    order_by_importance = Setting(False)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMeans)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)
    show_labels = Setting(True)
    sort_freqs = Setting(False)
    auto_commit = Setting(True)

    # Name of the BoxData attribute used to order the boxes for each
    # comparison type ("" means: keep the original order)
    _sorting_criteria_attrs = {
        CompareNone: "", CompareMedians: "median", CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted,
                _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        """Initialise the widget state and build the control and main areas."""
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        # Each attribute gets its own list; a chained assignment would bind
        # all of them to one shared object.
        self.label_txts = []
        self.mean_labels = []
        self.boxes = []
        self.labels = []
        self.label_txts_all = []
        self.attr_labels = []
        self.order = []
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0
        # Group value index for each row currently kept in `self.conts`;
        # None until `display_changed_disc` establishes it.
        self._cont_group_indices = None

        self.attrs = VariableListModel()
        view = gui.listView(
            self.controlArea, self, "attribute", box="Variable",
            model=self.attrs, callback=self.attr_changed)
        view.setMinimumSize(QSize(30, 30))
        # Any other policy than Ignored will let the QListBox's scrollbar
        # set the minimal height (see the penultimate paragraph of
        # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
        gui.separator(view.box, 6, 6)
        self.cb_order = gui.checkBox(
            view.box, self, "order_by_importance",
            "Order by relevance",
            tooltip="Order by 𝜒² or ANOVA over the subgroups",
            callback=self.apply_sorting)
        self.group_vars = DomainModel(
            placeholder="None", separators=False,
            valid_types=Orange.data.DiscreteVariable)
        self.group_view = view = gui.listView(
            self.controlArea, self, "group_var", box="Subgroups",
            model=self.group_vars, callback=self.grouping_changed)
        view.setEnabled(False)
        view.setMinimumSize(QSize(30, 30))
        # See the comment above
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)

        # TODO: move Compare median/mean to grouping box
        # The vertical size policy is needed to let only the list views expand
        self.display_box = gui.vBox(
            self.controlArea, "Display",
            sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum),
            addSpace=False)

        gui.checkBox(self.display_box, self, "show_annotations", "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box, self, 'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.layout_changed)

        # The vertical size policy is needed to let only the list views expand
        self.stretching_box = box = gui.vBox(
            self.controlArea, box="Display",
            sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Fixed))
        self.stretching_box.sizeHint = self.display_box.sizeHint
        gui.checkBox(
            box, self, 'stretched', "Stretch bars",
            callback=self.display_changed)
        gui.checkBox(
            box, self, 'show_labels', "Show box labels",
            callback=self.display_changed)
        self.sort_cb = gui.checkBox(
            box, self, 'sort_freqs', "Sort by subgroup frequencies",
            callback=self.display_changed)
        gui.rubber(box)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        self.box_scene.selectionChanged.connect(self.commit)
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing |
                                     QPainter.TextAntialiasing |
                                     QPainter.SmoothPixmapTransform)
        # Resize events of the viewport trigger a re-layout (see eventFilter)
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(300)

        self.stats = []
        self.dist = []
        self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def sizeHint(self):
        """Return the preferred widget size (900 x 500)."""
        return QSize(900, 500)

    def eventFilter(self, obj, event):
        """Rebuild the layout whenever the view's viewport is resized."""
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.layout_changed()

        return super().eventFilter(obj, event)

    def reset_attrs(self, domain):
        """Fill the attribute list with the primitive variables of `domain`.

        Variables are listed in the order class variables, metas, attributes.
        """
        self.attrs[:] = [
            var for var in chain(
                domain.class_vars, domain.metas, domain.attributes)
            if var.is_primitive()]

    # noinspection PyTypeChecker
    @Inputs.data
    def set_data(self, dataset):
        """Handle the input signal: store the data and refresh everything.

        A data set that is empty, or whose domain has no variables and no
        primitive metas, is treated as if no data was given.
        """
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain) and not
                any(var.is_primitive() for var in dataset.domain.metas)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = []
        self.stats = []
        self.conts = []
        self._cont_group_indices = None
        self.group_var = None
        self.attribute = None
        if dataset:
            domain = dataset.domain
            self.group_vars.set_domain(domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.reset_attrs(domain)
            self.select_default_variables(domain)
            self.openContext(self.dataset)
            self.grouping_changed()
        else:
            self.reset_all_data()
        self.commit()

    def select_default_variables(self, domain):
        """Pick the default attribute and grouping variable for `domain`."""
        # visualize first non-class variable, group by class (if present)
        if len(self.attrs) > len(domain.class_vars):
            self.attribute = self.attrs[len(domain.class_vars)]
        elif self.attrs:
            self.attribute = self.attrs[0]

        if domain.class_var and domain.class_var.is_discrete:
            self.group_var = domain.class_var
        else:
            self.group_var = None  # Reset to trigger selection via callback

    def apply_sorting(self):
        """Order the attribute list by relevance for the grouping variable.

        When `order_by_importance` is set and a grouping variable is chosen,
        attributes are sorted by the p-value of a one-way ANOVA (continuous
        attributes) or a chi-square test (discrete attributes); otherwise the
        domain order is restored. The selected attribute is kept.
        """
        def compute_score(attr):
            # Sort key: a p-value, or a value above 1 (2 = test not
            # computable, 3 = the grouping variable itself) to sort last.
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                col = data.get_column_view(attr)[0].astype(float)
                groups = (col[group_col == i] for i in range(n_groups))
                groups = (grp[~np.isnan(grp)] for grp in groups)
                groups = [grp for grp in groups if len(grp)]
                p = f_oneway(*groups)[1] if len(groups) > 1 else 2
            else:
                # Chi-square with the given distribution into groups
                # (see degrees of freedom in computation of the p-value)
                if not attr.values or not group_var.values:
                    return 2
                observed = np.array(
                    contingency.get_contingency(data, group_var, attr))
                observed = observed[observed.sum(axis=1) != 0, :]
                observed = observed[:, observed.sum(axis=0) != 0]
                if min(observed.shape) < 2:
                    return 2
                expected = \
                    np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \
                    np.sum(observed)
                p = chisquare(observed.ravel(), f_exp=expected.ravel(),
                              ddof=n_groups - 1)[1]
            if math.isnan(p):
                return 2
            return p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        attribute = self.attribute
        group_var = self.group_var
        if self.order_by_importance and group_var is not None:
            n_groups = len(group_var.values)
            # The group column is needed only by the ANOVA branch
            group_col = data.get_column_view(group_var)[0] if \
                domain.has_continuous_attributes(
                    include_class=True, include_metas=True) else None
            self.attrs.sort(key=compute_score)
        else:
            self.reset_attrs(domain)
        self.attribute = attribute

    def reset_all_data(self):
        """Clear the scene, the test label and both variable lists."""
        self.clear_scene()
        self.infot1.setText("")
        self.attrs.clear()
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)
        self.is_continuous = False
        self.update_display_box()

    def grouping_changed(self):
        """React to a change of the grouping variable."""
        self.cb_order.setEnabled(self.group_var is not None)
        self.apply_sorting()
        self.attr_changed()

    def select_box_items(self):
        """Re-select the scene items whose filters match `self.conditions`."""
        # Work on a snapshot: setSelected() emits selectionChanged, which is
        # connected to commit(), and commit() rebinds self.conditions.
        temp_cond = self.conditions.copy()
        selected_conditions = [c.conditions for c in temp_cond]
        for box in self.box_scene.items():
            if isinstance(box, FilterGraphicsRectItem):
                box.setSelected(box.filter.conditions in selected_conditions)

    def attr_changed(self):
        """React to a change of the attribute: recompute, redraw, recenter."""
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            heights = 90 if self.show_annotations else 60
            self.box_view.centerOn(self.scene_min_x + self.scene_width / 2,
                                   -30 - len(self.stats) * heights / 2 + 45)
        else:
            self.box_view.centerOn(self.scene_width / 2,
                                   -30 - len(self.boxes) * 40 / 2 + 45)

    def compute_box_data(self):
        """Compute `stats`, `dist`, `conts` and label texts for the attribute.

        Does nothing when no attribute is selected.
        """
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        self.is_continuous = attr.is_continuous
        # Every path below rebinds self.conts, so the mapping from its rows
        # to group value indices has to be re-established.
        self._cont_group_indices = None
        if dataset is None or not self.is_continuous and not attr.values or \
                self.group_var and not self.group_var.values:
            self.stats = []
            self.dist = []
            self.conts = []
            return
        if self.group_var:
            self.dist = []
            self.conts = contingency.get_contingency(
                dataset, attr, self.group_var)
            if self.is_continuous:
                stats, label_texts = [], []
                for i, cont in enumerate(self.conts):
                    if np.sum(cont[1]):
                        stats.append(BoxData(cont, attr, i, self.group_var))
                        label_texts.append(self.group_var.values[i])
                self.stats = stats
                self.label_txts_all = label_texts
            else:
                self.label_txts_all = \
                    [v for v, c in zip(self.group_var.values, self.conts)
                     if np.sum(c) > 0]
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist, attr, None)]
            self.label_txts_all = [""]
        self.label_txts = [txts for stat, txts in zip(self.stats,
                                                      self.label_txts_all)
                           if stat.n > 0]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        """Show the control box matching the attribute type."""
        if self.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()
            self.sort_cb.setEnabled(self.group_var is not None)

    def clear_scene(self):
        """Remove all items from the scene and forget the item lists."""
        self.closeContext()
        self.box_scene.clearSelection()
        self.box_scene.clear()
        self.box_view.viewport().update()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []
        self.openContext(self.dataset)

    def layout_changed(self):
        """Rebuild all scene elements and display them.

        Does nothing when no attribute is selected.
        """
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None or len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.mean_labels = [self.mean_label(stat, attr, lab)
                            for stat, lab in zip(self.stats, self.label_txts)]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [self.label_group(stat, attr, mean_lab)
                       for stat, mean_lab in zip(self.stats, self.mean_labels)]
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts]
        for it in chain(self.labels, self.attr_labels):
            self.box_scene.addItem(it)
        self.display_changed()

    def display_changed(self):
        """Place the prepared elements on the scene per display settings."""
        if self.dataset is None:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            vals = [getattr(stat, criterion) for stat in self.stats]
            # Boxes without a value for the criterion are sorted last
            overmax = max((val for val in vals if val is not None),
                          default=0) + 1
            vals = [val if val is not None else overmax for val in vals]
            self.order = sorted(self.order, key=vals.__getitem__)

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            for item in self.boxes[box_index]:
                self.box_scene.addItem(item)
                item.setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians and \
                        stat.median is not None:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans or \
                        stat.q25 is None:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width, len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()
        self.select_box_items()

    def display_changed_disc(self):
        """Draw the stacked bars for a discrete attribute from scratch."""
        assert not self.is_continuous

        self.clear_scene()
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts_all]

        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts if np.sum(cont) > 0]
            else:
                self.labels = [
                    QGraphicsTextItem(str(int(sum(self.dist))))]

        self.order = list(range(len(self.attr_labels)))

        self.draw_axis_disc()
        if self.group_var:
            nonempty = np.sum(np.array(self.conts), axis=1) > 0
            # Empty rows are dropped from self.conts below, so on later
            # redraws (e.g. after a resize) a row's position no longer equals
            # its group value index. Track the indices alongside the rows so
            # that the bars' filters keep referring to the right group.
            if self._cont_group_indices is None or \
                    len(self._cont_group_indices) != len(nonempty):
                self._cont_group_indices = np.arange(len(nonempty))
            self.boxes = [
                self.strudel(cont, int(group_index))
                for group_index, cont, keep
                in zip(self._cont_group_indices, self.conts, nonempty)
                if keep]
            self._cont_group_indices = self._cont_group_indices[nonempty]
            self.conts = self.conts[nonempty]

            if self.sort_freqs:
                # pylint: disable=invalid-unary-operand-type
                self.order = sorted(
                    self.order,
                    key=(-np.sum(self.conts, axis=1)).__getitem__)
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box_index in enumerate(self.order):
            y = (-len(self.boxes) + row) * 40 + 10
            box = self.boxes[box_index]
            # strudel() returns alternating bar and text items
            bars, labels = box[::2], box[1::2]

            self.__draw_group_labels(y, box_index)
            if not self.stretched:
                self.__draw_row_counts(y, box_index)
            if self.show_labels and self.attribute is not self.group_var:
                self.__draw_bar_labels(y, bars, labels)
            self.__draw_bars(y, bars)

        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width,
                                    len(self.boxes) * 40 + 90)
        self.infot1.setText("")
        self.select_box_items()

    def __draw_group_labels(self, y, row):
        """Draw group labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        label = self.attr_labels[row]
        b = label.boundingRect()
        label.setPos(-b.width() - 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_row_counts(self, y, row):
        """Draw row counts

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        assert not self.is_continuous
        label = self.labels[row]
        b = label.boundingRect()
        if self.group_var:
            right = self.scale_x * sum(self.conts[row])
        else:
            right = self.scale_x * sum(self.dist)
        label.setPos(right + 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_bar_labels(self, y, bars, labels):
        """Draw bar labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        bars: List[FilterGraphicsRectItem]
            list of bars being drawn
        labels: List[QGraphicsTextItem]
            list of labels for corresponding bars
        """
        for text_item, bar_part in zip(labels, bars):
            label = self.Label(text_item.toPlainText())
            label.setPos(bar_part.boundingRect().x(),
                         y - label.boundingRect().height() - 8)
            label.setMaxWidth(bar_part.boundingRect().width())
            self.box_scene.addItem(label)

    def __draw_bars(self, y, bars):
        """Draw bars

        Parameters
        ----------
        y: int
            vertical offset of bars

        bars: List[FilterGraphicsRectItem]
            list of bars to draw
        """
        for item in bars:
            item.setPos(0, y)
            self.box_scene.addItem(item)

    # noinspection PyPep8Naming
    def compute_tests(self):
        """Compute the significance test and show its result in `infot1`.

        For "Compare means" a t statistic is shown for two groups and ANOVA
        for more; nothing is shown for "Compare medians".
        """
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests

        # pylint: disable=comparison-with-itself
        def stat_ttest():
            d1, d2 = self.stats
            if d1.n < 2 or d2.n < 2:
                return np.nan, np.nan
            pooled_var = d1.var / d1.n + d2.var / d2.n
            # pylint: disable=comparison-with-itself
            if pooled_var == 0 or np.isnan(pooled_var):
                return np.nan, np.nan
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            if any(stat.n == 0 for stat in self.stats):
                return np.nan, np.nan
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg) ** 2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            if var_within == 0 or df_within == 0 or df_between == 0:
                return np.nan, np.nan
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, p)
            else:
                t, p = stat_ttest()
                t = "" if np.isnan(t) else f"Student's t: {t:.3f} (p={p:.3f})"
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, p)
            else:
                F, p = stat_ANOVA()
                t = "" if np.isnan(F) else f"ANOVA: {F:.3f} (p={p:.3f})"
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        """Build the "[name: ]mean ± dev" label group for one box.

        The returned QGraphicsItemGroup gets an extra attribute `min_x`, the
        x coordinate of the label's left edge relative to the mean.
        """
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(
            "%.*f" % (attr.number_of_decimals + 1, stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        missing_stats = not self.stats
        # With no statistics, draw a placeholder axis over [0, 1] whose ticks
        # are labelled "?"
        stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)]
        mean_labels = self.mean_labels or [
            self.mean_label(stats[0], self.attribute, "")]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        decimals = max(3, 4 - int(math.log10(step)))
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(
                repr(round(val, decimals)) if not missing_stats else "?",
                self._axis_font)
            t.setFlags(
                t.flags() | QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        self.box_scene.addLine(
            bottom * scale_x - 4, 0, top * scale_x + 4, 0, self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        assert not self.is_continuous
        if self.stretched:
            if not self.attr_labels:
                return
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                # self.labels holds one label per non-empty row, so skip the
                # empty rows here as well to keep the pairs aligned.
                rows = list(zip(
                    (cont for cont in self.conts if np.sum(cont) > 0),
                    self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(sum(dist) * scale_x + 10 +
                            lbl.boundingRect().width()
                            for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            # The axis runs from 0 to 100, while strudel() multiplies
            # fractions in [0, 1] by scale_x
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        """Build the annotation group (mean, median, quartiles) for one box."""
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            # Reads `t_box` of the enclosing scope: the bounding rect of the
            # label being moved.
            label.setX(to)
            to += t_box.width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        if stat.median is not None:
            msc = stat.median * self.scale_x
            med_t = centered_text(stat.median, msc)
            med_box_width2 = med_t.boundingRect().width() / 2
            line(msc)

        # NOTE(review): the quartile branches use `msc`, so they assume the
        # median is available whenever a quartile is - confirm with BoxData.
        if stat.q25 is not None:
            x = stat.q25 * self.scale_x
            t = centered_text(stat.q25, x)
            t_box = t.boundingRect()
            med_left = msc - med_box_width2
            if x + t_box.width() / 2 >= med_left - 5:
                move_label(t, x, med_left - t_box.width() - 5)
            else:
                line(x)

        if stat.q75 is not None:
            x = stat.q75 * self.scale_x
            t = centered_text(stat.q75, x)
            t_box = t.boundingRect()
            med_right = msc + med_box_width2
            if x - t_box.width() / 2 <= med_right + 5:
                move_label(t, x, med_right + 5)
            else:
                line(x)

        return labels

    def box_group(self, stat, height=20):
        """Return the list of graphics items that make up one box."""
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = []
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5)
        vert_line = line(stat.a_min, 0, stat.a_max, 0)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0)
        var_line.setPen(self._pen_paramet)
        box.extend([whisker1, whisker2, vert_line, mean_line, var_line])
        if stat.q25 is not None and stat.q75 is not None:
            mbox = FilterGraphicsRectItem(
                stat.conditions, stat.q25 * scale_x, -height / 2,
                (stat.q75 - stat.q25) * scale_x, height)
            mbox.setBrush(self._box_brush)
            mbox.setPen(QPen(Qt.NoPen))
            mbox.setZValue(-200)
            box.append(mbox)

        if stat.median is not None:
            median_line = line(stat.median, -height / 2,
                               stat.median, height / 2)
            median_line.setPen(self._pen_median)
            median_line.setZValue(-150)
            box.append(median_line)

        return box

    def strudel(self, dist, group_val_index=None):
        """Return the items of one stacked bar for the distribution `dist`.

        For every non-zero value the list contains a FilterGraphicsRectItem
        followed by a QGraphicsTextItem with the value's name.
        `group_val_index`, if given, is added to the filter conditions of
        each bar.
        """
        attr = self.attribute
        ss = np.sum(dist)
        box = []
        if ss < 1e-6:
            cond = [FilterDiscrete(attr, None)]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10))
        cum = 0
        for i, v in enumerate(dist):
            if v < 1e-6:
                continue
            if self.stretched:
                v /= ss
            v *= self.scale_x
            cond = [FilterDiscrete(attr, [i])]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / sum(dist))
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(attr.values[i])
            box.append(rect)
            box.append(text)
            cum += v
        return box

    def commit(self):
        """Send the instances matching the selected boxes/bars to the outputs.
        """
        self.conditions = [item.filter for item in
                           self.box_scene.selectedItems() if item.filter]
        selected, selection = None, []
        if self.conditions:
            selected = Values(self.conditions, conjunction=False)(self.dataset)
            # np.isin replaces the deprecated np.in1d
            selection = np.flatnonzero(np.isin(
                self.dataset.ids, selected.ids, assume_unique=True))
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, selection))

    def show_posthoc(self):
        """Draw the vertical comparison lines and the grouping lines."""
        def line(y0, y1):
            # Uses the `x` of the enclosing loop below
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line)
            if x is None:
                continue
            x *= self.scale_x
            # NOTE(review): x is already in scene coordinates here, so the
            # value stored in xs is scaled twice. Kept as is because it
            # decides which grouping lines are drawn below - confirm intent.
            xs.append(x * self.scale_x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if to in (last_to, frm):
                continue
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = - 6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        """Return the name of the shown attribute, or None if there is none."""
        return self.attribute.name if self.attribute else None

    def send_report(self):
        """Add the plot and a caption naming the variables to the report."""
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)

    class Label(QGraphicsSimpleTextItem):
        """Boxplot Label with settable maxWidth"""
        # Minimum width to display label text
        MIN_LABEL_WIDTH = 25

        # padding below the text
        PADDING = 3

        __max_width = None

        def maxWidth(self):
            """Return the maximal width, or None if it was not set."""
            return self.__max_width

        def setMaxWidth(self, max_width):
            """Set the maximal width available for the text."""
            self.__max_width = max_width

        def paint(self, painter, option, widget):
            """Overrides QGraphicsSimpleTextItem.paint

            If label text is too long, it is elided
            to fit into the allowed region
            """
            if self.__max_width is None:
                width = option.rect.width()
            else:
                width = self.__max_width

            if width < self.MIN_LABEL_WIDTH:
                # if space is too narrow, no label
                return

            fm = painter.fontMetrics()
            # The widths and heights come from bounding rects and may be
            # floats; the Qt overloads used here take ints and Python >= 3.10
            # no longer converts floats implicitly, so truncate explicitly.
            text = fm.elidedText(self.text(), Qt.ElideRight, int(width))
            painter.drawText(
                int(option.rect.x()),
                int(option.rect.y() + self.boundingRect().height()
                    - self.PADDING),
                text)
class OWQualityControl(widget.OWWidget):
    """Widget for experiment quality control.

    The data columns (experiments) are split into groups by the selected
    "Separate By" labels. For every group, the distances between a base
    column of the group and all data columns are computed and drawn as a
    row of clickable rug items in a graphics scene.
    """
    name = "Quality Control"
    description = "Experiment quality control"
    icon = "../widgets/icons/QualityControl.svg"
    priority = 5000

    inputs = [("Experiment Data", Orange.data.Table, "set_data")]
    outputs = []

    DISTANCE_FUNCTIONS = [("Distance from Pearson correlation", dist_pcorr),
                          ("Euclidean distance", dist_eucl),
                          ("Distance from Spearman correlation", dist_spearman)]

    settingsHandler = SetContextHandler()

    split_by_labels = settings.ContextSetting({})
    sort_by_labels = settings.ContextSetting({})

    selected_distance_index = settings.Setting(0)

    def __init__(self, parent=None):
        super().__init__(parent)

        ## Attributes
        self.data = None
        self.distances = None
        self.groups = None
        self.unique_pos = None
        self.base_group_index = 0

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info")
        self.info_box = gui.widgetLabel(box, "\n")

        ## Separate By box
        box = gui.widgetBox(self.controlArea, "Separate By")
        self.split_by_model = itemmodels.PyListModel(parent=self)
        self.split_by_view = QListView()
        self.split_by_view.setSelectionMode(QListView.ExtendedSelection)
        self.split_by_view.setModel(self.split_by_model)
        box.layout().addWidget(self.split_by_view)

        self.split_by_view.selectionModel().selectionChanged.connect(
            self.on_split_key_changed)

        ## Sort By box
        box = gui.widgetBox(self.controlArea, "Sort By")
        self.sort_by_model = itemmodels.PyListModel(parent=self)
        self.sort_by_view = QListView()
        self.sort_by_view.setSelectionMode(QListView.ExtendedSelection)
        self.sort_by_view.setModel(self.sort_by_model)
        box.layout().addWidget(self.sort_by_view)

        self.sort_by_view.selectionModel().selectionChanged.connect(
            self.on_sort_key_changed)

        ## Distance box
        box = gui.widgetBox(self.controlArea, "Distance Measure")
        gui.comboBox(box, self, "selected_distance_index",
                     items=[name for name, _ in self.DISTANCE_FUNCTIONS],
                     callback=self.on_distance_measure_changed)

        self.scene = QGraphicsScene()
        self.scene_view = QGraphicsView(self.scene)
        self.scene_view.setRenderHints(QPainter.Antialiasing)
        self.scene_view.setAlignment(Qt.AlignLeft | Qt.AlignVCenter)
        self.mainArea.layout().addWidget(self.scene_view)

        self.scene_view.installEventFilter(self)

        self._disable_updates = False
        # distance function -> (distance matrix, set of computed (i, j) keys)
        self._cached_distances = {}
        # group label -> column index preferred as the group's base column
        self._base_index_hints = {}
        self.main_widget = None
        self.rug_widgets = []
        self.labels = []

        self.resize(800, 600)

    def clear(self):
        """Clear the widget state."""
        self.data = None
        self.distances = None
        self.groups = None
        self.unique_pos = None

        with disable_updates(self):
            self.split_by_model[:] = []
            self.sort_by_model[:] = []

        self.main_widget = None
        self.scene.clear()
        self.info_box.setText("\n")
        self._cached_distances = {}

    def set_data(self, data=None):
        """Set input experiment data."""
        self.closeContext()
        self.clear()

        self.error(0)
        self.warning(0)

        if data is not None:
            keys = self.get_suitable_keys(data)
            if not keys:
                self.error(0, "Data has no suitable feature labels.")
                data = None

        self.data = data
        if data is not None:
            self.on_new_data()

    def update_label_candidates(self):
        """Update the label candidates selection GUI (Group/Sort By views).
        """
        keys = self.get_suitable_keys(self.data)
        with disable_updates(self):
            self.split_by_model[:] = keys
            self.sort_by_model[:] = keys

    def get_suitable_keys(self, data):
        """Return suitable attr label keys from the data, i.e. the keys
        that have at least two unique values in the data.

        Keys are returned in the order in which they are first encountered
        when walking over ``data.domain.attributes``, so the result is the
        same on every call for the same data.
        """
        values = defaultdict(set)
        for attr in data.domain.attributes:
            for key, value in attr.attributes.items():
                # in case someone put non string values in attributes dict
                values[str(key)].add(str(value))
        return [key for key, vals in values.items() if len(vals) > 1]

    def selected_split_by_labels(self):
        """Return the current selected split labels.
        """
        sel_m = self.split_by_view.selectionModel()
        indices = [r.row() for r in sel_m.selectedRows()]
        # Rows of the split view index the split model (not the sort model).
        return [self.split_by_model[i] for i in indices]

    def selected_sort_by_labels(self):
        """Return the current selected sort labels
        """
        sel_m = self.sort_by_view.selectionModel()
        indices = [r.row() for r in sel_m.selectedRows()]
        return [self.sort_by_model[i] for i in indices]

    def selected_distance(self):
        """Return the selected distance function.
        """
        return self.DISTANCE_FUNCTIONS[self.selected_distance_index][1]

    def selected_base_group_index(self):
        """Return the selected base group index
        """
        return self.base_group_index

    def selected_base_indices(self, base_group_index=None):
        """Return the base column index for every group.

        If ``base_group_index`` is None, the index stored in the base index
        hints for the group's label is used, falling back to the group's
        first index that is not None (or None if there is no such index).
        Otherwise the element at position ``base_group_index`` of each
        group's indices is used.
        """
        indices = []
        for g, ind in self.groups:
            if base_group_index is None:
                label = group_label(self.selected_split_by_labels(), g)
                ind = [i for i in ind if i is not None]
                i = self._base_index_hints.get(label, ind[0] if ind else None)
            else:
                i = ind[base_group_index]
            indices.append(i)
        return indices

    def on_new_data(self):
        """We have new data and need to recompute all.
        """
        self.closeContext()

        self.update_label_candidates()
        self.info_box.setText(
            "%s genes \n%s experiments" %
            (len(self.data), len(self.data.domain.attributes))
        )

        self.base_group_index = 0

        keys = self.get_suitable_keys(self.data)
        self.openContext(keys)

        ## Restore saved context settings (split/sort selection)
        split_by_labels = self.split_by_labels
        sort_by_labels = self.sort_by_labels

        def select(model, selection_model, selected_items):
            """Select items in a Qt item model view
            """
            all_items = list(model)
            try:
                indices = [all_items.index(item) for item in selected_items]
            except (ValueError, TypeError):
                # A stored item is no longer in the model (ValueError) or
                # the stored selection is not iterable (TypeError):
                # restore nothing rather than a partial selection.
                indices = []
            for ind in indices:
                selection_model.select(model.index(ind),
                                       QItemSelectionModel.Select)

        with disable_updates(self):
            select(self.split_by_view.model(),
                   self.split_by_view.selectionModel(),
                   split_by_labels)

            select(self.sort_by_view.model(),
                   self.sort_by_view.selectionModel(),
                   sort_by_labels)

        with widget_disable(self):
            self.split_and_update()

    def on_split_key_changed(self, *args):
        """Split key has changed
        """
        with widget_disable(self):
            if not self._disable_updates:
                self.base_group_index = 0
                self.split_by_labels = self.selected_split_by_labels()
                self.split_and_update()

    def on_sort_key_changed(self, *args):
        """Sort key has changed
        """
        with widget_disable(self):
            if not self._disable_updates:
                self.base_group_index = 0
                self.sort_by_labels = self.selected_sort_by_labels()
                self.split_and_update()

    def on_distance_measure_changed(self):
        """Distance measure has changed
        """
        if self.data is not None:
            with widget_disable(self):
                self.update_distances()
                self.replot_experiments()

    def on_view_resize(self, size):
        """The view with the quality plot has changed
        """
        if self.main_widget:
            current = self.main_widget.size()
            self.main_widget.resize(size.width() - 6,
                                    current.height())

            self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def on_rug_item_clicked(self, item):
        """An ``item`` in the quality plot has been clicked.
        """
        update = False
        sort_by_labels = self.selected_sort_by_labels()
        if sort_by_labels and item.in_group:
            ## The item is part of the group
            if item.group_index != self.base_group_index:
                self.base_group_index = item.group_index
                update = True
        else:
            if sort_by_labels:
                # If the user clicked on an background item it
                # invalidates the sorted labels selection
                with disable_updates(self):
                    self.sort_by_view.selectionModel().clear()
                    update = True

            index = item.index
            group = item.group
            label = group_label(self.selected_split_by_labels(), group)

            if self._base_index_hints.get(label, 0) != index:
                self._base_index_hints[label] = index
                update = True

        if update:
            with widget_disable(self):
                self.split_and_update()

    def eventFilter(self, obj, event):
        """Forward resize events of the scene view to `on_view_resize`."""
        if obj is self.scene_view and event.type() == QEvent.Resize:
            self.on_view_resize(event.size())
        return super().eventFilter(obj, event)

    def split_and_update(self):
        """
        Split the data based on the selected sort/split labels
        and update the quality plot.

        """
        split_labels = self.selected_split_by_labels()
        sort_labels = self.selected_sort_by_labels()

        self.warning(0)
        if not split_labels:
            self.warning(0, "No separate by label selected.")

        self.groups, self.unique_pos = \
            exp.separate_by(self.data, split_labels,
                            consider=sort_labels,
                            add_empty=True)

        self.groups = sorted(self.groups.items(),
                             key=lambda t: list(map(float_if_posible, t[0])))
        self.unique_pos = sorted(self.unique_pos.items(),
                                 key=lambda t: list(map(float_if_posible, t[0])))

        if self.groups:
            if sort_labels:
                group_base = self.selected_base_group_index()
                base_indices = self.selected_base_indices(group_base)
            else:
                base_indices = self.selected_base_indices()
            self.update_distances(base_indices)
            self.replot_experiments()

    def get_cached_distances(self, measure):
        """Return the ``(matrix, computed)`` cache pair for ``measure``.

        ``matrix`` is a square array with one row/column per data column
        and ``computed`` is the set of ``(i, j)`` keys (with ``i <= j``)
        whose distance is already stored. The diagonal is marked as
        computed from the start (distance 0).
        """
        if measure not in self._cached_distances:
            n_attrs = len(self.data.domain.attributes)
            mat = numpy.zeros((n_attrs, n_attrs))
            self._cached_distances[measure] = \
                (mat, {(i, i) for i in range(n_attrs)})

        return self._cached_distances[measure]

    def get_cached_distance(self, measure, i, j):
        """Return the cached distance between columns ``i`` and ``j``,
        or None if it has not been computed yet."""
        matrix, computed = self.get_cached_distances(measure)
        key = (i, j) if i < j else (j, i)
        if key in computed:
            return matrix[i, j]
        else:
            return None

    def get_distance(self, measure, i, j):
        """Return the distance between columns ``i`` and ``j``, computing
        and caching it when it is not cached yet."""
        d = self.get_cached_distance(measure, i, j)
        if d is None:
            vec_i = take_columns(self.data, [i])
            vec_j = take_columns(self.data, [j])
            d = measure(vec_i, vec_j)
            # Store both (i, j) and (j, i): the cache key is order
            # independent, so the matrix has to be symmetric as well.
            self.store_distance(measure, i, j, d)
        return d

    def store_distance(self, measure, i, j, dist):
        """Store ``dist`` as the distance between columns ``i`` and ``j``."""
        matrix, computed = self.get_cached_distances(measure)
        key = (i, j) if i < j else (j, i)
        matrix[j, i] = matrix[i, j] = dist
        computed.add(key)

    def update_distances(self, base_indices=()):
        """Recompute the experiment distances.

        ``base_indices`` gives the base column index for every group; an
        entry may be None, in which case the group gets no distances. If
        left at its default, the element at the selected base group index
        of every group is used.
        """
        distance = self.selected_distance()
        if base_indices == ():
            base_group_index = self.selected_base_group_index()
            base_indices = [ind[base_group_index]
                            for _, ind in self.groups]

        assert len(base_indices) == len(self.groups)

        base_distances = []
        attributes = self.data.domain.attributes
        pb = gui.ProgressBar(self, len(self.groups) * len(attributes))
        try:
            for (group, indices), base_index in zip(self.groups,
                                                    base_indices):
                if base_index is None:
                    base_distances.append(None)
                    continue

                # Base column of the group
                base_vec = take_columns(self.data, [base_index])
                distances = []
                # Compute the distances between base column
                # and all the rest data columns.
                for i in range(len(attributes)):
                    if i == base_index:
                        dist = 0.0
                    else:
                        dist = self.get_cached_distance(
                            distance, i, base_index)
                        if dist is None:
                            vec_i = take_columns(self.data, [i])
                            dist = distance(base_vec, vec_i)
                            self.store_distance(
                                distance, i, base_index, dist)
                    distances.append(dist)
                    pb.advance()

                base_distances.append(distances)
        finally:
            # Finish the progress bar even if a distance function raises.
            pb.finish()
        self.distances = base_distances

    def replot_experiments(self):
        """Replot the whole quality plot.
        """
        self.scene.clear()

        if self.data is None or self.groups is None \
                or self.distances is None:
            # Nothing to plot; the previous main widget has just been
            # removed by scene.clear(), so do not keep references to it.
            self.main_widget = None
            self.rug_widgets = []
            self.labels = []
            return

        labels = []

        # Groups without a base column have no distances (None).
        valid_distances = [dist for dist in self.distances if dist]
        max_dist = numpy.nanmax(valid_distances) if valid_distances else 0.0
        if not max_dist > 0:
            # All distances are zero (or undefined): avoid dividing by it.
            max_dist = 1.0

        rug_widgets = []

        group_pen = QPen(Qt.black)
        group_pen.setWidth(2)
        group_pen.setCapStyle(Qt.RoundCap)
        background_pen = QPen(QColor(0, 0, 250, 150))
        background_pen.setWidth(1)
        background_pen.setCapStyle(Qt.RoundCap)

        main_widget = QGraphicsWidget()
        layout = QGraphicsGridLayout()
        attributes = self.data.domain.attributes
        for (group, indices), dist_vec in zip(self.groups, self.distances):
            indices_set = set(indices)
            rug_items = []
            if dist_vec is not None:
                for i, attr in enumerate(attributes):
                    # Is this a within group distance or background
                    in_group = i in indices_set
                    if in_group:
                        rug_item = ClickableRugItem(
                            dist_vec[i] / max_dist,
                            1.0, self.on_rug_item_clicked)
                        rug_item.setPen(group_pen)
                        tooltip = experiment_description(attr)
                        rug_item.setToolTip(tooltip)
                        rug_item.group_index = indices.index(i)
                        rug_item.setZValue(rug_item.zValue() + 1)
                    else:
                        rug_item = ClickableRugItem(
                            dist_vec[i] / max_dist,
                            0.85, self.on_rug_item_clicked)
                        rug_item.setPen(background_pen)
                        tooltip = experiment_description(attr)
                        rug_item.setToolTip(tooltip)

                    rug_item.group = group
                    rug_item.index = i
                    rug_item.in_group = in_group

                    rug_items.append(rug_item)

            rug_widget = RugGraphicsWidget(parent=main_widget)
            rug_widget.set_rug(rug_items)

            rug_widgets.append(rug_widget)

            label = group_label(self.selected_split_by_labels(), group)
            label_item = QGraphicsSimpleTextItem(label, main_widget)
            label_item = GraphicsSimpleTextLayoutItem(label_item,
                                                      parent=layout)
            label_item.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
            labels.append(label_item)

        for i, (label, rug_w) in enumerate(zip(labels, rug_widgets)):
            layout.addItem(label, i, 0, Qt.AlignVCenter)
            layout.addItem(rug_w, i, 1)
            layout.setRowMaximumHeight(i, 30)

        main_widget.setLayout(layout)
        self.scene.addItem(main_widget)
        self.main_widget = main_widget
        self.rug_widgets = rug_widgets
        self.labels = labels
        self.on_view_resize(self.scene_view.size())
class TestDendrogramWidget(GuiTest):
    """GUI tests for DendrogramWidget shown in a QGraphicsView."""

    def setUp(self) -> None:
        super().setUp()
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.resize(300, 300)
        self.widget = DendrogramWidget()
        self.scene.addItem(self.widget)

    def tearDown(self) -> None:
        self.scene.clear()
        del self.widget
        # Release the view and the scene as well, so they are not kept
        # alive by the test case instance after the test has finished.
        # The view is dropped first since it displays the scene.
        del self.view
        del self.scene
        super().tearDown()

    def test_widget(self):
        """Check height/position mapping, hover highlighting and selection."""
        w = self.widget

        # A root with height 0
        w.set_root(t(0.0, leaf(0, 0), leaf(1, 1)))
        w.resize(w.effectiveSizeHint(Qt.PreferredSize))
        h = w.height_at(QPoint())
        self.assertEqual(h, 0)
        h = w.height_at(QPoint(10, 0))
        self.assertEqual(h, 0)

        self.assertEqual(w.pos_at_height(0).x(), w.rect().x())
        self.assertEqual(w.pos_at_height(1).x(), w.rect().x())

        # A root with the smallest representable positive height difference
        height = np.finfo(float).eps
        w.set_root(t(height, leaf(0, 0), leaf(1, 1)))

        h = w.height_at(QPoint())
        self.assertEqual(h, height)
        # QPoint takes integer coordinates; the widget width is a float
        h = w.height_at(QPoint(int(w.size().width()), 0))
        self.assertEqual(h, 0)

        self.assertEqual(w.pos_at_height(0).x(), w.rect().right())
        self.assertEqual(w.pos_at_height(height).x(), w.rect().left())

        view = self.view
        view.grab()  # ensure w is laid out

        root = w.root()
        rootitem = w.item(root)
        r = view.mapFromScene(rootitem.sceneBoundingRect()).boundingRect()
        # move/hover over the item
        mouseMove(view.viewport(), r.center())
        self.assertEqual(w._highlighted_item, rootitem)
        # click select
        QTest.mouseClick(view.viewport(), Qt.LeftButton, Qt.NoModifier,
                         r.center())
        self.assertTrue(w.isItemSelected(rootitem))

        p = r.topLeft() + QPoint(-3, -3)  # just out of the item
        mouseMove(view.viewport(), p)
        self.assertIsNone(w._highlighted_item)

    def test_update_palette(self):
        """The root item's pen follows the WindowText color of the palette
        set on the widget."""
        w = self.widget
        w.set_root(t(1.0, leaf(0, 0), leaf(1, 1)))
        w.setSelectedClusters([w.root()])
        p = QPalette()
        p.setColor(QPalette.All, QPalette.WindowText, QColor(Qt.red))
        w.setPalette(p)
        item = w.item(w.root())
        self.assertEqual(item.pen().color(), p.color(QPalette.WindowText))
class OWBoxPlot(widget.OWWidget): name = "Box Plot" description = "Visualize the distribution of feature values in a box plot." icon = "icons/BoxPlot.svg" priority = 100 keywords = ["whisker"] class Inputs: data = Input("Data", Orange.data.Table) class Outputs: selected_data = Output("Selected Data", Orange.data.Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table) class Warning(widget.OWWidget.Warning): no_vars = widget.Msg( "Data contains no categorical or numeric variables") buttons_area_orientation = None #: Comparison types for continuous variables CompareNone, CompareMedians, CompareMeans = 0, 1, 2 settingsHandler = DomainContextHandler() # If this was a list, context handler would try to match its elements to # variable names! selection = ContextSetting((), schema_only=True) attribute = ContextSetting(None) order_by_importance = Setting(False) order_grouping_by_importance = Setting(False) group_var = ContextSetting(None) show_annotations = Setting(True) compare = Setting(CompareMeans) stattest = Setting(0) sig_threshold = Setting(0.05) stretched = Setting(True) show_labels = Setting(True) sort_freqs = Setting(False) _sorting_criteria_attrs = { CompareNone: "", CompareMedians: "median", CompareMeans: "mean" } _pen_axis_tick = QPen(Qt.white, 5) _pen_axis = QPen(Qt.darkGray, 3) _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2) _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2) _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1) _pen_dotted.setStyle(Qt.DotLine) _post_line_pen = QPen(Qt.lightGray, 2) _post_grp_pen = QPen(Qt.lightGray, 4) for pen in (_pen_paramet, _pen_median, _pen_dotted, _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen): pen.setCosmetic(True) pen.setCapStyle(Qt.RoundCap) pen.setJoinStyle(Qt.RoundJoin) _pen_axis_tick.setCapStyle(Qt.FlatCap) _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0)) _attr_brush = QBrush(QColor(0x33, 0x00, 0xff)) graph_name = "box_scene" def __init__(self): 
super().__init__() self._axis_font = QFont() self._axis_font.setPixelSize(12) self._label_font = QFont() self._label_font.setPixelSize(11) self.dataset = None self.stats = [] self.dist = self.conts = None self.posthoc_lines = [] self.label_txts = self.mean_labels = self.boxes = self.labels = \ self.label_txts_all = self.attr_labels = self.order = [] self.scale_x = 1 self.scene_min_x = self.scene_max_x = self.scene_width = 0 self.label_width = 0 self.attrs = VariableListModel() sorted_model = SortProxyModel(sortRole=Qt.UserRole) sorted_model.setSourceModel(self.attrs) sorted_model.sort(0) box = gui.vBox(self.controlArea, "Variable") view = self.attr_list = ListViewSearch() view.setModel(sorted_model) view.setSelectionMode(view.SingleSelection) view.selectionModel().selectionChanged.connect(self.attr_changed) view.setMinimumSize(QSize(30, 30)) # Any other policy than Ignored will let the QListBox's scrollbar # set the minimal height (see the penultimate paragraph of # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget) view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored) box.layout().addWidget(view) gui.checkBox(box, self, "order_by_importance", "Order by relevance to subgroups", tooltip="Order by 𝜒² or ANOVA over the subgroups", callback=self.apply_attr_sorting) self.group_vars = VariableListModel(placeholder="None") sorted_model = SortProxyModel(sortRole=Qt.UserRole) sorted_model.setSourceModel(self.group_vars) sorted_model.sort(0) box = gui.vBox(self.controlArea, "Subgroups") view = self.group_list = ListViewSearch() view.setModel(sorted_model) view.selectionModel().selectionChanged.connect(self.grouping_changed) view.setMinimumSize(QSize(30, 30)) # See the comment above view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored) box.layout().addWidget(view) gui.checkBox(box, self, "order_grouping_by_importance", "Order by relevance to variable", tooltip="Order by 𝜒² or ANOVA over the variable values", 
callback=self.apply_group_sorting) # TODO: move Compare median/mean to grouping box # The vertical size policy is needed to let only the list views expand self.display_box = gui.vBox(self.controlArea, "Display", sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum)) gui.checkBox(self.display_box, self, "show_annotations", "Annotate", callback=self.update_graph) self.compare_rb = gui.radioButtonsInBox( self.display_box, self, 'compare', btnLabels=["No comparison", "Compare medians", "Compare means"], callback=self.update_graph) # The vertical size policy is needed to let only the list views expand self.stretching_box = box = gui.vBox(self.controlArea, box="Display", sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Fixed)) self.stretching_box.sizeHint = self.display_box.sizeHint gui.checkBox(box, self, 'stretched', "Stretch bars", callback=self.update_graph, stateWhenDisabled=False) gui.checkBox(box, self, 'show_labels', "Show box labels", callback=self.update_graph) self.sort_cb = gui.checkBox(box, self, 'sort_freqs', "Sort by subgroup frequencies", callback=self.update_graph, stateWhenDisabled=False) gui.vBox(self.mainArea) self.box_scene = QGraphicsScene(self) self.box_scene.selectionChanged.connect(self.on_selection_changed) self.box_view = QGraphicsView(self.box_scene) self.box_view.setRenderHints(QPainter.Antialiasing | QPainter.TextAntialiasing | QPainter.SmoothPixmapTransform) self.box_view.viewport().installEventFilter(self) self.mainArea.layout().addWidget(self.box_view) self.stat_test = "" self.mainArea.setMinimumWidth(300) self.update_box_visibilities() def sizeHint(self): return QSize(900, 500) def eventFilter(self, obj, event): if obj is self.box_view.viewport() and \ event.type() == QEvent.Resize: self.update_graph() return super().eventFilter(obj, event) @property def show_stretched(self): return self.stretched and self.group_var is not self.attribute def reset_attrs(self): domain = self.dataset.domain self.attrs[:] = [ var for var in 
chain(domain.class_vars, domain.metas, domain.attributes) if var.is_primitive() and not var.attributes.get("hidden", False) ] def reset_groups(self): domain = self.dataset.domain self.group_vars[:] = [None] + [ var for var in chain(domain.class_vars, domain.metas, domain.attributes) if var.is_discrete and not var.attributes.get("hidden", False) ] @Inputs.data def set_data(self, dataset): self.closeContext() self._reset_all_data() if dataset and not (len(dataset.domain.variables) or any(var.is_primitive() for var in dataset.domain.metas)): self.Warning.no_vars() dataset = None self.dataset = dataset if dataset: self.reset_attrs() self.reset_groups() self._select_default_variables() self.openContext(self.dataset) self._set_list_view_selections() self.compute_box_data() self.apply_attr_sorting() self.apply_group_sorting() self.update_graph() self.select_box_items() self.update_box_visibilities() self.commit() def _reset_all_data(self): self.clear_scene() self.Warning.no_vars.clear() self.stats = [] self.dist = self.conts = None self.group_var = None self.attribute = None self.stat_test = "" self.attrs[:] = [] self.group_vars[:] = [None] self.selection = () def _select_default_variables(self): # visualize first non-class variable, group by class (if present) domain = self.dataset.domain if len(self.attrs) > len(domain.class_vars): self.attribute = self.attrs[len(domain.class_vars)] elif self.attrs: self.attribute = self.attrs[0] if domain.class_var and domain.class_var.is_discrete: self.group_var = domain.class_var def _set_list_view_selections(self): for view, var, callback in ((self.attr_list, self.attribute, self.attr_changed), (self.group_list, self.group_var, self.grouping_changed)): src_model = view.model().sourceModel() if var not in src_model: continue sel_model = view.selectionModel() sel_model.selectionChanged.disconnect(callback) row = src_model.indexOf(var) index = view.model().index(row, 0) sel_model.select(index, sel_model.ClearAndSelect) 
self._ensure_selection_visible(view) sel_model.selectionChanged.connect(callback) def apply_attr_sorting(self): def compute_score(attr): # This function and the one in apply_group_sorting are similar, but # different in too many details, so they are kept as separate # functions. # If you discover a bug in this function, check the other one, too. if attr is group_var: return 3 if attr.is_continuous: # One-way ANOVA col = data.get_column_view(attr)[0].astype(float) groups = (col[group_col == i] for i in range(n_groups)) groups = (col[~np.isnan(col)] for col in groups) groups = [group for group in groups if len(group)] p = f_oneway(*groups)[1] if len(groups) > 1 else 2 else: p = self._chi_square(group_var, attr)[1] if math.isnan(p): return 2 return p data = self.dataset if data is None: return domain = data.domain group_var = self.group_var if self.order_by_importance and group_var is not None: n_groups = len(group_var.values) group_col = data.get_column_view(group_var)[0] if \ domain.has_continuous_attributes( include_class=True, include_metas=True) else None self._sort_list(self.attrs, self.attr_list, compute_score) else: self._sort_list(self.attrs, self.attr_list, None) def apply_group_sorting(self): def compute_stat(group): # This function and the one in apply_attr_sorting are similar, but # different in too many details, so they are kept as separate # functions. # If you discover a bug in this function, check the other one, too. 
if group is attr: return 3 if group is None: return -1 if attr.is_continuous: group_col = data.get_column_view(group)[0].astype(float) groups = (attr_col[group_col == i] for i in range(len(group.values))) groups = (col[~np.isnan(col)] for col in groups) groups = [group for group in groups if len(group)] p = f_oneway(*groups)[1] if len(groups) > 1 else 2 else: p = self._chi_square(group, attr)[1] if math.isnan(p): return 2 return p data = self.dataset if data is None: return attr = self.attribute if self.order_grouping_by_importance: if attr.is_continuous: attr_col = data.get_column_view(attr)[0].astype(float) self._sort_list(self.group_vars, self.group_list, compute_stat) else: self._sort_list(self.group_vars, self.group_list, None) def _sort_list(self, source_model, view, key=None): if key is None: c = count() def key(_): # pylint: disable=function-redefined return next(c) for i, attr in enumerate(source_model): source_model.setData(source_model.index(i), key(attr), Qt.UserRole) self._ensure_selection_visible(view) @staticmethod def _ensure_selection_visible(view): selection = view.selectedIndexes() if len(selection) == 1: view.scrollTo(selection[0]) def _chi_square(self, group_var, attr): # Chi-square with the given distribution into groups if not attr.values or not group_var.values: return 0, 2, 0 observed = np.array( contingency.get_contingency(self.dataset, group_var, attr)) observed = observed[observed.sum(axis=1) != 0, :] observed = observed[:, observed.sum(axis=0) != 0] if min(observed.shape) < 2: return 0, 2, 0 return chi2_contingency(observed)[:3] def grouping_changed(self, selected): if not selected: return # should never come here self.group_var = selected.indexes()[0].data(gui.TableVariable) self._variables_changed(self.apply_attr_sorting) def attr_changed(self, selected): if not selected: return # should never come here self.attribute = selected.indexes()[0].data(gui.TableVariable) self._variables_changed(self.apply_group_sorting) def 
_variables_changed(self, sorting): self.selection = () self.compute_box_data() sorting() self.update_graph() self.update_box_visibilities() self.commit() def update_graph(self): pending_selection = self.selection self.box_scene.selectionChanged.disconnect(self.on_selection_changed) try: # not for exceptions, just to reconnect after all possible paths self.clear_scene() if self.dataset is None or self.attribute is None: return if self.attribute.is_continuous: self._display_changed_cont() else: self._display_changed_disc() self.selection = pending_selection self.draw_stat() self.select_box_items() if self.attribute.is_continuous: heights = 90 if self.show_annotations else 60 self.box_view.centerOn( self.scene_min_x + self.scene_width / 2, -30 - len(self.stats) * heights / 2 + 45) else: self.box_view.centerOn(self.scene_width / 2, -30 - len(self.boxes) * 40 / 2 + 45) finally: self.box_scene.selectionChanged.connect(self.on_selection_changed) def select_box_items(self): selection = set(self.selection) for box in self.box_scene.items(): if isinstance(box, FilterGraphicsRectItem): box.setSelected(box.data_range in selection) def compute_box_data(self): attr = self.attribute if not attr: return dataset = self.dataset if dataset is None \ or not attr.is_continuous and not attr.values \ or self.group_var and not self.group_var.values: self.stats = [] self.dist = self.conts = None return if self.group_var: self.dist = None self.conts = contingency.get_contingency(dataset, attr, self.group_var) missing_val_str = f"missing '{self.group_var.name}'" group_var_labels = self.group_var.values + ("", ) if self.attribute.is_continuous: stats, label_texts = [], [] for cont, value in zip(self.conts.array_with_unknowns, group_var_labels): if np.sum(cont[1]): stats.append(BoxData(cont, value)) label_texts.append(value or missing_val_str) self.stats = stats self.label_txts_all = label_texts else: self.label_txts_all = [ v or missing_val_str for v, c in zip( group_var_labels, 
self.conts.array_with_unknowns) if np.sum(c) > 0 ] else: self.dist = distribution.get_distribution(dataset, attr) self.conts = None if self.attribute.is_continuous: self.stats = [BoxData(self.dist, None)] self.label_txts_all = [""] self.label_txts = [ txts for stat, txts in zip(self.stats, self.label_txts_all) if stat.n > 0 ] self.stats = [stat for stat in self.stats if stat.n > 0] def update_box_visibilities(self): self.controls.stretched.setDisabled(self.group_var is self.attribute) if not self.attribute: self.stretching_box.hide() self.display_box.hide() elif self.attribute.is_continuous: self.stretching_box.hide() self.display_box.show() self.compare_rb.setEnabled(self.group_var is not None) else: self.stretching_box.show() self.display_box.hide() self.sort_cb.setEnabled(self.group_var is not None) def clear_scene(self): self.box_scene.clear() self.box_view.viewport().update() self.attr_labels = [] self.labels = [] self.boxes = [] self.mean_labels = [] self.posthoc_lines = [] def _display_changed_cont(self): self.mean_labels = [ self.mean_label(stat, self.attribute, lab) for stat, lab in zip(self.stats, self.label_txts) ] self.draw_axis() self.boxes = [self.box_group(stat) for stat in self.stats] self.labels = [ self.label_group(stat, self.attribute, mean_lab) for stat, mean_lab in zip(self.stats, self.mean_labels) ] self.attr_labels = [ QGraphicsSimpleTextItem(lab) for lab in self.label_txts ] for it in chain(self.labels, self.attr_labels): self.box_scene.addItem(it) self.order = list(range(len(self.stats))) criterion = self._sorting_criteria_attrs[self.compare] if criterion: vals = [getattr(stat, criterion) for stat in self.stats] overmax = max((val for val in vals if val is not None), default=0) \ + 1 vals = [val if val is not None else overmax for val in vals] self.order = sorted(self.order, key=vals.__getitem__) heights = 90 if self.show_annotations else 60 for row, box_index in enumerate(self.order): y = (-len(self.stats) + row) * heights + 10 for item in 
self.boxes[box_index]: self.box_scene.addItem(item) item.setY(y) labels = self.labels[box_index] if self.show_annotations: labels.show() labels.setY(y) else: labels.hide() label = self.attr_labels[box_index] label.setY(y - 15 - label.boundingRect().height()) if self.show_annotations: label.hide() else: stat = self.stats[box_index] if self.compare == OWBoxPlot.CompareMedians and \ stat.median is not None: pos = stat.median + 5 / self.scale_x elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None: pos = stat.mean + 5 / self.scale_x else: pos = stat.q25 label.setX(pos * self.scale_x) label.show() r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights, self.scene_width, len(self.stats) * heights + 90) self.box_scene.setSceneRect(r) self._compute_tests_cont() self._show_posthoc() def _display_changed_disc(self): self.clear_scene() self.attr_labels = [ QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all ] if not self.show_stretched: if self.group_var: self.labels = [ QGraphicsTextItem("{}".format(int(sum(cont)))) for cont in self.conts.array_with_unknowns if np.sum(cont) > 0 ] else: self.labels = [QGraphicsTextItem(str(int(sum(self.dist))))] self.order = list(range(len(self.attr_labels))) self.draw_axis_disc() if self.group_var: conts = self.conts.array_with_unknowns self.boxes = [ self.strudel(cont, val) for cont, val in zip(conts, self.group_var.values + ("", )) if np.sum(cont) > 0 ] sums_ = np.sum(conts, axis=1) sums_ = sums_[sums_ > 0] # only bars with sum > 0 are shown if self.sort_freqs: # pylint: disable=invalid-unary-operand-type self.order = sorted(self.order, key=(-sums_).__getitem__) else: conts = self.dist.array_with_unknowns self.boxes = [self.strudel(conts)] sums_ = [np.sum(conts)] for row, box_index in enumerate(self.order): y = (-len(self.boxes) + row) * 40 + 10 box = self.boxes[box_index] bars, labels = box[::2], box[1::2] self.__draw_group_labels(y, box_index) if not self.show_stretched: self.__draw_row_counts(y, 
self.labels[box_index], sums_[box_index]) if self.show_labels and self.attribute is not self.group_var: self.__draw_bar_labels(y, bars, labels) self.__draw_bars(y, bars) self.box_scene.setSceneRect(-self.label_width - 5, -30 - len(self.boxes) * 40, self.scene_width, len(self.boxes * 40) + 90) self._compute_tests_disc() def __draw_group_labels(self, y, row): """Draw group labels Parameters ---------- y: int vertical offset of bars row: int row index """ label = self.attr_labels[row] b = label.boundingRect() label.setPos(-b.width() - 10, y - b.height() / 2) self.box_scene.addItem(label) def __draw_row_counts(self, y, label, row_sum_): """Draw row counts Parameters ---------- y: int vertical offset of bars label: QGraphicsSimpleTextItem Label for group row_sum_: int Sum for the group """ assert not self.attribute.is_continuous b = label.boundingRect() right = self.scale_x * row_sum_ label.setPos(right + 10, y - b.height() / 2) self.box_scene.addItem(label) def __draw_bar_labels(self, y, bars, labels): """Draw bar labels Parameters ---------- y: int vertical offset of bars bars: List[FilterGraphicsRectItem] list of bars being drawn labels: List[QGraphicsTextItem] list of labels for corresponding bars """ for text_item, bar_part in zip(labels, bars): label = self.Label(text_item.toPlainText()) label.setPos(bar_part.boundingRect().x(), y - label.boundingRect().height() - 8) label.setMaxWidth(bar_part.boundingRect().width()) self.box_scene.addItem(label) def __draw_bars(self, y, bars): """Draw bars Parameters ---------- y: int vertical offset of bars bars: List[FilterGraphicsRectItem] list of bars to draw """ for item in bars: item.setPos(0, y) self.box_scene.addItem(item) # noinspection PyPep8Naming def _compute_tests_cont(self): # The t-test and ANOVA are implemented here since they efficiently use # the widget-specific data in self.stats. 
# The non-parametric tests can't do this, so we use statistics.tests # pylint: disable=comparison-with-itself def stat_ttest(): d1, d2 = self.stats if d1.n < 2 or d2.n < 2: return np.nan, np.nan pooled_var = d1.var / d1.n + d2.var / d2.n # pylint: disable=comparison-with-itself if pooled_var == 0 or np.isnan(pooled_var): return np.nan, np.nan df = pooled_var ** 2 / \ ((d1.var / d1.n) ** 2 / (d1.n - 1) + (d2.var / d2.n) ** 2 / (d2.n - 1)) t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var) p = 2 * (1 - scipy.special.stdtr(df, t)) return t, p # TODO: Check this function # noinspection PyPep8Naming def stat_ANOVA(): if any(stat.n == 0 for stat in self.stats): return np.nan, np.nan n = sum(stat.n for stat in self.stats) grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n var_between = sum(stat.n * (stat.mean - grand_avg)**2 for stat in self.stats) df_between = len(self.stats) - 1 var_within = sum(stat.n * stat.var for stat in self.stats) df_within = n - len(self.stats) if var_within == 0 or df_within == 0 or df_between == 0: return np.nan, np.nan F = (var_between / df_between) / (var_within / df_within) p = 1 - scipy.special.fdtr(df_between, df_within, F) return F, p n = len(self.dataset) if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2: t = "" elif any(s.n <= 1 for s in self.stats): t = "At least one group has just one instance, " \ "cannot compute significance" elif len(self.stats) == 2: if self.compare == OWBoxPlot.CompareMedians: t = "" # z, p = tests.wilcoxon_rank_sum( # self.stats[0].dist, self.stats[1].dist) # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, p) else: t, p = stat_ttest() t = "" if np.isnan( t) else f"Student's t: {t:.3f} (p={p:.3f}, N={n})" else: if self.compare == OWBoxPlot.CompareMedians: t = "" # U, p = -1, -1 # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, p) else: F, p = stat_ANOVA() t = "" if np.isnan(F) else f"ANOVA: {F:.3f} (p={p:.3f}, N={n})" self.stat_test = t def _compute_tests_disc(self): if self.group_var is None 
or self.attribute is None: self.stat_test = "" else: chi, p, dof = self._chi_square(self.group_var, self.attribute) if np.isnan(p): self.stat_test = "" else: self.stat_test = f"χ²: {chi:.2f} (p={p:.3f}, dof={dof})" def mean_label(self, stat, attr, val_name): label = QGraphicsItemGroup() t = QGraphicsSimpleTextItem(attr.str_val(stat.mean), label) t.setFont(self._label_font) bbox = t.boundingRect() w2, h = bbox.width() / 2, bbox.height() t.setPos(-w2, -h) tpm = QGraphicsSimpleTextItem( " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev), label) tpm.setFont(self._label_font) tpm.setPos(w2, -h) if val_name: vnm = QGraphicsSimpleTextItem(val_name + ": ", label) vnm.setFont(self._label_font) vnm.setBrush(self._attr_brush) vb = vnm.boundingRect() label.min_x = -w2 - vb.width() vnm.setPos(label.min_x, -h) else: label.min_x = -w2 return label def draw_axis(self): """Draw the horizontal axis and sets self.scale_x""" misssing_stats = not self.stats stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)] mean_labels = self.mean_labels or [ self.mean_label(stats[0], self.attribute, "") ] bottom = min(stat.a_min for stat in stats) top = max(stat.a_max for stat in stats) first_val, step = compute_scale(bottom, top) while bottom <= first_val: first_val -= step bottom = first_val no_ticks = math.ceil((top - first_val) / step) + 1 top = max(top, first_val + no_ticks * step) gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats)) gtop = max(top, max(stat.mean + stat.dev for stat in stats)) bv = self.box_view viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30) self.scale_x = scale_x = viewrect.width() / (gtop - gbottom) # In principle we should repeat this until convergence since the new # scaling is too conservative. (No chance am I doing this.) 
mlb = min(stat.mean + mean_lab.min_x / scale_x for stat, mean_lab in zip(stats, mean_labels)) if mlb < gbottom: gbottom = mlb self.scale_x = scale_x = viewrect.width() / (gtop - gbottom) self.scene_min_x = gbottom * scale_x self.scene_max_x = gtop * scale_x self.scene_width = self.scene_max_x - self.scene_min_x val = first_val last_text = self.scene_min_x while True: l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1, self._pen_axis_tick) l.setZValue(100) t = QGraphicsSimpleTextItem( self.attribute.str_val(val) if not misssing_stats else "?") t.setFont(self._axis_font) t.setFlag(QGraphicsItem.ItemIgnoresTransformations) r = t.boundingRect() x_start = val * scale_x - r.width() / 2 x_finish = x_start + r.width() if x_start > last_text + 10 and x_finish < self.scene_max_x: t.setPos(x_start, 8) self.box_scene.addItem(t) last_text = x_finish if val >= top: break val += step self.box_scene.addLine(bottom * scale_x - 4, 0, top * scale_x + 4, 0, self._pen_axis) def draw_stat(self): if self.stat_test: label = QGraphicsSimpleTextItem(self.stat_test) brect = self.box_scene.sceneRect() label.setPos(brect.center().x() - label.boundingRect().width() / 2, 8 + self._axis_font.pixelSize() * 2) label.setFlag(QGraphicsItem.ItemIgnoresTransformations) self.box_scene.addItem(label) def draw_axis_disc(self): """ Draw the horizontal axis and sets self.scale_x for discrete attributes """ assert not self.attribute.is_continuous if self.show_stretched: if not self.attr_labels: return step = steps = 10 else: if self.group_var: max_box = max( float(np.sum(dist)) for dist in self.conts.array_with_unknowns) else: max_box = float(np.sum(self.dist.array_with_unknowns)) if max_box == 0: self.scale_x = 1 return _, step = compute_scale(0, max_box) step = int(step) if step > 1 else 1 steps = int(math.ceil(max_box / step)) max_box = step * steps bv = self.box_view viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30) self.scene_width = viewrect.width() lab_width = 
max(lab.boundingRect().width() for lab in self.attr_labels) lab_width = max(lab_width, 40) lab_width = min(lab_width, self.scene_width / 3) self.label_width = lab_width right_offset = 0 # offset for the right label if not self.show_stretched and self.labels: if self.group_var: rows = list(zip(self.conts.array_with_unknowns, self.labels)) else: rows = [(self.dist, self.labels[0])] # available space left of the 'group labels' available = self.scene_width - lab_width - 10 scale_x = (available - right_offset) / max_box max_right = max( sum(dist) * scale_x + 10 + lbl.boundingRect().width() for dist, lbl in rows) right_offset = max(0, max_right - max_box * scale_x) self.scale_x = scale_x = \ (self.scene_width - lab_width - 10 - right_offset) / max_box self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis) for val in range(0, step * steps + 1, step): l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1, self._pen_axis_tick) l.setZValue(100) t = self.box_scene.addSimpleText(str(val), self._axis_font) t.setPos(val * scale_x - t.boundingRect().width() / 2, 8) if self.show_stretched: self.scale_x *= 100 def label_group(self, stat, attr, mean_lab): def centered_text(val, pos): t = QGraphicsSimpleTextItem(attr.str_val(val), labels) t.setFont(self._label_font) bbox = t.boundingRect() t.setPos(pos - bbox.width() / 2, 22) return t def line(x, down=1): QGraphicsLineItem(x, 12 * down, x, 20 * down, labels) def move_label(label, frm, to): label.setX(to) to += t_box.width() / 2 path = QPainterPath() path.lineTo(0, 4) path.lineTo(to - frm, 4) path.lineTo(to - frm, 8) p = QGraphicsPathItem(path) p.setPos(frm, 12) labels.addToGroup(p) labels = QGraphicsItemGroup() labels.addToGroup(mean_lab) m = stat.mean * self.scale_x mean_lab.setPos(m, -22) line(m, -1) if stat.median is not None: msc = stat.median * self.scale_x med_t = centered_text(stat.median, msc) med_box_width2 = med_t.boundingRect().width() / 2 line(msc) if stat.q25 is not None: x = stat.q25 * self.scale_x 
t = centered_text(stat.q25, x) t_box = t.boundingRect() med_left = msc - med_box_width2 if x + t_box.width() / 2 >= med_left - 5: move_label(t, x, med_left - t_box.width() - 5) else: line(x) if stat.q75 is not None: x = stat.q75 * self.scale_x t = centered_text(stat.q75, x) t_box = t.boundingRect() med_right = msc + med_box_width2 if x - t_box.width() / 2 <= med_right + 5: move_label(t, x, med_right + 5) else: line(x) return labels def box_group(self, stat, height=20): def line(x0, y0, x1, y1, *args): return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args) scale_x = self.scale_x box = [] whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5) whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5) vert_line = line(stat.a_min, 0, stat.a_max, 0) mean_line = line(stat.mean, -height / 3, stat.mean, height / 3) for it in (whisker1, whisker2, mean_line): it.setPen(self._pen_paramet) vert_line.setPen(self._pen_dotted) var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0) var_line.setPen(self._pen_paramet) box.extend([whisker1, whisker2, vert_line, mean_line, var_line]) if stat.q25 is not None or stat.q75 is not None: # if any of them is None it means that its value is equal to median box_from = stat.q25 or stat.median box_to = stat.q75 or stat.median mbox = FilterGraphicsRectItem(stat.data_range, box_from * scale_x, -height / 2, (box_to - box_from) * scale_x, height) mbox.setBrush(self._box_brush) mbox.setPen(QPen(Qt.NoPen)) mbox.setZValue(-200) box.append(mbox) if stat.median is not None: median_line = line(stat.median, -height / 2, stat.median, height / 2) median_line.setPen(self._pen_median) median_line.setZValue(-150) box.append(median_line) return box def strudel(self, dist, group_val=None): attr = self.attribute ss = np.sum(dist) box = [] if ss < 1e-6: cond = DiscDataRange(None, group_val) box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10)) cum = 0 missing_val_str = f"missing '{attr.name}'" values = attr.values + ("", ) colors = 
attr.palette.qcolors_w_nan total = sum(dist) for freq, value, color in zip(dist, values, colors): if freq < 1e-6: continue v = freq if self.show_stretched: v /= ss v *= self.scale_x cond = DiscDataRange(value, group_val) rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12) rect.setBrush(QBrush(color)) rect.setPen(QPen(Qt.NoPen)) value = value or missing_val_str if self.show_stretched: tooltip = f"{value}: {100 * freq / total:.2f}%" else: tooltip = f"{value}: ({int(freq)})" rect.setToolTip(tooltip) text = QGraphicsTextItem(value) box.append(rect) box.append(text) cum += v return box def on_selection_changed(self): self.selection = tuple(item.data_range for item in self.box_scene.selectedItems() if item.data_range) self.commit() def commit(self): conditions = self._gather_conditions() if conditions: selected = Values(conditions, conjunction=False)(self.dataset) selection = np.in1d(self.dataset.ids, selected.ids, assume_unique=True).nonzero()[0] else: selected, selection = None, [] self.Outputs.selected_data.send(selected) self.Outputs.annotated_data.send( create_annotated_table(self.dataset, selection)) def _gather_conditions(self): conditions = [] attr = self.attribute group_attr = self.group_var for data_range in self.selection: if attr.is_discrete: # If some value was removed from the data (in case settings are # loaded from a scheme), do not include the corresponding # filter; this is appropriate since data with such value does # not exist anyway if not data_range.value: condition = IsDefined([attr], negate=True) elif data_range.value not in attr.values: continue else: condition = FilterDiscrete(attr, [data_range.value]) else: condition = FilterContinuous(attr, FilterContinuous.Between, data_range.low, data_range.high) if data_range.group_value: if not data_range.group_value: grp_filter = IsDefined([group_attr], negate=True) elif data_range.group_value not in group_attr.values: continue else: grp_filter = FilterDiscrete(group_attr, 
[data_range.group_value]) condition = Values([condition, grp_filter], conjunction=True) conditions.append(condition) return conditions def _show_posthoc(self): def line(y0, y1): it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen) it.setZValue(-100) self.posthoc_lines.append(it) while self.posthoc_lines: self.box_scene.removeItem(self.posthoc_lines.pop()) if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2: return if self.compare == OWBoxPlot.CompareMedians: crit_line = "median" else: crit_line = "mean" xs = [] height = 90 if self.show_annotations else 60 y_up = -len(self.stats) * height + 10 for pos, box_index in enumerate(self.order): stat = self.stats[box_index] x = getattr(stat, crit_line) if x is None: continue x *= self.scale_x xs.append(x * self.scale_x) by = y_up + pos * height line(by + 12, 0) used_to = [] last_to = to = 0 for frm, frm_x in enumerate(xs[:-1]): for to in range(frm + 1, len(xs)): if xs[to] - frm_x > 1.5: to -= 1 break if to in (last_to, frm): continue for rowi, used in enumerate(used_to): if used < frm: used_to[rowi] = to break else: rowi = len(used_to) used_to.append(to) y = -6 - rowi * 6 it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y, self._post_grp_pen) self.posthoc_lines.append(it) last_to = to def get_widget_name_extension(self): return self.attribute.name if self.attribute else None def send_report(self): self.report_plot() text = "" if self.attribute: text += "Box plot for attribute '{}' ".format(self.attribute.name) if self.group_var: text += "grouped by '{}'".format(self.group_var.name) if text: self.report_caption(text) class Label(QGraphicsSimpleTextItem): """Boxplot Label with settable maxWidth""" # Minimum width to display label text MIN_LABEL_WIDTH = 25 # padding bellow the text PADDING = 3 __max_width = None def maxWidth(self): return self.__max_width def setMaxWidth(self, max_width): self.__max_width = max_width def paint(self, painter, option, widget): """Overrides QGraphicsSimpleTextItem.paint 
If label text is too long, it is elided to fit into the allowed region """ if self.__max_width is None: width = option.rect.width() else: width = self.__max_width if width < self.MIN_LABEL_WIDTH: # if space is too narrow, no label return fm = painter.fontMetrics() text = fm.elidedText(self.text(), Qt.ElideRight, width) painter.drawText( option.rect.x(), option.rect.y() + self.boundingRect().height() - self.PADDING, text)
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler that fills the list boxes and calls
      `attr_changed`.

    - `attr_changed` handles changes of attribute or grouping (callbacks for
      list boxes). It recomputes box data by calling `compute_box_data`,
      shows the appropriate display box (discrete/continuous) and then calls
      `layout_changed`.

    - `layout_changed` constructs all the elements for the scene (as lists of
      QGraphicsItemGroup) and calls `display_changed`. It is called when the
      attribute or grouping is changed (by `attr_changed`) and on resize
      event.

    - `display_changed` puts the elements corresponding to the current
      display settings on the scene. It is called when the elements are
      reconstructed (layout is changed due to selection of attributes or
      resize event), or when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by `layout_changed`). Instead, `layout_changed`
    and `display_changed` call `display_changed_disc` that draws everything.
    """
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100
    inputs = [("Data", Orange.data.Table, "set_data")]

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()

    attribute = ContextSetting(None)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMedians)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)

    # Name of the BoxData attribute used to order the boxes for each
    # comparison type; an empty string keeps the original order.
    _sorting_criteria_attrs = {
        CompareNone: "", CompareMedians: "median", CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted,
                _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    # Tick marks keep flat caps; must follow the loop, which sets round caps.
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        """Create the widget state, the control area and the graphics view."""
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        # Each attribute gets its own list; a chained `a = b = []` would make
        # them aliases of one object, so mutating one would change them all.
        self.label_txts = []
        self.mean_labels = []
        self.boxes = []
        self.labels = []
        self.label_txts_all = []
        self.attr_labels = []
        self.order = []
        self.p = -1.0
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        common_options = dict(
            callback=self.attr_changed, sizeHint=(200, 100))
        self.attrs = VariableListModel()
        gui.listView(
            self.controlArea, self, "attribute", box="Variable",
            model=self.attrs, **common_options)
        self.group_vars = VariableListModel()
        gui.listView(
            self.controlArea, self, "group_var", box="Grouping",
            model=self.group_vars, **common_options)

        # TODO: move Compare median/mean to grouping box
        self.display_box = gui.vBox(self.controlArea, "Display")

        gui.checkBox(self.display_box, self, "show_annotations", "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box, self, 'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.display_changed)

        self.stretching_box = gui.checkBox(
            self.controlArea, self, 'stretched', "Stretch bars",
            box='Display', callback=self.display_changed).box

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing |
                                     QPainter.TextAntialiasing |
                                     QPainter.SmoothPixmapTransform)
        # Resize events of the viewport trigger a re-layout (see eventFilter).
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(650)

        self.stats, self.dist, self.conts = [], [], []
        self.is_continuous = False

        self.update_display_box()

    def eventFilter(self, obj, event):
        """Re-layout the scene whenever the view's viewport is resized."""
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.layout_changed()

        return super().eventFilter(obj, event)

    # noinspection PyTypeChecker
    def set_data(self, dataset):
        """Input signal handler: store the data and fill the list boxes.

        An empty table or a table with an empty domain is treated as no data.
        """
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist, self.stats, self.conts = [], [], []
        self.group_var = None
        self.attribute = None
        if dataset:
            domain = dataset.domain
            self.group_vars[:] = \
                [None] + \
                [a for a in chain(domain.variables, domain.metas)
                 if a.is_discrete]
            self.attrs[:] = chain(
                domain.variables,
                (a for a in domain.metas if a.is_primitive()))
            if self.attrs:
                self.attribute = self.attrs[0]
            if domain.class_var and domain.class_var.is_discrete:
                self.group_var = domain.class_var
            else:
                # Reset to trigger selection via callback
                self.group_var = None
            self.openContext(self.dataset)
            self.attr_changed()
        else:
            self.reset_all_data()

    def reset_all_data(self):
        """Clear the scene, the test label and both variable lists."""
        self.clear_scene()
        self.infot1.setText("")
        self.attrs[:] = []
        self.group_vars[:] = []
        self.is_continuous = False
        self.update_display_box()

    def attr_changed(self):
        """Recompute everything after the attribute or grouping changed."""
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            heights = 90 if self.show_annotations else 60
            self.box_view.centerOn(self.scene_min_x + self.scene_width / 2,
                                   -30 - len(self.stats) * heights / 2 + 45)
        else:
            self.box_view.centerOn(self.scene_width / 2,
                                   -30 - len(self.boxes) * 40 / 2 + 45)

    def compute_box_data(self):
        """Compute distributions/contingencies and per-group box statistics.

        Sets `dist`, `conts`, `stats`, `label_txts_all` and `label_txts`;
        groups without instances (`stat.n == 0`) are dropped from `stats`
        and `label_txts`.
        """
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        if dataset is None:
            self.stats, self.dist, self.conts = [], [], []
            return
        self.is_continuous = attr.is_continuous
        if self.group_var:
            self.dist = []
            self.conts = contingency.get_contingency(
                dataset, attr, self.group_var)
            if self.is_continuous:
                self.stats = [BoxData(cont) for cont in self.conts]
            self.label_txts_all = self.group_var.values
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist)]
            self.label_txts_all = [""]
        # label_txts must be filtered before stats, using the unfiltered stats
        self.label_txts = [
            txts for stat, txts in zip(self.stats, self.label_txts_all)
            if stat.n > 0]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        """Show the controls matching the type of the current attribute."""
        if self.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()

    def clear_scene(self):
        """Remove all items from the scene and forget references to them."""
        self.box_scene.clear()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []

    def layout_changed(self):
        """Rebuild the scene elements and hand over to `display_changed`."""
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None or len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        self.mean_labels = [
            self.mean_label(stat, attr, lab)
            for stat, lab in zip(self.stats, self.label_txts)]
        # draw_axis sets self.scale_x, which box_group and label_group use
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [
            self.label_group(stat, attr, mean_lab)
            for stat, mean_lab in zip(self.stats, self.mean_labels)]
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts]
        for it in chain(self.labels, self.boxes, self.attr_labels):
            self.box_scene.addItem(it)
        self.display_changed()

    def display_changed(self):
        """Position boxes and labels according to the display settings."""
        if self.dataset is None:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            self.order = sorted(
                self.order, key=lambda i: getattr(self.stats[i], criterion))

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            self.boxes[box_index].setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width, len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()

    def display_changed_disc(self):
        """Draw the whole scene (axis, bars, labels) for a discrete attribute."""
        self.clear_scene()
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all]

        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts]
            else:
                self.labels = [QGraphicsTextItem(str(int(sum(self.dist))))]

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = [self.strudel(cont) for cont in self.conts]
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box in enumerate(self.boxes):
            y = (-len(self.boxes) + row) * 40 + 10

            label = self.attr_labels[row]
            b = label.boundingRect()
            label.setPos(-b.width() - 10, y - b.height() / 2)
            self.box_scene.addItem(label)

            if not self.stretched:
                label = self.labels[row]
                b = label.boundingRect()
                if self.group_var:
                    right = self.scale_x * sum(self.conts[row])
                else:
                    right = self.scale_x * sum(self.dist)
                label.setPos(right + 10, y - b.height() / 2)
                self.box_scene.addItem(label)

            # strudel() stores children as alternating (bar, text) pairs:
            # even indices are the bars, odd indices the value texts.
            if self.attribute is not self.group_var:
                for text_item, bar_part in zip(box.childItems()[1::2],
                                               box.childItems()[::2]):
                    label = QGraphicsSimpleTextItem(text_item.toPlainText())
                    label.setPos(bar_part.boundingRect().x(),
                                 y - label.boundingRect().height() - 8)
                    self.box_scene.addItem(label)
            # The text items only carry the value names; take them out of
            # the group before it is added to the scene.
            for text_item in box.childItems()[1::2]:
                box.removeFromGroup(text_item)
            self.box_scene.addItem(box)
            box.setPos(0, y)
        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width,
                                    len(self.boxes) * 40 + 90)
        self.infot1.setText("")

    # noinspection PyPep8Naming
    def compute_tests(self):
        """Compute the significance test and show its result in `infot1`.

        With two groups and comparison of means, a t statistic is computed;
        with more groups, ANOVA. Comparison of medians currently shows no
        test. If a statistic cannot be computed (zero or undefined
        variance), the label is left empty instead of raising or
        showing "nan".
        """
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests
        def stat_ttest():
            d1, d2 = self.stats
            pooled_var = d1.var / d1.n + d2.var / d2.n
            # Constant groups give zero variance; avoid dividing by it
            if pooled_var == 0 or np.isnan(pooled_var):
                return np.nan, np.nan
            # Degrees of freedom for unequal variances (Welch-Satterthwaite)
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg) ** 2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            if var_within == 0 or df_within == 0 or df_between == 0:
                return np.nan, np.nan
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, self.p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p)
            else:
                t, self.p = stat_ttest()
                t = "" if np.isnan(t) else \
                    "Student's t: %.3f (p=%.3f)" % (t, self.p)
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, self.p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p)
            else:
                F, self.p = stat_ANOVA()
                t = "" if np.isnan(F) else \
                    "ANOVA: %.3f (p=%.3f)" % (F, self.p)
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        """Return a group item showing "[val_name: ]mean ± deviation".

        The mean text is horizontally centred on x=0 of the group; the
        returned group has an extra attribute `min_x` holding the x
        coordinate of its left-most text.
        """
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(
            "%.*f" % (attr.number_of_decimals + 1, stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        missing_stats = not self.stats
        # Without any statistics, draw a placeholder axis labelled with "?"
        stats = self.stats or [BoxData(np.array([[0.], [1.]]))]
        mean_labels = self.mean_labels or [
            self.mean_label(stats[0], self.attribute, "")]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        # Move the first tick strictly below the smallest value
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(
                self.attribute.repr_val(val) if not missing_stats else "?",
                self._axis_font)
            t.setFlags(t.flags() | QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        self.box_scene.addLine(
            bottom * scale_x - 4, 0, top * scale_x + 4, 0, self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        if self.stretched:
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        # Axis length in data units; 100 (percent) when bars are stretched
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(
                sum(dist) * scale_x + 10 + lbl.boundingRect().width()
                for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            # strudel() normalizes stretched bars to [0, 1], the axis shows
            # 0..100
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        """Return a group with the annotation labels for one box.

        The group contains `mean_lab` above the box and the texts for the
        median and both quartiles below it. A quartile text that would come
        within 5 pixels of the median text is moved aside and connected to
        its position with a bent line.
        """
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            label.setX(to)
            to += label.boundingRect().width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        msc = stat.median * self.scale_x
        med_t = centered_text(stat.median, msc)
        # Half of the width: the median text is centred on msc, so its
        # edges are at msc -/+ half of its width
        med_box_width2 = med_t.boundingRect().width() / 2
        line(msc)

        x = stat.q25 * self.scale_x
        t = centered_text(stat.q25, x)
        t_box = t.boundingRect()
        med_left = msc - med_box_width2
        if x + t_box.width() / 2 >= med_left - 5:
            move_label(t, x, med_left - t_box.width() - 5)
        else:
            line(x)

        x = stat.q75 * self.scale_x
        t = centered_text(stat.q75, x)
        t_box = t.boundingRect()
        med_right = msc + med_box_width2
        if x - t_box.width() / 2 <= med_right + 5:
            move_label(t, x, med_right + 5)
        else:
            line(x)

        return labels

    def box_group(self, stat, height=20):
        """Return a group item with the box, whiskers, mean and median lines.

        Horizontal coordinates are `stat` values multiplied by self.scale_x.
        """
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = QGraphicsItemGroup()
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5, box)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5, box)
        vert_line = line(stat.a_min, 0, stat.a_max, 0, box)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3, box)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0, box)
        var_line.setPen(self._pen_paramet)

        mbox = QGraphicsRectItem(stat.q25 * scale_x, -height / 2,
                                 (stat.q75 - stat.q25) * scale_x, height,
                                 box)
        mbox.setBrush(self._box_brush)
        mbox.setPen(QPen(Qt.NoPen))
        mbox.setZValue(-200)

        median_line = line(stat.median, -height / 2,
                           stat.median, height / 2, box)
        median_line.setPen(self._pen_median)
        median_line.setZValue(-150)

        return box

    def strudel(self, dist):
        """Return a group item with a stacked bar for distribution `dist`.

        For every value with a non-negligible frequency, the group gets a
        colored rectangle followed by a text item with the value's name, so
        children alternate (bar, text). If the distribution is empty, a
        single placeholder rectangle is added instead.
        """
        attr = self.attribute
        ss = np.sum(dist)
        box = QGraphicsItemGroup()
        if ss < 1e-6:
            QGraphicsRectItem(0, -10, 1, 10, box)
        cum = 0
        total = sum(dist)  # loop invariant, used for the percentages
        for i, freq in enumerate(dist):
            if freq < 1e-6:
                continue
            width = freq / ss if self.stretched else freq
            width *= self.scale_x
            rect = QGraphicsRectItem(cum + 1, -6, width - 2, 12, box)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / total)
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(attr.values[i])
            box.addToGroup(text)
            cum += width
        return box

    def show_posthoc(self):
        """Redraw the vertical guide lines and group lines for comparisons."""
        def line(y0, y1):
            # `x` is looked up in the enclosing scope at call time
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line) * self.scale_x
            xs.append(x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if last_to == to or frm == to:
                continue
            # Reuse the first row whose last line ended before `frm`;
            # otherwise (for-else) start a new row
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = -6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        """Return the name of the shown attribute, or None if there is none."""
        if self.attribute:
            return self.attribute.name
        return None

    def send_report(self):
        """Report the plot, captioned with the attribute and grouping names."""
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)