def draw_distributions(self):
    """Draw per-class distribution bars for the displayed discrete attributes.

    Nothing is drawn unless ``self.show_distributions`` is set, data is
    present and the domain has a discrete class.  Contingencies of all
    discrete attributes (and of the class with itself) are computed once and
    cached in ``self.domain_contingencies``.  For every displayed discrete
    attribute, one filled ``PolygonCurve`` per (attribute value, class value)
    pair is attached to the plot next to the attribute's axis.  Bar widths
    are the contingency counts scaled so that the largest cached count maps
    to a width of 0.5.
    """
    if not (self.show_distributions and self.data is not None
            and self.domain.has_discrete_class):
        return

    class_ = self.domain.class_var
    class_count = len(class_.values)

    # we create a hash table of possible class values
    # (happens only if we have a discrete class)
    if self.domain_contingencies is None:
        self.domain_contingencies = dict(
            zip([attr for attr in self.domain if attr.is_discrete],
                get_contingencies(self.data, skipContinuous=True)))
        self.domain_contingencies[class_] = get_contingency(
            self.data, class_, class_)

    max_count = max([contingency.max()
                     for contingency in self.domain_contingencies.values()]
                    or [1])
    sorted_class_values = get_variable_values_sorted(class_)

    for axis_idx, attr_idx in enumerate(self.attribute_indices):
        attr = self.domain[attr_idx]
        # Only discrete attributes are keys of the contingency cache and
        # have ``values``; the original test was inverted and skipped them.
        if not attr.is_discrete:
            continue

        contingency = self.domain_contingencies[attr]
        attr_len = len(attr.values)

        # we create a hash table of variable values and their indices
        sorted_variable_values = get_variable_values_sorted(attr)

        # create bar curve
        for j in range(attr_len):
            attribute_value = sorted_variable_values[j]
            value_count = contingency[:, attribute_value]
            # vertical centre of the j-th value's slot on the [0, 1] axis
            y_off = float(1.0 + 2.0 * j) / float(2 * attr_len)

            for i in range(class_count):
                class_value = sorted_class_values[i]

                color = QColor(*self.colors[i])
                color.setAlpha(self.alpha_value)

                width = float(value_count[class_value] * 0.5) / float(max_count)
                height = 0.7 / float(class_count * attr_len)

                # bars of one value are stacked downwards around y_off
                y_low_bottom = y_off + float(class_count * height) / 2.0 - i * height
                curve = PolygonCurve(
                    QPen(color), QBrush(color),
                    xData=[axis_idx, axis_idx + width,
                           axis_idx + width, axis_idx],
                    yData=[y_low_bottom, y_low_bottom,
                           y_low_bottom - height, y_low_bottom - height],
                    tooltip=attr.name)
                curve.attach(self)
def draw_axes(self):
    """Remove all axes and add one vertical user axis per shown attribute.

    Axis ``UserAxis + i`` is a line from ``(i, 0)`` to ``(i, 1)`` titled with
    the attribute's name.  When ``self.show_attr_values`` is set, continuous
    attributes get a scale taken from ``self.attr_values`` and discrete ones
    get their sorted values as labels, each centred in its slot on [0, 1].
    """
    self.remove_all_axes()
    for position, attr_ref in enumerate(self.attributes):
        axis_id = UserAxis + position
        axis = self.add_axis(axis_id,
                             line=QLineF(position, 0, position, 1),
                             arrows=AxisStart | AxisEnd, zoomable=True)
        axis.always_horizontal_text = True
        axis.max_text_width = 100
        axis.title_margin = -10
        axis.text_margin = 0
        axis.setZValue(5)
        self.set_axis_title(axis_id, self.domain[attr_ref].name)
        self.set_show_axis_title(axis_id, self.show_attr_values)
        if not self.show_attr_values:
            continue

        attr = self.domain[attr_ref]
        if attr.is_continuous:
            bounds = self.attr_values[attr]
            self.set_axis_scale(axis_id, bounds[0], bounds[1])
        elif attr.is_discrete:
            labels = get_variable_values_sorted(attr)
            n_labels = len(labels)
            # centre of the j-th of n equal slots on the unit interval
            positions = [float(1.0 + 2.0 * j) / float(2 * n_labels)
                         for j in range(n_labels)]
            axis.set_bounds((0, 1))
            self.set_axis_labels(axis_id, labels=labels, values=positions)
def get_max_label_width(attr):
    """Return the largest label width among the values of attribute `attr`.

    `attr` is looked up in ``data.domain``.  For each of its sorted values a
    hidden ``CanvasText`` is created on ``self.canvas`` and the
    integer-truncated width of its bounding rectangle is measured.  The
    result is never below 0.
    """
    widths = [
        int(CanvasText(self.canvas, label, 0, 0, bold=0, show=False)
            .boundingRect().width())
        for label in get_variable_values_sorted(data.domain[attr])
    ]
    # the leading 0 reproduces the running maximum's initial value
    return max([0] + widths)
def create_legend():
    """Build the legend items and return them laid out by wrap_legend_items.

    With no colour variable (``self.variable_color is None``) the legend
    shows the residual ranges with red/blue fills and black edges; otherwise
    it shows the sorted class values with the class colours used for both
    fill and edge.  Names, colours and edges are paired with ``zip``, so the
    shortest sequence determines the number of items.
    """
    size = 8
    if self.variable_color is not None:
        names = get_variable_values_sorted(class_var)
        edges = colors = [QColor(*col) for col in class_var.colors]
    else:
        names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                 "Residuals:"]
        colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
        edges = repeat(Qt.black)

    def make_item(name, color, edgecolor):
        # a colour swatch centred on the origin, followed by its caption
        group = QGraphicsItemGroup()
        group.addToGroup(
            CanvasRectangle(None, -size / 2, -size / 2, size, size,
                            edgecolor, color))
        group.addToGroup(CanvasText(None, name, size, 0, Qt.AlignVCenter))
        return group

    items = [make_item(name, color, edgecolor)
             for name, color, edgecolor in zip(names, colors, edges)]
    return wrap_legend_items(
        items, hspacing=20, vspacing=16 + size,
        max_width=self.canvas_view.width() - xoff)
def get_max_label_width(attr):
    """Return the largest label width among the sorted values of `attr`.

    A hidden ``CanvasText`` is created on ``self.canvas`` for every value and
    the integer-truncated width of its bounding rectangle is measured.  The
    result is never below 0.
    """
    best = 0
    for label in get_variable_values_sorted(attr):
        probe = CanvasText(self.canvas, label, 0, 0, bold=0, show=False)
        label_width = int(probe.boundingRect().width())
        if label_width > best:
            best = label_width
    return best
def update_properties(self):
    """Regenerate the potentials image and place it on the pixmap item.

    Returns early when there is no plot or when the mapped data rectangle
    has no valid size.  The bitmap is computed by
    ``orangeom.potentialsBitmap`` for a size rounded down to a multiple of
    ``self.granularity``; the colour table is a blue-to-white ramp for a
    continuous class, or per-class shades fading from white for a discrete
    class, padded with white to 256 entries.
    """
    ## Mostly copied from OWScatterPlotGraph
    if not self.plot():
        return

    if not self.rect:
        x, y = self.axes()
        self.rect = self.plot().data_rect_for_axes(x, y)

    size = self.graph_transform().mapRect(self.rect).size().toSize()
    if not size.isValid():
        return

    # round both dimensions down to a multiple of the granularity
    rx = size.width()
    rx -= rx % self.granularity
    ry = size.height()
    ry -= ry % self.granularity

    offset = (self.graph_transform().map(QPointF(0, 0))
              - self.graph_transform().map(self.rect.topLeft()))
    offset = offset.toPoint()
    ox = offset.x()
    oy = -offset.y()

    if self.classifier.classVar.is_continuous:
        imagebmp = orangeom.potentialsBitmap(
            self.classifier, rx, ry, ox, oy, self.granularity, self.scale)
        palette = []
        for i in range(255):
            palette.append(qRgb(255. * i / 255., 255. * i / 255.,
                                255 - (255. * i / 255.)))
        palette.append(qRgb(255, 255, 255))
    else:
        imagebmp, nShades = orangeom.potentialsBitmap(
            self.classifier, rx, ry, ox, oy, self.granularity, self.scale,
            self.spacing)
        palette = []
        sortedClasses = get_variable_values_sorted(
            self.classifier.domain.classVar)
        for cls in self.classifier.classVar.values:
            color = self.plot().discPalette.getRGB(sortedClasses.index(cls))
            towhite = [255 - c for c in color]
            for shade in range(nShades):
                # si runs from 1 (white) towards 0 (the pure class colour)
                si = 1 - float(shade) / nShades
                palette.append(
                    qRgb(*tuple([color[i] + towhite[i] * si
                                 for i in (0, 1, 2)])))
        # pad the colour table with white up to 256 entries
        missing = 256 - len(palette)
        palette.extend([qRgb(255, 255, 255) for _ in range(missing)])

    self.potentialsImage = QImage(imagebmp, rx, ry, QImage.Format_Indexed8)
    if qVersion() < "4.5":
        color_table = ColorPaletteDlg.signedPalette(palette)
    else:
        color_table = palette
    self.potentialsImage.setColorTable(color_table)
    self.potentialsImage.setNumColors(256)
    self.pixmap_item.setPixmap(QPixmap.fromImage(self.potentialsImage))
    self.pixmap_item.setPos(
        self.graph_transform().map(self.rect.bottomLeft()))
def setup_plot(self):
    """Run the parent's plot setup, then title and label both axes.

    The bottom axis shows ``self.attr_x`` and the left axis ``self.attr_y``.
    A discrete variable gets its sorted values as axis labels; otherwise the
    labels are reset with ``None``.
    """
    super().setup_plot()
    axis_vars = {"bottom": self.attr_x, "left": self.attr_y}
    for axis, var in axis_vars.items():
        self.graph.set_axis_title(axis, var)
        labels = None
        if var and var.is_discrete:
            labels = get_variable_values_sorted(var)
        self.graph.set_axis_labels(axis, labels)
def draw_legend(x0_x1, y0_y1):
    """Draw a one-row legend, horizontally centred under the plot area.

    `x0_x1` and `y0_y1` are (start, end) pairs; only the x range and the end
    of the y range are used.  The last entry of the name list is the legend
    title and is placed first; every other entry gets a colour square
    followed by its text.
    """
    x0, x1 = x0_x1
    _, y1 = y0_y1

    pearson = self.interior_coloring == self.PEARSON
    if pearson:
        captions = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                    "Residuals:"]
        colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
    else:
        captions = get_variable_values_sorted(class_var) \
            + [class_var.name + ":"]
        colors = [QColor(*col) for col in class_var.colors]

    texts = [CanvasText(self.canvas, caption, alignment=Qt.AlignVCenter)
             for caption in captions]
    totalwidth = sum(text.boundingRect().width() for text in texts)

    # compute the x position of the center of the legend
    y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
    distance = 30
    startx = (x0 + x1) / 2 - (totalwidth + (len(texts)) * distance) / 2

    title = texts[-1]
    title.setPos(startx + 15, y)
    title.show()

    xoffset = title.boundingRect().width() + distance
    size = 8
    for i, text in enumerate(texts[:-1]):
        fill = colors[i]
        edgecolor = Qt.black if pearson else fill
        CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                        size, size, edgecolor, fill)
        text.setPos(startx + xoffset + 10, y)
        xoffset += distance + text.boundingRect().width()
def update_axes(self):
    """Refresh ticks, time mode, label and visibility of each plot axis.

    Axes and their variables come from ``self.master.get_axes()``.  A
    discrete variable gets one tick per sorted value, at positions
    0, 1, 2, ...; any other case resets the ticks with ``None``.  An axis
    without a variable is hidden.
    """
    plot_item = self.plot_widget.plotItem
    for axis, var in self.master.get_axes().items():
        axis_item = plot_item.getAxis(axis)
        ticks = None
        if var and var.is_discrete:
            ticks = [list(enumerate(get_variable_values_sorted(var)))]
        axis_item.setTicks(ticks)

        # falsy (possibly None) when there is no variable
        use_time = var and var.is_time
        self.plot_widget.plotItem.getAxis(axis).use_time(use_time)
        self.plot_widget.setLabel(axis=axis, text=var or "")
        if not var:
            self.plot_widget.hideAxis(axis)
def attr_changed(self):
    """React to a change of the x or y attribute.

    Resets the graph, re-titles and re-labels both axes (sorted values for a
    discrete variable, ``None`` otherwise), updates the enabled state of the
    class-density and regression-line checkboxes and sends the features.
    """
    self.graph.reset_graph()
    axis_vars = {"bottom": self.attr_x, "left": self.attr_y}
    for axis, var in axis_vars.items():
        self.graph.set_axis_title(axis, var)
        labels = None
        if var and var.is_discrete:
            labels = get_variable_values_sorted(var)
        self.graph.set_axis_labels(axis, labels)
    self.cb_class_density.setEnabled(self.can_draw_density())
    self.cb_reg_line.setEnabled(self.can_draw_regresssion_line())
    self.send_features()
def draw_legend(self):
    """Fill the legend according to the class variable of ``self.data``.

    * no class variable: the legend is cleared and ``self.old_legend_keys``
      is reset;
    * discrete class: the legend is cleared and gets one rectangular point
      item per sorted class value, coloured from ``self.colors``;
    * otherwise: a colour gradient is added, labelled with the values from
      ``self.attr_values`` formatted to the variable's number of decimals.
    """
    class_var = self.data.domain.class_var
    if not class_var:
        self.legend().clear()
        self.old_legend_keys = []
        return

    if class_var.is_discrete:
        self.legend().clear()
        for i, value in enumerate(get_variable_values_sorted(class_var)):
            marker = OWPoint(OWPoint.Rect, QColor(*self.colors[i]), 10)
            self.legend().add_item(class_var.name, value, marker)
        return

    values = self.attr_values[class_var]
    # e.g. "%.2f" for two decimals
    number_format = "%%.%df" % class_var.number_of_decimals
    self.legend().add_color_gradient(
        class_var.name, [number_format % v for v in values])
def update_properties(self):
    """Regenerate the potentials image and place it on the pixmap item.

    Returns early when there is no plot or when the mapped data rectangle
    has no valid size.  The bitmap comes from ``orangeom.potentialsBitmap``
    at a size rounded down to a multiple of ``self.granularity``.  Its
    colour table is a blue-to-white ramp for a continuous class, or
    per-class shades fading from white for a discrete class, padded with
    white to 256 entries.
    """
    ## Mostly copied from OWScatterPlotGraph
    if not self.plot():
        return

    if not self.rect:
        x, y = self.axes()
        self.rect = self.plot().data_rect_for_axes(x, y)

    mapped_size = self.graph_transform().mapRect(self.rect).size().toSize()
    if not mapped_size.isValid():
        return

    # round both dimensions down to a multiple of the granularity
    rx = mapped_size.width()
    ry = mapped_size.height()
    rx = rx - rx % self.granularity
    ry = ry - ry % self.granularity

    origin = self.graph_transform().map(QPointF(0, 0))
    corner = self.graph_transform().map(self.rect.topLeft())
    shift = (origin - corner).toPoint()
    ox = shift.x()
    oy = -shift.y()

    white = qRgb(255, 255, 255)
    if self.classifier.classVar.is_continuous:
        imagebmp = orangeom.potentialsBitmap(
            self.classifier, rx, ry, ox, oy, self.granularity, self.scale)
        ramp = [qRgb(255.*i/255., 255.*i/255., 255-(255.*i/255.))
                for i in range(255)]
        palette = ramp + [white]
    else:
        imagebmp, nShades = orangeom.potentialsBitmap(
            self.classifier, rx, ry, ox, oy, self.granularity, self.scale,
            self.spacing)
        palette = []
        sortedClasses = get_variable_values_sorted(
            self.classifier.domain.classVar)
        for cls in self.classifier.classVar.values:
            color = self.plot().discPalette.getRGB(sortedClasses.index(cls))
            towhite = [255 - c for c in color]
            for shade in range(nShades):
                # si runs from 1 (white) towards 0 (the pure class colour)
                si = 1 - float(shade) / nShades
                rgb = [color[i] + towhite[i] * si for i in (0, 1, 2)]
                palette.append(qRgb(*tuple(rgb)))
        # pad the colour table with white up to 256 entries
        palette.extend([white for _ in range(256 - len(palette))])

    image = QImage(imagebmp, rx, ry, QImage.Format_Indexed8)
    self.potentialsImage = image
    image.setColorTable(palette)
    image.setNumColors(256)
    self.pixmap_item.setPixmap(QPixmap.fromImage(self.potentialsImage))
    self.pixmap_item.setPos(
        self.graph_transform().map(self.rect.bottomLeft()))
def draw_legend(x0_x1, y0_y1):
    """Draw a one-row legend, horizontally centred under the plot area.

    `x0_x1` and `y0_y1` are (start, end) pairs; only the x range and the end
    of the y range are used.  The legend title is the last element of the
    name list and is positioned first; each remaining element is drawn as a
    colour square followed by its text.
    """
    x0, x1 = x0_x1
    _, y1 = y0_y1

    if self.interior_coloring == self.PEARSON:
        names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                 "Residuals:"]
        colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
    else:
        names = get_variable_values_sorted(class_var)
        names = names + [class_var.name + ":"]
        colors = [QColor(*col) for col in class_var.colors]

    items = []
    totalwidth = 0
    for name in names:
        item = CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
        items.append(item)
    for item in items:
        totalwidth += item.boundingRect().width()

    # compute the x position of the center of the legend
    distance = 30
    size = 8
    y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
    startx = (x0 + x1) / 2 - (totalwidth + (len(items)) * distance) / 2

    items[-1].setPos(startx + 15, y)
    items[-1].show()
    xoffset = items[-1].boundingRect().width() + distance

    for index in range(len(items) - 1):
        if self.interior_coloring == self.PEARSON:
            edgecolor = Qt.black
        else:
            edgecolor = colors[index]
        left = startx + xoffset
        CanvasRectangle(self.canvas, left, y - size / 2, size, size,
                        edgecolor, colors[index])
        items[index].setPos(startx + xoffset + 10, y)
        xoffset += distance + items[index].boundingRect().width()
def draw_axes(self):
    """Remove all axes and add one vertical user axis per shown attribute.

    Each axis runs from ``(i, 0)`` to ``(i, 1)`` and is titled with the
    attribute's name.  If ``self.show_attr_values`` is set, a continuous
    attribute's axis is scaled from ``self.attr_values`` and a discrete
    attribute's axis is labelled with its sorted values, each at the centre
    of its slot on [0, 1].
    """
    self.remove_all_axes()
    for idx in range(len(self.attributes)):
        variable = self.domain[self.attributes[idx]]
        axis_id = UserAxis + idx
        new_axis = self.add_axis(
            axis_id, line=QLineF(idx, 0, idx, 1),
            arrows=AxisStart | AxisEnd, zoomable=True)

        # fixed appearance shared by all axes
        new_axis.always_horizontal_text = True
        new_axis.max_text_width = 100
        new_axis.title_margin = -10
        new_axis.text_margin = 0
        new_axis.setZValue(5)

        self.set_axis_title(axis_id, variable.name)
        self.set_show_axis_title(axis_id, self.show_attr_values)

        if self.show_attr_values:
            if variable.is_continuous:
                self.set_axis_scale(axis_id,
                                    self.attr_values[variable][0],
                                    self.attr_values[variable][1])
            elif variable.is_discrete:
                attribute_values = get_variable_values_sorted(variable)
                count = len(attribute_values)
                centres = []
                for j in range(count):
                    centres.append(float(1.0 + 2.0 * j) / float(2 * count))
                new_axis.set_bounds((0, 1))
                self.set_axis_labels(axis_id, labels=attribute_values,
                                     values=centres)
def update_data(self, attr_x, attr_y, reset_view=True):
    """Redraw the scatter plot for the attribute pair (`attr_x`, `attr_y`).

    Clears the coordinate messages and the plot widget, determines which
    rows are valid for both attributes and, if none are, clears the
    selection, shows the missing-coordinates warning and returns.  Otherwise
    it optionally resets the view to the data range (`reset_view`), updates
    axis titles and labels, optionally adds the class-density image, and
    creates the scatter-plot item plus a larger item drawn underneath it
    with the selection colours.
    """
    self.master.Warning.missing_coords.clear()
    self.master.Information.missing_coords.clear()
    self._clear_plot_widget()

    self.shown_x, self.shown_y = attr_x, attr_y

    have_data = (self.jittered_data is not None
                 and len(self.jittered_data))
    if have_data:
        index_x = self.domain.index(attr_x)
        index_y = self.domain.index(attr_y)
        self.valid_data = self.get_valid_list([index_x, index_y])
        if not np.any(self.valid_data):
            self.valid_data = None
    else:
        self.valid_data = None

    if self.valid_data is None:
        self.selection = None
        self.n_points = 0
        self.master.Warning.missing_coords(
            self.shown_x.name, self.shown_y.name)
        return

    x_data, y_data = self.get_xy_data_positions(
        attr_x, attr_y, self.valid_data)
    self.n_points = len(x_data)

    if reset_view:
        min_x, max_x = np.nanmin(x_data), np.nanmax(x_data)
        min_y, max_y = np.nanmin(y_data), np.nanmax(y_data)
        data_rect = QRectF(min_x, min_y, max_x - min_x, max_y - min_y)
        self.view_box.setRange(data_rect, padding=0.025)
        self.view_box.init_history()
        self.view_box.tag_history()
    # the effective range is always re-read from the view box
    [min_x, max_x], [min_y, max_y] = self.view_box.viewRange()

    axes = (("bottom", attr_x, index_x), ("left", attr_y, index_y))
    for axis, name, index in axes:
        self.set_axis_title(axis, name)
        var = self.domain[index]
        labels = get_variable_values_sorted(var) if var.is_discrete else None
        self.set_labels(axis, labels)

    color_data, brush_data = self.compute_colors()
    color_data_sel, brush_data_sel = self.compute_colors_sel()
    size_data = self.compute_sizes()
    shape_data = self.compute_symbols()

    if self.should_draw_density():
        rgb_data = [pen.color().getRgb()[:3] for pen in color_data]
        self.density_img = classdensity.class_density_image(
            min_x, max_x, min_y, max_y, self.resolution,
            x_data, y_data, rgb_data)
        self.plot_widget.addItem(self.density_img)

    data_indices = np.flatnonzero(self.valid_data)
    if len(data_indices) != self.original_data.shape[1]:
        # some rows were dropped because of missing coordinates
        self.master.Information.missing_coords(
            self.shown_x.name, self.shown_y.name)

    self.scatterplot_item = ScatterPlotItem(
        x=x_data, y=y_data, data=data_indices,
        symbol=shape_data, size=size_data,
        pen=color_data, brush=brush_data)
    self.scatterplot_item_sel = ScatterPlotItem(
        x=x_data, y=y_data, data=data_indices,
        symbol=shape_data, size=size_data + SELECTION_WIDTH,
        pen=color_data_sel, brush=brush_data_sel)
    # the selection item is added first, the regular item after it
    for item in (self.scatterplot_item_sel, self.scatterplot_item):
        self.plot_widget.addItem(item)

    self.scatterplot_item.selected_points = []
    self.scatterplot_item.sigClicked.connect(self.select_by_click)

    self.update_labels()
    self.make_legend()
    self.plot_widget.replot()
def add_rect(x0, x1, y0, y1, condition, used_attrs, used_vals, attr_vals=""):
    """Draw one mosaic cell and register it as a selectable area.

    Parameters:
        x0, x1, y0, y1: bounds of the cell; degenerate extents are widened
            so that the rectangle is visible.
        condition: HTML text placed at the top of the cell's tooltip.
        used_attrs, used_vals: attributes and values that define the cell;
            stored, together with the outer rectangle, in ``self.areas``.
        attr_vals: key of the cell in ``conditionaldict``.

    An outer rectangle is always created.  If the cell is not empty, its
    interior is either filled with a single colour chosen from the
    standardized (Pearson) residual (when ``self.variable_color is None``)
    or split vertically in proportion to the class counts, optionally with
    a prior-distribution bar on the left and a subset bar on the right.
    """
    area_index = len(self.areas)
    x1 += (x0 == x1)
    y1 += (y0 == y1)
    # rectangles of width and height 1 are not shown - increase
    y1 += (x1 - x0 + y1 - y0 == 2)
    colors = class_var and [QColor(*col) for col in class_var.colors]

    def select_area(_, ev):
        self.select_area(area_index, ev)

    def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
        # without a pen colour the rectangle uses CanvasRectangle's defaults
        if pen_color is None:
            return CanvasRectangle(self.canvas, x, y, w, h, z=z,
                                   onclick=select_area, **args)
        if brush_color is None:
            brush_color = pen_color
        return CanvasRectangle(self.canvas, x, y, w, h, pen_color,
                               brush_color, z=z, onclick=select_area, **args)

    def line(x1, y1, x2, y2):
        r = QGraphicsLineItem(x1, y1, x2, y2, None)
        self.canvas.addItem(r)
        r.setPen(QPen(Qt.white, 2))
        r.setZValue(30)

    outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
    self.areas.append((used_attrs, used_vals, outer_rect))
    if not conditionaldict[attr_vals]:
        return

    if self.variable_color is None:
        s = sum(apriori_dists[0])
        expected = s * reduce(
            mul,
            (apriori_dists[i][used_vals[i]] / float(s)
             for i in range(len(used_vals))))
        actual = conditionaldict[attr_vals]
        pearson = float((actual - expected) / sqrt(expected))
        if pearson == 0:
            ind = 0
        else:
            # colour index grows with log2 of |residual|, clipped to 0..3
            ind = max(0, min(int(log(abs(pearson), 2)), 3))
        color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
        rect(x0, y0, x1 - x0, y1 - y0, -20, color)
        # only the literal is %-formatted; `condition` is prepended verbatim
        outer_rect.setToolTip(
            condition + "<hr/>" +
            "Expected instances: %.1f<br>"
            "Actual instances: %d<br>"
            "Standardized (Pearson) residual: %.1f" %
            (expected, conditionaldict[attr_vals], pearson))
    else:
        cls_values = get_variable_values_sorted(class_var)
        prior = get_distribution(data, class_var.name)
        total = 0
        for i, value in enumerate(cls_values):
            val = conditionaldict[attr_vals + "-" + value]
            if val == 0:
                continue
            if i == len(cls_values) - 1:
                # the last class takes whatever height is left
                v = y1 - y0 - total
            else:
                v = ((y1 - y0) * val) / conditionaldict[attr_vals]
            rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
            total += v

        if self.use_boxes and \
                abs(x1 - x0) > bar_width and abs(y1 - y0) > bar_width:
            # bar with the prior class distribution on the left edge
            total = 0
            line(x0 + bar_width, y0, x0 + bar_width, y1)
            n = sum(prior)
            for i, (val, color) in enumerate(zip(prior, colors)):
                if i == len(prior) - 1:
                    h = y1 - y0 - total
                else:
                    h = (y1 - y0) * val / n
                rect(x0, y0 + total, bar_width, h, 20, color)
                total += h

        if conditionalsubsetdict:
            if conditionalsubsetdict[attr_vals]:
                if self.subset_indices is not None:
                    # bar with the subset's class distribution on the right
                    line(x1 - bar_width, y0, x1 - bar_width, y1)
                    total = 0
                    n = conditionalsubsetdict[attr_vals]
                    if n:
                        for i, (cls, color) in \
                                enumerate(zip(cls_values, colors)):
                            val = conditionalsubsetdict[attr_vals + "-" + cls]
                            if val == 0:
                                continue
                            if i == len(prior) - 1:
                                v = y1 - y0 - total
                            else:
                                v = ((y1 - y0) * val) / n
                            rect(x1 - bar_width, y0 + total, bar_width, v,
                                 15, color)
                            total += v

        actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                  for i in range(len(prior))]
        n_actual = sum(actual)
        if n_actual > 0:
            apriori = [prior[key] for key in cls_values]
            n_apriori = sum(apriori)
            text = "<br/>".join(
                "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                (cls, act, 100.0 * act / n_actual,
                 apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                for cls, act, apr in zip(cls_values, actual, apriori))
        else:
            text = ""
        # The joined text has no trailing separator, so it is used whole;
        # slicing off four characters truncated the last class line.
        outer_rect.setToolTip("{}<hr>Instances: {}<br><br>{}".format(
            condition, n_actual, text))
def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs, used_vals, attr_vals):
    """Draw the value labels and the name of `attr` on one side of the plot.

    `side` (0..3) selects the alignment and the coordinate formulas.  Each
    side is drawn at most once (tracked in ``drawn_sides``).  For side 3
    drawing is postponed until the first attribute has its last value.  If
    the current cell is empty, its position is remembered in
    ``draw_positions`` for later use and nothing is drawn.
    """
    x0, x1 = x0_x1
    y0, y1 = y0_y1
    if side in drawn_sides:
        return

    # the text on the right will be drawn when we are processing
    # visualization of the last value of the first attribute
    if side == 3:
        first_attr_values = get_variable_values_sorted(used_attrs[0])
        if used_vals[0] != first_attr_values[-1]:
            return

    if not conditionaldict[attr_vals]:
        if side not in draw_positions:
            draw_positions[side] = (x0, x1, y0, y1)
        return
    if side in draw_positions:
        # restore the positions of attribute values and name
        (x0, x1, y0, y1) = draw_positions[side]

    drawn_sides.add(side)

    values = get_variable_values_sorted(attr)
    if side % 2:
        values = values[::-1]

    spaces = spacing * (total_attrs - side) * (len(values) - 1)
    width = x1 - x0 - spaces * (side % 2 == 0)
    height = y1 - y0 - spaces * (side % 2 == 1)

    total, counts = get_counts(attr_vals, values)

    align = [Qt.AlignTop | Qt.AlignHCenter,
             Qt.AlignRight | Qt.AlignVCenter,
             Qt.AlignBottom | Qt.AlignHCenter,
             Qt.AlignLeft | Qt.AlignVCenter][side]

    # calculate position of first attribute
    currpos = 0
    val_offset = self.ATTR_VAL_OFFSET
    for i, val in enumerate(values):
        if distributiondict[val] == 0:
            continue
        perc = counts[i] / float(total)
        rwidth = width * perc
        # candidate label positions, indexed by side
        label_xs = [x0 + currpos + rwidth / 2,
                    x0 - val_offset,
                    x0 + currpos + rwidth / 2,
                    x1 + val_offset]
        label_ys = [y1 + val_offset,
                    y0 + currpos + height * 0.5 * perc,
                    y0 - val_offset,
                    y0 + currpos + height * 0.5 * perc]
        CanvasText(self.canvas, val, label_xs[side], label_ys[side], align,
                   max_width=rwidth if side == 0 else None)
        space = height if side % 2 else width
        currpos += perc * space + spacing * (total_attrs - side)

    name_xs = [x0 + (x1 - x0) / 2,
               x0 - max_ylabel_w1 - val_offset,
               x0 + (x1 - x0) / 2,
               x1 + max_ylabel_w2 + val_offset]
    name_ys = [y1 + val_offset + self.ATTR_NAME_OFFSET,
               y0 + (y1 - y0) / 2,
               y0 - val_offset - self.ATTR_NAME_OFFSET,
               y0 + (y1 - y0) / 2]
    CanvasText(self.canvas, attr.name, name_xs[side], name_ys[side], align,
               bold=True, vertical=side % 2)
def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs, used_attrs, used_vals, attr_vals=""):
    """Recursively split an area by the values of the attributes in `attr_list`.

    Parameters:
        attr_list: attributes still to be drawn; the first one splits the
            current area, the rest are handled by recursive calls.
        x0_x1, y0_y1: (start, end) bounds of the current area.
        side: index of the side being processed; even sides split along x,
            odd sides along y.
        condition: tooltip text accumulated so far.
        total_attrs: passed through to ``draw_text`` and recursive calls.
        used_attrs, used_vals: attributes and values fixed so far.
        attr_vals: key of the current area in ``conditionaldict``.

    An area without instances is drawn as a single empty rectangle.
    """
    x0, x1 = x0_x1
    y0, y1 = y0_y1
    if conditionaldict[attr_vals] == 0:
        add_rect(x0, x1, y0, y1, "",
                 used_attrs, used_vals, attr_vals=attr_vals)
        # store coordinates for later drawing of labels
        draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                  used_attrs, used_vals, attr_vals)
        return

    attr = attr_list[0]
    # how much smaller rectangles do we draw
    edge = len(attr_list) * spacing
    values = get_variable_values_sorted(attr)
    if side % 2:
        values = values[::-1]  # reverse names if necessary

    horizontal = side % 2 == 0
    # we are drawing on the x axis for even sides, on the y axis otherwise
    extent = (x1 - x0) if horizontal else (y1 - y0)
    gaps = len(values) - 1
    # remove the space needed for separating different attr. values
    whole = max(0, extent - edge * gaps)
    # With a single value there are no gaps to distribute; skipping the
    # division avoids a ZeroDivisionError (edge is then only used as 0 * edge).
    if whole == 0 and gaps:
        edge = extent / float(gaps)

    total, counts = get_counts(attr_vals, values)

    # when visualizing the third attribute and the first attribute has
    # the last value, reverse the order in which the boxes are drawn;
    # otherwise, if the last cell, nearest to the labels of the fourth
    # attribute, is empty, we wouldn't be able to position the labels
    valrange = list(range(len(values)))
    if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
        attr1values = get_variable_values_sorted(used_attrs[0])
        if used_vals[0] == attr1values[-1]:
            valrange = valrange[::-1]

    for i in valrange:
        start = i * edge + whole * float(sum(counts[:i]) / total)
        end = i * edge + whole * float(sum(counts[:i + 1]) / total)
        val = values[i]
        htmlval = to_html(val)
        newattrvals = attr_vals + "-" + val if attr_vals else val

        tooltip = "{} {}: <b>{}</b><br/>".format(
            condition, attr.name, htmlval)
        attrs = used_attrs + [attr]
        vals = used_vals + [val]
        args = attrs, vals, newattrvals
        if horizontal:  # if we are moving horizontally
            if len(attr_list) == 1:
                add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args)
            else:
                draw_data(attr_list[1:], (x0 + start, x0 + end), (y0, y1),
                          side + 1, tooltip, total_attrs, *args)
        else:
            if len(attr_list) == 1:
                add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args)
            else:
                draw_data(attr_list[1:], (x0, x1), (y0 + start, y0 + end),
                          side + 1, tooltip, total_attrs, *args)

    draw_text(side, attr_list[0], (x0, x1), (y0, y1),
              total_attrs, used_attrs, used_vals, attr_vals)
def update_data(self, attr_x, attr_y, reset_view=True):
    """Rebuild the scatter plot for attributes `attr_x` and `attr_y`.

    The coordinate messages and the plot widget are cleared first.  If there
    is no jittered data, or no row is valid for both attributes, the
    selection is dropped, the missing-coordinates warning is raised and the
    method returns.  Otherwise the view is optionally reset to the data
    range (`reset_view`), the axes are titled and labelled, the density
    image is added if requested, and two scatter-plot items are created: a
    larger one with the selection colours and the regular one added after it.
    """
    master = self.master
    master.Warning.missing_coords.clear()
    master.Information.missing_coords.clear()
    self._clear_plot_widget()

    self.shown_x, self.shown_y = attr_x, attr_y

    if self.jittered_data is None or not len(self.jittered_data):
        self.valid_data = None
    else:
        index_x = self.domain.index(attr_x)
        index_y = self.domain.index(attr_y)
        valid = self.get_valid_list([index_x, index_y])
        self.valid_data = valid
        if not np.any(self.valid_data):
            self.valid_data = None

    if self.valid_data is None:
        # nothing can be shown
        self.selection = None
        self.n_points = 0
        self.master.Warning.missing_coords(self.shown_x.name,
                                           self.shown_y.name)
        return

    x_data, y_data = self.get_xy_data_positions(attr_x, attr_y,
                                                self.valid_data)
    self.n_points = len(x_data)

    if reset_view:
        min_x, max_x = np.nanmin(x_data), np.nanmax(x_data)
        min_y, max_y = np.nanmin(y_data), np.nanmax(y_data)
        self.view_box.setRange(
            QRectF(min_x, min_y, max_x - min_x, max_y - min_y),
            padding=0.025)
        self.view_box.init_history()
        self.view_box.tag_history()
    # the range actually shown by the view box is used from here on
    [min_x, max_x], [min_y, max_y] = self.view_box.viewRange()

    for axis, name, index in (("bottom", attr_x, index_x),
                              ("left", attr_y, index_y)):
        self.set_axis_title(axis, name)
        var = self.domain[index]
        if not var.is_discrete:
            self.set_labels(axis, None)
        else:
            self.set_labels(axis, get_variable_values_sorted(var))

    color_data, brush_data = self.compute_colors()
    color_data_sel, brush_data_sel = self.compute_colors_sel()
    size_data = self.compute_sizes()
    shape_data = self.compute_symbols()

    if self.should_draw_density():
        rgb_data = [pen.color().getRgb()[:3] for pen in color_data]
        self.density_img = classdensity.class_density_image(
            min_x, max_x, min_y, max_y, self.resolution,
            x_data, y_data, rgb_data)
        self.plot_widget.addItem(self.density_img)

    data_indices = np.flatnonzero(self.valid_data)
    if len(data_indices) != self.original_data.shape[1]:
        # part of the data has no coordinates for this attribute pair
        self.master.Information.missing_coords(self.shown_x.name,
                                               self.shown_y.name)

    # arguments shared by the regular and the selection item
    common = dict(x=x_data, y=y_data, data=data_indices, symbol=shape_data)
    self.scatterplot_item = ScatterPlotItem(
        size=size_data, pen=color_data, brush=brush_data, **common)
    self.scatterplot_item_sel = ScatterPlotItem(
        size=size_data + SELECTION_WIDTH,
        pen=color_data_sel, brush=brush_data_sel, **common)
    self.plot_widget.addItem(self.scatterplot_item_sel)
    self.plot_widget.addItem(self.scatterplot_item)

    self.scatterplot_item.selected_points = []
    self.scatterplot_item.sigClicked.connect(self.select_by_click)

    self.update_labels()
    self.make_legend()
    self.plot_widget.replot()
def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs, used_attrs, used_vals, attr_vals=""):
    """Recursively split an area by the values of the attributes in `attr_list`.

    Parameters:
        attr_list: attributes still to be drawn; the first one splits the
            current area, the remaining ones are handled recursively.
        x0_x1, y0_y1: (start, end) bounds of the current area.
        side: index of the side being processed; even sides split along x,
            odd sides along y.
        condition: tooltip text accumulated so far.
        total_attrs: passed through to ``draw_text`` and recursive calls.
        used_attrs, used_vals: attributes and values fixed so far.
        attr_vals: key of the current area in ``conditionaldict``.

    An area without instances is drawn as a single empty rectangle.
    """
    x0, x1 = x0_x1
    y0, y1 = y0_y1
    if conditionaldict[attr_vals] == 0:
        add_rect(x0, x1, y0, y1, "",
                 used_attrs, used_vals, attr_vals=attr_vals)
        # store coordinates for later drawing of labels
        draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                  used_attrs, used_vals, attr_vals)
        return

    attr = attr_list[0]
    # how much smaller rectangles do we draw
    edge = len(attr_list) * spacing
    values = get_variable_values_sorted(attr)
    if side % 2:
        values = values[::-1]  # reverse names if necessary

    n_gaps = len(values) - 1
    if side % 2 == 0:  # we are drawing on the x axis
        # remove the space needed for separating different attr. values
        whole = max(0, (x1 - x0) - edge * n_gaps)
        # a single value has no gaps; dividing by zero gaps used to raise
        if whole == 0 and n_gaps:
            edge = (x1 - x0) / float(n_gaps)
    else:  # we are drawing on the y axis
        whole = max(0, (y1 - y0) - edge * n_gaps)
        if whole == 0 and n_gaps:
            edge = (y1 - y0) / float(n_gaps)

    total, counts = get_counts(attr_vals, values)

    # when visualizing the third attribute and the first attribute has
    # the last value, reverse the order in which the boxes are drawn;
    # otherwise, if the last cell, nearest to the labels of the fourth
    # attribute, is empty, we wouldn't be able to position the labels
    valrange = list(range(len(values)))
    if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
        attr1values = get_variable_values_sorted(used_attrs[0])
        if used_vals[0] == attr1values[-1]:
            valrange = valrange[::-1]

    for i in valrange:
        start = i * edge + whole * float(sum(counts[:i]) / total)
        end = i * edge + whole * float(sum(counts[:i + 1]) / total)
        val = values[i]
        htmlval = to_html(val)
        newattrvals = attr_vals + "-" + val if attr_vals else val

        tooltip = "{} {}: <b>{}</b><br/>".format(
            condition, attr.name, htmlval)
        attrs = used_attrs + [attr]
        vals = used_vals + [val]
        args = attrs, vals, newattrvals
        if side % 2 == 0:  # if we are moving horizontally
            if len(attr_list) == 1:
                add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args)
            else:
                draw_data(
                    attr_list[1:], (x0 + start, x0 + end), (y0, y1),
                    side + 1, tooltip, total_attrs, *args)
        else:
            if len(attr_list) == 1:
                add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args)
            else:
                draw_data(
                    attr_list[1:], (x0, x1), (y0 + start, y0 + end),
                    side + 1, tooltip, total_attrs, *args)

    draw_text(side, attr_list[0], (x0, x1), (y0, y1),
              total_attrs, used_attrs, used_vals, attr_vals)
def add_rect(x0, x1, y0, y1, condition="", used_attrs=None, used_vals=None, attr_vals=""):
    """Draw one mosaic cell and register it as a selectable area.

    Parameters:
        x0, x1, y0, y1: bounds of the cell; degenerate extents are widened
            so that the rectangle is visible.
        condition: HTML text placed at the top of the cell's tooltip.
        used_attrs, used_vals: attributes and values that define the cell;
            stored, together with the outer rectangle, in ``self.areas``.
            When omitted, a fresh empty list is used for each call.
        attr_vals: key of the cell in ``conditionaldict``.

    An outer rectangle is always created.  If the cell is not empty, its
    interior is either filled with a single colour chosen from the
    standardized (Pearson) residual (``self.interior_coloring ==
    self.PEARSON``) or split vertically in proportion to the class counts,
    optionally with a prior-distribution bar on the left, a dashed frame
    when the subset has exactly one instance in the cell, and a subset bar
    on the right.
    """
    # Fresh lists per call: the lists are stored in self.areas, so shared
    # default lists would be aliased between all areas created by default.
    if used_attrs is None:
        used_attrs = []
    if used_vals is None:
        used_vals = []

    area_index = len(self.areas)
    if x0 == x1:
        x1 += 1
    if y0 == y1:
        y1 += 1

    # rectangles of width and height 1 are not shown - increase
    if x1 - x0 + y1 - y0 == 2:
        y1 += 1

    if class_var and class_var.is_discrete:
        colors = [QColor(*col) for col in class_var.colors]
    else:
        colors = None

    def select_area(_, ev):
        self.select_area(area_index, ev)

    def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
        # without a pen colour the rectangle uses CanvasRectangle's defaults
        if pen_color is None:
            return CanvasRectangle(
                self.canvas, x, y, w, h, z=z, onclick=select_area, **args)
        if brush_color is None:
            brush_color = pen_color
        return CanvasRectangle(
            self.canvas, x, y, w, h, pen_color, brush_color, z=z,
            onclick=select_area, **args)

    def line(x1, y1, x2, y2):
        r = QGraphicsLineItem(x1, y1, x2, y2, None)
        self.canvas.addItem(r)
        r.setPen(QPen(Qt.white, 2))
        r.setZValue(30)

    outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
    self.areas.append((used_attrs, used_vals, outer_rect))
    if not conditionaldict[attr_vals]:
        return

    if self.interior_coloring == self.PEARSON:
        s = sum(apriori_dists[0])
        expected = s * reduce(
            mul,
            (apriori_dists[i][used_vals[i]] / float(s)
             for i in range(len(used_vals))))
        actual = conditionaldict[attr_vals]
        pearson = (actual - expected) / sqrt(expected)
        if pearson == 0:
            ind = 0
        else:
            # colour index grows with log2 of |residual|, clipped to 0..3
            ind = max(0, min(int(log(abs(pearson), 2)), 3))
        color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
        rect(x0, y0, x1 - x0, y1 - y0, -20, color)
        # only the literal is %-formatted; `condition` is prepended verbatim
        outer_rect.setToolTip(
            condition + "<hr/>" +
            "Expected instances: %.1f<br>"
            "Actual instances: %d<br>"
            "Standardized (Pearson) residual: %.1f" %
            (expected, conditionaldict[attr_vals], pearson))
    else:
        cls_values = get_variable_values_sorted(class_var)
        prior = get_distribution(data, class_var.name)
        total = 0
        for i, value in enumerate(cls_values):
            val = conditionaldict[attr_vals + "-" + value]
            if val == 0:
                continue
            if i == len(cls_values) - 1:
                # the last class takes whatever height is left
                v = y1 - y0 - total
            else:
                v = ((y1 - y0) * val) / conditionaldict[attr_vals]
            rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
            total += v

        if self.use_boxes and \
                abs(x1 - x0) > bar_width and \
                abs(y1 - y0) > bar_width:
            # bar with the prior class distribution on the left edge
            total = 0
            line(x0 + bar_width, y0, x0 + bar_width, y1)
            n = sum(prior)
            for i, (val, color) in enumerate(zip(prior, colors)):
                if i == len(prior) - 1:
                    h = y1 - y0 - total
                else:
                    h = (y1 - y0) * val / n
                rect(x0, y0 + total, bar_width, h, 20, color)
                total += h

        if conditionalsubsetdict:
            if conditionalsubsetdict[attr_vals]:
                counts = [conditionalsubsetdict[attr_vals + "-" + val]
                          for val in cls_values]
                if sum(counts) == 1:
                    # a single subset instance: dashed frame in its colour
                    rect(x0 - 2, y0 - 2, x1 - x0 + 5, y1 - y0 + 5, -550,
                         colors[counts.index(1)], Qt.white,
                         penWidth=2, penStyle=Qt.DashLine)
                if self.subset_data is not None:
                    # bar with the subset's class distribution on the right
                    line(x1 - bar_width, y0, x1 - bar_width, y1)
                    total = 0
                    n = conditionalsubsetdict[attr_vals]
                    if n:
                        for i, (cls, color) in \
                                enumerate(zip(cls_values, colors)):
                            val = conditionalsubsetdict[
                                attr_vals + "-" + cls]
                            if val == 0:
                                continue
                            if i == len(prior) - 1:
                                v = y1 - y0 - total
                            else:
                                v = ((y1 - y0) * val) / n
                            rect(x1 - bar_width, y0 + total, bar_width, v,
                                 15, color)
                            total += v

        actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                  for i in range(len(prior))]
        n_actual = sum(actual)
        if n_actual > 0:
            apriori = [prior[key] for key in cls_values]
            n_apriori = sum(apriori)
            text = "<br/>".join(
                "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                (cls, act, 100.0 * act / n_actual,
                 apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                for cls, act, apr in zip(cls_values, actual, apriori))
        else:
            text = ""
        # The joined text has no trailing separator, so it is used whole;
        # slicing off four characters truncated the last class line.
        outer_rect.setToolTip(
            "{}<hr>Instances: {}<br><br>{}".format(
                condition, n_actual, text))
def draw_text(side, attr, x0_x1, y0_y1,
              total_attrs, used_attrs, used_vals, attr_vals):
    """Write the value labels and the name of ``attr`` along one side.

    Each side is labelled once (tracked in ``drawn_sides``). When the
    current area holds no instances, its coordinates are remembered in
    ``draw_positions`` and reused by a later call for the same side.
    """
    x0, x1 = x0_x1
    y0, y1 = y0_y1
    if side in drawn_sides:
        return

    # the text on the right will be drawn when we are processing
    # visualization of the last value of the first attribute
    if side == 3:
        first_attr_values = \
            get_variable_values_sorted(data.domain[used_attrs[0]])
        if used_vals[0] != first_attr_values[-1]:
            return

    if not conditionaldict[attr_vals]:
        if side not in draw_positions:
            draw_positions[side] = (x0, x1, y0, y1)
        return
    if side in draw_positions:
        # restore the positions of attribute values and name
        x0, x1, y0, y1 = draw_positions[side]

    drawn_sides.add(side)

    values = get_variable_values_sorted(data.domain[attr])
    if side % 2:
        values = values[::-1]

    step = spacing * (total_attrs - side)
    spaces = step * (len(values) - 1)
    width = x1 - x0 - spaces * (side % 2 == 0)
    height = y1 - y0 - spaces * (side % 2 == 1)

    # counts that determine how much room each value label gets
    if attr_vals == "":
        keys = values
    else:
        keys = [attr_vals + "-" + val for val in values]
    counts = [conditionaldict.get(key, 1) for key in keys]
    total = sum(counts)
    if total == 0:
        counts = [1] * len(values)
        total = len(values)

    align = (Qt.AlignTop | Qt.AlignHCenter,
             Qt.AlignRight | Qt.AlignVCenter,
             Qt.AlignBottom | Qt.AlignHCenter,
             Qt.AlignLeft | Qt.AlignVCenter)[side]

    # calculate position of first attribute
    currpos = 0
    for val, count in zip(values, counts):
        perc = count / float(total)
        if distributiondict[val] != 0:
            if side == 0:
                label_x = x0 + currpos + width * 0.5 * perc
                label_y = y1 + self.ATTR_VAL_OFFSET
            elif side == 1:
                label_x = x0 - self.ATTR_VAL_OFFSET
                label_y = y0 + currpos + height * 0.5 * perc
            elif side == 2:
                label_x = x0 + currpos + width * perc * 0.5
                label_y = y0 - self.ATTR_VAL_OFFSET
            else:
                label_x = x1 + self.ATTR_VAL_OFFSET
                label_y = y0 + currpos + height * 0.5 * perc
            CanvasText(self.canvas, str(val), label_x, label_y, align)

        # advance even for values without a label
        if side % 2 == 0:
            currpos += perc * width + step
        else:
            currpos += perc * height + step

    # attribute name; it is written vertically on sides 1 and 3
    if side == 0:
        title_x = x0 + (x1 - x0) / 2
        title_y = y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET
        extra = {}
    elif side == 1:
        title_x = x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET
        title_y = y0 + (y1 - y0) / 2
        extra = {"vertical": True}
    elif side == 2:
        title_x = x0 + (x1 - x0) / 2
        title_y = y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET
        extra = {}
    else:
        title_x = x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET
        title_y = y0 + (y1 - y0) / 2
        extra = {"vertical": True}
    CanvasText(self.canvas, attr, title_x, title_y, align, bold=1, **extra)
def draw_data(attr_list, x0_x1, y0_y1, side, condition,
              total_attrs, used_attrs=None, used_vals=None, attr_vals=""):
    """Recursively split an area among the values of ``attr_list[0]``.

    Even sides split the area along x, odd sides along y. Each part is
    either drawn with ``add_rect`` (last attribute) or split further by a
    recursive call with the remaining attributes and ``side + 1``.

    Args:
        attr_list: attributes that still have to be drawn.
        x0_x1, y0_y1: ``(low, high)`` bounds of the area.
        side: index of the side on which this attribute is labelled.
        condition: HTML text accumulated for the tooltip.
        total_attrs: passed on to ``draw_text``.
        used_attrs, used_vals: attributes and values chosen so far.
        attr_vals: ``conditionaldict`` key of the values chosen so far.
    """
    # avoid shared mutable defaults; the lists are handed on to add_rect
    if used_attrs is None:
        used_attrs = []
    if used_vals is None:
        used_vals = []

    x0, x1 = x0_x1
    y0, y1 = y0_y1
    if conditionaldict[attr_vals] == 0:
        add_rect(x0, x1, y0, y1, "",
                 used_attrs, used_vals, attr_vals=attr_vals)
        # store coordinates for later drawing of labels
        draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                  used_attrs, used_vals, attr_vals)
        return

    attr = attr_list[0]
    # how much smaller rectangles do we draw
    edge = len(attr_list) * spacing
    values = get_variable_values_sorted(data.domain[attr])
    if side % 2:
        values = values[::-1]  # reverse names if necessary

    horizontal = side % 2 == 0  # drawing on the x axis, otherwise on y
    extent = (x1 - x0) if horizontal else (y1 - y0)
    n_gaps = len(values) - 1
    # remove the space needed for separating different attr. values
    whole = max(0, extent - edge * n_gaps)
    # A single value has no gaps to shrink; dividing by ``n_gaps`` would
    # raise ZeroDivisionError, and ``edge`` is multiplied by 0 anyway.
    if whole == 0 and n_gaps != 0:
        edge = extent / float(n_gaps)

    if attr_vals == "":
        counts = [conditionaldict[val] for val in values]
    else:
        counts = [conditionaldict[attr_vals + "-" + val]
                  for val in values]
    total = sum(counts)
    # running totals: cumulative[i] == sum(counts[:i])
    cumulative = [0]
    for count in counts:
        cumulative.append(cumulative[-1] + count)

    # if we are visualizing the third attribute and the first attribute
    # has the last value, we have to reverse the order in which the
    # boxes will be drawn otherwise, if the last cell, nearest to the
    # labels of the fourth attribute, is empty, we wouldn't be able to
    # position the labels
    valrange = list(range(len(values)))
    if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
        attr1values = get_variable_values_sorted(
            data.domain[used_attrs[0]])
        if used_vals[0] == attr1values[-1]:
            valrange = valrange[::-1]

    for i in valrange:
        start = i * edge + whole * float(cumulative[i] / total)
        end = i * edge + whole * float(cumulative[i + 1] / total)
        val = values[i]
        htmlval = to_html(val)
        if attr_vals != "":
            newattrvals = attr_vals + "-" + val
        else:
            newattrvals = val

        tooltip = condition + 4 * " " + attr + \
            ": <b>" + htmlval + "</b><br>"
        common_args = used_attrs + [attr], used_vals + [val], newattrvals
        if horizontal:  # if we are moving horizontally
            part_x, part_y = (x0 + start, x0 + end), (y0, y1)
        else:
            part_x, part_y = (x0, x1), (y0 + start, y0 + end)
        if len(attr_list) == 1:
            add_rect(part_x[0], part_x[1], part_y[0], part_y[1],
                     tooltip, *common_args)
        else:
            draw_data(attr_list[1:], part_x, part_y,
                      side + 1, tooltip, total_attrs, *common_args)

    draw_text(side, attr_list[0], (x0, x1), (y0, y1),
              total_attrs, used_attrs, used_vals, attr_vals)
def draw_distributions(self):
    """Draw class-distribution bars on the axes of discrete attributes.

    Does nothing unless distributions are enabled, data is set and the
    domain has a discrete class. Contingencies are computed on first use
    and kept in ``self.domain_contingencies``.
    """
    if not (self.show_distributions and self.data is not None
            and self.domain.has_discrete_class):
        return

    class_ = self.domain.class_var
    class_count = len(class_.values)

    # we create a hash table of possible class values (happens only if we
    # have a discrete class)
    if self.domain_contingencies is None:
        self.domain_contingencies = dict(
            zip([attr for attr in self.domain if attr.is_discrete],
                get_contingencies(self.data, skipContinuous=True)))
        self.domain_contingencies[class_] = get_contingency(
            self.data, class_, class_)

    # ``or [1]`` covers the case of no contingencies at all
    max_count = max([contingency.max() for contingency
                     in self.domain_contingencies.values()] or [1])
    sorted_class_values = get_variable_values_sorted(class_)

    for axis_idx, attr_idx in enumerate(self.attribute_indices):
        attr = self.domain[attr_idx]
        # Only discrete attributes have ``values`` and an entry in
        # ``self.domain_contingencies``; all other attributes are skipped.
        if not attr.is_discrete:
            continue

        contingency = self.domain_contingencies[attr]
        attr_len = len(attr.values)

        # we create a hash table of variable values and their indices
        sorted_variable_values = get_variable_values_sorted(attr)

        # height of one bar; the same for every value and class
        height = 0.7 / float(class_count * attr_len)

        # create bar curve
        for j in range(attr_len):
            attribute_value = sorted_variable_values[j]
            value_count = contingency[:, attribute_value]
            y_off = float(1.0 + 2.0 * j) / float(2 * attr_len)

            for i in range(class_count):
                class_value = sorted_class_values[i]

                color = QColor(*self.colors[i])
                color.setAlpha(self.alpha_value)

                # the largest count spans half the distance between axes
                width = float(
                    value_count[class_value] * 0.5) / float(max_count)
                y_low_bottom = y_off + float(
                    class_count * height) / 2.0 - i * height
                curve = PolygonCurve(
                    QPen(color), QBrush(color),
                    xData=[axis_idx, axis_idx + width,
                           axis_idx + width, axis_idx],
                    yData=[y_low_bottom, y_low_bottom,
                           y_low_bottom - height, y_low_bottom - height],
                    tooltip=attr.name)
                curve.attach(self)
def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs,
              used_vals, attr_vals):
    """Label one side of the plot with the values and the name of ``attr``.

    A side is labelled only once (see ``drawn_sides``). If the current
    area contains no instances, its coordinates are stored in
    ``draw_positions`` and picked up by a later call for the same side.
    """
    x0, x1 = x0_x1
    y0, y1 = y0_y1
    if side in drawn_sides:
        return

    # the text on the right will be drawn when we are processing
    # visualization of the last value of the first attribute
    if side == 3:
        sorted_first = get_variable_values_sorted(
            data.domain[used_attrs[0]])
        if used_vals[0] != sorted_first[-1]:
            return

    if not conditionaldict[attr_vals]:
        if side not in draw_positions:
            draw_positions[side] = (x0, x1, y0, y1)
        return
    if side in draw_positions:
        # restore the positions of attribute values and name
        x0, x1, y0, y1 = draw_positions[side]

    drawn_sides.add(side)

    values = get_variable_values_sorted(data.domain[attr])
    if side % 2:
        values = values[::-1]

    gap = spacing * (total_attrs - side)
    spaces = gap * (len(values) - 1)
    width = x1 - x0 - spaces * (side % 2 == 0)
    height = y1 - y0 - spaces * (side % 2 == 1)

    # share of the side that every value occupies
    if attr_vals == "":
        lookup = values
    else:
        lookup = [attr_vals + "-" + val for val in values]
    counts = [conditionaldict.get(key, 1) for key in lookup]
    total = sum(counts)
    if total == 0:
        counts = [1] * len(values)
        total = len(values)

    align = (
        Qt.AlignTop | Qt.AlignHCenter,
        Qt.AlignRight | Qt.AlignVCenter,
        Qt.AlignBottom | Qt.AlignHCenter,
        Qt.AlignLeft | Qt.AlignVCenter,
    )[side]

    # calculate position of first attribute
    currpos = 0
    for val, count in zip(values, counts):
        perc = count / float(total)
        if distributiondict[val] != 0:
            if side == 0:
                text_x = x0 + currpos + width * 0.5 * perc
                text_y = y1 + self.ATTR_VAL_OFFSET
            elif side == 1:
                text_x = x0 - self.ATTR_VAL_OFFSET
                text_y = y0 + currpos + height * 0.5 * perc
            elif side == 2:
                text_x = x0 + currpos + width * perc * 0.5
                text_y = y0 - self.ATTR_VAL_OFFSET
            else:
                text_x = x1 + self.ATTR_VAL_OFFSET
                text_y = y0 + currpos + height * 0.5 * perc
            CanvasText(self.canvas, str(val), text_x, text_y, align)

        # the position moves on whether or not a label was written
        if side % 2 == 0:
            currpos += perc * width + gap
        else:
            currpos += perc * height + gap

    # attribute name; vertical text on sides 1 and 3
    if side == 0:
        name_x = x0 + (x1 - x0) / 2
        name_y = y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET
        name_args = {}
    elif side == 1:
        name_x = x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET
        name_y = y0 + (y1 - y0) / 2
        name_args = {"vertical": True}
    elif side == 2:
        name_x = x0 + (x1 - x0) / 2
        name_y = y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET
        name_args = {}
    else:
        name_x = x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET
        name_y = y0 + (y1 - y0) / 2
        name_args = {"vertical": True}
    CanvasText(self.canvas, attr, name_x, name_y, align, bold=1,
               **name_args)
def draw_text(side, attr, x0_x1, y0_y1,
              total_attrs, used_attrs, used_vals, attr_vals):
    """Label one side of the plot with the values and the name of ``attr``.

    ``attr`` and the items of ``used_attrs`` are variables here: they are
    passed straight to ``get_variable_values_sorted`` and ``attr.name`` is
    used as the title. A side is labelled once (``drawn_sides``); empty
    areas only record their coordinates in ``draw_positions``.
    """
    x0, x1 = x0_x1
    y0, y1 = y0_y1
    if side in drawn_sides:
        return

    # the text on the right will be drawn when we are processing
    # visualization of the last value of the first attribute
    if side == 3:
        sorted_first = get_variable_values_sorted(used_attrs[0])
        if used_vals[0] != sorted_first[-1]:
            return

    if not conditionaldict[attr_vals]:
        if side not in draw_positions:
            draw_positions[side] = (x0, x1, y0, y1)
        return
    if side in draw_positions:
        # restore the positions of attribute values and name
        x0, x1, y0, y1 = draw_positions[side]

    drawn_sides.add(side)

    values = get_variable_values_sorted(attr)
    if side % 2:
        values = values[::-1]

    gap = spacing * (total_attrs - side)
    spaces = gap * (len(values) - 1)
    width = x1 - x0 - spaces * (side % 2 == 0)
    height = y1 - y0 - spaces * (side % 2 == 1)

    total, counts = get_counts(attr_vals, values)

    align = (Qt.AlignTop | Qt.AlignHCenter,
             Qt.AlignRight | Qt.AlignVCenter,
             Qt.AlignBottom | Qt.AlignHCenter,
             Qt.AlignLeft | Qt.AlignVCenter)[side]

    # calculate position of first attribute
    currpos = 0
    for val, count in zip(values, counts):
        # values without instances get no label and take no room
        if distributiondict[val] == 0:
            continue
        perc = count / float(total)
        if side == 0:
            text_x = x0 + currpos + width * 0.5 * perc
            text_y = y1 + self.ATTR_VAL_OFFSET
        elif side == 1:
            text_x = x0 - self.ATTR_VAL_OFFSET
            text_y = y0 + currpos + height * 0.5 * perc
        elif side == 2:
            text_x = x0 + currpos + width * perc * 0.5
            text_y = y0 - self.ATTR_VAL_OFFSET
        else:
            text_x = x1 + self.ATTR_VAL_OFFSET
            text_y = y0 + currpos + height * 0.5 * perc
        CanvasText(self.canvas, val, text_x, text_y, align)

        if side % 2:
            space = height
        else:
            space = width
        currpos += perc * space + gap

    # attribute name
    if side == 0:
        name_x = x0 + (x1 - x0) / 2
        name_y = y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET
    elif side == 1:
        name_x = x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET
        name_y = y0 + (y1 - y0) / 2
    elif side == 2:
        name_x = x0 + (x1 - x0) / 2
        name_y = y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET
    else:
        name_x = x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET
        name_y = y0 + (y1 - y0) / 2
    CanvasText(
        self.canvas, attr.name, name_x, name_y, align, bold=True,
        vertical=side % 2)
def update_data(self, attr_x, attr_y, reset_view=True):
    """Rebuild the scatter plot for the attribute pair ``attr_x``/``attr_y``.

    Clears the plot and the ``missing_coords`` messages, recomputes the
    valid rows and, when there are none, raises the ``missing_coords``
    warning and stops. Otherwise it sets the view range (if
    ``reset_view``), axis titles and labels, and adds the density image,
    the scatter items and the regression line as configured.
    """
    self.master.Warning.missing_coords.clear()
    self.master.Information.missing_coords.clear()
    self._clear_plot_widget()

    if self.shown_y != attr_y:
        # 'reset' the axis text width estimation. Without this the left
        # axis tick labels space only ever expands
        self.plot_widget.getAxis("left").textWidth = 30

    self.shown_x = attr_x
    self.shown_y = attr_y

    domain = self.data.domain
    if not (attr_x in domain and attr_y in domain):
        self.set_data(self.sparse_to_dense())

    valid = None
    if self.jittered_data is not None and len(self.jittered_data):
        valid = self.get_valid_list([attr_x, attr_y])
        if not np.any(valid):
            valid = None
    self.valid_data = valid

    if self.valid_data is None:
        # nothing to show
        self.selection = None
        self.n_points = 0
        self.master.Warning.missing_coords(
            self.shown_x.name, self.shown_y.name)
        return

    x_data, y_data = self.get_xy_data_positions(
        attr_x, attr_y, self.valid_data)
    self.n_points = len(x_data)

    if reset_view:
        min_x, max_x = np.nanmin(x_data), np.nanmax(x_data)
        min_y, max_y = np.nanmin(y_data), np.nanmax(y_data)
        bounds = QRectF(min_x, min_y, max_x - min_x, max_y - min_y)
        self.view_box.setRange(bounds, padding=0.025)
        self.view_box.init_history()
        self.view_box.tag_history()
    # the density image below uses the range the view actually shows
    (min_x, max_x), (min_y, max_y) = self.view_box.viewRange()

    for axis, var in (("bottom", attr_x), ("left", attr_y)):
        self.set_axis_title(axis, var)
        labels = get_variable_values_sorted(var) if var.is_discrete else None
        self.set_labels(axis, labels)

    color_data, brush_data = self.compute_colors()
    color_data_sel, brush_data_sel = self.compute_colors_sel()
    size_data = self.compute_sizes()
    shape_data = self.compute_symbols()

    if self.should_draw_density():
        rgb_data = [pen.color().getRgb()[:3] for pen in color_data]
        self.density_img = classdensity.class_density_image(
            min_x, max_x, min_y, max_y, self.resolution,
            x_data, y_data, rgb_data)
        self.plot_widget.addItem(self.density_img)

    self.data_indices = np.flatnonzero(self.valid_data)
    if len(self.data_indices) != len(self.data):
        # some rows are not shown
        self.master.Information.missing_coords(
            self.shown_x.name, self.shown_y.name)

    self.scatterplot_item = ScatterPlotItem(
        x=x_data, y=y_data, data=self.data_indices,
        symbol=shape_data, size=size_data,
        pen=color_data, brush=brush_data)
    self.scatterplot_item_sel = ScatterPlotItem(
        x=x_data, y=y_data, data=self.data_indices,
        symbol=shape_data, size=size_data + SELECTION_WIDTH,
        pen=color_data_sel, brush=brush_data_sel)
    # the selection item is added first, then the points themselves
    self.plot_widget.addItem(self.scatterplot_item_sel)
    self.plot_widget.addItem(self.scatterplot_item)

    self.scatterplot_item.selected_points = []
    self.scatterplot_item.sigClicked.connect(self.select_by_click)

    if self.show_reg_line:
        _x_data = self.data.get_column_view(self.shown_x)[0][self.valid_data]
        _y_data = self.data.get_column_view(self.shown_y)[0][self.valid_data]
        assert _x_data.size
        assert _y_data.size
        # NOTE(review): min of x is paired with max of y here; confirm
        # against the parameters of draw_regression_line.
        self.draw_regression_line(
            _x_data, _y_data, np.min(_x_data), np.max(_y_data))

    self.update_labels()
    self.make_legend()
    self.plot_widget.replot()
def update_data(self, attr_x, attr_y, reset_view=True):
    """Redraw the scatter plot for ``attr_x`` (bottom) and ``attr_y`` (left).

    The plot and both ``missing_coords`` messages are cleared first. If no
    row is valid for the pair, the ``missing_coords`` warning is shown and
    nothing is drawn. Otherwise the view range is reset on request and the
    axes, optional density image, scatter items and optional regression
    line are created.
    """
    self.master.Warning.missing_coords.clear()
    self.master.Information.missing_coords.clear()
    self._clear_plot_widget()

    if self.shown_y != attr_y:
        # 'reset' the axis text width estimation. Without this the left
        # axis tick labels space only ever expands
        left_axis = self.plot_widget.getAxis("left")
        left_axis.textWidth = 30

    self.shown_x = attr_x
    self.shown_y = attr_y

    domain = self.data.domain
    if not (attr_x in domain and attr_y in domain):
        dense = self.sparse_to_dense()
        self.set_data(dense)

    valid = None
    if self.jittered_data is not None and len(self.jittered_data):
        valid = self.get_valid_list([attr_x, attr_y])
        if not np.any(valid):
            valid = None
    self.valid_data = valid

    if self.valid_data is None:
        # no point can be placed
        self.selection = None
        self.n_points = 0
        self.master.Warning.missing_coords(self.shown_x.name,
                                           self.shown_y.name)
        return

    x_data, y_data = self.get_xy_data_positions(attr_x, attr_y,
                                                self.valid_data)
    self.n_points = len(x_data)

    if reset_view:
        min_x, max_x = np.nanmin(x_data), np.nanmax(x_data)
        min_y, max_y = np.nanmin(y_data), np.nanmax(y_data)
        view_rect = QRectF(min_x, min_y, max_x - min_x, max_y - min_y)
        self.view_box.setRange(view_rect, padding=0.025)
        self.view_box.init_history()
        self.view_box.tag_history()
    # take the limits from the view itself for the density image below
    (min_x, max_x), (min_y, max_y) = self.view_box.viewRange()

    for axis, var in (("bottom", attr_x), ("left", attr_y)):
        self.set_axis_title(axis, var)
        if var.is_discrete:
            tick_labels = get_variable_values_sorted(var)
        else:
            tick_labels = None
        self.set_labels(axis, tick_labels)

    color_data, brush_data = self.compute_colors()
    color_data_sel, brush_data_sel = self.compute_colors_sel()
    size_data = self.compute_sizes()
    shape_data = self.compute_symbols()

    if self.should_draw_density():
        rgb_data = [pen.color().getRgb()[:3] for pen in color_data]
        self.density_img = classdensity.class_density_image(
            min_x, max_x, min_y, max_y, self.resolution,
            x_data, y_data, rgb_data)
        self.plot_widget.addItem(self.density_img)

    self.data_indices = np.flatnonzero(self.valid_data)
    if len(self.data_indices) != len(self.data):
        # part of the data could not be placed
        self.master.Information.missing_coords(self.shown_x.name,
                                               self.shown_y.name)

    self.scatterplot_item = ScatterPlotItem(
        x=x_data, y=y_data, data=self.data_indices,
        symbol=shape_data, size=size_data,
        pen=color_data, brush=brush_data)
    self.scatterplot_item_sel = ScatterPlotItem(
        x=x_data, y=y_data, data=self.data_indices,
        symbol=shape_data, size=size_data + SELECTION_WIDTH,
        pen=color_data_sel, brush=brush_data_sel)
    # selection item goes in before the item with the points
    self.plot_widget.addItem(self.scatterplot_item_sel)
    self.plot_widget.addItem(self.scatterplot_item)

    self.scatterplot_item.selected_points = []
    self.scatterplot_item.sigClicked.connect(self.select_by_click)

    if self.show_reg_line:
        _x_data = self.data.get_column_view(self.shown_x)[0][self.valid_data]
        _y_data = self.data.get_column_view(self.shown_y)[0][self.valid_data]
        assert _x_data.size
        assert _y_data.size
        # NOTE(review): the last two arguments are min(x) and max(y);
        # verify this against draw_regression_line's signature.
        self.draw_regression_line(_x_data, _y_data,
                                  np.min(_x_data), np.max(_y_data))

    self.update_labels()
    self.make_legend()
    self.plot_widget.replot()
def draw_statistics(self):
    """Draw lines that represent standard deviation or quartiles.

    Currently disabled: the function returns at once, so all code after
    the first ``return`` is unreachable and only sketches the intended
    behavior.
    """
    return  # TODO: Implement using BasicStats

    if not self.show_statistics or self.data is None:
        return

    stats = []
    domain = self.data.domain
    for attr_idx in self.attribute_indices:
        if not self.domain[attr_idx].is_continuous:
            stats.append([()])
            continue  # only for continuous attributes

        if not domain.class_var or domain.has_continuous_class:
            if self.show_statistics == MEANS:
                attr_stat = self.domain_data_stat[attr_idx]
                m = attr_stat.mean
                dev = attr_stat.var
                stats.append([(m - dev, m, m + dev)])
            elif self.show_statistics == MEDIAN:
                stats.append([(0, 0, 0)])
                continue
                ordered = np.sort(attr_values)
                size = len(ordered)
                if size > 0:
                    stats.append([(ordered[int(size / 4.0)],
                                   ordered[int(size / 2.0)],
                                   ordered[int(size * 0.75)])])
                else:
                    stats.append([(0, 0, 0)])
        else:
            per_class = []
            class_values = get_variable_values_sorted(self.domain.class_var)
            class_index = self.domain.index(self.domain.class_var)
            for c in range(len(class_values)):
                attr_values = self.data[attr_idx,
                                        self.data[class_index] == c]
                attr_values = attr_values[~np.isnan(attr_values)]
                size = len(attr_values)
                if size == 0:
                    per_class.append((0, 0, 0))
                    continue
                if self.show_statistics == MEANS:
                    m = attr_values.mean()
                    dev = attr_values.std()
                    per_class.append((m - dev, m, m + dev))
                elif self.show_statistics == MEDIAN:
                    ordered = np.sort(attr_values)
                    per_class.append((ordered[int(size / 4.0)],
                                      ordered[int(size / 2.0)],
                                      ordered[int(size * 0.75)]))
            stats.append(per_class)

    # draw vertical lines
    for i, attr_stats in enumerate(stats):
        for c, triple in enumerate(attr_stats):
            if triple == ():
                continue
            x = i - 0.03 * (len(attr_stats) - 1) / 2.0 + c * 0.03
            col = QColor(self.discrete_palette[c])
            col.setAlpha(self.alpha_value_2)
            self.add_curve("", col, col, 3, OWCurve.Lines, OWPoint.NoSymbol,
                           xData=[x, x, x],
                           yData=[triple[0], triple[1], triple[2]],
                           lineWidth=4)
            # short horizontal tick at each of the three levels
            for level in (triple[0], triple[1], triple[2]):
                self.add_curve("", col, col, 1, OWCurve.Lines,
                               OWPoint.NoSymbol,
                               xData=[x - 0.03, x + 0.03],
                               yData=[level, level], lineWidth=4)

    # draw lines with mean/median values
    if not domain.class_var or domain.has_continuous_class:
        class_count = 1
    else:
        class_count = len(self.domain.class_var.values)
    for c in range(class_count):
        diff = - 0.03 * (class_count - 1) / 2.0 + c * 0.03
        xs, ys = [], []
        for i, attr_stats in enumerate(stats):
            if attr_stats != [()]:
                ys.append(attr_stats[c][1])
                xs.append(i + diff)
                continue
            # an attribute without statistics ends the current line
            if len(xs) > 1:
                col = QColor(self.discrete_palette[c])
                col.setAlpha(self.alpha_value_2)
                self.add_curve("", col, col, 1, OWCurve.Lines,
                               OWPoint.NoSymbol, xData=xs, yData=ys,
                               lineWidth=4)
            xs, ys = [], []
        col = QColor(self.discrete_palette[c])
        col.setAlpha(self.alpha_value_2)
        self.add_curve("", col, col, 1, OWCurve.Lines, OWPoint.NoSymbol,
                       xData=xs, yData=ys, lineWidth=4)
def draw_statistics(self):
    """Draw lines that represent standard deviation or quartiles.

    The function is switched off by the ``return`` on its first line; the
    code below it is never executed and is kept as a draft of the planned
    implementation.
    """
    return  # TODO: Implement using BasicStats

    if not (self.show_statistics and self.data is not None):
        return

    collected = []
    domain = self.data.domain
    for attr_idx in self.attribute_indices:
        if not self.domain[attr_idx].is_continuous:
            collected.append([()])
            continue  # only for continuous attributes

        if not domain.class_var or domain.has_continuous_class:
            if self.show_statistics == MEANS:
                basic = self.domain_data_stat[attr_idx]
                m, dev = basic.mean, basic.var
                collected.append([(m - dev, m, m + dev)])
            elif self.show_statistics == MEDIAN:
                collected.append([(0, 0, 0)])
                continue
                sorted_array = np.sort(attr_values)
                count = len(sorted_array)
                if count > 0:
                    collected.append([
                        (sorted_array[int(count / 4.0)],
                         sorted_array[int(count / 2.0)],
                         sorted_array[int(count * 0.75)])
                    ])
                else:
                    collected.append([(0, 0, 0)])
        else:
            curr = []
            class_values = get_variable_values_sorted(
                self.domain.class_var)
            class_index = self.domain.index(self.domain.class_var)
            for c in range(len(class_values)):
                attr_values = self.data[attr_idx,
                                        self.data[class_index] == c]
                attr_values = attr_values[~np.isnan(attr_values)]
                count = len(attr_values)
                if count == 0:
                    curr.append((0, 0, 0))
                    continue
                if self.show_statistics == MEANS:
                    m = attr_values.mean()
                    dev = attr_values.std()
                    curr.append((m - dev, m, m + dev))
                elif self.show_statistics == MEDIAN:
                    sorted_array = np.sort(attr_values)
                    curr.append(
                        (sorted_array[int(count / 4.0)],
                         sorted_array[int(count / 2.0)],
                         sorted_array[int(count * 0.75)]))
            collected.append(curr)

    # draw vertical lines
    for i, row in enumerate(collected):
        for c, entry in enumerate(row):
            if entry == ():
                continue
            x = i - 0.03 * (len(row) - 1) / 2.0 + c * 0.03
            col = QColor(self.discrete_palette[c])
            col.setAlpha(self.alpha_value_2)
            low, mid, high = entry[0], entry[1], entry[2]
            self.add_curve(
                "", col, col, 3, OWCurve.Lines, OWPoint.NoSymbol,
                xData=[x, x, x], yData=[low, mid, high], lineWidth=4)
            # horizontal ticks at the three levels
            for y in (low, mid, high):
                self.add_curve(
                    "", col, col, 1, OWCurve.Lines, OWPoint.NoSymbol,
                    xData=[x - 0.03, x + 0.03], yData=[y, y], lineWidth=4)

    # draw lines with mean/median values
    if not domain.class_var or domain.has_continuous_class:
        class_count = 1
    else:
        class_count = len(self.domain.class_var.values)
    for c in range(class_count):
        diff = -0.03 * (class_count - 1) / 2.0 + c * 0.03
        xs, ys = [], []
        for i, row in enumerate(collected):
            if row != [()]:
                ys.append(row[c][1])
                xs.append(i + diff)
                continue
            # a gap: flush the points collected so far and start anew
            if len(xs) > 1:
                col = QColor(self.discrete_palette[c])
                col.setAlpha(self.alpha_value_2)
                self.add_curve(
                    "", col, col, 1, OWCurve.Lines, OWPoint.NoSymbol,
                    xData=xs, yData=ys, lineWidth=4)
            xs, ys = [], []
        col = QColor(self.discrete_palette[c])
        col.setAlpha(self.alpha_value_2)
        self.add_curve(
            "", col, col, 1, OWCurve.Lines, OWPoint.NoSymbol,
            xData=xs, yData=ys, lineWidth=4)
def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs,
              used_attrs=None, used_vals=None, attr_vals=""):
    """Recursively divide an area among the values of ``attr_list[0]``.

    On even sides the area is divided along x, on odd sides along y. A
    part is drawn with ``add_rect`` when this is the last attribute,
    otherwise it is divided further by a recursive call with the rest of
    ``attr_list`` and ``side + 1``.

    Args:
        attr_list: attributes that remain to be drawn.
        x0_x1, y0_y1: ``(low, high)`` bounds of the area.
        side: index of the side on which this attribute is labelled.
        condition: HTML text collected for the tooltip.
        total_attrs: forwarded to ``draw_text``.
        used_attrs, used_vals: attributes and values selected so far.
        attr_vals: ``conditionaldict`` key of the values selected so far.
    """
    # no shared mutable defaults; the lists are passed on to add_rect
    used_attrs = [] if used_attrs is None else used_attrs
    used_vals = [] if used_vals is None else used_vals

    x0, x1 = x0_x1
    y0, y1 = y0_y1
    if conditionaldict[attr_vals] == 0:
        add_rect(x0, x1, y0, y1, "", used_attrs, used_vals,
                 attr_vals=attr_vals)
        # store coordinates for later drawing of labels
        draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                  used_attrs, used_vals, attr_vals)
        return

    attr = attr_list[0]
    # how much smaller rectangles do we draw
    edge = len(attr_list) * spacing
    values = get_variable_values_sorted(data.domain[attr])
    if side % 2:
        values = values[::-1]  # reverse names if necessary

    along_x = side % 2 == 0  # we are drawing on the x axis, else on y
    span = (x1 - x0) if along_x else (y1 - y0)
    separators = len(values) - 1
    # remove the space needed for separating different attr. values
    whole = max(0, span - edge * separators)
    # With a single value there is nothing to separate: dividing by
    # ``separators`` would raise ZeroDivisionError, and ``edge`` is only
    # ever multiplied by index 0 in that case.
    if whole == 0 and separators != 0:
        edge = span / float(separators)

    if attr_vals == "":
        counts = [conditionaldict[val] for val in values]
    else:
        counts = [
            conditionaldict[attr_vals + "-" + val] for val in values
        ]
    total = sum(counts)
    # prefix sums: running[i] == sum(counts[:i])
    running = [0]
    for count in counts:
        running.append(running[-1] + count)

    # if we are visualizing the third attribute and the first attribute
    # has the last value, we have to reverse the order in which the
    # boxes will be drawn otherwise, if the last cell, nearest to the
    # labels of the fourth attribute, is empty, we wouldn't be able to
    # position the labels
    valrange = list(range(len(values)))
    if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
        attr1values = get_variable_values_sorted(
            data.domain[used_attrs[0]])
        if used_vals[0] == attr1values[-1]:
            valrange = valrange[::-1]

    for i in valrange:
        start = i * edge + whole * float(running[i] / total)
        end = i * edge + whole * float(running[i + 1] / total)
        val = values[i]
        htmlval = to_html(val)
        if attr_vals != "":
            newattrvals = attr_vals + "-" + val
        else:
            newattrvals = val

        tooltip = condition + 4 * " " + attr + \
            ": <b>" + htmlval + "</b><br>"
        attrs = used_attrs + [attr]
        vals = used_vals + [val]
        common_args = attrs, vals, newattrvals
        if along_x:  # if we are moving horizontally
            sub_x, sub_y = (x0 + start, x0 + end), (y0, y1)
        else:
            sub_x, sub_y = (x0, x1), (y0 + start, y0 + end)
        if len(attr_list) == 1:
            add_rect(sub_x[0], sub_x[1], sub_y[0], sub_y[1], tooltip,
                     *common_args)
        else:
            draw_data(attr_list[1:], sub_x, sub_y, side + 1, tooltip,
                      total_attrs, *common_args)

    draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
              used_attrs, used_vals, attr_vals)