def updateGraph(self, *args):
    """Clear the canvas and redraw the X-by-Y independence diagram.

    One rectangle is drawn for every (X value, Y value) pair whose marginal
    counts are both non-zero.  Widths are proportional to the marginal
    counts of the X values and heights to those of the Y values.  Each
    rectangle is handed to ``addRectIndependencePearson`` together with the
    observed count, and receives a tooltip listing expected and actual
    counts, the overall chi-square statistic and the standardized Pearson
    residual of that cell.

    The method rebuilds ``self.areas`` (one entry per drawn rectangle, in
    drawing order) and reads ``self.selection`` to decide which rectangles
    get a thicker outline.  When there is no data or either attribute is
    unset, the canvas is only cleared.

    ``*args`` are accepted but not used.
    """

    def marginal_total(entry):
        # Best-effort sum of one contingency row: if iterating or adding
        # fails part-way, the amount accumulated so far is kept.  Only
        # ``Exception`` is caught so that KeyboardInterrupt/SystemExit
        # still propagate.
        total = 0
        try:
            for val in entry:
                total += val
        except Exception:
            pass
        return total

    for item in self.canvas.items():
        self.canvas.removeItem(item)
    if self.data is None or len(self.data) == 0 or \
            self.attrX is None or self.attrY is None:
        return

    data = self.data[:, [self.attrX, self.attrY]]
    n_rows = len(data)

    # marginal counts of the x and y attribute values
    contX = get_contingency(data, self.attrX, self.attrX)
    contY = get_contingency(data, self.attrY, self.attrY)
    valsX = [marginal_total(entry) for entry in contX]
    valsY = [marginal_total(entry) for entry in contY]

    x_var = data.domain[self.attrX]
    y_var = data.domain[self.attrY]
    contXY, _ = get_conditional_distribution(data, [x_var, y_var])
    x_values = x_var.values
    y_values = y_var.values

    # Per-cell record: ((x value, x count), (y value, y count), observed, N).
    # Keyed by index pair rather than by the "x-y" string so that values
    # containing "-" cannot overwrite each other's entries.
    probs = {}
    for i, x_count in enumerate(valsX):
        for j, y_count in enumerate(valsY):
            joint_key = '%s-%s' % (x_values[i], y_values[j])
            try:
                observed = contXY[joint_key]
            except Exception:
                # combination not present in the joint distribution
                observed = 0
            probs[i, j] = ((x_values[i], x_count),
                           (y_values[j], y_count),
                           observed, n_rows)

    # get text width of Y labels (hidden items, used only for measuring)
    max_ylabel_w = 0
    for j in range(len(valsY)):
        xl = CanvasText(self.canvas, "", 0, 0,
                        html_text=getHtmlCompatibleString(y_values[j]),
                        show=False)
        max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w)
    max_ylabel_w = min(max_ylabel_w, 200)  # upper limit for label widths

    # get text width of Y attribute name
    text = CanvasText(self.canvas, y_var.name, x=0, y=0, bold=1, show=0,
                      vertical=True)
    xOff = int(text.boundingRect().height() + max_ylabel_w)
    yOff = 55
    sqareSize = min(self.canvasView.width() - xOff - 35,
                    self.canvasView.height() - yOff - 50)
    sqareSize = max(sqareSize, 10)
    self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                 self.canvasView.height())

    # print graph name
    name = "<b>P(%s, %s) ≠ P(%s)×P(%s)</b>" % (
        self.attrX, self.attrY, self.attrX, self.attrY)
    CanvasText(self.canvas, "", xOff + sqareSize / 2, 20, Qt.AlignCenter,
               html_text=name)
    CanvasText(self.canvas, "N = " + str(n_rows), xOff + sqareSize / 2, 38,
               Qt.AlignCenter, bold=0)

    ######################
    # compute chi-square over all cells with a non-zero expected count
    chisquare = 0.0
    for i in range(len(valsX)):
        for j in range(len(valsY)):
            (_, xVal), (_, yVal), actual, sum_ = probs[i, j]
            expected = float(xVal * yVal) / float(sum_)
            if expected == 0:
                continue
            chisquare += (actual - expected) * (actual - expected) / expected

    tooltip_template = (
        "<b>X Attribute: %s</b><br>Value: <b>%s</b><br>"
        "Number of instances (p(x)): <b>%d (%.2f%%)</b><hr> "
        "<b>Y Attribute: %s</b><br>Value: <b>%s</b><br>"
        "Number of instances (p(y)): <b>%d (%.2f%%)</b><hr> "
        "<b>Number Of Instances (Probabilities):</b><br>"
        "Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>"
        "Actual (p(x,y)): <b>%d (%.2f%%)</b> "
        "<hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>"
        "Standardized Pearson residual: <b>%.2f</b>")

    ######################
    # draw rectangles
    currX = xOff
    max_xlabel_h = 0
    normX, normY = sum(valsX), sum(valsY)
    self.areas = []
    # Y value labels are drawn alongside the first drawn column only.  An
    # explicit flag is used because comparing currX with xOff would match
    # again after a column that is zero pixels wide.
    y_labels_pending = True
    for i, x_count in enumerate(valsX):
        if x_count == 0:
            continue
        currY = yOff
        width = int(float(sqareSize * x_count) / float(normX))

        # iterate y values backwards; this way we sort y values correctly
        for j in reversed(range(len(valsY))):
            if valsY[j] == 0:
                continue
            (xAttr, xVal), (yAttr, yVal), actual, sum_ = probs[i, j]
            height = int(float(sqareSize * valsY[j]) / float(normY))

            # create rectangle
            selected = len(self.areas) in self.selection
            rect = CanvasRectangle(
                self.canvas, currX + 2, currY + 2, width - 4, height - 4,
                z=-10, onclick=self.select_area)
            rect.value_pair = i, j
            self.areas.append(rect)

            self.addRectIndependencePearson(
                rect, currX + 2, currY + 2, width - 4, height - 4,
                (xAttr, xVal), (yAttr, yVal), actual, sum_,
                width=1 + 3 * selected,
                # Ugly! This is needed since resize redraws the graph!
                # When this is handled by resizing just the viewer,
                # update_selection will take care of this
            )

            # both marginals are non-zero here, so expected > 0
            expected = float(xVal * yVal) / float(sum_)
            pearson = (actual - expected) / sqrt(expected)
            rect.setToolTip(tooltip_template % (
                self.attrX, getHtmlCompatibleString(xAttr), xVal,
                100.0 * float(xVal) / float(sum_),
                self.attrY, getHtmlCompatibleString(yAttr), yVal,
                100.0 * float(yVal) / float(sum_),
                expected, 100.0 * float(xVal * yVal) / float(sum_ * sum_),
                actual, 100.0 * float(actual) / float(sum_),
                chisquare, pearson))

            currY += height
            if y_labels_pending:
                CanvasText(self.canvas, "", xOff, currY - height / 2,
                           Qt.AlignRight | Qt.AlignVCenter,
                           html_text=getHtmlCompatibleString(y_values[j]))
        y_labels_pending = False

        xl = CanvasText(self.canvas, "", currX + width / 2, yOff + sqareSize,
                        Qt.AlignHCenter | Qt.AlignTop,
                        html_text=getHtmlCompatibleString(x_values[i]))
        max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
        currX += width

    # show attribute names
    CanvasText(self.canvas, self.attrY, 0, yOff + sqareSize / 2,
               Qt.AlignLeft | Qt.AlignVCenter, bold=1, vertical=True)
    CanvasText(self.canvas, self.attrX, xOff + sqareSize / 2,
               yOff + sqareSize + max_xlabel_h,
               Qt.AlignHCenter | Qt.AlignTop, bold=1)
def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs,
              used_attrs=None, used_vals=None, attr_vals=""):
    """Recursively split a rectangle by attribute values and draw the cells.

    The rectangle ``(x0, x1) x (y0, y1)`` is divided along the x axis when
    ``side`` is even and along the y axis when it is odd, one slice per value
    of ``attr_list[0]``, with slice sizes proportional to the counts found in
    ``conditionaldict``.  For the last attribute each slice is drawn with
    ``add_rect``; otherwise the function recurses into the slice with the
    remaining attributes and ``side + 1``.  ``draw_text`` is called once per
    invocation for the attribute being split.

    Relies on names from the enclosing scope: ``conditionaldict``,
    ``spacing``, ``data``, ``add_rect`` and ``draw_text``.

    Parameters:
        attr_list: attributes still to be split on; the first one is used.
        x0_x1, y0_y1: ``(start, end)`` pairs delimiting the rectangle.
        side: recursion depth; its parity selects the split axis.
        condition: tooltip text accumulated from the enclosing levels.
        total_attrs: passed through unchanged to ``draw_text``.
        used_attrs, used_vals: attributes and values fixed by the enclosing
            levels (``None`` means none yet).
        attr_vals: ``"-"``-joined values fixed so far; key into
            ``conditionaldict``.
    """
    # ``None`` stands in for the former ``[]`` defaults so that no list
    # object is shared between independent top-level calls.
    used_attrs = [] if used_attrs is None else used_attrs
    used_vals = [] if used_vals is None else used_vals
    x0, x1 = x0_x1
    y0, y1 = y0_y1

    if conditionaldict[attr_vals] == 0:
        # empty cell: draw it without a tooltip
        add_rect(x0, x1, y0, y1, "",
                 used_attrs, used_vals, attr_vals=attr_vals)
        # store coordinates for later drawing of labels
        draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                  used_attrs, used_vals, attr_vals)
        return

    attr = attr_list[0]
    # how much smaller rectangles do we draw
    edge = len(attr_list) * spacing
    values = get_variable_values_sorted(data.domain[attr])
    horizontal = side % 2 == 0
    if not horizontal:
        values = values[::-1]  # reverse names if necessary

    # Extent along the axis being split, minus the space needed for
    # separating different attribute values.
    span = (x1 - x0) if horizontal else (y1 - y0)
    gaps = len(values) - 1
    whole = max(0, span - edge * gaps)
    if whole == 0:
        # No room left for the cells: spread the gaps over the extent.  With
        # a single value there is no gap at all, so avoid dividing by zero.
        edge = span / float(gaps) if gaps else 0.0

    key_prefix = attr_vals + "-" if attr_vals != "" else ""
    counts = [conditionaldict[key_prefix + val] for val in values]

    # cumulative[i] == sum(counts[:i]); computed once instead of per slice
    cumulative = [0]
    for count in counts:
        cumulative.append(cumulative[-1] + count)
    total = cumulative[-1]

    def fraction(partial):
        # share of the total; 0 when every count is zero
        return float(partial / total) if total else 0.0

    # if we are visualizing the third attribute and the first attribute
    # has the last value, we have to reverse the order in which the
    # boxes will be drawn otherwise, if the last cell, nearest to the
    # labels of the fourth attribute, is empty, we wouldn't be able to
    # position the labels
    valrange = list(range(len(values)))
    if len(attr_list) + len(used_attrs) == 4 and len(used_attrs) == 2:
        attr1values = get_variable_values_sorted(data.domain[used_attrs[0]])
        if used_vals[0] == attr1values[-1]:
            valrange = valrange[::-1]

    for i in valrange:
        start = i * edge + whole * fraction(cumulative[i])
        end = i * edge + whole * fraction(cumulative[i + 1])
        val = values[i]
        htmlval = getHtmlCompatibleString(val)
        newattrvals = key_prefix + val

        tooltip = condition + 4 * " " + attr + \
            ": <b>" + htmlval + "</b><br>"
        common_args = used_attrs + [attr], used_vals + [val], newattrvals

        if horizontal:
            # moving horizontally: the slice narrows the x range
            slice_x, slice_y = (x0 + start, x0 + end), (y0, y1)
        else:
            slice_x, slice_y = (x0, x1), (y0 + start, y0 + end)

        if len(attr_list) == 1:
            add_rect(slice_x[0], slice_x[1], slice_y[0], slice_y[1],
                     tooltip, *common_args)
        else:
            draw_data(attr_list[1:], slice_x, slice_y, side + 1,
                      tooltip, total_attrs, *common_args)

    draw_text(side, attr_list[0], (x0, x1), (y0, y1),
              total_attrs, used_attrs, used_vals, attr_vals)
def updateGraph(self, *args):
    """Clear the canvas and redraw the X-by-Y independence diagram.

    One rectangle is drawn for every (X value, Y value) pair whose marginal
    counts are both non-zero.  Widths are proportional to the marginal
    counts of the X values and heights to those of the Y values.  Each
    rectangle is handed to ``addRectIndependencePearson`` together with the
    observed count, and receives a tooltip listing expected and actual
    counts, the overall chi-square statistic and the standardized Pearson
    residual of that cell.

    The method rebuilds ``self.areas`` (one entry per drawn rectangle, in
    drawing order) and reads ``self.selection`` to decide which rectangles
    get a thicker outline.  When there is no data or either attribute is
    unset, the canvas is only cleared.

    ``*args`` are accepted but not used.
    """

    def marginal_total(entry):
        # Best-effort sum of one contingency row: if iterating or adding
        # fails part-way, the amount accumulated so far is kept.  Only
        # ``Exception`` is caught so that KeyboardInterrupt/SystemExit
        # still propagate.
        total = 0
        try:
            for val in entry:
                total += val
        except Exception:
            pass
        return total

    for item in self.canvas.items():
        self.canvas.removeItem(item)
    if self.data is None or len(self.data) == 0 or \
            self.attrX is None or self.attrY is None:
        return

    data = self.data[:, [self.attrX, self.attrY]]
    n_rows = len(data)

    # marginal counts of the x and y attribute values
    contX = get_contingency(data, self.attrX, self.attrX)
    contY = get_contingency(data, self.attrY, self.attrY)
    valsX = [marginal_total(entry) for entry in contX]
    valsY = [marginal_total(entry) for entry in contY]

    x_var = data.domain[self.attrX]
    y_var = data.domain[self.attrY]
    contXY, _ = get_conditional_distribution(data, [x_var, y_var])
    x_values = x_var.values
    y_values = y_var.values

    # Per-cell record: ((x value, x count), (y value, y count), observed, N).
    # Keyed by index pair rather than by the "x-y" string so that values
    # containing "-" cannot overwrite each other's entries.
    probs = {}
    for i, x_count in enumerate(valsX):
        for j, y_count in enumerate(valsY):
            joint_key = '%s-%s' % (x_values[i], y_values[j])
            try:
                observed = contXY[joint_key]
            except Exception:
                # combination not present in the joint distribution
                observed = 0
            probs[i, j] = ((x_values[i], x_count),
                           (y_values[j], y_count),
                           observed, n_rows)

    # get text width of Y labels (hidden items, used only for measuring)
    max_ylabel_w = 0
    for j in range(len(valsY)):
        xl = CanvasText(self.canvas, "", 0, 0,
                        html_text=getHtmlCompatibleString(y_values[j]),
                        show=False)
        max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w)
    max_ylabel_w = min(max_ylabel_w, 200)  # upper limit for label widths

    # get text width of Y attribute name
    text = CanvasText(self.canvas, y_var.name, x=0, y=0, bold=1, show=0,
                      vertical=True)
    xOff = int(text.boundingRect().height() + max_ylabel_w)
    yOff = 55
    sqareSize = min(self.canvasView.width() - xOff - 35,
                    self.canvasView.height() - yOff - 50)
    sqareSize = max(sqareSize, 10)
    self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                 self.canvasView.height())

    # print graph name
    name = "<b>P(%s, %s) ≠ P(%s)×P(%s)</b>" % (
        self.attrX, self.attrY, self.attrX, self.attrY)
    CanvasText(self.canvas, "", xOff + sqareSize / 2, 20, Qt.AlignCenter,
               html_text=name)
    CanvasText(self.canvas, "N = " + str(n_rows), xOff + sqareSize / 2, 38,
               Qt.AlignCenter, bold=0)

    ######################
    # compute chi-square over all cells with a non-zero expected count
    chisquare = 0.0
    for i in range(len(valsX)):
        for j in range(len(valsY)):
            (_, xVal), (_, yVal), actual, sum_ = probs[i, j]
            expected = float(xVal * yVal) / float(sum_)
            if expected == 0:
                continue
            chisquare += (actual - expected) * (actual - expected) / expected

    tooltip_template = (
        "<b>X Attribute: %s</b><br>Value: <b>%s</b><br>"
        "Number of instances (p(x)): <b>%d (%.2f%%)</b><hr> "
        "<b>Y Attribute: %s</b><br>Value: <b>%s</b><br>"
        "Number of instances (p(y)): <b>%d (%.2f%%)</b><hr> "
        "<b>Number Of Instances (Probabilities):</b><br>"
        "Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>"
        "Actual (p(x,y)): <b>%d (%.2f%%)</b> "
        "<hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>"
        "Standardized Pearson residual: <b>%.2f</b>")

    ######################
    # draw rectangles
    currX = xOff
    max_xlabel_h = 0
    normX, normY = sum(valsX), sum(valsY)
    self.areas = []
    # Y value labels are drawn alongside the first drawn column only.  An
    # explicit flag is used because comparing currX with xOff would match
    # again after a column that is zero pixels wide.
    y_labels_pending = True
    for i, x_count in enumerate(valsX):
        if x_count == 0:
            continue
        currY = yOff
        width = int(float(sqareSize * x_count) / float(normX))

        # iterate y values backwards; this way we sort y values correctly
        for j in reversed(range(len(valsY))):
            if valsY[j] == 0:
                continue
            (xAttr, xVal), (yAttr, yVal), actual, sum_ = probs[i, j]
            height = int(float(sqareSize * valsY[j]) / float(normY))

            # create rectangle
            selected = len(self.areas) in self.selection
            rect = CanvasRectangle(
                self.canvas, currX + 2, currY + 2, width - 4, height - 4,
                z=-10, onclick=self.select_area)
            rect.value_pair = i, j
            self.areas.append(rect)

            self.addRectIndependencePearson(
                rect, currX + 2, currY + 2, width - 4, height - 4,
                (xAttr, xVal), (yAttr, yVal), actual, sum_,
                width=1 + 3 * selected,
                # Ugly! This is needed since resize redraws the graph!
                # When this is handled by resizing just the viewer,
                # update_selection will take care of this
            )

            # both marginals are non-zero here, so expected > 0
            expected = float(xVal * yVal) / float(sum_)
            pearson = (actual - expected) / sqrt(expected)
            rect.setToolTip(tooltip_template % (
                self.attrX, getHtmlCompatibleString(xAttr), xVal,
                100.0 * float(xVal) / float(sum_),
                self.attrY, getHtmlCompatibleString(yAttr), yVal,
                100.0 * float(yVal) / float(sum_),
                expected, 100.0 * float(xVal * yVal) / float(sum_ * sum_),
                actual, 100.0 * float(actual) / float(sum_),
                chisquare, pearson))

            currY += height
            if y_labels_pending:
                CanvasText(self.canvas, "", xOff, currY - height / 2,
                           Qt.AlignRight | Qt.AlignVCenter,
                           html_text=getHtmlCompatibleString(y_values[j]))
        y_labels_pending = False

        xl = CanvasText(self.canvas, "", currX + width / 2, yOff + sqareSize,
                        Qt.AlignHCenter | Qt.AlignTop,
                        html_text=getHtmlCompatibleString(x_values[i]))
        max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
        currX += width

    # show attribute names
    CanvasText(self.canvas, self.attrY, 0, yOff + sqareSize / 2,
               Qt.AlignLeft | Qt.AlignVCenter, bold=1, vertical=True)
    CanvasText(self.canvas, self.attrX, xOff + sqareSize / 2,
               yOff + sqareSize + max_xlabel_h,
               Qt.AlignHCenter | Qt.AlignTop, bold=1)
def updateGraph(self, *args):
    """Clear the canvas and redraw the X-by-Y independence diagram.

    The data to display comes from ``self.getConditionalData()``.  One
    rectangle is drawn for every (X value, Y value) pair whose marginal
    counts are both non-zero; widths are proportional to the marginal counts
    of the X values and heights to those of the Y values.  Each rectangle is
    handed to ``addRectIndependencePearson`` together with the observed
    count, and receives a tooltip listing expected and actual counts, the
    overall chi-square statistic and the cell's standardized Pearson
    residual.

    The title mentions the condition attribute and value unless
    ``self.attrCondition`` equals ``"(None)"``.

    Only the canvas is cleared when there is no data or an attribute is
    unset.  If the view is too small for the diagram, the method returns
    after creating only the hidden text item used to measure the Y
    attribute name.

    ``*args`` are accepted but not used.
    """

    def marginal_total(entry):
        # Best-effort sum of one contingency row: if iterating or adding
        # fails part-way, the amount accumulated so far is kept.  Only
        # ``Exception`` is caught so that KeyboardInterrupt/SystemExit
        # still propagate.
        total = 0
        try:
            for val in entry:
                total += val
        except Exception:
            pass
        return total

    # remove all canvas items
    for item in self.canvas.items():
        self.canvas.removeItem(item)
    if not self.data:
        return
    if not self.attrX or not self.attrY:
        return
    data = self.getConditionalData()
    if not data or len(data) == 0:
        return
    n_rows = len(data)

    # marginal counts of the x and y attribute values
    contX = get_contingency(data, self.attrX, self.attrX)
    contY = get_contingency(data, self.attrY, self.attrY)
    valsX = [marginal_total(entry) for entry in contX]
    valsY = [marginal_total(entry) for entry in contY]

    x_var = data.domain[self.attrX]
    y_var = data.domain[self.attrY]
    contXY = self.getConditionalDistributions(data, [x_var, y_var])
    x_values = x_var.values
    y_values = y_var.values

    # Per-cell record: ((x value, x count), (y value, y count), observed, N).
    # Keyed by index pair rather than by the "x-y" string so that values
    # containing "-" cannot overwrite each other's entries.
    probs = {}
    for i, x_count in enumerate(valsX):
        for j, y_count in enumerate(valsY):
            joint_key = '%s-%s' % (x_values[i], y_values[j])
            try:
                observed = contXY[joint_key]
            except Exception:
                # combination not present in the joint distribution
                observed = 0
            probs[i, j] = ((x_values[i], x_count),
                           (y_values[j], y_count),
                           observed, n_rows)

    # get text width of Y attribute name (hidden item, measuring only)
    text = OWCanvasText(self.canvas, y_var.name, x=0, y=0, bold=1, show=0,
                        vertical=True)
    xOff = int(text.boundingRect().height() + 40)
    yOff = 50
    sqareSize = min(self.canvasView.width() - xOff - 35,
                    self.canvasView.height() - yOff - 30)
    if sqareSize < 0:
        return  # canvas is too small to draw rectangles
    self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                 self.canvasView.height())

    # print graph name
    if self.attrCondition == "(None)":
        name = "<b>P(%s, %s) ≠ P(%s)×P(%s)</b>" % (
            self.attrX, self.attrY, self.attrX, self.attrY)
    else:
        cond_value = getHtmlCompatibleString(self.attrConditionValue)
        name = ("<b>P(%s, %s | %s = %s) ≠ "
                "P(%s | %s = %s)×P(%s | %s = %s)</b>") % (
            self.attrX, self.attrY, self.attrCondition, cond_value,
            self.attrX, self.attrCondition, cond_value,
            self.attrY, self.attrCondition, cond_value)
    OWCanvasText(self.canvas, "", xOff + sqareSize / 2, 20, Qt.AlignCenter,
                 htmlText=name)
    OWCanvasText(self.canvas, "N = " + str(n_rows), xOff + sqareSize / 2, 38,
                 Qt.AlignCenter, bold=0)

    ######################
    # compute chi-square over all cells with a non-zero expected count
    chisquare = 0.0
    for i in range(len(valsX)):
        for j in range(len(valsY)):
            (_, xVal), (_, yVal), actual, sum_ = probs[i, j]
            expected = float(xVal * yVal) / float(sum_)
            if expected == 0:
                continue
            chisquare += (actual - expected) * (actual - expected) / expected

    tooltip_template = (
        "<b>X Attribute: %s</b><br>Value: <b>%s</b><br>"
        "Number of examples (p(x)): <b>%d (%.2f%%)</b><hr> "
        "<b>Y Attribute: %s</b><br>Value: <b>%s</b><br>"
        "Number of examples (p(y)): <b>%d (%.2f%%)</b><hr> "
        "<b>Number Of Examples (Probabilities):</b><br>"
        "Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>"
        "Actual (p(x,y)): <b>%d (%.2f%%)</b> "
        "<hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>"
        "Standardized Pearson residual: <b>%.2f</b>")

    ######################
    # draw rectangles
    currX = xOff
    max_ylabel_w = 0
    normX, normY = sum(valsX), sum(valsY)
    # Y value labels are drawn alongside the first drawn column only.  An
    # explicit flag is used because comparing currX with xOff would match
    # again after a column that is zero pixels wide.
    y_labels_pending = True
    for i, x_count in enumerate(valsX):
        if x_count == 0:
            continue
        currY = yOff
        width = int(float(sqareSize * x_count) / float(normX))

        # iterate y values backwards; this way we sort y values correctly
        for j in reversed(range(len(valsY))):
            if valsY[j] == 0:
                continue
            (xAttr, xVal), (yAttr, yVal), actual, sum_ = probs[i, j]
            height = int(float(sqareSize * valsY[j]) / float(normY))

            # create rectangle
            rect = OWCanvasRectangle(self.canvas, currX + 2, currY + 2,
                                     width - 4, height - 4, z=-10)
            self.addRectIndependencePearson(
                rect, currX + 2, currY + 2, width - 4, height - 4,
                (xAttr, xVal), (yAttr, yVal), actual, sum_)

            # both marginals are non-zero here, so expected > 0
            expected = float(xVal * yVal) / float(sum_)
            pearson = (actual - expected) / sqrt(expected)
            rect.setToolTip(tooltip_template % (
                self.attrX, getHtmlCompatibleString(xAttr), xVal,
                100.0 * float(xVal) / float(sum_),
                self.attrY, getHtmlCompatibleString(yAttr), yVal,
                100.0 * float(yVal) / float(sum_),
                expected, 100.0 * float(xVal * yVal) / float(sum_ * sum_),
                actual, 100.0 * float(actual) / float(sum_),
                chisquare, pearson))

            currY += height
            if y_labels_pending:
                xl = OWCanvasText(
                    self.canvas, "", xOff - 10, currY - height / 2,
                    Qt.AlignRight | Qt.AlignVCenter,
                    htmlText=getHtmlCompatibleString(y_values[j]))
                max_ylabel_w = max(int(xl.boundingRect().width()),
                                   max_ylabel_w)
        y_labels_pending = False

        OWCanvasText(self.canvas, "", currX + width / 2,
                     yOff + sqareSize + 5, Qt.AlignCenter,
                     htmlText=getHtmlCompatibleString(x_values[i]))
        currX += width

    # show attribute names
    OWCanvasText(self.canvas, self.attrY, max(xOff - 20 - max_ylabel_w, 20),
                 yOff + sqareSize / 2, Qt.AlignRight | Qt.AlignVCenter,
                 bold=1, vertical=True)
    OWCanvasText(self.canvas, self.attrX, xOff + sqareSize / 2,
                 yOff + sqareSize + 15, Qt.AlignCenter, bold=1)
def updateGraph(self, *args):
    """Clear the canvas and redraw the X-by-Y independence diagram.

    The data to display comes from ``self.getConditionalData()``.  One
    rectangle is drawn for every (X value, Y value) pair whose marginal
    counts are both non-zero; widths are proportional to the marginal counts
    of the X values and heights to those of the Y values.  Each rectangle is
    handed to ``addRectIndependencePearson`` together with the observed
    count, and receives a tooltip listing expected and actual counts, the
    overall chi-square statistic and the cell's standardized Pearson
    residual.

    The title mentions the condition attribute and value unless
    ``self.attrCondition`` equals ``"(None)"``.

    Only the canvas is cleared when there is no data or an attribute is
    unset.  If the view is too small for the diagram, the method returns
    after creating only the hidden text item used to measure the Y
    attribute name.

    ``*args`` are accepted but not used.
    """

    def marginal_total(entry):
        # Best-effort sum of one contingency row: if iterating or adding
        # fails part-way, the amount accumulated so far is kept.  Only
        # ``Exception`` is caught so that KeyboardInterrupt/SystemExit
        # still propagate.
        total = 0
        try:
            for val in entry:
                total += val
        except Exception:
            pass
        return total

    # remove all canvas items
    for item in self.canvas.items():
        self.canvas.removeItem(item)
    if not self.data:
        return
    if not self.attrX or not self.attrY:
        return
    data = self.getConditionalData()
    if not data or len(data) == 0:
        return
    n_rows = len(data)

    # marginal counts of the x and y attribute values
    contX = get_contingency(data, self.attrX, self.attrX)
    contY = get_contingency(data, self.attrY, self.attrY)
    valsX = [marginal_total(entry) for entry in contX]
    valsY = [marginal_total(entry) for entry in contY]

    x_var = data.domain[self.attrX]
    y_var = data.domain[self.attrY]
    contXY = self.getConditionalDistributions(data, [x_var, y_var])
    x_values = x_var.values
    y_values = y_var.values

    # Per-cell record: ((x value, x count), (y value, y count), observed, N).
    # Keyed by index pair rather than by the "x-y" string so that values
    # containing "-" cannot overwrite each other's entries.
    probs = {}
    for i, x_count in enumerate(valsX):
        for j, y_count in enumerate(valsY):
            joint_key = '%s-%s' % (x_values[i], y_values[j])
            try:
                observed = contXY[joint_key]
            except Exception:
                # combination not present in the joint distribution
                observed = 0
            probs[i, j] = ((x_values[i], x_count),
                           (y_values[j], y_count),
                           observed, n_rows)

    # get text width of Y attribute name (hidden item, measuring only)
    text = OWCanvasText(self.canvas, y_var.name, x=0, y=0, bold=1, show=0,
                        vertical=True)
    xOff = int(text.boundingRect().height() + 40)
    yOff = 50
    sqareSize = min(self.canvasView.width() - xOff - 35,
                    self.canvasView.height() - yOff - 30)
    if sqareSize < 0:
        return  # canvas is too small to draw rectangles
    self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                 self.canvasView.height())

    # print graph name
    if self.attrCondition == "(None)":
        name = "<b>P(%s, %s) ≠ P(%s)×P(%s)</b>" % (
            self.attrX, self.attrY, self.attrX, self.attrY)
    else:
        cond_value = getHtmlCompatibleString(self.attrConditionValue)
        name = ("<b>P(%s, %s | %s = %s) ≠ "
                "P(%s | %s = %s)×P(%s | %s = %s)</b>") % (
            self.attrX, self.attrY, self.attrCondition, cond_value,
            self.attrX, self.attrCondition, cond_value,
            self.attrY, self.attrCondition, cond_value)
    OWCanvasText(self.canvas, "", xOff + sqareSize / 2, 20, Qt.AlignCenter,
                 htmlText=name)
    OWCanvasText(self.canvas, "N = " + str(n_rows), xOff + sqareSize / 2, 38,
                 Qt.AlignCenter, bold=0)

    ######################
    # compute chi-square over all cells with a non-zero expected count
    chisquare = 0.0
    for i in range(len(valsX)):
        for j in range(len(valsY)):
            (_, xVal), (_, yVal), actual, sum_ = probs[i, j]
            expected = float(xVal * yVal) / float(sum_)
            if expected == 0:
                continue
            chisquare += (actual - expected) * (actual - expected) / expected

    tooltip_template = (
        "<b>X Attribute: %s</b><br>Value: <b>%s</b><br>"
        "Number of examples (p(x)): <b>%d (%.2f%%)</b><hr> "
        "<b>Y Attribute: %s</b><br>Value: <b>%s</b><br>"
        "Number of examples (p(y)): <b>%d (%.2f%%)</b><hr> "
        "<b>Number Of Examples (Probabilities):</b><br>"
        "Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>"
        "Actual (p(x,y)): <b>%d (%.2f%%)</b> "
        "<hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>"
        "Standardized Pearson residual: <b>%.2f</b>")

    ######################
    # draw rectangles
    currX = xOff
    max_ylabel_w = 0
    normX, normY = sum(valsX), sum(valsY)
    # Y value labels are drawn alongside the first drawn column only.  An
    # explicit flag is used because comparing currX with xOff would match
    # again after a column that is zero pixels wide.
    y_labels_pending = True
    for i, x_count in enumerate(valsX):
        if x_count == 0:
            continue
        currY = yOff
        width = int(float(sqareSize * x_count) / float(normX))

        # iterate y values backwards; this way we sort y values correctly
        for j in reversed(range(len(valsY))):
            if valsY[j] == 0:
                continue
            (xAttr, xVal), (yAttr, yVal), actual, sum_ = probs[i, j]
            height = int(float(sqareSize * valsY[j]) / float(normY))

            # create rectangle
            rect = OWCanvasRectangle(self.canvas, currX + 2, currY + 2,
                                     width - 4, height - 4, z=-10)
            self.addRectIndependencePearson(
                rect, currX + 2, currY + 2, width - 4, height - 4,
                (xAttr, xVal), (yAttr, yVal), actual, sum_)

            # both marginals are non-zero here, so expected > 0
            expected = float(xVal * yVal) / float(sum_)
            pearson = (actual - expected) / sqrt(expected)
            rect.setToolTip(tooltip_template % (
                self.attrX, getHtmlCompatibleString(xAttr), xVal,
                100.0 * float(xVal) / float(sum_),
                self.attrY, getHtmlCompatibleString(yAttr), yVal,
                100.0 * float(yVal) / float(sum_),
                expected, 100.0 * float(xVal * yVal) / float(sum_ * sum_),
                actual, 100.0 * float(actual) / float(sum_),
                chisquare, pearson))

            currY += height
            if y_labels_pending:
                xl = OWCanvasText(
                    self.canvas, "", xOff - 10, currY - height / 2,
                    Qt.AlignRight | Qt.AlignVCenter,
                    htmlText=getHtmlCompatibleString(y_values[j]))
                max_ylabel_w = max(int(xl.boundingRect().width()),
                                   max_ylabel_w)
        y_labels_pending = False

        OWCanvasText(self.canvas, "", currX + width / 2,
                     yOff + sqareSize + 5, Qt.AlignCenter,
                     htmlText=getHtmlCompatibleString(x_values[i]))
        currX += width

    # show attribute names
    OWCanvasText(self.canvas, self.attrY, max(xOff - 20 - max_ylabel_w, 20),
                 yOff + sqareSize / 2, Qt.AlignRight | Qt.AlignVCenter,
                 bold=1, vertical=True)
    OWCanvasText(self.canvas, self.attrX, xOff + sqareSize / 2,
                 yOff + sqareSize + 15, Qt.AlignCenter, bold=1)
def draw_data(attr_list, x0_x1, y0_y1, side, condition, total_attrs,
              used_attrs=None, used_vals=None, attr_vals=""):
    """Recursively split a rectangle by attribute values and draw the cells.

    The rectangle ``(x0, x1) x (y0, y1)`` is divided along the x axis when
    ``side`` is even and along the y axis when it is odd, one slice per value
    of ``attr_list[0]``, with slice sizes proportional to the counts found in
    ``conditionaldict``.  For the last attribute each slice is drawn with
    ``add_rect``; otherwise the function recurses into the slice with the
    remaining attributes and ``side + 1``.  ``draw_text`` is called once per
    invocation for the attribute being split.

    Relies on names from the enclosing scope: ``conditionaldict``,
    ``spacing``, ``data``, ``add_rect`` and ``draw_text``.

    Parameters:
        attr_list: attributes still to be split on; the first one is used.
        x0_x1, y0_y1: ``(start, end)`` pairs delimiting the rectangle.
        side: recursion depth; its parity selects the split axis.
        condition: tooltip text accumulated from the enclosing levels.
        total_attrs: passed through unchanged to ``draw_text``.
        used_attrs, used_vals: attributes and values fixed by the enclosing
            levels (``None`` means none yet).
        attr_vals: ``"-"``-joined values fixed so far; key into
            ``conditionaldict``.
    """
    # ``None`` stands in for the former ``[]`` defaults so that no list
    # object is shared between independent top-level calls.
    used_attrs = [] if used_attrs is None else used_attrs
    used_vals = [] if used_vals is None else used_vals
    x0, x1 = x0_x1
    y0, y1 = y0_y1

    if conditionaldict[attr_vals] == 0:
        # empty cell: draw it without a tooltip
        add_rect(x0, x1, y0, y1, "",
                 used_attrs, used_vals, attr_vals=attr_vals)
        # store coordinates for later drawing of labels
        draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                  used_attrs, used_vals, attr_vals)
        return

    attr = attr_list[0]
    # how much smaller rectangles do we draw
    edge = len(attr_list) * spacing
    values = get_variable_values_sorted(data.domain[attr])
    horizontal = side % 2 == 0
    if not horizontal:
        values = values[::-1]  # reverse names if necessary

    # Extent along the axis being split, minus the space needed for
    # separating different attribute values.
    span = (x1 - x0) if horizontal else (y1 - y0)
    gaps = len(values) - 1
    whole = max(0, span - edge * gaps)
    if whole == 0:
        # No room left for the cells: spread the gaps over the extent.  With
        # a single value there is no gap at all, so avoid dividing by zero.
        edge = span / float(gaps) if gaps else 0.0

    key_prefix = attr_vals + "-" if attr_vals != "" else ""
    counts = [conditionaldict[key_prefix + val] for val in values]

    # cumulative[i] == sum(counts[:i]); computed once instead of per slice
    cumulative = [0]
    for count in counts:
        cumulative.append(cumulative[-1] + count)
    total = cumulative[-1]

    def fraction(partial):
        # share of the total; 0 when every count is zero
        return float(partial / total) if total else 0.0

    # if we are visualizing the third attribute and the first attribute
    # has the last value, we have to reverse the order in which the
    # boxes will be drawn otherwise, if the last cell, nearest to the
    # labels of the fourth attribute, is empty, we wouldn't be able to
    # position the labels
    valrange = list(range(len(values)))
    if len(attr_list) + len(used_attrs) == 4 and len(used_attrs) == 2:
        attr1values = get_variable_values_sorted(data.domain[used_attrs[0]])
        if used_vals[0] == attr1values[-1]:
            valrange = valrange[::-1]

    for i in valrange:
        start = i * edge + whole * fraction(cumulative[i])
        end = i * edge + whole * fraction(cumulative[i + 1])
        val = values[i]
        htmlval = getHtmlCompatibleString(val)
        newattrvals = key_prefix + val

        tooltip = condition + 4 * " " + attr + \
            ": <b>" + htmlval + "</b><br>"
        common_args = used_attrs + [attr], used_vals + [val], newattrvals

        if horizontal:
            # moving horizontally: the slice narrows the x range
            slice_x, slice_y = (x0 + start, x0 + end), (y0, y1)
        else:
            slice_x, slice_y = (x0, x1), (y0 + start, y0 + end)

        if len(attr_list) == 1:
            add_rect(slice_x[0], slice_x[1], slice_y[0], slice_y[1],
                     tooltip, *common_args)
        else:
            draw_data(attr_list[1:], slice_x, slice_y, side + 1,
                      tooltip, total_attrs, *common_args)

    draw_text(side, attr_list[0], (x0, x1), (y0, y1),
              total_attrs, used_attrs, used_vals, attr_vals)