def updateGraph(self, *args): for item in self.canvas.items(): self.canvas.removeItem(item) if self.data is None or len(self.data) == 0 or \ self.attrX is None or self.attrY is None: return data = self.data[:, [self.attrX, self.attrY]] valsX = [] valsY = [] contX = get_contingency(data, self.attrX, self.attrX) contY = get_contingency(data, self.attrY, self.attrY) # compute contingency of x and y attributes for entry in contX: sum_ = 0 try: for val in entry: sum_ += val except: pass valsX.append(sum_) for entry in contY: sum_ = 0 try: for val in entry: sum_ += val except: pass valsY.append(sum_) contXY, _ = get_conditional_distribution( data, [data.domain[self.attrX], data.domain[self.attrY]]) # compute probabilities probs = {} for i in range(len(valsX)): valx = valsX[i] for j in range(len(valsY)): valy = valsY[j] try: actualProb = contXY['%s-%s' %(data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] # for val in contXY['%s-%s' %(i, j)]: actualProb += val except: actualProb = 0 probs['%s-%s' %(data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] = ((data.domain[self.attrX].values[i], valx), (data.domain[self.attrY].values[j], valy), actualProb, len(data)) #get text width of Y labels max_ylabel_w = 0 for j in range(len(valsY)): xl = CanvasText(self.canvas, "", 0, 0, html_text= getHtmlCompatibleString(data.domain[self.attrY].values[j]), show=False) max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w) max_ylabel_w = min(max_ylabel_w, 200) #upper limit for label widths # get text width of Y attribute name text = CanvasText(self.canvas, data.domain[self.attrY].name, x = 0, y = 0, bold = 1, show = 0, vertical=True) xOff = int(text.boundingRect().height() + max_ylabel_w) yOff = 55 sqareSize = min(self.canvasView.width() - xOff - 35, self.canvasView.height() - yOff - 50) sqareSize = max(sqareSize, 10) self.canvasView.setSceneRect(0, 0, self.canvasView.width(), self.canvasView.height()) # print graph name name = "<b>P(%s, %s) ≠ P(%s)×P(%s)</b>" %(self.attrX, self.attrY, self.attrX, self.attrY) CanvasText(self.canvas, "", xOff + sqareSize / 2, 20, Qt.AlignCenter, html_text= name) CanvasText(self.canvas, "N = " + str(len(data)), xOff + sqareSize / 2, 38, Qt.AlignCenter, bold = 0) ###################### # compute chi-square chisquare = 0.0 for i in range(len(valsX)): for j in range(len(valsY)): ((xAttr, xVal), (yAttr, yVal), actual, sum_) = probs['%s-%s' %(data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] expected = float(xVal*yVal)/float(sum_) if expected == 0: continue pearson2 = (actual - expected)*(actual - expected) / expected chisquare += pearson2 ###################### # draw rectangles currX = xOff max_xlabel_h = 0 normX, normY = sum(valsX), sum(valsY) self.areas = [] for i in range(len(valsX)): if valsX[i] == 0: continue currY = yOff width = int(float(sqareSize * valsX[i])/float(normX)) for j in range(len(valsY)-1, -1, -1): # this way we sort y values correctly ((xAttr, xVal), (yAttr, yVal), actual, sum_) = probs['%s-%s' %(data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] if valsY[j] == 0: continue height = int(float(sqareSize * valsY[j])/float(normY)) # create rectangle selected = len(self.areas) in self.selection rect = CanvasRectangle( self.canvas, currX+2, currY+2, width-4, height-4, z = -10, onclick=self.select_area) rect.value_pair = i, j self.areas.append(rect) self.addRectIndependencePearson(rect, currX+2, currY+2, width-4, height-4, (xAttr, xVal), (yAttr, yVal), actual, sum_, width=1 + 3 * selected, # Ugly! This is needed since # resize redraws the graph! When this is handled by resizing # just the viewer, update_selection will take care of this ) expected = float(xVal*yVal)/float(sum_) pearson = (actual - expected) / sqrt(expected) tooltipText = """<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(x)): <b>%d (%.2f%%)</b><hr> <b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(y)): <b>%d (%.2f%%)</b><hr> <b>Number Of Instances (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual (p(x,y)): <b>%d (%.2f%%)</b> <hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>""" %(self.attrX, getHtmlCompatibleString(xAttr), xVal, 100.0*float(xVal)/float(sum_), self.attrY, getHtmlCompatibleString(yAttr), yVal, 100.0*float(yVal)/float(sum_), expected, 100.0*float(xVal*yVal)/float(sum_*sum_), actual, 100.0*float(actual)/float(sum_), chisquare, pearson ) rect.setToolTip(tooltipText) currY += height if currX == xOff: CanvasText(self.canvas, "", xOff, currY - height / 2, Qt.AlignRight | Qt.AlignVCenter, html_text= getHtmlCompatibleString(data.domain[self.attrY].values[j])) xl = CanvasText(self.canvas, "", currX + width / 2, yOff + sqareSize, Qt.AlignHCenter | Qt.AlignTop, html_text= getHtmlCompatibleString(data.domain[self.attrX].values[i])) max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h) currX += width # show attribute names CanvasText(self.canvas, self.attrY, 0, yOff + sqareSize / 2, Qt.AlignLeft | Qt.AlignVCenter, bold = 1, vertical=True) CanvasText(self.canvas, self.attrX, xOff + sqareSize / 2, yOff + sqareSize + max_xlabel_h, Qt.AlignHCenter | Qt.AlignTop, bold = 1)
def updateGraph(self, *args): for item in self.canvas.items(): self.canvas.removeItem(item) if self.data is None or len(self.data) == 0 or \ self.attrX is None or self.attrY is None: return data = self.data[:, [self.attrX, self.attrY]] valsX = [] valsY = [] contX = get_contingency(data, self.attrX, self.attrX) contY = get_contingency(data, self.attrY, self.attrY) # compute contingency of x and y attributes for entry in contX: sum_ = 0 try: for val in entry: sum_ += val except: pass valsX.append(sum_) for entry in contY: sum_ = 0 try: for val in entry: sum_ += val except: pass valsY.append(sum_) contXY, _ = get_conditional_distribution( data, [data.domain[self.attrX], data.domain[self.attrY]]) # compute probabilities probs = {} for i in range(len(valsX)): valx = valsX[i] for j in range(len(valsY)): valy = valsY[j] try: actualProb = contXY['%s-%s' % (data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] # for val in contXY['%s-%s' %(i, j)]: actualProb += val except: actualProb = 0 probs['%s-%s' % (data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] = (( data.domain[self.attrX].values[i], valx), (data.domain[self.attrY].values[j], valy), actualProb, len(data)) #get text width of Y labels max_ylabel_w = 0 for j in range(len(valsY)): xl = CanvasText(self.canvas, "", 0, 0, html_text=getHtmlCompatibleString( data.domain[self.attrY].values[j]), show=False) max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w) max_ylabel_w = min(max_ylabel_w, 200) #upper limit for label widths # get text width of Y attribute name text = CanvasText(self.canvas, data.domain[self.attrY].name, x=0, y=0, bold=1, show=0, vertical=True) xOff = int(text.boundingRect().height() + max_ylabel_w) yOff = 55 sqareSize = min(self.canvasView.width() - xOff - 35, self.canvasView.height() - yOff - 50) sqareSize = max(sqareSize, 10) self.canvasView.setSceneRect(0, 0, self.canvasView.width(), self.canvasView.height()) # print graph name name = "<b>P(%s, %s) ≠ P(%s)×P(%s)</b>" % ( self.attrX, self.attrY, self.attrX, self.attrY) CanvasText(self.canvas, "", xOff + sqareSize / 2, 20, Qt.AlignCenter, html_text=name) CanvasText(self.canvas, "N = " + str(len(data)), xOff + sqareSize / 2, 38, Qt.AlignCenter, bold=0) ###################### # compute chi-square chisquare = 0.0 for i in range(len(valsX)): for j in range(len(valsY)): ((xAttr, xVal), (yAttr, yVal), actual, sum_) = probs['%s-%s' % (data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] expected = float(xVal * yVal) / float(sum_) if expected == 0: continue pearson2 = (actual - expected) * (actual - expected) / expected chisquare += pearson2 ###################### # draw rectangles currX = xOff max_xlabel_h = 0 normX, normY = sum(valsX), sum(valsY) self.areas = [] for i in range(len(valsX)): if valsX[i] == 0: continue currY = yOff width = int(float(sqareSize * valsX[i]) / float(normX)) for j in range(len(valsY) - 1, -1, -1): # this way we sort y values correctly ((xAttr, xVal), (yAttr, yVal), actual, sum_) = probs['%s-%s' % (data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] if valsY[j] == 0: continue height = int(float(sqareSize * valsY[j]) / float(normY)) # create rectangle selected = len(self.areas) in self.selection rect = CanvasRectangle(self.canvas, currX + 2, currY + 2, width - 4, height - 4, z=-10, onclick=self.select_area) rect.value_pair = i, j self.areas.append(rect) self.addRectIndependencePearson( rect, currX + 2, currY + 2, width - 4, height - 4, (xAttr, xVal), (yAttr, yVal), actual, sum_, width=1 + 3 * selected, # Ugly! This is needed since # resize redraws the graph! When this is handled by resizing # just the viewer, update_selection will take care of this ) expected = float(xVal * yVal) / float(sum_) pearson = (actual - expected) / sqrt(expected) tooltipText = """<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(x)): <b>%d (%.2f%%)</b><hr> <b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(y)): <b>%d (%.2f%%)</b><hr> <b>Number Of Instances (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual (p(x,y)): <b>%d (%.2f%%)</b> <hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>""" % ( self.attrX, getHtmlCompatibleString(xAttr), xVal, 100.0 * float(xVal) / float(sum_), self.attrY, getHtmlCompatibleString(yAttr), yVal, 100.0 * float(yVal) / float(sum_), expected, 100.0 * float(xVal * yVal) / float(sum_ * sum_), actual, 100.0 * float(actual) / float(sum_), chisquare, pearson) rect.setToolTip(tooltipText) currY += height if currX == xOff: CanvasText(self.canvas, "", xOff, currY - height / 2, Qt.AlignRight | Qt.AlignVCenter, html_text=getHtmlCompatibleString( data.domain[self.attrY].values[j])) xl = CanvasText(self.canvas, "", currX + width / 2, yOff + sqareSize, Qt.AlignHCenter | Qt.AlignTop, html_text=getHtmlCompatibleString( data.domain[self.attrX].values[i])) max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h) currX += width # show attribute names CanvasText(self.canvas, self.attrY, 0, yOff + sqareSize / 2, Qt.AlignLeft | Qt.AlignVCenter, bold=1, vertical=True) CanvasText(self.canvas, self.attrX, xOff + sqareSize / 2, yOff + sqareSize + max_xlabel_h, Qt.AlignHCenter | Qt.AlignTop, bold=1)
def test_get_conditional_distribution(self): iris = SqlTable(self.conn, self.iris, inspect_values=True) sepal_length = iris.domain["sepal length"] get_conditional_distribution(iris, [sepal_length]) get_conditional_distribution(iris, list(iris.domain.variables))