Exemple #1
0
    def updateGraph(self, *args):
        """Clear the canvas and redraw the X-by-Y grid of rectangles.

        One rectangle is drawn for every pair of X and Y values whose
        marginal counts are both non-zero.  Column widths are proportional
        to the marginal counts of the X values and row heights to those of
        the Y values.  Every rectangle is passed to
        ``self.addRectIndependencePearson`` and gets a tooltip listing the
        expected and actual counts, the overall chi-square and the
        standardized Pearson residual of its cell.

        Nothing is drawn when there is no data or when either attribute is
        unset.  ``self.areas`` is rebuilt as the list of rectangles in
        drawing order, and ``self.selection`` is read to decide which of
        them receive the larger ``width`` argument.

        :param args: not used by the body.
        """
        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attrX is None or self.attrY is None:
            return

        def marginal_totals(contingency):
            # Total of each entry of the contingency.  An entry that cannot
            # be iterated/summed still counts as 0, but unrelated errors are
            # no longer silently swallowed by a bare ``except``.
            totals = []
            for entry in contingency:
                try:
                    totals.append(sum(entry))
                except TypeError:
                    totals.append(0)
            return totals

        data = self.data[:, [self.attrX, self.attrY]]
        # compute contingency of x and y attributes
        valsX = marginal_totals(get_contingency(data, self.attrX, self.attrX))
        valsY = marginal_totals(get_contingency(data, self.attrY, self.attrY))

        var_x, var_y = data.domain[self.attrX], data.domain[self.attrY]
        x_values, y_values = var_x.values, var_y.values
        n_instances = len(data)

        contXY, _ = get_conditional_distribution(data, [var_x, var_y])
        # compute probabilities; cells are keyed by value *indices* so that
        # value names containing '-' cannot collide in this local table
        probs = {}
        for i, valx in enumerate(valsX):
            for j, valy in enumerate(valsY):
                try:
                    actualProb = contXY['%s-%s' % (x_values[i], y_values[j])]
                except LookupError:
                    actualProb = 0
                probs[i, j] = ((x_values[i], valx), (y_values[j], valy),
                               actualProb, n_instances)

        # get text width of Y labels
        max_ylabel_w = 0
        for j in range(len(valsY)):
            xl = CanvasText(
                self.canvas, "", 0, 0,
                html_text=getHtmlCompatibleString(y_values[j]), show=False)
            max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w)
        max_ylabel_w = min(max_ylabel_w, 200)  # upper limit for label widths
        # get text width of Y attribute name
        text = CanvasText(self.canvas, var_y.name, x=0, y=0, bold=1, show=0,
                          vertical=True)
        xOff = int(text.boundingRect().height() + max_ylabel_w)
        yOff = 55
        square_size = min(self.canvasView.width() - xOff - 35,
                          self.canvasView.height() - yOff - 50)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                     self.canvasView.height())

        # print graph name
        name = "<b>P(%s, %s) &#8800; P(%s)&times;P(%s)</b>" % (
            self.attrX, self.attrY, self.attrX, self.attrY)
        CanvasText(self.canvas, "", xOff + square_size / 2, 20,
                   Qt.AlignCenter, html_text=name)
        CanvasText(self.canvas, "N = " + str(n_instances),
                   xOff + square_size / 2, 38, Qt.AlignCenter, bold=0)

        ######################
        # compute chi-square
        chisquare = 0.0
        for i in range(len(valsX)):
            for j in range(len(valsY)):
                (_, xVal), (_, yVal), actual, sum_ = probs[i, j]
                expected = float(xVal * yVal) / float(sum_)
                if expected == 0:
                    continue
                chisquare += (actual - expected) * (actual - expected) \
                    / expected

        ######################
        # draw rectangles
        currX = xOff
        max_xlabel_h = 0
        normX, normY = sum(valsX), sum(valsY)
        self.areas = []
        # Y value labels belong to the first drawn column only.  An explicit
        # flag is used instead of comparing currX with xOff, which stayed
        # true (and repeated the labels) after columns of zero pixel width.
        y_labels_drawn = False
        for i in range(len(valsX)):
            if valsX[i] == 0:
                continue
            currY = yOff
            width = int(float(square_size * valsX[i]) / float(normX))

            # this way we sort y values correctly
            for j in range(len(valsY) - 1, -1, -1):
                (xAttr, xVal), (yAttr, yVal), actual, sum_ = probs[i, j]
                if valsY[j] == 0:
                    continue
                height = int(float(square_size * valsY[j]) / float(normY))

                # create rectangle
                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(
                    self.canvas, currX + 2, currY + 2, width - 4, height - 4,
                    z=-10, onclick=self.select_area)
                rect.value_pair = i, j
                self.areas.append(rect)
                self.addRectIndependencePearson(
                    rect, currX + 2, currY + 2, width - 4, height - 4,
                    (xAttr, xVal), (yAttr, yVal), actual, sum_,
                    width=1 + 3 * selected,  # Ugly! This is needed since
                    # resize redraws the graph! When this is handled by resizing
                    # just the viewer, update_selection will take care of this
                    )

                # both marginals are non-zero here, so expected > 0
                expected = float(xVal * yVal) / float(sum_)
                pearson = (actual - expected) / sqrt(expected)
                tooltipText = """<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(x)): <b>%d (%.2f%%)</b><hr>
                                <b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(y)): <b>%d (%.2f%%)</b><hr>
                                <b>Number Of Instances (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual (p(x,y)): <b>%d (%.2f%%)</b>
                                <hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>""" % (
                    self.attrX, getHtmlCompatibleString(xAttr), xVal,
                    100.0 * float(xVal) / float(sum_),
                    self.attrY, getHtmlCompatibleString(yAttr), yVal,
                    100.0 * float(yVal) / float(sum_),
                    expected, 100.0 * float(xVal * yVal) / float(sum_ * sum_),
                    actual, 100.0 * float(actual) / float(sum_),
                    chisquare, pearson)
                rect.setToolTip(tooltipText)

                currY += height
                if not y_labels_drawn:
                    CanvasText(
                        self.canvas, "", xOff, currY - height / 2,
                        Qt.AlignRight | Qt.AlignVCenter,
                        html_text=getHtmlCompatibleString(y_values[j]))
            y_labels_drawn = True

            xl = CanvasText(
                self.canvas, "", currX + width / 2, yOff + square_size,
                Qt.AlignHCenter | Qt.AlignTop,
                html_text=getHtmlCompatibleString(x_values[i]))
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)

            currX += width

        # show attribute names
        CanvasText(self.canvas, self.attrY, 0, yOff + square_size / 2,
                   Qt.AlignLeft | Qt.AlignVCenter, bold=1, vertical=True)
        CanvasText(self.canvas, self.attrX, xOff + square_size / 2,
                   yOff + square_size + max_xlabel_h,
                   Qt.AlignHCenter | Qt.AlignTop, bold=1)
Exemple #2
0
    def updateGraph(self, *args):
        """Clear the canvas and redraw the X-by-Y grid of rectangles.

        One rectangle is drawn for every pair of X and Y values whose
        marginal counts are both non-zero.  Column widths are proportional
        to the marginal counts of the X values and row heights to those of
        the Y values.  Every rectangle is passed to
        ``self.addRectIndependencePearson`` and gets a tooltip listing the
        expected and actual counts, the overall chi-square and the
        standardized Pearson residual of its cell.

        Nothing is drawn when there is no data or when either attribute is
        unset.  ``self.areas`` is rebuilt as the list of rectangles in
        drawing order, and ``self.selection`` is read to decide which of
        them receive the larger ``width`` argument.

        :param args: not used by the body.
        """
        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attrX is None or self.attrY is None:
            return

        def marginal_totals(contingency):
            # Total of each entry of the contingency.  An entry that cannot
            # be iterated/summed still counts as 0, but unrelated errors are
            # no longer silently swallowed by a bare ``except``.
            totals = []
            for entry in contingency:
                try:
                    totals.append(sum(entry))
                except TypeError:
                    totals.append(0)
            return totals

        data = self.data[:, [self.attrX, self.attrY]]
        # compute contingency of x and y attributes
        valsX = marginal_totals(get_contingency(data, self.attrX, self.attrX))
        valsY = marginal_totals(get_contingency(data, self.attrY, self.attrY))

        var_x, var_y = data.domain[self.attrX], data.domain[self.attrY]
        x_values, y_values = var_x.values, var_y.values
        n_instances = len(data)

        contXY, _ = get_conditional_distribution(data, [var_x, var_y])
        # compute probabilities; cells are keyed by value *indices* so that
        # value names containing '-' cannot collide in this local table
        probs = {}
        for i, valx in enumerate(valsX):
            for j, valy in enumerate(valsY):
                try:
                    actualProb = contXY['%s-%s' % (x_values[i], y_values[j])]
                except LookupError:
                    actualProb = 0
                probs[i, j] = ((x_values[i], valx), (y_values[j], valy),
                               actualProb, n_instances)

        # get text width of Y labels
        max_ylabel_w = 0
        for j in range(len(valsY)):
            xl = CanvasText(
                self.canvas, "", 0, 0,
                html_text=getHtmlCompatibleString(y_values[j]), show=False)
            max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w)
        max_ylabel_w = min(max_ylabel_w, 200)  # upper limit for label widths
        # get text width of Y attribute name
        text = CanvasText(self.canvas, var_y.name, x=0, y=0, bold=1, show=0,
                          vertical=True)
        xOff = int(text.boundingRect().height() + max_ylabel_w)
        yOff = 55
        square_size = min(self.canvasView.width() - xOff - 35,
                          self.canvasView.height() - yOff - 50)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                     self.canvasView.height())

        # print graph name
        name = "<b>P(%s, %s) &#8800; P(%s)&times;P(%s)</b>" % (
            self.attrX, self.attrY, self.attrX, self.attrY)
        CanvasText(self.canvas, "", xOff + square_size / 2, 20,
                   Qt.AlignCenter, html_text=name)
        CanvasText(self.canvas, "N = " + str(n_instances),
                   xOff + square_size / 2, 38, Qt.AlignCenter, bold=0)

        ######################
        # compute chi-square
        chisquare = 0.0
        for i in range(len(valsX)):
            for j in range(len(valsY)):
                (_, xVal), (_, yVal), actual, sum_ = probs[i, j]
                expected = float(xVal * yVal) / float(sum_)
                if expected == 0:
                    continue
                chisquare += (actual - expected) * (actual - expected) \
                    / expected

        ######################
        # draw rectangles
        currX = xOff
        max_xlabel_h = 0
        normX, normY = sum(valsX), sum(valsY)
        self.areas = []
        # Y value labels belong to the first drawn column only.  An explicit
        # flag is used instead of comparing currX with xOff, which stayed
        # true (and repeated the labels) after columns of zero pixel width.
        y_labels_drawn = False
        for i in range(len(valsX)):
            if valsX[i] == 0:
                continue
            currY = yOff
            width = int(float(square_size * valsX[i]) / float(normX))

            # this way we sort y values correctly
            for j in range(len(valsY) - 1, -1, -1):
                (xAttr, xVal), (yAttr, yVal), actual, sum_ = probs[i, j]
                if valsY[j] == 0:
                    continue
                height = int(float(square_size * valsY[j]) / float(normY))

                # create rectangle
                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(
                    self.canvas, currX + 2, currY + 2, width - 4, height - 4,
                    z=-10, onclick=self.select_area)
                rect.value_pair = i, j
                self.areas.append(rect)
                self.addRectIndependencePearson(
                    rect, currX + 2, currY + 2, width - 4, height - 4,
                    (xAttr, xVal), (yAttr, yVal), actual, sum_,
                    width=1 + 3 * selected,  # Ugly! This is needed since
                    # resize redraws the graph! When this is handled by resizing
                    # just the viewer, update_selection will take care of this
                    )

                # both marginals are non-zero here, so expected > 0
                expected = float(xVal * yVal) / float(sum_)
                pearson = (actual - expected) / sqrt(expected)
                tooltipText = """<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(x)): <b>%d (%.2f%%)</b><hr>
                                <b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(y)): <b>%d (%.2f%%)</b><hr>
                                <b>Number Of Instances (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual (p(x,y)): <b>%d (%.2f%%)</b>
                                <hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>""" % (
                    self.attrX, getHtmlCompatibleString(xAttr), xVal,
                    100.0 * float(xVal) / float(sum_),
                    self.attrY, getHtmlCompatibleString(yAttr), yVal,
                    100.0 * float(yVal) / float(sum_),
                    expected, 100.0 * float(xVal * yVal) / float(sum_ * sum_),
                    actual, 100.0 * float(actual) / float(sum_),
                    chisquare, pearson)
                rect.setToolTip(tooltipText)

                currY += height
                if not y_labels_drawn:
                    CanvasText(
                        self.canvas, "", xOff, currY - height / 2,
                        Qt.AlignRight | Qt.AlignVCenter,
                        html_text=getHtmlCompatibleString(y_values[j]))
            y_labels_drawn = True

            xl = CanvasText(
                self.canvas, "", currX + width / 2, yOff + square_size,
                Qt.AlignHCenter | Qt.AlignTop,
                html_text=getHtmlCompatibleString(x_values[i]))
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)

            currX += width

        # show attribute names
        CanvasText(self.canvas, self.attrY, 0, yOff + square_size / 2,
                   Qt.AlignLeft | Qt.AlignVCenter, bold=1, vertical=True)
        CanvasText(self.canvas, self.attrX, xOff + square_size / 2,
                   yOff + square_size + max_xlabel_h,
                   Qt.AlignHCenter | Qt.AlignTop, bold=1)
Exemple #3
0
 def test_get_conditional_distribution(self):
     """Smoke test: both calls must complete without raising.

     The function is called once with a single variable and once with
     every variable of the table's domain; return values are not checked.
     """
     table = SqlTable(self.conn, self.iris, inspect_values=True)
     # a single-variable request
     single_variable = [table.domain["sepal length"]]
     get_conditional_distribution(table, single_variable)
     # a request covering every variable of the domain
     every_variable = list(table.domain.variables)
     get_conditional_distribution(table, every_variable)