예제 #1
0
파일: owsieve.py 프로젝트: mrihtar/orange3
    def updateGraph(self, *args):
        """Clear the canvas and redraw the sieve diagram for attrX / attrY.

        Nothing is drawn when there is no data or when either attribute is
        unset. Otherwise one rectangle is created for every pair of X and Y
        values whose marginal counts are both non-zero; its width and height
        are proportional to those marginal counts. Each rectangle is handed to
        ``addRectIndependencePearson`` together with the observed joint count,
        receives a tooltip listing expected and actual counts, the overall
        chi-square and the cell residual ``(actual - expected) /
        sqrt(expected)``, and is appended to ``self.areas``.

        Positional arguments are accepted and ignored.
        """
        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attrX is None or self.attrY is None:
            return

        def marginal_count(entry):
            # Sum one contingency entry. Only a TypeError (an entry that is
            # not iterable, or items that cannot be added) is tolerated, and
            # the partial sum gathered so far is kept. Any other exception is
            # a genuine error and propagates instead of being swallowed.
            total = 0
            try:
                for val in entry:
                    total += val
            except TypeError:
                pass
            return total

        data = self.data[:, [self.attrX, self.attrY]]
        contX = get_contingency(data, self.attrX, self.attrX)
        contY = get_contingency(data, self.attrY, self.attrY)
        # marginal counts of the x and y attribute values
        valsX = [marginal_count(entry) for entry in contX]
        valsY = [marginal_count(entry) for entry in contY]

        var_x = data.domain[self.attrX]
        var_y = data.domain[self.attrY]
        x_values = var_x.values
        y_values = var_y.values
        n_instances = len(data)

        contXY, _ = get_conditional_distribution(data, [var_x, var_y])

        # Per-cell statistics, keyed by the (x index, y index) pair. Index
        # keys cannot collide, unlike "<x value>-<y value>" strings, which
        # clash when a value itself contains a dash.
        probs = {}
        for i, valx in enumerate(valsX):
            for j, valy in enumerate(valsY):
                try:
                    actual_count = contXY['%s-%s' % (x_values[i],
                                                     y_values[j])]
                except KeyError:
                    # the combination does not occur in the data
                    actual_count = 0
                probs[i, j] = ((x_values[i], valx), (y_values[j], valy),
                               actual_count, n_instances)

        # get text width of Y labels (measured on hidden text items)
        max_ylabel_w = 0
        for j in range(len(valsY)):
            xl = CanvasText(self.canvas,
                            "",
                            0,
                            0,
                            html_text=getHtmlCompatibleString(y_values[j]),
                            show=False)
            max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w)
        max_ylabel_w = min(max_ylabel_w, 200)  # upper limit for label widths
        # get text width of Y attribute name
        text = CanvasText(self.canvas,
                          var_y.name,
                          x=0,
                          y=0,
                          bold=1,
                          show=0,
                          vertical=True)
        xOff = int(text.boundingRect().height() + max_ylabel_w)
        yOff = 55
        square_size = min(self.canvasView.width() - xOff - 35,
                          self.canvasView.height() - yOff - 50)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                     self.canvasView.height())

        # print graph name
        name = "<b>P(%s, %s) &#8800; P(%s)&times;P(%s)</b>" % (
            self.attrX, self.attrY, self.attrX, self.attrY)
        CanvasText(self.canvas,
                   "",
                   xOff + square_size / 2,
                   20,
                   Qt.AlignCenter,
                   html_text=name)
        CanvasText(self.canvas,
                   "N = " + str(n_instances),
                   xOff + square_size / 2,
                   38,
                   Qt.AlignCenter,
                   bold=0)

        ######################
        # compute chi-square
        chisquare = 0.0
        for i in range(len(valsX)):
            for j in range(len(valsY)):
                (_, xVal), (_, yVal), actual, sum_ = probs[i, j]
                expected = float(xVal * yVal) / float(sum_)
                if expected == 0:
                    continue
                chisquare += (actual - expected) * (actual - expected) \
                    / expected

        ######################
        # draw rectangles
        currX = xOff
        max_xlabel_h = 0
        normX, normY = sum(valsX), sum(valsY)
        self.areas = []
        for i in range(len(valsX)):
            if valsX[i] == 0:
                continue
            currY = yOff
            width = int(float(square_size * valsX[i]) / float(normX))

            # walk the y values backwards; this way they are sorted correctly
            for j in reversed(range(len(valsY))):
                if valsY[j] == 0:
                    continue
                (xAttr, xVal), (yAttr, yVal), actual, sum_ = probs[i, j]
                height = int(float(square_size * valsY[j]) / float(normY))

                # create rectangle
                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(self.canvas,
                                       currX + 2,
                                       currY + 2,
                                       width - 4,
                                       height - 4,
                                       z=-10,
                                       onclick=self.select_area)
                rect.value_pair = i, j
                self.areas.append(rect)
                self.addRectIndependencePearson(
                    rect,
                    currX + 2,
                    currY + 2,
                    width - 4,
                    height - 4,
                    (xAttr, xVal),
                    (yAttr, yVal),
                    actual,
                    sum_,
                    width=1 + 3 * selected,  # Ugly! This is needed since
                    # resize redraws the graph! When this is handled by resizing
                    # just the viewer, update_selection will take care of this
                )

                # expected > 0 here: both marginal counts are non-zero
                expected = float(xVal * yVal) / float(sum_)
                pearson = (actual - expected) / sqrt(expected)
                tooltipText = """<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(x)): <b>%d (%.2f%%)</b><hr>
                                <b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(y)): <b>%d (%.2f%%)</b><hr>
                                <b>Number Of Instances (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual (p(x,y)): <b>%d (%.2f%%)</b>
                                <hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>""" % (
                    self.attrX, getHtmlCompatibleString(xAttr), xVal,
                    100.0 * float(xVal) / float(sum_), self.attrY,
                    getHtmlCompatibleString(yAttr), yVal,
                    100.0 * float(yVal) / float(sum_), expected,
                    100.0 * float(xVal * yVal) / float(sum_ * sum_), actual,
                    100.0 * float(actual) / float(sum_), chisquare, pearson)
                rect.setToolTip(tooltipText)

                currY += height
                if currX == xOff:
                    # y value labels are drawn next to the leftmost column
                    CanvasText(self.canvas,
                               "",
                               xOff,
                               currY - height / 2,
                               Qt.AlignRight | Qt.AlignVCenter,
                               html_text=getHtmlCompatibleString(
                                   y_values[j]))

            xl = CanvasText(self.canvas,
                            "",
                            currX + width / 2,
                            yOff + square_size,
                            Qt.AlignHCenter | Qt.AlignTop,
                            html_text=getHtmlCompatibleString(x_values[i]))
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)

            currX += width

        # show attribute names
        CanvasText(self.canvas,
                   self.attrY,
                   0,
                   yOff + square_size / 2,
                   Qt.AlignLeft | Qt.AlignVCenter,
                   bold=1,
                   vertical=True)
        CanvasText(self.canvas,
                   self.attrX,
                   xOff + square_size / 2,
                   yOff + square_size + max_xlabel_h,
                   Qt.AlignHCenter | Qt.AlignTop,
                   bold=1)
예제 #2
0
파일: owmosaic.py 프로젝트: karoema/orange3
        def draw_data(attr_list,
                      x0_x1,
                      y0_y1,
                      side,
                      condition,
                      total_attrs,
                      used_attrs=None,
                      used_vals=None,
                      attr_vals=""):
            """Recursively split a rectangle by attribute values and draw it.

            The area ``x0_x1`` x ``y0_y1`` is divided among the values of
            ``attr_list[0]`` in proportion to their counts in
            ``conditionaldict``. Each part is drawn with ``add_rect`` when
            this is the last attribute, and otherwise split further by a
            recursive call on ``attr_list[1:]``.

            Parameters:
                attr_list: attributes still to be drawn; the first one is
                    handled at this level.
                x0_x1, y0_y1: ``(start, end)`` pairs bounding the area.
                side: level counter; even levels split along the x axis, odd
                    levels along the y axis with the value order reversed.
                condition: HTML tooltip prefix accumulated by outer levels.
                total_attrs: passed through unchanged to ``draw_text``.
                used_attrs, used_vals: attributes and values already fixed by
                    the enclosing levels (default: none).
                attr_vals: the fixed values joined with "-", used as the key
                    into ``conditionaldict``.
            """
            # Fresh lists per call: these are handed on to add_rect/draw_text,
            # so a shared mutable default could leak state between calls.
            used_attrs = [] if used_attrs is None else used_attrs
            used_vals = [] if used_vals is None else used_vals

            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0,
                         x1,
                         y0,
                         y1,
                         "",
                         used_attrs,
                         used_vals,
                         attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            horizontal = side % 2 == 0  # even levels split along the x axis
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(data.domain[attr])
            if not horizontal:
                values = values[::-1]  # reverse names if necessary

            # remove the space needed for separating different attr. values
            extent = (x1 - x0) if horizontal else (y1 - y0)
            n_gaps = len(values) - 1
            whole = max(0, extent - edge * n_gaps)
            # When the gaps alone use up the extent, shrink them to fit.
            # With a single value there are no gaps, so nothing is rescaled;
            # dividing by n_gaps == 0 would raise ZeroDivisionError.
            if whole == 0 and n_gaps > 0:
                edge = extent / float(n_gaps)

            if attr_vals == "":
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [
                    conditionaldict[attr_vals + "-" + val] for val in values
                ]
            # NOTE(review): if every count is zero while
            # conditionaldict[attr_vals] is not, the divisions by `total`
            # below fail; confirm whether that can happen for this data.
            total = sum(counts)

            # if we are visualizing the third attribute and the first attribute
            # has the last value, we have to reverse the order in which the
            # boxes will be drawn otherwise, if the last cell, nearest to the
            # labels of the fourth attribute, is empty, we wouldn't be able to
            # position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(
                    data.domain[used_attrs[0]])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = getHtmlCompatibleString(val)
                if attr_vals != "":
                    newattrvals = attr_vals + "-" + val
                else:
                    newattrvals = val

                tooltip = condition + 4 * "&nbsp;" + attr + \
                    ": <b>" + htmlval + "</b><br>"
                common_args = used_attrs + [attr], used_vals + [val], \
                    newattrvals

                # bounds of the sub-area belonging to this value
                if horizontal:
                    xs, ys = (x0 + start, x0 + end), (y0, y1)
                else:
                    xs, ys = (x0, x1), (y0 + start, y0 + end)

                if len(attr_list) == 1:
                    add_rect(xs[0], xs[1], ys[0], ys[1], tooltip,
                             *common_args)
                else:
                    draw_data(attr_list[1:], xs, ys, side + 1, tooltip,
                              total_attrs, *common_args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                      used_attrs, used_vals, attr_vals)
예제 #3
0
    def updateGraph(self, *args):
        """Clear the canvas and redraw the sieve diagram for attrX / attrY.

        Nothing is drawn when there is no data or when either attribute is
        unset. Otherwise one rectangle is created for every pair of X and Y
        values whose marginal counts are both non-zero; its width and height
        are proportional to those marginal counts. Each rectangle is handed to
        ``addRectIndependencePearson`` together with the observed joint count,
        receives a tooltip listing expected and actual counts, the overall
        chi-square and the cell residual ``(actual - expected) /
        sqrt(expected)``, and is appended to ``self.areas``.

        Positional arguments are accepted and ignored.
        """
        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attrX is None or self.attrY is None:
            return

        def marginal_count(entry):
            # Sum one contingency entry. Only a TypeError (an entry that is
            # not iterable, or items that cannot be added) is tolerated, and
            # the partial sum gathered so far is kept. Any other exception is
            # a genuine error and propagates instead of being swallowed.
            total = 0
            try:
                for val in entry:
                    total += val
            except TypeError:
                pass
            return total

        data = self.data[:, [self.attrX, self.attrY]]
        contX = get_contingency(data, self.attrX, self.attrX)
        contY = get_contingency(data, self.attrY, self.attrY)
        # marginal counts of the x and y attribute values
        valsX = [marginal_count(entry) for entry in contX]
        valsY = [marginal_count(entry) for entry in contY]

        var_x = data.domain[self.attrX]
        var_y = data.domain[self.attrY]
        x_values = var_x.values
        y_values = var_y.values
        n_instances = len(data)

        contXY, _ = get_conditional_distribution(data, [var_x, var_y])

        # Per-cell statistics, keyed by the (x index, y index) pair. Index
        # keys cannot collide, unlike "<x value>-<y value>" strings, which
        # clash when a value itself contains a dash.
        probs = {}
        for i, valx in enumerate(valsX):
            for j, valy in enumerate(valsY):
                try:
                    actual_count = contXY['%s-%s' % (x_values[i],
                                                     y_values[j])]
                except KeyError:
                    # the combination does not occur in the data
                    actual_count = 0
                probs[i, j] = ((x_values[i], valx), (y_values[j], valy),
                               actual_count, n_instances)

        # get text width of Y labels (measured on hidden text items)
        max_ylabel_w = 0
        for j in range(len(valsY)):
            xl = CanvasText(self.canvas, "", 0, 0,
                            html_text=getHtmlCompatibleString(y_values[j]),
                            show=False)
            max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w)
        max_ylabel_w = min(max_ylabel_w, 200)  # upper limit for label widths
        # get text width of Y attribute name
        text = CanvasText(self.canvas, var_y.name, x=0, y=0, bold=1, show=0,
                          vertical=True)
        xOff = int(text.boundingRect().height() + max_ylabel_w)
        yOff = 55
        square_size = min(self.canvasView.width() - xOff - 35,
                          self.canvasView.height() - yOff - 50)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                     self.canvasView.height())

        # print graph name
        name = "<b>P(%s, %s) &#8800; P(%s)&times;P(%s)</b>" % (
            self.attrX, self.attrY, self.attrX, self.attrY)
        CanvasText(self.canvas, "", xOff + square_size / 2, 20,
                   Qt.AlignCenter, html_text=name)
        CanvasText(self.canvas, "N = " + str(n_instances),
                   xOff + square_size / 2, 38, Qt.AlignCenter, bold=0)

        ######################
        # compute chi-square
        chisquare = 0.0
        for i in range(len(valsX)):
            for j in range(len(valsY)):
                (_, xVal), (_, yVal), actual, sum_ = probs[i, j]
                expected = float(xVal * yVal) / float(sum_)
                if expected == 0:
                    continue
                chisquare += (actual - expected) * (actual - expected) \
                    / expected

        ######################
        # draw rectangles
        currX = xOff
        max_xlabel_h = 0
        normX, normY = sum(valsX), sum(valsY)
        self.areas = []
        for i in range(len(valsX)):
            if valsX[i] == 0:
                continue
            currY = yOff
            width = int(float(square_size * valsX[i]) / float(normX))

            # walk the y values backwards; this way they are sorted correctly
            for j in reversed(range(len(valsY))):
                if valsY[j] == 0:
                    continue
                (xAttr, xVal), (yAttr, yVal), actual, sum_ = probs[i, j]
                height = int(float(square_size * valsY[j]) / float(normY))

                # create rectangle
                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(
                    self.canvas, currX + 2, currY + 2, width - 4, height - 4,
                    z=-10, onclick=self.select_area)
                rect.value_pair = i, j
                self.areas.append(rect)
                self.addRectIndependencePearson(
                    rect, currX + 2, currY + 2, width - 4, height - 4,
                    (xAttr, xVal), (yAttr, yVal), actual, sum_,
                    width=1 + 3 * selected,  # Ugly! This is needed since
                    # resize redraws the graph! When this is handled by resizing
                    # just the viewer, update_selection will take care of this
                )

                # expected > 0 here: both marginal counts are non-zero
                expected = float(xVal * yVal) / float(sum_)
                pearson = (actual - expected) / sqrt(expected)
                tooltipText = """<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(x)): <b>%d (%.2f%%)</b><hr>
                                <b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of instances (p(y)): <b>%d (%.2f%%)</b><hr>
                                <b>Number Of Instances (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual (p(x,y)): <b>%d (%.2f%%)</b>
                                <hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>""" % (
                    self.attrX, getHtmlCompatibleString(xAttr), xVal,
                    100.0 * float(xVal) / float(sum_), self.attrY,
                    getHtmlCompatibleString(yAttr), yVal,
                    100.0 * float(yVal) / float(sum_), expected,
                    100.0 * float(xVal * yVal) / float(sum_ * sum_), actual,
                    100.0 * float(actual) / float(sum_), chisquare, pearson)
                rect.setToolTip(tooltipText)

                currY += height
                if currX == xOff:
                    # y value labels are drawn next to the leftmost column
                    CanvasText(self.canvas, "", xOff, currY - height / 2,
                               Qt.AlignRight | Qt.AlignVCenter,
                               html_text=getHtmlCompatibleString(y_values[j]))

            xl = CanvasText(self.canvas, "", currX + width / 2,
                            yOff + square_size, Qt.AlignHCenter | Qt.AlignTop,
                            html_text=getHtmlCompatibleString(x_values[i]))
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)

            currX += width

        # show attribute names
        CanvasText(self.canvas, self.attrY, 0, yOff + square_size / 2,
                   Qt.AlignLeft | Qt.AlignVCenter, bold=1, vertical=True)
        CanvasText(self.canvas, self.attrX, xOff + square_size / 2,
                   yOff + square_size + max_xlabel_h,
                   Qt.AlignHCenter | Qt.AlignTop, bold=1)
예제 #4
0
파일: owsieve.py 프로젝트: Micseb/orange3
    def updateGraph(self, *args):
        """Clear the canvas and redraw the sieve diagram for attrX / attrY.

        Nothing is drawn when there is no data, when either attribute is
        unset, when ``getConditionalData`` yields nothing, or when the view
        is too small. Otherwise one rectangle is created for every pair of X
        and Y values whose marginal counts are both non-zero; its width and
        height are proportional to those marginal counts. Each rectangle is
        handed to ``addRectIndependencePearson`` together with the observed
        joint count and receives a tooltip listing expected and actual
        counts, the overall chi-square and the cell residual
        ``(actual - expected) / sqrt(expected)``.

        Positional arguments are accepted and ignored.
        """
        for item in self.canvas.items():
            self.canvas.removeItem(item)    # remove all canvas items
        if not self.data:
            return
        if not self.attrX or not self.attrY:
            return

        data = self.getConditionalData()
        if not data or len(data) == 0:
            return

        def marginal_count(entry):
            # Sum one contingency entry. Only a TypeError (an entry that is
            # not iterable, or items that cannot be added) is tolerated, and
            # the partial sum gathered so far is kept. Any other exception is
            # a genuine error and propagates instead of being swallowed.
            total = 0
            try:
                for val in entry:
                    total += val
            except TypeError:
                pass
            return total

        contX = get_contingency(data, self.attrX, self.attrX)
        contY = get_contingency(data, self.attrY, self.attrY)
        # marginal counts of the x and y attribute values
        valsX = [marginal_count(entry) for entry in contX]
        valsY = [marginal_count(entry) for entry in contY]

        var_x = data.domain[self.attrX]
        var_y = data.domain[self.attrY]
        x_values = var_x.values
        y_values = var_y.values
        n_instances = len(data)

        # joint counts, looked up by "<x value>-<y value>"
        contXY = self.getConditionalDistributions(data, [var_x, var_y])

        # Per-cell statistics, keyed by the (x index, y index) pair. Index
        # keys cannot collide, unlike "<x value>-<y value>" strings, which
        # clash when a value itself contains a dash.
        probs = {}
        for i, valx in enumerate(valsX):
            for j, valy in enumerate(valsY):
                try:
                    actual_count = contXY['%s-%s' % (x_values[i],
                                                     y_values[j])]
                except KeyError:
                    # the combination does not occur in the data
                    actual_count = 0
                probs[i, j] = ((x_values[i], valx), (y_values[j], valy),
                               actual_count, n_instances)

        # get text width of Y attribute name (measured on a hidden text item)
        text = OWCanvasText(self.canvas, var_y.name, x=0, y=0, bold=1, show=0,
                            vertical=True)
        xOff = int(text.boundingRect().height() + 40)
        yOff = 50
        square_size = min(self.canvasView.width() - xOff - 35,
                          self.canvasView.height() - yOff - 30)
        if square_size < 0:
            return    # canvas is too small to draw rectangles
        self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                     self.canvasView.height())

        # print graph name
        if self.attrCondition == "(None)":
            name = "<b>P(%s, %s) &#8800; P(%s)&times;P(%s)</b>" % (
                self.attrX, self.attrY, self.attrX, self.attrY)
        else:
            cond_value = getHtmlCompatibleString(self.attrConditionValue)
            name = "<b>P(%s, %s | %s = %s) &#8800; P(%s | %s = %s)&times;P(%s | %s = %s)</b>" % (
                self.attrX, self.attrY, self.attrCondition, cond_value,
                self.attrX, self.attrCondition, cond_value,
                self.attrY, self.attrCondition, cond_value)
        OWCanvasText(self.canvas, "", xOff + square_size / 2, 20,
                     Qt.AlignCenter, htmlText=name)
        OWCanvasText(self.canvas, "N = " + str(n_instances),
                     xOff + square_size / 2, 38, Qt.AlignCenter, bold=0)

        ######################
        # compute chi-square
        chisquare = 0.0
        for i in range(len(valsX)):
            for j in range(len(valsY)):
                (_, xVal), (_, yVal), actual, sum_ = probs[i, j]
                expected = float(xVal * yVal) / float(sum_)
                if expected == 0:
                    continue
                chisquare += (actual - expected) * (actual - expected) \
                    / expected

        ######################
        # draw rectangles
        currX = xOff
        max_ylabel_w = 0

        normX, normY = sum(valsX), sum(valsY)
        for i in range(len(valsX)):
            if valsX[i] == 0:
                continue
            currY = yOff
            width = int(float(square_size * valsX[i]) / float(normX))

            # walk the y values backwards; this way they are sorted correctly
            for j in reversed(range(len(valsY))):
                if valsY[j] == 0:
                    continue
                (xAttr, xVal), (yAttr, yVal), actual, sum_ = probs[i, j]
                height = int(float(square_size * valsY[j]) / float(normY))

                # create rectangle
                rect = OWCanvasRectangle(self.canvas, currX + 2, currY + 2,
                                         width - 4, height - 4, z=-10)
                self.addRectIndependencePearson(
                    rect, currX + 2, currY + 2, width - 4, height - 4,
                    (xAttr, xVal), (yAttr, yVal), actual, sum_)

                # expected > 0 here: both marginal counts are non-zero
                expected = float(xVal * yVal) / float(sum_)
                pearson = (actual - expected) / sqrt(expected)
                tooltipText = """<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of examples (p(x)): <b>%d (%.2f%%)</b><hr>
                                <b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of examples (p(y)): <b>%d (%.2f%%)</b><hr>
                                <b>Number Of Examples (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual (p(x,y)): <b>%d (%.2f%%)</b>
                                <hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>""" % (
                    self.attrX, getHtmlCompatibleString(xAttr), xVal,
                    100.0 * float(xVal) / float(sum_), self.attrY,
                    getHtmlCompatibleString(yAttr), yVal,
                    100.0 * float(yVal) / float(sum_), expected,
                    100.0 * float(xVal * yVal) / float(sum_ * sum_), actual,
                    100.0 * float(actual) / float(sum_), chisquare, pearson)
                rect.setToolTip(tooltipText)

                currY += height
                if currX == xOff:
                    # y value labels are drawn next to the leftmost column;
                    # their widths position the y attribute name below
                    xl = OWCanvasText(
                        self.canvas, "", xOff - 10, currY - height / 2,
                        Qt.AlignRight | Qt.AlignVCenter,
                        htmlText=getHtmlCompatibleString(y_values[j]))
                    max_ylabel_w = max(int(xl.boundingRect().width()),
                                       max_ylabel_w)

            OWCanvasText(self.canvas, "", currX + width / 2,
                         yOff + square_size + 5, Qt.AlignCenter,
                         htmlText=getHtmlCompatibleString(x_values[i]))
            currX += width

        # show attribute names
        OWCanvasText(self.canvas, self.attrY,
                     max(xOff - 20 - max_ylabel_w, 20),
                     yOff + square_size / 2,
                     Qt.AlignRight | Qt.AlignVCenter, bold=1, vertical=True)
        OWCanvasText(self.canvas, self.attrX, xOff + square_size / 2,
                     yOff + square_size + 15, Qt.AlignCenter, bold=1)
예제 #5
0
파일: owsieve.py 프로젝트: thatcher/orange3
    def updateGraph(self, *args):
        for item in self.canvas.items():
            self.canvas.removeItem(item)  # remove all canvas items
        if not self.data: return
        if not self.attrX or not self.attrY: return

        data = self.getConditionalData()
        if not data or len(data) == 0: return

        valsX = []
        valsY = []
        # contX = orange.ContingencyAttrAttr(self.attrX, self.attrX, data)   # distribution of X attribute
        # contY = orange.ContingencyAttrAttr(self.attrY, self.attrY, data)   # distribution of Y attribute
        contX = get_contingency(data, self.attrX, self.attrX)
        contY = get_contingency(data, self.attrY, self.attrY)

        # compute contingency of x and y attributes
        for entry in contX:
            sum_ = 0
            try:
                for val in entry:
                    sum_ += val
            except:
                pass
            valsX.append(sum_)

        for entry in contY:
            sum_ = 0
            try:
                for val in entry:
                    sum_ += val
            except:
                pass
            valsY.append(sum_)

        # create cartesian product of selected attributes and compute contingency
        # (cart, profit) = FeatureByCartesianProduct(data, [data.domain[self.attrX], data.domain[self.attrY]])
        # tempData = data.select(list(data.domain) + [cart])
        # contXY = orange.ContingencyAttrAttr(cart, cart, tempData)   # distribution of X attribute
        # contXY = get_contingency(tempData, cart, cart)
        contXY = self.getConditionalDistributions(
            data, [data.domain[self.attrX], data.domain[self.attrY]])

        # compute probabilities
        probs = {}
        for i in range(len(valsX)):
            valx = valsX[i]
            for j in range(len(valsY)):
                valy = valsY[j]

                actualProb = 0
                try:
                    actualProb = contXY['%s-%s' %
                                        (data.domain[self.attrX].values[i],
                                         data.domain[self.attrY].values[j])]
                    # for val in contXY['%s-%s' %(i, j)]: actualProb += val
                except:
                    actualProb = 0
                probs['%s-%s' % (data.domain[self.attrX].values[i],
                                 data.domain[self.attrY].values[j])] = ((
                                     data.domain[self.attrX].values[i],
                                     valx), (data.domain[self.attrY].values[j],
                                             valy), actualProb, len(data))

        # get text width of Y attribute name
        text = OWCanvasText(self.canvas,
                            data.domain[self.attrY].name,
                            x=0,
                            y=0,
                            bold=1,
                            show=0,
                            vertical=True)
        xOff = int(text.boundingRect().height() + 40)
        yOff = 50
        sqareSize = min(self.canvasView.width() - xOff - 35,
                        self.canvasView.height() - yOff - 30)
        if sqareSize < 0: return  # canvas is too small to draw rectangles
        self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                     self.canvasView.height())

        # print graph name
        if self.attrCondition == "(None)":
            name = "<b>P(%s, %s) &#8800; P(%s)&times;P(%s)</b>" % (
                self.attrX, self.attrY, self.attrX, self.attrY)
        else:
            name = "<b>P(%s, %s | %s = %s) &#8800; P(%s | %s = %s)&times;P(%s | %s = %s)</b>" % (
                self.attrX, self.attrY, self.attrCondition,
                getHtmlCompatibleString(
                    self.attrConditionValue), self.attrX, self.attrCondition,
                getHtmlCompatibleString(
                    self.attrConditionValue), self.attrY, self.attrCondition,
                getHtmlCompatibleString(self.attrConditionValue))
        OWCanvasText(self.canvas,
                     "",
                     xOff + sqareSize / 2,
                     20,
                     Qt.AlignCenter,
                     htmlText=name)
        OWCanvasText(self.canvas,
                     "N = " + str(len(data)),
                     xOff + sqareSize / 2,
                     38,
                     Qt.AlignCenter,
                     bold=0)

        ######################
        # compute chi-square
        chisquare = 0.0
        for i in range(len(valsX)):
            for j in range(len(valsY)):
                ((xAttr, xVal), (yAttr, yVal), actual,
                 sum_) = probs['%s-%s' % (data.domain[self.attrX].values[i],
                                          data.domain[self.attrY].values[j])]
                expected = float(xVal * yVal) / float(sum_)
                if expected == 0: continue
                pearson2 = (actual - expected) * (actual - expected) / expected
                chisquare += pearson2

        ######################
        # draw rectangles
        currX = xOff
        max_ylabel_w = 0

        normX, normY = sum(valsX), sum(valsY)
        for i in range(len(valsX)):
            if valsX[i] == 0: continue
            currY = yOff
            width = int(float(sqareSize * valsX[i]) / float(normX))

            #for j in range(len(valsY)):
            for j in range(len(valsY) - 1, -1,
                           -1):  # this way we sort y values correctly
                ((xAttr, xVal), (yAttr, yVal), actual,
                 sum_) = probs['%s-%s' % (data.domain[self.attrX].values[i],
                                          data.domain[self.attrY].values[j])]
                if valsY[j] == 0: continue
                height = int(float(sqareSize * valsY[j]) / float(normY))

                # create rectangle
                rect = OWCanvasRectangle(self.canvas,
                                         currX + 2,
                                         currY + 2,
                                         width - 4,
                                         height - 4,
                                         z=-10)
                self.addRectIndependencePearson(rect, currX + 2, currY + 2,
                                                width - 4, height - 4,
                                                (xAttr, xVal), (yAttr, yVal),
                                                actual, sum_)

                expected = float(xVal * yVal) / float(sum_)
                pearson = (actual - expected) / sqrt(expected)
                tooltipText = """<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of examples (p(x)): <b>%d (%.2f%%)</b><hr>
                                <b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of examples (p(y)): <b>%d (%.2f%%)</b><hr>
                                <b>Number Of Examples (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual (p(x,y)): <b>%d (%.2f%%)</b>
                                <hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>""" % (
                    self.attrX, getHtmlCompatibleString(xAttr), xVal,
                    100.0 * float(xVal) / float(sum_), self.attrY,
                    getHtmlCompatibleString(yAttr), yVal,
                    100.0 * float(yVal) / float(sum_), expected,
                    100.0 * float(xVal * yVal) / float(sum_ * sum_), actual,
                    100.0 * float(actual) / float(sum_), chisquare, pearson)
                rect.setToolTip(tooltipText)

                currY += height
                if currX == xOff:
                    xl = OWCanvasText(self.canvas,
                                      "",
                                      xOff - 10,
                                      currY - height / 2,
                                      Qt.AlignRight | Qt.AlignVCenter,
                                      htmlText=getHtmlCompatibleString(
                                          data.domain[self.attrY].values[j]))
                    max_ylabel_w = max(int(xl.boundingRect().width()),
                                       max_ylabel_w)

            OWCanvasText(self.canvas,
                         "",
                         currX + width / 2,
                         yOff + sqareSize + 5,
                         Qt.AlignCenter,
                         htmlText=getHtmlCompatibleString(
                             data.domain[self.attrX].values[i]))
            currX += width

        # show attribute names
        OWCanvasText(self.canvas,
                     self.attrY,
                     max(xOff - 20 - max_ylabel_w, 20),
                     yOff + sqareSize / 2,
                     Qt.AlignRight | Qt.AlignVCenter,
                     bold=1,
                     vertical=True)
        OWCanvasText(self.canvas,
                     self.attrX,
                     xOff + sqareSize / 2,
                     yOff + sqareSize + 15,
                     Qt.AlignCenter,
                     bold=1)
예제 #6
0
        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs=None, used_vals=None,
                      attr_vals=""):
            """Recursively split a rectangle by the values of ``attr_list[0]``.

            The rectangle ``(x0, x1) x (y0, y1)`` is divided along the x axis
            when ``side`` is even and along the y axis when it is odd.  Each
            value of the first attribute gets a share proportional to its
            count in ``conditionaldict``; shares are separated by ``edge``.
            For the last attribute in ``attr_list`` the cells are emitted with
            ``add_rect``; otherwise the function recurses on
            ``attr_list[1:]`` with ``side + 1``.

            :param attr_list: attributes still to be drawn; the first one is
                handled by this call
            :param x0_x1: pair ``(x0, x1)`` with the horizontal extent
            :param y0_y1: pair ``(y0, y1)`` with the vertical extent
            :param side: recursion depth; its parity selects the split axis
            :param condition: HTML tooltip prefix accumulated so far
            :param total_attrs: passed through unchanged to ``draw_text``
            :param used_attrs: attributes already fixed by enclosing calls
                (``None`` means none)
            :param used_vals: values of ``used_attrs`` (``None`` means none)
            :param attr_vals: ``"-"``-joined values used as the key into
                ``conditionaldict``
            """
            # Use fresh lists per call: the lists are handed to add_rect and
            # draw_text, so a shared mutable default must not leak out.
            used_attrs = [] if used_attrs is None else used_attrs
            used_vals = [] if used_vals is None else used_vals

            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(data.domain[attr])
            horizontal = side % 2 == 0
            if not horizontal:
                values = values[::-1]  # reverse names if necessary

            # Length of the side being split and the number of gaps in it.
            extent = (x1 - x0) if horizontal else (y1 - y0)
            gaps = len(values) - 1
            # remove the space needed for separating different attr. values
            whole = max(0, extent - edge * gaps)
            if whole == 0 and gaps > 0:
                # Not enough room for the requested spacing: spread the gaps
                # over the whole extent.  With a single value there is no gap
                # to resize, and dividing by ``gaps`` would raise
                # ZeroDivisionError.
                edge = extent / float(gaps)

            prefix = attr_vals + "-" if attr_vals != "" else ""
            counts = [conditionaldict[prefix + val] for val in values]
            total = sum(counts)
            if total == 0:
                # None of the child cells has any count: fall back to equal
                # shares instead of dividing by zero below.
                counts = [1] * len(values)
                total = len(values)

            # cumulative[i] == sum(counts[:i]); computed once instead of
            # re-summing the prefix for every value.
            cumulative = [0]
            for count in counts:
                cumulative.append(cumulative[-1] + count)

            # if we are visualizing the third attribute and the first attribute
            # has the last value, we have to reverse the order in which the
            # boxes will be drawn otherwise, if the last cell, nearest to the
            # labels of the fourth attribute, is empty, we wouldn't be able to
            # position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(
                    data.domain[used_attrs[0]])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            is_last_attr = len(attr_list) == 1
            for i in valrange:
                start = i * edge + whole * (cumulative[i] / float(total))
                end = i * edge + whole * (cumulative[i + 1] / float(total))
                val = values[i]
                htmlval = getHtmlCompatibleString(val)
                newattrvals = prefix + val

                tooltip = condition + 4 * "&nbsp;" + attr + \
                    ": <b>" + htmlval + "</b><br>"
                common_args = used_attrs + [attr], used_vals + [val], \
                    newattrvals

                if horizontal:  # if we are moving horizontally
                    xs, ys = (x0 + start, x0 + end), (y0, y1)
                else:
                    xs, ys = (x0, x1), (y0 + start, y0 + end)

                if is_last_attr:
                    add_rect(xs[0], xs[1], ys[0], ys[1],
                             tooltip, *common_args)
                else:
                    draw_data(attr_list[1:], xs, ys, side + 1,
                              tooltip, total_attrs, *common_args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)