Beispiel #1
0
def test():
    """Print entropy-based statistics for two attributes of the global ``data``.

    Uses attributes 1 and 2 of ``data.domain`` and the class variable, and
    prints, one per line: the entropy of each attribute, their joint entropy,
    their mutual information, the mutual information between attribute 1 and
    the class, and the value of ``orange.MeasureAttribute_info`` for
    attribute 1.

    Relies on module-level names (``data``, ``_entropy``, ``p2f``,
    ``joint_entropy``, ``mutual_information``); returns nothing.
    """
    x = data.domain.attributes[1]
    y = data.domain.attributes[2]
    c = data.domain.classVar

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("H(%s) = %5.5f" % (x.name, _entropy(p2f(orange.Distribution(x, data)))))
    print("H(%s) = %5.5f" % (y.name, _entropy(p2f(orange.Distribution(y, data)))))
    print("H(%s,%s)= %5.5f" % (x.name, y.name, joint_entropy(x, y, data)))
    print("I(%s;%s)= %5.5f" % (x.name, y.name, mutual_information(x, y, data)))
    # The value printed here is mutual_information(x, c), so it is labelled
    # I(x;c) rather than H(x|c); it can be compared with the InfoGain line
    # that follows.
    print("I(%s;%s)= %5.5f" % (x.name, c.name, mutual_information(x, c, data)))
    print("InfoGain = %5.5f" % orange.MeasureAttribute_info(x, data))
Beispiel #2
0
 def __call__(self, attr, data, aprioriDist=None, weightID=None):
     """Score ``attr`` as information gain over a contingency entropy.

     The information gain of ``attr`` on ``data`` is divided by the value
     that ``orngContingency.Entropy`` returns for the flattened
     attribute-class contingency table. When that entropy is not greater
     than zero the result is 0.

     ``aprioriDist`` and ``weightID`` are accepted but not used here.
     """
     import numpy
     from orngContingency import Entropy

     # Anything the domain recognises (e.g. an attribute given by name) is
     # replaced by the domain's own variable object.
     if attr in data.domain:
         attr = data.domain[attr]

     contingency = orange.ContingencyAttrClass(attr, data)
     # Flatten the per-value class distributions into a single list.
     cells = [cell for row in contingency.values() for cell in row]
     contingencyEntropy = Entropy(numpy.array(cells))
     gain = orange.MeasureAttribute_info(attr, data)

     # Guard against division by a zero (or otherwise non-positive) entropy.
     if not (contingencyEntropy > 0):
         return 0
     return float(gain) / contingencyEntropy
    # The remaining rows score data2 under three settings of
    # meas.unknownsTreatment. Each row is a label followed by meas(i, data2)
    # for every attribute index i in range(attrs), formatted with fstr.

    # Row 1: ReduceByUnknowns treatment.
    meas.unknownsTreatment = meas.ReduceByUnknowns
    print fstr % (("- reduce unknowns:", ) +
                  tuple([meas(i, data2) for i in range(attrs)]))

    # Row 2: UnknownsToCommon treatment.
    meas.unknownsTreatment = meas.UnknownsToCommon
    print fstr % (("- unknowns to common:", ) +
                  tuple([meas(i, data2) for i in range(attrs)]))

    # Row 3: UnknownsAsValue treatment.
    meas.unknownsTreatment = meas.UnknownsAsValue
    print fstr % (("- unknowns as value:", ) +
                  tuple([meas(i, data2) for i in range(attrs)]))
    # Blank line to separate this group of rows from what is printed next.
    print


# Measures that are reported through printVariants: a heading followed by the
# rows that printVariants prints for a freshly constructed measure.
for _heading, _measure_ctor in (
        ("Information gain", orange.MeasureAttribute_info),
        ("Gain ratio", orange.MeasureAttribute_gainRatio),
        ("Gini index", orange.MeasureAttribute_gini)):
    print(_heading)
    printVariants(_measure_ctor())

# Relief is printed directly: one row of scores for data and one for data2,
# each covering attribute indices 0 .. attrs-1.
print("Relief")
meas = orange.MeasureAttribute_relief()
for _label, _table in (("- no unknowns:", data), ("- with unknowns:", data2)):
    _scores = tuple([meas(i, _table) for i in range(attrs)])
    print(fstr % ((_label, ) + _scores))
# Trailing blank line (same output as a bare Python 2 ``print``).
print("")
Beispiel #4
0
    def __init__(self, parent=None, signalManager=None, name="Interactive Discretization"):
        """Initialise widget state, declare signals and build the GUI.

        parent, signalManager and name are passed unchanged to
        OWWidget.__init__. The method then assigns default values to the
        widget's attributes, calls loadSettings(), declares the input and
        output signals, and creates the controls in the control area and the
        main area. Creation order matters because widgets are added to their
        parent boxes as they are constructed.
        """
        OWWidget.__init__(self, parent, signalManager, name)
        # Default values; loadSettings() is called only after all of them
        # have been assigned.
        # Graph display options.
        self.showBaseLine=1
        self.showLookaheadLine=1
        self.showTargetClassProb=1
        self.showRug=0
        self.snap=1
        # Indices into self.measures and into the target class combo box.
        self.measure=0
        self.targetClass=0
        # Selected method index and number of intervals for the default,
        # class and individual (per-attribute) discretization controls.
        self.discretization = self.classDiscretization = self.indiDiscretization = 1
        self.intervals = self.classIntervals = self.indiIntervals = 3
        self.outputOriginalClass = True
        self.indiData = []
        self.indiLabels = []
        self.resetIndividuals = 0
        self.customClassSplits = ""

        self.selectedAttr = 0
        # One entry per "Custom 1..3" line edit created at the end of this method.
        self.customSplits = ["", "", ""]
        self.autoApply = True
        self.dataChanged = False
        self.autoSynchronize = True
        self.pointsChanged = False

        # Filled while the GUI is built below.
        self.customLineEdits = []
        self.needsDiscrete = []

        self.data = self.originalData = None

        self.loadSettings()

        # Signals: a "Data" input handled by self.setData and a "Data" output.
        self.inputs=[("Data", ExampleTable, self.setData)]
        self.outputs=[("Data", ExampleTable)]
        # (label, measure instance) pairs; the labels populate the
        # "Split gain measure" combo box and self.measure indexes this list.
        self.measures=[("Information gain", orange.MeasureAttribute_info()),
                       #("Gain ratio", orange.MeasureAttribute_gainRatio),
                       ("Gini", orange.MeasureAttribute_gini()),
                       ("chi-square", orange.MeasureAttribute_chiSquare()),
                       ("chi-square prob.", orange.MeasureAttribute_chiSquare(computeProbabilities=1)),
                       ("Relevance", orange.MeasureAttribute_relevance()),
                       ("ReliefF", orange.MeasureAttribute_relief())]
        # Labels of the radio buttons in the three method groups.
        self.discretizationMethods=["Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove continuous attributes"]
        self.classDiscretizationMethods=["Equal-frequency discretization", "Equal-width discretization"]
        self.indiDiscretizationMethods=["Default", "Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove attribute"]

        self.mainHBox =  OWGUI.widgetBox(self.mainArea, orientation=0)

        # --- Control area: default discretization ---------------------------
        # All methods except the last are created with the box; the last one
        # is appended after the interval slider so that it appears below it.
        vbox = self.controlArea
        box = OWGUI.radioButtonsInBox(vbox, self, "discretization", self.discretizationMethods[:-1], "Default discretization", callback=[self.clearLineEditFocus, self.defaultMethodChanged])
        self.needsDiscrete.append(box.buttons[1])
        box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
        # The offset hint of that radio button is reused as the indentation
        # of the nested boxes created below.
        indent = OWGUI.checkButtonOffsetHint(self.needsDiscrete[-1])
        self.interBox = OWGUI.widgetBox(OWGUI.indentedBox(box, sep=indent))
        OWGUI.widgetLabel(self.interBox, "Number of intervals (for equal width/frequency)")
        OWGUI.separator(self.interBox, height=4)
        self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(self.interBox), self, "intervals", None, 2, 10, callback=[self.clearLineEditFocus, self.defaultMethodChanged])
        OWGUI.appendRadioButton(box, self, "discretization", self.discretizationMethods[-1])
        OWGUI.separator(vbox)

        # --- Control area: individual attribute treatment -------------------
        ribg = OWGUI.radioButtonsInBox(vbox, self, "resetIndividuals", ["Use default discretization for all attributes", "Explore and set individual discretizations"], "Individual attribute treatment", callback = self.setAllIndividuals)
        ll = QWidget(ribg)
        ll.setFixedHeight(1)
        OWGUI.widgetLabel(ribg, "Set discretization of all attributes to")
        hcustbox = OWGUI.widgetBox(OWGUI.indentedBox(ribg), 0, 0)
        # "Custom 1" .. "Custom 3" buttons belong to the ribg group but are
        # placed inside hcustbox.
        for c in range(1, 4):
            OWGUI.appendRadioButton(ribg, self, "resetIndividuals", "Custom %i" % c, insertInto = hcustbox)

        OWGUI.separator(vbox)

        # --- Control area: class discretization -----------------------------
        box = self.classDiscBox = OWGUI.radioButtonsInBox(vbox, self, "classDiscretization", self.classDiscretizationMethods, "Class discretization", callback=[self.clearLineEditFocus, self.classMethodChanged])
        cinterBox = OWGUI.widgetBox(box)
        # NOTE(review): this reassigns self.intervalSlider, replacing the
        # reference to the "intervals" slider stored above - confirm whether a
        # separate attribute was intended for the "classIntervals" slider.
        self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(cinterBox, sep=indent), self, "classIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.classMethodChanged], label="Number of intervals")
        hbox = OWGUI.widgetBox(box, orientation = 0)
        # NOTE(review): this button is appended to the class discretization
        # box but is given the attribute name "discretization" - confirm
        # whether "classDiscretization" was intended.
        OWGUI.appendRadioButton(box, self, "discretization", "Custom" + "  ", insertInto = hbox)
        self.classCustomLineEdit = OWGUI.lineEdit(hbox, self, "customClassSplits", callback = self.classCustomChanged, focusInCallback = self.classCustomSelected)
#        Can't validate - need to allow spaces
        box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
        OWGUI.separator(box)
        self.classIntervalsLabel = OWGUI.widgetLabel(box, "Current splits: ")
        OWGUI.separator(box)
        OWGUI.checkBox(box, self, "outputOriginalClass", "Output original class", callback = self.commitIf)
        OWGUI.widgetLabel(box, "("+"Widget always uses discretized class internally."+")")

        OWGUI.separator(vbox)
        #OWGUI.rubber(vbox)

        # --- Control area: commit -------------------------------------------
        # setStopper is given the button, the auto-commit check box, the name
        # of the dataChanged flag and self.commit.
        box = OWGUI.widgetBox(vbox, "Commit")
        applyButton = OWGUI.button(box, self, "Commit", callback = self.commit, default=True)
        autoApplyCB = OWGUI.checkBox(box, self, "autoApply", "Commit automatically", callback=[self.clearLineEditFocus])
        OWGUI.setStopper(self, applyButton, autoApplyCB, "dataChanged", self.commit)
        OWGUI.rubber(vbox)

        # --- Main area: individual attribute settings -----------------------
        # mainBox (upper part) receives the attribute list and the
        # per-attribute method buttons; graphBox (lower part) receives the
        # graph options and the graph itself.
        #self.mainSeparator = OWGUI.separator(self.mainHBox, width=25)        # space between control and main area
        self.mainIABox =  OWGUI.widgetBox(self.mainHBox, "Individual attribute settings")
        self.mainBox = OWGUI.widgetBox(self.mainIABox, orientation=0)
        OWGUI.separator(self.mainIABox)#, height=30)
        graphBox = OWGUI.widgetBox(self.mainIABox, "", orientation=0)
        
        
#        self.needsDiscrete.append(graphBox)
        graphOptBox = OWGUI.widgetBox(graphBox)
        OWGUI.separator(graphBox, width=10)
        
        graphGraphBox = OWGUI.widgetBox(graphBox)
        self.graph = DiscGraph(self, graphGraphBox)
        graphGraphBox.layout().addWidget(self.graph)
        reportButton2 = OWGUI.button(graphGraphBox, self, "Report Graph", callback = self.reportGraph, debuggingEnabled=0)

        # The option boxes below are created after self.graph because their
        # callbacks reference methods of the graph.
        #graphOptBox.layout().setSpacing(4)
        box = OWGUI.widgetBox(graphOptBox, "Split gain measure", addSpace=True)
        self.measureCombo=OWGUI.comboBox(box, self, "measure", orientation=0, items=[e[0] for e in self.measures], callback=[self.clearLineEditFocus, self.graph.invalidateBaseScore, self.graph.plotBaseCurve])
        OWGUI.checkBox(box, self, "showBaseLine", "Show discretization gain", callback=[self.clearLineEditFocus, self.graph.plotBaseCurve])
        OWGUI.checkBox(box, self, "showLookaheadLine", "Show lookahead gain", callback=self.clearLineEditFocus)
        self.needsDiscrete.append(box)

        # The target class combo is created without items here.
        box = OWGUI.widgetBox(graphOptBox, "Target class", addSpace=True)
        self.targetCombo=OWGUI.comboBox(box, self, "targetClass", orientation=0, callback=[self.clearLineEditFocus, self.graph.targetClassChanged])
        stc = OWGUI.checkBox(box, self, "showTargetClassProb", "Show target class probability", callback=[self.clearLineEditFocus, self.graph.plotProbCurve])
        OWGUI.checkBox(box, self, "showRug", "Show rug (may be slow)", callback=[self.clearLineEditFocus, self.graph.plotRug])
        self.needsDiscrete.extend([self.targetCombo, stc])

        # Same button / check box / flag arrangement as in the Commit box,
        # here with the pointsChanged flag and self.synchronize.
        box = OWGUI.widgetBox(graphOptBox, "Editing", addSpace=True)
        OWGUI.checkBox(box, self, "snap", "Snap to grid", callback=[self.clearLineEditFocus])
        syncCB = OWGUI.checkBox(box, self, "autoSynchronize", "Apply on the fly", callback=self.clearLineEditFocus)
        syncButton = OWGUI.button(box, self, "Apply", callback = self.synchronizePressed)
        OWGUI.setStopper(self, syncButton, syncCB, "pointsChanged", self.synchronize)
        OWGUI.rubber(graphOptBox)

        # Attribute list, drawn with a custom item delegate.
        self.attrList = OWGUI.listBox(self.mainBox, self, callback = self.individualSelected)
        self.attrList.setItemDelegate(CustomListItemDelegate(self.attrList))
        self.attrList.setFixedWidth(300)

        # Called here, before the per-attribute method controls are created.
        self.defaultMethodChanged()

        # Per-attribute method buttons: the group starts empty and the
        # buttons are appended one by one so that the interval slider can be
        # placed before the last method ("Remove attribute").
        OWGUI.separator(self.mainBox, width=10)
        box = OWGUI.radioButtonsInBox(OWGUI.widgetBox(self.mainBox), self, "indiDiscretization", [], callback=[self.clearLineEditFocus, self.indiMethodChanged])
        #hbbox = OWGUI.widgetBox(box)
        #hbbox.layout().setSpacing(4)
        for meth in self.indiDiscretizationMethods[:-1]:
            OWGUI.appendRadioButton(box, self, "indiDiscretization", meth)
        self.needsDiscrete.append(box.buttons[2])
        self.indiInterBox = OWGUI.indentedBox(box, sep=indent, orientation = "horizontal")
        OWGUI.widgetLabel(self.indiInterBox, "Num. of intervals: ")
        self.indiIntervalSlider = OWGUI.hSlider(self.indiInterBox, self, "indiIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.indiMethodChanged], width = 100)
        OWGUI.rubber(self.indiInterBox) 
        OWGUI.appendRadioButton(box, self, "indiDiscretization", self.indiDiscretizationMethods[-1])
        #OWGUI.rubber(hbbox)
        #OWGUI.separator(box)
        #hbbox = OWGUI.widgetBox(box)
        # Three custom rows: radio button, line edit and a "CC" tool button.
        # The lambdas take the loop index as the default argument w, which
        # binds its value at definition time for each row.
        for i in range(3):
            hbox = OWGUI.widgetBox(box, orientation = "horizontal")
            OWGUI.appendRadioButton(box, self, "indiDiscretization", "Custom %i" % (i+1) + " ", insertInto = hbox)
            le = OWGUI.lineEdit(hbox, self, "", callback = lambda w=i: self.customChanged(w), focusInCallback = lambda w=i: self.customSelected(w))
            le.setFixedWidth(110)
            self.customLineEdits.append(le)
            OWGUI.toolButton(hbox, self, "CC", width=30, callback = lambda w=i: self.copyToCustom(w))
            OWGUI.rubber(hbox)
        OWGUI.rubber(box)

        #self.controlArea.setFixedWidth(0)

        # Icon stored for continuous attributes, taken from the icon dictionary.
        self.contAttrIcon =  self.createAttributeIconDict()[orange.VarTypes.Continuous]
        
        self.setAllIndividuals()
Beispiel #5
0
# Description: Shows how to use probability estimators with measure of attribute quality
# Category:    attribute quality
# Classes:     MeasureAttribute, MeasureAttribute_info, ProbabilityEstimatorConstructor_m, ConditionalProbabilityEstimatorConstructor_ByRows
# Uses:        lenses
# Referenced:  MeasureAttribute.htm

import orange
data = orange.ExampleTable("lenses")

# One information-gain measure is built per value of m. Each measure gets an
# m-estimate probability estimator constructor, and its by-rows conditional
# estimator constructor is given that same estimator constructor.
ms = (0, 2, 5, 10, 20)
measures = []
for m in ms:
    meas = orange.MeasureAttribute_info()
    meas.estimatorConstructor = orange.ProbabilityEstimatorConstructor_m(m=m)
    by_rows = orange.ConditionalProbabilityEstimatorConstructor_ByRows()
    meas.conditionalEstimatorConstructor = by_rows
    meas.conditionalEstimatorConstructor.estimatorConstructor = meas.estimatorConstructor
    measures.append(meas)

# Header row: the column title followed by the m values.
header = ("attr", ) + ms
print("%15s\t%5i\t%5i\t%5i\t%5i\t%5i\t" % header)

# One row per attribute: its name followed by its score under each measure.
for attr in data.domain.attributes:
    row = (attr.name, ) + tuple([meas(attr, data) for meas in measures])
    print("%15s\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t%5.3f" % row)