def test():
    """Print entropy-based statistics for attributes of the module-level ``data``.

    Uses the second and third attributes of ``data.domain`` (indices 1 and 2)
    and the class variable.  Relies on names defined outside this block:
    ``data``, ``orange``, ``_entropy``, ``p2f``, ``joint_entropy`` and
    ``mutual_information``.

    Output, one line each: the entropy of each attribute, their joint
    entropy, their mutual information, the mutual information between the
    first attribute and the class, and Orange's information gain for the
    first attribute.  Returns nothing.
    """
    x = data.domain.attributes[1]
    y = data.domain.attributes[2]
    c = data.domain.classVar

    # print(<single expression>) behaves exactly like the print statement
    # under Python 2 and stays valid syntax under Python 3.
    print("H(%s) = %5.5f" % (x.name, _entropy(p2f(orange.Distribution(x, data)))))
    print("H(%s) = %5.5f" % (y.name, _entropy(p2f(orange.Distribution(y, data)))))
    print("H(%s,%s)= %5.5f" % (x.name, y.name, joint_entropy(x, y, data)))
    print("I(%s;%s)= %5.5f" % (x.name, y.name, mutual_information(x, y, data)))
    # The value printed here is the mutual information between x and the
    # class, so it is labelled I(x;c); the former "H(x|c)" label named a
    # different quantity (conditional entropy) than the one computed.
    print("I(%s;%s)= %5.5f" % (x.name, c.name, mutual_information(x, c, data)))
    # Printed right after I(x;c) so the two figures can be compared.
    print("InfoGain = %5.5f" % orange.MeasureAttribute_info(x, data))
def __call__(self, attr, data, aprioriDist=None, weightID=None):
    """Score ``attr`` on ``data`` as information gain over contingency entropy.

    ``attr`` may be anything ``data.domain`` can look up (e.g. a name); it is
    then replaced by the corresponding domain entry.  ``aprioriDist`` and
    ``weightID`` are accepted but not used by this body.

    Returns ``float(info gain) / entropy`` where the entropy is computed by
    ``orngContingency.Entropy`` over all cells of the attribute-class
    contingency, or ``0`` when that entropy is not greater than zero.
    """
    import numpy
    from orngContingency import Entropy

    domain = data.domain
    if attr in domain:
        # The caller may hand us a string; resolve it to the variable.
        attr = domain[attr]

    contingency = orange.ContingencyAttrClass(attr, data)
    # Flatten every per-value distribution into one list of cells.
    cells = [cell for row in contingency.values() for cell in row]
    contingency_entropy = Entropy(numpy.array(cells))
    gain = orange.MeasureAttribute_info(attr, data)

    if not contingency_entropy > 0:
        return 0
    return float(gain) / contingency_entropy
meas.unknownsTreatment = meas.ReduceByUnknowns print fstr % (("- reduce unknowns:", ) + tuple([meas(i, data2) for i in range(attrs)])) meas.unknownsTreatment = meas.UnknownsToCommon print fstr % (("- unknowns to common:", ) + tuple([meas(i, data2) for i in range(attrs)])) meas.unknownsTreatment = meas.UnknownsAsValue print fstr % (("- unknowns as value:", ) + tuple([meas(i, data2) for i in range(attrs)])) print print "Information gain" printVariants(orange.MeasureAttribute_info()) print "Gain ratio" printVariants(orange.MeasureAttribute_gainRatio()) print "Gini index" printVariants(orange.MeasureAttribute_gini()) print "Relief" meas = orange.MeasureAttribute_relief() print fstr % ( ("- no unknowns:", ) + tuple([meas(i, data) for i in range(attrs)])) print fstr % ( ("- with unknowns:", ) + tuple([meas(i, data2) for i in range(attrs)])) print
def __init__(self, parent=None, signalManager=None, name="Interactive Discretization"):
    """Set default settings and build the whole widget GUI.

    ``parent``, ``signalManager`` and ``name`` are passed unchanged to
    ``OWWidget.__init__``.  The method then assigns default values to the
    widget's settings attributes, calls ``self.loadSettings()``, declares the
    input/output signals and creates all controls: the default, per-attribute
    and class discretization boxes, the commit box, the graph with its option
    boxes, and the attribute list with the individual-method radio buttons.
    """
    OWWidget.__init__(self, parent, signalManager, name)

    # --- Default values of the widget's settings -------------------------
    self.showBaseLine=1
    self.showLookaheadLine=1
    self.showTargetClassProb=1
    self.showRug=0
    self.snap=1
    self.measure=0
    self.targetClass=0
    self.discretization = self.classDiscretization = self.indiDiscretization = 1
    self.intervals = self.classIntervals = self.indiIntervals = 3
    self.outputOriginalClass = True
    self.indiData = []
    self.indiLabels = []
    self.resetIndividuals = 0
    self.customClassSplits = ""
    self.selectedAttr = 0
    self.customSplits = ["", "", ""]
    self.autoApply = True
    self.dataChanged = False
    self.autoSynchronize = True
    self.pointsChanged = False

    self.customLineEdits = []
    # Controls collected here are gathered during construction; how they are
    # used is decided by code outside this method.
    self.needsDiscrete = []

    self.data = self.originalData = None

    # NOTE(review): presumably overrides the defaults above with stored
    # values -- confirm against OWWidget.loadSettings.
    self.loadSettings()

    # --- Signals ----------------------------------------------------------
    self.inputs=[("Data", ExampleTable, self.setData)]
    self.outputs=[("Data", ExampleTable)]

    # --- Static tables of measures and method names -----------------------
    # (label, measure instance) pairs; the labels fill the measure combo box.
    self.measures=[("Information gain", orange.MeasureAttribute_info()),
                   #("Gain ratio", orange.MeasureAttribute_gainRatio),
                   ("Gini", orange.MeasureAttribute_gini()),
                   ("chi-square", orange.MeasureAttribute_chiSquare()),
                   ("chi-square prob.", orange.MeasureAttribute_chiSquare(computeProbabilities=1)),
                   ("Relevance", orange.MeasureAttribute_relevance()),
                   ("ReliefF", orange.MeasureAttribute_relief())]
    self.discretizationMethods=["Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove continuous attributes"]
    self.classDiscretizationMethods=["Equal-frequency discretization", "Equal-width discretization"]
    self.indiDiscretizationMethods=["Default", "Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove attribute"]

    self.mainHBox = OWGUI.widgetBox(self.mainArea, orientation=0)

    # --- Control area: default discretization -----------------------------
    vbox = self.controlArea
    # The last method is appended separately below so that the
    # interval-count slider sits between it and the other methods.
    box = OWGUI.radioButtonsInBox(vbox, self, "discretization", self.discretizationMethods[:-1], "Default discretization", callback=[self.clearLineEditFocus, self.defaultMethodChanged])
    self.needsDiscrete.append(box.buttons[1])
    box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
    # The same indent is reused for the class and individual slider boxes.
    indent = OWGUI.checkButtonOffsetHint(self.needsDiscrete[-1])
    self.interBox = OWGUI.widgetBox(OWGUI.indentedBox(box, sep=indent))
    OWGUI.widgetLabel(self.interBox, "Number of intervals (for equal width/frequency)")
    OWGUI.separator(self.interBox, height=4)
    # NOTE(review): self.intervalSlider is assigned again further down for
    # the class-interval slider, so this reference is overwritten.
    self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(self.interBox), self, "intervals", None, 2, 10, callback=[self.clearLineEditFocus, self.defaultMethodChanged])
    OWGUI.appendRadioButton(box, self, "discretization", self.discretizationMethods[-1])
    OWGUI.separator(vbox)

    # --- Control area: individual attribute treatment ---------------------
    ribg = OWGUI.radioButtonsInBox(vbox, self, "resetIndividuals", ["Use default discretization for all attributes", "Explore and set individual discretizations"], "Individual attribute treatment", callback = self.setAllIndividuals)
    ll = QWidget(ribg)
    ll.setFixedHeight(1)
    OWGUI.widgetLabel(ribg, "Set discretization of all attributes to")
    hcustbox = OWGUI.widgetBox(OWGUI.indentedBox(ribg), 0, 0)
    for c in range(1, 4):
        OWGUI.appendRadioButton(ribg, self, "resetIndividuals", "Custom %i" % c, insertInto = hcustbox)

    OWGUI.separator(vbox)

    # --- Control area: class discretization -------------------------------
    box = self.classDiscBox = OWGUI.radioButtonsInBox(vbox, self, "classDiscretization", self.classDiscretizationMethods, "Class discretization", callback=[self.clearLineEditFocus, self.classMethodChanged])
    cinterBox = OWGUI.widgetBox(box)
    self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(cinterBox, sep=indent), self, "classIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.classMethodChanged], label="Number of intervals")
    hbox = OWGUI.widgetBox(box, orientation = 0)
    # This button joins the "classDiscretization" group created above, so it
    # is bound to that attribute.  It was previously bound to
    # "discretization", the default-method setting of a different group.
    # NOTE(review): assumes appendRadioButton uses the attribute name to set
    # the button's initial checked state -- confirm against OWGUI.
    OWGUI.appendRadioButton(box, self, "classDiscretization", "Custom" + " ", insertInto = hbox)
    self.classCustomLineEdit = OWGUI.lineEdit(hbox, self, "customClassSplits", callback = self.classCustomChanged, focusInCallback = self.classCustomSelected)
    # Can't validate - need to allow spaces
    box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
    OWGUI.separator(box)
    self.classIntervalsLabel = OWGUI.widgetLabel(box, "Current splits: ")
    OWGUI.separator(box)
    OWGUI.checkBox(box, self, "outputOriginalClass", "Output original class", callback = self.commitIf)
    OWGUI.widgetLabel(box, "("+"Widget always uses discretized class internally."+")")

    OWGUI.separator(vbox)
    #OWGUI.rubber(vbox)

    # --- Control area: commit ---------------------------------------------
    box = OWGUI.widgetBox(vbox, "Commit")
    applyButton = OWGUI.button(box, self, "Commit", callback = self.commit, default=True)
    autoApplyCB = OWGUI.checkBox(box, self, "autoApply", "Commit automatically", callback=[self.clearLineEditFocus])
    OWGUI.setStopper(self, applyButton, autoApplyCB, "dataChanged", self.commit)
    OWGUI.rubber(vbox)

    # --- Main area: individual attribute settings --------------------------
    #self.mainSeparator = OWGUI.separator(self.mainHBox, width=25) # space between control and main area
    self.mainIABox = OWGUI.widgetBox(self.mainHBox, "Individual attribute settings")
    self.mainBox = OWGUI.widgetBox(self.mainIABox, orientation=0)
    OWGUI.separator(self.mainIABox)#, height=30)
    graphBox = OWGUI.widgetBox(self.mainIABox, "", orientation=0)
    # self.needsDiscrete.append(graphBox)

    graphOptBox = OWGUI.widgetBox(graphBox)
    OWGUI.separator(graphBox, width=10)

    graphGraphBox = OWGUI.widgetBox(graphBox)
    # The graph must exist before the option boxes below, because their
    # callbacks reference bound methods of self.graph.
    self.graph = DiscGraph(self, graphGraphBox)
    graphGraphBox.layout().addWidget(self.graph)
    reportButton2 = OWGUI.button(graphGraphBox, self, "Report Graph", callback = self.reportGraph, debuggingEnabled=0)

    #graphOptBox.layout().setSpacing(4)
    box = OWGUI.widgetBox(graphOptBox, "Split gain measure", addSpace=True)
    self.measureCombo=OWGUI.comboBox(box, self, "measure", orientation=0, items=[e[0] for e in self.measures], callback=[self.clearLineEditFocus, self.graph.invalidateBaseScore, self.graph.plotBaseCurve])
    OWGUI.checkBox(box, self, "showBaseLine", "Show discretization gain", callback=[self.clearLineEditFocus, self.graph.plotBaseCurve])
    OWGUI.checkBox(box, self, "showLookaheadLine", "Show lookahead gain", callback=self.clearLineEditFocus)
    self.needsDiscrete.append(box)

    box = OWGUI.widgetBox(graphOptBox, "Target class", addSpace=True)
    self.targetCombo=OWGUI.comboBox(box, self, "targetClass", orientation=0, callback=[self.clearLineEditFocus, self.graph.targetClassChanged])
    stc = OWGUI.checkBox(box, self, "showTargetClassProb", "Show target class probability", callback=[self.clearLineEditFocus, self.graph.plotProbCurve])
    OWGUI.checkBox(box, self, "showRug", "Show rug (may be slow)", callback=[self.clearLineEditFocus, self.graph.plotRug])
    self.needsDiscrete.extend([self.targetCombo, stc])

    box = OWGUI.widgetBox(graphOptBox, "Editing", addSpace=True)
    OWGUI.checkBox(box, self, "snap", "Snap to grid", callback=[self.clearLineEditFocus])
    syncCB = OWGUI.checkBox(box, self, "autoSynchronize", "Apply on the fly", callback=self.clearLineEditFocus)
    syncButton = OWGUI.button(box, self, "Apply", callback = self.synchronizePressed)
    OWGUI.setStopper(self, syncButton, syncCB, "pointsChanged", self.synchronize)
    OWGUI.rubber(graphOptBox)

    # --- Main area: attribute list ----------------------------------------
    self.attrList = OWGUI.listBox(self.mainBox, self, callback = self.individualSelected)
    self.attrList.setItemDelegate(CustomListItemDelegate(self.attrList))
    self.attrList.setFixedWidth(300)

    self.defaultMethodChanged()

    # --- Main area: per-attribute method selection -------------------------
    OWGUI.separator(self.mainBox, width=10)
    # Created empty; the buttons are appended one by one so that the
    # interval slider and the custom line edits can be placed between them.
    box = OWGUI.radioButtonsInBox(OWGUI.widgetBox(self.mainBox), self, "indiDiscretization", [], callback=[self.clearLineEditFocus, self.indiMethodChanged])
    #hbbox = OWGUI.widgetBox(box)
    #hbbox.layout().setSpacing(4)
    for meth in self.indiDiscretizationMethods[:-1]:
        OWGUI.appendRadioButton(box, self, "indiDiscretization", meth)
    self.needsDiscrete.append(box.buttons[2])
    self.indiInterBox = OWGUI.indentedBox(box, sep=indent, orientation = "horizontal")
    OWGUI.widgetLabel(self.indiInterBox, "Num. of intervals: ")
    self.indiIntervalSlider = OWGUI.hSlider(self.indiInterBox, self, "indiIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.indiMethodChanged], width = 100)
    OWGUI.rubber(self.indiInterBox)
    OWGUI.appendRadioButton(box, self, "indiDiscretization", self.indiDiscretizationMethods[-1])
    #OWGUI.rubber(hbbox)
    #OWGUI.separator(box)
    #hbbox = OWGUI.widgetBox(box)
    for i in range(3):
        hbox = OWGUI.widgetBox(box, orientation = "horizontal")
        OWGUI.appendRadioButton(box, self, "indiDiscretization", "Custom %i" % (i+1) + " ", insertInto = hbox)
        # "w=i" freezes the current index in each lambda; a plain closure
        # over i would see only the final loop value.
        le = OWGUI.lineEdit(hbox, self, "", callback = lambda w=i: self.customChanged(w), focusInCallback = lambda w=i: self.customSelected(w))
        le.setFixedWidth(110)
        self.customLineEdits.append(le)
        OWGUI.toolButton(hbox, self, "CC", width=30, callback = lambda w=i: self.copyToCustom(w))
        OWGUI.rubber(hbox)
    OWGUI.rubber(box)

    #self.controlArea.setFixedWidth(0)

    self.contAttrIcon = self.createAttributeIconDict()[orange.VarTypes.Continuous]

    self.setAllIndividuals()
# Description: Shows how to use probability estimators with measure of attribute quality
# Category: attribute quality
# Classes: MeasureAttribute, MeasureAttribute_info, ProbabilityEstimatorConstructor_m, ConditionalProbabilityEstimatorConstructor_ByRows
# Uses: lenses
# Referenced: MeasureAttribute.htm

import orange


def _info_gain_with_m_estimate(m):
    """Return a MeasureAttribute_info whose estimators both use the given m."""
    scorer = orange.MeasureAttribute_info()
    estimator = orange.ProbabilityEstimatorConstructor_m(m=m)
    scorer.estimatorConstructor = estimator
    by_rows = orange.ConditionalProbabilityEstimatorConstructor_ByRows()
    # The conditional estimator shares the very same m-estimate constructor.
    by_rows.estimatorConstructor = estimator
    scorer.conditionalEstimatorConstructor = by_rows
    return scorer


data = orange.ExampleTable("lenses")

# One measure per value of m; each becomes one column of the printed table.
ms = (0, 2, 5, 10, 20)
measures = [_info_gain_with_m_estimate(m) for m in ms]

# Both formats hard-code five value columns, matching len(ms).
header_format = "%15s\t%5i\t%5i\t%5i\t%5i\t%5i\t"
row_format = "%15s\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t%5.3f"

print(header_format % (("attr", ) + ms))
for attr in data.domain.attributes:
    scores = tuple(meas(attr, data) for meas in measures)
    print(row_format % ((attr.name, ) + scores))