import orange
import orngTree
import orngStat
import orngWrap

# Load the "voting" example table and create the tree learner that is handed
# to the tuner below.
data = orange.ExampleTable("voting")
learner = orngTree.TreeLearner()

# The tuner receives the learner, one (parameter name, candidate values) pair
# per parameter, and orngStat.AUC as the evaluation function.
tuner = orngWrap.TuneMParameters(
    object=learner,
    parameters=[
        ("minSubset", [2, 5, 10, 20]),
        ("measure", [
            orange.MeasureAttribute_gainRatio(),
            orange.MeasureAttribute_gini()
        ])
    ],
    evaluate=orngStat.AUC
)

# Calling the tuner on the data yields the classifier.
classifier = tuner(data)
def __init__(self, learner=None, trees=100, attributes=None,
             name='Random Forest', rand=None, callback=None):
    """Initialise the random forest learner.

    The arguments ``trees``, ``name``, ``learner``, ``attributes`` and
    ``callback`` are stored on the instance under the same names.

    ``rand`` is the random generator to use; when it is not given, a
    ``random.Random`` seeded with 0 is created.  The generator's initial
    state is remembered in ``self.randstate``.

    When no ``learner`` is given, a default tree learner is built and
    stored in ``self.learner``: a ``orngTree.TreeLearner`` instance that
    uses the gini measure for both its discrete and continuous split
    constructors and whose split constructor is wrapped in
    ``SplitConstructor_AttributeSubset`` (given ``attributes`` and the
    random generator).
    """
    self.trees = trees
    self.name = name
    self.learner = learner
    self.attributes = attributes
    self.callback = callback

    # Use the supplied generator, or fall back to one seeded with 0.
    self.rand = rand if rand else random.Random(0)
    # Keep the generator's original state.
    self.randstate = self.rand.getstate()

    if learner:
        # A base learner was supplied; nothing more to set up.
        return

    # Tree learner assembled as suggested by Breiman (2001).
    baseLearner = orngTree.TreeLearner(
        storeNodeClassifier=0,
        storeContingencies=0,
        storeDistributions=1,
        minExamples=5).instance()

    # Both split constructors share a single gini measure object.
    giniMeasure = orange.MeasureAttribute_gini()
    baseLearner.split.discreteSplitConstructor.measure = giniMeasure
    baseLearner.split.continuousSplitConstructor.measure = giniMeasure

    # Wrap the split constructor together with `attributes` and the
    # random generator.
    baseLearner.split = SplitConstructor_AttributeSubset(
        baseLearner.split, attributes, self.rand)
    self.learner = baseLearner
def selectAttributes(data, attrContOrder, attrDiscOrder, projections=None): if data.domain.classVar == None or data.domain.classVar.varType != orange.VarTypes.Discrete: return ([attr.name for attr in data.domain.attributes], [], 0) shown = [data.domain.classVar.name] hidden = [] maxIndex = 0 # initialize outputs # # both are RELIEF if attrContOrder == "ReliefF" and attrDiscOrder == "ReliefF": attrVals = orngFSS.attMeasure(data, orange.MeasureAttribute_relief()) s, h = getTopAttrs(attrVals, 0.95) return (shown + s, hidden + h, 0) # # both are NONE elif attrContOrder == "None" and attrDiscOrder == "None": for item in data.domain.attributes: shown.append(item.name) return (shown, hidden, 0) # disc and cont attribute list discAttrs = [] contAttrs = [] for attr in data.domain.attributes: if attr.varType == orange.VarTypes.Continuous: contAttrs.append(attr.name) elif attr.varType == orange.VarTypes.Discrete: discAttrs.append(attr.name) ############################### # sort continuous attributes if attrContOrder == "None": shown += contAttrs elif attrContOrder in [ "ReliefF", "Fisher discriminant", "Signal to Noise", "Signal to Noise For Each Class" ]: if attrContOrder == "ReliefF": measure = orange.MeasureAttribute_relief(k=10, m=50) elif attrContOrder == "Fisher discriminant": measure = MeasureFisherDiscriminant() elif attrContOrder == "Signal to Noise": measure = S2NMeasure() else: measure = S2NMeasureMix() dataNew = data.select(contAttrs + [data.domain.classVar]) attrVals = orngFSS.attMeasure(dataNew, measure) s, h = getTopAttrs(attrVals, 0.95) shown += s hidden += h else: print "Unknown value for attribute order: ", attrContOrder # ############################### # sort discrete attributes if attrDiscOrder == "None": shown += discAttrs elif attrDiscOrder == "GainRatio" or attrDiscOrder == "Gini" or attrDiscOrder == "ReliefF": if attrDiscOrder == "GainRatio": measure = orange.MeasureAttribute_gainRatio() elif attrDiscOrder == "Gini": measure = 
orange.MeasureAttribute_gini() else: measure = orange.MeasureAttribute_relief() dataNew = data.select(discAttrs + [data.domain.classVar]) attrVals = orngFSS.attMeasure(dataNew, measure) s, h = getTopAttrs(attrVals, 0.95) shown += s hidden += h elif attrDiscOrder == "Oblivious decision graphs": #shown.append(data.domain.classVar.name) attrs = getFunctionalList(data) for item in attrs: shown.append(item) for attr in data.domain.attributes: if attr.name not in shown and attr.varType == orange.VarTypes.Discrete: hidden.append(attr.name) else: print "Unknown value for attribute order: ", attrDiscOrder return (shown, hidden, maxIndex)
tuple([meas(i, data2) for i in range(attrs)])) meas.unknownsTreatment = meas.UnknownsAsValue print fstr % (("- unknowns as value:", ) + tuple([meas(i, data2) for i in range(attrs)])) print print "Information gain" printVariants(orange.MeasureAttribute_info()) print "Gain ratio" printVariants(orange.MeasureAttribute_gainRatio()) print "Gini index" printVariants(orange.MeasureAttribute_gini()) print "Relief" meas = orange.MeasureAttribute_relief() print fstr % ( ("- no unknowns:", ) + tuple([meas(i, data) for i in range(attrs)])) print fstr % ( ("- with unknowns:", ) + tuple([meas(i, data2) for i in range(attrs)])) print print "Cost matrix ((0, 5), (1, 0))" meas = orange.MeasureAttribute_cost() meas.cost = ((0, 5), (1, 0)) printVariants(meas) print "Relevance"
def __init__(self, parent=None, signalManager=None, name="Interactive Discretization"):
    """Set up the Interactive Discretization widget.

    Initialises the option attributes with their default values, calls
    self.loadSettings(), declares the "Data" input and output signals and
    then builds the GUI: the control area (default discretization,
    individual attribute treatment, class discretization, commit) and the
    main area (attribute list, per-attribute method selection and the
    discretization graph with its options).

    parent, signalManager and name are passed on unchanged to
    OWWidget.__init__.
    """
    OWWidget.__init__(self, parent, signalManager, name)

    # Default values of the option attributes; the GUI controls created
    # below refer to these attributes by name.
    self.showBaseLine=1
    self.showLookaheadLine=1
    self.showTargetClassProb=1
    self.showRug=0
    self.snap=1
    self.measure=0
    self.targetClass=0
    self.discretization = self.classDiscretization = self.indiDiscretization = 1
    self.intervals = self.classIntervals = self.indiIntervals = 3
    self.outputOriginalClass = True
    self.indiData = []
    self.indiLabels = []
    self.resetIndividuals = 0
    self.customClassSplits = ""
    self.selectedAttr = 0
    self.customSplits = ["", "", ""]
    self.autoApply = True
    self.dataChanged = False
    self.autoSynchronize = True
    self.pointsChanged = False
    self.customLineEdits = []
    # Widgets are appended to this list as they are created below; the
    # list is consumed elsewhere in the class.
    self.needsDiscrete = []
    self.data = self.originalData = None

    # NOTE(review): presumably overrides the defaults above with stored
    # values - confirm against the settings declaration of the class.
    self.loadSettings()

    # Signals: one "Data" input handled by self.setData, one "Data" output.
    self.inputs=[("Data", ExampleTable, self.setData)]
    self.outputs=[("Data", ExampleTable)]

    # (label, measure object) pairs; the labels fill the measure combo box.
    self.measures=[("Information gain", orange.MeasureAttribute_info()),
                   #("Gain ratio", orange.MeasureAttribute_gainRatio),
                   ("Gini", orange.MeasureAttribute_gini()),
                   ("chi-square", orange.MeasureAttribute_chiSquare()),
                   ("chi-square prob.", orange.MeasureAttribute_chiSquare(computeProbabilities=1)),
                   ("Relevance", orange.MeasureAttribute_relevance()),
                   ("ReliefF", orange.MeasureAttribute_relief())]
    # Labels of the radio buttons for the default, class and individual
    # discretization method.
    self.discretizationMethods=["Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove continuous attributes"]
    self.classDiscretizationMethods=["Equal-frequency discretization", "Equal-width discretization"]
    self.indiDiscretizationMethods=["Default", "Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove attribute"]

    self.mainHBox = OWGUI.widgetBox(self.mainArea, orientation=0)

    # ---- Control area: default discretization ----
    vbox = self.controlArea
    # All methods but the last go in first; the last one is appended after
    # the interval slider so that the slider sits between them.
    box = OWGUI.radioButtonsInBox(vbox, self, "discretization", self.discretizationMethods[:-1], "Default discretization", callback=[self.clearLineEditFocus, self.defaultMethodChanged])
    self.needsDiscrete.append(box.buttons[1])
    box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
    # The indent computed here is reused for the other indented boxes below.
    indent = OWGUI.checkButtonOffsetHint(self.needsDiscrete[-1])
    self.interBox = OWGUI.widgetBox(OWGUI.indentedBox(box, sep=indent))
    OWGUI.widgetLabel(self.interBox, "Number of intervals (for equal width/frequency)")
    OWGUI.separator(self.interBox, height=4)
    self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(self.interBox), self, "intervals", None, 2, 10, callback=[self.clearLineEditFocus, self.defaultMethodChanged])
    OWGUI.appendRadioButton(box, self, "discretization", self.discretizationMethods[-1])

    OWGUI.separator(vbox)

    # ---- Control area: individual attribute treatment ----
    ribg = OWGUI.radioButtonsInBox(vbox, self, "resetIndividuals", ["Use default discretization for all attributes", "Explore and set individual discretizations"], "Individual attribute treatment", callback = self.setAllIndividuals)
    ll = QWidget(ribg)
    ll.setFixedHeight(1)
    OWGUI.widgetLabel(ribg, "Set discretization of all attributes to")
    hcustbox = OWGUI.widgetBox(OWGUI.indentedBox(ribg), 0, 0)
    # "Custom 1" .. "Custom 3" belong to the ribg button group but are
    # placed inside hcustbox.
    for c in range(1, 4):
        OWGUI.appendRadioButton(ribg, self, "resetIndividuals", "Custom %i" % c, insertInto = hcustbox)

    OWGUI.separator(vbox)

    # ---- Control area: class discretization ----
    box = self.classDiscBox = OWGUI.radioButtonsInBox(vbox, self, "classDiscretization", self.classDiscretizationMethods, "Class discretization", callback=[self.clearLineEditFocus, self.classMethodChanged])
    cinterBox = OWGUI.widgetBox(box)
    # NOTE(review): this overwrites self.intervalSlider, which was assigned
    # the "intervals" slider above - confirm that no other code needs the
    # first slider through this attribute.
    self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(cinterBox, sep=indent), self, "classIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.classMethodChanged], label="Number of intervals")
    hbox = OWGUI.widgetBox(box, orientation = 0)
    # NOTE(review): this button is appended to the class discretization box
    # but is bound to "discretization", not "classDiscretization" - looks
    # like a possible copy-paste slip; confirm before changing.
    OWGUI.appendRadioButton(box, self, "discretization", "Custom" + " ", insertInto = hbox)
    self.classCustomLineEdit = OWGUI.lineEdit(hbox, self, "customClassSplits", callback = self.classCustomChanged, focusInCallback = self.classCustomSelected) # Can't validate - need to allow spaces
    box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
    OWGUI.separator(box)
    self.classIntervalsLabel = OWGUI.widgetLabel(box, "Current splits: ")
    OWGUI.separator(box)
    OWGUI.checkBox(box, self, "outputOriginalClass", "Output original class", callback = self.commitIf)
    OWGUI.widgetLabel(box, "("+"Widget always uses discretized class internally."+")")

    OWGUI.separator(vbox)

    #OWGUI.rubber(vbox)

    # ---- Control area: commit ----
    box = OWGUI.widgetBox(vbox, "Commit")
    applyButton = OWGUI.button(box, self, "Commit", callback = self.commit, default=True)
    autoApplyCB = OWGUI.checkBox(box, self, "autoApply", "Commit automatically", callback=[self.clearLineEditFocus])
    # Ties the button, the check box and the "dataChanged" flag to
    # self.commit (exact semantics are defined by OWGUI.setStopper).
    OWGUI.setStopper(self, applyButton, autoApplyCB, "dataChanged", self.commit)
    OWGUI.rubber(vbox)

    # ---- Main area: individual attribute settings ----
    #self.mainSeparator = OWGUI.separator(self.mainHBox, width=25) # space between control and main area
    self.mainIABox = OWGUI.widgetBox(self.mainHBox, "Individual attribute settings")
    self.mainBox = OWGUI.widgetBox(self.mainIABox, orientation=0)
    OWGUI.separator(self.mainIABox)#, height=30)
    graphBox = OWGUI.widgetBox(self.mainIABox, "", orientation=0)
    # self.needsDiscrete.append(graphBox)

    # graphBox holds the option boxes (graphOptBox) and the graph itself
    # (graphGraphBox), with a separator between them.
    graphOptBox = OWGUI.widgetBox(graphBox)
    OWGUI.separator(graphBox, width=10)

    graphGraphBox = OWGUI.widgetBox(graphBox)
    self.graph = DiscGraph(self, graphGraphBox)
    graphGraphBox.layout().addWidget(self.graph)
    reportButton2 = OWGUI.button(graphGraphBox, self, "Report Graph", callback = self.reportGraph, debuggingEnabled=0)

    # Graph options: split gain measure.
    #graphOptBox.layout().setSpacing(4)
    box = OWGUI.widgetBox(graphOptBox, "Split gain measure", addSpace=True)
    self.measureCombo=OWGUI.comboBox(box, self, "measure", orientation=0, items=[e[0] for e in self.measures], callback=[self.clearLineEditFocus, self.graph.invalidateBaseScore, self.graph.plotBaseCurve])
    OWGUI.checkBox(box, self, "showBaseLine", "Show discretization gain", callback=[self.clearLineEditFocus, self.graph.plotBaseCurve])
    OWGUI.checkBox(box, self, "showLookaheadLine", "Show lookahead gain", callback=self.clearLineEditFocus)
    self.needsDiscrete.append(box)

    # Graph options: target class.
    box = OWGUI.widgetBox(graphOptBox, "Target class", addSpace=True)
    self.targetCombo=OWGUI.comboBox(box, self, "targetClass", orientation=0, callback=[self.clearLineEditFocus, self.graph.targetClassChanged])
    stc = OWGUI.checkBox(box, self, "showTargetClassProb", "Show target class probability", callback=[self.clearLineEditFocus, self.graph.plotProbCurve])
    OWGUI.checkBox(box, self, "showRug", "Show rug (may be slow)", callback=[self.clearLineEditFocus, self.graph.plotRug])
    self.needsDiscrete.extend([self.targetCombo, stc])

    # Graph options: editing.
    box = OWGUI.widgetBox(graphOptBox, "Editing", addSpace=True)
    OWGUI.checkBox(box, self, "snap", "Snap to grid", callback=[self.clearLineEditFocus])
    syncCB = OWGUI.checkBox(box, self, "autoSynchronize", "Apply on the fly", callback=self.clearLineEditFocus)
    syncButton = OWGUI.button(box, self, "Apply", callback = self.synchronizePressed)
    # Same arrangement as for the commit button, here with the
    # "pointsChanged" flag and self.synchronize.
    OWGUI.setStopper(self, syncButton, syncCB, "pointsChanged", self.synchronize)
    OWGUI.rubber(graphOptBox)

    # Attribute list at the left of mainBox.
    self.attrList = OWGUI.listBox(self.mainBox, self, callback = self.individualSelected)
    self.attrList.setItemDelegate(CustomListItemDelegate(self.attrList))
    self.attrList.setFixedWidth(300)

    # Called at this point, i.e. before the individual-method controls
    # below have been created.
    self.defaultMethodChanged()

    OWGUI.separator(self.mainBox, width=10)

    # Individual discretization method: the radio box starts empty and the
    # buttons are appended one by one so that other widgets can be placed
    # between them.
    box = OWGUI.radioButtonsInBox(OWGUI.widgetBox(self.mainBox), self, "indiDiscretization", [], callback=[self.clearLineEditFocus, self.indiMethodChanged])
    #hbbox = OWGUI.widgetBox(box)
    #hbbox.layout().setSpacing(4)
    for meth in self.indiDiscretizationMethods[:-1]:
        OWGUI.appendRadioButton(box, self, "indiDiscretization", meth)
    self.needsDiscrete.append(box.buttons[2])
    self.indiInterBox = OWGUI.indentedBox(box, sep=indent, orientation = "horizontal")
    OWGUI.widgetLabel(self.indiInterBox, "Num. of intervals: ")
    self.indiIntervalSlider = OWGUI.hSlider(self.indiInterBox, self, "indiIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.indiMethodChanged], width = 100)
    OWGUI.rubber(self.indiInterBox)
    OWGUI.appendRadioButton(box, self, "indiDiscretization", self.indiDiscretizationMethods[-1])
    #OWGUI.rubber(hbbox)
    #OWGUI.separator(box)
    #hbbox = OWGUI.widgetBox(box)
    # Three custom rows, each with a radio button, a line edit and a "CC"
    # tool button. The lambdas use the default argument w=i so that each
    # callback keeps the index of its own row.
    for i in range(3):
        hbox = OWGUI.widgetBox(box, orientation = "horizontal")
        OWGUI.appendRadioButton(box, self, "indiDiscretization", "Custom %i" % (i+1) + " ", insertInto = hbox)
        # The line edit is created with an empty attribute name; the edits
        # are kept in self.customLineEdits instead.
        le = OWGUI.lineEdit(hbox, self, "", callback = lambda w=i: self.customChanged(w), focusInCallback = lambda w=i: self.customSelected(w))
        le.setFixedWidth(110)
        self.customLineEdits.append(le)
        OWGUI.toolButton(hbox, self, "CC", width=30, callback = lambda w=i: self.copyToCustom(w))
        OWGUI.rubber(hbox)
    OWGUI.rubber(box)

    #self.controlArea.setFixedWidth(0)

    # Icon stored for continuous attributes.
    self.contAttrIcon = self.createAttributeIconDict()[orange.VarTypes.Continuous]

    self.setAllIndividuals()