def __call__(self, examples, weight=0):
    """Train a tree on ``examples`` and return it, pruned if requested.

    The underlying learner is created through ``self.instance()`` the first
    time it is needed and stored in ``self.learner``.  When neither a
    ``split`` nor a ``measure`` attribute has been set on this object, the
    attribute measure is picked from the class variable's type: gain ratio
    for a discrete class, MSE otherwise.  The same measure object is assigned
    to both the continuous and the discrete split constructor.

    After training, ``sameMajorityPruning`` and ``mForPruning`` (both
    optional attributes, treated as off when missing or falsy) select the
    pruners that are applied, in that order.
    """
    if not self.learner:
        self.learner = self.instance()

    # De Morgan form of "no split and no measure were configured".
    if not (hasattr(self, "split") or hasattr(self, "measure")):
        classType = examples.domain.classVar.varType
        if classType == orange.VarTypes.Discrete:
            defaultMeasure = orange.MeasureAttribute_gainRatio()
        else:
            defaultMeasure = orange.MeasureAttribute_MSE()
        self.learner.split.continuousSplitConstructor.measure = defaultMeasure
        self.learner.split.discreteSplitConstructor.measure = defaultMeasure

    result = self.learner(examples, weight)

    if getattr(self, "sameMajorityPruning", 0):
        result = orange.TreePruner_SameMajority(result)
    if getattr(self, "mForPruning", 0):
        result = orange.TreePruner_m(result, m=self.mForPruning)
    return result
# Wrap a tree learner in orngWrap.TuneMParameters, offering several values
# for its "minSubset" and "measure" settings and passing orngStat.AUC as the
# evaluation function, then fit the wrapped learner on the "voting" data.
import orange
import orngTree
import orngStat
import orngWrap

learner = orngTree.TreeLearner()
data = orange.ExampleTable("voting")

# Each entry pairs a setting name with the candidate values to try.
candidateSettings = [
    ("minSubset", [2, 5, 10, 20]),
    ("measure", [
        orange.MeasureAttribute_gainRatio(),
        orange.MeasureAttribute_gini(),
    ]),
]

tuner = orngWrap.TuneMParameters(
    object=learner,
    parameters=candidateSettings,
    evaluate=orngStat.AUC,
)
classifier = tuner(data)
meas.unknownsTreatment = meas.UnknownsToCommon print fstr % (("- unknowns to common:", ) + tuple([meas(i, data2) for i in range(attrs)])) meas.unknownsTreatment = meas.UnknownsAsValue print fstr % (("- unknowns as value:", ) + tuple([meas(i, data2) for i in range(attrs)])) print print "Information gain" printVariants(orange.MeasureAttribute_info()) print "Gain ratio" printVariants(orange.MeasureAttribute_gainRatio()) print "Gini index" printVariants(orange.MeasureAttribute_gini()) print "Relief" meas = orange.MeasureAttribute_relief() print fstr % ( ("- no unknowns:", ) + tuple([meas(i, data) for i in range(attrs)])) print fstr % ( ("- with unknowns:", ) + tuple([meas(i, data2) for i in range(attrs)])) print print "Cost matrix ((0, 5), (1, 0))" meas = orange.MeasureAttribute_cost() meas.cost = ((0, 5), (1, 0))
# Three ways of using orngMisc.BestOnTheFly to find the attribute of the
# "lymphography" data with the highest gain ratio.
import orange
import orngMisc

data = orange.ExampleTable("lymphography")

# 1) Candidates are (score, attribute) pairs and the finder is constructed
#    with orngMisc.compare2_firstBigger as its comparison function.
findBest = orngMisc.BestOnTheFly(orngMisc.compare2_firstBigger)
for attr in data.domain.attributes:
    score = orange.MeasureAttribute_gainRatio(attr, data)
    findBest.candidate((score, attr))
print("%5.3f: %s" % findBest.winner())

# 2) Same (score, attribute) candidates, but the finder is constructed with
#    callCompareOn1st=True instead of an explicit comparison function.
findBest = orngMisc.BestOnTheFly(callCompareOn1st=True)
for attr in data.domain.attributes:
    score = orange.MeasureAttribute_gainRatio(attr, data)
    findBest.candidate((score, attr))
print("%5.3f: %s" % findBest.winner())

# 3) Candidates are the bare scores; the attribute is looked up afterwards
#    in the domain through the winner's index.
findBest = orngMisc.BestOnTheFly()
for attr in data.domain.attributes:
    score = orange.MeasureAttribute_gainRatio(attr, data)
    findBest.candidate(score)
bestIndex = findBest.winnerIndex()
print("%5.3f: %s" % (findBest.winner(), data.domain[bestIndex]))
def instance(self):
    """Build and return an ``orange.TreeLearner`` configured from ``self``.

    Optional settings are read from attributes of ``self``: ``split``,
    ``binarization``, ``measure``, ``reliefM``, ``reliefK``,
    ``worstAcceptable``, ``minSubset``, ``stop``, ``maxMajority``,
    ``minExamples`` and the pass-through attributes listed in the final
    loop.  Attributes that are absent fall back to the defaults coded below.

    NOTE(review): the source of this method was flattened onto two lines,
    so the nesting shown here is a reconstruction.  Everything from
    ``measures`` down to the ``minSubset`` handling is assumed to belong to
    the ``else`` branch (no caller-supplied ``split``), and the
    ``maxMajority`` / ``minExamples`` handling to the ``else`` branch of the
    ``stop`` check -- TODO confirm against the original file.
    """
    learner = orange.TreeLearner()

    hasSplit = hasattr(self, "split")
    if hasSplit:
        # A caller-supplied split constructor is installed unchanged.
        learner.split = self.split
    else:
        # Default: a combined constructor with a threshold split for
        # continuous attributes and a discrete split chosen by
        # ``binarization`` (1 -> exhaustive binary, 2 -> one against others,
        # anything else -> one branch per attribute value constructor).
        learner.split = orange.TreeSplitConstructor_Combined()
        learner.split.continuousSplitConstructor = orange.TreeSplitConstructor_Threshold( )
        binarization = getattr(self, "binarization", 0)
        if binarization == 1:
            learner.split.discreteSplitConstructor = orange.TreeSplitConstructor_ExhaustiveBinary( )
        elif binarization == 2:
            learner.split.discreteSplitConstructor = orange.TreeSplitConstructor_OneAgainstOthers( )
        else:
            learner.split.discreteSplitConstructor = orange.TreeSplitConstructor_Attribute( )

        # Names accepted when ``self.measure`` is given as a string.
        measures = {
            "infoGain": orange.MeasureAttribute_info,
            "gainRatio": orange.MeasureAttribute_gainRatio,
            "gini": orange.MeasureAttribute_gini,
            "relief": orange.MeasureAttribute_relief,
            "retis": orange.MeasureAttribute_MSE
        }

        measure = getattr(self, "measure", None)
        if type(measure) == str:
            # A name is replaced by a fresh instance of the mapped class;
            # a name missing from ``measures`` raises KeyError here.
            measure = measures[measure]()
        # With the nesting reconstructed above, ``hasSplit`` is always False
        # at this point, so this reduces to "no measure given -> gain ratio".
        if not hasSplit and not measure:
            measure = orange.MeasureAttribute_gainRatio()

        # The Relief parameters are applied only when the measure's exact
        # type is MeasureAttribute_relief and the setting is truthy.
        measureIsRelief = type(measure) == orange.MeasureAttribute_relief
        relM = getattr(self, "reliefM", None)
        if relM and measureIsRelief:
            measure.m = relM

        relK = getattr(self, "reliefK", None)
        if relK and measureIsRelief:
            measure.k = relK

        # The same measure object is shared by both split constructors.
        learner.split.continuousSplitConstructor.measure = measure
        learner.split.discreteSplitConstructor.measure = measure

        # ``worstAcceptable`` and ``minSubset`` are copied to both
        # constructors only when set to a truthy value.
        wa = getattr(self, "worstAcceptable", 0)
        if wa:
            learner.split.continuousSplitConstructor.worstAcceptable = wa
            learner.split.discreteSplitConstructor.worstAcceptable = wa

        ms = getattr(self, "minSubset", 0)
        if ms:
            learner.split.continuousSplitConstructor.minSubset = ms
            learner.split.discreteSplitConstructor.minSubset = ms

    if hasattr(self, "stop"):
        # A caller-supplied stopping criterion is installed unchanged.
        learner.stop = self.stop
    else:
        learner.stop = orange.TreeStopCriteria_common()
        # ``maxMajority`` is applied only when below 1.0, ``minExamples``
        # only when truthy; otherwise the criterion keeps its own defaults.
        mm = getattr(self, "maxMajority", 1.0)
        if mm < 1.0:
            learner.stop.maxMajority = self.maxMajority
        me = getattr(self, "minExamples", 0)
        if me:
            learner.stop.minExamples = self.minExamples

    # Remaining settings are copied verbatim when present on ``self``.
    for a in [
            "storeDistributions", "storeContingencies", "storeExamples",
            "storeNodeClassifier", "nodeLearner", "maxDepth"
    ]:
        if hasattr(self, a):
            setattr(learner, a, getattr(self, a))

    return learner
def selectAttributes(data, attrContOrder, attrDiscOrder, projections=None):
    """Divide the attributes of ``data`` into names to show and names to hide.

    ``attrContOrder`` and ``attrDiscOrder`` name the ordering applied to the
    continuous and to the discrete attributes respectively.  Recognised
    values are ``"None"``, ``"ReliefF"``, ``"Fisher discriminant"``,
    ``"Signal to Noise"`` and ``"Signal to Noise For Each Class"`` for
    continuous attributes, and ``"None"``, ``"GainRatio"``, ``"Gini"``,
    ``"ReliefF"`` and ``"Oblivious decision graphs"`` for discrete ones.
    Any other value is reported on standard output and that group of
    attributes ends up in neither list.

    ``projections`` is accepted but not used by this function.

    Returns a ``(shown, hidden, 0)`` tuple: two lists of attribute names and
    a constant ``0``.  When the data has no class variable, or the class is
    not discrete, every attribute name is returned as shown and nothing is
    hidden; otherwise ``shown`` starts with the name of the class variable.
    """
    classVar = data.domain.classVar
    # Identity test: ``None`` is a singleton and ``==`` could be overloaded.
    if classVar is None or classVar.varType != orange.VarTypes.Discrete:
        return ([attr.name for attr in data.domain.attributes], [], 0)

    shown = [classVar.name]
    hidden = []

    # Both groups ordered by ReliefF: score all attributes in a single pass.
    if attrContOrder == "ReliefF" and attrDiscOrder == "ReliefF":
        attrVals = orngFSS.attMeasure(data, orange.MeasureAttribute_relief())
        s, h = getTopAttrs(attrVals, 0.95)
        return (shown + s, hidden + h, 0)

    # No ordering at all: show everything in domain order.
    if attrContOrder == "None" and attrDiscOrder == "None":
        shown += [attr.name for attr in data.domain.attributes]
        return (shown, hidden, 0)

    # Names of the continuous and of the discrete attributes.
    contAttrs = []
    discAttrs = []
    for attr in data.domain.attributes:
        if attr.varType == orange.VarTypes.Continuous:
            contAttrs.append(attr.name)
        elif attr.varType == orange.VarTypes.Discrete:
            discAttrs.append(attr.name)

    # --- continuous attributes -------------------------------------------
    if attrContOrder == "None":
        shown += contAttrs
    elif attrContOrder in ("ReliefF", "Fisher discriminant",
                           "Signal to Noise",
                           "Signal to Noise For Each Class"):
        if attrContOrder == "ReliefF":
            measure = orange.MeasureAttribute_relief(k=10, m=50)
        elif attrContOrder == "Fisher discriminant":
            measure = MeasureFisherDiscriminant()
        elif attrContOrder == "Signal to Noise":
            measure = S2NMeasure()
        else:
            measure = S2NMeasureMix()
        s, h = _selectTopAttrs(data, contAttrs, measure)
        shown += s
        hidden += h
    else:
        # Formatted so the output matches the two-item print statement
        # ``print "...: ", value`` byte for byte.
        print("%s %s" % ("Unknown value for attribute order: ", attrContOrder))

    # --- discrete attributes ---------------------------------------------
    if attrDiscOrder == "None":
        shown += discAttrs
    elif attrDiscOrder in ("GainRatio", "Gini", "ReliefF"):
        if attrDiscOrder == "GainRatio":
            measure = orange.MeasureAttribute_gainRatio()
        elif attrDiscOrder == "Gini":
            measure = orange.MeasureAttribute_gini()
        else:
            measure = orange.MeasureAttribute_relief()
        s, h = _selectTopAttrs(data, discAttrs, measure)
        shown += s
        hidden += h
    elif attrDiscOrder == "Oblivious decision graphs":
        shown.extend(getFunctionalList(data))
        # Every discrete attribute that was not selected above is hidden.
        for attr in data.domain.attributes:
            if attr.name not in shown and attr.varType == orange.VarTypes.Discrete:
                hidden.append(attr.name)
    else:
        print("%s %s" % ("Unknown value for attribute order: ", attrDiscOrder))

    return (shown, hidden, 0)


def _selectTopAttrs(data, attrNames, measure):
    """Score ``attrNames`` (plus the class) with ``measure`` and split them.

    Returns the result of ``getTopAttrs`` for the 0.95 threshold, which the
    caller unpacks as a ``(shown, hidden)`` pair of name lists.
    """
    subset = data.select(attrNames + [data.domain.classVar])
    return getTopAttrs(orngFSS.attMeasure(subset, measure), 0.95)
def __call__(self, gen, weightID=0):
    """Build a lookup-table classifier on the best-scoring attribute.

    Every attribute of ``gen.domain`` is scored with ``self.measure`` and the
    winner reported by ``orngMisc.BestOnTheFly`` is selected.  For each entry
    of that attribute's contingency with the class, the lookup table receives
    the entry's modus and the classifier's distributions receive the entry
    itself.  The last slot of both (index ``-1``) is filled from the
    contingency's ``innerDistribution``.  All stored distributions are
    normalized before the classifier is returned.
    """
    picker = orngMisc.BestOnTheFly()
    for candidate in gen.domain.attributes:
        picker.candidate(self.measure(candidate, gen, None, weightID))
    chosen = gen.domain.attributes[picker.winnerIndex()]

    classifier = orange.ClassifierByLookupTable(gen.domain.classVar, chosen)
    table = orange.ContingencyAttrClass(chosen, gen, weightID)

    for idx in range(len(table)):
        valueDist = table[idx]
        classifier.lookupTable[idx] = valueDist.modus()
        classifier.distributions[idx] = valueDist

    lastDist = table.innerDistribution
    classifier.lookupTable[-1] = lastDist.modus()
    classifier.distributions[-1] = lastDist

    for dist in classifier.distributions:
        dist.normalize()
    return classifier


# Demonstration: train the learner with gain ratio on ``tab``, show what the
# resulting classifier holds, then print its prediction for every example.
oal = OneAttributeLearner(orange.MeasureAttribute_gainRatio())
c = oal(tab)

print(c.variable)
print(c.variable.values)
print(c.lookupTable)
print(c.distributions)

for ex in tab:
    print("%s ---> %s" % (ex, c(ex)))