예제 #1
0
    def __call__(self, examples, weight=0):
        """Induce a tree from `examples` and apply any requested pruning.

        The wrapped learner is created through `self.instance()` the first
        time it is needed.  When neither a `split` nor a `measure` attribute
        has been set on this object, the split measure is picked from the
        type of the class variable: gain ratio for a discrete class, MSE
        otherwise.

        `weight` is handed to the wrapped learner unchanged.  The induced
        tree is returned, passed through `TreePruner_SameMajority` and/or
        `TreePruner_m` when `sameMajorityPruning` / `mForPruning` hold a
        true value.
        """
        if not self.learner:
            self.learner = self.instance()

        customised = hasattr(self, "split") or hasattr(self, "measure")
        if not customised:
            discreteClass = (examples.domain.classVar.varType ==
                             orange.VarTypes.Discrete)
            if discreteClass:
                defaultMeasure = orange.MeasureAttribute_gainRatio()
            else:
                defaultMeasure = orange.MeasureAttribute_MSE()
            # Both halves of the combined split constructor share the measure.
            combinedSplit = self.learner.split
            for constructor in (combinedSplit.continuousSplitConstructor,
                                combinedSplit.discreteSplitConstructor):
                constructor.measure = defaultMeasure

        result = self.learner(examples, weight)
        if getattr(self, "sameMajorityPruning", 0):
            result = orange.TreePruner_SameMajority(result)
        pruningM = getattr(self, "mForPruning", 0)
        if pruningM:
            result = orange.TreePruner_m(result, m=pruningM)
        return result
예제 #2
0
import orange, orngTree, orngStat, orngWrap

# Wrap a tree learner in a TuneMParameters tuner that is given candidate
# values for `minSubset` and `measure` and uses orngStat.AUC as its
# evaluation function, then fit it on the "voting" data.
learner = orngTree.TreeLearner()
data = orange.ExampleTable("voting")

minSubsetCandidates = [2, 5, 10, 20]
measureCandidates = [
    orange.MeasureAttribute_gainRatio(),
    orange.MeasureAttribute_gini(),
]
tuner = orngWrap.TuneMParameters(
    object=learner,
    parameters=[
        ("minSubset", minSubsetCandidates),
        ("measure", measureCandidates),
    ],
    evaluate=orngStat.AUC)
classifier = tuner(data)
    meas.unknownsTreatment = meas.UnknownsToCommon
    print fstr % (("- unknowns to common:", ) +
                  tuple([meas(i, data2) for i in range(attrs)]))

    meas.unknownsTreatment = meas.UnknownsAsValue
    print fstr % (("- unknowns as value:", ) +
                  tuple([meas(i, data2) for i in range(attrs)]))
    print


print "Information gain"
printVariants(orange.MeasureAttribute_info())

print "Gain ratio"
printVariants(orange.MeasureAttribute_gainRatio())

print "Gini index"
printVariants(orange.MeasureAttribute_gini())

print "Relief"
meas = orange.MeasureAttribute_relief()
print fstr % (
    ("- no unknowns:", ) + tuple([meas(i, data) for i in range(attrs)]))
print fstr % (
    ("- with unknowns:", ) + tuple([meas(i, data2) for i in range(attrs)]))
print

print "Cost matrix ((0, 5), (1, 0))"
meas = orange.MeasureAttribute_cost()
meas.cost = ((0, 5), (1, 0))
예제 #4
0
import orange, orngMisc

data = orange.ExampleTable("lymphography")

findBest = orngMisc.BestOnTheFly(orngMisc.compare2_firstBigger)

for attr in data.domain.attributes:
    findBest.candidate((orange.MeasureAttribute_gainRatio(attr, data), attr))

print "%5.3f: %s" % findBest.winner()


findBest = orngMisc.BestOnTheFly(callCompareOn1st = True)
for attr in data.domain.attributes:
    findBest.candidate((orange.MeasureAttribute_gainRatio(attr, data), attr))

print "%5.3f: %s" % findBest.winner()

findBest = orngMisc.BestOnTheFly()

for attr in data.domain.attributes:
    findBest.candidate(orange.MeasureAttribute_gainRatio(attr, data))

bestIndex = findBest.winnerIndex()
print "%5.3f: %s" % (findBest.winner(), data.domain[bestIndex])
예제 #5
0
    def instance(self):
        """Build and return an `orange.TreeLearner` configured from the
        attributes set on this object.

        Split: a user-supplied `split` is used as is; otherwise a combined
        split constructor is assembled from `binarization`, `measure`,
        `reliefM` / `reliefK`, `worstAcceptable` and `minSubset`.

        Stop: a user-supplied `stop` is used as is; otherwise an
        `orange.TreeStopCriteria_common` is created and adjusted with
        `maxMajority` / `minExamples`.

        Finally the storage options, `nodeLearner` and `maxDepth` are
        copied to the learner when they are present on this object.
        """
        learner = orange.TreeLearner()

        if hasattr(self, "split"):
            learner.split = self.split
        else:
            learner.split = orange.TreeSplitConstructor_Combined()
            split = learner.split
            split.continuousSplitConstructor = \
                orange.TreeSplitConstructor_Threshold()

            # binarization: 1 -> exhaustive binary, 2 -> one against others,
            # anything else -> one branch per attribute value.
            binarization = getattr(self, "binarization", 0)
            if binarization == 1:
                discreteConstructor = \
                    orange.TreeSplitConstructor_ExhaustiveBinary
            elif binarization == 2:
                discreteConstructor = \
                    orange.TreeSplitConstructor_OneAgainstOthers
            else:
                discreteConstructor = orange.TreeSplitConstructor_Attribute
            split.discreteSplitConstructor = discreteConstructor()

            constructors = (split.continuousSplitConstructor,
                            split.discreteSplitConstructor)

            # A measure may be given as an object or by one of these names.
            measuresByName = {
                "infoGain": orange.MeasureAttribute_info,
                "gainRatio": orange.MeasureAttribute_gainRatio,
                "gini": orange.MeasureAttribute_gini,
                "relief": orange.MeasureAttribute_relief,
                "retis": orange.MeasureAttribute_MSE
            }
            measure = getattr(self, "measure", None)
            if type(measure) == str:
                measure = measuresByName[measure]()
            if not measure:
                measure = orange.MeasureAttribute_gainRatio()

            # reliefM / reliefK only apply to a Relief measure.
            if type(measure) == orange.MeasureAttribute_relief:
                for option, target in (("reliefM", "m"), ("reliefK", "k")):
                    value = getattr(self, option, None)
                    if value:
                        setattr(measure, target, value)

            for constructor in constructors:
                constructor.measure = measure

            # Options that are forwarded to both split constructors when set
            # to a true value.
            for option in ("worstAcceptable", "minSubset"):
                value = getattr(self, option, 0)
                if value:
                    for constructor in constructors:
                        setattr(constructor, option, value)

        if hasattr(self, "stop"):
            learner.stop = self.stop
        else:
            learner.stop = orange.TreeStopCriteria_common()
            maxMajority = getattr(self, "maxMajority", 1.0)
            if maxMajority < 1.0:
                learner.stop.maxMajority = maxMajority
            minExamples = getattr(self, "minExamples", 0)
            if minExamples:
                learner.stop.minExamples = minExamples

        passThrough = ("storeDistributions", "storeContingencies",
                       "storeExamples", "storeNodeClassifier", "nodeLearner",
                       "maxDepth")
        for option in passThrough:
            if hasattr(self, option):
                setattr(learner, option, getattr(self, option))

        return learner
예제 #6
0
def selectAttributes(data, attrContOrder, attrDiscOrder, projections=None):
    if data.domain.classVar == None or data.domain.classVar.varType != orange.VarTypes.Discrete:
        return ([attr.name for attr in data.domain.attributes], [], 0)

    shown = [data.domain.classVar.name]
    hidden = []
    maxIndex = 0  # initialize outputs

    # # both are RELIEF
    if attrContOrder == "ReliefF" and attrDiscOrder == "ReliefF":
        attrVals = orngFSS.attMeasure(data, orange.MeasureAttribute_relief())
        s, h = getTopAttrs(attrVals, 0.95)
        return (shown + s, hidden + h, 0)

    # # both are NONE
    elif attrContOrder == "None" and attrDiscOrder == "None":
        for item in data.domain.attributes:
            shown.append(item.name)
        return (shown, hidden, 0)

    # disc and cont attribute list
    discAttrs = []
    contAttrs = []
    for attr in data.domain.attributes:
        if attr.varType == orange.VarTypes.Continuous:
            contAttrs.append(attr.name)
        elif attr.varType == orange.VarTypes.Discrete:
            discAttrs.append(attr.name)

    ###############################
    # sort continuous attributes
    if attrContOrder == "None":
        shown += contAttrs
    elif attrContOrder in [
            "ReliefF", "Fisher discriminant", "Signal to Noise",
            "Signal to Noise For Each Class"
    ]:
        if attrContOrder == "ReliefF":
            measure = orange.MeasureAttribute_relief(k=10, m=50)
        elif attrContOrder == "Fisher discriminant":
            measure = MeasureFisherDiscriminant()
        elif attrContOrder == "Signal to Noise":
            measure = S2NMeasure()
        else:
            measure = S2NMeasureMix()

        dataNew = data.select(contAttrs + [data.domain.classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s, h = getTopAttrs(attrVals, 0.95)
        shown += s
        hidden += h
    else:
        print "Unknown value for attribute order: ", attrContOrder

    # ###############################
    # sort discrete attributes
    if attrDiscOrder == "None":
        shown += discAttrs
    elif attrDiscOrder == "GainRatio" or attrDiscOrder == "Gini" or attrDiscOrder == "ReliefF":
        if attrDiscOrder == "GainRatio":
            measure = orange.MeasureAttribute_gainRatio()
        elif attrDiscOrder == "Gini":
            measure = orange.MeasureAttribute_gini()
        else:
            measure = orange.MeasureAttribute_relief()

        dataNew = data.select(discAttrs + [data.domain.classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s, h = getTopAttrs(attrVals, 0.95)
        shown += s
        hidden += h

    elif attrDiscOrder == "Oblivious decision graphs":
        #shown.append(data.domain.classVar.name)
        attrs = getFunctionalList(data)
        for item in attrs:
            shown.append(item)
        for attr in data.domain.attributes:
            if attr.name not in shown and attr.varType == orange.VarTypes.Discrete:
                hidden.append(attr.name)
    else:
        print "Unknown value for attribute order: ", attrDiscOrder

    return (shown, hidden, maxIndex)
예제 #7
0
    def __call__(self, gen, weightID=0):
        """Build a lookup-table classifier on the single best-scoring attribute.

        Every attribute of `gen.domain` is scored with `self.measure`; the
        attribute picked by `orngMisc.BestOnTheFly` becomes the only input
        of an `orange.ClassifierByLookupTable` that predicts the class
        variable of `gen`.

        `weightID` is forwarded to the measure and to the contingency
        computation.  Returns the filled classifier.
        """
        # Candidates are submitted in attribute order, so the winner's index
        # can be used directly to index gen.domain.attributes.
        selectBest = orngMisc.BestOnTheFly()
        for attr in gen.domain.attributes:
            selectBest.candidate(self.measure(attr, gen, None, weightID))
        bestAttr = gen.domain.attributes[selectBest.winnerIndex()]
        classifier = orange.ClassifierByLookupTable(gen.domain.classVar,
                                                    bestAttr)

        # For each entry of the contingency, store its modus as the
        # prediction and the entry itself as the accompanying distribution.
        contingency = orange.ContingencyAttrClass(bestAttr, gen, weightID)
        for i in range(len(contingency)):
            classifier.lookupTable[i] = contingency[i].modus()
            classifier.distributions[i] = contingency[i]
        # The last slot is filled from contingency.innerDistribution.
        # NOTE(review): presumably this is the fallback used when the
        # attribute value is unknown -- confirm against the
        # ClassifierByLookupTable documentation.
        classifier.lookupTable[-1] = contingency.innerDistribution.modus()
        classifier.distributions[-1] = contingency.innerDistribution
        # Normalisation happens last, after all modus values were read.
        for d in classifier.distributions:
            d.normalize()

        return classifier


oal = OneAttributeLearner(orange.MeasureAttribute_gainRatio())
c = oal(tab)

print c.variable
print c.variable.values
print c.lookupTable
print c.distributions

for ex in tab:
    print "%s ---> %s" % (ex, c(ex))