예제 #1
0
    def __call__(self, data, weight=0):
        """Induce a structure from ``data`` using the strategy named by ``self.type``.

        Optional attributes read from ``self``:
            type: "auto" (default), "complexity" or "error". With "auto",
                ``orange.IMBySorting(data, []).fuzzy()`` decides: a fuzzy
                result selects "error", otherwise "complexity".
            boundsize: an int, or a (min, max) pair, used to build a subsets
                generator; a constant size of 2 is used when absent.
            m: candidate values tried for ``featureInducer.m`` in "error" mode.

        Args:
            data: examples passed on to the inducer.
            weight: weight identifier passed on to the inducer (default 0).

        Returns:
            For "complexity", the result of calling the configured
            ``StructureInducer`` on ``(data, weight)``. For "error", the
            result of ``orngWrap.Tune1Parameter(...)(data, weight)``, which is
            asked for a classifier. ``None`` for any other ``type`` value.
        """
        import orngWrap

        # Deliberately not named ``type``: shadowing the builtin with this
        # string made the size check below call a str and raise TypeError.
        kind = getattr(self, "type", "auto")

        # NOTE(review): ``subgen`` is built but never handed to the inducer in
        # this method -- presumably it was meant to configure it; confirm.
        if hasattr(self, "boundsize"):
            if isinstance(self.boundsize, int):
                subgen = orange.SubsetsGenerator_constSize(B=self.boundsize)
            else:
                subgen = orange.SubsetsGenerator_minMaxSize(
                    min=self.boundsize[0], max=self.boundsize[1])
        else:
            subgen = orange.SubsetsGenerator_constSize(B=2)

        if kind == "auto":
            im = orange.IMBySorting(data, [])
            kind = "error" if im.fuzzy() else "complexity"

        inducer = StructureInducer(
            removeDuplicates=1,
            redundancyRemover=AttributeRedundanciesRemover(),
            learnerForUnknown=orange.MajorityLearner())

        if kind == "complexity":
            inducer.featureInducer = FeatureByMinComplexity()
            return inducer(data, weight)

        if kind == "error":
            ms = getattr(
                self, "m",
                orange.frange(0.1) + orange.frange(1.2, 3.0, 0.2) +
                orange.frange(4.0, 10.0, 1.0))

            # The redundancy remover and the inducer share one feature-inducer
            # object, so tuning "featureInducer.m" adjusts both at once.
            inducer.redundancyRemover.inducer = inducer.featureInducer = \
                FeatureByMinError()

            return orngWrap.Tune1Parameter(
                parameter="featureInducer.m",
                values=ms,
                object=inducer,
                returnWhat=orngWrap.Tune1Parameter.returnClassifier)(data,
                                                                     weight)

        # Unknown ``type`` values fall through, as before.
        return None
예제 #2
0
def learningCurveWithTestData(
        learners,
        learnset,
        testset,
        times=10,
        proportions=orange.frange(0.1),
        strat=orange.MakeRandomIndices.StratifiedIfPossible,
        pps=[],
        **argkw):
    """Build a learning curve against a fixed, pre-separated test set.

    For every proportion in ``proportions`` the learners are trained
    ``times`` times on a sample of ``learnset`` drawn with
    ``orange.MakeRandomIndices2`` and evaluated on ``testset`` through
    ``learnAndTestOnTestData``.

    Keyword options read from ``argkw``: ``verbose``, and the first truthy
    of ``indicesrandseed``, ``randseed``, ``randomGenerator`` as the random
    generator for sampling. ``pps`` is accepted but not used here.

    Returns a list holding one ``ExperimentResults`` per proportion.
    """
    verbosity = argkw.get("verbose", 0)

    learnset, learnweight = demangleExamples(learnset)
    testweight = demangleExamples(testset)[1]

    # First truthy option wins; if none is truthy, the value of the last
    # option looked up ("randomGenerator", 0 when absent) is kept.
    randomGenerator = 0
    for option in ("indicesrandseed", "randseed", "randomGenerator"):
        randomGenerator = argkw.get(option, 0)
        if randomGenerator:
            break

    pick = orange.MakeRandomIndices2(stratified=strat,
                                     randomGenerator=randomGenerator)

    curve = []
    for proportion in proportions:
        printVerbose("Proportion: %5.3f" % proportion, verbosity)

        learner_names = [learner.name for learner in learners]
        class_var = testset.domain.classVar
        results = ExperimentResults(times, learner_names,
                                    class_var.values.native(),
                                    testweight != 0, class_var.baseValue)
        results.results = []

        for repetition in range(times):
            printVerbose("  repetition %d" % repetition, verbosity)
            sample = learnset.selectref(pick(learnset, proportion), 0)
            learnAndTestOnTestData(learners, (sample, learnweight), testset,
                                   results, repetition)

        curve.append(results)

    return curve
예제 #3
0
def learningCurveN(learners,
                   examples,
                   folds=10,
                   strat=orange.MakeRandomIndices.StratifiedIfPossible,
                   proportions=orange.frange(0.1),
                   pps=[],
                   **argkw):
    """Construct a learning curve for learners using ``folds``-fold splitting.

    Builds the cross-validation and sampling index generators from
    ``folds``, ``strat`` and a random generator, then delegates to
    ``learningCurve`` and returns its result.

    The random generator is ``orange.RandomGenerator(seed)`` where ``seed``
    comes from the ``indicesrandseed`` or ``randseed`` keyword option. When
    neither is given the seed is -1, which is truthy, so the
    ``randomGenerator`` keyword option is only consulted when the seed
    evaluates to a false value.
    """
    seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
    randomGenerator = (orange.RandomGenerator(seed) if seed else
                       argkw.get("randomGenerator", orange.RandomGenerator()))

    # Arguments common to both index generators.
    shared = dict(stratified=strat, randomGenerator=randomGenerator)

    if strat:
        cv = orange.MakeRandomIndicesCV(folds=folds, **shared)
        pick = orange.MakeRandomIndices2(**shared)
    else:
        # NOTE(review): this branch uses differently named classes than the
        # stratified one -- confirm they exist in the orange module.
        cv = orange.RandomIndicesCV(folds=folds, **shared)
        pick = orange.RandomIndices2(**shared)

    return learningCurve(learners, examples, cv, pick, proportions, pps,
                         **argkw)
예제 #4
0
  def __call__(self, data, weight=0):
    """Induce a structure from ``data`` using the strategy named by ``self.type``.

    Optional attributes read from ``self``:
      type: "auto" (default), "complexity" or "error". With "auto",
        ``orange.IMBySorting(data, []).fuzzy()`` decides: a fuzzy result
        selects "error", otherwise "complexity".
      boundsize: an int, or a (min, max) pair, used to build a subsets
        generator; a constant size of 2 is used when absent.
      m: candidate values tried for ``featureInducer.m`` in "error" mode.

    Args:
      data: examples passed on to the inducer.
      weight: weight identifier passed on to the inducer (default 0).

    Returns:
      For "complexity", the result of calling the configured
      ``StructureInducer`` on ``(data, weight)``. For "error", the result of
      ``orngWrap.Tune1Parameter(...)(data, weight)``, which is asked for a
      classifier. ``None`` for any other ``type`` value.
    """
    import orngWrap

    # Deliberately not named ``type``: shadowing the builtin with this string
    # made the size check below call a str and raise TypeError.
    kind = getattr(self, "type", "auto")

    # NOTE(review): ``subgen`` is built but never handed to the inducer in
    # this method -- presumably it was meant to configure it; confirm.
    if hasattr(self, "boundsize"):
      if isinstance(self.boundsize, int):
        subgen = orange.SubsetsGenerator_constSize(B = self.boundsize)
      else:
        subgen = orange.SubsetsGenerator_minMaxSize(min = self.boundsize[0], max = self.boundsize[1])
    else:
      subgen = orange.SubsetsGenerator_constSize(B = 2)

    if kind == "auto":
      im = orange.IMBySorting(data, [])
      kind = "error" if im.fuzzy() else "complexity"

    inducer = StructureInducer(removeDuplicates = 1,
                               redundancyRemover = AttributeRedundanciesRemover(),
                               learnerForUnknown = orange.MajorityLearner()
                              )

    if kind == "complexity":
      inducer.featureInducer = FeatureByMinComplexity()
      return inducer(data, weight)

    if kind == "error":
      ms = getattr(self, "m", orange.frange(0.1)+orange.frange(1.2, 3.0, 0.2)+orange.frange(4.0, 10.0, 1.0))

      # The redundancy remover and the inducer share one feature-inducer
      # object, so tuning "featureInducer.m" adjusts both at once.
      inducer.redundancyRemover.inducer = inducer.featureInducer = FeatureByMinError()

      return orngWrap.Tune1Parameter(
          parameter = "featureInducer.m",
          values = ms,
          object = inducer,
          returnWhat = orngWrap.Tune1Parameter.returnClassifier
      )(data, weight)

    # Unknown ``type`` values fall through, as before.
    return None
예제 #5
0
def learningCurve(learners,
                  examples,
                  cv=None,
                  pick=None,
                  proportions=orange.frange(0.1),
                  pps=[],
                  **argkw):
    """Compute cross-validated results for each training-set proportion.

    For every ``p`` in ``proportions`` the examples are split into folds by
    ``cv``; within each fold the learning part is sub-sampled by ``pick``
    with ``p0=p``, preprocessed by ``pps``, and used to train every learner,
    which is then tested on the examples belonging to that fold.

    Args:
        learners: sequence of learners; each needs a ``name`` and is called
            as ``learner(learnset, weight)``.
        examples: examples, unpacked by ``demangleExamples`` into the data
            and a weight.
        cv: fold-index generator; a 10-fold, stratified-if-possible
            ``orange.MakeRandomIndicesCV`` is created when falsy.
        pick: sampling-index generator; a stratified-if-possible
            ``orange.MakeRandomIndices2`` is created when falsy.
        proportions: iterable of proportions to evaluate.
        pps: sequence of ``(tag, preprocessor)`` pairs; only tag "L" is
            accepted.
        **argkw: reads ``verbose``, ``cache``, ``callback`` and, when ``cv``
            or ``pick`` must be created, ``indicesrandseed``, ``randseed``
            and ``randomGenerator``.

    Returns:
        A list with one ``ExperimentResults`` per proportion.

    Raises:
        SystemError: if any entry of ``pps`` has a tag other than "L".
    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    callback = argkw.get("callback", 0)

    # Only learning-set preprocessors (tag "L") are supported here.
    for pp in pps:
        if pp[0] != "L":
            raise SystemError("cannot preprocess testing examples")

    if not cv or not pick:
        # With neither option given the seed is -1, which is truthy, so the
        # "randomGenerator" option is only consulted for a false seed value.
        seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
        if seed:
            randomGenerator = orange.RandomGenerator(seed)
        else:
            randomGenerator = argkw.get("randomGenerator",
                                        orange.RandomGenerator())
        if not cv:
            cv = orange.MakeRandomIndicesCV(
                folds=10,
                stratified=orange.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=randomGenerator)
        if not pick:
            pick = orange.MakeRandomIndices2(
                stratified=orange.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=randomGenerator)

    examples, weight = demangleExamples(examples)
    # Fold assignment is computed once and reused for every proportion.
    folds = cv(examples)
    # Hex form of the data checksum with its first two characters dropped
    # ("0x" for a non-negative checksum); used in the cache file name.
    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    nLrn = len(learners)

    allResults = []
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verb)

        # Caching is switched off when either generator has a negative
        # randseed; once cleared, ``cache`` stays 0 for later proportions.
        if (cv.randseed < 0) or (pick.randseed < 0):
            cache = 0
        else:
            # The first slot is filled with a literal "%s", leaving a
            # placeholder in the name -- presumably completed per learner by
            # loadFromFiles/saveToFiles; confirm.
            fnstr = "{learningCurve}_%s_%s_%s_%s%s-%s" % (
                "%s", p, cv.randseed, pick.randseed, ppsp, ccsum)
            # A "*" in the name also disables caching -- presumably emitted
            # by encodePP for preprocessors that cannot be cached; confirm.
            if "*" in fnstr:
                cache = 0

        # Class values are stored as int for a discrete class, float otherwise.
        conv = examples.domain.classVar.varType == orange.VarTypes.Discrete and int or float
        testResults = ExperimentResults(
            cv.folds, [l.name for l in learners],
            examples.domain.classVar.values.native(), weight != 0,
            examples.domain.classVar.baseValue)
        # One TestedExample per example, tagged with the fold it is tested in.
        testResults.results = [
            TestedExample(folds[i], conv(examples[i].getclass()), nLrn,
                          examples[i].getweight(weight))
            for i in range(len(examples))
        ]

        if cache and testResults.loadFromFiles(learners, fnstr):
            printVerbose("  loaded from cache", verb)
        else:
            for fold in range(cv.folds):
                printVerbose("  fold %d" % fold, verb)

                # learning
                # Take the examples outside the current fold (negate=1), then
                # keep the part that ``pick`` assigns index 0 for proportion p.
                learnset = examples.selectref(folds, fold, negate=1)
                learnset = learnset.selectref(pick(learnset, p0=p), 0)
                # An empty sample skips the fold entirely, including the
                # callback below.
                if not len(learnset):
                    continue

                for pp in pps:
                    learnset = pp[1](learnset)

                # Train only the learners whose results were not loaded from
                # the cache; the others keep a None placeholder.
                classifiers = [None] * nLrn
                for i in range(nLrn):
                    if not cache or not testResults.loaded[i]:
                        classifiers[i] = learners[i](learnset, weight)

                # testing
                for i in range(len(examples)):
                    if (folds[i] == fold):
                        # This is to prevent cheating:
                        # classify a copy whose class is set to unknown.
                        ex = orange.Example(examples[i])
                        ex.setclass("?")
                        for cl in range(nLrn):
                            if not cache or not testResults.loaded[cl]:
                                cls, pro = classifiers[cl](ex, orange.GetBoth)
                                testResults.results[i].setResult(cl, cls, pro)
                if callback: callback()
            if cache:
                testResults.saveToFiles(learners, fnstr)

        allResults.append(testResults)

    return allResults
예제 #6
0
# Proportion test keeping the induced classifiers (storeClassifiers=1).
# 0.7 and 100 are passed positionally -- presumably the learning proportion
# and the number of repetitions; confirm against orngTest.proportionTest.
res = orngTest.proportionTest(learners, data, 0.7, 100, storeClassifiers=1)
# NOTE(review): res.classifiers[0] raises IndexError if no classifiers were
# stored.
print "#iter %i, #classifiers %i" % (len(
    res.classifiers), len(res.classifiers[0]))
print

##print "\nLearning with 100% class noise"
##classnoise = orange.Preprocessor_addClassNoise(proportion=1.0)
##res = orngTest.proportionTest(learners, data, 0.7, 100, pps = [("L", classnoise)])
##printResults(res)

# Cross-validation with the default settings of orngTest.crossValidation.
print "\nGood old 10-fold cross validation"
res = orngTest.crossValidation(learners, data)
printResults(res)

# Learning curve over the proportions in ``prop``, with 5 folds; one result
# is printed per proportion, prefixed by the proportion on the same line.
print "\nLearning curve"
prop = orange.frange(0.2, 1.0, 0.2)
res = orngTest.learningCurveN(learners, data, folds=5, proportions=prop)
for i in range(len(prop)):
    print "%5.3f:" % prop[i],
    printResults(res[i])

# Learning curve against a held-out test set: the index-0 part of a p0=0.7
# split is used for learning and the index-1 part for testing.
print "\nLearning curve with pre-separated data"
indices = orange.MakeRandomIndices2(data, p0=0.7)
train = data.select(indices, 0)
test = data.select(indices, 1)
res = orngTest.learningCurveWithTestData(learners,
                                         train,
                                         test,
                                         times=5,
                                         proportions=prop)
for i in range(len(prop)):
예제 #7
0
# Proportion test keeping the induced classifiers (storeClassifiers = 1).
# 0.7 and 100 are passed positionally -- presumably the learning proportion
# and the number of repetitions; confirm against orngTest.proportionTest.
res = orngTest.proportionTest(learners, data, 0.7, 100, storeClassifiers = 1)
# Prints -1 as the classifier count when no classifiers were stored.
print "#iter %i, #classifiers %i" % (len(res.classifiers), len(res.classifiers[0]) if len(res.classifiers) > 0 else -1)
print

##print "\nLearning with 100% class noise"
##classnoise = orange.Preprocessor_addClassNoise(proportion=1.0)
##res = orngTest.proportionTest(learners, data, 0.7, 100, pps = [("L", classnoise)])
##printResults(res)

# Cross-validation with the default settings of orngTest.crossValidation.
print "\nGood old 10-fold cross validation"
res = orngTest.crossValidation(learners, data)
printResults(res)


# Learning curve over the proportions in ``prop``, with 5 folds; one result
# is printed per proportion, prefixed by the proportion on the same line.
print "\nLearning curve"
prop = orange.frange(0.2, 1.0, 0.2)
res = orngTest.learningCurveN(learners, data, folds = 5, proportions = prop)
for i in range(len(prop)):
    print "%5.3f:" % prop[i],
    printResults(res[i])

# Learning curve against a held-out test set: the index-0 part of a p0 = 0.7
# split is used for learning and the index-1 part for testing.
print "\nLearning curve with pre-separated data"
indices = orange.MakeRandomIndices2(data, p0 = 0.7)
train = data.select(indices, 0)
test = data.select(indices, 1)
res = orngTest.learningCurveWithTestData(learners, train, test, times = 5, proportions = prop)
for i in range(len(prop)):
    print "%5.3f:" % prop[i],
    printResults(res[i])