Example #1
0
def learningCurveWithTestData(
        learners,
        learnset,
        testset,
        times=10,
        proportions=orange.frange(0.1),
        strat=orange.MakeRandomIndices.StratifiedIfPossible,
        pps=[],
        **argkw):
    """Evaluate learners on a fixed test set for a series of training-set proportions.

    For every proportion ``p`` in ``proportions`` the learners are trained
    ``times`` times, each time on the part of ``learnset`` selected by
    ``MakeRandomIndices2`` with proportion ``p``, and tested on ``testset``.

    Parameters:
        learners: sequence of learners; each must have a ``name`` attribute.
        learnset: training data, either bare or an ``(examples, weight)`` pair
            (it is passed through ``demangleExamples``).
        testset: testing data, either bare or an ``(examples, weight)`` pair.
        times: number of repetitions per proportion.
        proportions: iterable of proportions to evaluate.
        strat: stratification setting handed to ``MakeRandomIndices2``.
        pps: preprocessors, forwarded to ``learnAndTestOnTestData``.
        **argkw: ``verbose`` controls progress messages; the first truthy
            value of ``indicesrandseed``, ``randseed`` and ``randomGenerator``
            is used as the random generator of the index maker.

    Returns:
        A list with one ``ExperimentResults`` per proportion, in the order of
        ``proportions``.
    """
    verb = argkw.get("verbose", 0)

    learnset, learnweight = demangleExamples(learnset)
    # Demangle the test set completely: the domain is read from it below, which
    # would fail on an (examples, weight) pair if only the weight were taken.
    testset, testweight = demangleExamples(testset)
    classVar = testset.domain.classVar

    randomGenerator = argkw.get("indicesrandseed", 0) or argkw.get(
        "randseed", 0) or argkw.get("randomGenerator", 0)
    pick = orange.MakeRandomIndices2(stratified=strat,
                                     randomGenerator=randomGenerator)
    allResults = []
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verb)
        testResults = ExperimentResults(
            times, [l.name for l in learners],
            classVar.values.native(), testweight != 0,
            classVar.baseValue)
        testResults.results = []

        for t in range(times):
            printVerbose("  repetition %d" % t, verb)
            # Group 0 of the random indices forms the training subset.
            subset = learnset.selectref(pick(learnset, p), 0)
            # pps used to be accepted but silently dropped; forward it so the
            # preprocessors are actually applied.
            learnAndTestOnTestData(learners, (subset, learnweight),
                                   (testset, testweight), testResults, t, pps)

        allResults.append(testResults)

    return allResults
Example #2
0
def proportionTest(learners,
                   examples,
                   learnProp,
                   times=10,
                   strat=orange.MakeRandomIndices.StratifiedIfPossible,
                   pps=[],
                   callback=None,
                   **argkw):
    """Train-and-test evaluation (train on a subset, test on the remaining examples).

    The data is split ``times`` times with ``MakeRandomIndices2`` (``p0`` set
    to ``learnProp``); group 0 of each split is used for learning and group 1
    for testing. All repetitions are accumulated into one
    ``ExperimentResults``, which is returned. ``callback``, if given, is
    called after every repetition. ``pps`` and ``**argkw`` are forwarded to
    ``learnAndTestOnTestData``.
    """
    # The random generator is the first truthy value among the three keyword
    # arguments; when none is given (or all are falsy) the last lookup's value
    # is used, which defaults to 0.
    seedOptions = [
        argkw.get(key, 0)
        for key in ("indicesrandseed", "randseed", "randomGenerator")
    ]
    randomGenerator = seedOptions[0] or seedOptions[1] or seedOptions[2]
    pick = orange.MakeRandomIndices2(stratified=strat,
                                     p0=learnProp,
                                     randomGenerator=randomGenerator)

    examples, weight = demangleExamples(examples)

    # Class values and base value are recorded only for a discrete class.
    classVar = examples.domain.classVar
    values = baseValue = None
    if classVar.varType == orange.VarTypes.Discrete:
        values, baseValue = list(classVar.values), classVar.baseValue

    learnerNames = [l.name for l in learners]
    testResults = ExperimentResults(times, learnerNames, values, weight != 0,
                                    baseValue)

    for iteration in range(times):
        split = pick(examples)
        trainPart = (examples.selectref(split, 0), weight)
        testPart = (examples.selectref(split, 1), weight)
        learnAndTestOnTestData(learners, trainPart, testPart, testResults,
                               iteration, pps, **argkw)
        if callback:
            callback()
    return testResults
Example #3
0
def learnAndTestOnTestData(learners,
                           learnset,
                           testset,
                           testResults=None,
                           iterationNumber=0,
                           pps=[],
                           callback=None,
                           **argkw):
    """Train the learners on ``learnset`` and test them on ``testset``.

    Parameters:
        learners: sequence of learners, each called as ``learner(examples, weight)``.
        learnset: training data, bare or an ``(examples, weight)`` pair.
        testset: testing data, bare or an ``(examples, weight)`` pair.
        testResults: results object to extend; passed on to ``testOnData``.
        iterationNumber: iteration index passed on to ``testOnData``.
        pps: sequence of preprocessors ``(kind, function)``. Kind ``"B"`` is
            applied to both sets, ``"L"`` to the learning set, ``"T"`` to the
            test set; ``"LT"`` receives both sets and returns both. All ``"B"``
            preprocessors run before any of the others.
        callback: optional callable invoked after each learner is trained.
        **argkw: ``storeclassifiers``/``storeClassifiers`` makes the trained
            classifiers get appended to ``testResults.classifiers``;
            ``storeExamples`` is passed on to ``testOnData``.

    Returns:
        The results object returned by ``testOnData``.
    """
    storeclassifiers = argkw.get("storeclassifiers", 0) or argkw.get(
        "storeClassifiers", 0)
    storeExamples = argkw.get("storeExamples", 0)

    learnset, learnweight = demangleExamples(learnset)
    testset, testweight = demangleExamples(testset)

    for pp in pps:
        if pp[0] == "B":
            learnset = pp[1](learnset)
            testset = pp[1](testset)

    for pp in pps:
        if pp[0] == "L":
            learnset = pp[1](learnset)
        elif pp[0] == "T":
            testset = pp[1](testset)
        elif pp[0] == "LT":
            learnset, testset = pp[1](learnset, testset)

    # Train every learner exactly once. The previous code retrained all
    # learners in a second pass and discarded the first set of classifiers.
    classifiers = []
    for learner in learners:
        classifiers.append(learner(learnset, learnweight))
        if callback:
            callback()
    for classifier, learner in zip(classifiers, learners):
        classifier.name = getattr(learner, 'name', 'noname')
    testResults = testOnData(classifiers, (testset, testweight), testResults,
                             iterationNumber, storeExamples)
    if storeclassifiers:
        testResults.classifiers.append(classifiers)
    return testResults
Example #4
0
def testOnData(classifiers,
               testset,
               testResults=None,
               iterationNumber=0,
               storeExamples=False,
               **argkw):
    """Classify every test example with every classifier and record the outcome.

    ``testset`` may be bare or an ``(examples, weight)`` pair. When
    ``testResults`` is falsy a new ``ExperimentResults`` for a single
    iteration is created from the test set's class variable. The tested
    examples are attached to ``testResults.examples`` and one
    ``TestedExample`` per test example is appended to
    ``testResults.results``. Returns ``testResults``.

    NOTE(review): ``storeExamples`` is accepted but not consulted here; the
    examples are always attached.
    """
    testset, testweight = demangleExamples(testset)

    if not testResults:
        classVar = testset.domain.classVar
        if classVar.varType == orange.VarTypes.Discrete:
            values, baseValue = classVar.values.native(), classVar.baseValue
        else:
            values, baseValue = None, -1
        classifierNames = [l.name for l in classifiers]
        testResults = ExperimentResults(1, classifierNames, values,
                                        testweight != 0, baseValue)

    stored = getattr(testResults, "examples", False)
    if not (stored and len(stored)):
        # Nothing stored so far: keep a reference only, since a copy may never
        # be needed.
        testResults.examples = testset
    else:
        # The stored table is not ours to modify, so it is copied the first
        # time something has to be added to it.
        if not getattr(testResults, "examplesCloned", False):
            testResults.examples = orange.ExampleTable(testResults.examples)
            testResults.examplesCloned = True
        testResults.examples.extend(testset)

    isDiscrete = testset.domain.classVar.varType == orange.VarTypes.Discrete
    conv = int if isDiscrete else float
    for ex in testset:
        tested = TestedExample(iterationNumber, conv(ex.getclass()), 0,
                               ex.getweight(testweight))

        for classifier in classifiers:
            # Each classifier gets its own copy with the class hidden, so it
            # cannot cheat.
            masked = orange.Example(ex)
            masked.setclass("?")
            value, probabilities = classifier(masked, orange.GetBoth)
            tested.addResult(value, probabilities)
        testResults.results.append(tested)

    return testResults
Example #5
0
def learnAndTestOnLearnData(learners,
                            learnset,
                            testResults=None,
                            iterationNumber=0,
                            pps=[],
                            callback=None,
                            **argkw):
    """Train the learners and test them on the very data they were trained on.

    ``learnset`` may be bare or an ``(examples, weight)`` pair. ``"B"``
    preprocessors are applied to the data first. If any other preprocessor is
    present, the test set becomes a separate copy of the data and the
    ``"L"``, ``"T"`` and ``"LT"`` preprocessors are applied to the learning
    set, the test set, or both, respectively; otherwise the test set is the
    learning set itself. ``callback``, if given, is called after each learner
    is trained. With ``storeclassifiers``/``storeClassifiers`` in ``argkw``
    the classifiers are appended to ``testResults.classifiers``. Returns the
    results object produced by ``testOnData``.
    """
    keepClassifiers = argkw.get("storeclassifiers", 0) or argkw.get(
        "storeClassifiers", 0)
    keepExamples = argkw.get("storeExamples", 0)

    learnset, learnweight = demangleExamples(learnset)

    # Apply the "B" preprocessors and note whether anything else is present.
    needsOwnTestset = 0
    for pp in pps:
        if pp[0] != "B":
            needsOwnTestset = 1
            continue
        learnset = pp[1](learnset)

    testset = learnset
    if needsOwnTestset:
        # The two sets may now diverge, so the test set must be a copy.
        testset = orange.ExampleTable(learnset)
        for pp in pps:
            stage = pp[0]
            if stage == "L":
                learnset = pp[1](learnset)
            elif stage == "T":
                testset = pp[1](testset)
            elif stage == "LT":
                learnset, testset = pp[1](learnset, testset)

    classifiers = []
    for learner in learners:
        classifiers.append(learner(learnset, learnweight))
        if callback:
            callback()
    for classifier, learner in zip(classifiers, learners):
        classifier.name = getattr(learner, "name", "noname")

    testResults = testOnData(classifiers, (testset, learnweight), testResults,
                             iterationNumber, keepExamples)
    if keepClassifiers:
        testResults.classifiers.append(classifiers)
    return testResults
Example #6
0
def crossValidation(learners,
                    examples,
                    folds=10,
                    strat=orange.MakeRandomIndices.StratifiedIfPossible,
                    pps=[],
                    indicesrandseed="*",
                    **argkw):
    """Cross-validation evaluation of learners.

    Fold indices are produced by ``MakeRandomIndicesCV``. When
    ``indicesrandseed`` is given (anything but ``"*"``) it is used as the
    seed; otherwise the random generator is taken from ``randseed`` or
    ``randomGenerator`` in ``argkw`` (0 if neither is set). The actual
    training and testing is delegated to ``testWithIndices``, whose result is
    returned.
    """
    examples, weight = demangleExamples(examples)

    if indicesrandseed == "*":
        # No explicit seed for the indices: fall back to a random generator.
        randomGenerator = argkw.get("randseed", 0) or argkw.get(
            "randomGenerator", 0)
        indices = orange.MakeRandomIndicesCV(examples,
                                             folds,
                                             stratified=strat,
                                             randomGenerator=randomGenerator)
    else:
        indices = orange.MakeRandomIndicesCV(examples,
                                             folds,
                                             randseed=indicesrandseed,
                                             stratified=strat)

    weightedExamples = (examples, weight)
    return testWithIndices(learners, weightedExamples, indices,
                           indicesrandseed, pps, **argkw)
Example #7
0
def testWithIndices(learners,
                    examples,
                    indices,
                    indicesrandseed="*",
                    pps=[],
                    callback=None,
                    **argkw):
    """Train and test the learners fold by fold, as prescribed by ``indices``.

    Parameters:
        learners: sequence of learners, each called as ``learner(examples, weight)``.
        examples: data, bare or an ``(examples, weight)`` pair.
        indices: one fold number per example; for every fold, the examples
            with that number are tested and all others are used for learning.
        indicesrandseed: used only to build the cache file name; the default
            ``"*"`` disables caching.
        pps: sequence of preprocessors ``(kind, function)`` with kind ``"B"``
            (both sets), ``"L"`` (learning set), ``"T"`` (test set) or
            ``"LT"`` (function taking and returning both sets).
        callback: optional callable invoked after each processed fold.
        **argkw: ``verbose``, ``cache``, ``storeclassifiers``/``storeClassifiers``
            and ``storeExamples``.

    Returns:
        An ``ExperimentResults`` with one ``TestedExample`` per example.

    Raises:
        SystemError: if there are no examples, the data has no class
            attribute, a set is empty after preprocessing, or a classifier
            returns an unknown value.
    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    storeclassifiers = argkw.get("storeclassifiers", 0) or argkw.get(
        "storeClassifiers", 0)
    # Caching is switched off whenever classifiers are to be stored.
    cache = cache and not storeclassifiers

    examples, weight = demangleExamples(examples)
    nLrn = len(learners)

    if not examples:
        raise SystemError("Test data set with no examples")
    if not examples.domain.classVar:
        # Raising a plain string is a TypeError in Python 3; use a real
        # exception, matching the other checks in this function.
        raise SystemError("Test data set without class attribute")

    nIterations = max(indices) + 1
    if examples.domain.classVar.varType == orange.VarTypes.Discrete:
        values = list(examples.domain.classVar.values)
        basevalue = examples.domain.classVar.baseValue
    else:
        basevalue = values = None

    conv = examples.domain.classVar.varType == orange.VarTypes.Discrete and int or float
    testResults = ExperimentResults(nIterations,
                                    [getobjectname(l) for l in learners],
                                    values, weight != 0, basevalue)
    testResults.results = [
        TestedExample(indices[i], conv(examples[i].getclass()), nLrn,
                      examples[i].getweight(weight))
        for i in range(len(examples))
    ]

    if argkw.get("storeExamples", 0):
        testResults.examples = examples

    # The cache file name template encodes the seed, the preprocessors and a
    # checksum of the data; the remaining "%s" is left for loadFromFiles /
    # saveToFiles to fill in.
    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    fnstr = "{TestWithIndices}_%s_%s%s-%s" % ("%s", indicesrandseed, ppsp,
                                              ccsum)
    # A "*" anywhere in the name (e.g. the default seed) disables caching.
    if "*" in fnstr:
        cache = 0

    if cache and testResults.loadFromFiles(learners, fnstr):
        printVerbose("  loaded from cache", verb)
    else:
        for fold in range(nIterations):
            # learning
            learnset = examples.selectref(indices, fold, negate=1)
            if not len(learnset):
                continue
            testset = examples.selectref(indices, fold, negate=0)
            if not len(testset):
                continue

            for pp in pps:
                if pp[0] == "B":
                    learnset = pp[1](learnset)
                    testset = pp[1](testset)

            for pp in pps:
                if pp[0] == "L":
                    learnset = pp[1](learnset)
                elif pp[0] == "T":
                    testset = pp[1](testset)
                elif pp[0] == "LT":
                    (learnset, testset) = pp[1](learnset, testset)

            if not learnset:
                raise SystemError("no training examples after preprocessing")

            if not testset:
                raise SystemError("no test examples after preprocessing")

            # Learners whose results were loaded from the cache are skipped
            # and keep a None placeholder.
            classifiers = [None] * nLrn
            for i in range(nLrn):
                if not cache or not testResults.loaded[i]:
                    classifiers[i] = learners[i](learnset, weight)
            if storeclassifiers:
                testResults.classifiers.append(classifiers)

            # testing
            # tcn walks through testset in step with the examples of this fold.
            tcn = 0
            for i in range(len(examples)):
                if (indices[i] == fold):
                    # This is to prevent cheating:
                    ex = orange.Example(testset[tcn])
                    ex.setclass("?")
                    tcn += 1
                    for cl in range(nLrn):
                        if not cache or not testResults.loaded[cl]:
                            cr = classifiers[cl](ex, orange.GetBoth)
                            if cr[0].isSpecial():
                                # Classifiers are not guaranteed to carry a
                                # name here, so fall back to their position.
                                clName = getattr(classifiers[cl], "name",
                                                 None) or ("#%i" % cl)
                                raise SystemError(
                                    "Classifier %s returned unknown value" %
                                    clName)
                            testResults.results[i].setResult(cl, cr[0], cr[1])
            if callback:
                callback()
        if cache:
            testResults.saveToFiles(learners, fnstr)

    return testResults
Example #8
0
def learningCurve(learners,
                  examples,
                  cv=None,
                  pick=None,
                  proportions=orange.frange(0.1),
                  pps=[],
                  **argkw):
    """Compute cross-validated results for a series of training-set proportions.

    The data is divided into folds by ``cv`` once. For every proportion ``p``
    and every fold, the learners are trained on the part of the fold's
    learning examples selected by ``pick`` with ``p0=p`` and tested on the
    fold's test examples. Only ``"L"`` preprocessors are allowed in ``pps``;
    anything else raises ``SystemError``. When ``cv`` or ``pick`` is missing,
    a default (10-fold, stratified if possible) is created. ``argkw`` may
    hold ``verbose``, ``cache``, ``callback``, ``indicesrandseed``,
    ``randseed`` and ``randomGenerator``.

    Returns a list with one ``ExperimentResults`` per proportion.
    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    callback = argkw.get("callback", 0)

    if any(pp[0] != "L" for pp in pps):
        raise SystemError("cannot preprocess testing examples")

    if not (cv and pick):
        # Both missing index makers share one random generator.
        seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
        if seed:
            randomGenerator = orange.RandomGenerator(seed)
        else:
            randomGenerator = argkw.get("randomGenerator",
                                        orange.RandomGenerator())
        stratification = orange.MakeRandomIndices.StratifiedIfPossible
        if not cv:
            cv = orange.MakeRandomIndicesCV(folds=10,
                                            stratified=stratification,
                                            randomGenerator=randomGenerator)
        if not pick:
            pick = orange.MakeRandomIndices2(stratified=stratification,
                                             randomGenerator=randomGenerator)

    examples, weight = demangleExamples(examples)
    folds = cv(examples)
    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    nLrn = len(learners)

    allResults = []
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verb)

        # Results are cached only when both index makers have a non-negative
        # seed and the resulting file name contains no "*".
        if (cv.randseed < 0) or (pick.randseed < 0):
            cache = 0
        else:
            fnstr = "{learningCurve}_%s_%s_%s_%s%s-%s" % (
                "%s", p, cv.randseed, pick.randseed, ppsp, ccsum)
            if "*" in fnstr:
                cache = 0

        classVar = examples.domain.classVar
        conv = int if classVar.varType == orange.VarTypes.Discrete else float
        learnerNames = [l.name for l in learners]
        testResults = ExperimentResults(cv.folds, learnerNames,
                                        examples.domain.classVar.values.native(),
                                        weight != 0,
                                        examples.domain.classVar.baseValue)
        testResults.results = [
            TestedExample(folds[idx], conv(examples[idx].getclass()), nLrn,
                          examples[idx].getweight(weight))
            for idx in range(len(examples))
        ]

        if cache and testResults.loadFromFiles(learners, fnstr):
            printVerbose("  loaded from cache", verb)
        else:
            for fold in range(cv.folds):
                printVerbose("  fold %d" % fold, verb)

                # learning: take the fold's learning examples, then keep only
                # group 0 of the proportion split
                foldLearnset = examples.selectref(folds, fold, negate=1)
                learnset = foldLearnset.selectref(pick(foldLearnset, p0=p), 0)
                if not len(learnset):
                    continue

                for pp in pps:
                    learnset = pp[1](learnset)

                # Learners whose results came from the cache are not retrained
                # and keep a None placeholder.
                classifiers = [
                    learners[k](learnset, weight)
                    if not (cache and testResults.loaded[k]) else None
                    for k in range(nLrn)
                ]

                # testing
                for idx in range(len(examples)):
                    if folds[idx] != fold:
                        continue
                    # The class is hidden from the classifiers to prevent
                    # cheating.
                    ex = orange.Example(examples[idx])
                    ex.setclass("?")
                    for k in range(nLrn):
                        if not (cache and testResults.loaded[k]):
                            cls, pro = classifiers[k](ex, orange.GetBoth)
                            testResults.results[idx].setResult(k, cls, pro)
                if callback:
                    callback()
            if cache:
                testResults.saveToFiles(learners, fnstr)

        allResults.append(testResults)

    return allResults
Example #9
0
def leaveOneOut(learners, examples, pps=[], indicesrandseed="*", **argkw):
    """Leave-one-out evaluation of learners on a data set.

    Every example gets its own fold number, so ``testWithIndices`` tests each
    example with classifiers trained on all the others. ``examples`` may be
    bare or an ``(examples, weight)`` pair; ``indicesrandseed``, ``pps`` and
    ``**argkw`` are passed on to ``testWithIndices``, whose result is
    returned.
    """
    examples, weight = demangleExamples(examples)
    # Pass the weight along with the examples (as crossValidation does);
    # passing the bare examples would drop the weight that was just extracted.
    return testWithIndices(learners, (examples, weight),
                           list(range(len(examples))), indicesrandseed, pps,
                           **argkw)