Exemple #1
0
def learningCurveWithTestData(
        learners,
        learnset,
        testset,
        times=10,
        proportions=orange.frange(0.1),
        strat=orange.MakeRandomIndices.StratifiedIfPossible,
        pps=[],
        **argkw):
    """Build a learning curve by training on subsets and testing on `testset`.

    For every proportion ``p`` in `proportions`, the learners are trained
    `times` times on a random selection of `learnset` (the examples that
    ``orange.MakeRandomIndices2`` marks with index 0 when called with ``p``)
    and evaluated on `testset` through ``learnAndTestOnTestData``.

    Parameters:
        learners: sequence of learners; each must expose a ``name`` attribute.
        learnset: training data in any form ``demangleExamples`` accepts
            (presumably a bare example table or an ``(examples, weight)``
            pair -- the helper is defined elsewhere).
        testset: test data, in the same forms as `learnset`.
        times: number of repetitions per proportion.
        proportions: iterable of proportions passed to the index picker.
        strat: stratification setting passed to ``orange.MakeRandomIndices2``.
        pps: accepted for signature compatibility; not used by this function.
        **argkw: ``verbose`` controls progress output; the first truthy value
            among ``indicesrandseed``, ``randseed`` and ``randomGenerator`` is
            passed to the index picker as its ``randomGenerator``.

    Returns:
        A list with one ``ExperimentResults`` per proportion, in the order of
        `proportions`.
    """
    verb = argkw.get("verbose", 0)

    learnset, learnweight = demangleExamples(learnset)
    # Unpack the test set too: reading ``.domain`` straight off `testset`
    # fails when the caller supplies an (examples, weight) pair.  The original
    # `testset` argument is still what gets passed on below, so whatever
    # learnAndTestOnTestData received before is unchanged.
    testExamples, testweight = demangleExamples(testset)
    classVar = testExamples.domain.classVar

    randomGenerator = argkw.get("indicesrandseed", 0) or argkw.get(
        "randseed", 0) or argkw.get("randomGenerator", 0)
    pick = orange.MakeRandomIndices2(stratified=strat,
                                     randomGenerator=randomGenerator)
    learnerNames = [l.name for l in learners]

    allResults = []
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verb)
        testResults = ExperimentResults(
            times, list(learnerNames),
            classVar.values.native(), testweight != 0,
            classVar.baseValue)
        # Start from an empty result list before the repetitions fill it in.
        testResults.results = []

        for t in range(times):
            printVerbose("  repetition %d" % t, verb)
            subset = learnset.selectref(pick(learnset, p), 0)
            learnAndTestOnTestData(learners, (subset, learnweight), testset,
                                   testResults, t)

        allResults.append(testResults)

    return allResults
Exemple #2
0
def testWithIndices(learners,
                    examples,
                    indices,
                    indicesrandseed="*",
                    pps=[],
                    callback=None,
                    **argkw):
    """Train and test `learners` on the folds of `examples` given by `indices`.

    ``indices[i]`` is the fold in which ``examples[i]`` is tested.  For each
    fold ``0 .. max(indices)`` the learners are trained on the examples whose
    index differs from the fold number and tested on those whose index equals
    it.  Folds whose learning or test set is empty are skipped.

    Parameters:
        learners: sequence of learners, each called as
            ``learner(learnset, weight)`` to obtain a classifier.
        examples: data in any form ``demangleExamples`` accepts (defined
            elsewhere; it yields an ``(examples, weight)`` pair).
        indices: one fold number per example.
        indicesrandseed: used only as part of the cache file name; the
            default ``"*"`` puts a ``*`` in the name, which disables caching.
        pps: sequence of ``(kind, preprocessor)`` pairs.  ``"B"``
            preprocessors are applied to the learning and the test set
            (separately, before all others); ``"L"`` only to the learning set;
            ``"T"`` only to the test set; ``"LT"`` is called with both sets
            and must return the new ``(learnset, testset)`` pair.
        callback: optional callable invoked without arguments after every
            fold that was actually processed.
        **argkw: ``verbose``; ``cache`` (try to load/save results from files);
            ``storeclassifiers`` / ``storeClassifiers`` (keep the classifiers
            of every fold; turns caching off); ``storeExamples`` (attach
            `examples` to the result).

    Returns:
        An ``ExperimentResults`` holding one ``TestedExample`` per example.

    Raises:
        SystemError: if there are no examples, the domain has no class
            attribute, preprocessing leaves an empty learning or test set, or
            a classifier returns a special (unknown) value.
    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    storeclassifiers = argkw.get("storeclassifiers", 0) or argkw.get(
        "storeClassifiers", 0)
    # Stored classifiers cannot be recovered from cached results.
    cache = cache and not storeclassifiers

    examples, weight = demangleExamples(examples)
    nLrn = len(learners)

    if not examples:
        raise SystemError("Test data set with no examples")
    if not examples.domain.classVar:
        # A bare string cannot be raised in Python 3; use the same exception
        # type as the neighbouring checks.
        raise SystemError("Test data set without class attribute")

    nIterations = max(indices) + 1
    isDiscrete = examples.domain.classVar.varType == orange.VarTypes.Discrete
    if isDiscrete:
        values = list(examples.domain.classVar.values)
        basevalue = examples.domain.classVar.baseValue
    else:
        basevalue = values = None

    # Actual classes are recorded as ints for discrete and floats otherwise.
    conv = int if isDiscrete else float
    testResults = ExperimentResults(nIterations,
                                    [getobjectname(l) for l in learners],
                                    values, weight != 0, basevalue)
    testResults.results = [
        TestedExample(indices[i], conv(examples[i].getclass()), nLrn,
                      examples[i].getweight(weight))
        for i in range(len(examples))
    ]

    if argkw.get("storeExamples", 0):
        testResults.examples = examples

    # The cache file name depends on the seed, the preprocessors and the data
    # checksum; the leading "%s" is left in place as a placeholder.
    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    fnstr = "{TestWithIndices}_%s_%s%s-%s" % ("%s", indicesrandseed, ppsp,
                                              ccsum)
    if "*" in fnstr:
        cache = 0

    if cache and testResults.loadFromFiles(learners, fnstr):
        printVerbose("  loaded from cache", verb)
    else:
        for fold in range(nIterations):
            # learning
            learnset = examples.selectref(indices, fold, negate=1)
            if not len(learnset):
                continue
            testset = examples.selectref(indices, fold, negate=0)
            if not len(testset):
                continue

            # "B" preprocessors run first, on both sets.
            for pp in pps:
                if pp[0] == "B":
                    learnset = pp[1](learnset)
                    testset = pp[1](testset)

            for pp in pps:
                if pp[0] == "L":
                    learnset = pp[1](learnset)
                elif pp[0] == "T":
                    testset = pp[1](testset)
                elif pp[0] == "LT":
                    (learnset, testset) = pp[1](learnset, testset)

            if not learnset:
                raise SystemError("no training examples after preprocessing")

            if not testset:
                raise SystemError("no test examples after preprocessing")

            # Learners whose results were loaded from the cache keep None.
            classifiers = [None] * nLrn
            for i in range(nLrn):
                if not cache or not testResults.loaded[i]:
                    classifiers[i] = learners[i](learnset, weight)
            if storeclassifiers:
                testResults.classifiers.append(classifiers)

            # testing
            # tcn walks through testset in step with the examples of this fold.
            tcn = 0
            for i in range(len(examples)):
                if indices[i] == fold:
                    # This is to prevent cheating:
                    ex = orange.Example(testset[tcn])
                    ex.setclass("?")
                    tcn += 1
                    for cl in range(nLrn):
                        if not cache or not testResults.loaded[cl]:
                            cr = classifiers[cl](ex, orange.GetBoth)
                            if cr[0].isSpecial():
                                raise SystemError(
                                    "Classifier %s returned unknown value" %
                                    (classifiers[cl].name or ("#%i" % cl)))
                            testResults.results[i].setResult(cl, cr[0], cr[1])
            if callback:
                callback()
        if cache:
            testResults.saveToFiles(learners, fnstr)

    return testResults
Exemple #3
0
def learningCurve(learners,
                  examples,
                  cv=None,
                  pick=None,
                  proportions=orange.frange(0.1),
                  pps=[],
                  **argkw):
    """Compute cross-validated results for growing training-set proportions.

    `examples` are split into folds by `cv`.  For every proportion ``p`` in
    `proportions` and every fold, the learners are trained on the part of the
    fold's learning set that `pick` marks with index 0 (called with ``p0=p``)
    and tested on the fold's examples.

    Parameters:
        learners: sequence of learners; each must expose a ``name`` attribute
            and is called as ``learner(learnset, weight)``.
        examples: data in any form ``demangleExamples`` accepts (defined
            elsewhere; it yields an ``(examples, weight)`` pair).
        cv: fold generator; defaults to a stratified-if-possible 10-fold
            ``orange.MakeRandomIndicesCV``.
        pick: subset picker; defaults to a stratified-if-possible
            ``orange.MakeRandomIndices2``.
        proportions: iterable of proportions handed to `pick`.
        pps: sequence of ``(kind, preprocessor)`` pairs; only kind ``"L"``
            (learning-set preprocessors) is allowed.
        **argkw: ``verbose``; ``cache``; ``callback`` (called without
            arguments after every processed fold); ``indicesrandseed``,
            ``randseed`` or ``randomGenerator`` to control the default
            `cv` / `pick` (consulted in that order).

    Returns:
        A list with one ``ExperimentResults`` per proportion, in the order of
        `proportions`.

    Raises:
        SystemError: if `pps` contains a preprocessor whose kind is not
            ``"L"``.
    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    callback = argkw.get("callback", 0)

    for pp in pps:
        if pp[0] != "L":
            raise SystemError("cannot preprocess testing examples")

    if not cv or not pick:
        # The lookups used to default to -1, which is truthy, so "randseed"
        # and "randomGenerator" were ignored unless "indicesrandseed" was
        # explicitly falsy.  Look the options up in priority order instead.
        seed = argkw.get("indicesrandseed") or argkw.get("randseed")
        if seed:
            randomGenerator = orange.RandomGenerator(seed)
        elif "randomGenerator" in argkw:
            randomGenerator = argkw["randomGenerator"]
        elif "indicesrandseed" in argkw and "randseed" in argkw:
            # Both seeds explicitly falsy: previous behaviour was a
            # default-constructed generator.
            randomGenerator = orange.RandomGenerator()
        else:
            # Nothing usable supplied: keep the previous -1 fallback seed.
            randomGenerator = orange.RandomGenerator(-1)
        if not cv:
            cv = orange.MakeRandomIndicesCV(
                folds=10,
                stratified=orange.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=randomGenerator)
        if not pick:
            pick = orange.MakeRandomIndices2(
                stratified=orange.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=randomGenerator)

    examples, weight = demangleExamples(examples)
    folds = cv(examples)
    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    nLrn = len(learners)

    classVar = examples.domain.classVar
    isDiscrete = classVar.varType == orange.VarTypes.Discrete
    # Actual classes are recorded as ints for discrete and floats otherwise.
    conv = int if isDiscrete else float

    allResults = []
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verb)

        # Caching needs reproducible indices, i.e. non-negative seeds on both
        # index generators; fnstr is only read while `cache` is truthy.
        fnstr = None
        if (cv.randseed < 0) or (pick.randseed < 0):
            cache = 0
        else:
            fnstr = "{learningCurve}_%s_%s_%s_%s%s-%s" % (
                "%s", p, cv.randseed, pick.randseed, ppsp, ccsum)
            if "*" in fnstr:
                cache = 0

        # Value list and base value are only read for a discrete class,
        # mirroring testWithIndices.
        if isDiscrete:
            values = classVar.values.native()
            basevalue = classVar.baseValue
        else:
            values = basevalue = None

        testResults = ExperimentResults(
            cv.folds, [l.name for l in learners],
            values, weight != 0, basevalue)
        testResults.results = [
            TestedExample(folds[i], conv(examples[i].getclass()), nLrn,
                          examples[i].getweight(weight))
            for i in range(len(examples))
        ]

        if cache and testResults.loadFromFiles(learners, fnstr):
            printVerbose("  loaded from cache", verb)
        else:
            for fold in range(cv.folds):
                printVerbose("  fold %d" % fold, verb)

                # learning
                learnset = examples.selectref(folds, fold, negate=1)
                learnset = learnset.selectref(pick(learnset, p0=p), 0)
                if not len(learnset):
                    continue

                for pp in pps:
                    learnset = pp[1](learnset)

                # Learners whose results were loaded from the cache keep None.
                classifiers = [None] * nLrn
                for i in range(nLrn):
                    if not cache or not testResults.loaded[i]:
                        classifiers[i] = learners[i](learnset, weight)

                # testing
                for i in range(len(examples)):
                    if folds[i] == fold:
                        # This is to prevent cheating:
                        ex = orange.Example(examples[i])
                        ex.setclass("?")
                        for cl in range(nLrn):
                            if not cache or not testResults.loaded[cl]:
                                cls, pro = classifiers[cl](ex, orange.GetBoth)
                                testResults.results[i].setResult(cl, cls, pro)
                if callback:
                    callback()
            if cache:
                testResults.saveToFiles(learners, fnstr)

        allResults.append(testResults)

    return allResults