def learningCurveWithTestData(
    learners,
    learnset,
    testset,
    times=10,
    proportions=orange.frange(0.1),
    strat=orange.MakeRandomIndices.StratifiedIfPossible,
    pps=[],
    **argkw
):
    """Build a learning curve by training on samples of ``learnset`` and
    testing on a separate ``testset``.

    For every proportion ``p`` a fresh ``ExperimentResults`` is created and,
    ``times`` times over, a sample of ``learnset`` is drawn with
    ``orange.MakeRandomIndices2`` and handed, together with ``testset``, to
    ``learnAndTestOnTestData``.

    Arguments:
        learners: sequence of learners; each must expose a ``name``.
        learnset: learning data; passed through ``demangleExamples`` to
            obtain the examples and their weight.
        testset: test data; only its weight is taken from
            ``demangleExamples``, the object itself is forwarded unchanged.
        times: number of repetitions per proportion.
        proportions: iterable of proportions handed to the index maker.
        strat: value for the ``stratified`` option of the index maker.
        pps: accepted for signature compatibility; not used by this function.
        **argkw: ``verbose`` sets the verbosity level; the first truthy value
            among ``indicesrandseed``, ``randseed`` and ``randomGenerator``
            is given to the index maker as ``randomGenerator``.

    Returns:
        A list with one ``ExperimentResults`` per proportion, in order.
    """
    verbosity = argkw.get("verbose", 0)

    learnset, learnweight = demangleExamples(learnset)
    demangled_test = demangleExamples(testset)
    testweight = demangled_test[1]

    # Take the first truthy setting; if none is truthy, the value found under
    # the last key (or the default 0) is used.
    generator = 0
    for key in ("indicesrandseed", "randseed", "randomGenerator"):
        generator = argkw.get(key, 0)
        if generator:
            break

    pick = orange.MakeRandomIndices2(stratified=strat, randomGenerator=generator)

    curve = []
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verbosity)

        learner_names = [l.name for l in learners]
        class_values = testset.domain.classVar.values.native()
        weighted = testweight != 0
        base_value = testset.domain.classVar.baseValue
        testResults = ExperimentResults(
            times, learner_names, class_values, weighted, base_value)
        testResults.results = []

        for repetition in range(times):
            printVerbose(" repetition %d" % repetition, verbosity)
            # Keep the examples that the index maker marked with 0.
            sample = learnset.selectref(pick(learnset, p), 0)
            learnAndTestOnTestData(
                learners, (sample, learnweight), testset, testResults,
                repetition)

        curve.append(testResults)

    return curve
def testWithIndices(learners, examples, indices, indicesrandseed="*",
                    pps=[], callback=None, **argkw):
    """Train and test ``learners`` fold by fold, as prescribed by ``indices``.

    For each fold number ``f`` in ``range(max(indices) + 1)`` the examples
    whose index differs from ``f`` form the learning set and those whose
    index equals ``f`` form the test set. Folds with an empty learning or
    test set are skipped.

    Arguments:
        learners: sequence of learners; each is called as
            ``learner(learnset, weight)`` and the result is called as
            ``classifier(example, orange.GetBoth)``.
        examples: the data; passed through ``demangleExamples`` to obtain the
            examples and their weight.
        indices: fold number of every example (same order as ``examples``).
        indicesrandseed: only used to build the cache file name; the default
            ``"*"`` puts a ``*`` into that name, which disables caching.
        pps: list of ``(kind, preprocessor)`` pairs. ``"B"`` preprocessors
            are applied to the learning and the test set separately, ``"L"``
            to the learning set only, ``"T"`` to the test set only, and
            ``"LT"`` are called with both sets and must return both.
        callback: optional callable, invoked without arguments after every
            fold that was actually evaluated.
        **argkw: ``verbose`` (verbosity level), ``cache`` (load/save results
            from/to files), ``storeclassifiers`` / ``storeClassifiers``
            (append each fold's classifiers to the results; switches caching
            off), ``storeExamples`` (attach ``examples`` to the results).

    Returns:
        The populated ``ExperimentResults``.

    Raises:
        SystemError: if there are no examples, the domain has no class
            attribute, preprocessing leaves a learning or test set empty,
            or a classifier returns an unknown (special) value.
    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    storeclassifiers = (argkw.get("storeclassifiers", 0)
                        or argkw.get("storeClassifiers", 0))
    # Caching is switched off whenever classifiers are to be stored.
    cache = cache and not storeclassifiers

    examples, weight = demangleExamples(examples)
    nLrn = len(learners)

    if not examples:
        raise SystemError("Test data set with no examples")
    class_var = examples.domain.classVar
    if not class_var:
        # Was a string raise, which Python 3 rejects with a TypeError that
        # hides this message.
        raise SystemError("Test data set without class attribute")

    # Preprocessors of every kind ("B", "L", "T", "LT") are accepted here.

    nIterations = max(indices) + 1
    if class_var.varType == orange.VarTypes.Discrete:
        values = list(class_var.values)
        basevalue = class_var.baseValue
        conv = int
    else:
        basevalue = values = None
        conv = float

    testResults = ExperimentResults(
        nIterations, [getobjectname(l) for l in learners], values,
        weight != 0, basevalue)
    testResults.results = [
        TestedExample(indices[i], conv(examples[i].getclass()), nLrn,
                      examples[i].getweight(weight))
        for i in range(len(examples))
    ]

    if argkw.get("storeExamples", 0):
        testResults.examples = examples

    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    # The leading "%s" is deliberately left in the name as a placeholder.
    fnstr = "{TestWithIndices}_%s_%s%s-%s" % (
        "%s", indicesrandseed, ppsp, ccsum)
    if "*" in fnstr:
        cache = 0

    if cache and testResults.loadFromFiles(learners, fnstr):
        printVerbose(" loaded from cache", verb)
        return testResults

    for fold in range(nIterations):
        # learning
        learnset = examples.selectref(indices, fold, negate=1)
        if not len(learnset):
            continue
        testset = examples.selectref(indices, fold, negate=0)
        if not len(testset):
            continue

        # "B" preprocessors run first, on each set separately ...
        for pp in pps:
            if pp[0] == "B":
                learnset = pp[1](learnset)
                testset = pp[1](testset)

        # ... followed by the set-specific ones, in the order given.
        for pp in pps:
            if pp[0] == "L":
                learnset = pp[1](learnset)
            elif pp[0] == "T":
                testset = pp[1](testset)
            elif pp[0] == "LT":
                (learnset, testset) = pp[1](learnset, testset)

        if not learnset:
            raise SystemError("no training examples after preprocessing")
        if not testset:
            raise SystemError("no test examples after preprocessing")

        # Learners whose results were loaded from the cache keep a None slot.
        classifiers = [None] * nLrn
        for i in range(nLrn):
            if not cache or not testResults.loaded[i]:
                classifiers[i] = learners[i](learnset, weight)
        if storeclassifiers:
            testResults.classifiers.append(classifiers)

        # testing
        # tcn walks through the (possibly preprocessed) test set in step with
        # the examples that belong to this fold.
        tcn = 0
        for i in range(len(examples)):
            if indices[i] != fold:
                continue
            # This is to prevent cheating:
            ex = orange.Example(testset[tcn])
            ex.setclass("?")
            tcn += 1
            for cl in range(nLrn):
                if not cache or not testResults.loaded[cl]:
                    cr = classifiers[cl](ex, orange.GetBoth)
                    if cr[0].isSpecial():
                        # Was a string raise as well; see above.
                        raise SystemError(
                            "Classifier %s returned unknown value" % (
                                classifiers[cl].name or ("#%i" % cl)))
                    testResults.results[i].setResult(cl, cr[0], cr[1])

        if callback:
            callback()

    if cache:
        testResults.saveToFiles(learners, fnstr)

    return testResults
def learningCurve(
    learners,
    examples,
    cv=None,
    pick=None,
    proportions=orange.frange(0.1),
    pps=[],
    **argkw
):
    """Compute a cross-validated learning curve.

    The examples are split into folds by ``cv``. For every proportion ``p``
    and every fold, ``pick`` draws a sample from that fold's learning
    examples, the learners are trained on the sample and tested on the
    examples belonging to the fold.

    Arguments:
        learners: sequence of learners; each must expose a ``name`` and is
            called as ``learner(learnset, weight)``.
        examples: the data; passed through ``demangleExamples`` to obtain
            the examples and their weight.
        cv: fold index maker; when falsy, a 10-fold
            ``orange.MakeRandomIndicesCV`` is created.
        pick: sample index maker; when falsy, an
            ``orange.MakeRandomIndices2`` is created.
        proportions: iterable of proportions passed to ``pick`` as ``p0``.
        pps: list of ``(kind, preprocessor)`` pairs; only kind ``"L"`` is
            allowed.
        **argkw: ``verbose``, ``cache`` and ``callback`` (called without
            arguments after each fold with a non-empty learning sample);
            ``indicesrandseed`` / ``randseed`` / ``randomGenerator`` are
            consulted when ``cv`` or ``pick`` has to be created.

    Returns:
        A list with one ``ExperimentResults`` per proportion, in order.

    Raises:
        SystemError: if ``pps`` contains a preprocessor whose kind is not
            ``"L"``.
    """
    verbosity = argkw.get("verbose", 0)
    use_cache = argkw.get("cache", 0)
    on_fold_done = argkw.get("callback", 0)

    if any(entry[0] != "L" for entry in pps):
        raise SystemError("cannot preprocess testing examples")

    if not (cv and pick):
        seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
        if seed:
            generator = orange.RandomGenerator(seed)
        else:
            generator = argkw.get("randomGenerator", orange.RandomGenerator())
        stratification = orange.MakeRandomIndices.StratifiedIfPossible
        cv = cv or orange.MakeRandomIndicesCV(
            folds=10, stratified=stratification, randomGenerator=generator)
        pick = pick or orange.MakeRandomIndices2(
            stratified=stratification, randomGenerator=generator)

    examples, weight = demangleExamples(examples)
    folds = cv(examples)
    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    learner_count = len(learners)

    curve = []
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verbosity)

        # Once caching has been switched off it stays off for the remaining
        # proportions as well.
        seeds_unknown = (cv.randseed < 0) or (pick.randseed < 0)
        if seeds_unknown:
            use_cache = 0
        else:
            cache_name = "{learningCurve}_%s_%s_%s_%s%s-%s" % (
                "%s", p, cv.randseed, pick.randseed, ppsp, ccsum)
            if "*" in cache_name:
                use_cache = 0

        is_discrete = (
            examples.domain.classVar.varType == orange.VarTypes.Discrete)
        conv = int if is_discrete else float

        fold_count = cv.folds
        learner_names = [l.name for l in learners]
        class_values = examples.domain.classVar.values.native()
        weighted = weight != 0
        base_value = examples.domain.classVar.baseValue
        testResults = ExperimentResults(
            fold_count, learner_names, class_values, weighted, base_value)

        tested = []
        for position in range(len(examples)):
            example = examples[position]
            tested.append(
                TestedExample(folds[position], conv(example.getclass()),
                              learner_count, example.getweight(weight)))
        testResults.results = tested

        if use_cache and testResults.loadFromFiles(learners, cache_name):
            printVerbose(" loaded from cache", verbosity)
        else:
            for fold in range(cv.folds):
                printVerbose(" fold %d" % fold, verbosity)

                # learning: take the examples outside this fold, then keep
                # the part of them that ``pick`` marked with 0.
                outside_fold = examples.selectref(folds, fold, negate=1)
                learnset = outside_fold.selectref(
                    pick(outside_fold, p0=p), 0)
                if not len(learnset):
                    continue

                for entry in pps:
                    learnset = entry[1](learnset)

                # Learners whose results came from the cache get a None slot.
                classifiers = []
                for k in range(learner_count):
                    if use_cache and testResults.loaded[k]:
                        classifiers.append(None)
                    else:
                        classifiers.append(learners[k](learnset, weight))

                # testing
                for position in range(len(examples)):
                    if folds[position] != fold:
                        continue
                    # This is to prevent cheating:
                    masked = orange.Example(examples[position])
                    masked.setclass("?")
                    for k in range(learner_count):
                        if use_cache and testResults.loaded[k]:
                            continue
                        predicted, probabilities = classifiers[k](
                            masked, orange.GetBoth)
                        testResults.results[position].setResult(
                            k, predicted, probabilities)

                if on_fold_done:
                    on_fold_done()

            if use_cache:
                testResults.saveToFiles(learners, cache_name)

        curve.append(testResults)

    return curve