def learningCurveN(learners, examples, folds=10,
                   strat=orange.MakeRandomIndices.StratifiedIfPossible,
                   proportions=orange.frange(0.1), pps=[], **argkw):
    """Build the fold and subset index generators, then run learningCurve.

    Parameters:
        learners: passed through to learningCurve unchanged.
        examples: passed through to learningCurve unchanged.
        folds: number of folds handed to the cross-validation index maker.
        strat: stratification setting handed to both index makers; its
            truthiness also selects which pair of orange classes is used.
        proportions: passed through to learningCurve unchanged.
        pps: passed through to learningCurve unchanged.
        **argkw: forwarded to learningCurve; the keys "indicesrandseed",
            "randseed" and "randomGenerator" are also read here.

    Returns:
        Whatever learningCurve returns.
    """
    # NOTE(review): the -1 defaults are truthy, so unless the resolved seed is
    # 0/None the seeded branch is taken and "randomGenerator" is never
    # consulted -- confirm this is the intended precedence.
    seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
    if seed:
        rng = orange.RandomGenerator(seed)
    else:
        rng = argkw.get("randomGenerator", orange.RandomGenerator())

    # Both index makers share the stratification setting and the generator.
    shared = dict(stratified=strat, randomGenerator=rng)
    if strat:
        cv = orange.MakeRandomIndicesCV(folds=folds, **shared)
        pick = orange.MakeRandomIndices2(**shared)
    else:
        cv = orange.RandomIndicesCV(folds=folds, **shared)
        pick = orange.RandomIndices2(**shared)

    return learningCurve(learners, examples, cv, pick, proportions, pps, **argkw)
def test(self):
    """Check seeding, reset, pickling and the use counter of RandomGenerator."""
    import pickle

    gen = orange.RandomGenerator()
    zero_seeded = orange.RandomGenerator(0)
    # A default-constructed generator must start like one constructed with 0.
    self.assertEqual(gen(), zero_seeded())

    # Give both generators the same initial seed, then restart both.
    pair = (gen, zero_seeded)
    for g in pair:
        g.initseed = 42
    for g in pair:
        g.reset()
    self.assertEqual(gen(), zero_seeded())

    # Two consecutive draws from one generator must differ.
    first, second = gen(), gen()
    self.assertNotEqual(first, second)

    # A pickled copy must continue with the same next value as the original.
    restored = pickle.loads(pickle.dumps(gen))
    self.assertEqual(gen(), restored())

    # Four draws were made from `gen` since reset() was called.
    self.assertEqual(gen.uses, 4)
def learningCurve(learners, examples, cv=None, pick=None,
                  proportions=orange.frange(0.1), pps=[], **argkw):
    """Evaluate learners on growing portions of each fold's learning set.

    For every value in `proportions`, each fold produced by `cv` is used once
    for testing; the remaining examples are subsampled with `pick(..., p0=p)`,
    run through the preprocessors in `pps`, and used to train every learner.

    Parameters:
        learners: sequence of learners; each is called as
            learner(learnset, weight) and must have a `name` attribute.
        examples: data, unpacked by demangleExamples into (examples, weight).
        cv: fold index generator; built here when falsy (10 folds,
            StratifiedIfPossible).
        pick: subset index generator; built here when falsy
            (StratifiedIfPossible).
        proportions: iterable of values passed to `pick` as p0.
        pps: sequence of preprocessor entries; entry[0] must be "L" and
            entry[1] is called on the learning set.
        **argkw: "verbose", "cache" and "callback" are read here, as are
            "indicesrandseed", "randseed" and "randomGenerator" when `cv`
            or `pick` has to be built.

    Returns:
        A list with one ExperimentResults object per proportion.

    Raises:
        SystemError: if any entry of `pps` does not start with "L".
    """
    verbosity = argkw.get("verbose", 0)
    use_cache = argkw.get("cache", 0)
    on_fold_done = argkw.get("callback", 0)

    # Only learning-set preprocessors are accepted.
    for pp in pps:
        if pp[0] != "L":
            raise SystemError("cannot preprocess testing examples")

    if not (cv and pick):
        # NOTE(review): the -1 defaults are truthy, so "randomGenerator" is
        # only consulted when the resolved seed is 0/None -- confirm intent.
        seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
        if seed:
            rng = orange.RandomGenerator(seed)
        else:
            rng = argkw.get("randomGenerator", orange.RandomGenerator())
        if not cv:
            cv = orange.MakeRandomIndicesCV(
                folds=10,
                stratified=orange.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=rng)
        if not pick:
            pick = orange.MakeRandomIndices2(
                stratified=orange.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=rng)

    examples, weight = demangleExamples(examples)
    fold_of = cv(examples)
    data_tag = hex(examples.checksum())[2:]
    pp_tag = encodePP(pps)
    n_learners = len(learners)

    curve = []
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verbosity)

        # Caching needs non-negative seeds on both index generators and a
        # file-name stem without "*". Once switched off it stays off for the
        # remaining proportions as well.
        if (cv.randseed < 0) or (pick.randseed < 0):
            use_cache = 0
        else:
            fnstr = "{learningCurve}_%s_%s_%s_%s%s-%s" % (
                "%s", p, cv.randseed, pick.randseed, pp_tag, data_tag)
            if "*" in fnstr:
                use_cache = 0

        class_var = examples.domain.classVar
        if class_var.varType == orange.VarTypes.Discrete:
            conv = int
        else:
            conv = float

        testResults = ExperimentResults(
            cv.folds,
            [l.name for l in learners],
            class_var.values.native(),
            weight != 0,
            class_var.baseValue)
        testResults.results = [
            TestedExample(fold_of[i], conv(examples[i].getclass()),
                          n_learners, examples[i].getweight(weight))
            for i in range(len(examples))
        ]

        if use_cache and testResults.loadFromFiles(learners, fnstr):
            printVerbose(" loaded from cache", verbosity)
        else:
            for fold in range(cv.folds):
                printVerbose(" fold %d" % fold, verbosity)

                # Learning: everything outside this fold, then a subsample.
                learnset = examples.selectref(fold_of, fold, negate=1)
                learnset = learnset.selectref(pick(learnset, p0=p), 0)
                if not len(learnset):
                    # Nothing to learn from; this also skips the callback.
                    continue

                for pp in pps:
                    learnset = pp[1](learnset)

                # Train only the learners whose results were not loaded.
                classifiers = [None] * n_learners
                for k in range(n_learners):
                    if not (use_cache and testResults.loaded[k]):
                        classifiers[k] = learners[k](learnset, weight)

                # Testing: classify the examples belonging to this fold.
                for i in range(len(examples)):
                    if fold_of[i] != fold:
                        continue
                    # Work on a copy with the class set to "?" so the
                    # classifiers are not handed the true class.
                    ex = orange.Example(examples[i])
                    ex.setclass("?")
                    for k in range(n_learners):
                        if not (use_cache and testResults.loaded[k]):
                            cls, pro = classifiers[k](ex, orange.GetBoth)
                            testResults.results[i].setResult(k, cls, pro)

                if on_fold_done:
                    on_fold_done()

            if use_cache:
                testResults.saveToFiles(learners, fnstr)

        curve.append(testResults)

    return curve
def printResults(res): CAs = orngStat.CA(res, reportSE=1) for i in range(len(names)): print "%s: %5.3f+-%5.3f " % (names[i], CAs[i][0], 1.96 * CAs[i][1]), print print "\nproportionsTest that will always give the same results" for i in range(3): res = orngTest.proportionTest(learners, data, 0.7) printResults(res) print "\nproportionsTest that will give different results, but the same each time the script is run" myRandom = orange.RandomGenerator() for i in range(3): res = orngTest.proportionTest(learners, data, 0.7, randomGenerator=myRandom) printResults(res) if not vars().has_key("NO_RANDOMNESS"): print "\nproportionsTest that will give different results each time it is run" for i in range(3): res = orngTest.proportionTest(learners, data, 0.7, randseed=random.randint(0, 100)) printResults(res)
paramFile=open("Params.pkl","r") (learner,nFolds,dataSet,evaluateMethod) = pickle.load(paramFile) paramFile.close() MyRandom = orange.RandomGenerator(1000*int(os.environ["SGE_TASK_ID"])) res = %(sMethod)s fh = open("RES_out"+os.environ["SGE_TASK_ID"]+".pkl","w") pickle.dump(evaluateMethod(res)[0], fh) fh.close() """ # Assess the memory requirements memSize = dataUtilities.getApproxMemReq(dataSet) evalResList = sgeUtilities.arrayJob(jobName = "EvalJob", jobNumber = %(nExtFolds)s, jobParams = [learner,%(nFolds)s,dataSet,%(evalMethodFunc)s], jobQueue = "batch.q", jobScript = jobScript, memSize = str(memSize)+"M") else: for idx in range(%(nExtFolds)s): MyRandom = orange.RandomGenerator(1000*idx+1) res = %(sMethod)s evalResList.append(%(evalMethodFunc)s(res)[0]) if isClassifier: evalRes = [round(statc.mean(evalResList),3)] else: evalRes = [round(statc.mean(evalResList),2)] if verbose > 0: print evalRes else: res = %(sMethod)s evalRes = %(evalMethodFunc)s(res) # Save intermediate result #if os.path.exists("%(runPath)sintRes.txt"): if [os.path.basename(f) for f in glob("%(runPath)s"+"*intRes.txt")] != []:
# Demonstration of orange.MakeRandomIndices2 on the "lenses" data set:
# prints the index lists it produces under different randomness settings.
data = orange.ExampleTable("lenses")

# p0 is given as an integer here; the probability form is shown further down.
indices2 = orange.MakeRandomIndices2(p0=6)

ind = indices2(data)
print ind
# Split the data by index value -- presumably 0 marks the first subset and 1
# the second; verify against the MakeRandomIndices2 documentation.
data0 = data.select(ind, 0)
data1 = data.select(ind, 1)
print len(data0), len(data1)

print "\nIndices without playing with random generator"
for i in range(5):
    print indices2(data)

print "\nIndices with random generator"
# Attach an explicit generator constructed with 42.
indices2.randomGenerator = orange.RandomGenerator(42)
for i in range(5):
    print indices2(data)

print "\nIndices with randseed"
# Remove the generator and set randseed on the index maker instead.
indices2.randomGenerator = None
indices2.randseed = 42
for i in range(5):
    print indices2(data)

print "\nIndices with p0 set as probability (not 'a number of')"
indices2.p0 = 0.25
print indices2(data)

print "\n... with stratification"
indices2.stratified = indices2.Stratified