def encodePP(pps):
    """Encode a preprocessor list as a fragment for a cache file name.

    Each item of *pps* is indexed as ``pp[1]`` to obtain the preprocessor
    object, whose name is looked up with ``getobjectname(pp[1], "")``.

    Returns the names joined as ``"_name1_name2..."`` (``""`` for an empty
    list).  If any preprocessor has no name, returns ``"*"``; the caller
    treats a ``"*"`` in the file name as "do not cache".
    """
    # The accumulator must not reuse the name ``pps``: rebinding the
    # parameter to "" made the loop iterate over an empty string, so the
    # function always returned "" and preprocessors never influenced the
    # cache key.
    encoded = ""
    for pp in pps:
        objname = getobjectname(pp[1], "")
        if not objname:
            return "*"
        encoded += "_" + objname
    return encoded
def saveToFiles(self, learners, filename):
    """Write the results of every not-yet-cached learner to a cache file.

    *filename* is a ``%``-format template that receives the learner's name
    (``getobjectname(learner, "*")``).  Learners whose ``self.loaded[i]``
    flag is set are skipped, as are learners whose resulting file name
    contains ``"*"`` (i.e. the learner has no usable name).

    For each remaining learner ``i`` a list with one entry per element of
    ``self.results`` is pickled (protocol 1); each entry has the form
    ``((actualClass, iterationNumber), (classes[i], probabilities[i]))``.
    """
    for i, learner in enumerate(learners):
        if self.loaded[i]:
            continue

        # NOTE(review): the file path uses Windows separators while the
        # directory check below uses the relative name "cache".  Kept
        # unchanged so the path stays identical to the one loadFromFiles
        # reads from.
        fname = ".\\cache\\" + filename % getobjectname(learner, "*")
        if "*" in fname:
            continue

        if not os.path.isdir("cache"):
            os.mkdir("cache")

        # Build the payload before opening the file so that a failure here
        # does not leave an empty/truncated cache file behind.
        payload = [((x.actualClass, x.iterationNumber),
                    (x.classes[i], x.probabilities[i]))
                   for x in self.results]

        # ``with`` guarantees the handle is closed even if pickling fails.
        with open(fname, "wb") as f:
            pickle.Pickler(f, 1).dump(payload)
def loadFromFiles(self, learners, filename):
    """Try to restore each learner's results from its cache file.

    *filename* is a ``%``-format template that receives the learner's name
    (``getobjectname(learner, "*")``).  The unpickled data is indexed as
    ``d[ex][0]`` (an ``(actualClass, iterationNumber)`` key) and
    ``d[ex][1]`` (a pair passed on to ``setResult``), one entry per element
    of ``self.results``.

    ``self.loaded`` is rebuilt with one flag per learner: 1 if the cache
    file was read and matched the current results, 0 on any failure
    (missing file, unreadable data, mismatching keys, ...).

    Returns True if every learner was loaded, False otherwise.
    """
    self.loaded = []
    for i, learner in enumerate(learners):
        try:
            fname = ".\\cache\\" + filename % getobjectname(learner, "*")
            # NOTE: unpickling can execute arbitrary code; the cache
            # directory must only contain files written by trusted code.
            with open(fname, "rb") as f:
                cached = cPickle.load(f)

            # Validate every key before touching self.results so that a
            # mismatch found part-way through cannot leave a mixture of
            # cached and fresh results behind.
            for ex, tre in enumerate(self.results):
                if (tre.actualClass, tre.iterationNumber) != cached[ex][0]:
                    raise SystemError(
                        "mismatching example tables or sampling")

            for ex, tre in enumerate(self.results):
                tre.setResult(i, cached[ex][1][0], cached[ex][1][1])
            self.loaded.append(1)
        except Exception:
            # Deliberate best effort: any problem simply means this
            # learner has to be evaluated again.
            self.loaded.append(0)

    return 0 not in self.loaded
def testWithIndices(learners, examples, indices, indicesrandseed="*", pps=[],
                    callback=None, **argkw):
    """Train and test *learners* on folds of *examples* given by *indices*.

    Parameters:
        learners: sequence of callables invoked as
            ``learner(learnset, weight)`` to build a classifier.
        examples: data passed through ``demangleExamples``, which yields
            the example table and a weight id.
        indices: fold number for each example; folds
            ``0 .. max(indices)`` are processed, each serving once as the
            test set while the remaining examples form the learning set.
        indicesrandseed: value embedded in the cache file name; the
            default ``"*"`` makes the name contain ``"*"`` and thereby
            disables caching.
        pps: sequence of ``(mode, preprocessor)`` pairs.  Mode ``"B"`` is
            applied to both sets (before the others), ``"L"`` to the
            learning set, ``"T"`` to the test set and ``"LT"`` is called
            with both sets and must return both.  The default list is
            never modified here.
        callback: optional callable invoked with no arguments after each
            processed fold.
        **argkw: recognised keys are ``verbose``, ``cache``,
            ``storeclassifiers`` / ``storeClassifiers`` and
            ``storeExamples``.

    Returns:
        The populated ``ExperimentResults`` object.

    Raises:
        SystemError: if there are no examples, the domain has no class
            attribute, a set is empty after preprocessing, or a classifier
            returns an unknown (special) value.
    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    storeclassifiers = argkw.get("storeclassifiers", 0) or argkw.get(
        "storeClassifiers", 0)
    # Caching is switched off whenever classifiers are to be stored.
    cache = cache and not storeclassifiers

    examples, weight = demangleExamples(examples)
    nLrn = len(learners)

    if not examples:
        raise SystemError("Test data set with no examples")
    classVar = examples.domain.classVar
    if not classVar:
        # Was a string exception, which Python 2.6+ rejects with TypeError.
        raise SystemError("Test data set without class attribute")

    nIterations = max(indices) + 1
    if classVar.varType == orange.VarTypes.Discrete:
        values = list(classVar.values)
        basevalue = classVar.baseValue
        conv = int
    else:
        basevalue = values = None
        conv = float

    testResults = ExperimentResults(
        nIterations, [getobjectname(l) for l in learners], values,
        weight != 0, basevalue)
    testResults.results = [
        TestedExample(indices[i], conv(examples[i].getclass()), nLrn,
                      examples[i].getweight(weight))
        for i in range(len(examples))
    ]

    if argkw.get("storeExamples", 0):
        testResults.examples = examples

    # The cache file name template; the remaining "%s" is filled in with
    # the learner's name by loadFromFiles / saveToFiles.
    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    fnstr = "{TestWithIndices}_%s_%s%s-%s" % ("%s", indicesrandseed, ppsp,
                                              ccsum)
    if "*" in fnstr:
        cache = 0

    if cache and testResults.loadFromFiles(learners, fnstr):
        printVerbose(" loaded from cache", verb)
    else:
        for fold in range(nIterations):
            # learning
            learnset = examples.selectref(indices, fold, negate=1)
            if not len(learnset):
                continue
            testset = examples.selectref(indices, fold, negate=0)
            if not len(testset):
                continue

            # Preprocessors for both sets run first, then the
            # set-specific ones.
            for pp in pps:
                if pp[0] == "B":
                    learnset = pp[1](learnset)
                    testset = pp[1](testset)

            for pp in pps:
                if pp[0] == "L":
                    learnset = pp[1](learnset)
                elif pp[0] == "T":
                    testset = pp[1](testset)
                elif pp[0] == "LT":
                    (learnset, testset) = pp[1](learnset, testset)

            if not learnset:
                raise SystemError("no training examples after preprocessing")
            if not testset:
                raise SystemError("no test examples after preprocessing")

            # Learners whose results came from the cache are not retrained;
            # their slot stays None.
            classifiers = [None] * nLrn
            for i in range(nLrn):
                if not cache or not testResults.loaded[i]:
                    classifiers[i] = learners[i](learnset, weight)
            if storeclassifiers:
                testResults.classifiers.append(classifiers)

            # testing
            tcn = 0  # position of the next example within testset
            for i in range(len(examples)):
                if indices[i] != fold:
                    continue
                # This is to prevent cheating: classify a copy of the
                # example whose class value has been removed.
                ex = orange.Example(testset[tcn])
                ex.setclass("?")
                tcn += 1
                for cl in range(nLrn):
                    if not cache or not testResults.loaded[cl]:
                        cr = classifiers[cl](ex, orange.GetBoth)
                        if cr[0].isSpecial():
                            # Was a string exception, which Python 2.6+
                            # rejects with TypeError.
                            raise SystemError(
                                "Classifier %s returned unknown value" % (
                                    classifiers[cl].name or ("#%i" % cl)))
                        testResults.results[i].setResult(cl, cr[0], cr[1])

            if callback:
                callback()

        if cache:
            testResults.saveToFiles(learners, fnstr)

    return testResults