def run(nb_filters=nb_filters,
        filterPolicy=filterPolicy,
        poolings=poolings,
        extractor=extractor,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        fixedSize=fixedSize,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        random=random,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        nbTrees=nbTrees,
        maxFeatures=maxFeatures,
        maxDepth=maxDepth,
        minSamplesSplit=minSamplesSplit,
        minSamplesLeaf=minSamplesLeaf,
        bootstrap=bootstrap,
        randomClassif=randomClassif,
        nbJobsEstimator=nbJobsEstimator,
        verbose=verbose,
        learningUse=learningUse,
        testingUse=testingUse,
        saveFile=saveFile,
        shouldSave=shouldSave):
    # Fix the tree seed when the classification stage must be deterministic.
    randomState = None
    if not randomClassif:
        randomState = 100

    lsSize = min(learningUse, maxLearningSize)
    tsSize = min(testingUse, maxTestingSize)

    # ======INSTANTIATING======== #
    # --RandConv--
    randConvCoord = coordinatorRandConvFactory(
        nbFilters=nb_filters,
        filterPolicy=filterPolicy,
        poolings=poolings,
        extractor=extractor,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        random=random)
    # Reuse features precomputed in learnFile/testFile instead of re-extracting.
    randConvCoord = LoadCoordinator(randConvCoord, learnFile, testFile)

    # --Extra-trees--
    baseClassif = ExtraTreesClassifier(nbTrees,
                                       max_features=maxFeatures,
                                       max_depth=maxDepth,
                                       min_samples_split=minSamplesSplit,
                                       min_samples_leaf=minSamplesLeaf,
                                       bootstrap=bootstrap,
                                       n_jobs=nbJobsEstimator,
                                       random_state=randomState,
                                       verbose=verbose)
    # --Classifier--
    classifier = Classifier(randConvCoord, baseClassif)

    # --Data--
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:lsSize]

    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]

    # =====COMPUTATION===== #
    # --Learning--
    print "Starting learning"
    fitStart = time()
    classifier.fit(learningSet)
    fitEnd = time()
    print "Learning done", formatDuration(fitEnd - fitStart)
    sys.stdout.flush()

    # --Testing--
    y_truth = testingSet.getLabels()
    predStart = time()
    y_prob, y_pred = classifier.predict_predict_proba(testingSet)
    predEnd = time()
    accuracy = classifier.accuracy(y_pred, y_truth)
    confMat = classifier.confusionMatrix(y_pred, y_truth)

    # ====ANALYSIS===== #
    importance, order = randConvCoord.importancePerFeatureGrp(baseClassif)

    print "==================RandConv================"
    print "-----------Filtering--------------"
    print "nb_filters", nb_filters
    print "filterPolicy", filterPolicy
    print "----------Pooling--------------"
    print "poolings", poolings
    print "--------SW extractor----------"
    print "#Subwindows", nbSubwindows
    print "subwindowMinSizeRatio", subwindowMinSizeRatio
    print "subwindowMaxSizeRatio", subwindowMaxSizeRatio
    print "subwindowTargetWidth", subwindowTargetWidth
    print "subwindowTargetHeight", subwindowTargetHeight
    print "fixedSize", fixedSize
    print "------------Misc-----------------"
    print "includeOriginalImage", includeOriginalImage
    print "random", random
    print "tempFolder", tempFolder
    print "verbosity", verbosity
    print "nbJobs", nbJobs
    print "--------ExtraTrees----------"
    print "nbTrees", nbTrees
    print "maxFeatures", maxFeatures
    print "maxDepth", maxDepth
    print "minSamplesSplit", minSamplesSplit
    print "minSamplesLeaf", minSamplesLeaf
    print "bootstrap", bootstrap
    print "nbJobsEstimator", nbJobsEstimator
    print "verbose", verbose
    print "randomState", randomState
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    if shouldSave:
        print "saveFile", saveFile
    print "Fit time", formatDuration(fitEnd - fitStart)
    print "Classification time", formatDuration(predEnd - predStart)
    print "Accuracy", accuracy

    if shouldSave:
        np.save(saveFile, y_prob)

    return accuracy, confMat, importance, order
def run(lsFile, tsFile, **kwargs):
    randomState = None
    if random:
        randomState = 100

    # ======INSTANTIATING======== #
    os.environ["JOBLIB_TEMP_FOLDER"] = "/home/jmbegon/jmbegon/code/work/tmp/"
    # --Pixit--
    randConvCoord = coordinatorRandConvFactory(
        nbFilters=nb_filters,
        filterMinVal=filter_min_val,
        filterMaxVal=filter_max_val,
        filterMinSize=filterMinSize,
        filterMaxSize=filterMaxSize,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        poolings=poolings,
        filterNormalisation=filterNormalisation,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        random=random)

    # --Extra-trees--
    baseClassif = ExtraTreesClassifier(nbTrees,
                                       max_features=maxFeatures,
                                       max_depth=maxDepth,
                                       min_samples_split=minSamplesSplit,
                                       min_samples_leaf=minSamplesLeaf,
                                       bootstrap=bootstrap,
                                       n_jobs=nbJobsEstimator,
                                       random_state=randomState,
                                       verbose=verbose)
    # --Classifier--
    classifier = Classifier(randConvCoord, baseClassif)

    # --Data--
    # The feature files are read, not written: open them in "rb" mode.
    # pickle.load takes no protocol argument; the protocol is recorded in
    # the stream when dumping.
    with open(lsFile, "rb") as f:
        lsSize, Xls, yls = pickle.load(f)
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:lsSize]

    with open(tsFile, "rb") as f:
        tsSize, Xts, yts = pickle.load(f)
    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]

    # =====COMPUTATION===== #
    # --Learning--
    print "Starting learning"
    fitStart = time()
    baseClassif.fit(Xls, yls)
    fitEnd = time()
    print "Learning done", (fitEnd - fitStart), "seconds (no extraction)"
    sys.stdout.flush()

    # --Testing--
    y_truth = testingSet.getLabels()
    predStart = time()
    y_pred = classifier._predict(Xts, lsSize)
    predEnd = time()

    # ====ANALYSIS===== #
    accuracy = classifier.accuracy(y_pred, y_truth)
    confMat = classifier.confusionMatrix(y_pred, y_truth)
    importance, order = randConvCoord.importancePerFeatureGrp(baseClassif)

    print "========================================="
    print "--------ExtraTrees----------"
    print "nbTrees", nbTrees
    print "maxFeatures", maxFeatures
    print "maxDepth", maxDepth
    print "minSamplesSplit", minSamplesSplit
    print "minSamplesLeaf", minSamplesLeaf
    print "bootstrap", bootstrap
    print "nbJobsEstimator", nbJobsEstimator
    print "verbose", verbose
    print "randomState", randomState
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    print "Fit time (no extraction)", (fitEnd - fitStart), "seconds"
    print "Classification time (no extraction)", (predEnd - predStart), "seconds"
    print "Accuracy", accuracy

    return accuracy, confMat, importance, order
import sys

import numpy as np
from sklearn.metrics import confusion_matrix

from CifarLoader import CifarFromNumpies
from ImageBuffer import FileImageBuffer, NumpyImageLoader

testingSetDir = "test/"
testingIndexFile = "0index"

if __name__ == "__main__":
    # sys.argv[0] is the script itself; the probability files start at argv[1].
    Y = np.load(sys.argv[1])
    for filename in sys.argv[2:]:
        Y += np.load(filename)
    y_pred = np.argmax(Y, axis=1)
    tsSize = len(y_pred)

    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]
    y_truth = testingSet.getLabels()

    accuracy = sum(map(lambda x, y: x == y, y_pred, y_truth)) / float(len(y_truth))
    confMat = confusion_matrix(y_truth, y_pred)

    print "==================Classif aggregation================"
    print "Files :\n", sys.argv[1:]
    print "Accuracy", accuracy
    print "Confusion matrix :\n", confMat
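# Example invocation (hypothetical script and file names): each argument is a
# (nbTestImages x nbClasses) probability matrix saved with np.save by one of
# the run() variants above; summing them before the argmax implements a soft
# vote over the individual models:
#
#     python aggregate_probas.py run0_probs.npy run1_probs.npy run2_probs.npy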
def run(poolings=poolings,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        fixedSize=fixedSize,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        random=random,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        nbTrees=nbTrees,
        maxFeatures=maxFeatures,
        maxDepth=maxDepth,
        minSamplesSplit=minSamplesSplit,
        minSamplesLeaf=minSamplesLeaf,
        bootstrap=bootstrap,
        nbJobsEstimator=nbJobsEstimator,
        verbose=verbose,
        learningUse=learningUse,
        testingUse=testingUse):
    randomState = None
    if random:
        randomState = 100

    lsSize = min(learningUse, maxLearningSize)
    tsSize = min(testingUse, maxTestingSize)

    # ======INSTANTIATING======== #
    os.environ["JOBLIB_TEMP_FOLDER"] = "/home/jmbegon/jmbegon/code/work/tmp/"
    # --customRandConv--
    randConvCoord = customRandConvFactory(
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        poolings=poolings,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        random=random)

    # --Extra-trees--
    baseClassif = ExtraTreesClassifier(nbTrees,
                                       max_features=maxFeatures,
                                       max_depth=maxDepth,
                                       min_samples_split=minSamplesSplit,
                                       min_samples_leaf=minSamplesLeaf,
                                       bootstrap=bootstrap,
                                       n_jobs=nbJobsEstimator,
                                       random_state=randomState,
                                       verbose=verbose)
    # --Classifier--
    classifier = Classifier(randConvCoord, baseClassif)

    # --Data--
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:lsSize]

    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]

    # =====COMPUTATION===== #
    # --Learning--
    print "Starting learning"
    fitStart = time()
    classifier.fit(learningSet)
    fitEnd = time()
    print "Learning done", (fitEnd - fitStart), "seconds"
    sys.stdout.flush()

    # --Testing--
    y_truth = testingSet.getLabels()
    predStart = time()
    y_pred = classifier.predict(testingSet)
    predEnd = time()
    accuracy = classifier.accuracy(y_pred, y_truth)
    confMat = classifier.confusionMatrix(y_pred, y_truth)

    # ====ANALYSIS===== #
    importance, order = randConvCoord.importancePerFeatureGrp(baseClassif)

    print "==================CUSTOM======================="
    print "----------Pooling--------------"
    print "poolings", poolings
    print "--------SW extractor----------"
    print "#Subwindows", nbSubwindows
    print "subwindowMinSizeRatio", subwindowMinSizeRatio
    print "subwindowMaxSizeRatio", subwindowMaxSizeRatio
    print "subwindowTargetWidth", subwindowTargetWidth
    print "subwindowTargetHeight", subwindowTargetHeight
    print "fixedSize", fixedSize
    print "------------Misc-----------------"
    print "includeOriginalImage", includeOriginalImage
    print "random", random
    print "tempFolder", tempFolder
    print "verbosity", verbosity
    print "nbJobs", nbJobs
    print "--------ExtraTrees----------"
    print "nbTrees", nbTrees
    print "maxFeatures", maxFeatures
    print "maxDepth", maxDepth
    print "minSamplesSplit", minSamplesSplit
    print "minSamplesLeaf", minSamplesLeaf
    print "bootstrap", bootstrap
    print "nbJobsEstimator", nbJobsEstimator
    print "verbose", verbose
    print "randomState", randomState
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    print "Fit time", (fitEnd - fitStart), "seconds"
    print "Classification time", (predEnd - predStart), "seconds"
    print "Accuracy", accuracy

    return accuracy, confMat, importance, order
def run(nb_filters=nb_filters,
        poolings=poolings,
        nbSubwindows=nbSubwindows,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        nbTrees=nbTrees,
        maxFeatures=maxFeatures,
        maxDepth=maxDepth,
        minSamplesSplit=minSamplesSplit,
        minSamplesLeaf=minSamplesLeaf,
        bootstrap=bootstrap,
        nbJobsEstimator=nbJobsEstimator,
        verbose=verbose,
        learningUse=learningUse,
        testingUse=testingUse):
    lsSize = min(learningUse, maxLearningSize)
    tsSize = min(testingUse, maxTestingSize)

    # Estimated memory footprint of the feature matrices (in bytes).
    totalNbFeatures = (nb_filters * len(poolings)
                       * subwindowTargetWidth * subwindowTargetHeight * 3)
    totalNbObj = lsSize * nbSubwindows
    nbFeatures = totalNbFeatures / nbJobs
    # np.zeros() requires a shape; a one-element array gives the float size.
    floatSize = np.zeros(1).itemsize
    singleArraySize = nbFeatures * totalNbObj * floatSize
    totalArraySize = totalNbFeatures * totalNbObj * floatSize

    # ======INSTANTIATING======== #
    os.environ["JOBLIB_TEMP_FOLDER"] = "/home/jmbegon/jmbegon/code/work/tmp/"
    # --Pixit--
    memCoord = MemroyTestCoordinator(nbFeatures, totalNbObj)
    if nbJobs != 1:
        memCoord.parallelize(nbJobs, tempFolder)

    # --Extra-trees--
    baseClassif = ExtraTreesClassifier(nbTrees,
                                       max_features=maxFeatures,
                                       max_depth=maxDepth,
                                       min_samples_split=minSamplesSplit,
                                       min_samples_leaf=minSamplesLeaf,
                                       bootstrap=bootstrap,
                                       n_jobs=nbJobsEstimator,
                                       verbose=verbose)
    # --Classifier--
    classifier = Classifier(memCoord, baseClassif)

    # --Data--
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:lsSize]

    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]

    # =====COMPUTATION===== #
    # --Learning--
    classifier.fit(learningSet)

    print "========================================="
    print "-----------Filtering--------------"
    print "nb_filters", nb_filters
    print "----------Pooling--------------"
    print "poolings", poolings
    print "--------SW extractor----------"
    print "#Subwindows", nbSubwindows
    print "subwindowTargetWidth", subwindowTargetWidth
    print "subwindowTargetHeight", subwindowTargetHeight
    print "------------Misc-----------------"
    print "tempFolder", tempFolder
    print "verbosity", verbosity
    print "nbJobs", nbJobs
    print "--------ExtraTrees----------"
    print "nbTrees", nbTrees
    print "maxFeatures", maxFeatures
    print "maxDepth", maxDepth
    print "minSamplesSplit", minSamplesSplit
    print "minSamplesLeaf", minSamplesLeaf
    print "bootstrap", bootstrap
    print "nbJobsEstimator", nbJobsEstimator
    print "verbose", verbose
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    print "totalNbFeatures", totalNbFeatures
    print "totalNbObj", totalNbObj
    print "singleArraySize", singleArraySize
    print "totalArraySize", totalArraySize
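# Readability sketch (an assumption, not used by the script above): the array
# sizes it reports are raw byte counts; a helper like this makes them easier
# to scan in the logs.
def _formatBytes(n):
    # Walk up the binary unit ladder until the value fits.
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if n < 1024.0:
            return "%.1f %s" % (n, unit)
        n /= 1024.0
    return "%.1f PB" % n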
def run():
    # ======HYPER PARAMETERS====== #
    # ----RandConv params----
    # Filtering
    nb_filters = 100
    filter_min_val = -1
    filter_max_val = 1
    filterMinSize = 2
    filterMaxSize = 32
    filterNormalisation = FilterGenerator.NORMALISATION_MEANVAR

    # Aggregation
    poolings = [(2, 2, Const.POOLING_AGGREG_AVG)]

    # Subwindow
    nbSubwindows = 10
    subwindowMinSizeRatio = 0.75
    subwindowMaxSizeRatio = 1.
    subwindowTargetWidth = 16
    subwindowTargetHeight = 16
    fixedSize = False
    subwindowInterpolation = SubWindowExtractor.INTERPOLATION_BILINEAR

    # Misc.
    includeOriginalImage = True

    # Compressor
    compressorType = "Sampling"
    nbCompressedFeatures = 20
    compressOriginalImage = True

    # Parallelization & logger
    nbJobs = -1
    verbosity = 8
    tempFolder = "tmp/"

    # ----Extra-trees params----
    nbTrees = 30
    maxFeatures = "auto"
    maxDepth = None
    minSamplesSplit = 2
    minSamplesLeaf = 1
    bootstrap = False
    nbJobsEstimator = -1
    randomState = None
    verbose = 8

    # =====DATA===== #
    # maxLearningSize = 50000
    # maxTestingSize = 10000
    learningUse = 500
    learningSetDir = "learn/"
    learningIndexFile = "0index"
    testingUse = 500
    testingSetDir = "test/"
    testingIndexFile = "0index"

    # ======INSTANTIATING======== #
    os.environ["JOBLIB_TEMP_FOLDER"] = "/home/jmbegon/jmbegon/code/work/tmp/"
    # --Pixit--
    randConvCoord = coordinatorCompressRandConvFactory(
        nbFilters=nb_filters,
        filterMinVal=filter_min_val,
        filterMaxVal=filter_max_val,
        filterMinSize=filterMinSize,
        filterMaxSize=filterMaxSize,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        poolings=poolings,
        filterNormalisation=filterNormalisation,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        compressorType=compressorType,
        nbCompressedFeatures=nbCompressedFeatures,
        compressOriginalImage=compressOriginalImage,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder)

    # --Extra-trees--
    baseClassif = ExtraTreesClassifier(nbTrees,
                                       max_features=maxFeatures,
                                       max_depth=maxDepth,
                                       min_samples_split=minSamplesSplit,
                                       min_samples_leaf=minSamplesLeaf,
                                       bootstrap=bootstrap,
                                       n_jobs=nbJobsEstimator,
                                       random_state=randomState,
                                       verbose=verbose)
    # --Classifier--
    classifier = Classifier(randConvCoord, baseClassif)

    # --Data--
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:learningUse]

    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:testingUse]

    # =====COMPUTATION===== #
    # --Learning--
    print "Starting learning"
    fitStart = time()
    classifier.fit(learningSet)
    fitEnd = time()
    print "Learning done", (fitEnd - fitStart), "seconds"
    sys.stdout.flush()

    # --Testing--
    y_truth = testingSet.getLabels()
    predStart = time()
    y_pred = classifier.predict(testingSet)
    predEnd = time()
    accuracy = classifier.accuracy(y_pred, y_truth)
    confMat = classifier.confusionMatrix(y_pred, y_truth)

    # ====ANALYSIS===== #
    importance, order = randConvCoord.importancePerFeatureGrp(baseClassif)

    print "========================================="
    print "-----------Filtering--------------"
    print "nb_filters", nb_filters
    print "filter_min_val", filter_min_val
    print "filter_max_val", filter_max_val
    print "filterMinSize", filterMinSize
    print "filterMaxSize", filterMaxSize
    print "filterNormalisation", filterNormalisation
    print "----------Pooling--------------"
    print "poolings", poolings
    print "--------SW extractor----------"
    print "#Subwindows", nbSubwindows
    print "subwindowMinSizeRatio", subwindowMinSizeRatio
    print "subwindowMaxSizeRatio", subwindowMaxSizeRatio
    print "subwindowTargetWidth", subwindowTargetWidth
    print "subwindowTargetHeight", subwindowTargetHeight
    print "fixedSize", fixedSize
    print "------------Misc-----------------"
    print "includeOriginalImage", includeOriginalImage
    print "compressorType", compressorType
    print "nbCompressedFeatures", nbCompressedFeatures
    print "compressOriginalImage", compressOriginalImage
    print "nbJobs", nbJobs
    print "--------ExtraTrees----------"
    print "nbTrees", nbTrees
    print "maxFeatures", maxFeatures
    print "maxDepth", maxDepth
    print "minSamplesSplit", minSamplesSplit
    print "minSamplesLeaf", minSamplesLeaf
    print "bootstrap", bootstrap
    print "nbJobsEstimator", nbJobsEstimator
    print "randomState", randomState
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    print "Fit time", (fitEnd - fitStart), "seconds"
    print "Classification time", (predEnd - predStart), "seconds"
    print "Accuracy", accuracy
    print "Confusion matrix :\n", confMat

    return accuracy, confMat, importance, order
def run(lsName, tsName, **kwargs):
    lsSize = min(learningUse, maxLearningSize)
    tsSize = min(testingUse, maxTestingSize)

    # ======INSTANTIATING======== #
    os.environ["JOBLIB_TEMP_FOLDER"] = "/home/jmbegon/jmbegon/code/work/tmp/"
    # --Coordinator--
    randConvCoord = coordinatorRandConvFactory(
        nbFilters=nb_filters,
        filterMinVal=filter_min_val,
        filterMaxVal=filter_max_val,
        filterMinSize=filterMinSize,
        filterMaxSize=filterMaxSize,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        poolings=poolings,
        filterNormalisation=filterNormalisation,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        random=random)

    # --Data--
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:lsSize]

    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]

    # =====COMPUTATION===== #
    # --Learning--
    print "Starting learning"
    fitStart = time()
    X, y = randConvCoord.process(learningSet, True)
    with open(lsName, "wb") as f:
        pickle.dump((lsSize, X, y), f, protocol=2)
    fitEnd = time()
    print "Learning done", (fitEnd - fitStart), "seconds"
    sys.stdout.flush()

    # --Testing--
    predStart = time()
    X, y = randConvCoord.process(testingSet, False)
    with open(tsName, "wb") as f:
        pickle.dump((tsSize, X, y), f, protocol=2)
    predEnd = time()

    print "========================================="
    print "-----------Filtering--------------"
    print "nb_filters", nb_filters
    print "filter_min_val", filter_min_val
    print "filter_max_val", filter_max_val
    print "filterMinSize", filterMinSize
    print "filterMaxSize", filterMaxSize
    print "filterNormalisation", filterNormalisation
    print "----------Pooling--------------"
    print "poolings", poolings
    print "--------SW extractor----------"
    print "#Subwindows", nbSubwindows
    print "subwindowMinSizeRatio", subwindowMinSizeRatio
    print "subwindowMaxSizeRatio", subwindowMaxSizeRatio
    print "subwindowTargetWidth", subwindowTargetWidth
    print "subwindowTargetHeight", subwindowTargetHeight
    print "fixedSize", fixedSize
    print "------------Misc-----------------"
    print "includeOriginalImage", includeOriginalImage
    print "random", random
    print "tempFolder", tempFolder
    print "verbosity", verbosity
    print "nbJobs", nbJobs
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    print "Fit time", (fitEnd - fitStart), "seconds"
    print "Classification time", (predEnd - predStart), "seconds"
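# Round-trip sketch (an assumption, mirroring the run(lsFile, tsFile, **kwargs)
# consumer earlier in this collection): the files written above hold the
# pickled (size, X, y) triple, so they are read back with a plain pickle.load.
def loadExtractedSet(fileName):
    # pickle.load infers the protocol from the stream itself.
    with open(fileName, "rb") as f:
        return pickle.load(f)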
baseClassif = ExtraTreesClassifier(nbTrees,
                                   max_features=maxFeatures,
                                   max_depth=maxDepth,
                                   min_samples_split=minSamplesSplit,
                                   min_samples_leaf=minSamplesLeaf,
                                   bootstrap=bootstrap,
                                   n_jobs=nbJobsEstimator,
                                   random_state=randomState,
                                   verbose=verbose)
# --Classifier--
classifier = Classifier(pixitCoord, baseClassif)

# --Data--
loader = CifarFromNumpies(learningSetDir, learningIndexFile)
learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
learningSet = learningSet[0:learningUse]

loader = CifarFromNumpies(testingSetDir, testingIndexFile)
testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
testingSet = testingSet[0:testingUse]

# =====COMPUTATION===== #
# --Learning--
fitStart = time()
classifier.fit(learningSet)
fitEnd = time()

# --Testing--
y_truth = testingSet.getLabels()
def run(nb_filters=nb_filters,
        filterPolicy=filterPolicy,
        poolings=poolings,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        fixedSize=fixedSize,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        random=random,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        nbTrees=nbTrees,
        maxDepth=maxDepth,
        minSamplesSplit=minSamplesSplit,
        minSamplesLeaf=minSamplesLeaf,
        randomClassif=randomClassif,
        nbJobsEstimator=nbJobsEstimator,
        verbose=verbose,
        learningUse=learningUse,
        testingUse=testingUse,
        saveFile=saveFile,
        shouldSave=shouldSave):
    randomState = None
    if not randomClassif:
        randomState = 100

    lsSize = min(learningUse, maxLearningSize)
    tsSize = min(testingUse, maxTestingSize)

    # ======INSTANTIATING======== #
    # --randconv--
    randConvCoord = coordinatorRandConvFactory(
        nbFilters=nb_filters,
        filterPolicy=filterPolicy,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        poolings=poolings,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        random=random)
    # The factory may adjust the filter count; report the actual number.
    nb_filters = len(randConvCoord.getFilters())

    # --SVM--
    baseClassif = LinearSVC(verbose=verbose, random_state=randomState)
    # --Classifier--
    classifier = uClassifier(coordinator=randConvCoord,
                             base_classifier=baseClassif,
                             n_estimators=nbTrees,
                             max_depth=maxDepth,
                             min_samples_split=minSamplesSplit,
                             min_samples_leaf=minSamplesLeaf,
                             n_jobs=nbJobsEstimator,
                             random_state=randomState,
                             verbose=verbose)

    # --Data--
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:lsSize]

    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]

    # =====COMPUTATION===== #
    # --Learning--
    print "Starting learning"
    fitStart = time()
    hist = classifier._preprocess(learningSet, learningPhase=True)
    y = learningSet.getLabels()
    if shouldSave:
        # Persist the sparse visual-word histogram in CSR form.
        np.savez(saveFile, data=hist.data, indices=hist.indices,
                 indptr=hist.indptr, shape=hist.shape)
    classifier.fit_histogram(hist, y)
    fitEnd = time()
    print "Learning done", formatDuration(fitEnd - fitStart)
    sys.stdout.flush()

    # --Testing--
    y_truth = testingSet.getLabels()
    predStart = time()
    y_pred = classifier.predict(testingSet)
    predEnd = time()
    accuracy = classifier.accuracy(y_pred, y_truth)
    confMat = classifier.confusionMatrix(y_pred, y_truth)

    # ====ANALYSIS===== #
    importance, order = randConvCoord.importancePerFeatureGrp(
        classifier._visualBagger)

    print "==================Bag of Visual Words======================="
    print "-----------Filtering--------------"
    print "nb_filters", nb_filters
    print "filterPolicy", filterPolicy
    print "----------Pooling--------------"
    print "poolings", poolings
    print "--------SW extractor----------"
    print "#Subwindows", nbSubwindows
    print "subwindowMinSizeRatio", subwindowMinSizeRatio
    print "subwindowMaxSizeRatio", subwindowMaxSizeRatio
    print "subwindowTargetWidth", subwindowTargetWidth
    print "subwindowTargetHeight", subwindowTargetHeight
    print "fixedSize", fixedSize
    print "------------Misc-----------------"
    print "includeOriginalImage", includeOriginalImage
    print "random", random
    print "tempFolder", tempFolder
    print "verbosity", verbosity
    print "nbJobs", nbJobs
    print "--------Bag of words params + SVC----------"
    print "nbTrees", nbTrees
    print "maxDepth", maxDepth
    print "minSamplesSplit", minSamplesSplit
    print "minSamplesLeaf", minSamplesLeaf
    print "nbJobsEstimator", nbJobsEstimator
    print "verbose", verbose
    print "randomState", randomState
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    if shouldSave:
        print "saveFile", saveFile
    print "Fit time", formatDuration(fitEnd - fitStart)
    print "Classification time", formatDuration(predEnd - predStart)
    print "Accuracy", accuracy
    print "Leafs", formatBigNumber(classifier.histoSize)

    return accuracy, confMat, importance, order