def distillKnowledge(deepNet, filepath, batchSize=50,
                     holdoutPercentage=0.5, log=None):
    '''Write a pickle of the dataset whose training portion also carries the
       soft targets ("dark labels") produced by a deep network.

       If the target pickle already exists it is reused as-is and nothing is
       recomputed, so a stale pickle must be removed by hand.

       deepNet           : Network queried through softTarget() once per
                           training batch; must be a DistilleryClassifier
       filepath          : Dataset location passed to ingestImagery. The
                           pickle is written beneath its absolute path
       batchSize         : Batch size passed to ingestImagery; also encoded in
                           the output file name
       holdoutPercentage : Holdout value passed to ingestImagery; also encoded
                           in the output file name
       log               : Optional logger; progress is reported through
                           log.info when it is not None
       return            : Path of the pickle (newly written or pre-existing)

       Raises ValueError when deepNet is not a DistilleryClassifier, or when
       ingestion yields no training batches to distill.
    '''
    import os
    import numpy as np
    from dataset.ingest.labeled import ingestImagery
    from dataset.pickle import writePickleZip
    from distill.net import DistilleryClassifier

    # NOTE(review): the message names 'DistilleryNetwork' while the check is
    #               against DistilleryClassifier -- text kept unchanged in
    #               case callers or tests match on it.
    if not isinstance(deepNet, DistilleryClassifier):
        raise ValueError('The network must be setup as a DistilleryNetwork.')

    # build a new pickle with this information
    # TODO: NETWORKS NEED UNIQUE LABEL IDENTIFIERS WHICH CAN BE ADDED HERE
    rootpath = os.path.abspath(filepath)
    outputFile = os.path.join(
        rootpath,
        os.path.basename(rootpath) + '_darkLabels' +
        '_holdout_' + str(holdoutPercentage) +
        '_batch_' + str(batchSize) + '.pkl.gz')

    # short-circuit when a previous run already produced this exact pickle
    if os.path.exists(outputFile):
        if log is not None:
            log.info('Pickle exists for this dataset [' + outputFile +
                     ']. Using this instead.')
        return outputFile

    # NOTE: The pickleDataset will silently use previously created pickles if
    #       one exists (for efficiency). So watch out for stale pickles!
    train, test, labels = ingestImagery(filepath=filepath, shared=False,
                                        batchSize=batchSize,
                                        holdoutPercentage=holdoutPercentage,
                                        log=log)

    if log is not None:
        log.info('Distilling knowledge from deep network')

    # distill knowledge into a pickle which can be used to train other
    # networks -- one softTarget() result per entry of train[0]
    darkLabels = [deepNet.softTarget(dataset) for dataset in train[0]]
    if not darkLabels:
        # without this guard the failure is an opaque IndexError below
        raise ValueError('No training batches were ingested from [' +
                         rootpath + ']. Nothing to distill.')

    # re-assemble the per-batch results into a single array whose leading
    # axis indexes the batch
    labelDims = [len(darkLabels)] + list(darkLabels[0].shape)
    darkLabels = np.reshape(np.concatenate(darkLabels), labelDims)
    train = train[0], train[1], darkLabels

    # pickle the dataset
    writePickleZip(outputFile, (train, test, labels), log)

    # return the output filename
    return outputFile
def distillKnowledge(deepNet, filepath, batchSize=50,
                     holdoutPercentage=0.5, log=None):
    '''Write a pickle of the dataset whose training portion also carries the
       soft targets ("dark labels") produced by a deep network.

       If the target pickle already exists it is reused as-is and nothing is
       recomputed, so a stale pickle must be removed by hand.

       deepNet           : Network queried through softTarget() once per
                           training batch; must be a DistilleryClassifier
       filepath          : Dataset location passed to ingestImagery. The
                           pickle is written beneath its absolute path
       batchSize         : Batch size passed to ingestImagery; also encoded in
                           the output file name
       holdoutPercentage : Holdout value passed to ingestImagery; also encoded
                           in the output file name
       log               : Optional logger; progress is reported through
                           log.info when it is not None
       return            : Path of the pickle (newly written or pre-existing)

       Raises ValueError when deepNet is not a DistilleryClassifier, or when
       ingestion yields no training batches to distill.
    '''
    import os
    import numpy as np
    from dataset.ingest.labeled import ingestImagery
    from dataset.pickle import writePickleZip
    from distill.net import DistilleryClassifier

    # NOTE(review): the message names 'DistilleryNetwork' while the check is
    #               against DistilleryClassifier -- text kept unchanged in
    #               case callers or tests match on it.
    if not isinstance(deepNet, DistilleryClassifier):
        raise ValueError('The network must be setup as a DistilleryNetwork.')

    # build a new pickle with this information
    # TODO: NETWORKS NEED UNIQUE LABEL IDENTIFIERS WHICH CAN BE ADDED HERE
    rootpath = os.path.abspath(filepath)
    outputFile = os.path.join(
        rootpath,
        os.path.basename(rootpath) + '_darkLabels' +
        '_holdout_' + str(holdoutPercentage) +
        '_batch_' + str(batchSize) + '.pkl.gz')

    # short-circuit when a previous run already produced this exact pickle
    if os.path.exists(outputFile):
        if log is not None:
            log.info('Pickle exists for this dataset [' + outputFile +
                     ']. Using this instead.')
        return outputFile

    # NOTE: The pickleDataset will silently use previously created pickles if
    #       one exists (for efficiency). So watch out for stale pickles!
    train, test, labels = ingestImagery(filepath=filepath, shared=False,
                                        batchSize=batchSize,
                                        holdoutPercentage=holdoutPercentage,
                                        log=log)

    if log is not None:
        log.info('Distilling knowledge from deep network')

    # distill knowledge into a pickle which can be used to train other
    # networks -- one softTarget() result per entry of train[0]
    darkLabels = [deepNet.softTarget(dataset) for dataset in train[0]]
    if not darkLabels:
        # without this guard the failure is an opaque IndexError below
        raise ValueError('No training batches were ingested from [' +
                         rootpath + ']. Nothing to distill.')

    # re-assemble the per-batch results into a single array whose leading
    # axis indexes the batch
    labelDims = [len(darkLabels)] + list(darkLabels[0].shape)
    darkLabels = np.reshape(np.concatenate(darkLabels), labelDims)
    train = train[0], train[1], darkLabels

    # pickle the dataset
    writePickleZip(outputFile, (train, test, labels), log)

    # return the output filename
    return outputFile
prof = Profiler(log=log, name=logName, profFile=options.profile) # create a random number generator for efficiency from numpy.random import RandomState from operator import mul from time import time rng = RandomState(int(time())) #rng = RandomState(4567) # always initialize the same # NOTE: The pickleDataset will silently use previously created pickles if # one exists (for efficiency). So watch out for stale pickles! # NOTE: User may pass a dark pickle into here, and the logic will react # appropriately to the situation. train, test, labels = ingestImagery(filepath=options.data, shared=True, batchSize=options.batchSize, holdoutPercentage=options.holdout, log=log) inputSize = train[0].shape.eval() # create a file with pre-initialized weights so both networks use the same # baseline for testing. if options.synapse is None: networkFile = createNetwork(inputSize=inputSize[1:], numKernels=options.kernel, numNeurons=options.neuron, numLabels=labels.shape[0]) else: networkFile = options.synapse regScale = 1. / (2 * options.kernel * 5 * 5 + options.neuron +
# assemble the command line: the shared logging / unsupervised-data /
# debugging option groups first, then the two options specific to this script
parser = argparse.ArgumentParser()
addLoggingParams(parser)
addUnsupDataParams(parser, 'saeClass', multiLoad=True)
addDebuggingParams(parser)
parser.add_argument(
    '--percent',
    dest='percentReturned',
    type=float,
    default=100.,
    help=('Return some percentage of the highest related '
          'examples. All others will not be returned to '
          'the user.'))
parser.add_argument(
    '--csv',
    dest='csvFile',
    type=str,
    default='output.csv',
    help='Name of the CSV file output.')
options = parser.parse_args()

# setupLogging hands back both the logger and the profiler
log, prof = setupLogging(options, 'SAE-Classification Benchmark')

# NOTE: Ingestion silently re-uses a previously created pickle when one
#       exists (for efficiency), so beware of stale pickles!
train, test, labels = ingestImagery(
    filepath=options.data,
    shared=False,
    batchSize=options.batchSize,
    holdoutPercentage=options.holdout,
    log=log)

# load all networks initialized to the target imagery
nets = createNetworks(
    options.maxTarget,
    options.batchSize,
    options.synapse,
    prof,
    options.debug)

# score the data for similarity to the target and write the CSV report;
# note it is the `test` split that is handed to buildCSV here
buildCSV(
    options.csvFile,
    nets,
    options.targetDir,
    test,
    False,
    options.percentReturned,
    options.debug)
default=None, help='Load from a previously saved network.') parser.add_argument('data', help='Directory or pkl.gz file for the ' + 'training and test sets') options = parser.parse_args() # setup the logger logName = 'SAE-Classification Benchmark: ' + options.data log = setupLogging(logName, options.level, options.logfile) prof = Profiler(log=log, name=logName, profFile=options.profile) # NOTE: The pickleDataset will silently use previously created pickles if # one exists (for efficiency). So watch out for stale pickles! train, test, labels = ingestImagery(filepath=options.data, shared=True, batchSize=options.batchSize, log=log) # load example imagery -- # these are confirmed objects we are attempting to identify target = readTargetData(options.targetDir) if options.synapse is None: regType = 'L2' regValue = .001 trainer = buildTrainerSAENetwork(train, regType, regValue, target, prof=prof, kernelConv=options.kernel,