Esempio n. 1
0
def distillKnowledge(deepNet,
                     filepath,
                     batchSize=50,
                     holdoutPercentage=0.5,
                     log=None):
    """Label a dataset with a deep network's soft targets and pickle it.

    The imagery at ``filepath`` is ingested, every entry of the training
    data is run through ``deepNet.softTarget`` and the resulting "dark
    labels" are appended to the training tuple. The augmented dataset is
    written to a ``.pkl.gz`` file located under ``filepath``. If that file
    already exists it is reused and nothing is recomputed.

    Parameters
    ----------
    deepNet : DistilleryClassifier
        Network whose ``softTarget`` method produces the dark labels.
    filepath : str
        Dataset location handed to ``ingestImagery``. Its absolute path is
        also used as the directory that receives the output pickle.
    batchSize : int
        Forwarded to ``ingestImagery`` and embedded in the output filename.
    holdoutPercentage : float
        Forwarded to ``ingestImagery`` and embedded in the output filename.
    log : logger or None
        Optional logger; only its ``info`` method is used here.

    Returns
    -------
    str
        Path of the pickle containing
        ``((trainData, trainLabels, darkLabels), test, labels)``.

    Raises
    ------
    ValueError
        If ``deepNet`` is not a ``DistilleryClassifier`` or the ingested
        training set contains no entries.
    """
    import os
    import numpy as np
    from dataset.ingest.labeled import ingestImagery
    from dataset.pickle import writePickleZip
    from distill.net import DistilleryClassifier

    if not isinstance(deepNet, DistilleryClassifier):
        raise ValueError(
            'The network must be setup as a DistilleryClassifier.')

    # build a new pickle with this information
    # TODO: NETWORKS NEED UNIQUE LABEL IDENTIFIERS WHICH CAN BE ADDED HERE
    rootpath = os.path.abspath(filepath)
    outputFile = os.path.join(
        rootpath,
        os.path.basename(rootpath) + '_darkLabels' + '_holdout_' +
        str(holdoutPercentage) + '_batch_' + str(batchSize) + '.pkl.gz')

    # the filename encodes the ingest parameters, so an existing file is
    # treated as an up-to-date cache for this exact configuration
    if os.path.exists(outputFile):
        if log is not None:
            log.info('Pickle exists for this dataset [' + outputFile +
                     ']. Using this instead.')
        return outputFile

    # NOTE: The pickleDataset will silently use previously created pickles if
    #       one exists (for efficiency). So watch out for stale pickles!
    train, test, labels = ingestImagery(filepath=filepath,
                                        shared=False,
                                        batchSize=batchSize,
                                        holdoutPercentage=holdoutPercentage,
                                        log=log)

    if log is not None:
        log.info('Distilling knowledge from deep network')

    # distill knowledge into a pickle which can be used to train other networks
    # NOTE(review): each entry of train[0] is presumably one batch -- confirm
    softTargets = [deepNet.softTarget(dataset) for dataset in train[0]]
    if not softTargets:
        raise ValueError('No training data was found in [' +
                         str(filepath) + '].')

    # stack along a new leading axis so the dark labels are indexed in the
    # same order the training entries were iterated
    darkLabels = np.stack(softTargets)
    train = train[0], train[1], darkLabels

    # pickle the dataset
    writePickleZip(outputFile, (train, test, labels), log)

    # return the output filename
    return outputFile
Esempio n. 2
0
def distillKnowledge(deepNet, filepath, batchSize=50,
                     holdoutPercentage=0.5, log=None):
    """Create a pickle pairing a dataset with a network's soft targets.

    Ingests the imagery found at ``filepath``, calls ``deepNet.softTarget``
    on each entry of the training data, and stores the results ("dark
    labels") as a third element of the training tuple. The combined
    dataset is written to a ``.pkl.gz`` file under ``filepath``; when that
    file is already present it is returned immediately.

    Args:
        deepNet: A ``DistilleryClassifier`` providing ``softTarget``.
        filepath: Dataset location passed to ``ingestImagery``; its
            absolute form is also the directory of the output pickle.
        batchSize: Passed to ``ingestImagery`` and written into the
            output filename.
        holdoutPercentage: Passed to ``ingestImagery`` and written into
            the output filename.
        log: Optional logger; only ``log.info`` is called.

    Returns:
        The path of the pickle holding
        ``((trainData, trainLabels, darkLabels), test, labels)``.

    Raises:
        ValueError: ``deepNet`` is not a ``DistilleryClassifier``, or the
            ingested training set has no entries.
    """
    import os
    import numpy as np
    from dataset.ingest.labeled import ingestImagery
    from dataset.pickle import writePickleZip
    from distill.net import DistilleryClassifier

    if not isinstance(deepNet, DistilleryClassifier):
        raise ValueError(
            'The network must be setup as a DistilleryClassifier.')

    # build a new pickle with this information
    # TODO: NETWORKS NEED UNIQUE LABEL IDENTIFIERS WHICH CAN BE ADDED HERE
    rootpath = os.path.abspath(filepath)
    pickleName = (os.path.basename(rootpath) +
                  '_darkLabels' +
                  '_holdout_' + str(holdoutPercentage) +
                  '_batch_' + str(batchSize) +
                  '.pkl.gz')
    outputFile = os.path.join(rootpath, pickleName)

    # the ingest parameters are part of the name, so a file that already
    # exists is reused rather than regenerated
    if os.path.exists(outputFile):
        if log is not None:
            log.info('Pickle exists for this dataset [' + outputFile +
                     ']. Using this instead.')
        return outputFile

    # NOTE: The pickleDataset will silently use previously created pickles if
    #       one exists (for efficiency). So watch out for stale pickles!
    train, test, labels = ingestImagery(filepath=filepath, shared=False,
                                        batchSize=batchSize,
                                        holdoutPercentage=holdoutPercentage,
                                        log=log)

    if log is not None:
        log.info('Distilling knowledge from deep network')

    # distill knowledge into a pickle which can be used to train other networks
    # NOTE(review): entries of train[0] look like per-batch arrays -- confirm
    perBatchTargets = [deepNet.softTarget(dataset) for dataset in train[0]]
    if not perBatchTargets:
        raise ValueError('No training data was found in [' +
                         str(filepath) + '].')

    # np.stack adds the leading per-entry axis and rejects mismatched shapes
    # instead of silently regrouping rows
    darkLabels = np.stack(perBatchTargets)
    train = train[0], train[1], darkLabels

    # pickle the dataset
    writePickleZip(outputFile, (train, test, labels), log)

    # return the output filename
    return outputFile
Esempio n. 3
0
    prof = Profiler(log=log, name=logName, profFile=options.profile)

    # create a random number generator for efficiency
    from numpy.random import RandomState
    from operator import mul
    from time import time
    rng = RandomState(int(time()))
    #rng = RandomState(4567) # always initialize the same

    # NOTE: The pickleDataset will silently use previously created pickles if
    #       one exists (for efficiency). So watch out for stale pickles!
    # NOTE: User may pass a dark pickle into here, and the logic will react
    #       appropriately to the situation.
    train, test, labels = ingestImagery(filepath=options.data,
                                        shared=True,
                                        batchSize=options.batchSize,
                                        holdoutPercentage=options.holdout,
                                        log=log)
    inputSize = train[0].shape.eval()

    # create a file with pre-initialized weights so both networks use the same
    # baseline for testing.
    if options.synapse is None:
        networkFile = createNetwork(inputSize=inputSize[1:],
                                    numKernels=options.kernel,
                                    numNeurons=options.neuron,
                                    numLabels=labels.shape[0])
    else:
        networkFile = options.synapse

    regScale = 1. / (2 * options.kernel * 5 * 5 + options.neuron +
Esempio n. 4
0
    parser = argparse.ArgumentParser()
    addLoggingParams(parser)
    addUnsupDataParams(parser, 'saeClass', multiLoad=True)
    addDebuggingParams(parser)
    parser.add_argument('--percent', dest='percentReturned', type=float,
                        default=100.,
                        help='Return some percentage of the highest related ' +
                             'examples. All others will not be returned to ' +
                             'the user.')
    parser.add_argument('--csv', dest='csvFile',type=str, default='output.csv',
                        help='Name of the CSV file output.')
    options = parser.parse_args()

    # setup the logger
    log, prof = setupLogging(options, 'SAE-Classification Benchmark')

    # NOTE: The pickleDataset will silently use previously created pickles if
    #       one exists (for efficiency). So watch out for stale pickles!
    train, test, labels = ingestImagery(filepath=options.data, shared=False,
                                        batchSize=options.batchSize,
                                        holdoutPercentage=options.holdout,
                                        log=log)

    # load all networks initialized to the target imagery
    nets = createNetworks(options.maxTarget, options.batchSize,
                          options.synapse, prof, options.debug)

    # test the training data for similarity to the target
    buildCSV(options.csvFile, nets, options.targetDir, test,
             False, options.percentReturned, options.debug)
Esempio n. 5
0
                        default=None,
                        help='Load from a previously saved network.')
    parser.add_argument('data',
                        help='Directory or pkl.gz file for the ' +
                        'training and test sets')
    options = parser.parse_args()

    # setup the logger
    logName = 'SAE-Classification Benchmark:  ' + options.data
    log = setupLogging(logName, options.level, options.logfile)
    prof = Profiler(log=log, name=logName, profFile=options.profile)

    # NOTE: The pickleDataset will silently use previously created pickles if
    #       one exists (for efficiency). So watch out for stale pickles!
    train, test, labels = ingestImagery(filepath=options.data,
                                        shared=True,
                                        batchSize=options.batchSize,
                                        log=log)

    # load example imagery --
    # these are confirmed objects we are attempting to identify
    target = readTargetData(options.targetDir)

    if options.synapse is None:
        regType = 'L2'
        regValue = .001
        trainer = buildTrainerSAENetwork(train,
                                         regType,
                                         regValue,
                                         target,
                                         prof=prof,
                                         kernelConv=options.kernel,