Example #1
0
    def test_rescale_01(self):
        """Check the output range of emlib.rescale_01 in both scaling modes.

        Two scenarios are exercised:

          1. A small random tensor: with either perChannel setting, every
             rescaled value is expected to lie in [0, 1].
          2. A tensor whose channel 0 is inflated by a factor of 1000: with
             perChannel=False channel 1 is expected to end up <= 0.1,
             while with perChannel=True channel 1 is expected to still
             reach above 0.8.
        """
        X = np.random.rand(10, 3, 3, 3)
        for perChannel in (False, True):
            Xr = emlib.rescale_01(X, perChannel=perChannel)
            # Dedicated comparison asserts report the offending values on
            # failure, unlike assertTrue(a <= b).
            self.assertLessEqual(np.max(Xr), 1.0)
            self.assertGreaterEqual(np.min(Xr), 0.0)

        # Make channel 0 dominate the overall value range.
        X = np.random.rand(10, 3, 100, 100)
        X[:, 0, ...] *= 1000.0

        Xr = emlib.rescale_01(X, perChannel=False)
        self.assertLessEqual(np.max(Xr[:, 1, ...]), .1)

        Xr = emlib.rescale_01(X, perChannel=True)
        self.assertGreater(np.max(Xr[:, 1, ...]), .8)
Example #2
0
def deploy_model(X, weightsFile,
                 log=None,
                 slices=None,
                 modelName='ciresan_n3',
                 evalPct=1.0,
                 outFile=None):
    """Applies a previously trained CNN to new data.

      X             : Tensor of features with dimensions as specified in (1)
      weightsFile   : File holding the trained weights; handed to the
                      model's load_weights()
      log           : a logging object (for reporting status)
      slices        : A list of slice indices to evaluate
                      (or None / [] to use all the data)
      modelName     : Name of the model factory looked up on the emm module
      evalPct       : Fraction of volume to evaluate; in [0,1]
      outFile       : File name where class probabilities should be stored.
                      When given, a .npy file and a .mat file are written.

    Returns the value produced by _evaluate() (the class probabilities).
    """

    # Setup output file/directory
    if not outFile:
        if log:
            log.warning('No output file specified - are you sure this is what you want?')
    else:
        # dirname() is '' for a bare file name such as 'probs.npy';
        # os.makedirs('') raises, so only create a directory when one
        # is actually named.
        outDir = os.path.dirname(outFile)
        if outDir and not os.path.exists(outDir):
            os.makedirs(outDir)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # preprocess data
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if slices:
        X = X[slices, :, :]

    # rescale features to live in [0, 1]
    X = emlib.rescale_01(X, perChannel=True)

    if log:
        log.info('X volume dimensions: %s' % str(X.shape))
        log.info('X values min/max:    %g, %g' % (np.min(X), np.max(X)))

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # initialize CNN
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('initializing CNN...')
    model = getattr(emm, modelName)()
    model.compile(optimizer='sgd',   # not used, but required by keras
                  loss='categorical_crossentropy')
    model.load_weights(weightsFile)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Do it
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('evaluating volume...')
    Prob = _evaluate(model, X, log=log, evalPct=evalPct)
    if log:
        log.info('Complete!')

    if outFile:
        np.save(outFile, Prob)

        # Swap only a trailing '.npy' extension for '.mat'.  A blanket
        # str.replace() would also rewrite '.npy' occurring elsewhere in
        # the path (e.g. inside a directory name).
        if outFile.endswith('.npy'):
            matFile = outFile[:-len('.npy')] + '.mat'
        else:
            # savemat appends '.mat' itself when the name lacks it
            matFile = outFile
        scipy.io.savemat(matFile, {'P': Prob})

        if log:
            log.info('Probabilites stored in file %s' % outFile)

    return Prob
Example #3
0
def train_model(Xtrain, Ytrain,
                Xvalid, Yvalid,
                trainSlices=None,
                validSlices=None,
                omitLabels=None,
                modelName='ciresan_n3',
                learnRate0=0.01,
                weightDecay=1e-6,
                momentum=0.9,
                maxMbPerEpoch=getattr(sys, 'maxint', sys.maxsize),
                nEpochs=30,
                log=None,
                outDir=None):
    """Trains a CNN using Keras.

    Some of the key parameters include:

      Xtrain, Ytrain : Tensors of features and per-pixel class labels with
                       dimensions as specified in (1),(2)
      Xvalid, Yvalid : Tensors of features and per-pixel class labels with
                       dimensions as specified in (1),(2).  Presumed to
                       be held-out data (i.e. disjoint from X/Ytrain)

      trainSlices   : A list of slice indices to include in training
                      (or None / [] to use all the data)
      validSlices   : A list of slice indices to include in validation
                      (or None / [] to use all the data)
      omitLabels    : A list of class labels whose corresponding pixel data
                      should be omitted from train and test.  If None or
                      [], uses all data.

      modelName     : Name of the model factory looked up on the emm module
      learnRate0    : Learning rate handed to the SGD optimizer (lr)
      weightDecay   : Handed to the SGD optimizer as `decay`.
                      NOTE(review): in Keras, SGD's `decay` is a
                      learning-rate decay, not an L2 weight penalty -
                      confirm this is the intent.
      momentum      : Momentum handed to the SGD optimizer (Nesterov)
      maxMbPerEpoch : The maximum number of minibatches to run in each
                      epoch (default is to process entire data volume
                      each epoch).
      nEpochs       : Number of training epochs to run

      log           : a logging object (for reporting status)
      outDir        : if not None, a directory where model weights
                      will be stored (highly recommended)

    Returns the trained model object.
    """
    # Fresh list per call, rather than a default list shared between calls.
    if omitLabels is None:
        omitLabels = []

    if not outDir:
        if log:
            log.warning('No output directory specified - are you sure this is what you want?')
    elif not os.path.exists(outDir):
        os.makedirs(outDir)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # preprocess data
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # discard unneeded slices (if any)
    if trainSlices:
        Xtrain = Xtrain[trainSlices, :, :, :]
        Ytrain = Ytrain[trainSlices, :, :]

    if validSlices:
        Xvalid = Xvalid[validSlices, :, :, :]
        Yvalid = Yvalid[validSlices, :, :]

    # rescale features to live in [0 1]
    # XXX: technically, should probably use scale factors from
    #      train volume on validation data...
    Xtrain = emlib.rescale_01(Xtrain, perChannel=True)
    Xvalid = emlib.rescale_01(Xvalid, perChannel=True)

    # Remap class labels to consecutive natural numbers.
    # Note that any pixels that should be omitted from the
    # analysis are mapped to -1 by this function.
    Ytrain = emlib.number_classes(Ytrain, omitLabels)
    Yvalid = emlib.number_classes(Yvalid, omitLabels)

    if log:
        log.info('training volume dimensions:   %s' % str(Xtrain.shape))
        log.info('training values min/max:      %g, %g' % (np.min(Xtrain), np.max(Xtrain)))
        log.info('training class labels:        %s' % str(np.unique(Ytrain)))
        for yi in np.unique(Ytrain):
            cnt = np.sum(Ytrain == yi)
            log.info('    class %d has %d instances' % (yi, cnt))
        log.info('')
        log.info('validation volume dimensions: %s' % str(Xvalid.shape))
        log.info('validation values min/max:    %g, %g' % (np.min(Xvalid), np.max(Xvalid)))
        log.info('validation class labels:      %s' % str(np.unique(Yvalid)))

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # create and configure CNN
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('creating CNN')
    model = getattr(emm, modelName)()
    sgd = SGD(lr=learnRate0, decay=weightDecay, momentum=momentum, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Do training
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    for epoch in range(nEpochs):
        if log:
            log.info('starting training epoch %d (of %d)' % (epoch, nEpochs))

        # Labels were remapped above, so pixels to skip now carry -1.
        acc, loss = _train_one_epoch(model, Xtrain, Ytrain,
                                     log=log,
                                     omitLabels=[-1,],
                                     nBatches=maxMbPerEpoch)

        if outDir:
            # save a snapshot of current model weights
            weightFile = os.path.join(outDir, "weights_epoch_%03d.h5" % epoch)
            if os.path.exists(weightFile):
                os.remove(weightFile)
            model.save_weights(weightFile)

            # also save accuracies (for diagnostic purposes)
            accFile = os.path.join(outDir, 'acc_epoch_%03d.npy' % epoch)
            np.save(accFile, acc)

        # Evaluate performance on validation data.
        # (acc is rebound here; the training accuracy was saved above.)
        if log:
            log.info('epoch %d complete. validating...' % epoch)
        Prob, acc = _evaluate(model, Xvalid, Yvalid, omitLabels=[-1,], log=log)
        # NOTE(review): the message prints acc followed by '%'; presumably
        # _evaluate reports accuracy already in percent - confirm.
        if log:
            log.info('accuracy on validation data: %0.2f%%' % acc)

        if outDir:
            estFile = os.path.join(outDir, "validation_epoch_%03d.npy" % epoch)
            np.save(estFile, Prob)

    if log:
        log.info('Finished!')
    return model
Example #4
0
def deploy_model(X,
                 weightsFile,
                 log=None,
                 slices=None,
                 modelName='ciresan_n3',
                 evalPct=1.0,
                 outFile=None):
    """Applies a previously trained CNN to new data.

      X             : Tensor of features with dimensions as specified in (1)
      weightsFile   : File holding the trained weights; handed to the
                      model's load_weights()
      log           : a logging object (for reporting status)
      slices        : A list of slice indices to evaluate
                      (or None / [] to use all the data)
      modelName     : Name of the model factory looked up on the emm module
      evalPct       : Fraction of volume to evaluate; in [0,1]
      outFile       : File name where class probabilities should be stored.
                      When given, a .npy file and a .mat file are written.

    Returns the value produced by _evaluate() (the class probabilities).
    """

    # Setup output file/directory
    if not outFile:
        if log:
            log.warning(
                'No output file specified - are you sure this is what you want?'
            )
    else:
        # dirname() is '' for a bare file name such as 'probs.npy';
        # os.makedirs('') raises, so only create a directory when one
        # is actually named.
        outDir = os.path.dirname(outFile)
        if outDir and not os.path.exists(outDir):
            os.makedirs(outDir)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # preprocess data
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if slices:
        X = X[slices, :, :]

    # rescale features to live in [0, 1]
    X = emlib.rescale_01(X, perChannel=True)

    if log:
        log.info('X volume dimensions: %s' % str(X.shape))
        log.info('X values min/max:    %g, %g' % (np.min(X), np.max(X)))

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # initialize CNN
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('initializing CNN...')
    model = getattr(emm, modelName)()
    model.compile(
        optimizer='sgd',  # not used, but required by keras
        loss='categorical_crossentropy')
    model.load_weights(weightsFile)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Do it
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('evaluating volume...')
    Prob = _evaluate(model, X, log=log, evalPct=evalPct)
    if log:
        log.info('Complete!')

    if outFile:
        np.save(outFile, Prob)

        # Swap only a trailing '.npy' extension for '.mat'.  A blanket
        # str.replace() would also rewrite '.npy' occurring elsewhere in
        # the path (e.g. inside a directory name).
        if outFile.endswith('.npy'):
            matFile = outFile[:-len('.npy')] + '.mat'
        else:
            # savemat appends '.mat' itself when the name lacks it
            matFile = outFile
        scipy.io.savemat(matFile, {'P': Prob})

        if log:
            log.info('Probabilites stored in file %s' % outFile)

    return Prob