def test_rescale_01(self):
    """Check emlib.rescale_01 output range and its perChannel behavior."""
    # Part 1: regardless of the perChannel setting, the rescaled values
    # must lie within [0, 1].
    volume = np.random.rand(10, 3, 3, 3)
    for channelwise in (False, True):
        scaled = emlib.rescale_01(volume, perChannel=channelwise)
        self.assertTrue(np.max(scaled) <= 1.0)
        self.assertTrue(np.min(scaled) >= 0.0)

    # Part 2: blow up channel 0 by a factor of 1000 and inspect channel 1.
    volume = np.random.rand(10, 3, 100, 100)
    volume[:, 0, ...] *= 1000.0

    # With a single global scaling, channel 1 is dwarfed by channel 0.
    scaled = emlib.rescale_01(volume, perChannel=False)
    channel1Max = np.max(scaled[:, 1, ...])
    self.assertTrue(channel1Max <= .1)

    # With per-channel scaling, channel 1 should still span most of [0, 1].
    scaled = emlib.rescale_01(volume, perChannel=True)
    channel1Max = np.max(scaled[:, 1, ...])
    self.assertTrue(channel1Max > .8)
def deploy_model(X, weightsFile, log=None, slices=[], modelName='ciresan_n3',
                 evalPct=1.0, outFile=None):
    """Applies a previously trained CNN to new data.

    X           : Tensor of features with dimensions as specified in (1)
    weightsFile : File with previously trained weights; handed to the
                  model's load_weights()
    log         : a logging object (for reporting status), or None
    slices      : A list of slice indices to evaluate (or [] to use all
                  the data).  The list is only read, never modified.
    modelName   : Name of the attribute of emm that is called (with no
                  arguments) to construct the model
    evalPct     : Fraction of volume to evaluate; in [0,1].  Passed
                  through to _evaluate().
    outFile     : File name where class probabilities should be stored
                  (or a false value to skip saving)

    Returns whatever _evaluate() produces for the (rescaled) volume.
    """
    # Setup output file/directory
    if not outFile:
        if log:
            log.warning('No output file specified - are you sure this is what you want?')
    else:
        # dirname() is '' for a bare file name such as 'probs.npy';
        # os.makedirs('') raises, so only create a directory when the
        # file name actually has a directory component.
        outDir = os.path.dirname(outFile)
        if outDir and not os.path.exists(outDir):
            os.makedirs(outDir)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # preprocess data
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if slices:
        X = X[slices,:,:]

    # rescale features to live in [0, 1]
    X = emlib.rescale_01(X, perChannel=True)

    if log:
        log.info('X volume dimensions: %s' % str(X.shape))
        log.info('X values min/max: %g, %g' % (np.min(X), np.max(X)))

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # initialize CNN
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('initializing CNN...')
    model = getattr(emm, modelName)()
    model.compile(optimizer='sgd',   # not used, but required by keras
                  loss='categorical_crossentropy')
    model.load_weights(weightsFile)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Do it
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('evaluating volume...')
    Prob = _evaluate(model, X, log=log, evalPct=evalPct)
    if log:
        log.info('Complete!')

    if outFile:
        np.save(outFile, Prob)
        # Also write a MATLAB copy; the name is outFile with '.npy'
        # replaced by '.mat'.
        scipy.io.savemat(outFile.replace('.npy', '.mat'), {'P' : Prob})
        if log:
            log.info('Probabilites stored in file %s' % outFile)

    return Prob
def train_model(Xtrain, Ytrain,
                Xvalid, Yvalid,
                trainSlices=[],
                validSlices=[],
                omitLabels=[],
                modelName='ciresan_n3',
                learnRate0=0.01,
                weightDecay=1e-6,
                momentum=0.9,
                # sys.maxint only exists on Python 2; fall back to
                # sys.maxsize so the module also imports on Python 3.
                maxMbPerEpoch=getattr(sys, 'maxint', sys.maxsize),
                nEpochs=30,
                log=None,
                outDir=None):
    """Trains a CNN using Keras.

    Some of the key parameters include:

    Xtrain, Ytrain : Tensors of features and per-pixel class labels
                     with dimensions as specified in (1),(2)
    Xvalid, Yvalid : Tensors of features and per-pixel class labels
                     with dimensions as specified in (1),(2).
                     Presumed to be held-out data (i.e. disjoint from
                     X/Ytrain)
    trainSlices    : A list of slice indices to include in training
                     (or [] to use all the data)
    validSlices    : A list of slice indices to include in validation
                     (or [] to use all the data)
    omitLabels     : A list of class labels whose corresponding pixel
                     data should be omitted from train and test.
                     If [], uses all data.
    modelName      : Name of the attribute of emm that is called (with
                     no arguments) to construct the model
    learnRate0     : Passed to SGD as lr
    weightDecay    : Passed to SGD as decay.
                     NOTE(review): Keras documents SGD's decay as a
                     learning-rate decay, not an L2 weight penalty -
                     confirm this is the intent.
    momentum       : Passed to SGD as momentum (nesterov is enabled)
    maxMbPerEpoch  : The maximum number of minibatches to run in each
                     epoch (default is to process entire data volume
                     each epoch).
    nEpochs        : Number of training epochs to run
    log            : a logging object (for reporting status)
    outDir         : if not None, a directory where model weights
                     will be stored (highly recommended).  Per epoch,
                     the weights, the training accuracies and the
                     validation probabilities are written there.

    Returns the trained model.

    The list-valued defaults are only read, never modified.
    """
    if not outDir:
        if log:
            log.warning('No output directory specified - are you sure this is what you want?')
    elif not os.path.exists(outDir):
        os.makedirs(outDir)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # preprocess data
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # discard unneeded slices (if any)
    if trainSlices:
        Xtrain = Xtrain[trainSlices,:,:,:]
        Ytrain = Ytrain[trainSlices,:,:]
    if validSlices:
        Xvalid = Xvalid[validSlices,:,:,:]
        Yvalid = Yvalid[validSlices,:,:]

    # rescale features to live in [0 1]
    # XXX: technically, should probably use scale factors from
    #      train volume on validation data...
    Xtrain = emlib.rescale_01(Xtrain, perChannel=True)
    Xvalid = emlib.rescale_01(Xvalid, perChannel=True)

    # Remap class labels to consecutive natural numbers.
    # Note that any pixels that should be omitted from the
    # analysis are mapped to -1 by this function.
    Ytrain = emlib.number_classes(Ytrain, omitLabels)
    Yvalid = emlib.number_classes(Yvalid, omitLabels)

    if log:
        log.info('training volume dimensions: %s' % str(Xtrain.shape))
        log.info('training values min/max: %g, %g' % (np.min(Xtrain), np.max(Xtrain)))
        log.info('training class labels: %s' % str(np.unique(Ytrain)))
        for yi in np.unique(Ytrain):
            cnt = np.sum(Ytrain == yi)
            log.info(' class %d has %d instances' % (yi, cnt))
        log.info('')
        log.info('validation volume dimensions: %s' % str(Xvalid.shape))
        log.info('validation values min/max: %g, %g' % (np.min(Xvalid), np.max(Xvalid)))
        log.info('validation class labels: %s' % str(np.unique(Yvalid)))

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # create and configure CNN
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('creating CNN')
    model = getattr(emm, modelName)()
    sgd = SGD(lr=learnRate0, decay=weightDecay, momentum=momentum, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Do training
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    for epoch in range(nEpochs):
        if log:
            log.info('starting training epoch %d (of %d)' % (epoch, nEpochs))

        # Labels were remapped above, so pixels to skip now carry -1.
        # The second return value (loss) is not used here.
        trainAcc, _ = _train_one_epoch(model, Xtrain, Ytrain,
                                       log=log,
                                       omitLabels=[-1,],
                                       nBatches=maxMbPerEpoch)

        if outDir:
            # save a snapshot of current model weights
            weightFile = os.path.join(outDir, "weights_epoch_%03d.h5" % epoch)
            if os.path.exists(weightFile):
                os.remove(weightFile)
            model.save_weights(weightFile)

            # also save accuracies (for diagnostic purposes)
            accFile = os.path.join(outDir, 'acc_epoch_%03d.npy' % epoch)
            np.save(accFile, trainAcc)

        # Evaluate performance on validation data.
        if log:
            log.info('epoch %d complete. validating...' % epoch)
        Prob, validAcc = _evaluate(model, Xvalid, Yvalid, omitLabels=[-1,], log=log)
        if log:
            log.info('accuracy on validation data: %0.2f%%' % validAcc)

        if outDir:
            estFile = os.path.join(outDir, "validation_epoch_%03d.npy" % epoch)
            np.save(estFile, Prob)

    if log:
        log.info('Finished!')
    return model
def deploy_model(X,
                 weightsFile,
                 log=None,
                 slices=[],
                 modelName='ciresan_n3',
                 evalPct=1.0,
                 outFile=None):
    """Applies a previously trained CNN to new data.

    X           : Tensor of features with dimensions as specified in (1)
    weightsFile : File with previously trained weights; handed to the
                  model's load_weights()
    log         : a logging object (for reporting status), or None
    slices      : A list of slice indices to evaluate (or [] to use all
                  the data).  The list is only read, never modified.
    modelName   : Name of the attribute of emm that is called (with no
                  arguments) to construct the model
    evalPct     : Fraction of volume to evaluate; in [0,1].  Passed
                  through to _evaluate().
    outFile     : File name where class probabilities should be stored
                  (or a false value to skip saving)

    Returns whatever _evaluate() produces for the (rescaled) volume.
    """
    # Setup output file/directory
    if outFile:
        # A bare file name has an empty dirname, and os.makedirs('')
        # raises; only create the directory when there is one to create.
        parentDir = os.path.dirname(outFile)
        if parentDir and not os.path.exists(parentDir):
            os.makedirs(parentDir)
    elif log:
        log.warning(
            'No output file specified - are you sure this is what you want?'
        )

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # preprocess data
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if slices:
        X = X[slices, :, :]

    # rescale features to live in [0, 1]
    X = emlib.rescale_01(X, perChannel=True)

    if log:
        log.info('X volume dimensions: %s' % str(X.shape))
        log.info('X values min/max: %g, %g' % (np.min(X), np.max(X)))

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # initialize CNN
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('initializing CNN...')
    model = getattr(emm, modelName)()
    model.compile(
        optimizer='sgd',  # not used, but required by keras
        loss='categorical_crossentropy')
    model.load_weights(weightsFile)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Do it
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if log:
        log.info('evaluating volume...')
    Prob = _evaluate(model, X, log=log, evalPct=evalPct)
    if log:
        log.info('Complete!')

    if outFile:
        np.save(outFile, Prob)
        # Also write a MATLAB copy; the name is outFile with '.npy'
        # replaced by '.mat'.
        scipy.io.savemat(outFile.replace('.npy', '.mat'), {'P': Prob})
        if log:
            log.info('Probabilites stored in file %s' % outFile)

    return Prob