Exemple #1
0
def _load_data(xName, yName, tileRadius, onlySlices, omitLabels=None):
    """Read an EM volume (plus labels, if named) and preprocess both.

    The data volume is loaded as float32, optionally restricted to
    ``onlySlices`` along axis 0, passed through ``emlib.mirror_edges`` with
    ``tileRadius`` and divided by 255 whenever its maximum exceeds 1.

    When ``yName`` is truthy the label volume receives the same slice
    selection; a volume whose only values are {0, 255} is flipped to {1, 0};
    the labels are then remapped by ``emlib.fix_class_labels`` (using the
    omit list returned by ``_omit_labels``), cast to int32 and mirrored
    like the data.

    Returns:
        ``(X, Y)`` when ``yName`` is truthy, otherwise just ``X``.
    """
    emVol = emlib.load_cube(xName, np.float32)

    # Axis 0 is expected to be Z, with fewer slices than there are pixels
    # along either in-plane axis; bail out early if that does not hold.
    assert emVol.shape[0] < emVol.shape[1]
    assert emVol.shape[0] < emVol.shape[2]

    if onlySlices:
        emVol = emVol[onlySlices, :, :]
    print('[emCNN]:    data shape: %s' % str(emVol.shape))

    emVol = emlib.mirror_edges(emVol, tileRadius)

    # Bring the data into [0 1].
    # *** ASSUMPTION *** anything with a maximum above 1 is in [0 255].
    if np.max(emVol) > 1:
        emVol = emVol / 255.
    lo, hi = np.min(emVol), np.max(emVol)
    print('[emCNN]:    data min/max: %0.2f / %0.2f' % (lo, hi))

    # No labels file (e.g. deploy mode): only the data volume is returned.
    if not yName:
        return emVol

    labels = emlib.load_cube(yName, np.float32)
    if onlySlices:
        labels = labels[onlySlices, :, :]
    print('[emCNN]:    labels shape: %s' % str(labels.shape))

    # ** ASSUMPTION **: special case for membrane detection / ISBI volume,
    # recognised by the label set being exactly {0, 255}.
    distinct = np.unique(labels)
    distinct.sort()
    isbiStyle = (len(distinct) == 2) and (distinct[0] == 0) and (distinct[1] == 255)
    if isbiStyle:
        print('[emCNN]:    ISBI-style labels detected.  converting 0->1, 255->0')
        # Order matters: 0 -> 1 has to happen before 255 -> 0.
        labels[labels == 0] = 1      # membrane
        labels[labels == 255] = 0    # non-membrane

    # Class labels have to be contiguous integers starting at 0 because the
    # network maps them to output indices; remap the native values here.
    omitLabels, pctOmitted = _omit_labels(labels, omitLabels)
    labels = emlib.fix_class_labels(labels, omitLabels).astype(np.int32)

    print('[emCNN]:    yAll is %s' % str(np.unique(labels)))
    print('[emCNN]:    will use %0.2f%% of volume' % (100.0 - pctOmitted))

    labels = emlib.mirror_edges(labels, tileRadius)
    return emVol, labels
Exemple #2
0
def _load_data(xName, yName, args, tileSize):
    """Load the data volume and, if named, the label volume, then preprocess.

    ``args`` supplies ``onlySlices`` (optional axis-0 selection applied to
    both volumes) and ``omitLabels`` (forwarded to
    ``emlib.fix_class_labels``).  Both volumes are passed through
    ``emlib.mirror_edges`` with ``tileSize``; the data volume is divided by
    255 when its maximum exceeds 1.

    Returns:
        ``(X, Y)``; ``Y`` is ``None`` when ``yName`` is falsy.
    """
    data = emlib.load_cube(xName, np.float32)

    # Z (axis 0) should be the shortest axis; verify before going further.
    assert data.shape[0] < data.shape[1]
    assert data.shape[0] < data.shape[2]
    print('[emCNN]:    data shape: %s' % str(data.shape))

    sliceSel = args.onlySlices
    if sliceSel:
        data = data[sliceSel, :, :]

    data = emlib.mirror_edges(data, tileSize)

    # Rescale to [0 1], assuming raw data lives in [0 255].
    if np.max(data) > 1:
        data = data / 255.

    # Without a labels file (e.g. deploy mode) there is nothing more to do.
    if not yName:
        return data, None

    labels = emlib.load_cube(yName, np.float32)
    if sliceSel:
        labels = labels[sliceSel, :, :]

    # The network maps class labels to output indices, so they must be
    # contiguous integers starting at 0; remap the native values.
    labels = emlib.fix_class_labels(labels, args.omitLabels)

    print('[emCNN]:    yAll is %s' % str(np.unique(labels)))
    pctUsed = 100. * np.sum(labels >= 0) / numel(labels)
    print('[emCNN]:    will use %0.2f%% of volume' % pctUsed)

    labels = emlib.mirror_edges(labels, tileSize)
    return data, labels
Exemple #3
0
def _load_data(xName, yName, args, tileSize):
    """Load and preprocess an EM volume together with its optional labels.

    Steps, in order: load the data as float32, check that axis 0 is the
    shortest, apply ``args.onlySlices`` if set, mirror the edges via
    ``emlib.mirror_edges(…, tileSize)`` and divide by 255 if the maximum
    exceeds 1.  If ``yName`` is truthy the labels are loaded, sliced the
    same way, remapped by ``emlib.fix_class_labels`` with
    ``args.omitLabels`` and mirrored.

    Returns:
        A pair ``(X, Y)`` where ``Y`` is ``None`` if no labels were loaded.
    """

    def _select_slices(cube):
        # Keep only the requested slices along axis 0 (all, if none given).
        return cube[args.onlySlices, :, :] if args.onlySlices else cube

    X = emlib.load_cube(xName, np.float32)

    # We normally have fewer slices in Z than pixels in X or Y, so a
    # violation most likely means the axes are not in the expected order.
    nSlices = X.shape[0]
    assert nSlices < X.shape[1]
    assert nSlices < X.shape[2]
    print('[emCNN]:    data shape: %s' % str(X.shape))

    X = emlib.mirror_edges(_select_slices(X), tileSize)

    # Map intensities into [0 1]; raw data is assumed to be in [0 255].
    if np.max(X) > 1:
        X = X / 255.

    # Labels are optional (deploy mode may not have any).
    Y = None
    if yName:
        Y = _select_slices(emlib.load_cube(yName, np.float32))

        # Remap native label values to contiguous integers starting at 0,
        # since they are used as indices at the output of the network.
        Y = emlib.fix_class_labels(Y, args.omitLabels)

        print('[emCNN]:    yAll is %s' % str(np.unique(Y)))
        fractionKept = np.sum(Y >= 0) / numel(Y)
        print('[emCNN]:    will use %0.2f%% of volume' % (100. * fractionKept))

        Y = emlib.mirror_edges(Y, tileSize)

    return X, Y
Exemple #4
0
    batchDim = emlib.infer_data_dimensions(netFn)
    print('[train]: batch shape: %s' % str(batchDim))

    if len(args.snapPrefix):
        outDir = args.snapPrefix
    else:
        outDir = str(solverParam.snapshot_prefix)  # unicode -> str
    if not os.path.isdir(outDir):
        os.mkdir(outDir)

    #----------------------------------------
    # Load and preprocess data set
    #----------------------------------------
    print('[train]: loading file: %s' % args.trainFileName)
    X = emlib.load_cube(args.trainFileName, np.float32)
    print('[train]: loading file: %s' % args.labelsFileName)
    Y = emlib.load_cube(args.labelsFileName, np.float32)

    # usually we expect fewer slices in Z than pixels in X or Y.
    # Make sure the dimensions look ok before proceeding.
    assert (X.shape[0] < X.shape[1])
    assert (X.shape[0] < X.shape[2])

    # Class labels must be natural numbers (contiguous integers starting at 0)
    # because they are mapped to indices at the output of the network.
    # This next bit of code remaps the native y values to these indices.
    yAll = np.sort(np.unique(Y))
    omitLabels = eval(args.omitLabels)
    yAll = [y for y in yAll if y not in omitLabels]
    Ytmp = -1 * np.ones(
Exemple #5
0
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setFormatter(logging.Formatter('[%(asctime)s:%(name)s:%(levelname)s]  %(message)s'))
    logger.addHandler(ch)

    args = _train_mode_args()


    # Use command line args to override default args for train_model().
    # Note to self: the first co_argcount varnames are the 
    #               function's parameters.
    validArgs = train_model.__code__.co_varnames[0:train_model.__code__.co_argcount]
    cmdLineArgs = dict_subset(vars(args), validArgs)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # load training and validation volumes
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Xtrain = emlib.load_cube(args.emTrainFile, addChannel=True)
    Ytrain = emlib.load_cube(args.labelsTrainFile, addChannel=False)
        
    Xvalid = emlib.load_cube(args.emValidFile, addChannel=True)
    Yvalid = emlib.load_cube(args.labelsValidFile, addChannel=False)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # do it
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    model = train_model(Xtrain, Ytrain, Xvalid, Yvalid, log=logger, **cmdLineArgs)


# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
Exemple #6
0
    return args



if __name__ == "__main__":
    # Script entry point: parse the deploy-mode command line, load the EM
    # volume and run deploy_model() on it.

    # Set up a DEBUG-level logger named "deploy_model" whose records go to a
    # stream handler with a timestamp/name/level prefix.
    logger = logging.getLogger("deploy_model")
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setFormatter(logging.Formatter('[%(asctime)s:%(name)s:%(levelname)s]  %(message)s'))
    logger.addHandler(ch)


    args = _deploy_mode_args()

    # Use command line args to override default args for deploy_model().
    # Note to self: the first co_argcount entries of co_varnames are the
    #               function's parameters, so validArgs holds exactly the
    #               parameter names of deploy_model.
    # NOTE(review): dict_subset presumably keeps only the entries of
    # vars(args) whose key is in validArgs -- confirm in train.py.
    from train import dict_subset
    validArgs = deploy_model.__code__.co_varnames[0:deploy_model.__code__.co_argcount]
    cmdLineArgs = dict_subset(vars(args), validArgs)

    # Load the data volume named on the command line.
    X = emlib.load_cube(args.emFile)

    # Run the model; the filtered command line options are forwarded as
    # keyword arguments.
    Prob = deploy_model(X, args.weightFile, log=logger, **cmdLineArgs)

# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
Exemple #7
0
    return args


if __name__ == "__main__":
    # Script entry point: parse the deploy-mode command line, load the EM
    # volume and run deploy_model() on it.

    # Set up a DEBUG-level logger named "deploy_model" whose records go to a
    # stream handler with a timestamp/name/level prefix.
    logger = logging.getLogger("deploy_model")
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setFormatter(
        logging.Formatter('[%(asctime)s:%(name)s:%(levelname)s]  %(message)s'))
    logger.addHandler(ch)

    args = _deploy_mode_args()

    # Use command line args to override default args for deploy_model().
    # Note to self: the first co_argcount entries of co_varnames are the
    #               function's parameters, so validArgs holds exactly the
    #               parameter names of deploy_model.
    # NOTE(review): dict_subset presumably keeps only the entries of
    # vars(args) whose key is in validArgs -- confirm in train.py.
    from train import dict_subset
    validArgs = deploy_model.__code__.co_varnames[0:deploy_model.__code__.
                                                  co_argcount]
    cmdLineArgs = dict_subset(vars(args), validArgs)

    # Load the data volume named on the command line.
    X = emlib.load_cube(args.emFile)

    # Run the model; the filtered command line options are forwarded as
    # keyword arguments.
    Prob = deploy_model(X, args.weightFile, log=logger, **cmdLineArgs)

# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
Exemple #8
0
def main(args):
    """Cut tiles out of an EM volume and store them in a new LMDB database.

    For every pixel index produced by the emlib pixel generator, the
    ``tileSize`` x ``tileSize`` window centred on that pixel is taken from
    the (uint8) EM slice and written, together with the pixel's label, as a
    serialized Caffe ``Datum`` under a zero-padded sequential key.  Each
    batch of indices yielded by the generator is committed as one LMDB
    transaction.

    Attributes read from ``args``: ``tileSize``, ``outDir``, ``emFileName``,
    ``labelsFileName``, ``omitLabels``, ``slicesExpr``, ``maxNumExamples``.

    Raises:
        RuntimeError: if ``args.outDir`` already exists.
    """
    # The radius is used in slice arithmetic below, so it has to be a real
    # integer: np.floor returns a float and NumPy rejects float slice bounds.
    tileRadius = int(np.floor(args.tileSize / 2))
    nMiniBatch = 1000 # here, a "mini-batch" specifies LMDB transaction size

    # make sure we don't clobber an existing output
    if os.path.exists(args.outDir):
        raise RuntimeError(
            'Output path "%s" already exists; please move out of the way and try again' % args.outDir)

    # load the data volumes (EM image and labels, if any)
    print('[make_lmdb]: loading EM data file: %s' % args.emFileName)
    X = emlib.load_cube(args.emFileName, np.float32)

    if args.labelsFileName:
        print('[make_lmdb]: loading labels file: %s' % args.labelsFileName)
        Y = emlib.load_cube(args.labelsFileName, np.float32)
        # NOTE(security): eval() executes whatever expression was given on
        # the command line; only run this tool with trusted arguments.
        Y = emlib.fix_class_labels(Y, eval(args.omitLabels))
        assert(Y.shape == X.shape)
    else:
        print('[make_lmdb]: no labels file; assuming this is a test volume')
        Y = np.zeros(X.shape)

    # usually we expect fewer slices in Z than pixels in X or Y.
    # Make sure the dimensions look ok before proceeding.
    assert(X.shape[0] < X.shape[1])
    assert(X.shape[0] < X.shape[2])

    # Identify the subset of the data to use for training.
    # (default is to use it all)
    if len(args.slicesExpr):
        # NOTE(security): same caveat as above -- eval() on a CLI string.
        sliceIdx = eval(args.slicesExpr)
        X = X[sliceIdx, :, :]  # python puts the z dimension first...
        Y = Y[sliceIdx, :, :]
    X = X.astype(np.uint8)  # critical!! otherwise, Caffe just flails...

    print('[make_lmdb]: EM volume shape: %s' % str(X.shape))
    print('[make_lmdb]: yAll is %s' % np.unique(Y))
    print('[make_lmdb]: %0.2f%% pixels will be omitted' % (100.0*np.sum(Y==-1)/numel(Y)))
    print('[make_lmdb]: writing results to: %s' % args.outDir)
    print('')
    sys.stdout.flush()

    # Create the output database.
    # Multiply the actual size by a fudge factor to get a safe upper bound
    dbSize = (X.nbytes * args.tileSize * args.tileSize + Y.nbytes) * 10
    env = lmdb.open(args.outDir, map_size=dbSize)

    # Extract all possible tiles.
    # This corresponds to extracting one "epoch" worth of tiles.
    tileId = 0
    lastChatter = -1
    tic = time.time()
    # One counter per non-negative label value present in Y.
    yCnt = np.zeros(sum(np.unique(Y) >= 0))

    if np.any(Y > 0):
        # generates a balanced training data set (subsamples and shuffles)
        it = emlib.stratified_interior_pixel_generator(Y, tileRadius, nMiniBatch, omitLabels=[-1])
    else:
        # enumerates all possible tiles in order (no shuffling)
        it = emlib.interior_pixel_generator(X, tileRadius, nMiniBatch)

    for Idx, epochPct in it:
        # respect upper bound on number of examples
        # NOTE(review): with ">" the loop writes maxNumExamples + 1 tiles
        # before stopping; left as is because the intent is not clear.
        if tileId > args.maxNumExamples:
            print('[make_lmdb]: stopping at %d (max number of examples reached\n)' % (tileId-1))
            break

        # Each mini-batch will be added to the database as a single transaction.
        with env.begin(write=True) as txn:
            # Translate indices Idx -> tiles Xi and labels yi.
            # Each row of Idx is (slice, row, column) of a tile centre.
            for jj in range(Idx.shape[0]):
                yi = Y[ Idx[jj,0], Idx[jj,1], Idx[jj,2] ]
                yi = int(yi)
                a = Idx[jj,1] - tileRadius
                b = Idx[jj,1] + tileRadius + 1
                c = Idx[jj,2] - tileRadius
                d = Idx[jj,2] + tileRadius + 1
                Xi = X[ Idx[jj,0], a:b, c:d ]
                assert(Xi.shape == (args.tileSize, args.tileSize))

                datum = caffe.proto.caffe_pb2.Datum()
                datum.channels = 1
                datum.height = Xi.shape[0]
                datum.width = Xi.shape[1]
                # tobytes() is the supported spelling of the deprecated
                # tostring(); both return the raw C-order bytes.
                datum.data = Xi.tobytes()
                datum.label = yi
                strId = '{:08}'.format(tileId)

                txn.put(strId.encode('ascii'), datum.SerializeToString())
                tileId += 1
                yCnt[yi] += 1

                # check early termination conditions
                if tileId > args.maxNumExamples:
                    break

        #if np.floor(epochPct) > lastChatter: 
        print('[make_lmdb] %% %0.2f done (%0.2f min;   yCnt=%s)' % ((100*epochPct), (time.time() - tic)/60, str(yCnt)))
        lastChatter = epochPct
Exemple #9
0
    netParam = caffe_pb2.NetParameter()
    text_format.Merge(open(netFn).read(), netParam)

    batchDim = emlib.infer_data_dimensions(netFn)

    if len(args.outFileNameY):
        outFileNameY = args.outFileNameY
    else:
        outFileNameY = os.path.join(
            os.path.split(args.netFile)[0],
            'Yhat_' + os.path.split(args.dataFileName)[-1])

    #----------------------------------------
    # Load and preprocess data set
    #----------------------------------------
    X = emlib.load_cube(args.dataFileName, np.float32)

    # mirror edges so that every pixel in the original data set can act
    # as the center pixel of some tile
    borderSize = int(batchDim[2] / 2)
    X = emlib.mirror_edges(X, borderSize)

    if len(args.evalSliceExpr):  # optional: pare down to a subset of slices
        idx = eval(args.evalSliceExpr)
        X = X[idx, :, :]

    # pixels that are sufficiently bright are trivial to classify
    # and can be omitted.
    Mask = np.ones(X.shape, dtype=np.bool)
    Mask[X > args.maxBrightness] = False
Exemple #10
0
    print('[deploy]: batch shape: %s' % str(batchDim))

    if len(args.outFileNameY):
        outFileNameY = args.outFileNameY
    else:
        outFileNameY = os.path.join(
            os.path.split(args.dataFileName)[0],
            'Yhat_' + os.path.split(args.dataFileName)[-1])
    outFileNameX = args.outFileNameX
    print('[deploy]: probability output file: %s' % outFileNameY)
    print('[deploy]: features output file:    %s' % outFileNameX)

    #----------------------------------------
    # Load and preprocess data set
    #----------------------------------------
    X = emlib.load_cube(args.dataFileName, np.float32)

    # mirror edges of images so that every pixel in the original data set can act
    # as a center pixel of some tile
    borderSize = int(batchDim[2] / 2)
    X = emlib.mirror_edges(X, borderSize)

    if len(args.evalSliceExpr):  # optional: pare down to a subset of slices
        idx = eval(args.evalSliceExpr)
        X = X[idx, :, :]
    print('[deploy]: data shape: %s' % str(X.shape))

    # There may be reasons for not evaluating certain pixels.
    # The mask allows the caller to specify which pixels to omit.
    if len(args.maskFileName):
        Mask = emlib.load_cube(args.maskFileName, dtype=np.bool)
Exemple #11
0
 
    batchDim = emlib.infer_data_dimensions(netFn)
    print('[deploy]: batch shape: %s' % str(batchDim))

    if len(args.outFileNameY):
        outFileNameY = args.outFileNameY
    else:
        outFileNameY = os.path.join(os.path.split(args.dataFileName)[0], 'Yhat_' + os.path.split(args.dataFileName)[-1])
    outFileNameX = args.outFileNameX
    print('[deploy]: probability output file: %s' % outFileNameY)
    print('[deploy]: features output file:    %s' % outFileNameX)
 
    #----------------------------------------
    # Load and preprocess data set
    #----------------------------------------
    X = emlib.load_cube(args.dataFileName, np.float32)

    # mirror edges of images so that every pixel in the original data set can act
    # as a center pixel of some tile    
    borderSize = int(batchDim[2]/2)
    X = emlib.mirror_edges(X, borderSize)

    if len(args.evalSliceExpr):  # optional: pare down to a subset of slices
        idx = eval(args.evalSliceExpr)
        X = X[idx,:,:]
    print('[deploy]: data shape: %s' % str(X.shape))
    
    # There may be reasons for not evaluating certain pixels.
    # The mask allows the caller to specify which pixels to omit.
    if len(args.maskFileName):
        Mask = emlib.load_cube(args.maskFileName, dtype=np.bool)
    args.trainSlices = eval(args.trainSlices)
    args.validSlices = eval(args.validSlices)
    args.testSlices = eval(args.testSlices)

    return args



if __name__ == "__main__":
    # Script entry point: load an EM volume and its ISBI-style labels, remap
    # the labels, split both into train/valid/test slices and compute a
    # brightness threshold from the membrane pixels.
    args = get_args();

    # Create the output directory if needed (os.mkdir only creates the leaf,
    # so the parent directory must already exist).
    #outDir = os.path.split(args.dataFileName)[0]
    if not os.path.isdir(args.outDir):
        os.mkdir(args.outDir)

    X = emlib.load_cube(args.dataFileName, np.uint8)
    Y = emlib.load_cube(args.labelsFileName, np.uint8)

    # Remap Y labels from ISBI convention to membrane-vs-non-membrane.
    # The order matters: 0 -> 1 must be done before 255 -> 0, otherwise the
    # freshly written zeros would be turned into ones as well.
    Y[Y==0] = 1;      #  membrane 
    Y[Y==255] = 0;    #  non-membrane

    # Change type of Y to a signed type so that -1 can be used as a value.
    Y = Y.astype(np.int8)

    # Split data and labels along axis 0 using the slice selections from the
    # command line.
    Xtrain = X[args.trainSlices,:,:]; Ytrain = Y[args.trainSlices,:,:]
    Xvalid = X[args.validSlices,:,:]; Yvalid = Y[args.validSlices,:,:]
    Xtest = X[args.testSlices,:,:];   Ytest = Y[args.testSlices,:,:]

    # Brightness thresholding: quantile(s) args.brightQuant of the EM
    # intensities at pixels labelled 1 (membrane) in the train and validation
    # sets; the test set is deliberately not used here.
    # NOTE(review): mquantiles is presumably scipy.stats.mstats.mquantiles --
    # confirm against the file's imports.
    thresh = mquantiles(np.concatenate((Xtrain[Ytrain==1], Xvalid[Yvalid==1])), args.brightQuant)
Exemple #13
0
    args.validSlices = eval(args.validSlices)
    args.testSlices = eval(args.testSlices)

    return args



if __name__ == "__main__":
    # Script entry point: load an EM volume and its ISBI-style labels, remap
    # the labels, split both into train/valid/test slices and compute a
    # brightness threshold from the membrane pixels.
    args = get_args();

    # Create the output directory if needed (os.mkdir only creates the leaf,
    # so the parent directory must already exist).
    #outDir = os.path.split(args.dataFileName)[0]
    if not os.path.isdir(args.outDir):
        os.mkdir(args.outDir)


    X = emlib.load_cube(args.dataFileName, np.uint8)
    Y = emlib.load_cube(args.labelsFileName, np.uint8)

    # Remap Y labels from ISBI convention to membrane-vs-non-membrane.
    # The order matters: 0 -> 1 must be done before 255 -> 0, otherwise the
    # freshly written zeros would be turned into ones as well.
    Y[Y==0] = 1;      #  membrane 
    Y[Y==255] = 0;    #  non-membrane

    # Change type of Y to a signed type so that -1 can be used as a value.
    Y = Y.astype(np.int8)

    # Split data and labels along axis 0 using the slice selections from the
    # command line.
    Xtrain = X[args.trainSlices,:,:]; Ytrain = Y[args.trainSlices,:,:]
    Xvalid = X[args.validSlices,:,:]; Yvalid = Y[args.validSlices,:,:]
    Xtest = X[args.testSlices,:,:];   Ytest = Y[args.testSlices,:,:]

    # Brightness thresholding: quantile(s) args.brightQuant of the EM
    # intensities at pixels labelled 1 (membrane) in the train and validation
    # sets; the test set is deliberately not used here.
    # NOTE(review): mquantiles is presumably scipy.stats.mstats.mquantiles --
    # confirm against the file's imports.
    thresh = mquantiles(np.concatenate((Xtrain[Ytrain==1], Xvalid[Yvalid==1])), args.brightQuant)
Exemple #14
0
    
    batchDim = emlib.infer_data_dimensions(netFn)
    print('[train]: batch shape: %s' % str(batchDim))

    if len(args.snapPrefix):
        outDir = args.snapPrefix
    else:
        outDir = str(solverParam.snapshot_prefix)   # unicode -> str
    if not os.path.isdir(outDir):
        os.mkdir(outDir)
    
    #----------------------------------------
    # Load and preprocess data set
    #----------------------------------------
    print('[train]: loading file: %s' % args.trainFileName)
    X = emlib.load_cube(args.trainFileName, np.float32)
    print('[train]: loading file: %s' % args.labelsFileName)
    Y = emlib.load_cube(args.labelsFileName, np.float32)

    # usually we expect fewer slices in Z than pixels in X or Y.
    # Make sure the dimensions look ok before proceeding.
    assert(X.shape[0] < X.shape[1])
    assert(X.shape[0] < X.shape[2])

    # Class labels must be natural numbers (contiguous integers starting at 0)
    # because they are mapped to indices at the output of the network.
    # This next bit of code remaps the native y values to these indices.
    yAll = np.sort(np.unique(Y))
    omitLabels = eval(args.omitLabels)
    yAll = [y for y in yAll if y not in omitLabels]
    Ytmp = -1*np.ones(Y.shape, dtype=Y.dtype)   # default label is -1, which is omitted from evaluation
Exemple #15
0
    text_format.Merge(open(netFn).read(), netParam)
    
    batchDim = emlib.infer_data_dimensions(netFn)
    print('[train]: batch shape: %s' % str(batchDim))

    if len(args.snapPrefix):
        outDir = args.snapPrefix
    else:
        outDir = str(solverParam.snapshot_prefix)   # unicode -> str
    if not os.path.isdir(outDir):
        os.mkdir(outDir)
    
    #----------------------------------------
    # Load and preprocess data set
    #----------------------------------------
    X = emlib.load_cube(args.trainFileName, np.float32)
    Y = emlib.load_cube(args.labelsFileName, np.float32)

    # Class labels must be natural numbers (contiguous integers starting at 0)
    # because they are mapped to indices at the output of the network.
    # This next bit of code remaps the native y values to these indices.
    yAll = np.sort(np.unique(Y))
    Yhat = np.zeros(Y.shape, dtype=Y.dtype)
    for yIdx, y in enumerate(yAll):
        Yhat[Y==y] = yIdx
    Y = Yhat

    # mirror edges of images so that every pixel in the original data set can act
    # as a center pixel of some tile    
    borderSize = int(batchDim[2]/2)
    X = emlib.mirror_edges(X, borderSize)
Exemple #16
0
    netParam = caffe_pb2.NetParameter()
    text_format.Merge(open(netFn).read(), netParam)
 
    batchDim = emlib.infer_data_dimensions(netFn)

    if len(args.outFileNameY):
        outFileNameY = args.outFileNameY
    else:
        outFileNameY = os.path.join(os.path.split(args.netFile)[0], 'Yhat_' + os.path.split(args.dataFileName)[-1])



    #----------------------------------------
    # Load and preprocess data set
    #----------------------------------------
    X = emlib.load_cube(args.dataFileName, np.float32)

    # mirror edges so that every pixel in the original data set can act
    # as the center pixel of some tile    
    borderSize = int(batchDim[2]/2)
    X = emlib.mirror_edges(X, borderSize)

    if len(args.evalSliceExpr):  # optional: pare down to a subset of slices
        idx = eval(args.evalSliceExpr)
        X = X[idx,:,:]

    # pixels that are sufficiently bright are trivial to classify
    # and can be omitted.
    Mask = np.ones(X.shape, dtype=np.bool)
    Mask[X > args.maxBrightness] = False