Esempio n. 1
0
    def __init__(
            self,
            volpath,
            ext='.npz',
            nb_restart_cycle=None,  # number of files to restart after
            name='single_vol',  # name
            fixed_vol_size=True,  # assumes each volume is fixed size
    ):
        """
        List the volume files under `volpath` and sanity-check the restart cycle.

        The first file is loaded to work out how many patches one volume
        produces; the restart cycle must not exceed
        (number of files) * (patches per volume).

        Args:
            volpath: directory handed to `_get_file_list` and joined with each
                file name.
            ext: file extension handed to `_get_file_list` and
                `_load_medical_volume`.
            nb_restart_cycle: restart cycle length; defaults to the number of
                files found when None.
            name: label used only in the restart-cycle assertion message.
            fixed_vol_size: when True (and a fully specified `patch_size` is
                available) the patch count is computed from the first volume.

        Raises:
            AssertionError: if no files are found, or if the restart cycle is
                larger than nb_files * nb_patches_per_vol.

        NOTE(review): `vol_rand_seed`, `data_proc_fn`, `patch_size`,
        `patch_stride` and `npz` are read below but are neither parameters nor
        locals of this method. Unless they are defined in an enclosing/module
        scope that is not visible here, the first use raises NameError.
        NOTE(review): nothing is stored on `self` in the visible lines; the
        method may continue beyond this excerpt -- confirm.
        """

        # get filenames at given paths
        # NOTE(review): `vol_rand_seed` is not defined in this method -- confirm its origin
        volfiles = _get_file_list(volpath, ext, vol_rand_seed)
        nb_files = len(volfiles)
        assert nb_files > 0, "Could not find any files at %s with extension %s" % (
            volpath, ext)

        # set up restart cycle for volume files --
        # i.e. after how many volumes do we restart
        if nb_restart_cycle is None:
            nb_restart_cycle = nb_files

        # compute subvolume split
        # (only the first file is loaded; its shape drives the patch count below)
        vol_data = _load_medical_volume(os.path.join(volpath, volfiles[0]),
                                        ext)
        # process volume
        # NOTE(review): `data_proc_fn` is not defined in this method -- confirm its origin
        if data_proc_fn is not None:
            vol_data = data_proc_fn(vol_data)
            # NOTE(review): the value of the next expression is discarded and
            # `npz` is not defined here; it looks like leftover code -- confirm
            # whether it should be removed or assigned to something.
            [f for f in _npz_headers(npz, namelist=['vol_data.npy'])][0][1]

        # one patch per volume unless a fully specified patch size is available
        nb_patches_per_vol = 1
        # NOTE(review): `patch_size` / `patch_stride` are not defined in this method
        if fixed_vol_size and (patch_size is not None) and all(
                f is not None for f in patch_size):
            nb_patches_per_vol = np.prod(
                pl.gridsize(vol_data.shape, patch_size, patch_stride))

        # the restart cycle may not exceed the total number of patches available
        assert nb_restart_cycle <= (nb_files * nb_patches_per_vol), \
            '%s restart cycle (%s) too big (%s) in %s' % \
            (name, nb_restart_cycle, nb_files * nb_patches_per_vol, volpath)
Esempio n. 2
0
def vol(
        volpath,
        ext='.npz',
        batch_size=1,
        expected_nb_files=-1,
        expected_files=None,
        data_proc_fn=None,  # processing function that takes in one arg (the volume)
        relabel=None,  # relabeling array
        nb_labels_reshape=0,  # reshape to categorial format for keras, need # labels
        keep_vol_size=False,  # whether to keep the volume size on categorical resizing
        name='single_vol',  # name, optional
        nb_restart_cycle=None,  # number of files to restart after
        patch_size=None,  # split the volume in patches? if so, get patch_size
        patch_stride=1,  # split the volume in patches? if so, get patch_stride
        collapse_2d=None,
        extract_slice=None,
        force_binary=False,
        nb_feats=1,
        patch_rand=False,
        patch_rand_seed=None,
        vol_rand_seed=None,
        binary=False,
        yield_incomplete_final_batch=True,
        verbose=False):
    """
    generator for single volume (or volume patches) from a list of files

    simple volume generator that loads a volume (via npy/mgz/nii/niigz), processes it,
    and prepares it for keras model formats

    if a patch size is passed, breaks the volume into patches and generates those

    Args:
        volpath: directory handed to `_get_file_list`; file names are joined
            onto it before loading.
        ext: file extension handed to `_get_file_list` and
            `_load_medical_volume`.
        batch_size: number of (feature-stacked) patches stacked into each
            yielded array.
        expected_nb_files: when >= 0, the number of files found must equal it.
        expected_files: when given and different from the files found, a
            warning is printed (execution continues).
        data_proc_fn: optional callable applied to every loaded volume.
        relabel: when not None, each volume is passed through
            `_relabel(vol_data, relabel)`.
        nb_labels_reshape, keep_vol_size, collapse_2d, patch_rand,
        patch_rand_seed: forwarded unchanged to `patch`.
        name: label used in log and assertion messages.
        nb_restart_cycle: the file index wraps around modulo this value;
            defaults to the number of files found.
        patch_size: per-axis patch size, or None to use each whole volume.
            None entries are replaced by the volume's size on that axis.
        patch_stride: scalar or per-axis patch stride. A scalar is expanded
            to one entry per axis whenever per-axis edits are needed.
        extract_slice, force_binary: accepted for interface compatibility;
            not used by this function.
        nb_feats: number of consecutive patches concatenated along the last
            axis to form one batch item.
        vol_rand_seed: forwarded to `_get_file_list`.
        binary: when True, features are cast to bool.
        yield_incomplete_final_batch: when True, a partially filled batch is
            yielded on the last patch of the last file of a cycle.
        verbose: print progress information.

    Yields:
        `np.vstack` of the batch items collected so far.

    Raises:
        AssertionError: no files found, patch/volume rank mismatch, restart
            cycle too big, file-count mismatch, or NaN/inf in a volume.
        ValueError: the patch generator produced nothing for a file.
    """

    # get filenames at given paths
    volfiles = _get_file_list(volpath, ext, vol_rand_seed)
    nb_files = len(volfiles)
    assert nb_files > 0, "Could not find any files at %s with extension %s" % (
        volpath, ext)

    # load (and process) the first volume only to work out the patch grid
    first_vol = _load_medical_volume(os.path.join(volpath, volfiles[0]), ext)
    if data_proc_fn is not None:
        first_vol = data_proc_fn(first_vol)

    nb_patches_per_vol = 1
    if patch_size is not None and all(f is not None for f in patch_size):
        if relabel is None and len(patch_size) == (len(first_vol.shape) - 1):
            # patch_size is one entry short of the volume rank: extend the
            # patch (and its stride) over the whole trailing axis
            nb_last = first_vol.shape[-1]
            if np.ndim(patch_stride) == 0:
                # the default stride is a scalar; give it one entry per axis
                # so it can be extended like patch_size
                patch_stride = [patch_stride] * len(patch_size)
            patch_size = [*patch_size, nb_last]
            patch_stride = [*patch_stride, nb_last]
        assert len(first_vol.shape) == len(
            patch_size), "Vol dims %d are  not equal to patch dims %d" % (len(
                first_vol.shape), len(patch_size))
        nb_patches_per_vol = np.prod(
            pl.gridsize(first_vol.shape, patch_size, patch_stride))
    if nb_restart_cycle is None:
        print("setting restart cycle to", nb_files)
        nb_restart_cycle = nb_files

    # NOTE(review): this allows nb_restart_cycle > nb_files, yet fileidx below
    # indexes volfiles directly -- confirm that larger values are intended.
    assert nb_restart_cycle <= (nb_files * nb_patches_per_vol), \
        '%s restart cycle (%s) too big (%s) in %s' % \
        (name, nb_restart_cycle, nb_files * nb_patches_per_vol, volpath)

    # check the number of files matches expected (if passed)
    if expected_nb_files >= 0:
        assert nb_files == expected_nb_files, \
            "number of files do not match: %d, %d" % (nb_files, expected_nb_files)
    if expected_files is not None:
        if not (volfiles == expected_files):
            print(
                'file lists did not match. You should probably stop execution.',
                file=sys.stderr)
            print(len(volfiles), len(expected_files))

    if verbose:
        print('nb_restart_cycle:', nb_restart_cycle)

    # iterate through files
    fileidx = -1
    feat_idx = 0
    batch_shape = None  # per-item shape of whatever is pending in the batch
    vol_data_batch = []  # items collected since the last yield
    vol_data_feats = None
    while True:
        fileidx = np.mod(fileidx + 1, nb_restart_cycle)
        if verbose and fileidx == 0:
            print('starting %s cycle' % name)

        # read next file (circular)
        try:
            file_name = os.path.join(volpath, volfiles[fileidx])
            if verbose:
                print('opening %s' % file_name)
            vol_data = _load_medical_volume(file_name, ext, verbose)
        except Exception:
            # print some context, then let the original error propagate
            debug_error_msg = "#files: %d, fileidx: %d, nb_restart_cycle: %d. error: %s"
            print(
                debug_error_msg %
                (len(volfiles), fileidx, nb_restart_cycle, sys.exc_info()[0]))
            raise

        # process volume
        if data_proc_fn is not None:
            vol_data = data_proc_fn(vol_data)

        # the original segmentation files have non-sequential relabel (i.e. some relabel are
        # missing to avoid exploding our model, we only care about the relabel that exist.
        if relabel is not None:
            vol_data = _relabel(vol_data, relabel)

        # work out the patch size / stride for this particular volume
        if patch_size is None:
            this_patch_size = vol_data.shape
            this_patch_stride = [1 for _ in this_patch_size]
        else:
            this_patch_size = list(patch_size)
            this_patch_stride = patch_stride
            none_axes = [pi for pi, p in enumerate(this_patch_size) if p is None]
            if none_axes:
                # work on a private per-axis copy: the stride may be a scalar
                # (the default) or a sequence owned by the caller
                if np.ndim(patch_stride) == 0:
                    this_patch_stride = [patch_stride] * len(this_patch_size)
                else:
                    this_patch_stride = list(patch_stride)
                for pi in none_axes:
                    # unspecified axis: take it whole, with unit stride
                    this_patch_size[pi] = vol_data.shape[pi]
                    this_patch_stride[pi] = 1

        assert not np.any(
            np.isnan(vol_data)), "Found a nan for %s" % volfiles[fileidx]
        assert np.all(
            np.isfinite(vol_data)), "Found a inf for %s" % volfiles[fileidx]

        patch_gen = patch(vol_data,
                          this_patch_size,
                          patch_stride=this_patch_stride,
                          nb_labels_reshape=nb_labels_reshape,
                          batch_size=1,
                          infinite=False,
                          collapse_2d=collapse_2d,
                          patch_rand=patch_rand,
                          patch_rand_seed=patch_rand_seed,
                          keep_vol_size=keep_vol_size)

        empty_gen = True
        patch_idx = -1
        for lpatch in patch_gen:
            empty_gen = False
            patch_idx += 1

            # add to feature: every nb_feats consecutive patches are joined
            # along the last axis
            if np.mod(feat_idx, nb_feats) == 0:
                vol_data_feats = lpatch
            else:
                vol_data_feats = np.concatenate([vol_data_feats, lpatch],
                                                np.ndim(lpatch) - 1)
            feat_idx += 1

            if binary:
                vol_data_feats = vol_data_feats.astype(bool)

            if np.mod(feat_idx, nb_feats) != 0:
                continue  # feature stack not complete yet

            # compare shapes (tuples), not array contents
            feats_shape = vol_data_feats.shape[1:]

            # yield the pending batch if the new item has a different patch
            # size; only when something is actually pending, so that a batch
            # that was already yielded is never emitted twice
            if vol_data_batch and feats_shape != batch_shape:
                print('switching patch sizes')
                pending = np.vstack(vol_data_batch)
                vol_data_batch = []
                yield pending

            # add to batch of volume data
            batch_shape = feats_shape
            vol_data_batch.append(vol_data_feats)

            # yield patch
            batch_done = len(vol_data_batch) == batch_size
            files_done = np.mod(fileidx + 1, nb_restart_cycle) == 0
            final_batch = yield_incomplete_final_batch and files_done and patch_idx == (
                nb_patches_per_vol - 1)
            if final_batch:  # verbose and
                print('last batch in %s cycle %d. nb_batch:%d' %
                      (name, fileidx, len(vol_data_batch)))

            if batch_done or final_batch:
                q = np.vstack(vol_data_batch)
                vol_data_batch = []
                yield q

        if empty_gen:
            raise ValueError('Patch generator was empty for file %s' %
                             volfiles[fileidx])