Example #1
0
def process_volumes(parameters):
    '''process original volumes and save into nifti format

    For every color channel whose summary file (``volume<color>`` plus the
    ``hdf`` extension) is not yet present in the output directory, each
    input volume is loaded, reoriented, optionally split by color,
    downsampled in x-y, padded along the last axis and written to
    ``<dir_output>/volumes/<color>/``.

    Parameters
    ----------
    parameters : dict
        Pipeline parameters.  The keys read here are ``volume_names``
        (or ``volume_names0`` when ``planes_packed`` is set),
        ``planes_packed``, ``n_colors``, ``dir_input``, ``dir_output``,
        ``ext``, ``ds``, ``registration``, ``planes_pad`` and
        ``affine_mat``.

    Returns
    -------
    None
        All results are written to disk as a side effect.
    '''

    import os
    import numpy as np
    from scipy import interpolate
    from types import SimpleNamespace
    from voluseg._tools.load_volume import load_volume
    from voluseg._tools.save_volume import save_volume
    from voluseg._tools.plane_name import plane_name
    from voluseg._tools.constants import ori, ali, nii, hdf
    from voluseg._tools.evenly_parallelize import evenly_parallelize

    p = SimpleNamespace(**parameters)

    volume_nameRDD = evenly_parallelize(
        p.volume_names0 if p.planes_packed else p.volume_names)
    for color_i in range(p.n_colors):
        # the per-color summary file marks this color as fully processed
        fullname_volmean = os.path.join(p.dir_output, 'volume%d' % (color_i))
        if os.path.isfile(fullname_volmean + hdf):
            continue

        dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i))
        os.makedirs(dir_volume, exist_ok=True)

        def initial_processing(tuple_name_volume):
            '''process one input file (element 1 of the given tuple)'''
            name_input = tuple_name_volume[1]

            # load input volumes
            fullname_input = os.path.join(p.dir_input, name_input)
            volume_input = load_volume(fullname_input + p.ext)

            # a packed input holds several planes along its first axis;
            # every plane is processed and saved under its own name
            if p.planes_packed:
                named_volumes = (
                    (plane_name(name_input, pi), plane)
                    for pi, plane in enumerate(volume_input))
            else:
                named_volumes = [(name_input, volume_input)]

            for name_volume, volume in named_volumes:
                fullname_volume = os.path.join(dir_volume, name_volume)

                # skip processing if volume exists (load_volume is compared
                # against None: the truth value of an array is ambiguous)
                if load_volume(fullname_volume + ori + nii) is not None \
                        or load_volume(fullname_volume + ali + hdf) is not None:
                    continue

                # promote single planes to 3d and reverse the axis order
                if volume.ndim == 2:
                    volume = volume[None, :, :]
                volume = volume.transpose(2, 1, 0)

                # get dimensions
                lx, ly, lz = volume.shape

                # split two-color volumes into two halves
                if p.n_colors == 2:
                    # ensure that two-frames have even number of y-dim voxels
                    assert (ly % 2 == 0)
                    ly //= 2
                    if color_i == 0:
                        volume = volume[:, :ly, :]
                    elif color_i == 1:
                        volume = volume[:, ly:, :]

                # downsample in the x-y if specified
                if p.ds > 1:
                    # crop so that both dimensions are multiples of ds
                    if (lx % p.ds) or (ly % p.ds):
                        lx -= (lx % p.ds)
                        ly -= (ly % p.ds)
                        volume = volume[:lx, :ly, :]

                    # make grid for computing downsampled values
                    sx_ds = np.arange(0.5, lx, p.ds)
                    sy_ds = np.arange(0.5, ly, p.ds)
                    xy_grid_ds = np.dstack(
                        np.meshgrid(sx_ds, sy_ds, indexing='ij'))

                    # get downsampled volume, one plane at a time
                    volume_ds = np.zeros((len(sx_ds), len(sy_ds), lz))
                    for zi in range(lz):
                        interpolation_fx = interpolate.RegularGridInterpolator(
                            (np.arange(lx), np.arange(ly)),
                            volume[:, :, zi],
                            method='linear')
                        volume_ds[:, :, zi] = interpolation_fx(xy_grid_ds)

                    volume = volume_ds

                # pad planes as necessary, filling with the 1st percentile
                if p.registration and p.planes_pad:
                    volume = np.pad(
                        volume, ((0, 0), (0, 0), (p.planes_pad, p.planes_pad)),
                        'constant',
                        constant_values=(np.percentile(volume, 1), ))

                # save volume in output directory: as an "original" nifti
                # when registration follows, otherwise directly as the
                # (transposed) "aligned" hdf volume
                if p.registration:
                    save_volume(fullname_volume + ori + nii, volume,
                                p.affine_mat)
                else:
                    save_volume(fullname_volume + ali + hdf, volume.T)

        volume_nameRDD.foreach(initial_processing)
Example #2
0
def mask_volumes(parameters):
    '''create intensity mask from the average registered volume

    The function first computes (and caches in ``mean_timeseries`` plus the
    ``hdf`` extension) the mean intensity of every aligned volume and the
    timepoints used for averaging.  Then, for every color channel without
    a summary file, it computes the geometric mean of the selected
    volumes, fits a two-component Gaussian mixture to the voxel
    intensities, derives an intensity threshold from ``thr_mask``, builds
    the mask, writes diagnostic plots and saves the results.

    Parameters
    ----------
    parameters : dict
        Pipeline parameters.  The keys read here are ``dir_output``,
        ``volume_names``, ``lt``, ``nt``, ``n_colors``, ``diam_cell``,
        ``affine_mat`` and ``thr_mask``; the whole dictionary is also
        passed on to ``clean_signal``.

    Returns
    -------
    None
        All results are written to disk as a side effect.
    '''

    import os
    import h5py
    import pyspark
    import numpy as np
    from scipy import stats
    from sklearn import mixture
    from skimage import morphology
    from types import SimpleNamespace
    from scipy.ndimage import median_filter
    from voluseg._tools.ball import ball
    from voluseg._tools.constants import ali, hdf
    from voluseg._tools.load_volume import load_volume
    from voluseg._tools.clean_signal import clean_signal
    from voluseg._tools.evenly_parallelize import evenly_parallelize

    # set up spark
    from pyspark.sql.session import SparkSession
    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext

    # set up matplotlib
    import warnings
    import matplotlib
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    p = SimpleNamespace(**parameters)

    # compute mean timeseries and ranked dff
    fullname_timemean = os.path.join(p.dir_output, 'mean_timeseries')
    volume_nameRDD = evenly_parallelize(p.volume_names)
    if not os.path.isfile(fullname_timemean + hdf):
        dff_rank = np.zeros(p.lt)
        mean_timeseries_raw = np.zeros((p.n_colors, p.lt))
        mean_timeseries = np.zeros((p.n_colors, p.lt))
        mean_baseline = np.zeros((p.n_colors, p.lt))
        for color_i in range(p.n_colors):
            dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i))

            def mean_volume(tuple_name_volume):
                '''return the mean intensity of one aligned volume'''
                name_volume = tuple_name_volume[1]
                fullname_volume = os.path.join(dir_volume, name_volume)
                return np.mean(load_volume(fullname_volume + ali + hdf),
                               dtype='float64')

            mean_timeseries_raw[color_i] = volume_nameRDD.map(
                mean_volume).collect()
            time, base = clean_signal(parameters, mean_timeseries_raw[color_i])
            mean_timeseries[color_i], mean_baseline[color_i] = time, base
            # ranks of the relative fluorescence change, summed over colors
            dff_rank += stats.rankdata((time - base) / time)

        # get high delta-f/f timepoints (all timepoints if nt is not set)
        if not p.nt:
            timepoints = np.arange(p.lt)
        else:
            timepoints = np.sort(np.argsort(dff_rank)[::-1][:p.nt])

        with h5py.File(fullname_timemean + hdf, 'w') as file_handle:
            file_handle['mean_timeseries_raw'] = mean_timeseries_raw
            file_handle['mean_timeseries'] = mean_timeseries
            file_handle['mean_baseline'] = mean_baseline
            file_handle['timepoints'] = timepoints

    # load timepoints
    with h5py.File(fullname_timemean + hdf, 'r') as file_handle:
        timepoints = file_handle['timepoints'][()]

    class accum_param(pyspark.accumulators.AccumulatorParam):
        '''define accumulator class (elementwise float64 array sum)'''
        def zero(self, val0):
            return np.zeros(val0.shape, dtype='float64')

        def addInPlace(self, val1, val2):
            return np.add(val1, val2, dtype='float64')

    for color_i in range(p.n_colors):
        # the per-color summary file marks this color as done
        fullname_volmean = os.path.join(p.dir_output, 'volume%d' % (color_i))
        if os.path.isfile(fullname_volmean + hdf):
            continue

        dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i))
        dir_plot = os.path.join(p.dir_output, 'mask_plots', str(color_i))
        os.makedirs(dir_plot, exist_ok=True)

        # volume dimensions are taken from the first aligned volume
        fullname_volume = os.path.join(dir_volume, p.volume_names[0])
        lx, ly, lz = load_volume(fullname_volume + ali + hdf).T.shape

        # geometric mean: accumulate log10 intensities over the workers
        volume_accum = sc.accumulator(np.zeros((lx, ly, lz), dtype='float64'),
                                      accum_param())

        def add_volume(tuple_name_volume):
            '''add the log10 of one aligned volume to the accumulator'''
            name_volume = tuple_name_volume[1]
            fullname_volume = os.path.join(dir_volume, name_volume)
            volume_accum.add(
                np.log10(load_volume(fullname_volume + ali + hdf).T))

        evenly_parallelize(p.volume_names[timepoints]).foreach(add_volume)
        # only len(timepoints) volumes were accumulated, so that is the
        # divisor of the mean (it equals lt when all timepoints are used)
        volume_mean = 10**(volume_accum.value / len(timepoints))

        # get peaks by comparing to a median-smoothed volume
        ball_radi = ball(0.5 * p.diam_cell, p.affine_mat)[0]
        volume_peak = volume_mean > median_filter(volume_mean,
                                                  footprint=ball_radi)

        # compute power and probability: fit a two-component mixture to
        # the 5th-95th intensity percentiles of the positive voxels
        voxel_intensity = np.percentile(volume_mean[volume_mean > 0],
                                        np.r_[5:95:0.001])[:, None]
        gmm = mixture.GaussianMixture(n_components=2, max_iter=100,
                                      n_init=100).fit(voxel_intensity)
        voxel_probability = gmm.predict_proba(voxel_intensity)
        # keep the posterior of the component whose most probable sample
        # has the higher intensity
        peak_samples = np.argmax(voxel_probability, 0)
        bright_component = np.argmax(voxel_intensity[peak_samples])
        voxel_probability = voxel_probability[:, bright_component]

        # compute intensity threshold
        if (p.thr_mask > 0) and (p.thr_mask <= 1):
            # thr_mask is a probability: look up the matching intensity
            thr_probability = p.thr_mask
            ix = np.argmin(np.abs(voxel_probability - thr_probability))
            thr_intensity = voxel_intensity[ix][0]
        elif p.thr_mask > 1:
            # thr_mask is an intensity: look up the matching probability
            thr_intensity = p.thr_mask
            ix = np.argmin(np.abs(voxel_intensity - thr_intensity))
            thr_probability = voxel_probability[ix]
        else:
            # no thresholding: every voxel passes
            thr_intensity = -np.inf
            thr_probability = 0

        print('using probability threshold of %f.' % (thr_probability))
        print('using intensity threshold of %f.' % (thr_intensity))

        # get and save brain mask
        fig = plt.figure(1, (18, 6))
        plt.subplot(131)
        _ = plt.hist(voxel_intensity, 100)
        plt.plot(thr_intensity, 0, '|', color='r', markersize=200)
        plt.xlabel('voxel intensity')
        plt.title('intensity histogram with threshold (red)')

        plt.subplot(132)
        _ = plt.hist(voxel_probability, 100)
        plt.plot(thr_probability, 0, '|', color='r', markersize=200)
        plt.xlabel('voxel probability')
        plt.title('probability histogram with threshold (red)')

        plt.subplot(133)
        plt.plot(voxel_intensity, voxel_probability, linewidth=3)
        plt.plot(thr_intensity, thr_probability, 'x', color='r', markersize=10)
        plt.xlabel('voxel intensity')
        plt.ylabel('voxel probability')
        plt.title('intensity-probability plot with threshold (red)')

        plt.savefig(os.path.join(dir_plot, 'histogram.png'))
        plt.close(fig)

        # remove all connected components smaller than thr_size voxels
        # (5000 scaled by the product of the affine-matrix diagonal)
        rx, ry, rz, _ = np.diag(p.affine_mat)
        volume_mask = (volume_mean > thr_intensity).astype('bool')
        thr_size = np.round(5000 * rx * ry * rz).astype(int)
        volume_mask = morphology.remove_small_objects(volume_mask, thr_size)

        # compute background fluorescence
        background = np.median(volume_mean[volume_mask == 0])

        # save brain mask figures, one per plane; the color range spans
        # the lowest and highest sampled intensity percentiles
        intensity_lo = voxel_intensity[0, 0]
        intensity_hi = voxel_intensity[-1, 0]
        for i in range(lz):
            fig = plt.figure(1, (18, 6))
            plt.subplot(131)
            plt.imshow(volume_mean[:, :, i].T,
                       vmin=intensity_lo,
                       vmax=intensity_hi)
            plt.title('volume intensity (plane %d)' % (i))

            plt.subplot(132)
            plt.imshow(volume_mask[:, :, i].T)
            plt.title('volume mask (plane %d)' % (i))

            plt.subplot(133)
            # first channel: intensity rescaled to [0, 1];
            # second and third channels: mask
            img = np.stack(
                (volume_mean[:, :, i], volume_mask[:, :, i],
                 volume_mask[:, :, i]),
                axis=2)
            img[:, :, 0] = (img[:, :, 0] - intensity_lo) / (
                intensity_hi - intensity_lo)
            img[:, :, 0] = np.minimum(np.maximum(img[:, :, 0], 0), 1)
            plt.imshow(np.transpose(img, [1, 0, 2]))
            plt.title('volume mask/intensity overlay (plane %d)' % (i))

            plt.savefig(os.path.join(dir_plot, 'mask_z%03d.png' % (i)))
            plt.close(fig)

        with h5py.File(fullname_volmean + hdf, 'w') as file_handle:
            file_handle['volume_mask'] = volume_mask.T
            file_handle['volume_mean'] = volume_mean.T
            file_handle['volume_peak'] = volume_peak.T
            file_handle['thr_intensity'] = thr_intensity
            file_handle['thr_probability'] = thr_probability
            file_handle['background'] = background
Example #3
0
def process_images(parameters):
    '''process original images and save into nifti format

    For every color channel without a ``volume<color>.hdf5`` summary file,
    each input image is loaded (tiff, hdf5 or klb), reoriented, optionally
    split by color, downsampled in x-y, padded along the last axis and
    saved as a float32 nifti file in ``<dir_output>/volumes/<color>/``.
    Images that already have a readable output file are skipped.

    Parameters
    ----------
    parameters : dict
        Pipeline parameters.  The keys read here are ``volume_names``,
        ``n_colors``, ``dir_input``, ``dir_output``, ``ext``, ``ds``,
        ``registration``, ``planes_pad`` and ``affine_mat``.

    Raises
    ------
    Exception
        Raised (inside the parallel task) when an image cannot be
        processed; the message names the image and the original error.
    '''

    import os
    import h5py
    import nibabel
    import numpy as np
    from scipy import interpolate
    from types import SimpleNamespace
    from skimage.external import tifffile
    from voluseg._tools.nii_image import nii_image
    from voluseg._tools.evenly_parallelize import evenly_parallelize

    # optional readers: each one is imported independently so that a
    # missing package does not hide the other one
    try:
        import PIL.Image
    except ImportError:
        PIL = None
    try:
        import pyklb
    except ImportError:
        pyklb = None

    p = SimpleNamespace(**parameters)

    def readable_nifti(fullname):
        '''return True if fullname is an existing, loadable nifti file'''
        if not os.path.isfile(fullname):
            return False
        try:
            np.asanyarray(nibabel.load(fullname).dataobj)
        except Exception:
            # unreadable (e.g. truncated) files are reprocessed
            return False
        return True

    def readable_hdf(fullname):
        '''return True if fullname is an existing hdf5 file with a
        loadable V3D dataset'''
        if not os.path.isfile(fullname):
            return False
        try:
            with h5py.File(fullname, 'r') as file_handle:
                file_handle['V3D'][()]
        except Exception:
            return False
        return True

    volume_nameRDD = evenly_parallelize(p.volume_names)
    for color_i in range(p.n_colors):
        if os.path.isfile(
                os.path.join(p.dir_output, 'volume%d.hdf5' % (color_i))):
            continue

        dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i))
        os.makedirs(dir_volume, exist_ok=True)

        def initial_processing(tuple_name_volume):
            '''process one input image (element 1 of the given tuple)'''
            name_volume = tuple_name_volume[1]
            fullname_original = os.path.join(dir_volume,
                                             name_volume + '_original.nii.gz')
            fullname_aligned = os.path.join(dir_volume,
                                            name_volume + '_aligned.nii.gz')
            fullname_aligned_hdf = fullname_aligned.replace('.nii.gz', '.hdf5')

            # skip processing if any readable output already exists
            if (readable_nifti(fullname_original)
                    or readable_nifti(fullname_aligned)
                    or readable_hdf(fullname_aligned_hdf)):
                return

            try:
                # load input images
                fullname_input = os.path.join(p.dir_input, name_volume + p.ext)
                if '.tif' in p.ext:  # also matches '.tiff'
                    try:
                        volume_input = tifffile.imread(fullname_input)
                    except Exception:
                        # fall back to reading frame by frame with PIL
                        if PIL is None:
                            raise
                        img = PIL.Image.open(fullname_input)
                        volume_input = []
                        for i in range(img.n_frames):
                            img.seek(i)
                            volume_input.append(np.array(img).T)
                        volume_input = np.array(volume_input)
                elif ('.h5' in p.ext) or ('.hdf5' in p.ext):
                    # the first dataset of the file holds the image
                    with h5py.File(fullname_input, 'r') as file_handle:
                        volume_input = file_handle[list(
                            file_handle.keys())[0]][()]
                elif ('.klb' in p.ext):
                    if pyklb is None:
                        raise ImportError(
                            'pyklb is required to read .klb images')
                    volume_input = pyklb.readfull(fullname_input)
                    volume_input = volume_input.transpose(0, 2, 1)
                else:
                    raise ValueError('unsupported extension %s' % (p.ext))

                # promote single planes to 3d and reverse the axis order
                if volume_input.ndim == 2:
                    volume_input = volume_input[None, :, :]
                volume_input = volume_input.transpose(2, 1, 0)

                # get dimensions
                lx, ly, lz = volume_input.shape

                # split two-color images into two halves
                if p.n_colors == 2:
                    # ensure that two-frames have even number of y-dim voxels
                    assert (ly % 2 == 0)
                    # integer division: ly is used as a slice index below
                    ly //= 2
                    if color_i == 0:
                        volume_input = volume_input[:, :ly, :]
                    elif color_i == 1:
                        volume_input = volume_input[:, ly:, :]

                # downsample in the x-y if specified
                if p.ds > 1:
                    # crop so that both dimensions are multiples of ds
                    if (lx % p.ds) or (ly % p.ds):
                        lx -= (lx % p.ds)
                        ly -= (ly % p.ds)
                        volume_input = volume_input[:lx, :ly, :]

                    # make grid for computing downsampled values
                    sx_ds = np.arange(0.5, lx, p.ds)
                    sy_ds = np.arange(0.5, ly, p.ds)
                    xy_grid_ds = np.dstack(
                        np.meshgrid(sx_ds, sy_ds, indexing='ij'))

                    # get downsampled image, one plane at a time
                    volume_input_ds = np.zeros((len(sx_ds), len(sy_ds), lz))
                    for zi in range(lz):
                        interpolation_fx = interpolate.RegularGridInterpolator(
                            (np.arange(lx), np.arange(ly)),
                            volume_input[:, :, zi],
                            method='linear')
                        volume_input_ds[:, :,
                                        zi] = interpolation_fx(xy_grid_ds)

                    volume_input = volume_input_ds

                # pad planes as necessary, filling with the 1st percentile
                if p.registration and p.planes_pad:
                    volume_input = np.pad(
                        volume_input,
                        ((0, 0), (0, 0), (p.planes_pad, p.planes_pad)),
                        'constant',
                        constant_values=(np.percentile(volume_input, 1), ))

                # save image as a nifti file: "original" when registration
                # follows, otherwise directly as "aligned"
                nibabel.save(
                    nii_image(volume_input.astype('float32'), p.affine_mat),
                    (fullname_original
                     if p.registration else fullname_aligned))

            except Exception as msg:
                raise Exception('image %s not processed: %s.' %
                                (name_volume, msg)) from msg

        volume_nameRDD.foreach(initial_processing)
Example #4
0
def mask_images(parameters):
    '''create intensity mask from the average registered image

    For every color channel without a ``volume<color>.hdf5`` summary file
    this function computes the geometric mean of all aligned volumes, fits
    a two-component Gaussian mixture to the voxel intensities, derives an
    intensity threshold from ``thr_mask``, builds the mask, computes the
    masked mean timeseries, writes diagnostic plots, saves the results and
    finally converts the aligned nifti volumes to hdf5 files.

    Parameters
    ----------
    parameters : dict
        Pipeline parameters.  The keys read here are ``dir_output``,
        ``volume_names``, ``lt``, ``n_colors``, ``diam_cell``,
        ``affine_mat`` and ``thr_mask``.

    Returns
    -------
    None
        All results are written to disk as a side effect.
    '''

    import os
    import h5py
    import nibabel
    import pyspark
    import numpy as np
    from sklearn import mixture
    from skimage import morphology
    from types import SimpleNamespace
    from scipy.ndimage import median_filter
    from voluseg._tools.evenly_parallelize import evenly_parallelize
    from voluseg._tools.ball import ball

    # set up spark
    from pyspark.sql.session import SparkSession
    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext

    # set up matplotlib
    import warnings
    import matplotlib
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    p = SimpleNamespace(**parameters)

    class accum_param(pyspark.accumulators.AccumulatorParam):
        '''define accumulator class (elementwise float64 array sum)'''
        def zero(self, val0):
            return np.zeros(val0.shape, dtype='float64')

        def addInPlace(self, val1, val2):
            return np.add(val1, val2, dtype='float64')

    volume_nameRDD = evenly_parallelize(p.volume_names)
    for color_i in range(p.n_colors):
        # the per-color summary file marks this color as done
        if os.path.isfile(
                os.path.join(p.dir_output, 'volume%d.hdf5' % (color_i))):
            continue

        dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i))
        dir_plot = os.path.join(p.dir_output, 'mask_plots', str(color_i))
        os.makedirs(dir_plot, exist_ok=True)

        def load_volume(name_volume):
            '''load an aligned volume from its nifti file or, if that is
            missing, from its hdf5 file (dataset V3D, transposed)'''
            fullname_aligned = os.path.join(dir_volume,
                                            name_volume + '_aligned.nii.gz')
            fullname_aligned_hdf = fullname_aligned.replace('.nii.gz', '.hdf5')
            if os.path.isfile(fullname_aligned):
                return np.asanyarray(nibabel.load(fullname_aligned).dataobj)
            elif os.path.isfile(fullname_aligned_hdf):
                with h5py.File(fullname_aligned_hdf, 'r') as file_handle:
                    return (file_handle['V3D'][()].T)
            else:
                raise Exception('%s or %s do not exist.' %
                                (fullname_aligned, fullname_aligned_hdf))

        # volume dimensions are taken from the first aligned volume
        lx, ly, lz = load_volume(p.volume_names[0]).shape

        # geometric mean: accumulate log10 intensities over the workers
        volume_accum = sc.accumulator(np.zeros((lx, ly, lz), dtype='float64'),
                                      accum_param())

        def add_volume(tuple_name_volume):
            '''add the log10 of one aligned volume to the accumulator'''
            name_volume = tuple_name_volume[1]
            volume_accum.add(np.log10(load_volume(name_volume)))

        volume_nameRDD.foreach(add_volume)
        volume_mean = 10**(volume_accum.value / p.lt)

        # get peaks by comparing to a median-smoothed volume
        ball_radi = ball(0.5 * p.diam_cell, p.affine_mat)[0]
        volume_peak = volume_mean > median_filter(volume_mean,
                                                  footprint=ball_radi)

        # compute power and probability: fit a two-component mixture to
        # the 5th-95th intensity percentiles of the positive voxels
        voxel_intensity = np.percentile(volume_mean[volume_mean > 0],
                                        np.r_[5:95:0.001])[:, None]
        gmm = mixture.GaussianMixture(n_components=2, max_iter=100,
                                      n_init=100).fit(voxel_intensity)
        voxel_probability = gmm.predict_proba(voxel_intensity)
        # keep the posterior of the component whose most probable sample
        # has the higher intensity
        peak_samples = np.argmax(voxel_probability, 0)
        bright_component = np.argmax(voxel_intensity[peak_samples])
        voxel_probability = voxel_probability[:, bright_component]

        # compute intensity threshold
        if (p.thr_mask > 0) and (p.thr_mask <= 1):
            # thr_mask is a probability: look up the matching intensity
            thr_probability = p.thr_mask
            ix = np.argmin(np.abs(voxel_probability - thr_probability))
            thr_intensity = voxel_intensity[ix][0]
        elif p.thr_mask > 1:
            # thr_mask is an intensity: look up the matching probability
            thr_intensity = p.thr_mask
            ix = np.argmin(np.abs(voxel_intensity - thr_intensity))
            thr_probability = voxel_probability[ix]
        else:
            # no thresholding: every voxel passes
            thr_intensity = -np.inf
            thr_probability = 0

        print('using probability threshold of %f.' % (thr_probability))
        print('using intensity threshold of %f.' % (thr_intensity))

        # get and save brain mask
        fig = plt.figure(1, (18, 6))
        plt.subplot(131)
        _ = plt.hist(voxel_intensity, 100)
        plt.plot(thr_intensity, 0, '|', color='r', markersize=200)
        plt.xlabel('voxel intensity')
        plt.title('intensity histogram with threshold (red)')

        plt.subplot(132)
        _ = plt.hist(voxel_probability, 100)
        plt.plot(thr_probability, 0, '|', color='r', markersize=200)
        plt.xlabel('voxel probability')
        plt.title('probability histogram with threshold (red)')

        plt.subplot(133)
        plt.plot(voxel_intensity, voxel_probability, linewidth=3)
        plt.plot(thr_intensity, thr_probability, 'x', color='r', markersize=10)
        plt.xlabel('voxel intensity')
        plt.ylabel('voxel probability')
        plt.title('intensity-probability plot with threshold (red)')

        plt.savefig(os.path.join(dir_plot, 'histogram.png'))
        plt.close(fig)

        # remove all connected components smaller than thr_size voxels
        # (5000 scaled by the product of the affine-matrix diagonal)
        rx, ry, rz, _ = np.diag(p.affine_mat)
        volume_mask = (volume_mean > thr_intensity).astype('bool')
        thr_size = np.round(5000 * rx * ry * rz).astype(int)
        volume_mask = morphology.remove_small_objects(volume_mask, thr_size)

        # compute background fluorescence
        background = np.median(volume_mean[volume_mask == 0])

        # compute mean timeseries; the mask is broadcast to the workers
        bvolume_mask = sc.broadcast(volume_mask)

        def masked_mean(tuple_name_volume):
            '''return the mean intensity of one volume inside the mask'''
            name_volume = tuple_name_volume[1]
            return np.mean(load_volume(name_volume)[bvolume_mask.value],
                           dtype='float64')

        timeseries_mean = np.array(volume_nameRDD.map(masked_mean).collect())

        # save brain mask figures, one per plane; the color range spans
        # the lowest and highest sampled intensity percentiles
        intensity_lo = voxel_intensity[0, 0]
        intensity_hi = voxel_intensity[-1, 0]
        for i in range(lz):
            fig = plt.figure(1, (18, 6))
            plt.subplot(131)
            plt.imshow(volume_mean[:, :, i].T,
                       vmin=intensity_lo,
                       vmax=intensity_hi)
            plt.title('volume intensity (plane %d)' % (i))

            plt.subplot(132)
            plt.imshow(volume_mask[:, :, i].T)
            plt.title('volume mask (plane %d)' % (i))

            plt.subplot(133)
            # first channel: intensity rescaled to [0, 1];
            # second and third channels: mask
            img = np.stack(
                (volume_mean[:, :, i], volume_mask[:, :, i],
                 volume_mask[:, :, i]),
                axis=2)
            img[:, :, 0] = (img[:, :, 0] - intensity_lo) / (
                intensity_hi - intensity_lo)
            img[:, :, 0] = np.minimum(np.maximum(img[:, :, 0], 0), 1)
            plt.imshow(np.transpose(img, [1, 0, 2]))
            plt.title('volume mask/intensity overlay (plane %d)' % (i))

            plt.savefig(os.path.join(dir_plot, 'mask_z%03d.png' % (i)))
            plt.close(fig)

        with h5py.File(os.path.join(p.dir_output, 'volume%s.hdf5' % (color_i)),
                       'w') as file_handle:
            file_handle['volume_mask'] = volume_mask.T
            file_handle['volume_mean'] = volume_mean.T
            file_handle['volume_peak'] = volume_peak.T
            file_handle['timeseries_mean'] = timeseries_mean
            file_handle['thr_intensity'] = thr_intensity
            file_handle['thr_probability'] = thr_probability
            file_handle['background'] = background

        # convert nifti images to hdf5 files
        def nii2hdf(tuple_name_volume):
            '''convert one aligned nifti volume to a gzip-compressed hdf5
            file and remove the nifti file'''
            name_volume = tuple_name_volume[1]
            fullname_aligned = os.path.join(dir_volume,
                                            name_volume + '_aligned.nii.gz')
            fullname_aligned_hdf = fullname_aligned.replace('.nii.gz', '.hdf5')

            if not os.path.isfile(fullname_aligned_hdf):
                # read the nifti file before creating the hdf5 file, so
                # that a failed read does not leave an empty hdf5 file
                # behind that would be taken for a finished conversion
                volume_aligned = np.asanyarray(
                    nibabel.load(fullname_aligned).dataobj
                ).T.astype('float32')
                with h5py.File(fullname_aligned_hdf, 'w') as file_handle:
                    file_handle.create_dataset('V3D',
                                               data=volume_aligned,
                                               compression='gzip')
                # best-effort removal of the converted nifti file
                try:
                    os.remove(fullname_aligned)
                except OSError:
                    pass

        volume_nameRDD.foreach(nii2hdf)
Example #5
0
def clean_cells(parameters):
    '''remove noise cells, detrend and detect baseline

    For every color channel without a ``cells<color>_clean`` file this
    function collects the detected cells, discards cells that lie mostly
    outside the volume mask, discards the lighter cell of every pair that
    overlaps and correlates strongly, computes cleaned timeseries and
    baselines with ``clean_signal`` and saves the result.  When all colors
    are complete, the intermediate ``volumes`` and ``cells`` directories
    are removed.

    Parameters
    ----------
    parameters : dict
        Pipeline parameters.  The keys read here are ``dir_output``,
        ``n_colors``, ``thr_mask``, ``parallel_clean`` and ``lt``; the
        whole dictionary is also passed on to ``collect_blocks`` and
        ``clean_signal``.

    Returns
    -------
    None
        All results are written to disk as a side effect.
    '''

    import os
    import h5py
    import shutil
    import numpy as np
    from collections import defaultdict
    from types import SimpleNamespace
    from itertools import combinations
    from voluseg._steps.step4e import collect_blocks
    from voluseg._tools.constants import hdf, dtype
    from voluseg._tools.clean_signal import clean_signal
    from voluseg._tools.evenly_parallelize import evenly_parallelize

    # set up spark
    from pyspark.sql.session import SparkSession
    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext

    p = SimpleNamespace(**parameters)

    # threshold on both the spatial overlap and the timeseries
    # correlation above which two cells count as duplicates
    thr_similarity = 0.5

    for color_i in range(p.n_colors):
        fullname_cells = os.path.join(p.dir_output,
                                      'cells%s_clean' % (color_i))
        if os.path.isfile(fullname_cells + hdf):
            continue

        block_id, cell_xyz, cell_weights, cell_timeseries, cell_lengths = \
            collect_blocks(color_i, parameters)

        fullname_volmean = os.path.join(p.dir_output, 'volume%d' % (color_i))
        with h5py.File(fullname_volmean + hdf, 'r') as file_handle:
            volume_mask = file_handle['volume_mask'][()].T
            background = file_handle['background'][()]
        x, y, z = volume_mask.shape

        cell_x = cell_xyz[:, :, 0]
        cell_y = cell_xyz[:, :, 1]
        cell_z = cell_xyz[:, :, 2]
        cell_w = np.nansum(cell_weights, 1)

        # timeseries that contain nans are zeroed
        ix = np.any(np.isnan(cell_timeseries), 1)
        if np.any(ix):
            print('nans (to be removed): %d' % np.count_nonzero(ix))
            cell_timeseries[ix] = 0

        # a cell is valid if the mean mask value over its first li voxels
        # exceeds thr_mask
        cell_valids = np.zeros(len(cell_w), dtype=bool)
        for i, (li, xi, yi,
                zi) in enumerate(zip(cell_lengths, cell_x, cell_y, cell_z)):
            cell_valids[i] = np.mean(volume_mask[xi[:li], yi[:li],
                                                 zi[:li]]) > p.thr_mask

        # map every occupied voxel to the valid cells that cover it; only
        # occupied voxels are stored, instead of one list per voxel of
        # the whole volume
        voxel_cells = defaultdict(list)
        volume_cell_n = np.zeros((x, y, z), dtype='int')
        for i, (li, vi) in enumerate(zip(cell_lengths, cell_valids)):
            if not vi:
                continue
            for j in range(li):
                xij, yij, zij = cell_x[i, j], cell_y[i, j], cell_z[i, j]
                voxel_cells[(int(xij), int(yij), int(zij))].append(i)
                volume_cell_n[xij, yij, zij] += 1

        # one entry per pair of cells and shared voxel
        pair_cells = [
            pair for cells in voxel_cells.values()
            for pair in combinations(cells, 2)
        ]
        assert (len(pair_cells) == np.sum(
            volume_cell_n * (volume_cell_n - 1) // 2))

        # remove duplicate cells: of two cells that share a large fraction
        # of their voxels and have correlated timeseries, drop the one
        # with the lower total weight
        if pair_cells:
            pair_id, pair_count = np.unique(pair_cells,
                                            axis=0,
                                            return_counts=True)
            for pi, fi in zip(pair_id, pair_count):
                pair_overlap = (
                    fi / np.mean(cell_lengths[pi])) > thr_similarity
                pair_correlation = np.corrcoef(
                    cell_timeseries[pi])[0, 1] > thr_similarity
                if (pair_overlap and pair_correlation):
                    cell_valids[pi[np.argmin(cell_w[pi])]] = 0

        ## get valid version of cells
        block_id = block_id[cell_valids]
        cell_weights = cell_weights[cell_valids].astype(dtype)
        cell_timeseries = cell_timeseries[cell_valids].astype(dtype)
        cell_lengths = cell_lengths[cell_valids]
        cell_x = cell_x[cell_valids]
        cell_y = cell_y[cell_valids]
        cell_z = cell_z[cell_valids]
        cell_w = cell_w[cell_valids]
        ## end get valid version of cells

        bparameters = sc.broadcast(parameters)

        def get_timebase(timeseries_tuple):
            '''clean one timeseries (element 1 of the given tuple)'''
            timeseries = timeseries_tuple[1]
            return clean_signal(bparameters.value, timeseries)

        if p.parallel_clean:
            print('Computing baseline in parallel mode... ', end='')
            timebase = evenly_parallelize(cell_timeseries).map(
                get_timebase).collect()
        else:
            print('Computing baseline in serial mode... ', end='')
            # mimic the (key, value) tuples of the parallel mode
            timebase = [
                get_timebase((None, timeseries))
                for timeseries in cell_timeseries
            ]
        print('done.')

        cell_timeseries1, cell_baseline1 = list(zip(*timebase))

        # convert to arrays
        cell_timeseries1 = np.array(cell_timeseries1)
        cell_baseline1 = np.array(cell_baseline1)

        # check that all series are in single precision
        assert (cell_weights.dtype == dtype)
        assert (cell_timeseries.dtype == dtype)
        assert (cell_timeseries1.dtype == dtype)
        assert (cell_baseline1.dtype == dtype)

        # assign every voxel to the cell with the largest weight there
        # (volume_id stays -1 where no cell is present)
        n = np.count_nonzero(cell_valids)
        volume_id = -1 + np.zeros((x, y, z))
        volume_weight = np.zeros((x, y, z))
        for i, li in enumerate(cell_lengths):
            for j in range(li):
                xij, yij, zij = cell_x[i, j], cell_y[i, j], cell_z[i, j]
                if cell_weights[i, j] > volume_weight[xij, yij, zij]:
                    volume_id[xij, yij, zij] = i
                    volume_weight[xij, yij, zij] = cell_weights[i, j]

        with h5py.File(fullname_cells + hdf, 'w') as file_handle:
            file_handle['n'] = n
            file_handle['t'] = p.lt
            file_handle['x'] = x
            file_handle['y'] = y
            file_handle['z'] = z
            file_handle['cell_x'] = cell_x
            file_handle['cell_y'] = cell_y
            file_handle['cell_z'] = cell_z
            file_handle['block_id'] = block_id
            file_handle['volume_id'] = volume_id
            file_handle['volume_weight'] = volume_weight
            file_handle['cell_weights'] = cell_weights
            file_handle['cell_timeseries_raw'] = cell_timeseries
            file_handle['cell_timeseries'] = cell_timeseries1
            file_handle['cell_baseline'] = cell_baseline1
            file_handle['background'] = background

    # clean up: remove intermediate directories once every color is done
    completion = all(
        os.path.isfile(
            os.path.join(p.dir_output, 'cells%s_clean' % (color_i)) + hdf)
        for color_i in range(p.n_colors))

    if completion:
        # each directory is removed independently and on a best-effort
        # basis, so a failure on one does not skip the other
        for dir_name in ('volumes', 'cells'):
            shutil.rmtree(os.path.join(p.dir_output, dir_name),
                          ignore_errors=True)
Example #6
0
def align_volumes(parameters):
    '''register volumes to a single middle volume

    For every color channel that does not yet have a 'volume<color>' hdf
    file in the output directory: copy the original volume at index lt // 2
    to 'reference' (unless a reference already loads), register each volume
    to that reference with the command built by ants_registration
    (typ='r'), and save the aligned volume (padding planes removed,
    transposed) in hdf format.

    parameters: dictionary of parameters; the keys read here are
        registration, volume_names, n_colors, dir_output, lt, dir_ants,
        affine_mat and planes_pad.

    Returns None.  The per-volume worker raises Exception when every
    registration attempt exits with a nonzero status.
    '''

    # do not run if registration is set to none
    if not parameters['registration']:
        return

    import os
    import shutil
    from types import SimpleNamespace
    from voluseg._tools.load_volume import load_volume
    from voluseg._tools.save_volume import save_volume
    from voluseg._tools.constants import ori, ali, nii, hdf
    from voluseg._tools.ants_registration import ants_registration
    from voluseg._tools.evenly_parallelize import evenly_parallelize

    p = SimpleNamespace(**parameters)

    volume_nameRDD = evenly_parallelize(p.volume_names)
    for color_i in range(p.n_colors):
        # skip colors for which the final volume file already exists
        fullname_volmean = os.path.join(p.dir_output, 'volume%d' % (color_i))
        if os.path.isfile(fullname_volmean + hdf):
            continue

        # use the middle volume of the series as registration reference
        dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i))
        fullname_reference = os.path.join(dir_volume, 'reference')
        if load_volume(fullname_reference + nii) is None:
            fullname_median = os.path.join(dir_volume,
                                           p.volume_names[p.lt // 2])
            shutil.copyfile(fullname_median + ori + nii,
                            fullname_reference + nii)

        dir_transform = os.path.join(p.dir_output, 'transforms', str(color_i))
        os.makedirs(dir_transform, exist_ok=True)

        def register_volume(tuple_name_volume):
            '''register one volume; the volume name is at position 1'''
            os.environ['ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS'] = '1'
            name_volume = tuple_name_volume[1]
            fullname_volume = os.path.join(dir_volume, name_volume)
            # skip processing if aligned volume exists
            if load_volume(fullname_volume + ali + hdf) is not None:
                return

            # setup registration
            cmd = ants_registration(dir_ants=p.dir_ants,
                                    in_nii=fullname_volume + ori + nii,
                                    ref_nii=fullname_reference + nii,
                                    out_nii=fullname_volume + ali + nii,
                                    prefix_out_tform=os.path.join(
                                        dir_transform,
                                        name_volume + '_tform_'),
                                    typ='r')
            # 'high' keeps the command as built; 'medium' and 'low' shorten
            # the iteration, shrink-factor and smoothing schedules
            if p.registration == 'medium':
                cmd = cmd.replace('[1000x500x250x125]', '[1000x500x250]')\
                         .replace('12x8x4x2', '12x8x4')\
                         .replace('4x3x2x1vox', '4x3x2vox')
            elif p.registration == 'low':
                cmd = cmd.replace('[1000x500x250x125]', '[1000x500]')\
                         .replace('12x8x4x2', '12x8')\
                         .replace('4x3x2x1vox', '4x3vox')

            # run registration
            flag = os.system(cmd)
            if flag:
                # if breaks change initialization
                flag = os.system(cmd.replace(nii + ',1]', nii + ',0]'))
            if flag and load_volume(fullname_volume + ori + nii).shape[2] == 1:
                # if breaks change dimensionality
                flag = os.system(
                    cmd.replace('--dimensionality 3', '--dimensionality 2'))
                if not flag:
                    # restore the singleton third axis of the 2d output
                    volume = load_volume(fullname_volume + ali + nii)[:, :,
                                                                      None]
                    save_volume(fullname_volume + ali + nii, volume,
                                p.affine_mat)
            if flag:
                raise Exception('volume %s not registered: flag %d.' %
                                (name_volume, flag))

            # load aligned volume
            volume = load_volume(fullname_volume + ali + nii)

            # remove padding
            if p.planes_pad:
                volume = volume[:, :, p.planes_pad:-p.planes_pad]

            # save as hdf5
            volume = volume.T
            save_volume(fullname_volume + ali + hdf, volume)

            # remove nifti files once the hdf copy is confirmed to load;
            # each removal is best effort and independent of the other
            if load_volume(fullname_volume + ali + hdf) is not None:
                for suffix in (ori, ali):
                    try:
                        os.remove(fullname_volume + suffix + nii)
                    except OSError:
                        pass

        volume_nameRDD.foreach(register_volume)
Example #7
0
def detect_cells(parameters):
    '''detect cells in images

    For every color channel without a 'cells<color>_raw.hdf5' file: load the
    mean, mask and peak volumes, define blocks (once; the block layout and
    the timepoints are cached in 'volume<color>.hdf5'), and factorize each
    remaining block with initialize_block_cells and nnmf_sparse.  Results
    of each block are written to 'cells/<color>/block<index>.hdf5'.

    parameters: dictionary of parameters; the keys read here are
        diam_cell, affine_mat, n_colors, dir_output, n_cells_block, nt and
        lt.  The full dictionary is also passed on to clean_signal and
        process_block_data.

    Returns None.
    '''

    import os
    import h5py
    import time
    import numpy as np
    from types import SimpleNamespace
    from voluseg._steps.step4a import define_blocks
    from voluseg._steps.step4b import process_block_data
    from voluseg._steps.step4c import initialize_block_cells
    from voluseg._steps.step4d import nnmf_sparse
    # from voluseg._steps.step4e import collect_blocks
    from voluseg._tools.evenly_parallelize import evenly_parallelize
    from voluseg._tools.clean_signal import clean_signal
    from voluseg._tools.ball import ball

    # set up spark
    from pyspark.sql.session import SparkSession
    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext

    p = SimpleNamespace(**parameters)

    ball_diam, ball_diam_xyz0 = ball(1.0 * p.diam_cell, p.affine_mat)

    # load plane filename
    for color_i in range(p.n_colors):
        if os.path.isfile(
                os.path.join(p.dir_output, 'cells%s_raw.hdf5' % (color_i))):
            continue

        dir_cell = os.path.join(p.dir_output, 'cells', str(color_i))
        os.makedirs(dir_cell, exist_ok=True)

        with h5py.File(os.path.join(p.dir_output, 'volume%s.hdf5' % (color_i)),
                       'r') as file_handle:
            volume_mean = file_handle['volume_mean'][()].T
            volume_mask = file_handle['volume_mask'][()].T
            volume_peak = file_handle['volume_peak'][()].T
            timeseries_mean = file_handle['timeseries_mean'][()]
            # flag is 1 when the block layout still has to be computed
            if 'n_blocks' in file_handle:
                flag = 0
                n_voxels_cell = file_handle['n_voxels_cell'][()]
                n_blocks = file_handle['n_blocks'][()]
                block_valids = file_handle['block_valids'][()]
                xyz0 = file_handle['block_xyz0'][()]
                xyz1 = file_handle['block_xyz1'][()]
                timepoints = file_handle['timepoints'][()]
            else:
                flag = 1

        # broadcast image peaks (for initialization) and volume_mean (for renormalization)
        bvolume_peak = sc.broadcast(volume_peak)
        bvolume_mean = sc.broadcast(volume_mean)

        # dimensions and resolution
        lxyz = volume_mean.shape
        rxyz = np.diag(p.affine_mat)[:3]

        # compute number of blocks (do only once)
        if flag:
            lx, ly, lz = lxyz
            rx, ry, rz = rxyz

            # get number of voxels in cell
            if (lz == 1) or (rz >= p.diam_cell):
                # area of a circle
                n_voxels_cell = np.pi * ((p.diam_cell / 2.0)**2) / (rx * ry)
            else:
                # volume of a cylinder (change to sphere later)
                n_voxels_cell = p.diam_cell * np.pi * (
                    (p.diam_cell / 2.0)**2) / (rx * ry * rz)

            n_voxels_cell = np.round(n_voxels_cell).astype(int)

            # get number of voxels in each cell
            n_blocks, block_valids, xyz0, xyz1 = \
                define_blocks(lx, ly, lz, p.n_cells_block, n_voxels_cell, volume_mask)

            # get timepoints for cell detection
            if not p.nt:
                # use every timepoint
                timepoints = np.arange(p.lt)
            else:
                # use the nt timepoints with the largest relative
                # deviation of the mean timeseries from its baseline
                timeseries1, baseline1 = clean_signal(parameters,
                                                      timeseries_mean)
                timepoints = np.sort(
                    np.argsort(
                        (timeseries1 - baseline1) / timeseries1)[::-1][:p.nt])

            # save number and indices of blocks
            with h5py.File(
                    os.path.join(p.dir_output, 'volume%s.hdf5' % (color_i)),
                    'r+') as file_handle:
                file_handle['n_voxels_cell'] = n_voxels_cell
                file_handle['n_blocks'] = n_blocks
                file_handle['block_valids'] = block_valids
                file_handle['block_xyz0'] = xyz0
                file_handle['block_xyz1'] = xyz1
                if 'timepoints' not in file_handle:
                    file_handle['timepoints'] = timepoints

        print('number of blocks, total: %d.' % (block_valids.sum()))

        # drop blocks that were already completed in an earlier run
        for ii in np.where(block_valids)[0]:
            try:
                with h5py.File(os.path.join(dir_cell, 'block%05d.hdf5' % (ii)),
                               'r') as file_handle:
                    if ('completion' in file_handle
                        ) and file_handle['completion'][()]:
                        block_valids[ii] = 0
            except (NameError, OSError):
                pass

        print('number of blocks, remaining: %d.' % (block_valids.sum()))
        ix = np.where(block_valids)[0]
        block_ixyz01 = list(zip(ix, xyz0[ix], xyz1[ix]))

        # detect individual cells with sparse nnmf algorithm
        def detect_cells_block(tuple_i_xyz0_xyz1):
            '''factorize one block; (index, xyz0, xyz1) is at position 1'''
            os.environ['MKL_NUM_THREADS'] = '1'

            ii, xyz0, xyz1 = tuple_i_xyz0_xyz1[1]

            voxel_xyz, voxel_timeseries, peak_idx, voxel_similarity_peak = \
                process_block_data(xyz0, xyz1, parameters, color_i, lxyz, rxyz, \
                                   ball_diam, bvolume_mean, bvolume_peak, timepoints)

            n_voxels_block = len(voxel_xyz)  # number of voxels in block

            voxel_fraction_peak = np.argsort(
                ((voxel_timeseries[peak_idx])**2).mean(1)) / len(peak_idx)
            # retry with a progressively smaller fraction of peak voxels
            # whenever initialization or factorization raises ValueError
            for fraction in np.r_[1:0:-0.05]:
                try:
                    peak_valids = (voxel_fraction_peak >=
                                   (1 - fraction))  # valid voxel indices

                    n_cells = np.round(peak_valids.sum() /
                                       (0.5 * n_voxels_cell)).astype(int)
                    print((fraction, n_cells))

                    tic = time.time()
                    voxel_timeseries_valid, voxel_xyz_valid, cell_weight_init_valid, \
                    cell_neighborhood_valid, cell_sparseness = \
                        initialize_block_cells( n_voxels_cell, n_voxels_block, n_cells, \
                        voxel_xyz, voxel_timeseries, peak_idx, peak_valids, voxel_similarity_peak, \
                        lxyz, rxyz, ball_diam, ball_diam_xyz0)
                    print('cell initialization: %.1f minutes.\n' %
                          ((time.time() - tic) / 60))

                    tic = time.time()
                    cell_weights_valid, cell_timeseries_valid, d = nnmf_sparse(
                        voxel_timeseries_valid,
                        voxel_xyz_valid,
                        cell_weight_init_valid,
                        cell_neighborhood_valid,
                        cell_sparseness,
                        timepoints=timepoints,
                        miniter=10,
                        maxiter=100,
                        tolfun=1e-3)

                    success = 1
                    print('cell factorization: %.1f minutes.\n' %
                          ((time.time() - tic) / 60))
                    break
                except ValueError as msg:
                    print('retrying factorization of block %d: %s' % (ii, msg))
                    success = 0

            # get cell positions and timeseries, and save cell data
            with h5py.File(os.path.join(dir_cell, 'block%05d.hdf5' % (ii)),
                           'w') as file_handle:
                if success:
                    for ci in range(n_cells):
                        ix = cell_weights_valid[:, ci] > 0
                        xyzi = voxel_xyz_valid[ix]
                        wi = cell_weights_valid[ix, ci]
                        # weighted mean of volume_mean over the cell voxels;
                        # a tuple of coordinate arrays is required for
                        # multidimensional integer indexing
                        bi = np.sum(
                            wi * bvolume_mean.value[tuple(xyzi.T)]) / np.sum(wi)
                        # rescale the cell timeseries so its mean equals bi
                        ti = bi * cell_timeseries_valid[ci] / np.mean(
                            cell_timeseries_valid[ci])

                        file_handle['/cell/%05d/xyz' % (ci)] = xyzi
                        file_handle['/cell/%05d/weights' % (ci)] = wi
                        file_handle['/cell/%05d/timeseries' % (ci)] = ti

                file_handle['n_cells'] = n_cells
                file_handle['completion'] = 1

        if block_valids.any():
            evenly_parallelize(block_ixyz01).foreach(detect_cells_block)
Example #8
0
def align_images(parameters):
    '''register images to a single middle image

    For every color channel without a 'volume<color>.hdf5' file in the
    output directory: copy the original image at index lt // 2 to
    'reference_original.nii.gz' (unless it exists) and register each image
    to that reference with the command built by ants_registration
    (typ='r').  Padding planes are removed from the aligned image and the
    original image is deleted once the aligned image loads.

    parameters: dictionary of parameters; the keys read here are
        registration, volume_names, n_colors, dir_output, lt, dir_ants,
        affine_mat and planes_pad.

    Returns None.  The per-image worker raises Exception for an unknown
    registration type or when no aligned image was produced.
    '''

    # do not run if registration is set to none
    if not parameters['registration']:
        return

    import os
    import h5py
    import shutil
    import nibabel
    import numpy as np
    from types import SimpleNamespace
    from voluseg._tools.nii_image import nii_image
    from voluseg._tools.ants_registration import ants_registration
    from voluseg._tools.evenly_parallelize import evenly_parallelize

    p = SimpleNamespace(**parameters)

    def load_nii_data(fullname):
        '''load the image array without forcing a floating-point type'''
        # documented replacement for the get_data() method of nibabel images
        return np.asanyarray(nibabel.load(fullname).dataobj)

    volume_nameRDD = evenly_parallelize(p.volume_names)
    for color_i in range(p.n_colors):
        if os.path.isfile(
                os.path.join(p.dir_output, 'volume%d.hdf5' % (color_i))):
            continue

        # use the middle image of the series as registration reference
        dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i))
        fullname_reference = os.path.join(dir_volume,
                                          'reference_original.nii.gz')
        if not os.path.isfile(fullname_reference):
            fullname_lt_2 = os.path.join(
                dir_volume, p.volume_names[p.lt // 2] + '_original.nii.gz')
            shutil.copyfile(fullname_lt_2, fullname_reference)

        dir_transform = os.path.join(p.dir_output, 'transforms', str(color_i))
        os.makedirs(dir_transform, exist_ok=True)

        def register_volume(tuple_name_volume):
            '''register one image; the image name is at position 1'''
            os.environ['ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS'] = '1'
            name_volume = tuple_name_volume[1]
            fullname_original = os.path.join(dir_volume,
                                             name_volume + '_original.nii.gz')
            fullname_aligned = os.path.join(dir_volume,
                                            name_volume + '_aligned.nii.gz')
            fullname_aligned_hdf = fullname_aligned.replace('.nii.gz', '.hdf5')

            # skip processing if an aligned image (nifti or hdf5) exists and
            # can be read; an unreadable file is registered again
            if os.path.isfile(fullname_aligned):
                try:
                    load_nii_data(fullname_aligned)
                    return
                except Exception:
                    pass
            if os.path.isfile(fullname_aligned_hdf):
                try:
                    # open read-only so that the check never modifies the file
                    with h5py.File(fullname_aligned_hdf, 'r') as file_handle:
                        file_handle['V3D'][()]
                    return
                except Exception:
                    pass

            cmd = ants_registration(dir_ants=p.dir_ants,
                                    in_nii=fullname_original,
                                    ref_nii=fullname_reference,
                                    out_nii=fullname_aligned,
                                    prefix_out_tform=os.path.join(
                                        dir_transform,
                                        name_volume + '_tform_'),
                                    typ='r')
            # 'high' keeps the command as built; 'medium' and 'low' shorten
            # the iteration, shrink-factor and smoothing schedules
            if p.registration == 'high':
                pass
            elif p.registration == 'medium':
                cmd = cmd.replace('[1000x500x250x125]','[1000x500x250]')\
                         .replace('12x8x4x2', '12x8x4')\
                         .replace('4x3x2x1vox', '4x3x2vox')
            elif p.registration == 'low':
                cmd = cmd.replace('[1000x500x250x125]','[1000x500]')\
                         .replace('12x8x4x2', '12x8')\
                         .replace('4x3x2x1vox', '4x3vox')
            else:
                raise Exception('unknown registration type.')

            # run registration
            flag = os.system(cmd)
            if flag:
                # if breaks change initialization
                flag = os.system(cmd.replace('.nii.gz,1]', '.nii.gz,0]'))
            if flag and nibabel.load(fullname_original).shape[2] == 1:
                # if breaks change dimensionality
                flag = os.system(
                    cmd.replace('--dimensionality 3', '--dimensionality 2'))
                if not flag:
                    # restore the singleton third axis of the 2d output
                    volume_input = load_nii_data(fullname_aligned)[:, :, None]
                    nibabel.save(nii_image(volume_input, p.affine_mat),
                                 fullname_aligned)

            # report failure before the aligned image is touched again
            if not os.path.isfile(fullname_aligned):
                raise Exception('image %s not registered: flag %d.' %
                                (name_volume, flag))

            # remove padding
            if p.planes_pad:
                volume_aligned = load_nii_data(fullname_aligned)
                volume_aligned = volume_aligned[:, :,
                                                p.planes_pad:-p.planes_pad]
                nibabel.save(nii_image(volume_aligned, p.affine_mat),
                             fullname_aligned)

            # remove the original only if the aligned image loads (best effort)
            try:
                load_nii_data(fullname_aligned)
                os.remove(fullname_original)
            except Exception:
                pass

        volume_nameRDD.foreach(register_volume)
Example #9
0
def collect_blocks(color_i, parameters):
    '''gather the cells of all valid blocks of one color channel

    color_i: index of the color channel.
    parameters: dictionary of parameters; only dir_output is read here.

    Returns (cell_xyz_array, cell_weights_array, cell_timeseries_array,
    cell_lengths): voxel coordinates padded with -1, voxel weights padded
    with nan, the stacked timeseries, and the number of voxels per cell.
    '''

    import os
    import h5py
    import numpy as np
    from types import SimpleNamespace
    from voluseg._tools.evenly_parallelize import evenly_parallelize

    # set up spark
    import pyspark
    from pyspark.sql.session import SparkSession
    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext

    p = SimpleNamespace(**parameters)

    dir_cell = os.path.join(p.dir_output, 'cells', str(color_i))

    fullname_volume = os.path.join(p.dir_output, 'volume%d.hdf5' % (color_i))
    with h5py.File(fullname_volume, 'r') as volume_file:
        block_valids = volume_file['block_valids'][()]

    class accum_data(pyspark.accumulators.AccumulatorParam):
        '''accumulator that concatenates three lists element-wise'''
        def zero(self, val0):
            return [[]] * 3

        def addInPlace(self, val1, val2):
            return [val1[k] + val2[k] for k in range(3)]

    # accumulator for xyz, weights and timeseries of collected cells
    cell_data = sc.accumulator([[]] * 3, accum_data())

    def add_data(tuple_ii):
        '''read one block file and add its cells to the accumulator'''
        ii = tuple_ii[1]
        block_xyz, block_weights, block_timeseries = [], [], []
        try:
            fullname_block = os.path.join(dir_cell, 'block%05d.hdf5' % (ii))
            with h5py.File(fullname_block, 'r') as block_file:
                n_cells = block_file['n_cells'][()]
                for ci in range(n_cells):
                    block_xyz.append(block_file['/cell/%05d/xyz' % (ci)][()])
                    block_weights.append(
                        block_file['/cell/%05d/weights' % (ci)][()])
                    block_timeseries.append(
                        block_file['/cell/%05d/timeseries' % (ci)][()])

            # nothing is added if any dataset of the block was missing
            cell_data.add([block_xyz, block_weights, block_timeseries])
        except KeyError:
            print('block %d is empty.' % ii)
        except IOError:
            print('block %d does not exist.' % ii)

    idx_block_valids = np.argwhere(block_valids).T[0]
    evenly_parallelize(idx_block_valids).foreach(add_data)
    cell_xyz, cell_weights, cell_timeseries = cell_data.value

    # convert lists to arrays, padded to the length of the largest cell
    cn = len(cell_xyz)
    cell_lengths = np.array([len(wi) for wi in cell_weights])
    max_length = np.max(cell_lengths)
    cell_xyz_array = np.full((cn, max_length, 3), -1, dtype=int)
    cell_weights_array = np.full((cn, max_length), np.nan)
    for ci in range(len(cell_lengths)):
        li = cell_lengths[ci]
        cell_xyz_array[ci, :li] = cell_xyz[ci]
        cell_weights_array[ci, :li] = cell_weights[ci]
    cell_timeseries_array = np.array(cell_timeseries)

    return cell_xyz_array, cell_weights_array, cell_timeseries_array, cell_lengths
Example #10
0
def process_parameters(parameters0=None):
    '''process parameters and create parameter file

    Validate a copy of the input dictionary, derive the volume extension
    (the most common extension in dir_input), the sorted volume names, the
    number of volumes and the affine matrix, and pickle the result to
    'parameters.pickle' in dir_output.

    parameters0: dictionary of parameters; it must contain every key of
        parameter_dictionary().  The input itself is not modified.

    Returns None.  Validation problems are printed as 'error: ...' and end
    processing without saving.  Nothing is done if the parameter file
    already exists.
    '''

    import os
    import copy
    import pickle
    import numpy as np

    parameters = copy.deepcopy(parameters0)

    ## general checks

    # check that parameter input is a dictionary
    if not isinstance(parameters, dict):
        print('error: specify parameter dictionary as input.')
        return

    # project helpers are only needed once the input is known to be a
    # dictionary
    from voluseg._tools.load_volume import load_volume
    from voluseg._tools.plane_name import plane_name
    from voluseg._tools.parameter_dictionary import parameter_dictionary
    from voluseg._tools.evenly_parallelize import evenly_parallelize

    # check if any parameters are missing
    missing_parameters = set(parameter_dictionary()) - set(parameters)
    if missing_parameters:
        print('error: missing parameters \'%s\'.' %
              ('\', \''.join(missing_parameters)))
        return

    # get input and output directories, and parameter filename
    dir_input = parameters['dir_input']
    dir_output = parameters['dir_output']
    filename_parameters = os.path.join(dir_output, 'parameters.pickle')

    # do not overwrite an existing parameter file
    if os.path.isfile(filename_parameters):
        print('exiting, parameter file exists: %s.' % (filename_parameters))
        return

    ## specific checks

    # check directory names
    for i in ['dir_ants', 'dir_input', 'dir_output', 'registration']:
        pi = parameters[i]
        if not (isinstance(pi, str) and (' ' not in pi)):
            print('error: \'%s\' must be a string without spaces.' % (i))
            return

    # check booleans
    for i in ['parallel_clean', 'planes_packed']:
        pi = parameters[i]
        if not isinstance(pi, bool):
            print('error: \'%s\' must be a boolean.' % (i))
            return

    # check integers
    for i in ['ds', 'n_cells_block', 'n_colors', 'nt', 'planes_pad']:
        pi = parameters[i]
        if not (np.isscalar(pi) and (pi >= 0) and (pi == np.round(pi))):
            print('error: \'%s\' must be a nonnegative or positive integer.' %
                  (i))
            return

    # check non-negative real numbers:
    for i in [
            'diam_cell', 'f_hipass', 'f_volume', 'res_x', 'res_y', 'res_z',
            't_baseline', 't_section', 'thr_mask'
    ]:
        pi = parameters[i]
        if not (np.isscalar(pi) and (pi >= 0) and np.isreal(pi)):
            print(
                'error: \'%s\' must be a nonnegative or positive real number.'
                % (i))
            return

    # check registration
    if parameters['registration']:
        parameters['registration'] = parameters['registration'].lower()
        if parameters['registration'] == 'none':
            parameters['registration'] = None
        elif parameters['registration'] not in ['high', 'medium', 'low']:
            print(
                'error: \'registration\' must be \'high\', \'medium\', \'low\', or \'none\'.'
            )
            return

    # check plane padding
    if (not parameters['registration']) and (parameters['planes_pad'] != 0):
        print('error: \'planes_pad\' must be 0 if \'registration\' is None.')
        return

    # get volume extension, volume names and number of segmentation timepoints
    # (the extension is everything after the first dot of the file name)
    file_names = [i.split('.', 1) for i in os.listdir(dir_input) if '.' in i]
    if not file_names:
        print('error: no files with an extension found in \'%s\'.' %
              (dir_input))
        return
    file_exts, counts = np.unique([j for i, j in file_names],
                                  return_counts=True)
    ext = '.' + file_exts[np.argmax(counts)]
    volume_names = np.sort([i for i, j in file_names if '.' + j == ext])
    lt = len(volume_names)

    # adjust parameters for packed planes data
    if parameters['planes_packed']:
        volume_names0 = copy.deepcopy(volume_names)
        parameters['volume_names0'] = volume_names0
        parameters['res_z'] = parameters['diam_cell']

        def volume_plane_names(tuple_name_volume):
            '''list the plane names of one volume (name at position 1)'''
            name_volume = tuple_name_volume[1]
            fullname_volume = os.path.join(dir_input, name_volume)
            lp = len(load_volume(fullname_volume + ext))
            return [plane_name(name_volume, pi) for pi in range(lp)]

        volume_names = evenly_parallelize(volume_names0).map(
            volume_plane_names).collect()
        volume_names = np.sort([pi for ni in volume_names for pi in ni])
        lt = len(volume_names)

    # affine matrix
    affine_mat = np.diag([
        parameters['res_x'] * parameters['ds'],
        parameters['res_y'] * parameters['ds'], parameters['res_z'], 1
    ])

    # save parameters
    parameters['volume_names'] = volume_names
    parameters['ext'] = ext
    parameters['lt'] = lt
    parameters['affine_mat'] = affine_mat

    try:
        os.makedirs(dir_output, exist_ok=True)
        with open(filename_parameters, 'wb') as file_handle:
            pickle.dump(parameters, file_handle)
            print('parameter file successfully saved.')

    except Exception as msg:
        print('parameter file not saved: %s.' % (msg))
Example #11
0
def collect_blocks(color_i, parameters):
    '''collect cells across all blocks

    color_i: index of the color channel.
    parameters: dictionary of parameters; the keys read here are dir_output
        and parallel_clean.  With parallel_clean the block files are read
        through a spark accumulator, otherwise they are read serially.

    Returns (block_id, cell_xyz_array, cell_weights_array,
    cell_timeseries_array, cell_lengths): the block index of every cell,
    voxel coordinates padded with -1, voxel weights padded with nan, the
    stacked timeseries, and the number of voxels per cell.

    Raises ValueError (from np.max) if no cell was collected at all.
    '''

    import os
    import h5py
    import numpy as np
    from types import SimpleNamespace
    from voluseg._tools.constants import hdf
    from voluseg._tools.evenly_parallelize import evenly_parallelize

    # set up spark
    import pyspark
    from pyspark.sql.session import SparkSession
    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext

    p = SimpleNamespace(**parameters)

    dir_cell = os.path.join(p.dir_output, 'cells', str(color_i))

    fullname_volmean = os.path.join(p.dir_output, 'volume%d' % (color_i))
    with h5py.File(fullname_volmean + hdf, 'r') as file_handle:
        block_valids = file_handle['block_valids'][()]

    class accum_data(pyspark.accumulators.AccumulatorParam):
        '''define accumulator class'''
        def zero(self, val0):
            return [[]] * 4

        def addInPlace(self, val1, val2):
            return [val1[i] + val2[i] for i in range(4)]

    # cumulate collected cells
    if p.parallel_clean:
        cell_data = sc.accumulator([[]] * 4, accum_data())

    def add_data(tuple_ii):
        '''read one block file; the block index is at position 1

        Adds the cells to the accumulator (parallel mode) or returns them
        (serial mode).  Returns None for an empty or missing block.
        '''
        ii = tuple_ii[1]
        try:
            block_id = []
            cell_xyz = []
            cell_weights = []
            cell_timeseries = []

            fullname_block = os.path.join(dir_cell, 'block%05d' % (ii))
            with h5py.File(fullname_block + hdf, 'r') as file_handle:
                for ci in range(file_handle['n_cells'][()]):
                    block_id.append(ii)
                    cell_xyz.append(file_handle['/cell/%05d/xyz' % (ci)][()])
                    cell_weights.append(file_handle['/cell/%05d/weights' %
                                                    (ci)][()])
                    cell_timeseries.append(
                        file_handle['/cell/%05d/timeseries' % (ci)][()])

            if p.parallel_clean:
                cell_data.add(
                    [block_id, cell_xyz, cell_weights, cell_timeseries])
            else:
                return [block_id, cell_xyz, cell_weights, cell_timeseries]

        except KeyError:
            print('block %d is empty.' % ii)
        except IOError:
            print('block %d does not exist.' % ii)

    idx_block_valids = np.argwhere(block_valids).T[0]
    if p.parallel_clean:
        evenly_parallelize(idx_block_valids).foreach(add_data)
        block_id, cell_xyz, cell_weights, cell_timeseries = cell_data.value
    else:
        block_data = [add_data((None, ii)) for ii in idx_block_valids]
        # empty or missing blocks yield None: skip them, as the parallel
        # branch does, instead of failing while unpacking
        block_data = [bd for bd in block_data if bd is not None]
        # flatten the per-block lists into one list per field
        block_id, cell_xyz, cell_weights, cell_timeseries = [
            [item for bd in block_data for item in bd[k]] for k in range(4)
        ]

    # convert lists to arrays
    cn = len(cell_xyz)
    block_id = np.array(block_id)
    cell_lengths = np.array([len(i) for i in cell_weights])
    cell_xyz_array = np.full((cn, np.max(cell_lengths), 3), -1, dtype=int)
    cell_weights_array = np.full((cn, np.max(cell_lengths)), np.nan)
    for ci, li in enumerate(cell_lengths):
        cell_xyz_array[ci, :li] = cell_xyz[ci]
        cell_weights_array[ci, :li] = cell_weights[ci]
    cell_timeseries_array = np.array(cell_timeseries)

    return block_id, cell_xyz_array, cell_weights_array, cell_timeseries_array, cell_lengths