Example 1
def split_segments(seg_path,
                   ids='/segm/labels_memb_del_relabeled_fix',
                   outputstem=''):
    """Split a segmentation into nuclear and membrane compartments."""

    labels = LabelImage(seg_path)
    labels.load(load_data=False)
    labels_ds = labels.slice_dataset()

    # nuclei: combine the Sauvola and absolute-minimum DAPI threshold masks
    outstem = '{}.h5{}'.format(outputstem, '/nucl/dapi')
    maskpath_sauvola = '{}_mask_sauvola'.format(outstem)
    maskpath_absmin = '{}_mask_absmin'.format(outstem)
    mask_nucl = nucl_mask(maskpath_sauvola, maskpath_absmin)
    write(mask_nucl, outstem, '_mask_nuclei', labels, imtype='Mask')

    # membranes  # TODO: may combine with planarity_mask to make it more data-informed
    outstem = '{}.h5{}'.format(outputstem, '/memb/boundary')
    mask_memb = memb_mask(labels_ds)
    write(mask_memb, outstem, '_mask', labels, imtype='Mask')

    # zero out labels outside each compartment mask and write both label volumes
    outstem = '{}.h5{}'.format(outputstem, ids)
    for mask, pf in zip([mask_memb, mask_nucl], ['_memb', '_nucl']):
        labs = np.copy(labels_ds)
        labs[~mask] = 0
        write(labs, outstem, pf, labels, imtype='Label')
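
A minimal usage sketch; the paths below are hypothetical. These snippets assume `import os`, `import pickle`, `import numpy as np`, and `import pandas as pd`, with `Image`, `LabelImage`, `MaskImage`, `wmeMPI`, and the various helper functions coming from the surrounding segmentation package.

# Hypothetical dataset layout; 'ids' defaults to the relabeled, fixed labels.
split_segments('dataset.h5/segm/labels_memb_del_relabeled_fix',
               outputstem='dataset')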
Example 2
def relabel_blocks(info,
                   ids='segm/labels_memb_del',
                   pf='relabeled',
                   maxlabel=1):
    """Relabel all blocks in the dataset sequentially."""

    for k, v in info.items():

        ods = '{}_{}'.format(ids, pf)
        print('relabeling block {:03d} (id:{}) to {}'.format(
            k, v['postfix'], ods))

        # seg = read_image(v, ids=ids, imtype='Label')
        fstem = os.path.join(v['datadir'],
                             '{}_{}'.format(v['base'], v['postfix']))
        seg = LabelImage('{}.h5/{}'.format(fstem, ids))
        seg.load(load_data=False)

        seg = relabel(seg, maxlabel=maxlabel)

        # carry the running maximum forward so the next block continues from it
        if seg.maxlabel != 0:
            maxlabel = seg.maxlabel

        print('maxlabel = {:08d}'.format(maxlabel))

        seg.close()

    return maxlabel
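
A hedged sketch of the `info` mapping this function expects, inferred from the keys accessed above (`datadir`, `base`, `postfix`); the values are hypothetical.

info = {
    0: {'datadir': '/data/blocks', 'base': 'dataset', 'postfix': 'B000'},
    1: {'datadir': '/data/blocks', 'base': 'dataset', 'postfix': 'B001'},
}
maxlabel = relabel_blocks(info, ids='segm/labels_memb_del', maxlabel=1)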
Example 3
def split_nucl_and_memb(labels, outpat, nuclearmask=None):

    labeldata = labels.slice_dataset()

    labeldata, labeldata_nucl = split_nucl_and_memb_data(
        labeldata, nuclearmask)

    # derive output dataset properties from the reference image
    props = labels.get_props()

    pf = '_memb'
    outpath = outpat.format(pf)
    im_memb = LabelImage(outpath, **props)
    im_memb.create()
    im_memb.write(labeldata)

    pf = '_nucl'
    outpath = outpat.format(pf)
    im_nucl = LabelImage(outpath, **props)
    im_nucl.create()
    im_nucl.write(labeldata_nucl)

    return im_memb, im_nucl
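
Usage sketch, assuming `outpat` carries a single `{}` placeholder that receives the `_memb`/`_nucl` postfix; the paths are hypothetical.

labels = LabelImage('dataset.h5/segm/labels_memb_del')
labels.load(load_data=False)
im_memb, im_nucl = split_nucl_and_memb(
    labels, 'dataset.h5/segm/labels{}', nuclearmask=None)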
Example 4
def delete_blocks_parallel(image_in, block_idx, postfix='fix'):

    im = LabelImage(image_in)
    im.load(load_data=False)

    # per-block datasets to remove (dtypes listed for reference only)
    vols = {
        postfix: 'uint32',
        '{}_reseg_mask'.format(postfix): 'bool',
        'block_idxs': 'uint16'
    }

    # NOTE: block_idx is unused here; all listed datasets are deleted
    for pf in vols:
        delete_h5_dataset(im, pf=pf)
Example 5
def write_output(outpath, out, props, imtype='Label'):
    """Write data to an image on disk."""

    props['dtype'] = out.dtype
    if imtype == 'Label':
        mo = LabelImage(outpath, **props)
    elif imtype == 'Mask':
        mo = MaskImage(outpath, **props)
    else:
        mo = Image(outpath, **props)
    mo.create()
    mo.write(out)

    return mo
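
A sketch of writing a new label volume with properties borrowed from a reference image; names and paths are hypothetical.

ref = LabelImage('dataset.h5/segm/labels')
ref.load(load_data=False)
props = ref.get_props()
out = np.zeros(ref.dims, dtype='uint32')
mo = write_output('dataset.h5/segm/labels_new', out, props, imtype='Label')
mo.close()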
Example 6
def copy_blocks_parallel(image_in, block_idx, postfix='fix'):

    im = LabelImage(image_in)
    im.load(load_data=False)

    # datasets to copy: postfix -> [image type, dtype]
    vols = {
        postfix: ['Label', 'uint32'],
        '{}_reseg_mask'.format(postfix): ['Mask', 'bool'],
        #'{}_peaks'.format(postfix): ['Mask', 'bool'],
        'block_idxs': ['Label', 'uint16']
    }

    #vols = {postfix: ['Mask', 'bool']}

    for pf, (imtype, dtype) in vols.items():
        copy_h5_dataset(im, imtype, pf, dtype, k=block_idx)
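
A sketch tying this together with delete_blocks_parallel from Example 4: per-block datasets are copied under a postfix and can later be removed; the block path is hypothetical.

copy_blocks_parallel('dataset_B000.h5/segm/labels', block_idx=0, postfix='fix')
delete_blocks_parallel('dataset_B000.h5/segm/labels', block_idx=0, postfix='fix')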
Example 7
def relabel_parallel(image_in, block_idx, maxlabelfile, pf='relabeled'):

    # offset this block's labels by the label counts of all preceding blocks
    maxlabels = np.loadtxt(maxlabelfile, dtype=np.uint32)
    maxlabel = np.sum(maxlabels[:block_idx])

    seg = LabelImage(image_in)
    seg.load(load_data=False)

    # TODO: into segmentation pipeline: but will probably fail as attr too big
    # seg.set_ulabels()
    # seg.ds.attrs.create('ulabels', seg.ulabels, dtype='uint32')

    relabel(seg, pf, maxlabel)
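
Given the `np.loadtxt` call above, `maxlabelfile` is a plain-text file with one per-block label count per line; a hypothetical setup:

np.savetxt('dataset_maxlabels.txt',
           np.array([1200, 980, 1105], dtype=np.uint32), fmt='%d')
relabel_parallel('dataset_B001.h5/segm/labels_memb_del', block_idx=1,
                 maxlabelfile='dataset_maxlabels.txt')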
Example 8
def read_image(im_info, ids='segm/labels_memb_del', imtype='Label'):
    """"Read a h5 dataset as Image object."""

    fname = '{}_{}'.format(im_info['base'], im_info['postfix'])
    fstem = os.path.join(im_info['datadir'], fname)
    if imtype == 'Label':
        im = LabelImage('{}.h5/{}'.format(fstem, ids))
    elif imtype == 'Mask':
        im = MaskImage('{}.h5/{}'.format(fstem, ids))
    else:
        im = Image('{}.h5/{}'.format(fstem, ids))
    im.load(load_data=False)
    if imtype == 'Label':
        im.set_maxlabel()

    return im
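
The `im_info` record mirrors the block info used in Example 2; the values here are hypothetical.

im_info = {'datadir': '/data/blocks', 'base': 'dataset', 'postfix': 'B000'}
im = read_image(im_info, ids='segm/labels_memb_del', imtype='Label')
print(im.maxlabel)
im.close()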
Example 9
def write(out, outstem, postfix, ref_im, imtype='Image'):
    """Write an image to disk."""

    outstem = outstem or gen_outpath(ref_im, '')
    outpath = '{}{}'.format(outstem, postfix)

    props = ref_im.get_props()
    props['dtype'] = out.dtype

    if imtype == 'Label':
        mo = LabelImage(outpath, **props)
    elif imtype == 'Mask':
        mo = MaskImage(outpath, **props)
    else:
        mo = Image(outpath, **props)

    mo.create()
    mo.write(out)

    if imtype == 'Label':
        mo.set_maxlabel()
        mo.ds.attrs.create('maxlabel', mo.maxlabel, dtype='uint32')

    return mo
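
Unlike write_output in Example 5, write() also records a 'maxlabel' attribute for label images. A hypothetical call:

# ref_im: a loaded reference image, as in the Example 5 sketch.
mask = np.ones(ref_im.dims, dtype='bool')
mo = write(mask, 'dataset.h5/memb/boundary', '_mask', ref_im, imtype='Mask')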
Example 10
def postprocess_features(
    seg_paths,
    blocksize=[],
    blockmargin=[],
    blockrange=[],
    csv_dir='',
    csv_stem='',
    feat_pf='_features',
    segm_pfs=['full', 'memb', 'nucl'],
    ext='csv',
    min_size_nucl=50,
    save_border_labels=False,
    split_features=False,
    fset_morph='minimal',
    fset_intens='minimal',
):

    labels = LabelImage(seg_paths[0], permission='r')
    labels.load(load_data=False)
    comps = labels.split_path()

    csv_dir = csv_dir or comps['dir']

    mpi = wmeMPI(usempi=False)
    mpi.set_blocks(labels, blocksize, blockmargin, blockrange)
    mpi.scatter_series()

    li = []
    for i in mpi.series:
        print('processing block {:03d}'.format(i))
        block = mpi.blocks[i]

        # read the per-block feature CSVs
        filestem = '{}_{}{}'.format(csv_stem, block['id'], feat_pf)
        dfs = {}
        for segm_pf in segm_pfs:
            filename = '{}_{}.{}'.format(filestem, segm_pf, ext)
            filepath = os.path.join(csv_dir, filename)
            dfs[segm_pf] = pd.read_csv(filepath, index_col='label', header=0)

        if len(dfs['full'].index) == 0:
            continue

        # select features
        # metrics=['mean', 'median', 'variance', 'min', 'max']
        metrics = ['mean']
        feat_names = get_feature_names(fset_morph, fset_intens, metrics)
        df = select_features(dfs, feat_names, min_size_nucl, split_features)
        #df = rename_columns(df, metrics=metrics)

        # label filtering: only select full segments
        filestem = '{}_{}'.format(csv_stem, block['id'])
        sl_path = os.path.join(csv_dir,
                               '{}_smalllabels.pickle'.format(filestem))
        with open(sl_path, 'rb') as f:
            slabels = pickle.load(f)
        bl_path = os.path.join(csv_dir,
                               '{}_borderlabels.pickle'.format(filestem))
        if save_border_labels:
            labels.slices = block['slices']
            blabels = find_border_labels(labels)
            with open(bl_path, 'wb') as f:
                pickle.dump(blabels, f)
        else:
            with open(bl_path, 'rb') as f:
                blabels = pickle.load(f)

        # remove already-filtered small labels from the border set, then drop
        # segments that touch the block border
        blabels -= slabels
        df = df.drop(labels=blabels)

        li.append(df)

    combined_df = pd.concat(li, keys=mpi.series)
    combined_df.index.names = ['block', 'label']
    combined_df.reset_index(inplace=True)

    combined_df.drop_duplicates(subset='label', inplace=True)

    outputpath = os.path.join(csv_dir, '{}{}.csv'.format(csv_stem, feat_pf))
    combined_df.to_csv(outputpath, index=True, encoding='utf-8-sig')

    return combined_df
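
A hypothetical invocation; block sizes and the CSV stem are placeholders, and the per-block CSV/pickle files are expected to exist from an earlier export_regionprops run (Example 11).

df = postprocess_features(
    ['dataset.h5/segm/labels_memb_del_relabeled_fix'],
    blocksize=[106, 1280, 1280],
    blockmargin=[16, 64, 64],
    csv_dir='/data/blocks',
    csv_stem='dataset',
    save_border_labels=True,
)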
Example 11
def export_regionprops(
    seg_paths,
    seg_names=['full', 'memb', 'nucl'],
    data_paths=[],
    data_names=[],
    aux_data_path=[],
    downsample_factors=[1, 1, 1],
    outputstem='',
    blocksize=[],
    blockmargin=[],
    blockrange=[],
    channels=[],
    filter_borderlabels=False,
    min_labelsize=0,
    split_features=False,
    fset_morph=['label'],
    fset_intens=['mean_intensity'],
    fset_addit=['com_z', 'com_y', 'com_x'],
):

    # load the segments: ['full'] or ['full', 'memb', 'nucl']
    label_ims = {}
    pfs = seg_names[:len(seg_paths)]
    for pf, seg_path in zip(pfs, seg_paths):
        im = LabelImage(seg_path, permission='r')
        im.load(load_data=False)
        label_ims[pf] = im
    comps = label_ims['full'].split_path()

    # prepare parallel processing
    mpi_label = wmeMPI(usempi=False)
    blocksize = blocksize or label_ims['full'].dims
    mpi_label.set_blocks(label_ims['full'], blocksize, blockmargin, blockrange)
    mpi_label.scatter_series()

    # load the data
    data_ims = {}
    mpi_data = wmeMPI(usempi=False)
    for i, data_path in enumerate(data_paths):
        pf = 'im{:02d}'.format(i)
        data = Image(data_path, permission='r')
        data.load(load_data=False)
        ch_idx = data.axlab.index('c')
        # FIXME channels for multiple data_paths
        chs = channels or [ch for ch in range(data.dims[ch_idx])]
        names = [data_names.pop(0) for _ in range(len(chs))]
        data_ims[pf] = {'im': data, 'ch': chs, 'names': names}
        """ TODO
        try:
            mpi_data.blocks = [
                {'id': split_filename(comps['file'])[0]['postfix'],
                 'slices': dset_name2slices(comps['file'], axlab=data.axlab, shape=data.dims),
                 'path': '',},
                ]
        except:
        """
    # NOTE: assumes data_paths is non-empty; 'data' is the last image loaded above
    mpi_data.set_blocks(data, blocksize, blockmargin, blockrange)

    border_labelset = set([])
    #    if filter_borderlabels:
    #        outstem = outputstem or label_ims['full'].split_path()['base']
    #        outstem += '_dataset'
    #        border_labelset |= filter_borders(label_ims['full'], outstem)

    dfs = []
    for i in mpi_label.series:
        print('processing block {:03d} with id: {}'.format(
            i, mpi_label.blocks[i]['id']))
        dfs.append(
            process_block(
                mpi_label.blocks[i],
                mpi_data.blocks[i],
                label_ims,
                split_features,
                data_ims,
                min_labelsize,
                channels,
                filter_borderlabels,
                fset_morph,
                fset_intens,
                fset_addit,
                border_labelset,
                outputstem,
                aux_data_path,
                downsample_factors,
            ))

    return dfs
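
A hypothetical call; note that `data_names` needs one entry per selected channel, since the loop above pops a name for each channel.

dfs = export_regionprops(
    ['dataset.h5/segm/labels_memb_del_relabeled_fix'],
    data_paths=['dataset.h5/raw/data'],
    data_names=['dapi'],
    outputstem='dataset',
    blocksize=[106, 1280, 1280],
    blockmargin=[16, 64, 64],
    channels=[0],
)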
Example 12
def mergeblocks(
    images_in,
    dataslices=None,
    blocksize=[],
    blockmargin=[],
    blockrange=[],
    blockoffset=[0, 0, 0],
    fullsize=[],
    is_labelimage=False,
    relabel=False,
    neighbourmerge=False,
    save_fwmap=False,
    blockreduce=[],
    func='np.amax',
    datatype='',
    usempi=False,
    outputpath='',
    save_steps=False,
    protective=False,
):
    """Merge blocks of data into a single hdf5 file."""

    if blockrange:
        images_in = images_in[blockrange[0]:blockrange[1]]

    mpi = wmeMPI(usempi)

    im = Image(images_in[0], permission='r')
    im.load(mpi.comm, load_data=False)
    props = im.get_props(protective=protective, squeeze=True)
    ndim = im.get_ndim()

    props['dtype'] = datatype or props['dtype']
    props['chunks'] = props['chunks'] or None

    # get the size of the outputfile
    # TODO: option to derive fullsize from dset_names?
    if blockreduce:
        datasize = np.subtract(fullsize, blockoffset)
        outsize = [
            int(np.ceil(d / float(b)))
            for d, b in zip(datasize, blockreduce)
        ]
        props['elsize'] = [e * b for e, b in zip(im.elsize, blockreduce)]
    else:  # FIXME: 'zyx(c)' stack assumed
        outsize = np.subtract(fullsize, blockoffset)

    if ndim == 4:
        outsize = list(outsize) + [im.ds.shape[3]]  # TODO: flexible insert

    if outputpath.endswith('.ims'):
        mo = LabelImage(outputpath)
        mo.create(comm=mpi.comm)
    else:
        props['shape'] = outsize
        mo = LabelImage(outputpath, **props)
        mo.create(comm=mpi.comm)

    mpi.blocks = [{'path': image_in} for image_in in images_in]
    mpi.nblocks = len(images_in)
    mpi.scatter_series()

    # merge the datasets
    maxlabel = 0
    for i in mpi.series:

        block = mpi.blocks[i]
        # try:
        maxlabel = process_block(block['path'], ndim, blockreduce, func,
                                 blockoffset, blocksize, blockmargin, fullsize,
                                 mo, is_labelimage, relabel, neighbourmerge,
                                 save_fwmap, maxlabel, mpi)
        print('processed block {:03d}: {}'.format(i, block['path']))
        # except Exception as e:
        #     print('failed block {:03d}: {}'.format(i, block['path']))
        #     print(e)

    im.close()
    mo.close()

    return mo
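
A sketch merging hypothetical block files back into the full volume; the sizes are placeholders and must match the blocked layout.

images_in = ['dataset_B{:03d}.h5/segm/labels'.format(b) for b in range(4)]
mo = mergeblocks(
    images_in,
    blocksize=[106, 1280, 1280],
    blockmargin=[16, 64, 64],
    fullsize=[106, 2560, 2560],
    is_labelimage=True,
    relabel=True,
    outputpath='dataset_merged.h5/segm/labels',
)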
Example 13
def csv_to_im(
    image_in,
    csv_path,
    labelkey='label',
    key='dapi',
    name='',
    maxlabel=0,
    normalize=False,
    scale_uint16=False,
    replace_nan=False,
    channel=-1,
    outpath='',
):
    """Write segment backprojection."""

    if isinstance(image_in, Image):
        labels = image_in
    else:
        labels = LabelImage(image_in)
        labels.load(load_data=False)

    if not maxlabel:
        labels.set_maxlabel()
        maxlabel = labels.maxlabel

    # only .csv and .h5ad inputs are handled
    if csv_path.endswith('.csv'):
        df = pd.read_csv(csv_path)
        df = df.astype({labelkey: int})
    elif csv_path.endswith('.h5ad'):
        import scanpy as sc
        adata = sc.read(csv_path)
        if not csv_path.endswith('_nofilter.h5ad'):
            adata.X = adata.raw.X
        df = adata.obs[labelkey].astype(int)
        df = pd.concat([df, adata[:, key].to_df()], axis=1)

    # for key in keys:  # TODO
    # build a label -> value forward map over all label ids
    fw = np.zeros(maxlabel + 1, dtype='float')
    for _, row in df.iterrows():
        fw[int(row[labelkey])] = row[key]

    if replace_nan:
        fw = np.nan_to_num(fw)
    if normalize:

        def normalize_data(data):
            """Normalize data between 0 and 1."""
            data = data.astype('float64')
            datamin = np.amin(data)
            datamax = np.amax(data)
            data -= datamin
            data *= 1 / (datamax - datamin)
            return data, [datamin, datamax]

        fw_n, fw_minmax = normalize_data(fw)
        fw_n *= 65535
        fw = fw_n
    elif scale_uint16:  # for e.g. pseudotime / FA / etc / any [0, 1] vars
        fw *= 65535

    out = labels.forward_map(list(fw))

    if outpath.endswith('.ims'):
        mo = Image(outpath, permission='r+')
        mo.load(load_data=False)
        if 0 <= channel < mo.dims[3]:
            ch = channel
        else:
            mo.create()
            ch = mo.dims[3] - 1
        mo.slices[3] = slice(ch, ch + 1, 1)
        mo.write(out.astype(mo.dtype))  # FIXME: >65535 wraps around
        cpath = 'DataSetInfo/Channel {}'.format(ch)
        name = name or key
        mo.file[cpath].attrs['Name'] = np.array([c for c in name], dtype='|S1')
        mo.close()
    elif outpath.endswith('.nii.gz'):
        props = labels.get_props()
        if not labels.path.endswith('.nii.gz'):
            props = transpose_props(props, outlayout='xyz')
            out = out.transpose()
        mo = write_output(outpath, out, props)
    else:
        outpath = outpath or gen_outpath(labels, key)
        mo = write_output(outpath, out, labels.get_props())

    return mo
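
A hypothetical back-projection of a per-label feature column onto the segmentation; the CSV path and column names are placeholders.

mo = csv_to_im(
    'dataset.h5/segm/labels_memb_del_relabeled_fix',
    'dataset_features.csv',
    labelkey='label',
    key='dapi_mean',
    replace_nan=True,
    outpath='dataset_backproject.h5/dapi_mean',
)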