def split_segments(seg_path, ids='/segm/labels_memb_del_relabeled_fix', outputstem=''):
    """Split a segmentation into nucleus- and membrane-masked label volumes."""

    labels = LabelImage(seg_path)
    labels.load(load_data=False)
    labels_ds = labels.slice_dataset()

    # nuclei
    outstem = '{}.h5{}'.format(outputstem, '/nucl/dapi')
    maskpath_sauvola = '{}_mask_sauvola'.format(outstem)
    maskpath_absmin = '{}_mask_absmin'.format(outstem)
    mask_nucl = nucl_mask(maskpath_sauvola, maskpath_absmin)
    write(mask_nucl, outstem, '_mask_nuclei', labels, imtype='Mask')

    # membranes
    # TODO: may combine with planarity_mask to make it more data-informed
    outstem = '{}.h5{}'.format(outputstem, '/memb/boundary')
    mask_memb = memb_mask(labels_ds)
    write(mask_memb, outstem, '_mask', labels, imtype='Mask')

    # masked label volumes
    outstem = '{}.h5{}'.format(outputstem, ids)
    for mask, pf in zip([mask_memb, mask_nucl], ['_memb', '_nucl']):
        labs = np.copy(labels_ds)
        labs[~mask] = 0
        write(labs, outstem, pf, labels, imtype='Label')


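# Usage sketch (hypothetical paths; dataset names follow the defaults above):
#
#   split_segments(
#       'sample_00000-01000.h5/segm/labels_memb_del_relabeled_fix',
#       outputstem='sample_00000-01000')
#
# This writes the nuclear/membrane masks and the masked label volumes into the
# block's h5 file under the '/nucl', '/memb' and '/segm' groups.

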
def delete_blocks_parallel(image_in, block_idx, postfix='fix'):
    """Delete block datasets from the h5 file."""

    im = LabelImage(image_in)
    im.load(load_data=False)

    vols = {
        postfix: 'uint32',
        '{}_reseg_mask'.format(postfix): 'bool',
        'block_idxs': 'uint16',
    }
    for pf, dtype in vols.items():
        delete_h5_dataset(im, pf=pf)


def copy_blocks_parallel(image_in, block_idx, postfix='fix'):
    """Copy block datasets within the h5 file."""

    im = LabelImage(image_in)
    im.load(load_data=False)

    vols = {
        postfix: ['Label', 'uint32'],
        '{}_reseg_mask'.format(postfix): ['Mask', 'bool'],
        # '{}_peaks'.format(postfix): ['Mask', 'bool'],
        'block_idxs': ['Label', 'uint16'],
    }
    # vols = {postfix: ['Mask', 'bool']}
    for pf, (imtype, dtype) in vols.items():
        copy_h5_dataset(im, imtype, pf, dtype, k=block_idx)


def relabel_parallel(image_in, block_idx, maxlabelfile, pf='relabeled'):
    """Relabel a block, offsetting by the maxlabels of the preceding blocks."""

    maxlabels = np.loadtxt(maxlabelfile, dtype=np.uint32)
    maxlabel = np.sum(maxlabels[:block_idx])
    # print('block_idx {:03d} will start from {:10d}'.format(block_idx, maxlabel))

    seg = LabelImage(image_in)
    seg.load(load_data=False)
    # TODO: into segmentation pipeline: but will probably fail as attr too big
    # seg.set_ulabels()
    # seg.ds.attrs.create('ulabels', seg.ulabels, dtype='uint32')

    relabel(seg, pf, maxlabel)


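# Usage sketch (hypothetical filenames): the maxlabel file is expected to hold
# one maxlabel per block, so the cumulative sum up to block_idx gives the
# relabeling offset for that block.
#
#   relabel_parallel(
#       'blocks/sample_00000-01000.h5/segm/labels_memb_del',
#       block_idx=3, maxlabelfile='sample_maxlabels.txt')

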
def relabel_blocks(info, ids='segm/labels_memb_del', pf='relabeled', maxlabel=1):
    """Relabel all blocks in the dataset sequentially."""

    for k, v in info.items():

        ods = '{}_{}'.format(ids, pf)
        print('relabeling block {:03d} (id: {}) to {}'.format(k, v['postfix'], ods))

        # seg = read_image(v, ids=ids, imtype='Label')
        fstem = os.path.join(v['datadir'], '{}_{}'.format(v['base'], v['postfix']))
        seg = LabelImage('{}.h5/{}'.format(fstem, ids))
        seg.load(load_data=False)

        seg = relabel(seg, maxlabel=maxlabel)
        if seg.maxlabel != 0:
            maxlabel = seg.maxlabel
        print('maxlabel = {:08d}'.format(maxlabel))

        seg.close()

    return maxlabel


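# Usage sketch (hypothetical block administration; the info dict is assumed to
# map a block index to its 'datadir', 'base' and 'postfix'):
#
#   info = {0: {'datadir': 'blocks', 'base': 'sample', 'postfix': '00000-01000'},
#           1: {'datadir': 'blocks', 'base': 'sample', 'postfix': '01000-02000'}}
#   maxlabel = relabel_blocks(info, ids='segm/labels_memb_del', pf='relabeled')

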
def write_output(outpath, out, props, imtype='Label'):
    """Write data to an image on disk."""

    props['dtype'] = out.dtype

    if imtype == 'Label':
        mo = LabelImage(outpath, **props)
    elif imtype == 'Mask':
        mo = MaskImage(outpath, **props)
    else:
        mo = Image(outpath, **props)

    mo.create()
    mo.write(out)

    return mo


def read_image(im_info, ids='segm/labels_memb_del', imtype='Label'):
    """Read an h5 dataset as an Image object."""

    fname = '{}_{}'.format(im_info['base'], im_info['postfix'])
    fstem = os.path.join(im_info['datadir'], fname)

    if imtype == 'Label':
        im = LabelImage('{}.h5/{}'.format(fstem, ids))
    elif imtype == 'Mask':
        im = MaskImage('{}.h5/{}'.format(fstem, ids))
    else:
        im = Image('{}.h5/{}'.format(fstem, ids))

    im.load(load_data=False)

    if imtype == 'Label':
        im.set_maxlabel()

    return im


def write(out, outstem, postfix, ref_im, imtype='Image'):
    """Write an image to disk."""

    outstem = outstem or gen_outpath(ref_im, '')
    outpath = '{}{}'.format(outstem, postfix)

    props = ref_im.get_props()
    props['dtype'] = out.dtype

    if imtype == 'Label':
        mo = LabelImage(outpath, **props)
    elif imtype == 'Mask':
        mo = MaskImage(outpath, **props)
    else:
        mo = Image(outpath, **props)

    mo.create()
    mo.write(out)

    if imtype == 'Label':
        mo.set_maxlabel()
        mo.ds.attrs.create('maxlabel', mo.maxlabel, dtype='uint32')

    return mo


def postprocess_features(
        seg_paths,
        blocksize=[],
        blockmargin=[],
        blockrange=[],
        csv_dir='',
        csv_stem='',
        feat_pf='_features',
        segm_pfs=['full', 'memb', 'nucl'],
        ext='csv',
        min_size_nucl=50,
        save_border_labels=False,
        split_features=False,
        fset_morph='minimal',
        fset_intens='minimal',
        ):
    """Combine the per-block feature csv's into a single filtered table."""

    labels = LabelImage(seg_paths[0], permission='r')
    labels.load(load_data=False)
    comps = labels.split_path()
    csv_dir = csv_dir or comps['dir']

    mpi = wmeMPI(usempi=False)
    mpi.set_blocks(labels, blocksize, blockmargin, blockrange)
    mpi.scatter_series()

    li = []
    keys = []
    for i in mpi.series:

        print('processing block {:03d}'.format(i))
        block = mpi.blocks[i]

        # read the csv's
        filestem = '{}_{}{}'.format(csv_stem, block['id'], feat_pf)
        dfs = {}
        for segm_pf in segm_pfs:
            filename = '{}_{}.{}'.format(filestem, segm_pf, ext)
            filepath = os.path.join(csv_dir, filename)
            dfs[segm_pf] = pd.read_csv(filepath, index_col='label', header=0)

        if len(dfs['full'].index) == 0:
            continue

        # select features
        # metrics = ['mean', 'median', 'variance', 'min', 'max']
        metrics = ['mean']
        feat_names = get_feature_names(fset_morph, fset_intens, metrics)
        df = select_features(dfs, feat_names, min_size_nucl, split_features)
        # df = rename_columns(df, metrics=metrics)

        # label filtering: only select full segments
        filestem = '{}_{}'.format(csv_stem, block['id'])

        sl_path = os.path.join(csv_dir, '{}_smalllabels.pickle'.format(filestem))
        with open(sl_path, 'rb') as f:
            slabels = pickle.load(f)

        bl_path = os.path.join(csv_dir, '{}_borderlabels.pickle'.format(filestem))
        if save_border_labels:
            labels.slices = block['slices']
            blabels = find_border_labels(labels)
            with open(bl_path, 'wb') as f:
                pickle.dump(blabels, f)
        else:
            with open(bl_path, 'rb') as f:
                blabels = pickle.load(f)

        blabels -= slabels
        df = df.drop(labels=blabels)

        li.append(df)
        keys.append(i)

    # concat keys are tracked per processed block, so skipped (empty) blocks
    # do not misalign the block index
    combined_df = pd.concat(li, keys=keys)
    combined_df.index.names = ['block', 'label']
    combined_df.reset_index(inplace=True)
    combined_df.drop_duplicates(subset='label', inplace=True)

    outputpath = os.path.join(csv_dir, '{}{}.csv'.format(csv_stem, feat_pf))
    combined_df.to_csv(outputpath, index=True, encoding='utf-8-sig')

    return combined_df


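# Usage sketch (hypothetical paths; the per-block feature csv's are assumed to
# be named '<csv_stem>_<block_id><feat_pf>_<segm_pf>.csv' as read above):
#
#   df = postprocess_features(
#       ['sample.h5/segm/labels_memb_del_relabeled_fix'],
#       blocksize=[106, 1280, 1280], blockmargin=[0, 64, 64],
#       csv_dir='features', csv_stem='sample', save_border_labels=True)

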
def split_nucl_and_memb(labels, outpat, nuclearmask=None):
    """Split a label volume into membrane and nuclear label volumes."""

    labeldata = labels.slice_dataset()

    labeldata, labeldata_nucl = split_nucl_and_memb_data(labeldata, nuclearmask)

    # output properties are assumed to follow the reference label image
    props = labels.get_props()
    props['dtype'] = labeldata.dtype

    pf = '_memb'
    outpath = outpat.format(pf)
    im_memb = LabelImage(outpath, **props)
    im_memb.create()
    im_memb.write(labeldata)

    pf = '_nucl'
    outpath = outpat.format(pf)
    im_nucl = LabelImage(outpath, **props)
    im_nucl.create()
    im_nucl.write(labeldata_nucl)

    return im_memb, im_nucl


def export_regionprops(
        seg_paths,
        seg_names=['full', 'memb', 'nucl'],
        data_paths=[],
        data_names=[],
        aux_data_path=[],
        downsample_factors=[1, 1, 1],
        outputstem='',
        blocksize=[],
        blockmargin=[],
        blockrange=[],
        channels=[],
        filter_borderlabels=False,
        min_labelsize=0,
        split_features=False,
        fset_morph=['label'],
        fset_intens=['mean_intensity'],
        fset_addit=['com_z', 'com_y', 'com_x'],
        ):
    """Export region properties of labeled segments, blockwise."""

    # load the segments: ['full'] or ['full', 'memb', 'nucl']
    label_ims = {}
    pfs = seg_names[:len(seg_paths)]
    for pf, seg_path in zip(pfs, seg_paths):
        im = LabelImage(seg_path, permission='r')
        im.load(load_data=False)
        label_ims[pf] = im
    comps = label_ims['full'].split_path()

    # prepare parallel processing
    mpi_label = wmeMPI(usempi=False)
    blocksize = blocksize or label_ims['full'].dims
    mpi_label.set_blocks(label_ims['full'], blocksize, blockmargin, blockrange)
    mpi_label.scatter_series()

    # load the data
    data_ims = {}
    mpi_data = wmeMPI(usempi=False)
    for i, data_path in enumerate(data_paths):
        pf = 'im{:02d}'.format(i)
        data = Image(data_path, permission='r')
        data.load(load_data=False)
        ch_idx = data.axlab.index('c')
        # FIXME: channels for multiple data_paths
        chs = channels or [ch for ch in range(data.dims[ch_idx])]
        names = [data_names.pop(0) for _ in range(len(chs))]
        data_ims[pf] = {'im': data, 'ch': chs, 'names': names}
        """ TODO
        try:
            mpi_data.blocks = [
                {'id': split_filename(comps['file'])[0]['postfix'],
                 'slices': dset_name2slices(comps['file'], axlab=data.axlab, shape=data.dims),
                 'path': ''},
                ]
        except:
        """
        mpi_data.set_blocks(data, blocksize, blockmargin, blockrange)

    border_labelset = set([])
    # if filter_borderlabels:
    #     outstem = outputstem or label_ims['full'].split_path()['base']
    #     outstem += '_dataset'
    #     border_labelset |= filter_borders(label_ims['full'], outstem)

    dfs = []
    for i in mpi_label.series:
        print('processing block {:03d} with id: {}'.format(i, mpi_label.blocks[i]['id']))
        dfs.append(
            process_block(
                mpi_label.blocks[i],
                mpi_data.blocks[i],
                label_ims,
                split_features,
                data_ims,
                min_labelsize,
                channels,
                filter_borderlabels,
                fset_morph,
                fset_intens,
                fset_addit,
                border_labelset,
                outputstem,
                aux_data_path,
                downsample_factors,
                ))

    return dfs


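# Usage sketch (hypothetical paths; one label volume plus the raw data from
# which the intensity features are measured):
#
#   dfs = export_regionprops(
#       ['sample.h5/segm/labels_memb_del_relabeled_fix'],
#       data_paths=['sample.h5/chan/ch00'], data_names=['dapi'],
#       outputstem='features/sample', blocksize=[106, 1280, 1280])

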
def mergeblocks(
        images_in,
        dataslices=None,
        blocksize=[],
        blockmargin=[],
        blockrange=[],
        blockoffset=[0, 0, 0],
        fullsize=[],
        is_labelimage=False,
        relabel=False,
        neighbourmerge=False,
        save_fwmap=False,
        blockreduce=[],
        func='np.amax',
        datatype='',
        usempi=False,
        outputpath='',
        save_steps=False,
        protective=False,
        ):
    """Merge blocks of data into a single hdf5 file."""

    if blockrange:
        images_in = images_in[blockrange[0]:blockrange[1]]

    mpi = wmeMPI(usempi)

    im = Image(images_in[0], permission='r')
    im.load(mpi.comm, load_data=False)
    props = im.get_props(protective=protective, squeeze=True)
    ndim = im.get_ndim()

    props['dtype'] = datatype or props['dtype']
    props['chunks'] = props['chunks'] or None

    # get the size of the outputfile
    # TODO: option to derive fullsize from dset_names?
    if blockreduce:
        datasize = np.subtract(fullsize, blockoffset)
        outsize = [int(np.ceil(d / float(b)))
                   for d, b in zip(datasize, blockreduce)]
        props['elsize'] = [e * b for e, b in zip(im.elsize, blockreduce)]
    else:  # FIXME: 'zyx(c)' stack assumed
        outsize = np.subtract(fullsize, blockoffset)

    if ndim == 4:
        outsize = list(outsize) + [im.ds.shape[3]]  # TODO: flexible insert

    if outputpath.endswith('.ims'):
        mo = LabelImage(outputpath)
        mo.create(comm=mpi.comm)
    else:
        props['shape'] = outsize
        mo = LabelImage(outputpath, **props)
        mo.create(comm=mpi.comm)

    mpi.blocks = [{'path': image_in} for image_in in images_in]
    mpi.nblocks = len(images_in)
    mpi.scatter_series()

    # merge the datasets
    maxlabel = 0
    for i in mpi.series:
        block = mpi.blocks[i]
        # try:
        maxlabel = process_block(block['path'], ndim, blockreduce, func,
                                 blockoffset, blocksize, blockmargin, fullsize,
                                 mo, is_labelimage, relabel, neighbourmerge,
                                 save_fwmap, maxlabel, mpi)
        print('processed block {:03d}: {}'.format(i, block['path']))
        # except Exception as e:
        #     print('failed block {:03d}: {}'.format(i, block['path']))
        #     print(e)

    im.close()
    mo.close()

    return mo


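# Usage sketch (hypothetical block files; fullsize is the shape of the merged
# volume in zyx):
#
#   mergeblocks(
#       ['blocks/sample_00000-01000.h5/segm/labels_memb_del_relabeled_fix',
#        'blocks/sample_01000-02000.h5/segm/labels_memb_del_relabeled_fix'],
#       blocksize=[106, 1280, 1280], blockmargin=[0, 64, 64],
#       fullsize=[106, 2240, 2240], is_labelimage=True,
#       outputpath='sample.h5/segm/labels_memb_del_relabeled_fix')

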
def csv_to_im(
        image_in,
        csv_path,
        labelkey='label',
        key='dapi',
        name='',
        maxlabel=0,
        normalize=False,
        scale_uint16=False,
        replace_nan=False,
        channel=-1,
        outpath='',
        ):
    """Write segment backprojection."""

    if isinstance(image_in, Image):
        labels = image_in
    else:
        labels = LabelImage(image_in)
        labels.load(load_data=False)

    if not maxlabel:
        labels.set_maxlabel()
        maxlabel = labels.maxlabel

    if csv_path.endswith('.csv'):
        df = pd.read_csv(csv_path)
        df = df.astype({labelkey: int})
    elif csv_path.endswith('.h5ad'):
        import scanpy as sc
        adata = sc.read(csv_path)
        if not csv_path.endswith('_nofilter.h5ad'):
            adata.X = adata.raw.X
        df = adata.obs[labelkey].astype(int)
        df = pd.concat([df, adata[:, key].to_df()], axis=1)

    # for key in keys:  # TODO
    fw = np.zeros(maxlabel + 1, dtype='float')
    for index, row in df.iterrows():
        fw[int(row[labelkey])] = row[key]

    if replace_nan:
        fw = np.nan_to_num(fw)

    if normalize:

        def normalize_data(data):
            """Normalize data between 0 and 1."""
            data = data.astype('float64')
            datamin = np.amin(data)
            datamax = np.amax(data)
            data -= datamin
            data *= 1 / (datamax - datamin)
            return data, [datamin, datamax]

        fw_n, fw_minmax = normalize_data(fw)
        fw_n *= 65535
        fw = fw_n
    elif scale_uint16:  # for e.g. pseudotime / FA / etc / any [0, 1] vars
        fw *= 65535

    out = labels.forward_map(list(fw))

    if outpath.endswith('.ims'):
        mo = Image(outpath, permission='r+')
        mo.load(load_data=False)
        if channel >= 0 and channel < mo.dims[3]:
            ch = channel
        else:
            mo.create()
            ch = mo.dims[3] - 1
        mo.slices[3] = slice(ch, ch + 1, 1)
        mo.write(out.astype(mo.dtype))  # FIXME: >65535 wraps around
        cpath = 'DataSetInfo/Channel {}'.format(ch)
        name = name or key
        mo.file[cpath].attrs['Name'] = np.array([c for c in name], dtype='|S1')
        mo.close()
    elif outpath.endswith('.nii.gz'):
        props = labels.get_props()
        if not labels.path.endswith('.nii.gz'):
            props = transpose_props(props, outlayout='xyz')
            out = out.transpose()
        mo = write_output(outpath, out, props)
    else:
        outpath = outpath or gen_outpath(labels, key)
        mo = write_output(outpath, out, labels.get_props())

    return mo
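

# Usage sketch (hypothetical paths and column names): backproject a per-label
# feature column onto the label volume, e.g. for inspection in Imaris.
#
#   csv_to_im(
#       'sample.h5/segm/labels_memb_del_relabeled_fix',
#       'sample_features.csv', labelkey='label', key='dapi_mean',
#       name='dapi_mean', normalize=True, channel=1, outpath='sample.ims')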