def number_of_voxels():
    """Print the voxel count of the scale-0 raw dataset.

    Only the dataset's shape is read. The count is printed as a plain
    integer and again divided by 1e12 ("TVoxel").
    """
    p = '../data/rawdata/sbem-6dpf-1-whole-raw.n5'
    is_h5 = is_h5_file(p)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=0)
    # The path is an .n5 container, which h5py.File cannot open; use
    # open_file like the other readers in this file do for .n5 paths.
    with open_file(p, 'r') as f:
        shape = f[key].shape
    # Accumulate in 64 bit explicitly so the product cannot wrap on
    # platforms where numpy's default integer is 32 bit.
    n_vox = int(np.prod(shape, dtype='int64'))
    print("Number of voxel:")
    print(n_vox)
    print("corresponds to")
    print(float(n_vox) / 1e12, "TVoxel")
def append_nephridia_table():
    """Add a 'nephridia' column to the cell regions table.

    The cell ids listed in the cilia cell-mapping table are looked up in
    the cell segmentation (scale 4) and the nephridia segmentation
    (scale 0), which must have the same shape. For every such cell the
    single non-zero nephridia label it overlaps is stored in the new
    column; all other rows keep the value 0. The regions table is then
    overwritten in place.

    Raises:
        AssertionError - if the two volumes differ in shape, or a cell
            does not overlap exactly one non-zero nephridia label
    """
    mapping_path = '../../data/0.6.5/tables/sbem-6dpf-1-whole-segmented-cilia/cell_mapping.csv'
    out_table_path = '../../data/0.6.5/tables/sbem-6dpf-1-whole-segmented-cells/regions.csv'
    seg_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-cells.n5'
    nep_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-nephridia.n5'

    mapping = pd.read_csv(mapping_path, sep='\t')
    cell_ids = np.unique(mapping['cell_id'].values)
    # np.unique sorts ascending, so a 0 id (presumably "no cell") is first
    if cell_ids[0] == 0:
        cell_ids = cell_ids[1:]

    table = pd.read_csv(out_table_path, sep='\t')
    new_col = np.zeros(len(table), dtype='float32')

    print("Loading volumes ...")
    key = get_key(is_h5_file(seg_path), setup_id=0, time_point=0, scale=4)
    with open_file(seg_path, 'r') as f:
        seg = f[key][:]

    key = get_key(is_h5_file(nep_path), setup_id=0, time_point=0, scale=0)
    with open_file(nep_path, 'r') as f:
        nep = f[key][:]
    assert nep.shape == seg.shape, \
        "shape mismatch: nephridia %s vs cells %s" % (nep.shape, seg.shape)

    print("Iterating over cells ...")
    for cid in cell_ids:
        nid = np.unique(nep[seg == cid])
        # drop the zero label, keep only real nephridia ids
        nid = nid[nid != 0]
        assert len(nid) == 1, \
            "cell %s overlaps %i nephridia labels, expected exactly 1" % (cid, len(nid))
        # NOTE(review): indexing by cid assumes row i of the regions table
        # belongs to cell label i - confirm against the table layout.
        # Assign the scalar, not the length-1 array: implicit array->scalar
        # conversion is deprecated in recent numpy.
        new_col[cid] = nid[0]

    table['nephridia'] = new_col
    table.to_csv(out_table_path, sep='\t', index=False)
def make_nephridia_segmentation():
    """Build the nephridia segmentation from the cilia-mapped cells.

    Cells listed in the cilia cell-mapping table are selected from the
    scale-4 cell segmentation, the selection is labelled into connected
    components, and the two largest non-background components are written
    out as labels 1 and 2 via make_bdv.
    """
    mapping_path = '../../data/0.6.5/tables/sbem-6dpf-1-whole-segmented-cilia/cell_mapping.csv'
    cells_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-cells.n5'
    bdv_path = '../../data/0.6.5/segmentations/sbem-6dpf-1-whole-segmented-nephridia.xml'

    mapping = pd.read_csv(mapping_path, sep='\t')
    cell_ids = np.unique(mapping['cell_id'].values)
    # unique ids are sorted, so a zero id can only sit at the front
    if cell_ids[0] == 0:
        cell_ids = cell_ids[1:]
    print(cell_ids)

    key = get_key(is_h5_file(cells_path), setup_id=0, time_point=0, scale=4)
    with open_file(cells_path, 'r') as f:
        seg = f[key][:].astype('uint32')

    block_shape = (32, 256, 256)
    selected = np.zeros_like(seg)
    print("Isin ...")
    selected = elf.parallel.isin(seg, cell_ids, out=selected,
                                 n_threads=16, verbose=True,
                                 block_shape=block_shape)

    print("Label ...")
    components = vigra.analysis.labelVolumeWithBackground(selected)

    print("Size filter ...")
    ids, counts = elf.parallel.unique(components, return_counts=True,
                                      n_threads=16, verbose=True,
                                      block_shape=block_shape)
    # ids of the three biggest components, largest first
    by_size = np.argsort(counts)[::-1]
    largest = ids[by_size[:3]]
    # the biggest one is expected to be the background
    assert largest[0] == 0

    out = np.zeros(components.shape, dtype='uint8')
    new_id = 0
    for keep_id in largest[1:]:
        new_id += 1
        out[components == keep_id] = new_id

    make_bdv(out, bdv_path, [[2, 2, 2]] * 3,
             resolution=[.4, .32, .32], unit='micrometer')
def write_h5_files(table, folder, raw_seg_path):
    """
    Writes individual h5 file for each row in the table, equal to the
    bounding box of that object + the border added by calculate_slice
    (10 pixels on all dimensions according to the original description)

    Each file is named <label_id>.h5 and holds the cut-out in a gzip
    compressed dataset called 'dataset'.

    Args:
        table [pd.Dataframe] - table of nucleus statistics; needs the columns
            label_id and bb_min_x/y/z, bb_max_x/y/z
        folder [str] - a temporary folder to write files to
        raw_seg_path [str] - path to the raw data the cut-outs are read from
    """
    # The key only depends on the input path, so compute it once.
    # Keyword is setup_id, matching every other get_key call for these volumes.
    is_h5 = is_h5_file(raw_seg_path)
    raw_key = get_key(is_h5, setup_id=0, time_point=0, scale=1)

    for row in table.itertuples(index=False):

        # min max coordinates in microns for segmentation
        minmax_seg = [
            row.bb_min_x, row.bb_min_y, row.bb_min_z,
            row.bb_max_x, row.bb_max_y, row.bb_max_z
        ]

        # raw scale (from xml) for 2x downsampled
        raw_scale = [0.02, 0.02, 0.025]

        # slice for raw file
        raw_slice = calculate_slice(raw_scale, minmax_seg, addBorder=True)

        with open_file(raw_seg_path, 'r') as f:
            # get 2x downsampled nuclei
            img_array = f[raw_key][raw_slice]

        # write h5 file for nucleus
        result_path = os.path.join(folder, str(row.label_id) + '.h5')

        with open_file(result_path, 'a') as f:
            # 64^3 chunks only if the cut-out is at least that big along
            # every axis, otherwise store it as one single chunk
            if all(extent >= 64 for extent in img_array.shape[:3]):
                chunks = (64, 64, 64)
            else:
                chunks = img_array.shape

            f.create_dataset('dataset', chunks=chunks, compression='gzip',
                             shape=img_array.shape, dtype=img_array.dtype)
            f['dataset'][:] = img_array
def animal():
    """Print the bounding-box size of the inside mask in pixels and microns.

    The scale-0 mask is loaded fully into memory; the bounding box spans
    all voxels with a value > 0.
    """
    p = '../data/rawdata/sbem-6dpf-1-whole-mask-inside.n5'
    is_h5 = is_h5_file(p)
    key = get_key(is_h5, setup_id=0, time_point=0, scale=0)
    # The path is an .n5 container, which h5py.File cannot open; use
    # open_file like the other readers in this file do for .n5 paths.
    with open_file(p, 'r') as f:
        mask = f[key][:]

    # one coordinate array per axis for all foreground voxels
    bb = np.where(mask > 0)
    mins = [b.min() for b in bb]
    maxs = [b.max() for b in bb]
    # NOTE(review): max - min is one less than the inclusive voxel count
    # per axis - confirm whether that is the intended definition of size.
    size = [ma - mi for mi, ma in zip(mins, maxs)]

    print("Animal size in pixel:")
    print(size)
    # voxel size per axis, presumably in micron at the mask's resolution
    res = [.4, .32, .32]

    size = [si * re for si, re in zip(size, res)]
    print("Animal size in micron:")
    print(size)
def process_ilastik_output(table, ilastik_file, nucleus_seg_path, final_output):
    """
    Processes output h5 files from ilastik, doing an opening / closing to clean up the segmentation,
    change label ids so (euchromatin == nucleus_id & heterochromatin == 12000 + nucleus_id) and use
    nucleus segmentation as a mask to set background to 0.
    Then writes to the main results file / deletes ilastik file.

    Args:
        table [pd.Dataframe] - table of nucleus statistics
        ilastik_file [str] - path to ilastik output file .h5
        nucleus_seg_path [str] - path to nuclear segmentation
        final_output [str] - path to the main output file .h5
    """
    # Get label id of nucleus from file name
    label_id = get_label_id_from_file(ilastik_file)

    # select correct row of table
    select = table['label_id'] == label_id

    # minmax of bounding box for that nucleus (first matching row)
    bb_columns = ['bb_min_x', 'bb_min_y', 'bb_min_z',
                  'bb_max_x', 'bb_max_y', 'bb_max_z']
    minmax_seg = [table.loc[select, column].iloc[0] for column in bb_columns]

    # read out ilastik result
    print('Processing Ilastik result...' + str(label_id))
    with open_file(ilastik_file, 'r') as f:
        data = f['exported_data'][:]

    # reads in as zyxc, drop the c channel
    data = data[:, :, :, 0]

    # Convert from 1/2 label to 0/1 - now heterochromatin is 0 and euchromatin is 1
    data[data == 1] = 0
    data[data == 2] = 1

    # Then do opening / closing
    data = skimage.morphology.binary_opening(data)
    data = skimage.morphology.binary_closing(data)

    # remove the extra 10 pixels border around the nucleus
    data = data[10:data.shape[0] - 10,
                10:data.shape[1] - 10,
                10:data.shape[2] - 10]
    data = data.astype('uint16')

    # change to implicit mapping to nuclei
    # euchromatin (1) = nucleus id
    # heterochromatin (0) = 12000 + nucleus id
    data[data == 1] = label_id
    data[data == 0] = 12000 + label_id

    # segmentation file scale
    seg_scale = [0.08, 0.08, 0.1]

    # slice for segmentation file
    seg_slice = calculate_slice(seg_scale, minmax_seg, False)

    is_h5 = is_h5_file(nucleus_seg_path)
    # keyword is setup_id, matching every other get_key call for these volumes
    nuc_key = get_key(is_h5, setup_id=0, time_point=0, scale=0)

    # open the nuclear segmentation for correct nucleus
    with open_file(nucleus_seg_path, 'r') as f:
        # get full-res dataset
        img_array = f[nuc_key][seg_slice]

    # binarise so 1 in the relevant nucleus, 0 outside
    img_array[img_array != label_id] = 0
    img_array[img_array == label_id] = 1

    # use the vigra resize here, seems much more memory efficient
    img_array = img_array.astype('float32')
    img_array = vigra.sampling.resize(img_array, shape=data.shape, order=0)
    img_array = img_array.astype('uint8')

    # set pixels outside the nucleus segmentation to 0
    data[img_array == 0] = 0
    # drop the reference so the mask can be freed before the next read
    img_array = None

    # raw scale (from xml) for 2x downsampled
    raw_scale = [0.02, 0.02, 0.025]

    # slice for raw file
    raw_slice = calculate_slice(raw_scale, minmax_seg, addBorder=False)

    # write to the main h5 file
    with open_file(final_output, 'r+') as f:
        result = f['dataset']

        # read in part covered by the nuclear bounding box
        result_data = result[raw_slice]

        # Set the part covered by the nuclear segmentation to the new values;
        # the mask is computed once and reused for both sides
        foreground = data != 0
        result_data[foreground] = data[foreground]

        # write it back
        result[raw_slice] = result_data

    # remove temporary segmentation file once write is successful
    os.remove(ilastik_file)