def main(args):
    """Detect nuclei in an image, compute their features and save both.

    Steps: validate the arguments, start a Dask client, open the image with
    large_image, select foreground tiles (whole-slide images only), compute
    Reinhard color statistics (whole-slide, whole-image runs only), run
    ``compute_tile_nuclei_features`` on every retained tile through
    ``dask.delayed``, then write the annotation JSON and the feature table.

    Parameters
    ----------
    args : namespace-like object
        Parsed CLI parameters. Attributes read here: ``inputImageFile``,
        ``outputNucleiAnnotationFile``, ``outputNucleiFeatureFile``,
        ``analysis_roi`` (all ``-1`` means "whole image"),
        ``analysis_tile_size``, ``analysis_mag``, ``min_fgnd_frac`` and
        ``nuclei_annotation_format``. The object is also forwarded to
        ``check_args``, ``cli_utils.create_dask_client`` and
        ``compute_tile_nuclei_features``.

    Raises
    ------
    ValueError
        If the extension of ``args.outputNucleiFeatureFile`` is neither
        ``.csv`` nor ``.h5``.
    """
    total_start_time = time.time()

    print('\n>> CLI Parameters ...\n')
    print(args)

    check_args(args)

    # Fail before any expensive work if the feature file cannot be written.
    feature_file_format = os.path.splitext(args.outputNucleiFeatureFile)[1]
    if feature_file_format not in ('.csv', '.h5'):
        raise ValueError(
            'Extension of output feature file must be .csv or .h5')

    # An ROI made only of -1 is the sentinel for "analyze the whole image".
    process_whole_image = bool(np.all(np.array(args.analysis_roi) == -1))

    #
    # Initiate Dask client
    #
    print('\n>> Creating Dask client ...\n')

    start_time = time.time()
    c = cli_utils.create_dask_client(args)
    print(c)
    dask_setup_time = time.time() - start_time
    print('Dask setup time = {} seconds'.format(dask_setup_time))

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)
    ts_metadata = ts.getMetadata()
    print(json.dumps(ts_metadata, indent=2))

    # The image is treated as a whole-slide image when its metadata reports
    # a magnification.
    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi and process_whole_image:
        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()
        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)
        fgnd_time = time.time() - start_time

        print('low-res foreground mask computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_time)))

    #
    # Compute foreground fraction of tiles in parallel using Dask
    #
    tile_fgnd_frac_list = [1.0]

    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
    }

    if not process_whole_image:
        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels',
        }

    if is_wsi:
        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']
        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:
            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
                it_kwargs)
        else:
            # Inside a user-supplied ROI every tile counts as foreground.
            # This must be an array: a plain list cannot be compared with
            # a float on Python 3.
            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            np.asarray(tile_fgnd_frac_list) >= args.min_fgnd_frac)
        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles

        fgnd_frac_comp_time = time.time() - start_time

        print('Number of foreground tiles = {0:d} ({1:.2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))
        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    #
    # Compute reinhard stats for color normalization
    #
    src_mu_lab = None
    src_sigma_lab = None

    if is_wsi and process_whole_image:
        print('\n>> Computing reinhard color normalization stats ...\n')

        start_time = time.time()
        src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
            args.inputImageFile, 0.01, magnification=args.analysis_mag)
        rstats_time = time.time() - start_time

        print('Reinhard stats computation time = {}'.format(
            cli_utils.disp_time_hms(rstats_time)))

    #
    # Detect and compute nuclei features in parallel using Dask
    #
    print('\n>> Detecting nuclei and computing features ...\n')

    start_time = time.time()

    tile_result_list = []

    for tile in ts.tileIterator(**it_kwargs):
        tile_position = tile['tile_position']['position']

        # Skip tiles whose foreground fraction does not exceed the minimum.
        if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        # Schedule nuclei detection for this tile; nothing runs yet.
        cur_result = dask.delayed(compute_tile_nuclei_features)(
            args.inputImageFile,
            tile_position,
            args, it_kwargs,
            src_mu_lab, src_sigma_lab)

        tile_result_list.append(cur_result)

    # Run all scheduled tile computations.
    tile_result_list = dask.delayed(tile_result_list).compute()

    # Each tile result is unpacked as (annotation list, feature table).
    nuclei_annot_list = [
        annot
        for annot_list, fdata in tile_result_list
        for annot in annot_list
    ]

    if tile_result_list:
        nuclei_fdata = pd.concat(
            [fdata for annot_list, fdata in tile_result_list],
            ignore_index=True)
    else:
        # pd.concat raises ValueError on an empty sequence; an image with
        # no retained tile simply yields an empty feature table.
        nuclei_fdata = pd.DataFrame()

    nuclei_detection_time = time.time() - start_time

    print('Number of nuclei = {}'.format(len(nuclei_annot_list)))
    print('Nuclei detection time = {}'.format(
        cli_utils.disp_time_hms(nuclei_detection_time)))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {
        "name": annot_fname + '-nuclei-' + args.nuclei_annotation_format,
        "elements": nuclei_annot_list
    }

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    #
    # Create CSV Feature file
    #
    print('>> Writing CSV feature file')

    if feature_file_format == '.csv':
        nuclei_fdata.to_csv(args.outputNucleiFeatureFile, index=False)
    else:
        # Only '.h5' can reach this branch (validated at the top).
        nuclei_fdata.to_hdf(args.outputNucleiFeatureFile, key='Features',
                            format='table', mode='w')

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
def test_create_tile_nuclei_annotations(self):
    """Check bbox and boundary nuclei annotations against stored ground truth.

    Nuclei are segmented on the tiles of a fixed region of a TCGA slide and
    converted to annotations with
    ``cli_utils.create_tile_nuclei_annotations`` in both ``'bbox'`` and
    ``'boundary'`` modes; the results are compared element by element with
    the reference annotation files.
    """
    wsi_path = os.path.join(
        utilities.externaldata(
            'data/TCGA-06-0129-01Z-00-DX3.bae772ea-dd36-47ec-8185-761989be3cc8.svs.sha512'  # noqa
        ))

    # define parameters
    args = {
        'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
        'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],

        'stain_1': 'hematoxylin',
        'stain_2': 'eosin',
        'stain_3': 'null',

        'stain_1_vector': [-1, -1, -1],
        'stain_2_vector': [-1, -1, -1],
        'stain_3_vector': [-1, -1, -1],

        'min_fgnd_frac': 0.50,
        'analysis_mag': 20,
        'analysis_tile_size': 1200,

        'foreground_threshold': 60,
        'min_radius': 6,
        'max_radius': 12,
        'min_nucleus_area': 25,
        'local_max_search_radius': 8,

        # In Python 3 unittesting, the scheduler fails if it uses processes
        'scheduler': 'multithreading',  # None,
        'num_workers': -1,
        'num_threads_per_worker': 1,
    }

    # Expose the parameters as attributes, like parsed CLI arguments.
    args = collections.namedtuple('Parameters', args.keys())(**args)

    # read WSI
    ts = large_image.getTileSource(wsi_path)

    ts_metadata = ts.getMetadata()

    # Round the requested tile size down to a multiple of the native tile
    # size in each direction.
    analysis_tile_size = {
        'width': int(ts_metadata['tileWidth'] * np.floor(
            1.0 * args.analysis_tile_size / ts_metadata['tileWidth'])),
        'height': int(ts_metadata['tileHeight'] * np.floor(
            1.0 * args.analysis_tile_size / ts_metadata['tileHeight']))
    }

    # define ROI
    roi = {
        'left': ts_metadata['sizeX'] / 2,
        'top': ts_metadata['sizeY'] * 3 / 4,
        'width': analysis_tile_size['width'],
        'height': analysis_tile_size['height'],
        'units': 'base_pixels'
    }

    # define tile iterator parameters
    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
        'region': roi
    }

    # create dask client
    cli_utils.create_dask_client(args)

    # get tile foreground at low res
    im_fgnd_mask_lres, fgnd_seg_scale = \
        cli_utils.segment_wsi_foreground_at_low_res(ts)

    # compute tile foreground fraction
    tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
        wsi_path, im_fgnd_mask_lres, fgnd_seg_scale,
        it_kwargs)

    num_fgnd_tiles = np.count_nonzero(
        tile_fgnd_frac_list >= args.min_fgnd_frac)

    np.testing.assert_equal(num_fgnd_tiles, 2)

    # create nuclei annotations
    nuclei_bbox_annot_list = []
    nuclei_bndry_annot_list = []

    # The stain matrix depends only on args, so compute it once.
    w = cli_utils.get_stain_matrix(args)

    for tile_info in ts.tileIterator(
            format=large_image.tilesource.TILE_FORMAT_NUMPY,
            **it_kwargs):

        # keep only the first three channels
        im_tile = tile_info['tile'][:, :, :3]

        # perform color normalization
        im_nmzd = htk_cnorm.reinhard(im_tile,
                                     args.reference_mu_lab,
                                     args.reference_std_lab)

        # perform color deconvolution
        im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

        # np.float was removed in NumPy 1.24; it was an alias of the
        # builtin float, so the resulting dtype is unchanged.
        im_nuclei_stain = im_stains[:, :, 0].astype(float)

        # segment nuclei
        im_nuclei_seg_mask = htk_nuclear.detect_nuclei_kofahi(
            im_nuclei_stain,
            im_nuclei_stain < args.foreground_threshold,
            args.min_radius,
            args.max_radius,
            args.min_nucleus_area,
            args.local_max_search_radius)

        # generate nuclei annotations as bboxes
        cur_bbox_annot_list = cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, 'bbox')

        nuclei_bbox_annot_list.extend(cur_bbox_annot_list)

        # generate nuclei annotations as boundaries
        cur_bndry_annot_list = cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, 'boundary')

        nuclei_bndry_annot_list.extend(cur_bndry_annot_list)

    # compare nuclei bbox annotations with gtruth
    nuclei_bbox_annot_gtruth_file = os.path.join(
        utilities.externaldata(
            'data/TCGA-06-0129-01Z-00-DX3_roi_nuclei_bbox.anot.sha512'  # noqa
        ))

    with open(nuclei_bbox_annot_gtruth_file, 'r') as fbbox_annot:
        nuclei_bbox_annot_list_gtruth = json.load(fbbox_annot)['elements']

    # Check that nuclei_bbox_annot_list is nearly equal to
    # nuclei_bbox_annot_list_gtruth
    assert len(nuclei_bbox_annot_list) == len(
        nuclei_bbox_annot_list_gtruth)
    for annot, annot_gtruth in zip(nuclei_bbox_annot_list,
                                   nuclei_bbox_annot_list_gtruth):
        np.testing.assert_array_almost_equal(
            annot['center'], annot_gtruth['center'], 0)
        np.testing.assert_almost_equal(
            annot['width'], annot_gtruth['width'], 1)
        np.testing.assert_almost_equal(
            annot['height'], annot_gtruth['height'], 1)

    # compare nuclei boundary annotations with gtruth
    nuclei_bndry_annot_gtruth_file = os.path.join(
        utilities.externaldata(
            'data/TCGA-06-0129-01Z-00-DX3_roi_nuclei_boundary.anot.sha512'  # noqa
        ))

    with open(nuclei_bndry_annot_gtruth_file, 'r') as fbndry_annot:
        nuclei_bndry_annot_list_gtruth = json.load(
            fbndry_annot)['elements']

    assert len(nuclei_bndry_annot_list) == len(
        nuclei_bndry_annot_list_gtruth)

    for annot, annot_gtruth in zip(nuclei_bndry_annot_list,
                                   nuclei_bndry_annot_list_gtruth):
        np.testing.assert_array_almost_equal(
            annot['points'], annot_gtruth['points'], 0)
def main(args):
    """Detect cells tile by tile and write annotations plus a timing CSV.

    The image is opened with large_image, foreground tiles are selected
    (whole-slide images only), ``detect_tile_cell`` is called sequentially
    on every retained tile, the per-tile statistics it accumulates are
    saved as a CSV next to the annotation file, and the detected cells are
    written as a JSON annotation.

    Parameters
    ----------
    args : namespace-like object
        Parsed CLI parameters. Attributes read here: ``inputImageFile``,
        ``outputCellAnnotationFile``, ``analysis_roi`` (all ``-1`` means
        "whole image"), ``analysis_tile_size``, ``analysis_mag``,
        ``min_fgnd_frac`` and ``cell_annotation_format``. The object is
        also forwarded to ``detect_tile_cell``.

    Raises
    ------
    IOError
        If ``args.inputImageFile`` is not an existing file.
    ValueError
        If ``args.analysis_roi`` does not have exactly 4 elements.
    """
    total_start_time = time.time()

    print('\n>> CLI Parameters ...\n')
    print(args)

    if not os.path.isfile(args.inputImageFile):
        raise IOError('Input image file does not exist.')

    if len(args.analysis_roi) != 4:
        raise ValueError('Analysis ROI must be a vector of 4 elements.')

    # An ROI made only of -1 is the sentinel for "analyze the whole image".
    process_whole_image = bool(np.all(np.array(args.analysis_roi) == -1))

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)
    ts_metadata = ts.getMetadata()
    print(json.dumps(ts_metadata, indent=2))

    # The image is treated as a whole-slide image when its metadata reports
    # a magnification.
    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi and process_whole_image:
        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()
        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)
        fgnd_time = time.time() - start_time

        print('low-res foreground mask computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_time)))

    #
    # Compute foreground fraction of tiles
    #
    tile_fgnd_frac_list = [1.0]

    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
    }

    if not process_whole_image:
        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels',
        }

    if is_wsi:
        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']
        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:
            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
                it_kwargs)
        else:
            # Inside a user-supplied ROI every tile counts as foreground.
            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)
        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles

        fgnd_frac_comp_time = time.time() - start_time

        # str.format does not treat '%' specially, so a single '%' is the
        # correct way to print a percent sign here.
        print('Number of foreground tiles = {0:d} ({1:.2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))
        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    #
    # Detect cells (tiles are processed one after another)
    #
    print('\n>> Detecting cell ...\n')

    start_time = time.time()

    tile_cell_list = []

    # Per-tile statistics; detect_tile_cell receives this dict and returns
    # the updated version.
    csv_dict = {
        'Image Loading': [],
        'Cell Detection': [],
        'Cell Cropping': [],
        'Cell Classification': [],
        'Annotation Writing': [],
        'Number of Cells': [],
    }

    for tile in ts.tileIterator(**it_kwargs):
        tile_position = tile['tile_position']['position']

        # Skip tiles whose foreground fraction does not exceed the minimum.
        if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        cur_cell_list, csv_dict = detect_tile_cell(
            args.inputImageFile,
            tile_position,
            csv_dict,
            args, it_kwargs)

        tile_cell_list.append(cur_cell_list)

    df = pd.DataFrame(csv_dict, columns=[
        'Number of Cells',
        'Image Loading',
        'Cell Detection',
        'Cell Cropping',
        'Cell Classification',
        'Annotation Writing',
    ])

    # Write the statistics next to the annotation file. splitext removes
    # the real extension instead of assuming it is five characters long.
    csv_fname = os.path.splitext(args.outputCellAnnotationFile)[0] + '.csv'
    df.to_csv(csv_fname)

    cell_list = list(itertools.chain.from_iterable(tile_cell_list))

    cell_detection_time = time.time() - start_time

    print('Number of cells = {}'.format(len(cell_list)))
    print('Cell detection time = {}'.format(
        cli_utils.disp_time_hms(cell_detection_time)))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputCellAnnotationFile))[0]

    annotation = {
        "name": annot_fname + '-cell-' + args.cell_annotation_format,
        "elements": cell_list
    }

    with open(args.outputCellAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
def main(args):
    """Detect nuclei in an image with Dask and write them as annotations.

    Steps: validate the arguments, start a Dask client, open the image with
    large_image, select foreground tiles (whole-slide images only), compute
    Reinhard color statistics (whole-slide, whole-image runs only), run
    ``detect_tile_nuclei`` on every retained tile through ``dask.delayed``
    and write the merged result as a JSON annotation.

    Parameters
    ----------
    args : namespace-like object
        Parsed CLI parameters. Attributes read here: ``inputImageFile``,
        ``outputNucleiAnnotationFile``, ``reference_mu_lab``,
        ``reference_std_lab``, ``analysis_roi`` (all ``-1`` means "whole
        image"), ``analysis_tile_size``, ``analysis_mag``,
        ``min_fgnd_frac`` and ``nuclei_annotation_format``. The object is
        also forwarded to ``cli_utils.create_dask_client`` and
        ``detect_tile_nuclei``.

    Raises
    ------
    IOError
        If ``args.inputImageFile`` is not an existing file.
    ValueError
        If ``reference_mu_lab`` or ``reference_std_lab`` does not have 3
        elements, or ``analysis_roi`` does not have 4.
    """
    total_start_time = time.time()

    print('\n>> CLI Parameters ...\n')
    print(args)

    if not os.path.isfile(args.inputImageFile):
        raise IOError('Input image file does not exist.')

    if len(args.reference_mu_lab) != 3:
        raise ValueError('Reference Mean LAB should be a 3 element vector.')

    if len(args.reference_std_lab) != 3:
        raise ValueError('Reference Stddev LAB should be a 3 element vector.')

    if len(args.analysis_roi) != 4:
        raise ValueError('Analysis ROI must be a vector of 4 elements.')

    # An ROI made only of -1 is the sentinel for "analyze the whole image".
    process_whole_image = bool(np.all(np.array(args.analysis_roi) == -1))

    #
    # Initiate Dask client
    #
    print('\n>> Creating Dask client ...\n')

    start_time = time.time()
    c = cli_utils.create_dask_client(args)
    print(c)
    dask_setup_time = time.time() - start_time
    print('Dask setup time = {}'.format(
        cli_utils.disp_time_hms(dask_setup_time)))

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)
    ts_metadata = ts.getMetadata()
    print(json.dumps(ts_metadata, indent=2))

    # The image is treated as a whole-slide image when its metadata reports
    # a magnification.
    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi and process_whole_image:
        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()
        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)
        fgnd_time = time.time() - start_time

        print('low-res foreground mask computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_time)))

    #
    # Compute foreground fraction of tiles in parallel using Dask
    #
    tile_fgnd_frac_list = [1.0]

    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
    }

    if not process_whole_image:
        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels',
        }

    if is_wsi:
        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']
        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:
            # NOTE(review): it_kwargs is unpacked into keyword arguments
            # here but passed as one positional dict to detect_tile_nuclei
            # below - confirm this matches the installed htk_utils
            # signature.
            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
                **it_kwargs)
        else:
            # Inside a user-supplied ROI every tile counts as foreground.
            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)
        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles

        fgnd_frac_comp_time = time.time() - start_time

        # str.format does not treat '%' specially, so a single '%' is the
        # correct way to print a percent sign here.
        print('Number of foreground tiles = {0:d} ({1:.2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))
        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    #
    # Compute reinhard stats for color normalization
    #
    src_mu_lab = None
    src_sigma_lab = None

    if is_wsi and process_whole_image:
        print('\n>> Computing reinhard color normalization stats ...\n')

        start_time = time.time()
        src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
            args.inputImageFile, 0.01, magnification=args.analysis_mag)
        rstats_time = time.time() - start_time

        print('Reinhard stats computation time = {}'.format(
            cli_utils.disp_time_hms(rstats_time)))

    #
    # Detect nuclei in parallel using Dask
    #
    print('\n>> Detecting nuclei ...\n')

    start_time = time.time()

    tile_nuclei_list = []

    for tile in ts.tileIterator(**it_kwargs):
        tile_position = tile['tile_position']['position']

        # Skip tiles whose foreground fraction does not exceed the minimum.
        if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        # Schedule nuclei detection for this tile; nothing runs yet.
        cur_nuclei_list = dask.delayed(detect_tile_nuclei)(
            args.inputImageFile,
            tile_position,
            args, it_kwargs,
            src_mu_lab, src_sigma_lab)

        tile_nuclei_list.append(cur_nuclei_list)

    # Run all scheduled tile computations, then flatten the per-tile lists.
    tile_nuclei_list = dask.delayed(tile_nuclei_list).compute()

    nuclei_list = list(itertools.chain.from_iterable(tile_nuclei_list))

    nuclei_detection_time = time.time() - start_time

    print('Number of nuclei = {}'.format(len(nuclei_list)))
    print('Nuclei detection time = {}'.format(
        cli_utils.disp_time_hms(nuclei_detection_time)))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {
        "name": annot_fname + '-nuclei-' + args.nuclei_annotation_format,
        "elements": nuclei_list
    }

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
def main(args):  # noqa: C901
    """Compute superpixel features for a slide and store them in HDF5.

    Reinhard color statistics and a low-resolution foreground mask are
    computed for the slide, ``compute_superpixel_data`` is run on every
    tile whose foreground fraction exceeds ``args.min_fgnd_frac`` through
    ``dask.delayed``, and the merged per-tile results are written to
    ``args.outputSuperpixelFeatureFile`` as the datasets ``features``,
    ``x_centroid`` and ``y_centroid``.

    Parameters
    ----------
    args : namespace-like object
        Parsed CLI parameters. Attributes read here: ``inputSlidePath``,
        ``sample_fraction``, ``analysis_mag``, ``analysis_tile_size``,
        ``min_fgnd_frac`` and ``outputSuperpixelFeatureFile``. The object
        is also forwarded to ``compute_superpixel_data``.
    """
    # initiate dask client
    # NOTE(review): the scheduler address is hard-coded here; the sibling
    # call cli_utils.create_dask_client(args) was left disabled - confirm
    # which one is intended. The client is kept referenced for the
    # duration of the run.
    c = dask.distributed.Client('127.0.0.1:8786')

    # read input slide
    ts = large_image.getTileSource(args.inputSlidePath)

    # compute colorspace statistics (mean, variance) for whole slide
    wsi_mean, wsi_stddev = htk_cnorm.reinhard_stats(
        args.inputSlidePath, args.sample_fraction, args.analysis_mag)

    # compute tissue/foreground mask at low-res for whole slide images
    im_fgnd_mask_lres, fgnd_seg_scale = \
        cli_utils.segment_wsi_foreground_at_low_res(ts)

    # compute foreground fraction of tiles in parallel using Dask
    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
    }

    tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
        args.inputSlidePath, im_fgnd_mask_lres, fgnd_seg_scale,
        **it_kwargs)

    #
    # Now, we detect superpixel data in parallel using Dask
    #
    print('\n>> Detecting superpixel data ...\n')

    tile_result_list = []

    for tile in ts.tileIterator(**it_kwargs):
        tile_position = tile['tile_position']['position']

        # Skip tiles whose foreground fraction does not exceed the minimum.
        if tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        # Schedule superpixel extraction for this tile; nothing runs yet.
        cur_result = dask.delayed(compute_superpixel_data)(
            args.inputSlidePath,
            tile_position,
            wsi_mean, wsi_stddev,
            args, **it_kwargs)

        tile_result_list.append(cur_result)

    # Run all scheduled tile computations.
    tile_result_list = dask.delayed(tile_result_list).compute()

    # Merge the per-tile (features, x centroids, y centroids) triples.
    superpixel_data = []
    x_centroids = []
    y_centroids = []

    for s_data, x_cent, y_cent in tile_result_list:
        superpixel_data.extend(s_data)
        x_centroids.extend(x_cent)
        y_centroids.extend(y_cent)

    superpixel_data = np.asarray(superpixel_data, dtype=np.float32)

    n_superpixels = len(superpixel_data)
    # Store the centroids as column vectors, one row per superpixel.
    x_centroids = np.asarray(x_centroids).reshape((n_superpixels, 1))
    y_centroids = np.asarray(y_centroids).reshape((n_superpixels, 1))

    #
    # Last, we can store the data
    #
    print('>> Writing superpixel data information')

    # The context manager closes the file even if a dataset write fails.
    with h5py.File(args.outputSuperpixelFeatureFile, 'w') as output:
        output.create_dataset('features', data=superpixel_data)
        output.create_dataset('x_centroid', data=x_centroids)
        output.create_dataset('y_centroid', data=y_centroids)
def main(args):
    """Detect nuclei per tile with Dask and write annotations and timings.

    Steps: start a Dask client, open the image with large_image, select
    foreground tiles (whole-slide images only), compute Reinhard color
    statistics, schedule ``detect_tile_nuclei`` for every retained tile
    with ``dask.delayed``, resolve all results with ``dask.compute``, dump
    the per-tile timing columns to
    ``args.outputNucleiDetectionTimeProfilingFile`` and write the
    annotation JSON to ``args.outputNucleiAnnotationFile``.

    Parameters
    ----------
    args : namespace-like object
        Parsed CLI parameters. Attributes read here: ``inputImageFile``,
        ``analysis_roi`` (all ``-1`` means "whole image"),
        ``analysis_tile_size``, ``analysis_mag``, ``min_fgnd_frac``,
        ``outputNucleiDetectionTimeProfilingFile``,
        ``outputNucleiAnnotationFile`` and ``nuclei_annotation_format``.
        The object is also forwarded to ``cli_utils.create_dask_client``
        and ``detect_tile_nuclei``.
    """
    # Collects formatted stage timings; it is filled below but not read
    # again inside this function.
    total_time_profiler = {}
    total_start_time = time.time()

    # =========================================================================
    # ======================= Create Dask Client ==============================
    # =========================================================================
    print('\n>> Creating Dask client ...\n')

    start_time = time.time()
    c = cli_utils.create_dask_client(args)
    print(c)

    dask_setup_time = time.time() - start_time
    temp_time = cli_utils.disp_time_hms(dask_setup_time)
    print('Dask setup time = {}'.format(
        temp_time))
    total_time_profiler['Dask setup time'] = temp_time

    # =========================================================================
    # ========================= Read Input Image ==============================
    # =========================================================================
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)
    ts_metadata = ts.getMetadata()
    print(json.dumps(ts_metadata, indent=2))

    # An ROI made only of -1 is the sentinel for "analyze the whole image".
    if np.all(np.array(args.analysis_roi) == -1):
        process_whole_image = True
    else:
        process_whole_image = False

    # The image is treated as a whole-slide image when its metadata reports
    # a magnification.
    is_wsi = ts_metadata['magnification'] is not None

    # =========================================================================
    # ===================== Compute Foreground Mask ===========================
    # =========================================================================
    if is_wsi and process_whole_image:
        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()
        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)

        fgnd_time = time.time() - start_time
        tmp_time = cli_utils.disp_time_hms(fgnd_time)
        print('low-res foreground mask computation time = {}'.format(tmp_time))
        total_time_profiler[
            'low-res foreground mask computation time'] = tmp_time

    # =========================================================================
    # ================== Compute foreground fraction ==========================
    # =========================================================================
    # Keyword arguments shared by every large_image tile request below.
    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
        'resample': True
    }

    # Placeholder; it is only indexed in the tile loop when is_wsi is true,
    # and in that case it has been replaced by a per-tile array first.
    tile_fgnd_frac_list = [1.0]

    if not process_whole_image:
        # Restrict tile iteration to the requested ROI (base pixels).
        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels'
        }

    # =========================================================================
    if is_wsi:
        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

        # The iterator range reported for a single tile gives the tile count.
        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']
        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:
            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres,
                fgnd_seg_scale, it_kwargs
            )
        else:
            # Inside a user-supplied ROI every tile counts as foreground.
            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)
        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles
        fgnd_frac_comp_time = time.time() - start_time

        print('Number of foreground tiles = {:d} ({:2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))
        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    # =========================================================================
    # ========================= Compute reinhard stats ========================
    # =========================================================================
    # NOTE(review): unlike the foreground steps above, the statistics are
    # computed unconditionally (not only for whole-slide, whole-image
    # runs), so the None defaults are always overwritten - confirm this is
    # intended.
    src_mu_lab = None
    src_sigma_lab = None

    print('\n>> Computing reinhard color normalization stats ...\n')

    start_time = time.time()
    # Alternative call left disabled by the author:
    # src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
    #     args.inputImageFile, 0.01, magnification=args.analysis_mag,
    #     tissue_seg_mag=0.625)
    src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
        args.inputImageFile, 0.01, magnification=args.analysis_mag)
    print('Reinahrd stats')
    print(src_mu_lab, src_sigma_lab)

    rstats_time = time.time() - start_time
    print('Reinhard stats computation time = {}'.format(
        cli_utils.disp_time_hms(rstats_time)))

    # =========================================================================
    # ======================== Detect nuclei in parallel - Dask ==============
    # =========================================================================
    print('\n>> Detecting cell ...\n')

    start_time = time.time()

    # One list per field of the detect_tile_nuclei result. Inside the loop
    # they hold lazy (delayed) values; dask.compute rebinds them to the
    # computed values afterwards.
    prep_time_profiler = []
    color_deconv_time_profiler = []
    total_loading_time_profiler = []
    ckpt_loading_time_profiler = []
    model_inference_time_profiler = []
    detection_time_profiler = []
    tile_shapes = []
    tile_nuclei_list = []
    num_nuclie = []
    annotation_dict = []
    analysis_dict = []  # gathered and computed, but not used afterwards

    # Initialized up front so they exist even if the try block fails early.
    annotation_dict_list = []
    nuclei_annot_list = []

    try:
        for tile in ts.tileIterator(**it_kwargs):
            tile_position = tile['tile_position']['position']

            # Skip tiles whose foreground fraction does not exceed the
            # minimum.
            if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
                continue

            # On whole-image runs, skip tiles that are not exactly
            # analysis_tile_size in both dimensions.
            if is_wsi and process_whole_image and (
                    tile['width'] != args.analysis_tile_size or
                    tile['height'] != args.analysis_tile_size):
                continue

            # Schedule detection for this tile; nothing runs yet. Indexing
            # the delayed result below stays lazy as well.
            tmp_csv = dask.delayed(detect_tile_nuclei)(
                args.inputImageFile,
                tile_position,
                args, it_kwargs,
                src_mu_lab, src_sigma_lab
            )

            prep_time_profiler.append(tmp_csv['PreparationTime'])
            color_deconv_time_profiler.append(tmp_csv['ColorDeconvTime'])
            total_loading_time_profiler.append(tmp_csv['TotalTileLoadingTime'])
            ckpt_loading_time_profiler.append(tmp_csv['CKPTLoadingTime'])
            model_inference_time_profiler.append(tmp_csv['ModelInfernceTime'])
            detection_time_profiler.append(tmp_csv['DetectionTime'])
            tile_shapes.append(tmp_csv['ROIShape'])
            tile_nuclei_list.append(tmp_csv['ObjectsDict'])
            num_nuclie.append(tmp_csv['NumObjects'])
            annotation_dict.append(tmp_csv['AnnotationDict'])
            analysis_dict.append(tmp_csv['AnalysisDict'])

        # Resolve every scheduled value in a single dask.compute call; the
        # results come back in the same order as the arguments.
        prep_time_profiler,\
            color_deconv_time_profiler,\
            total_loading_time_profiler,\
            ckpt_loading_time_profiler,\
            model_inference_time_profiler,\
            detection_time_profiler,\
            tile_shapes,\
            tile_nuclei_list,\
            num_nuclie,\
            annotation_dict,\
            analysis_dict = dask.compute(prep_time_profiler,
                                         color_deconv_time_profiler,
                                         total_loading_time_profiler,
                                         ckpt_loading_time_profiler,
                                         model_inference_time_profiler,
                                         detection_time_profiler,
                                         tile_shapes,
                                         tile_nuclei_list,
                                         num_nuclie,
                                         annotation_dict,
                                         analysis_dict
                                         )

        # Flatten the per-tile object lists into one list.
        nuclei_annot_list = list(
            itertools.chain.from_iterable(list(tile_nuclei_list)))

        num_nuclei = len(nuclei_annot_list)

        nuclei_detection_time = time.time() - start_time

        print('Number of nuclei = {}'.format(num_nuclei))
        print('Nuclei detection time = {}'.format(
            cli_utils.disp_time_hms(nuclei_detection_time)))

        # Flatten the per-tile annotation lists; these are the elements
        # written to the annotation file below.
        annotation_dict_list = list(
            itertools.chain.from_iterable(list(annotation_dict)))

    finally:
        # Runs on success and on failure. If dask.compute never completed,
        # the lists still hold delayed values rather than results.
        agg_csv = {}
        agg_csv['PreparationTime'] = prep_time_profiler
        agg_csv['ColorDeconvTime'] = color_deconv_time_profiler
        agg_csv['TotalTileLoadingTime'] = total_loading_time_profiler
        agg_csv['CKPTLoadingTime'] = ckpt_loading_time_profiler
        agg_csv['ModelInfernceTime'] = model_inference_time_profiler
        agg_csv['DetectionTime'] = detection_time_profiler
        agg_csv['ROIShape'] = tile_shapes
        agg_csv['ObjectsDict'] = tile_nuclei_list
        agg_csv['NumObjects'] = num_nuclie

        # 'ObjectsDict' is not listed in columns, so it is left out of the
        # written table.
        df = pd.DataFrame(agg_csv, columns=['PreparationTime',
                                            'ColorDeconvTime',
                                            'TotalTileLoadingTime',
                                            'CKPTLoadingTime',
                                            'ModelInfernceTime',
                                            'DetectionTime',
                                            'ROIShape',
                                            'NumObjects']
                          )
        df.to_csv(args.outputNucleiDetectionTimeProfilingFile)

    # =========================================================================
    # ======================= Actual Annotation Writing =======================
    # =========================================================================
    print('\n>> Writing annotation file ...\n')

    # The annotation name is derived from the output file's base name.
    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {
        "name": annot_fname + '-cell-' + args.nuclei_annotation_format,
        "elements": annotation_dict_list
    }

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
def main(args):  # noqa: C901
    """Compute superpixel features for a slide with fixed analysis settings.

    Reinhard color statistics and a low-resolution foreground mask are
    computed for ``args.inputSlidePath``, ``compute_superpixel_data`` is
    run through ``dask.delayed`` on every tile whose foreground fraction
    exceeds the minimum, and the merged results are written to
    ``args.outputSuperpixelFeatureFile`` as the HDF5 datasets
    ``features``, ``x_centroid`` and ``y_centroid``.

    Parameters
    ----------
    args : namespace-like object
        Parsed CLI parameters. Only ``inputSlidePath`` and
        ``outputSuperpixelFeatureFile`` are read; the analysis settings
        are the fixed values defined at the top of the function.
    """
    # Fixed analysis settings.
    # NOTE(review): compute_superpixel_data is called without any tiling
    # arguments, so it presumably relies on the same values - keep the two
    # in sync if these ever become CLI parameters.
    sample_fraction = 0.1
    analysis_mag = 10
    analysis_tile_size = 2048
    min_fgnd_frac = 0.001

    # The client is kept referenced for the duration of the run.
    c = create_dask_client()

    print('\n>> Creating Dask client and printing its values...\n')

    ts = large_image.getTileSource(args.inputSlidePath)

    # compute colorspace statistics (mean, variance) for whole slide
    wsi_mean, wsi_stddev = htk_cnorm.reinhard_stats(
        args.inputSlidePath, sample_fraction, analysis_mag)

    # compute tissue/foreground mask at low-res for whole slide images
    im_fgnd_mask_lres, fgnd_seg_scale = \
        cli_utils.segment_wsi_foreground_at_low_res(ts)

    # compute foreground fraction of tiles in parallel using Dask
    it_kwargs = {
        'tile_size': {'width': analysis_tile_size},
        'scale': {'magnification': analysis_mag},
    }

    tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
        args.inputSlidePath, im_fgnd_mask_lres, fgnd_seg_scale,
        **it_kwargs
    )

    tile_result_list = []

    for tile in ts.tileIterator(**it_kwargs):
        tile_position = tile['tile_position']['position']

        # Skip tiles whose foreground fraction does not exceed the minimum.
        if tile_fgnd_frac_list[tile_position] <= min_fgnd_frac:
            continue

        # Schedule superpixel extraction for this tile; nothing runs yet.
        # The slide must be the same one the tile positions and foreground
        # fractions above were derived from.
        cur_result = dask.delayed(compute_superpixel_data)(
            args.inputSlidePath,
            tile_position,
            wsi_mean, wsi_stddev)

        tile_result_list.append(cur_result)

    # Run all scheduled tile computations.
    tile_result_list = dask.delayed(tile_result_list).compute()

    # Merge the per-tile (features, x centroids, y centroids) triples.
    superpixel_data = []
    x_centroids = []
    y_centroids = []

    for s_data, x_cent, y_cent in tile_result_list:
        superpixel_data.extend(s_data)
        x_centroids.extend(x_cent)
        y_centroids.extend(y_cent)

    superpixel_data = np.asarray(superpixel_data, dtype=np.float32)

    n_superpixels = len(superpixel_data)
    # Store the centroids as column vectors, one row per superpixel.
    x_centroids = np.asarray(x_centroids).reshape((n_superpixels, 1))
    y_centroids = np.asarray(y_centroids).reshape((n_superpixels, 1))

    print('>> Writing superpixel data information')

    # The context manager closes the file even if a dataset write fails.
    with h5py.File(args.outputSuperpixelFeatureFile, 'w') as output:
        output.create_dataset('features', data=superpixel_data)
        output.create_dataset('x_centroid', data=x_centroids)
        output.create_dataset('y_centroid', data=y_centroids)