def test_reinhard_stats(self):
    """Compare whole-slide Reinhard statistics with stored reference values."""
    slide_name = 'sample_svs_image.TCGA-DU-6399-01A-01-TS1.e8eb65de-d63e-42db-af6f-14fefbbdf7bd.svs'  # noqa
    wsi_path = os.path.join(TEST_DATA_DIR, slide_name)

    np.random.seed(1)

    # create dask client
    client_options = {
        'scheduler': None,
        'num_workers': -1,
        'num_threads_per_worker': 1,
    }
    parameters_type = collections.namedtuple(
        'Parameters', client_options.keys())
    cli_utils.create_dask_client(parameters_type(**client_options))

    # compute reinhard stats
    wsi_mean, wsi_stddev = htk_cn.reinhard_stats(wsi_path, 0.1, 20)

    # reference values the computed statistics must match within atol
    expected_mean = [8.88150931, -0.07665037, 0.02211699]
    expected_stddev = [0.63423921, 0.12760392, 0.02212977]

    np.testing.assert_allclose(wsi_mean, expected_mean, atol=1e-2)
    np.testing.assert_allclose(wsi_stddev, expected_stddev, atol=1e-2)
def main(args):
    """Run the slide-level positive pixel count and write its statistics.

    When ``args.outputLabelImage`` is set, a colorized label image is
    also saved there.  The statistics are written to
    ``args.returnParameterFile`` as ``name = value`` lines.
    """
    utils.create_dask_client(args)
    ts = large_image.getTileSource(args.inputImageFile)

    make_label_image = args.outputLabelImage is not None

    # The size limit and tile source are only passed along when a label
    # image is requested.
    region_extras = (args.maxRegionSize, ts) if make_label_image else ()
    region = utils.get_region_dict(args.region, *region_extras).get('region')

    field_values = {}
    for field in ppc.Parameters._fields:
        field_values[field] = getattr(args, field)
    ppc_params = ppc.Parameters(**field_values)

    results = ppc.count_slide(
        args.inputImageFile, ppc_params, region,
        args.tile_grouping, make_label_image,
    )

    if not make_label_image:
        (stats,) = results
    else:
        stats, label_image = results
        # Colorize label image.  Colors from the "coolwarm" color map
        palette = np.empty((4, 3), dtype=np.uint8)
        palette[ppc.Labels.NEGATIVE] = 255
        palette[ppc.Labels.WEAK] = 60, 78, 194
        palette[ppc.Labels.PLAIN] = 221, 220, 220
        palette[ppc.Labels.STRONG] = 180, 4, 38
        # Indexing the palette with the label array maps labels to colors
        label_image = palette[label_image]
        skimage.io.imsave(args.outputLabelImage, label_image)

    with open(args.returnParameterFile, 'w') as f:
        f.writelines(
            '{} = {}\n'.format(name, value)
            for name, value in zip(stats._fields, stats)
        )
def main(args):
    """Compute a stain matrix with the Macenko PCA routine and write it out.

    Each column of the resulting matrix is written to
    ``args.returnParameterFile`` as a ``stainColor_<n> = r,g,b`` line.
    """
    args = utils.splitArgs(args)
    args.macenko.I_0 = numpy.array(args.macenko.I_0)

    utils.create_dask_client(args.dask)
    pixels = utils.sample_pixels(args.sample)

    macenko_options = vars(args.macenko)
    stain_matrix = rgb_separate_stains_macenko_pca(
        pixels.T, **macenko_options)

    with open(args.returnParameterFile, 'w') as f:
        # Stain numbering in the output starts at 1.
        for number, stain in enumerate(stain_matrix.T, 1):
            components = ','.join(str(value) for value in stain)
            f.write('stainColor_{} = {}\n'.format(number, components))
def main(args):
    """Compute the background intensity and write it to the return file.

    All attributes of ``args`` except ``returnParameterFile`` and
    ``scheduler`` are forwarded to ``background_intensity`` as keyword
    arguments.
    """
    excluded = {'returnParameterFile', 'scheduler'}
    kwargs = {}
    for name, value in vars(args).items():
        if name not in excluded:
            kwargs[name] = value

    # Allow (some) default parameters to work.  Assume certain values
    # are not valid.
    for name in ('sample_fraction', 'sample_approximate_total'):
        if kwargs[name] == -1:
            kwargs.pop(name)

    utils.create_dask_client(args)
    I_0 = background_intensity(**kwargs)

    intensity_text = ','.join(str(component) for component in I_0)
    with open(args.returnParameterFile, 'w') as f:
        f.write('BackgroundIntensity = ' + intensity_text + '\n')
def main(args):
    """Count positive pixels and write the summary to the return file.

    With ``args.outputLabelImage`` set, the requested region is read
    as one tile and a label image is saved.  Otherwise the tiles are
    processed in groups of ``args.tile_grouping`` as delayed tasks and
    the partial results are combined.
    """
    utils.create_dask_client(args)
    ts = large_image.getTileSource(args.inputImageFile)

    kwargs = {'format': large_image.tilesource.TILE_FORMAT_NUMPY}
    makeLabelImage = args.outputLabelImage is not None
    # The size limit and tile source are only passed along when a label
    # image is requested.
    region_extras = (args.maxRegionSize, ts) if makeLabelImage else ()
    kwargs.update(utils.get_region_dict(args.region, *region_extras))

    if makeLabelImage:
        tile = ts.getRegion(**kwargs)[0]
        results, labelImage = positive_pixel_count_single_tile(
            args, tile, makeLabelImage=True)
        skimage.io.imsave(args.outputLabelImage, labelImage)
    else:
        total_tiles = ts.getSingleTile(**kwargs)['iterator_range']['position']
        pending = [
            delayed(positive_pixel_count_tiles)(
                args, kwargs, start,
                min(args.tile_grouping, total_tiles - start))
            for start in range(0, total_tiles, args.tile_grouping)
        ]
        results = delayed(combine)(pending).compute()

    total_all_positive = sum(results[k] for k in results_num_keys)

    output = [(k, results[k]) for k in results_keys]
    output.append((
        'IntensityAverage',
        sum(results[k] for k in results_i_keys) / total_all_positive,
    ))
    output.append((
        'RatioStrongToTotal',
        float(results['NumberStrongPositive']) / total_all_positive,
    ))
    output.append((
        'IntensityAverageWeakAndPositive',
        (results['IntensitySumWeakPositive'] +
         results['IntensitySumPositive']) /
        (results['NumberWeakPositive'] + results['NumberPositive']),
    ))

    with open(args.returnParameterFile, 'w') as f:
        f.writelines('{} = {}\n'.format(k, v) for k, v in output)
def test_background_intensity(self):
    """Compare the estimated background intensity with a reference value."""
    slide_name = 'sample_svs_image.TCGA-DU-6399-01A-01-TS1.e8eb65de-d63e-42db-af6f-14fefbbdf7bd.svs'  # noqa
    wsi_path = os.path.join(TEST_DATA_DIR, slide_name)

    np.random.seed(1)

    # create dask client
    client_options = {
        'scheduler': None,
        'num_workers': -1,
        'num_threads_per_worker': 1,
    }
    parameters_type = collections.namedtuple(
        'Parameters', client_options.keys())
    cli_utils.create_dask_client(parameters_type(**client_options))

    # compute background intensity
    I_0 = htk_cn.background_intensity(
        wsi_path, sample_approximate_total=5000)

    expected_intensity = [242, 244, 241]
    np.testing.assert_allclose(I_0, expected_intensity, atol=1)
def main(args):
    """Estimate a stain matrix with ``rgb_separate_stains_xu_snmf``.

    Pixels are sampled from the input, the SNMF routine is initialised
    with the matrix returned by ``utils.get_stain_matrix`` and the
    estimate is passed through ``complement_stain_matrix``.  Each column
    of the result is written to ``args.returnParameterFile`` as a
    ``stainColor_<n> = r,g,b`` line.
    """
    args = utils.splitArgs(args)
    args.snmf.I_0 = numpy.array(args.snmf.I_0)

    print(">> Starting Dask cluster and sampling pixels")
    utils.create_dask_client(args.dask)
    sample = utils.sample_pixels(args.sample)

    # Create stain matrix
    print('>> Creating stain matrix')
    args.snmf.w_init = utils.get_stain_matrix(args.stains, 2)
    # Function-call form: works on Python 2 and 3 alike for one argument,
    # unlike the print statement that was used here before.
    print(args.snmf.w_init)

    # Perform color deconvolution
    print('>> Performing color deconvolution')
    w_est = htk_cdeconv.rgb_separate_stains_xu_snmf(
        sample.T, **vars(args.snmf))
    w_est = htk_cdeconv.complement_stain_matrix(w_est)

    with open(args.returnParameterFile, 'w') as f:
        for i, stain in enumerate(w_est.T):
            f.write('stainColor_{} = {}\n'.format(
                i + 1, ','.join(map(str, stain))))
def main(args):
    """Detect nuclei, compute their features and write both to disk.

    Steps, in order: validate arguments, start a Dask client, open the
    input image, compute a low-res foreground mask and Reinhard stats
    (whole-slide runs only), schedule one delayed task per foreground
    tile, then write the nuclei annotations as JSON and the features
    as ``.csv`` or ``.h5``.

    Raises
    ------
    ValueError
        If the feature file extension is neither ``.csv`` nor ``.h5``.
    """
    total_start_time = time.time()

    print('\n>> CLI Parameters ...\n')
    print(args)

    check_args(args)

    feature_file_format = os.path.splitext(args.outputNucleiFeatureFile)[1]

    # An ROI of all -1 means "analyse the whole image".
    process_whole_image = bool(np.all(np.array(args.analysis_roi) == -1))

    #
    # Initiate Dask client
    #
    print('\n>> Creating Dask client ...\n')

    start_time = time.time()

    c = cli_utils.create_dask_client(args)
    print(c)

    dask_setup_time = time.time() - start_time
    print('Dask setup time = {} seconds'.format(dask_setup_time))

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)

    ts_metadata = ts.getMetadata()
    print(json.dumps(ts_metadata, indent=2))

    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi and process_whole_image:

        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()

        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)

        fgnd_time = time.time() - start_time

        print('low-res foreground mask computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_time)))

    #
    # Compute foreground fraction of tiles in parallel using Dask
    #
    tile_fgnd_frac_list = [1.0]

    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
    }

    if not process_whole_image:

        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels'
        }

    if is_wsi:

        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']

        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:

            # NOTE(review): it_kwargs is passed positionally here; confirm
            # this matches the helper's signature in this code base.
            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
                it_kwargs)

        else:

            # Must be an array, not a list: the ``>=`` below has to be an
            # elementwise comparison for the foreground count to be right.
            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)

        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles

        fgnd_frac_comp_time = time.time() - start_time

        print('Number of foreground tiles = {0:d} ({1:.2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))

        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    #
    # Compute reinhard stats for color normalization
    #
    src_mu_lab = None
    src_sigma_lab = None

    if is_wsi and process_whole_image:

        print('\n>> Computing reinhard color normalization stats ...\n')

        start_time = time.time()

        src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
            args.inputImageFile, 0.01, magnification=args.analysis_mag)

        rstats_time = time.time() - start_time

        print('Reinhard stats computation time = {}'.format(
            cli_utils.disp_time_hms(rstats_time)))

    #
    # Detect and compute nuclei features in parallel using Dask
    #
    print('\n>> Detecting nuclei and computing features ...\n')

    start_time = time.time()

    tile_result_list = []

    for tile in ts.tileIterator(**it_kwargs):

        tile_position = tile['tile_position']['position']

        # Skip tiles whose foreground fraction does not exceed the minimum.
        if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        # detect nuclei
        cur_result = dask.delayed(compute_tile_nuclei_features)(
            args.inputImageFile,
            tile_position,
            args, it_kwargs,
            src_mu_lab, src_sigma_lab)

        # append result to list
        tile_result_list.append(cur_result)

    tile_result_list = dask.delayed(tile_result_list).compute()

    # Each tile result is an (annotation list, feature table) pair.
    nuclei_annot_list = [
        annot
        for annot_list, fdata in tile_result_list
        for annot in annot_list
    ]

    nuclei_fdata = pd.concat(
        [fdata for annot_list, fdata in tile_result_list],
        ignore_index=True)

    nuclei_detection_time = time.time() - start_time

    print('Number of nuclei = {}'.format(len(nuclei_annot_list)))
    print('Nuclei detection time = {}'.format(
        cli_utils.disp_time_hms(nuclei_detection_time)))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {
        "name": annot_fname + '-nuclei-' + args.nuclei_annotation_format,
        "elements": nuclei_annot_list
    }

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    #
    # Create CSV Feature file
    #
    print('>> Writing CSV feature file')

    if feature_file_format == '.csv':

        nuclei_fdata.to_csv(args.outputNucleiFeatureFile, index=False)

    elif feature_file_format == '.h5':

        nuclei_fdata.to_hdf(args.outputNucleiFeatureFile, 'Features',
                            format='table', mode='w')

    else:

        raise ValueError(
            'Extension of output feature file must be .csv or .h5')

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
def main(args):
    """Classify nuclei from a feature file and write per-class annotations.

    Loads a classification model with ``joblib``, predicts class
    probabilities for every row of the feature file, assigns each
    nucleus annotation the most probable class and a class colour, and
    writes one annotation group per class as JSON.

    Raises
    ------
    ValueError
        If the feature count does not match the model, or if the number
        of annotations does not match the number of feature rows.
    """
    print('\n>> CLI Parameters ...\n')
    print(args)

    #
    # Initiate Dask client
    #
    print('\n>> Creating Dask client ...\n')

    c = cli_utils.create_dask_client(args)
    print(c)

    #
    # read model file
    #
    print('\n>> Loading classification model ...\n')

    clf_model = joblib.load(args.inputModelFile)

    #
    # read feature file
    #
    print('\n>> Loading nuclei feature file ...\n')

    ddf = read_feature_file(args)

    if len(ddf.columns) != clf_model.n_features_:
        raise ValueError('The number of features of the classification model '
                         'and the input feature file do not match.')

    #
    # read nuclei annotation file
    #
    print('\n>> Loading nuclei annotation file ...\n')

    with open(args.inputNucleiAnnotationFile) as f:
        nuclei_annot_list = json.load(f)['elements']

    if len(nuclei_annot_list) != len(ddf.index):
        raise ValueError('The number of nuclei in the feature file and the '
                         'annotation file do not match')

    #
    # Perform nuclei classification
    #
    print('\n>> Performing nuclei classification using Dask ...\n')

    def predict_nuclei_class_prob(df, clf_model):
        # ``.values`` replaces ``as_matrix()``, which newer pandas
        # releases no longer provide.
        return pd.DataFrame(data=clf_model.predict_proba(df.values),
                            columns=clf_model.classes_)

    # Empty frame describing the columns/dtype of each partition's output.
    outfmt = pd.DataFrame(columns=clf_model.classes_, dtype=np.float64)

    df_class_prob = ddf.map_partitions(predict_nuclei_class_prob, clf_model,
                                       meta=outfmt).compute()

    pred_class = df_class_prob.idxmax(axis=1)

    #
    # Group nuclei annotations by class
    #
    print('\n>> Grouping nuclei annotations by class ...\n')

    num_classes = len(clf_model.classes_)

    nuclei_annot_by_class = {label: [] for label in clf_model.classes_}

    class_color_map = dict(
        zip(clf_model.classes_, gen_distinct_rgb_colors(num_classes, seed=1)))

    for i, cur_anot in enumerate(nuclei_annot_list):

        cur_class = pred_class.iloc[i]

        # Colour components are scaled by 255 and rounded to integers.
        cur_anot['lineColor'] = 'rgb(%s)' % ','.join(
            [str(int(round(col * 255))) for col in class_color_map[cur_class]])

        nuclei_annot_by_class[cur_class].append(cur_anot)

    #
    # Write annotation file
    #
    print('\n>> Writing classified nuclei annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = [
        {
            "name": annot_fname + '-nuclei-class-' + str(label),
            "elements": nuclei_annot_by_class[label]
        }
        for label in clf_model.classes_
    ]

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)
def test_create_tile_nuclei_annotations(self):
    """Compare generated bbox and boundary annotations with stored files.

    Nuclei are segmented in the tiles of a fixed ROI and annotated in
    both formats.  The results are checked against the ``.anot`` files
    in the test data directory.
    """
    wsi_path = os.path.join(
        TEST_DATA_DIR,
        'TCGA-06-0129-01Z-00-DX3.bae772ea-dd36-47ec-8185-761989be3cc8.svs')

    # define parameters
    args = {
        'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
        'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],

        'stain_1': 'hematoxylin',
        'stain_2': 'eosin',
        'stain_3': 'null',

        'stain_1_vector': [-1, -1, -1],
        'stain_2_vector': [-1, -1, -1],
        'stain_3_vector': [-1, -1, -1],

        'min_fgnd_frac': 0.50,
        'analysis_mag': 20,
        'analysis_tile_size': 1200,

        'min_radius': 12,
        'max_radius': 30,
        'foreground_threshold': 60,
        'min_nucleus_area': 80,
        'local_max_search_radius': 10,

        'scheduler_address': None
    }

    args = collections.namedtuple('Parameters', args.keys())(**args)

    # read WSI
    ts = large_image.getTileSource(wsi_path)

    ts_metadata = ts.getMetadata()

    # Round the analysis tile size down to a whole number of native tiles.
    analysis_tile_size = {
        'width': int(ts_metadata['tileWidth'] * np.floor(
            1.0 * args.analysis_tile_size / ts_metadata['tileWidth'])),
        'height': int(ts_metadata['tileHeight'] * np.floor(
            1.0 * args.analysis_tile_size / ts_metadata['tileHeight']))
    }

    # define ROI
    roi = {
        'left': ts_metadata['sizeX'] / 2,
        'top': ts_metadata['sizeY'] * 3 / 4,
        'width': analysis_tile_size['width'],
        'height': analysis_tile_size['height'],
        'units': 'base_pixels'
    }

    # define tile iterator parameters
    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
        'region': roi
    }

    # create dask client
    cli_utils.create_dask_client(args)

    # get tile foreground at low res
    im_fgnd_mask_lres, fgnd_seg_scale = \
        cli_utils.segment_wsi_foreground_at_low_res(ts)

    # compute tile foreground fraction
    tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
        wsi_path, im_fgnd_mask_lres, fgnd_seg_scale,
        **it_kwargs)

    num_fgnd_tiles = np.count_nonzero(
        tile_fgnd_frac_list >= args.min_fgnd_frac)

    np.testing.assert_equal(num_fgnd_tiles, 2)

    # create nuclei annotations
    nuclei_bbox_annot_list = []
    nuclei_bndry_annot_list = []

    for tile_info in ts.tileIterator(
            format=large_image.tilesource.TILE_FORMAT_NUMPY,
            **it_kwargs):

        # keep only the first three channels
        im_tile = tile_info['tile'][:, :, :3]

        # perform color normalization
        im_nmzd = htk_cnorm.reinhard(im_tile,
                                     args.reference_mu_lab,
                                     args.reference_std_lab)

        # perform color deconvolution
        w = cli_utils.get_stain_matrix(args)

        im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

        # Builtin ``float`` is what the removed ``np.float`` alias meant.
        im_nuclei_stain = im_stains[:, :, 0].astype(float)

        # segment nuclei
        im_nuclei_seg_mask = cli_utils.detect_nuclei_kofahi(
            im_nuclei_stain, args)

        # generate nuclei annotations as bboxes
        cur_bbox_annot_list = cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, 'bbox')

        nuclei_bbox_annot_list.extend(cur_bbox_annot_list)

        # generate nuclei annotations as boundaries
        cur_bndry_annot_list = cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, 'boundary')

        nuclei_bndry_annot_list.extend(cur_bndry_annot_list)

    # compare nuclei bbox annotations with gtruth
    nuclei_bbox_annot_gtruth_file = os.path.join(
        TEST_DATA_DIR,
        'TCGA-06-0129-01Z-00-DX3_roi_nuclei_bbox.anot'  # noqa
    )

    with open(nuclei_bbox_annot_gtruth_file, 'r') as fbbox_annot:
        nuclei_bbox_annot_list_gtruth = json.load(fbbox_annot)['elements']

    # Check that nuclei_bbox_annot_list is nearly equal to
    # nuclei_bbox_annot_list_gtruth
    self.assertEqual(len(nuclei_bbox_annot_list),
                     len(nuclei_bbox_annot_list_gtruth))

    for pos in range(len(nuclei_bbox_annot_list)):

        np.testing.assert_array_almost_equal(
            nuclei_bbox_annot_list[pos]['center'],
            nuclei_bbox_annot_list_gtruth[pos]['center'],
            0)

        np.testing.assert_almost_equal(
            nuclei_bbox_annot_list[pos]['width'],
            nuclei_bbox_annot_list_gtruth[pos]['width'], 1)

        np.testing.assert_almost_equal(
            nuclei_bbox_annot_list[pos]['height'],
            nuclei_bbox_annot_list_gtruth[pos]['height'], 1)

    # compare nuclei boundary annotations with gtruth
    nuclei_bndry_annot_gtruth_file = os.path.join(
        TEST_DATA_DIR,
        'TCGA-06-0129-01Z-00-DX3_roi_nuclei_boundary.anot'  # noqa
    )

    with open(nuclei_bndry_annot_gtruth_file, 'r') as fbndry_annot:
        nuclei_bndry_annot_list_gtruth = json.load(
            fbndry_annot)['elements']

    self.assertEqual(len(nuclei_bndry_annot_list),
                     len(nuclei_bndry_annot_list_gtruth))

    for pos in range(len(nuclei_bndry_annot_list)):

        np.testing.assert_array_almost_equal(
            nuclei_bndry_annot_list[pos]['points'],
            nuclei_bndry_annot_list_gtruth[pos]['points'],
            0)
def main(args):
    """Detect nuclei in an image and write them as a JSON annotation file.

    Steps, in order: validate arguments, start a Dask client, open the
    input image, compute a low-res foreground mask and Reinhard stats
    (whole-slide runs only), schedule one delayed detection task per
    foreground tile, then write all detected nuclei to
    ``args.outputNucleiAnnotationFile``.

    Raises
    ------
    IOError
        If the input image file does not exist.
    ValueError
        If the reference LAB vectors do not have 3 elements or the
        analysis ROI does not have 4.
    """
    total_start_time = time.time()

    print('\n>> CLI Parameters ...\n')
    print(args)

    if not os.path.isfile(args.inputImageFile):
        raise IOError('Input image file does not exist.')

    if len(args.reference_mu_lab) != 3:
        raise ValueError('Reference Mean LAB should be a 3 element vector.')

    if len(args.reference_std_lab) != 3:
        raise ValueError('Reference Stddev LAB should be a 3 element vector.')

    if len(args.analysis_roi) != 4:
        raise ValueError('Analysis ROI must be a vector of 4 elements.')

    # An ROI of all -1 means "analyse the whole image".
    process_whole_image = bool(np.all(np.array(args.analysis_roi) == -1))

    #
    # Initiate Dask client
    #
    print('\n>> Creating Dask client ...\n')

    start_time = time.time()

    c = cli_utils.create_dask_client(args)
    print(c)

    dask_setup_time = time.time() - start_time
    print('Dask setup time = {}'.format(
        cli_utils.disp_time_hms(dask_setup_time)))

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)

    ts_metadata = ts.getMetadata()
    print(json.dumps(ts_metadata, indent=2))

    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi and process_whole_image:

        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()

        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)

        fgnd_time = time.time() - start_time

        print('low-res foreground mask computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_time)))

    #
    # Compute foreground fraction of tiles in parallel using Dask
    #
    tile_fgnd_frac_list = [1.0]

    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
    }

    if not process_whole_image:

        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels'
        }

    if is_wsi:

        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']

        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:

            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
                **it_kwargs)

        else:

            # With an explicit ROI every tile is treated as foreground.
            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)

        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles

        fgnd_frac_comp_time = time.time() - start_time

        # str.format has no '%%' escape, so a single '%' is used here.
        print('Number of foreground tiles = {0:d} ({1:.2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))

        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    #
    # Compute reinhard stats for color normalization
    #
    src_mu_lab = None
    src_sigma_lab = None

    if is_wsi and process_whole_image:

        print('\n>> Computing reinhard color normalization stats ...\n')

        start_time = time.time()

        src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
            args.inputImageFile, 0.01, magnification=args.analysis_mag)

        rstats_time = time.time() - start_time

        print('Reinhard stats computation time = {}'.format(
            cli_utils.disp_time_hms(rstats_time)))

    #
    # Detect nuclei in parallel using Dask
    #
    print('\n>> Detecting nuclei ...\n')

    start_time = time.time()

    tile_nuclei_list = []

    for tile in ts.tileIterator(**it_kwargs):

        tile_position = tile['tile_position']['position']

        # Skip tiles whose foreground fraction does not exceed the minimum.
        if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        # detect nuclei
        cur_nuclei_list = dask.delayed(detect_tile_nuclei)(
            args.inputImageFile,
            tile_position,
            args, it_kwargs,
            src_mu_lab, src_sigma_lab)

        # append result to list
        tile_nuclei_list.append(cur_nuclei_list)

    tile_nuclei_list = dask.delayed(tile_nuclei_list).compute()

    # Flatten the per-tile lists into one list of nuclei.
    nuclei_list = list(itertools.chain.from_iterable(tile_nuclei_list))

    nuclei_detection_time = time.time() - start_time

    print('Number of nuclei = {}'.format(len(nuclei_list)))

    print('Nuclei detection time = {}'.format(
        cli_utils.disp_time_hms(nuclei_detection_time)))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {
        "name": annot_fname + '-nuclei-' + args.nuclei_annotation_format,
        "elements": nuclei_list
    }

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
def main(args):  # noqa: C901
    """Compute superpixel data for a slide and write it to disk.

    Reinhard statistics and a low-res foreground mask are computed for
    the slide, then one delayed ``compute_superpixel_data`` task is
    scheduled per foreground tile.  Features and centroids are written
    to an HDF5 file (``args.outputSuperpixelFeatureFile``).  Centroids
    and boundary points are written to a tab/space separated text file
    (``args.outputBoundariesFile``), one superpixel per line.
    """
    # initiate dask client
    cli_utils.create_dask_client(args)

    # read input slide
    ts = large_image.getTileSource(args.inputSlidePath)

    # compute colorspace statistics (mean, variance) for whole slide
    wsi_mean, wsi_stddev = htk_cnorm.reinhard_stats(args.inputSlidePath,
                                                    args.sample_fraction,
                                                    args.analysis_mag)

    # compute tissue/foreground mask at low-res for whole slide images
    im_fgnd_mask_lres, fgnd_seg_scale = \
        cli_utils.segment_wsi_foreground_at_low_res(ts)

    # compute foreground fraction of tiles in parallel using Dask
    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
    }

    tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
        args.inputSlidePath, im_fgnd_mask_lres, fgnd_seg_scale,
        **it_kwargs)

    #
    # Now, we detect superpixel data in parallel using Dask
    #
    print('\n>> Detecting superpixel data ...\n')

    tile_result_list = []

    for tile in ts.tileIterator(**it_kwargs):

        tile_position = tile['tile_position']['position']

        # Skip tiles whose foreground fraction does not exceed the minimum.
        if tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        # detect superpixel data
        cur_result = dask.delayed(compute_superpixel_data)(
            args.inputSlidePath,
            tile_position,
            wsi_mean, wsi_stddev,
            args, **it_kwargs)

        # append result to list
        tile_result_list.append(cur_result)

    tile_result_list = dask.delayed(tile_result_list).compute()

    # initiate output data list
    superpixel_data = []
    x_centroids = []
    y_centroids = []
    x_boundaries = []
    y_boundaries = []

    # Concatenate the per-tile sequences into slide-wide lists.
    for s_data, x_cent, y_cent, x_brs, y_brs in tile_result_list:
        superpixel_data.extend(s_data)
        x_centroids.extend(x_cent)
        y_centroids.extend(y_cent)
        x_boundaries.extend(x_brs)
        y_boundaries.extend(y_brs)

    superpixel_data = np.asarray(superpixel_data, dtype=np.float32)

    n_superpixels = len(superpixel_data)
    x_centroids = np.asarray(x_centroids).reshape((n_superpixels, 1))
    y_centroids = np.asarray(y_centroids).reshape((n_superpixels, 1))

    #
    # Last, we can store the data
    #
    print('>> Writing superpixel data information')

    # ``with`` closes the HDF5 file even if a dataset write fails.
    with h5py.File(args.outputSuperpixelFeatureFile, 'w') as output:
        output.create_dataset('features', data=superpixel_data)
        output.create_dataset('x_centroid', data=x_centroids)
        output.create_dataset('y_centroid', data=y_centroids)

    #
    # Create Text file for boundaries
    #
    print('>> Writing text boundary file')

    # ``with`` flushes and closes the file once all lines are written.
    with open(args.outputBoundariesFile, 'w') as boundary_file:
        for i in range(n_superpixels):
            # Each line: y centroid, x centroid, then "y,x" boundary points.
            boundary_file.write("%.1f\t" % y_centroids[i, 0])
            boundary_file.write("%.1f\t" % x_centroids[i, 0])

            for j in range(len(x_boundaries[i])):
                boundary_file.write(
                    "%d,%d " % (y_boundaries[i][j], x_boundaries[i][j]))

            boundary_file.write("\n")