Example #1
def main(args):

    total_start_time = time.time()

    print('\n>> CLI Parameters ...\n')

    print(args)

    check_args(args)

    feature_file_format = os.path.splitext(args.outputNucleiFeatureFile)[1]

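    # an analysis ROI of [-1, -1, -1, -1] means no region was specified, so the
    # whole slide is processed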
    if np.all(np.array(args.analysis_roi) == -1):
        process_whole_image = True
    else:
        process_whole_image = False

    #
    # Initiate Dask client
    #
    print('\n>> Creating Dask client ...\n')

    start_time = time.time()

    c = cli_utils.create_dask_client(args)

    print(c)

    dask_setup_time = time.time() - start_time
    print('Dask setup time = {} seconds'.format(dask_setup_time))

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)

    ts_metadata = ts.getMetadata()

    print(json.dumps(ts_metadata, indent=2))

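    # slides that report a native magnification are treated as whole-slide
    # images; plain images skip the WSI-specific steps below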
    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi and process_whole_image:

        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()

        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)

        fgnd_time = time.time() - start_time

        print('low-res foreground mask computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_time)))

    #
    # Compute foreground fraction of tiles in parallel using Dask
    #
    tile_fgnd_frac_list = [1.0]

    it_kwargs = {
        'tile_size': {
            'width': args.analysis_tile_size
        },
        'scale': {
            'magnification': args.analysis_mag
        },
    }

    if not process_whole_image:

        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels'
        }

    if is_wsi:

        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

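        # a single-tile request exposes the iterator range, whose 'position'
        # field is the total number of tiles for these tile/scale/region settings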
        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']

        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:

            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
                it_kwargs)

        else:

            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)

        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles

        fgnd_frac_comp_time = time.time() - start_time

        print('Number of foreground tiles = {0:d} ({1:.2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))

        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    #
    # Compute reinhard stats for color normalization
    #
    src_mu_lab = None
    src_sigma_lab = None

    if is_wsi and process_whole_image:

        print('\n>> Computing reinhard color normalization stats ...\n')

        start_time = time.time()

        src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
            args.inputImageFile, 0.01, magnification=args.analysis_mag)

        rstats_time = time.time() - start_time

        print('Reinhard stats computation time = {}'.format(
            cli_utils.disp_time_hms(rstats_time)))

    #
    # Detect and compute nuclei features in parallel using Dask
    #
    print('\n>> Detecting nuclei and computing features ...\n')

    start_time = time.time()

    tile_result_list = []

    for tile in ts.tileIterator(**it_kwargs):

        tile_position = tile['tile_position']['position']

        if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        # detect nuclei
        cur_result = dask.delayed(compute_tile_nuclei_features)(
            args.inputImageFile, tile_position, args, it_kwargs, src_mu_lab,
            src_sigma_lab)

        # append result to list
        tile_result_list.append(cur_result)

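    # evaluate all delayed per-tile tasks; Dask runs them in parallel on the
    # client created above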
    tile_result_list = dask.delayed(tile_result_list).compute()

    nuclei_annot_list = [
        annot for annot_list, fdata in tile_result_list for annot in annot_list
    ]

    nuclei_fdata = pd.concat([fdata for annot_list, fdata in tile_result_list],
                             ignore_index=True)

    nuclei_detection_time = time.time() - start_time

    print('Number of nuclei = {}'.format(len(nuclei_annot_list)))
    print('Nuclei detection time = {}'.format(
        cli_utils.disp_time_hms(nuclei_detection_time)))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {
        "name": annot_fname + '-nuclei-' + args.nuclei_annotation_format,
        "elements": nuclei_annot_list
    }

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    #
    # Create CSV Feature file
    #
    print('>> Writing CSV feature file')

    if feature_file_format == '.csv':

        nuclei_fdata.to_csv(args.outputNucleiFeatureFile, index=False)

    elif feature_file_format == '.h5':

        nuclei_fdata.to_hdf(args.outputNucleiFeatureFile,
                            'Features',
                            format='table',
                            mode='w')

    else:

        raise ValueError(
            'Extension of output feature file must be .csv or .h5')

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
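
# A minimal invocation sketch (not part of the example above). The attribute
# names mirror those referenced in main(); the paths and values below are
# placeholders, and a real run would use the tool's own argument parser.
import types

hypothetical_args = types.SimpleNamespace(
    inputImageFile='slide.svs',                     # placeholder path
    outputNucleiAnnotationFile='nuclei.anot',       # placeholder path
    outputNucleiFeatureFile='nuclei_features.csv',  # .csv or .h5
    analysis_roi=[-1, -1, -1, -1],                  # all -1 => whole slide
    analysis_tile_size=1024,
    analysis_mag=20,
    min_fgnd_frac=0.5,
    nuclei_annotation_format='boundary',
    scheduler='',                                   # assumed Dask settings
    num_workers=-1,
    num_threads_per_worker=1,
)

# main(hypothetical_args) would then run the whole pipeline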
Example #2
    def test_create_tile_nuclei_annotations(self):

        wsi_path = os.path.join(
            utilities.externaldata(
                'data/TCGA-06-0129-01Z-00-DX3.bae772ea-dd36-47ec-8185-761989be3cc8.svs.sha512'  # noqa
            ))

        # define parameters
        args = {
            'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
            'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],
            'stain_1': 'hematoxylin',
            'stain_2': 'eosin',
            'stain_3': 'null',
            'stain_1_vector': [-1, -1, -1],
            'stain_2_vector': [-1, -1, -1],
            'stain_3_vector': [-1, -1, -1],
            'min_fgnd_frac': 0.50,
            'analysis_mag': 20,
            'analysis_tile_size': 1200,
            'foreground_threshold': 60,
            'min_radius': 6,
            'max_radius': 12,
            'min_nucleus_area': 25,
            'local_max_search_radius': 8,

            # In Python 3 unittesting, the scheduler fails if it uses processes
            'scheduler': 'multithreading',  # None,
            'num_workers': -1,
            'num_threads_per_worker': 1,
        }

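        # convert the parameter dict into a namedtuple so values can be read
        # with attribute access (args.analysis_mag), mimicking the argparse-style
        # namespace the CLI utilities expect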
        args = collections.namedtuple('Parameters', args.keys())(**args)

        # read WSI
        ts = large_image.getTileSource(wsi_path)

        ts_metadata = ts.getMetadata()

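        # round the requested analysis tile size down to a whole multiple of the
        # source's native tile width and height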
        analysis_tile_size = {
            'width':
            int(ts_metadata['tileWidth'] * np.floor(
                1.0 * args.analysis_tile_size / ts_metadata['tileWidth'])),
            'height':
            int(ts_metadata['tileHeight'] * np.floor(
                1.0 * args.analysis_tile_size / ts_metadata['tileHeight']))
        }

        # define ROI
        roi = {
            'left': ts_metadata['sizeX'] / 2,
            'top': ts_metadata['sizeY'] * 3 / 4,
            'width': analysis_tile_size['width'],
            'height': analysis_tile_size['height'],
            'units': 'base_pixels'
        }

        # define tile iterator parameters
        it_kwargs = {
            'tile_size': {
                'width': args.analysis_tile_size
            },
            'scale': {
                'magnification': args.analysis_mag
            },
            'region': roi
        }

        # create dask client
        cli_utils.create_dask_client(args)

        # get tissue/foreground mask at low res
        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)

        # compute tile foreground fraction
        tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
            wsi_path, im_fgnd_mask_lres, fgnd_seg_scale, it_kwargs)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)

        np.testing.assert_equal(num_fgnd_tiles, 2)

        # create nuclei annotations
        nuclei_bbox_annot_list = []
        nuclei_bndry_annot_list = []

        for tile_info in ts.tileIterator(
                format=large_image.tilesource.TILE_FORMAT_NUMPY, **it_kwargs):

            im_tile = tile_info['tile'][:, :, :3]

            # perform color normalization
            im_nmzd = htk_cnorm.reinhard(im_tile, args.reference_mu_lab,
                                         args.reference_std_lab)

            # perform color deconvolution
            w = cli_utils.get_stain_matrix(args)

            im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

            im_nuclei_stain = im_stains[:, :, 0].astype(float)

            # segment nuclei
            im_nuclei_seg_mask = htk_nuclear.detect_nuclei_kofahi(
                im_nuclei_stain, im_nuclei_stain < args.foreground_threshold,
                args.min_radius, args.max_radius, args.min_nucleus_area,
                args.local_max_search_radius)

            # generate nuclei annotations as bboxes
            cur_bbox_annot_list = cli_utils.create_tile_nuclei_annotations(
                im_nuclei_seg_mask, tile_info, 'bbox')

            nuclei_bbox_annot_list.extend(cur_bbox_annot_list)

            # generate nuclei annotations as boundaries
            cur_bndry_annot_list = cli_utils.create_tile_nuclei_annotations(
                im_nuclei_seg_mask, tile_info, 'boundary')

            nuclei_bndry_annot_list.extend(cur_bndry_annot_list)

        # compare nuclei bbox annotations with gtruth
        nuclei_bbox_annot_gtruth_file = os.path.join(
            utilities.externaldata(
                'data/TCGA-06-0129-01Z-00-DX3_roi_nuclei_bbox.anot.sha512'  # noqa
            ))

        with open(nuclei_bbox_annot_gtruth_file, 'r') as fbbox_annot:
            nuclei_bbox_annot_list_gtruth = json.load(fbbox_annot)['elements']

        # Check that nuclei_bbox_annot_list is nearly equal to
        # nuclei_bbox_annot_list_gtruth
        assert len(nuclei_bbox_annot_list) == len(
            nuclei_bbox_annot_list_gtruth)
        for pos in range(len(nuclei_bbox_annot_list)):
            np.testing.assert_array_almost_equal(
                nuclei_bbox_annot_list[pos]['center'],
                nuclei_bbox_annot_list_gtruth[pos]['center'], 0)
            np.testing.assert_almost_equal(
                nuclei_bbox_annot_list[pos]['width'],
                nuclei_bbox_annot_list_gtruth[pos]['width'], 1)
            np.testing.assert_almost_equal(
                nuclei_bbox_annot_list[pos]['height'],
                nuclei_bbox_annot_list_gtruth[pos]['height'], 1)

        # compare nuclei boundary annotations with gtruth
        nuclei_bndry_annot_gtruth_file = os.path.join(
            utilities.externaldata(
                'data/TCGA-06-0129-01Z-00-DX3_roi_nuclei_boundary.anot.sha512'  # noqa
            ))

        with open(nuclei_bndry_annot_gtruth_file, 'r') as fbndry_annot:
            nuclei_bndry_annot_list_gtruth = json.load(
                fbndry_annot)['elements']

        assert len(nuclei_bndry_annot_list) == len(
            nuclei_bndry_annot_list_gtruth)

        for pos in range(len(nuclei_bndry_annot_list)):

            np.testing.assert_array_almost_equal(
                nuclei_bndry_annot_list[pos]['points'],
                nuclei_bndry_annot_list_gtruth[pos]['points'], 0)
Example #3
def main(args):

    total_start_time = time.time()

    print('\n>> CLI Parameters ...\n')

    print(args)

    if not os.path.isfile(args.inputImageFile):
        raise IOError('Input image file does not exist.')

    if len(args.analysis_roi) != 4:
        raise ValueError('Analysis ROI must be a vector of 4 elements.')

    if np.all(np.array(args.analysis_roi) == -1):
        process_whole_image = True
    else:
        process_whole_image = False

    start_time = time.time()

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)

    ts_metadata = ts.getMetadata()

    print(json.dumps(ts_metadata, indent=2))

    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi and process_whole_image:

        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()

        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)

        fgnd_time = time.time() - start_time

        print('low-res foreground mask computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_time)))

    #
    # Compute foreground fraction of tiles in parallel
    #
    tile_fgnd_frac_list = [1.0]

    it_kwargs = {
        'tile_size': {
            'width': args.analysis_tile_size
        },
        'scale': {
            'magnification': args.analysis_mag
        },
    }

    if not process_whole_image:

        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels'
        }

    if is_wsi:

        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']

        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:

            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
                it_kwargs)

        else:

            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)

        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles

        fgnd_frac_comp_time = time.time() - start_time

        print('Number of foreground tiles = {0:d} ({1:.2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))

        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    #
    # Detect cells tile by tile
    #
    print('\n>> Detecting cells ...\n')

    start_time = time.time()

    tile_cell_list = []

    csv_dict = {}
    csv_dict['Image Loading'] = []
    csv_dict['Cell Detection'] = []
    csv_dict['Cell Cropping'] = []
    csv_dict['Cell Classification'] = []
    csv_dict['Annotation Writing'] = []
    csv_dict['Number of Cells'] = []

    for tile in ts.tileIterator(**it_kwargs):

        tile_position = tile['tile_position']['position']

        if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        cur_cell_list, csv_dict = detect_tile_cell(args.inputImageFile,
                                                   tile_position, csv_dict,
                                                   args, it_kwargs)
        # append result to list
        tile_cell_list.append(cur_cell_list)

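        # the profiling CSV is rewritten after every tile so partial results
        # survive an interrupted run; the [:-5] below presumably strips a
        # 5-character extension such as '.anot' from the annotation file name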
        df = pd.DataFrame(csv_dict,
                          columns=[
                              'Number of Cells', 'Image Loading',
                              'Cell Detection', 'Cell Cropping',
                              'Cell Classification', 'Annotation Writing'
                          ])
        df.to_csv('%s.csv' % args.outputCellAnnotationFile[:-5])

    cell_list = list(itertools.chain.from_iterable(tile_cell_list))

    cell_detection_time = time.time() - start_time

    print('Number of cells = {}'.format(len(cell_list)))

    print('Cell detection time = {}'.format(
        cli_utils.disp_time_hms(cell_detection_time)))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputCellAnnotationFile))[0]

    annotation = {
        "name": annot_fname + '-cell-' + args.cell_annotation_format,
        "elements": cell_list
    }

    with open(args.outputCellAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
Example #4
def main(args):

    total_start_time = time.time()

    print('\n>> CLI Parameters ...\n')

    print(args)

    if not os.path.isfile(args.inputImageFile):
        raise IOError('Input image file does not exist.')

    if len(args.reference_mu_lab) != 3:
        raise ValueError('Reference Mean LAB should be a 3 element vector.')

    if len(args.reference_std_lab) != 3:
        raise ValueError('Reference Stddev LAB should be a 3 element vector.')

    if len(args.analysis_roi) != 4:
        raise ValueError('Analysis ROI must be a vector of 4 elements.')

    if np.all(np.array(args.analysis_roi) == -1):
        process_whole_image = True
    else:
        process_whole_image = False

    #
    # Initiate Dask client
    #
    print('\n>> Creating Dask client ...\n')

    start_time = time.time()

    c = cli_utils.create_dask_client(args)

    print(c)

    dask_setup_time = time.time() - start_time
    print('Dask setup time = {}'.format(
        cli_utils.disp_time_hms(dask_setup_time)))

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)

    ts_metadata = ts.getMetadata()

    print(json.dumps(ts_metadata, indent=2))

    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi and process_whole_image:

        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()

        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)

        fgnd_time = time.time() - start_time

        print('low-res foreground mask computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_time)))

    #
    # Compute foreground fraction of tiles in parallel using Dask
    #
    tile_fgnd_frac_list = [1.0]

    it_kwargs = {
        'tile_size': {
            'width': args.analysis_tile_size
        },
        'scale': {
            'magnification': args.analysis_mag
        },
    }

    if not process_whole_image:

        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels'
        }

    if is_wsi:

        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']

        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:

            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
                it_kwargs)

        else:

            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)

        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles

        fgnd_frac_comp_time = time.time() - start_time

        print('Number of foreground tiles = {0:d} ({1:.2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))

        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    #
    # Compute reinhard stats for color normalization
    #
    src_mu_lab = None
    src_sigma_lab = None

    if is_wsi and process_whole_image:

        print('\n>> Computing reinhard color normalization stats ...\n')

        start_time = time.time()

        src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
            args.inputImageFile, 0.01, magnification=args.analysis_mag)

        rstats_time = time.time() - start_time

        print('Reinhard stats computation time = {}'.format(
            cli_utils.disp_time_hms(rstats_time)))

    #
    # Detect nuclei in parallel using Dask
    #
    print('\n>> Detecting nuclei ...\n')

    start_time = time.time()

    tile_nuclei_list = []

    for tile in ts.tileIterator(**it_kwargs):

        tile_position = tile['tile_position']['position']

        if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        # detect nuclei
        cur_nuclei_list = dask.delayed(detect_tile_nuclei)(args.inputImageFile,
                                                           tile_position, args,
                                                           it_kwargs,
                                                           src_mu_lab,
                                                           src_sigma_lab)

        # append result to list
        tile_nuclei_list.append(cur_nuclei_list)

    tile_nuclei_list = dask.delayed(tile_nuclei_list).compute()

    nuclei_list = list(itertools.chain.from_iterable(tile_nuclei_list))

    nuclei_detection_time = time.time() - start_time

    print('Number of nuclei = {}'.format(len(nuclei_list)))

    print('Nuclei detection time = {}'.format(
        cli_utils.disp_time_hms(nuclei_detection_time)))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {
        "name": annot_fname + '-nuclei-' + args.nuclei_annotation_format,
        "elements": nuclei_list
    }

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
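
# A small read-back sketch (not part of the example above) for the annotation
# file written by main(); 'nuclei.anot' is a placeholder for whatever was
# passed as outputNucleiAnnotationFile.
import json

with open('nuclei.anot') as f:
    annotation = json.load(f)

print(annotation['name'])           # '<basename>-nuclei-<annotation format>'
print(len(annotation['elements']))  # number of detected nuclei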
Example #5
def main(args):  # noqa: C901

    # initiate dask client
    # c = cli_utils.create_dask_client(args)
    c = dask.distributed.Client('127.0.0.1:8786')

    # read input slide
    ts = large_image.getTileSource(args.inputSlidePath)

    # compute colorspace statistics (mean, variance) for whole slide
    wsi_mean, wsi_stddev = htk_cnorm.reinhard_stats(args.inputSlidePath,
                                                    args.sample_fraction,
                                                    args.analysis_mag)

    # compute tissue/foreground mask at low-res for whole slide images
    im_fgnd_mask_lres, fgnd_seg_scale = \
        cli_utils.segment_wsi_foreground_at_low_res(ts)

    # compute foreground fraction of tiles in parallel using Dask
    it_kwargs = {
        'tile_size': {
            'width': args.analysis_tile_size
        },
        'scale': {
            'magnification': args.analysis_mag
        },
    }

    tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
        args.inputSlidePath, im_fgnd_mask_lres, fgnd_seg_scale, **it_kwargs)

    #
    # Now, we detect superpixel data in parallel using Dask
    #
    print('\n>> Detecting superpixel data ...\n')

    tile_result_list = []

    for tile in ts.tileIterator(**it_kwargs):

        tile_position = tile['tile_position']['position']

        if tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
            continue

        # detect superpixel data
        cur_result = dask.delayed(compute_superpixel_data)(args.inputSlidePath,
                                                           tile_position,
                                                           wsi_mean,
                                                           wsi_stddev, args,
                                                           **it_kwargs)

        # append result to list
        tile_result_list.append(cur_result)

    tile_result_list = dask.delayed(tile_result_list).compute()

    # initiate output data list
    superpixel_data = []
    x_centroids = []
    y_centroids = []

    for s_data, x_cent, y_cent in tile_result_list:

        for s_d in s_data:
            superpixel_data.append(s_d)

        for x_c in x_cent:
            x_centroids.append(x_c)

        for y_c in y_cent:
            y_centroids.append(y_c)

    superpixel_data = np.asarray(superpixel_data, dtype=np.float32)

    n_superpixels = len(superpixel_data)
    x_centroids = np.asarray(x_centroids).reshape((n_superpixels, 1))
    y_centroids = np.asarray(y_centroids).reshape((n_superpixels, 1))

    #
    # Last, we can store the data
    #
    print('>> Writing superpixel data information')

    output = h5py.File(args.outputSuperpixelFeatureFile, 'w')
    output.create_dataset('features', data=superpixel_data)
    output.create_dataset('x_centroid', data=x_centroids)
    output.create_dataset('y_centroid', data=y_centroids)
    output.close()
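
# A small read-back sketch (not part of the example above) for the HDF5 file
# written above; 'superpixels.h5' is a placeholder for whatever was passed as
# outputSuperpixelFeatureFile.
import h5py

with h5py.File('superpixels.h5', 'r') as f:
    features = f['features'][()]     # float32 array of superpixel features
    x = f['x_centroid'][()]          # shape (n_superpixels, 1)
    y = f['y_centroid'][()]          # shape (n_superpixels, 1)

print(features.shape, x.shape, y.shape)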
Example #6
def main(args):

    total_time_profiler = {}

    total_start_time = time.time()

    # =========================================================================
    # ======================= Create Dask Client ==============================
    # =========================================================================
    print('\n>> Creating Dask client ...\n')

    start_time = time.time()
    c = cli_utils.create_dask_client(args)
    print(c)
    dask_setup_time = time.time() - start_time
    temp_time = cli_utils.disp_time_hms(dask_setup_time)
    print('Dask setup time = {}'.format(
        temp_time))
    total_time_profiler['Dask setup time'] = temp_time

    # =========================================================================
    # ========================= Read Input Image ==============================
    # =========================================================================

    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)
    ts_metadata = ts.getMetadata()

    print(json.dumps(ts_metadata, indent=2))
    if np.all(np.array(args.analysis_roi) == -1):
        process_whole_image = True
    else:
        process_whole_image = False
    is_wsi = ts_metadata['magnification'] is not None

    # =========================================================================
    # ===================== Compute Foreground Mask ===========================
    # =========================================================================

    if is_wsi and process_whole_image:

        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        start_time = time.time()

        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)

        fgnd_time = time.time() - start_time

        tmp_time = cli_utils.disp_time_hms(fgnd_time)
        print('low-res foreground mask computation time = {}'.format(tmp_time))
        total_time_profiler[
            'low-res foreground mask computation time'] = tmp_time

    # =========================================================================
    # ================== Compute foreground fraction ==========================
    # =========================================================================
    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
        'resample': True
    }
    tile_fgnd_frac_list = [1.0]
    if not process_whole_image:

        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels'
        }
    # =========================================================================
    if is_wsi:
        print('\n>> Computing foreground fraction of all tiles ...\n')

        start_time = time.time()

        num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']

        print('Number of tiles = {}'.format(num_tiles))

        if process_whole_image:
            tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
                args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
                it_kwargs
            )

        else:

            tile_fgnd_frac_list = np.full(num_tiles, 1.0)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)

        percent_fgnd_tiles = 100.0 * num_fgnd_tiles / num_tiles

        fgnd_frac_comp_time = time.time() - start_time

        print('Number of foreground tiles = {:d} ({:.2f}%)'.format(
            num_fgnd_tiles, percent_fgnd_tiles))

        print('Tile foreground fraction computation time = {}'.format(
            cli_utils.disp_time_hms(fgnd_frac_comp_time)))

    # =========================================================================
    # ========================= Compute reinhard stats ========================
    # =========================================================================
    src_mu_lab = None
    src_sigma_lab = None

    print('\n>> Computing reinhard color normalization stats ...\n')

    start_time = time.time()

    # src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
    #     args.inputImageFile, 0.01, magnification=args.analysis_mag,
    #     tissue_seg_mag=0.625)
    src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
        args.inputImageFile, 0.01, magnification=args.analysis_mag)

    print('Reinhard stats')
    print(src_mu_lab, src_sigma_lab)

    rstats_time = time.time() - start_time

    print('Reinhard stats computation time = {}'.format(
        cli_utils.disp_time_hms(rstats_time)))

    # =========================================================================
    # ======================= Detect Nuclei in Parallel - Dask ===============
    # =========================================================================
    print('\n>> Detecting nuclei ...\n')
    start_time = time.time()

    prep_time_profiler = []
    color_deconv_time_profiler = []
    total_loading_time_profiler = []
    ckpt_loading_time_profiler = []
    model_inference_time_profiler = []
    detection_time_profiler = []
    tile_shapes = []
    tile_nuclei_list = []
    num_nuclie = []
    annotation_dict = []
    analysis_dict = []

    annotation_dict_list = []
    nuclei_annot_list = []

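    # per-tile outputs are gathered as Dask delayed objects inside the loop and
    # evaluated with one dask.compute call; the finally block below ensures the
    # timing CSV is written even if detection fails part-way through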
    try:
        for tile in ts.tileIterator(**it_kwargs):

            tile_position = tile['tile_position']['position']
            if is_wsi and tile_fgnd_frac_list[tile_position] <= args.min_fgnd_frac:
                continue
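            # when processing a whole slide, skip partial tiles at the slide
            # border whose width or height differs from the requested tile size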
            if is_wsi and process_whole_image and (tile['width'] != args.analysis_tile_size or tile['height'] != args.analysis_tile_size):
                continue

            tmp_csv = dask.delayed(detect_tile_nuclei)(
                args.inputImageFile,
                tile_position,
                args, it_kwargs,
                src_mu_lab, src_sigma_lab
            )

            prep_time_profiler.append(tmp_csv['PreparationTime'])
            color_deconv_time_profiler.append(tmp_csv['ColorDeconvTime'])
            total_loading_time_profiler.append(tmp_csv['TotalTileLoadingTime'])
            ckpt_loading_time_profiler.append(tmp_csv['CKPTLoadingTime'])
            model_inference_time_profiler.append(tmp_csv['ModelInfernceTime'])
            detection_time_profiler.append(tmp_csv['DetectionTime'])
            tile_shapes.append(tmp_csv['ROIShape'])
            tile_nuclei_list.append(tmp_csv['ObjectsDict'])
            num_nuclie.append(tmp_csv['NumObjects'])
            annotation_dict.append(tmp_csv['AnnotationDict'])
            analysis_dict.append(tmp_csv['AnalysisDict'])

        prep_time_profiler,\
            color_deconv_time_profiler,\
            total_loading_time_profiler,\
            ckpt_loading_time_profiler,\
            model_inference_time_profiler,\
            detection_time_profiler,\
            tile_shapes,\
            tile_nuclei_list,\
            num_nuclie,\
            annotation_dict,\
            analysis_dict = dask.compute(prep_time_profiler,
                                         color_deconv_time_profiler,
                                         total_loading_time_profiler,
                                         ckpt_loading_time_profiler,
                                         model_inference_time_profiler,
                                         detection_time_profiler,
                                         tile_shapes,
                                         tile_nuclei_list,
                                         num_nuclie,
                                         annotation_dict,
                                         analysis_dict
                                         )

        nuclei_annot_list = list(
            itertools.chain.from_iterable(list(tile_nuclei_list)))
        num_nuclei = len(nuclei_annot_list)

        nuclei_detection_time = time.time() - start_time

        print('Number of nuclei = {}'.format(num_nuclei))
        print('Nuclei detection time = {}'.format(
            cli_utils.disp_time_hms(nuclei_detection_time)))

        annotation_dict_list = list(
            itertools.chain.from_iterable(list(annotation_dict)))

    finally:
        agg_csv = {}
        agg_csv['PreparationTime'] = prep_time_profiler
        agg_csv['ColorDeconvTime'] = color_deconv_time_profiler
        agg_csv['TotalTileLoadingTime'] = total_loading_time_profiler
        agg_csv['CKPTLoadingTime'] = ckpt_loading_time_profiler
        agg_csv['ModelInfernceTime'] = model_inference_time_profiler
        agg_csv['DetectionTime'] = detection_time_profiler
        agg_csv['ROIShape'] = tile_shapes
        agg_csv['ObjectsDict'] = tile_nuclei_list
        agg_csv['NumObjects'] = num_nuclie

        df = pd.DataFrame(agg_csv,
                          columns=['PreparationTime', 'ColorDeconvTime',
                                   'TotalTileLoadingTime',
                                   'CKPTLoadingTime', 'ModelInfernceTime',
                                   'DetectionTime',
                                   'ROIShape',
                                   'NumObjects']
                          )
        df.to_csv(args.outputNucleiDetectionTimeProfilingFile)

    # ====================================================================================
    # ======================= Actual Annotation Writing ======================
    # ====================================================================================

    print('\n>> Writing annotation file ...\n')

    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {
        "name":     annot_fname + '-cell-' + args.nuclei_annotation_format,
        "elements": annotation_dict_list
    }

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)

    total_time_taken = time.time() - total_start_time

    print('Total analysis time = {}'.format(
        cli_utils.disp_time_hms(total_time_taken)))
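
# A small summary sketch (not part of the example above) for the timing CSV
# written in the finally block; 'profiling.csv' is a placeholder for
# outputNucleiDetectionTimeProfilingFile.
import pandas as pd

df = pd.read_csv('profiling.csv')
print('tiles processed =', len(df))
print('objects detected =', df['NumObjects'].sum())
print(df[['PreparationTime', 'ColorDeconvTime', 'DetectionTime']].head())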
Example #7
def main(args):  # noqa: C901

    # initiate dask client
    # (alternative: scheduler = dd.LocalCluster(scheduler_port=2222))
    print('\n>> Creating Dask client and printing its values...\n')
    c = create_dask_client()
    print(c)

    ts = large_image.getTileSource(args.inputSlidePath)

    sample_fraction = 0.1
    analysis_mag = 10

    # compute colorspace statistics (mean, variance) for whole slide
    wsi_mean, wsi_stddev = htk_cnorm.reinhard_stats(
        args.inputSlidePath, sample_fraction, analysis_mag)

    # compute tissue/foreground mask at low-res for whole slide images
    im_fgnd_mask_lres, fgnd_seg_scale = cli_utils.segment_wsi_foreground_at_low_res(ts)

    # compute foreground fraction of tiles in parallel using Dask
    analysis_tile_size = 2048
    analysis_mag = 10
    it_kwargs = {
        'tile_size': {'width': analysis_tile_size},
        'scale': {'magnification': analysis_mag},
    }

    tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
        args.inputSlidePath, im_fgnd_mask_lres, fgnd_seg_scale,
        **it_kwargs
    )


    tile_result_list = []
    min_fgnd_frac = 0.001

    for tile in ts.tileIterator(**it_kwargs):
        tile_position = tile['tile_position']['position']
        if tile_fgnd_frac_list[tile_position] <= min_fgnd_frac:
            continue
        # detect superpixel data
        cur_result = dask.delayed(compute_superpixel_data)(
            args.inputSlidePath,
            tile_position,
            wsi_mean, wsi_stddev)

        # append result to list
        tile_result_list.append(cur_result)

    tile_result_list = dask.delayed(tile_result_list).compute()

    # initiate output data list
    superpixel_data = []
    x_centroids = []
    y_centroids = []


    for s_data, x_cent, y_cent in tile_result_list:

        for s_d in s_data:
            superpixel_data.append(s_d)

        for x_c in x_cent:
            x_centroids.append(x_c)

        for y_c in y_cent:
            y_centroids.append(y_c)

    superpixel_data = np.asarray(superpixel_data, dtype=np.float32)


    n_superpixels = len(superpixel_data)
    x_centroids = np.asarray(x_centroids).reshape((n_superpixels, 1))
    y_centroids = np.asarray(y_centroids).reshape((n_superpixels, 1))

    print('>> Writing superpixel data information')

    # output = h5py.File('superpixelResults1', 'w')
    output = h5py.File(args.outputSuperpixelFeatureFile, 'w')
    output.create_dataset('features', data=superpixel_data)
    output.create_dataset('x_centroid', data=x_centroids)
    output.create_dataset('y_centroid', data=y_centroids)
    output.close()