Example #1
0
def cls_weights(pth=args.train_image_pth,
                ignore_index=None,
                ignore_cls=False,
                ignore_seg=False):
    '''
    given gt.npy,
    calculates class distributions
    of images and returns inverse
    '''
    metadata_pth = '{}/gt.npy'.format(pth)
    metadata = ufs.fetch_metadata(metadata_pth)

    numsamples_cls = np.zeros((args.num_classes, ), dtype=np.int64)
    numsamples_seg = np.zeros((args.num_classes, ), dtype=np.int64)

    for _, item in metadata.items():
        for _, subitem in item.items():

            if not ignore_seg and isinstance(subitem['label'], str):
                l = Image.open(subitem['label'])
                l = np.array(l)
                n = np.bincount(l.reshape(-1), minlength=args.num_classes)
                numsamples_seg += n

            if not ignore_cls and isinstance(subitem['label'], int):
                numsamples_cls[int(subitem['label'])] += 1

    if ignore_index is not None:
        numsamples_seg[ignore_index] = 0
        numsamples_cls[ignore_index] = 0

    ratios_cls, ratios_seg = numsamples_cls / (
        args.epsilon + numsamples_cls.sum()), numsamples_seg / (
            args.epsilon + numsamples_seg.sum())

    ' find classes with sample count > 0'
    nonzero_cls = np.nonzero(numsamples_cls)
    nonzero_seg = np.nonzero(numsamples_seg)

    'inverse ratios (i.e. weights)'
    ratios_cls = 1.0 / ratios_cls[nonzero_cls]
    ratios_seg = 1.0 / ratios_seg[nonzero_seg]

    'placeholder for class weights'
    cls_weights_cls = np.zeros((args.num_classes, ))
    cls_weights_seg = np.zeros((args.num_classes, ))

    'normalize max weight to 1'
    if ~(ratios_cls == []):
        ratios_cls /= (args.epsilon + ratios_cls.max())
        cls_weights_cls[nonzero_cls] = ratios_cls
    if ~(ratios_seg == []):
        ratios_seg /= (args.epsilon + ratios_seg.max())
        cls_weights_seg[nonzero_seg] = ratios_seg

    return cls_weights_cls, cls_weights_seg
Example #2
0
def cls_ratios_hr(pth=args.train_hr_image_pth, ignore_index=None):

    import utils.dataset_hr as ds

    metadata_pth = ufs.fix_path('../{}/gt.npy'.format(pth))
    metadata = ufs.fetch_metadata(metadata_pth)

    ' build the datalist '
    cls = np.zeros(args.num_classes, )
    for filename in metadata:
        first_region_id = list(metadata[filename].keys())[0]
        first_sub_region_id = list(
            metadata[filename][first_region_id].keys())[0]
        pth = metadata[filename][first_region_id][first_sub_region_id][
            'wsipath']
        pth = ufs.fix_path(pth)

        _wsi_ = openslide.OpenSlide(pth)

        params = {
            'iw': _wsi_.level_dimensions[0][0],
            'ih': _wsi_.level_dimensions[0][1],
            'tile_w': ds.HR_PATCH_W,
            'tile_h': ds.HR_PATCH_H,
            'scan_level': ds.HR_SCAN_LEVEL
        }
        params = DotDict(params)

        for conncomp in metadata[filename]:
            for id in metadata[filename][conncomp]:

                region_obj = metadata[filename][conncomp][id].copy()

                'which/are the points valid for this patch size, scan level combo?'
                region_obj['cnt_xy'], num_cnt_pts = regiontools.map_points(
                    region_obj['cnt_xy'], params)
                region_obj['perim_xy'], num_perim_pts = regiontools.map_points(
                    region_obj['perim_xy'], params)

                if num_cnt_pts >= ds.HR_NUM_CNT_SAMPLES and \
                        num_perim_pts >= ds.HR_NUM_PERIM_SAMPLES:
                    cls[region_obj['label']] += 1

    cls = np.array(cls)
    print(cls, cls.sum())
    cls = cls / cls.sum()

    return cls
Example #3
0
    x = int(x)
    return 1 << (x-1).bit_length()


def ispow2(x):
    x = int(x)
    return x > 0 and (x & (x - 1))

args.raw_train_pth = 'data/sunnybrook/WSI'

ufs.make_folder('../' + args.train_image_pth, True)
wsipaths = glob.glob('../{}/*.svs'.format(args.raw_train_pth))

' check if metadata gt.npy already exists to append to it '
metadata_pth = '../{}/gt.npy'.format(args.train_image_pth)
metadata = ufs.fetch_metadata(metadata_pth)

pwhs = {
    np.maximum(args.tile_w, args.tile_h): 0
}
wsipaths = sorted(wsipaths)
patch_id = 0

num_iters = 1  # each iter randomizes the centers of objects

for _ in range(num_iters):

    for wsipath in tqdm(wsipaths):

        'read scan and get metadata'
        scan = openslide.OpenSlide(wsipath)
Example #4
0
    #args.patch_folder = '/home/ozan/Downloads/breastpathq/datasets (copy)/validation'
    #args.label_csv_path = '/home/ozan/Downloads/breastpathq/datasets (copy)/val_labels.csv'
    savepath = args.val_image_pth
else:
    args.patch_folder = '/home/ozan/Downloads/breastpathq/datasets/train'
    args.label_csv_path = '/home/ozan/Downloads/breastpathq/datasets/train_labels.csv'
    #args.patch_folder = '/home/ozan/Downloads/breastpathq/datasets (copy)/train'
    #args.label_csv_path = '/home/ozan/Downloads/breastpathq/datasets (copy)/train_labels.csv'
    savepath = args.train_image_pth

if __name__ == '__main__':

    'train'
    ufs.make_folder('../' + savepath, is_spie)
    metadata_pth_train = '../{}/gt.npy'.format(savepath)
    metadata = ufs.fetch_metadata(metadata_pth_train)

    raw_gt = {}

    cc = []

    with open('{}'.format(args.label_csv_path)) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader)
        for row in csv_reader:
            image_id = int(row[0])
            region_id = int(row[1])
            cellularity = float(row[2])
            if image_id not in raw_gt:
                raw_gt[image_id] = {}
            raw_gt[image_id][region_id] = cellularity
Example #5
0
    def __init__(self, pth, eval, remove_white, duplicate_dataset):

        self.base_path = Path(__file__).parent
        metadata_pth = (self.base_path /
                        '../{}/gt.npy'.format(pth)).resolve().as_posix()
        metadata = ufs.fetch_metadata(metadata_pth)
        '''
        dataset structure:
        dataset is comprised of patches+wsi regions.
        patches:
        metadata['P'] indicates where all the patches are.
        wsi:
        0. metadata[filename/svs file name]
        1. m[f][connected component id]
        2. m[f][c][region within the connected component]
        @ level 1, we have the connected component
        as given in gt mask. at this level m[f][c][0] 
        always points to the large region
        if the region is large enough, we then split it
        to smaller sub-regions at m[f][c][>=1].
        '''

        ' build the datalist '
        self.datalist = []
        cls = np.zeros(args.num_classes, )

        ' build patch portion of ds '
        if 'P' in metadata:
            P = copy.deepcopy(metadata['P'][0])
            del metadata['P']

            P_dims = {}
            for key in P:
                d = P[key]['dimensions']

                if d not in P_dims:
                    params = {
                        'num_center_points': HR_NUM_CNT_SAMPLES,
                        'num_perim_points': HR_NUM_PERIM_SAMPLES,
                        'scan_level': HR_SCAN_LEVEL,
                        'tile_w': HR_PATCH_W,
                        'tile_h': HR_PATCH_H,
                        'dimensions': d
                    }
                    params = preprocessing.DotDict(params)
                    P_dims[d] = regiontools.get_key_points_for_patch(params)

                item = {**P[key], **P_dims[d]}

                self.datalist.append(item)
                cls[item['label']] += 1

        ' build wsi regions portion '
        self.wsis = {}

        for filename in metadata:
            first_region_id = list(metadata[filename].keys())[0]
            first_sub_region_id = list(
                metadata[filename][first_region_id].keys())[0]
            pth = metadata[filename][first_region_id][first_sub_region_id][
                'wsipath']
            pth = (self.base_path / pth).resolve().as_posix()
            self.wsis[pth] = openslide.OpenSlide(pth)

            if remove_white:
                'get low res. nuclei image/foreground mask'
                scan = self.wsis[pth]
                x, y = scan.level_dimensions[-1]
                mask = scan.read_region((0, 0), scan.level_count - 1,
                                        (x, y)).convert('RGB')
                mask = mask.resize((x // 4, y // 4))
                mask = preprocessing.find_nuclei(mask)
                mask = Image.fromarray(mask.astype(np.uint8)).resize((x, y))
                mask = np.asarray(mask)

            params = {
                'iw':
                self.wsis[pth].level_dimensions[0][0],
                'ih':
                self.wsis[pth].level_dimensions[0][1],
                'tile_w':
                HR_PATCH_W,
                'tile_h':
                HR_PATCH_H,
                'scan_level':
                metadata[filename][first_region_id][first_sub_region_id]
                ['scan_level']
            }
            params = preprocessing.DotDict(params)

            for conncomp in metadata[filename]:
                for id in metadata[filename][conncomp]:
                    region_obj = metadata[filename][conncomp][id].copy()

                    if remove_white:
                        'given points, remove patches that are only white'
                        region_obj[
                            'cnt_xy'], num_cnt_pts = regiontools.remove_white_region(
                                mask, region_obj['cnt_xy'], params)
                        region_obj[
                            'perim_xy'], num_perim_pts = regiontools.remove_white_region(
                                mask, region_obj['perim_xy'], params)

                    'which points valid for this patch size, scan level combo?'
                    region_obj['cnt_xy'], num_cnt_pts = regiontools.map_points(
                        region_obj['cnt_xy'], params)
                    region_obj[
                        'perim_xy'], num_perim_pts = regiontools.map_points(
                            region_obj['perim_xy'], params)

                    if num_cnt_pts >= HR_NUM_CNT_SAMPLES and \
                            num_perim_pts >= HR_NUM_PERIM_SAMPLES:
                        self.datalist.append(region_obj)
                        cls[region_obj['label']] += 1

        self.eval = eval

        cls = np.array(cls)
        '''cls[0] += cls[1]
        cls[1] = cls[2]
        cls[2] = cls[3]
        cls[3] = 0'''

        print(cls)
        cls = cls / cls.sum()
        print(cls)
        if not self.eval:
            args.cls_ratios = cls

        ' augmentation settings '
        self.image_aug = preprocessing.standard_augmentor(self.eval)

        if not self.eval:
            from itertools import chain
            self.datalist = list(
                chain(*[[i] * duplicate_dataset for i in self.datalist]))