Ejemplo n.º 1
0
    def run(self):
        """Run inference over every configured normalisation target."""
        model_path = self.inf_model_path

        # Select the model class that matches the configured output mode.
        if self.model_mode == 'np+xy':
            model_maker = Model_NP_XY
        else:
            model_maker = Model_NP_DIST

        pred_config = PredictConfig(
            model=model_maker(),
            session_init=get_model_loader(model_path),
            input_names=self.eval_inf_input_tensor_names,
            output_names=self.eval_inf_output_tensor_names)
        predictor = OfflinePredictor(pred_config)

        for norm_target in self.inf_norm_codes:
            norm_dir = '%s/%s/' % (self.inf_norm_root_dir, norm_target)
            norm_save_dir = '%s/%s/' % (self.inf_output_dir, norm_target)

            # TODO: cache list to check later norm dir has same number of files
            # Sorted so repeated runs process files in a stable order.
            img_path_list = sorted(
                glob.glob('%s/*%s' % (norm_dir, self.inf_imgs_ext)))

            rm_n_mkdir(norm_save_dir)
            for img_path in img_path_list:
                img_name = os.path.basename(img_path)
                basename = img_name.split('.')[0]
                print(basename, norm_target, end=' ', flush=True)

                # OpenCV loads BGR; the network expects RGB.
                img = cv2.imread(norm_dir + img_name)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                pred_map = self.__gen_prediction(img, predictor)
                sio.savemat('%s/%s.mat' % (norm_save_dir, basename),
                            {'result': [pred_map]})
                print('FINISH')
Ejemplo n.º 2
0
    def run(self):
        """Run inference over every input directory in `inf_data_list`."""
        predictor = OfflinePredictor(self.gen_pred_config())
        for num, data_dir in enumerate(self.inf_data_list):
            # Each input directory gets a numbered output sub-directory.
            save_dir = os.path.join(self.inf_output_dir, str(num))
            print(save_dir)

            # Sorted so repeated runs process images in a stable order.
            img_path_list = sorted(glob.glob(
                os.path.join(data_dir, "*{}".format(self.inf_imgs_ext))))

            rm_n_mkdir(save_dir)
            for img_path in img_path_list:
                img_name = os.path.basename(img_path)
                basename = img_name.split(".")[0]
                print(data_dir, basename, end=" ", flush=True)

                # OpenCV loads BGR; the network expects RGB.
                img = cv2.imread(os.path.join(data_dir, img_name))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                pred_map = self.__gen_prediction(img, predictor)
                sio.savemat(
                    os.path.join(save_dir, "{}.mat".format(basename)),
                    {"result": [pred_map]},
                )
                print(f"Finished. {datetime.now().strftime('%H:%M:%S.%f')}")
Ejemplo n.º 3
0
def stain_normilize(img_dir, save_dir, stain_norm_target, norm_brightness=False):
    """Stain-normalise all `*.png` images in `img_dir` against a target image.

    Args:
        img_dir: directory containing the input `*.png` images.
        save_dir: output directory; it is wiped and re-created.
        stain_norm_target: path to the reference image whose stain profile
            is fitted once and then transferred onto every input image.
        norm_brightness: when True, luminosity-standardize both the target
            and each input image before the stain transfer.
    """
    file_list = glob.glob(os.path.join(img_dir, '*.png'))
    file_list.sort()

    if norm_brightness:
        standardizer = staintools.LuminosityStandardizer()
    stain_normalizer = staintools.StainNormalizer(method='vahadane')

    # Fit the normalizer on the reference image (converted BGR -> RGB).
    target_img = cv2.imread(stain_norm_target)
    target_img = cv2.cvtColor(target_img, cv2.COLOR_BGR2RGB)
    if norm_brightness:
        target_img = standardizer.standardize(target_img)
    stain_normalizer.fit(target_img)

    rm_n_mkdir(save_dir)

    for img_path in file_list:
        filename = os.path.basename(img_path)
        basename = filename.split('.')[0]
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if norm_brightness:
            img = standardizer.standardize(img)
        img = stain_normalizer.transform(img)
        # Back to BGR for cv2.imwrite.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        # Build the output path once instead of re-joining it for the log line.
        out_path = os.path.join(save_dir, '{}.png'.format(basename))
        cv2.imwrite(out_path, img)
        print(f"Saved {out_path}.")
Ejemplo n.º 4
0
    def __infer(self, inf_model_path, input_file_list, pred_out_dir):
        """Run the generator network over `input_file_list` and write
        predictions into `pred_out_dir` (which is wiped first)."""
        # Restore the generator weights, then wrap for multi-GPU inference.
        checkpoint = torch.load(inf_model_path)
        net_g = netdesc.Generator(1, 3).to('cuda')
        net_g.load_state_dict(checkpoint['net_g'])
        net_g = torch.nn.DataParallel(net_g).to('cuda')

        # Pair each file path with its extension-less basename.
        input_info = [
            (path, os.path.basename(path).split('.')[0])
            for path in input_file_list
        ]

        augmentors = self.infer_augmentors()
        infer_dataset = SerialLoader(
                input_info, run_mode='infer',
                shape_augs=iaa.Sequential(augmentors[0]),
                input_augs=iaa.Sequential(augmentors[1]))

        dataloader = data.DataLoader(infer_dataset,
                        num_workers=self.nr_procs_infer,
                        batch_size=1,
                        drop_last=False)

        utils.rm_n_mkdir(pred_out_dir)
        self.__run_list(net_g, dataloader, pred_out_dir)

        return
Ejemplo n.º 5
0
 def run(self):
     """Export the current model as a frozen graph and a SavedModel."""
     exporter = ModelExporter(self.gen_pred_config())
     export_dir = self.model_export_dir
     rm_n_mkdir(export_dir)
     # Frozen inference graph.
     exporter.export_compact(
         filename="{}/compact.pb".format(export_dir))
     # TF-Serving SavedModel under <export_dir>/serving.
     exporter.export_serving(
         os.path.join(export_dir, "serving"),
         signature_name="serving_default",
     )
     print(f"Saved model to {export_dir}.")
Ejemplo n.º 6
0
    def run(self, save_only):
        """Run inference over the input list, or just export the model.

        When `save_only` is True the model is exported (frozen graph +
        serving SavedModel) and no prediction is performed.
        """
        if self.inf_auto_find_chkpt:
            # Newest training run = sub-directory with the largest numeric name.
            run_ids = [
                int(name) for name in os.listdir(self.save_dir)
                if os.path.isdir(os.path.join(self.save_dir, name))
            ]
            self.inf_model_path = os.path.join(self.save_dir, str(max(run_ids)))
            print(f"Inference model path: <{self.inf_model_path}>")
            print('-----Auto Selecting Checkpoint Basing On "%s" Through "%s" Comparison'
                  % (self.inf_auto_metric, self.inf_auto_comparator))
            model_path, stat = get_best_chkpts(
                self.inf_model_path, self.inf_auto_metric, self.inf_auto_comparator)
            print('Selecting: %s' % model_path)
            print('Having Following Statistics:')
            for stat_name, stat_value in stat.items():
                print('\t%s: %s' % (stat_name, stat_value))
        else:
            model_path = self.inf_model_path

        model_constructor = self.get_model()
        pred_config = PredictConfig(
            model=model_constructor(),
            session_init=get_model_loader(model_path),
            input_names=self.eval_inf_input_tensor_names,
            output_names=self.eval_inf_output_tensor_names)
        predictor = OfflinePredictor(pred_config)

        if save_only:
            # Export instead of predicting, then stop.
            exporter = ModelExporter(pred_config)
            rm_n_mkdir(self.model_export_dir)
            print('{}/compact.pb'.format(self.model_export_dir))
            exporter.export_compact(
                filename='{}/compact.pb'.format(self.model_export_dir))
            exporter.export_serving(
                os.path.join(self.model_export_dir, 'serving'),
                signature_name='serving_default')
            return

        for num, data_dir in enumerate(self.inf_data_list):
            save_dir = os.path.join(self.inf_output_dir, str(num))

            # Sorted so repeated runs process images in a stable order.
            img_path_list = sorted(glob.glob(
                os.path.join(data_dir, '*{}'.format(self.inf_imgs_ext))))

            rm_n_mkdir(save_dir)
            for img_path in img_path_list:
                img_name = os.path.basename(img_path)
                basename = img_name.split('.')[0]
                print(data_dir, basename, end=' ', flush=True)

                # OpenCV loads BGR; the network expects RGB.
                img = cv2.imread(os.path.join(data_dir, img_name))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                pred_map = self.__gen_prediction(img, predictor)
                sio.savemat(os.path.join(save_dir,'{}.mat'.format(basename)), {'result':[pred_map]})
                print(f"Finished. {datetime.now().strftime('%H:%M:%S.%f')}")
Ejemplo n.º 7
0
    def run(self):
        """Run inference over nested `<root>/<subdir>` dataset layouts."""
        if self.inf_auto_find_chkpt:
            print('-----Auto Selecting Checkpoint Basing On "%s" Through "%s" Comparison'
                  % (self.inf_auto_metric, self.inf_auto_comparator))
            model_path, stat = get_best_chkpts(self.save_dir,
                                               self.inf_auto_metric,
                                               self.inf_auto_comparator)
            print('Selecting: %s' % model_path)
            print('Having Following Statistics:')
            for stat_name, stat_value in stat.items():
                print('\t%s: %s' % (stat_name, stat_value))
        else:
            model_path = self.inf_model_path

        predictor = OfflinePredictor(PredictConfig(
            model=self.get_model()(),
            session_init=get_model_loader(model_path),
            input_names=self.eval_inf_input_tensor_names,
            output_names=self.eval_inf_output_tensor_names))

        for data_dir_set in self.inf_data_list:
            # Entry layout: [root_dir, output_code, subdir1, subdir2, ...]
            data_root_dir = data_dir_set[0]
            data_out_code = data_dir_set[1]

            for subdir in data_dir_set[2:]:
                data_dir = '%s/%s/' % (data_root_dir, subdir)
                save_dir = '%s/%s/%s' % (self.inf_output_dir, data_out_code,
                                         subdir)

                # Sorted so repeated runs process images in a stable order.
                img_path_list = sorted(
                    glob.glob('%s/*%s' % (data_dir, self.inf_imgs_ext)))

                rm_n_mkdir(save_dir)
                for img_path in img_path_list:
                    img_name = os.path.basename(img_path)
                    basename = img_name.split('.')[0]
                    print(data_dir, basename, end=' ', flush=True)

                    # OpenCV loads BGR; the network expects RGB.
                    img = cv2.imread(data_dir + img_name)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                    pred_map = self.__gen_prediction(img, predictor)
                    sio.savemat('%s/%s.mat' % (save_dir, basename),
                                {'result': [pred_map]})
                    print('FINISH')
Ejemplo n.º 8
0
    def run(self):
        """Run inference and dump the raw prediction maps as `.npy` files."""
        if self.inf_auto_find_chkpt:
            print(
                '-----Auto Selecting Checkpoint Basing On "%s" Through "%s" Comparison'
                % (self.inf_auto_metric, self.inf_auto_comparator))
            model_path, stat = get_best_chkpts(self.save_dir,
                                               self.inf_auto_metric,
                                               self.inf_auto_comparator)
            print("Selecting: %s" % model_path)
            print("Having Following Statistics:")
            for stat_name, stat_value in stat.items():
                print("\t%s: %s" % (stat_name, stat_value))
        else:
            model_path = self.inf_model_path

        pred_config = PredictConfig(
            model=self.get_model()(),
            session_init=get_model_loader(model_path),
            input_names=self.eval_inf_input_tensor_names,
            output_names=self.eval_inf_output_tensor_names,
            create_graph=False,
        )
        predictor = OfflinePredictor(pred_config)

        for data_dir in self.inf_data_list:
            save_dir = self.inf_output_dir + "/raw/"
            # Sorted so repeated runs process images in a stable order.
            img_path_list = sorted(
                glob.glob("%s/*%s" % (data_dir, self.inf_imgs_ext)))

            rm_n_mkdir(save_dir)
            for img_path in img_path_list:
                start = time.time()
                img_name = os.path.basename(img_path)
                basename = img_name.split(".")[0]
                print(data_dir, basename, end=" ", flush=True)

                # OpenCV loads BGR; the network expects RGB.
                img = cv2.imread(data_dir + img_name)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                pred_map = self.__gen_prediction(img, predictor)

                np.save("%s/%s.npy" % (save_dir, basename), [pred_map])
                diff = str(round(time.time() - start, 2))
                print("FINISH. TIME: %s" % diff)
Ejemplo n.º 9
0
    def process_wsi_list(self, run_args):
        """Process a list of whole-slide images.

        Globs every file under `self.input_dir`, runs
        `process_single_file` on each, and writes JSON (plus optional
        thumb/mask) outputs under `self.output_dir`. Slides whose JSON
        output already exists are skipped, so a crashed run can resume.

        Args:
            run_args: arguments as defined in run_infer.py
        """
        self._parse_args(run_args)

        # Output sub-directories are created only when missing so existing
        # results survive a resumed run.
        if not os.path.exists(self.cache_path):
            rm_n_mkdir(self.cache_path)

        if not os.path.exists(self.output_dir + "/json/"):
            rm_n_mkdir(self.output_dir + "/json/")
        if self.save_thumb:
            if not os.path.exists(self.output_dir + "/thumb/"):
                rm_n_mkdir(self.output_dir + "/thumb/")
        if self.save_mask:
            if not os.path.exists(self.output_dir + "/mask/"):
                rm_n_mkdir(self.output_dir + "/mask/")

        wsi_path_list = glob.glob(self.input_dir + "/*")
        wsi_path_list.sort()  # ensure ordering
        for wsi_path in wsi_path_list[:]:
            wsi_base_name = pathlib.Path(wsi_path).stem
            msk_path = "%s/%s.png" % (self.input_mask_dir, wsi_base_name)
            if self.save_thumb or self.save_mask:
                output_file = "%s/json/%s.json" % (self.output_dir,
                                                   wsi_base_name)
            else:
                output_file = "%s/%s.json" % (self.output_dir, wsi_base_name)
            # Resumability: skip slides whose output already exists.
            if os.path.exists(output_file):
                log_info("Skip: %s" % wsi_base_name)
                continue
            try:
                log_info("Process: %s" % wsi_base_name)
                self.process_single_file(wsi_path, msk_path, self.output_dir)
                log_info("Finish")
            except Exception:
                # Log and continue with the next slide. A bare `except:`
                # here would also swallow KeyboardInterrupt/SystemExit.
                logging.exception("Crash")
        rm_n_mkdir(self.cache_path)  # clean up all cache
        return
Ejemplo n.º 10
0
    def run_once(self,
                 opt,
                 run_engine_opt,
                 log_dir,
                 prev_log_dir=None,
                 fold_idx=0):
        """Simply run the defined run_step of the related method once.

        Args:
            opt: experiment description dict (`batch_size`, `nr_epochs`,
                `target_info`, `run_info`, ...).
            run_engine_opt: per-engine configuration; a "train" engine
                is mandatory.
            log_dir: output directory for tensorboard events + stats.json.
            prev_log_dir: previous phase directory; used when a network's
                `pretrained` field is -1 (resume from its last checkpoint).
            fold_idx: cross-validation fold index forwarded to the loaders.
        """
        check_manual_seed(self.seed)

        log_info = {}
        if self.logging:
            # check_log_dir(log_dir)
            rm_n_mkdir(log_dir)

            tfwriter = SummaryWriter(log_dir=log_dir)
            json_log_file = log_dir + "/stats.json"
            with open(json_log_file, "w") as json_file:
                json.dump({}, json_file)  # create empty file
            log_info = {
                "json_file": json_log_file,
                "tfwriter": tfwriter,
            }

        #### one dataloader per engine (train / valid / ...)
        loader_dict = {}
        for runner_name, runner_opt in run_engine_opt.items():
            loader_dict[runner_name] = self._get_datagen(
                opt["batch_size"][runner_name],
                runner_name,
                opt["target_info"]["gen"],
                nr_procs=runner_opt["nr_procs"],
                fold_idx=fold_idx,
            )
        ####
        def get_last_chkpt_path(prev_phase_dir, net_name):
            """Return the checkpoint path for the highest epoch recorded
            in the previous phase's stats.json."""
            stat_file_path = prev_phase_dir + "/stats.json"
            with open(stat_file_path) as stat_file:
                info = json.load(stat_file)
            epoch_list = [int(v) for v in info.keys()]
            last_chkpts_path = "%s/%s_epoch=%d.tar" % (
                prev_phase_dir,
                net_name,
                max(epoch_list),
            )
            return last_chkpts_path

        # TODO: adding way to load pretrained weight or resume the training
        # parsing the network and optimizer information
        net_run_info = {}
        net_info_opt = opt["run_info"]
        for net_name, net_info in net_info_opt.items():
            assert inspect.isclass(net_info["desc"]) or inspect.isfunction(
                net_info["desc"]
            ), "`desc` must be a Class or Function which instantiate NEW objects !!!"
            net_desc = net_info["desc"]()

            # TODO: customize print-out for each run ?
            # summary_string(net_desc, (3, 270, 270), device='cpu')

            pretrained_path = net_info["pretrained"]
            if pretrained_path is not None:
                if pretrained_path == -1:
                    # * depend on logging format so may be broken if logging format has been changed
                    pretrained_path = get_last_chkpt_path(
                        prev_log_dir, net_name)
                    net_state_dict = torch.load(pretrained_path)["desc"]
                else:
                    chkpt_ext = os.path.basename(pretrained_path).split(
                        ".")[-1]
                    if chkpt_ext == "npz":
                        net_state_dict = dict(np.load(pretrained_path))
                        net_state_dict = {
                            k: torch.from_numpy(v)
                            for k, v in net_state_dict.items()
                        }
                    elif chkpt_ext == "tar":  # ! assume same saving format we desire
                        net_state_dict = torch.load(pretrained_path)["desc"]
                    else:
                        # Previously an unknown extension fell through with
                        # `net_state_dict` undefined, raising a confusing
                        # NameError below — fail loudly instead.
                        raise ValueError(
                            "Unsupported checkpoint extension: `%s`"
                            % chkpt_ext)

                colored_word = colored(net_name, color="red", attrs=["bold"])
                print("Model `%s` pretrained path: %s" %
                      (colored_word, pretrained_path))

                # load_state_dict returns (missing keys, unexpected keys)
                net_state_dict = convert_pytorch_checkpoint(net_state_dict)
                load_feedback = net_desc.load_state_dict(net_state_dict,
                                                         strict=False)
                # * uncomment for your convenience
                print("Missing Variables: \n", load_feedback[0])
                print("Detected Unknown Variables: \n", load_feedback[1])

            # * extremely slow to pass this on DGX with 1 GPU, why (?)
            net_desc = DataParallel(net_desc)
            net_desc = net_desc.to("cuda")
            # print(net_desc) # * dump network definition or not?
            optimizer, optimizer_args = net_info["optimizer"]
            optimizer = optimizer(net_desc.parameters(), **optimizer_args)
            # TODO: expand for external aug for scheduler
            # NOTE(review): `nr_iter` is currently unused — presumably
            # intended for a per-iteration scheduler; confirm before removing.
            nr_iter = opt["nr_epochs"] * len(loader_dict["train"])
            scheduler = net_info["lr_scheduler"](optimizer)
            net_run_info[net_name] = {
                "desc": net_desc,
                "optimizer": optimizer,
                "lr_scheduler": scheduler,
                # TODO: standardize API for external hooks
                "extra_info": net_info["extra_info"],
            }

        # parsing the running engine configuration
        assert ("train" in run_engine_opt
                ), "No engine for training detected in description file"

        # initialize runner and attach callback afterward
        # * all engine shared the same network info declaration
        runner_dict = {}
        for runner_name, runner_opt in run_engine_opt.items():
            runner_dict[runner_name] = RunEngine(
                dataloader=loader_dict[runner_name],
                engine_name=runner_name,
                run_step=runner_opt["run_step"],
                run_info=net_run_info,
                log_info=log_info,
            )

        # Wire up callbacks; a callback may trigger another engine
        # (e.g. run validation at the end of a training epoch).
        for runner_name, runner in runner_dict.items():
            callback_info = run_engine_opt[runner_name]["callbacks"]
            for event, callback_list, in callback_info.items():
                for callback in callback_list:
                    if callback.engine_trigger:
                        triggered_runner_name = callback.triggered_engine_name
                        callback.triggered_engine = runner_dict[
                            triggered_runner_name]
                    runner.add_event_handler(event, callback)

        # retrieve main runner
        main_runner = runner_dict["train"]
        main_runner.state.logging = self.logging
        main_runner.state.log_dir = log_dir
        # start the run loop
        main_runner.run(opt["nr_epochs"])

        print("\n")
        print("########################################################")
        print("########################################################")
        print("\n")
        return
Ejemplo n.º 11
0
    img_dirs = cfg.out_preproc if normalized else cfg.img_dirs

    print(f"Using folders <{list(img_dirs.values())}> as input")
    print(f"Saving results to <{list(cfg.out_extract.values())}>")

    for data_mode in img_dirs.keys():
        xtractor = PatchExtractor(cfg.win_size, cfg.step_size)

        img_dir = img_dirs[data_mode]
        ann_dir = cfg.labels_dirs[data_mode]
        
        file_list = glob.glob(os.path.join(img_dir, '*{}'.format(cfg.img_ext)))
        file_list.sort()
        out_dir = cfg.out_extract[data_mode]

        rm_n_mkdir(out_dir)
        for filename in file_list:
            filename = os.path.basename(filename)
            basename = filename.split('.')[0]
            print('Mode: {}, filename - {}'.format(data_mode, filename))

            img = cv2.imread(os.path.join(img_dir, '{}{}'.format(basename, cfg.img_ext)))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            if cfg.type_classification:
                # # assumes that ann is HxWx2 (nuclei class labels are available at index 1 of C)
                ann = np.load(os.path.join(ann_dir, '{}.npy'.format(basename)), allow_pickle=True)
                # ann_inst = ann[...,0]
                # ann_type = ann[...,1]
                ann_inst = ann.item().get('inst_map')
                ann_type = ann.item().get('type_map')
Ejemplo n.º 12
0
    def process_file_list(self, run_args):
        """Process every image tile (< 5000x5000 px) under `input_dir`.

        Files are cached into memory in RAM-bounded batches, run through
        the network, then post-processed (optionally in a process pool)
        into .mat / .json / overlay outputs under `output_dir`.

        Args:
            run_args: dict whose entries are set as attributes on self
                (e.g. input_dir, output_dir, batch_size, nr_types, ...).
        """
        # Every run_args entry becomes an attribute used below.
        for variable, value in run_args.items():
            self.__setattr__(variable, value)

        # * depend on the number of samples and their size, this may be less efficient
        # Escape '[' and ']' so glob treats them literally in the path.
        patterning = lambda x: re.sub("([\[\]])", "[\\1]", x)
        file_path_list = glob.glob(patterning("%s/*" % self.input_dir))
        file_path_list.sort()  # ensure same order
        assert len(file_path_list) > 0, 'Not Detected Any Files From Path'

        rm_n_mkdir(self.output_dir + '/json/')
        rm_n_mkdir(self.output_dir + '/mat/')
        rm_n_mkdir(self.output_dir + '/overlay/')
        if self.save_qupath:
            rm_n_mkdir(self.output_dir + "/qupath/")

        def proc_callback(results):
            """Persist one image's post-processing results.

            Writes the .mat, overlay .png, optional QuPath .tsv, and
            .json outputs, and returns the image name.

            Output format is implicit assumption, taken from
            `_post_process_patches`.
            """
            img_name, pred_map, pred_inst, inst_info_dict, overlaid_img = results

            inst_type = [[k, v["type"]] for k, v in inst_info_dict.items()]
            inst_type = np.array(inst_type)
            mat_dict = {
                "inst_map": pred_inst,
                "inst_type": inst_type,
            }
            if self.nr_types is None:  # matlab does not have None type array
                mat_dict.pop("inst_type", None)

            if self.save_raw_map:
                mat_dict["raw_map"] = pred_map
            save_path = "%s/mat/%s.mat" % (self.output_dir, img_name)
            sio.savemat(save_path, mat_dict)

            # Overlay is RGB internally; convert for cv2.imwrite (BGR).
            save_path = "%s/overlay/%s.png" % (self.output_dir, img_name)
            cv2.imwrite(save_path, cv2.cvtColor(overlaid_img,
                                                cv2.COLOR_RGB2BGR))

            if self.save_qupath:
                nuc_val_list = list(inst_info_dict.values())
                nuc_type_list = np.array([v["type"] for v in nuc_val_list])
                nuc_coms_list = np.array([v["centroid"] for v in nuc_val_list])
                save_path = "%s/qupath/%s.tsv" % (self.output_dir, img_name)
                convert_format.to_qupath(save_path, nuc_coms_list,
                                         nuc_type_list, self.type_info_dict)

            save_path = "%s/json/%s.json" % (self.output_dir, img_name)
            self.__save_json(save_path, inst_info_dict, None)
            return img_name

        def detach_items_of_uid(items_list, uid, nr_expected_items):
            """Split `items_list` into the `nr_expected_items` entries whose
            trailing patch-info field equals `uid`, and the rest.

            NOTE(review): assumes exactly `nr_expected_items` matching
            entries exist; loops until found — verify against caller.
            """
            item_counter = 0
            detached_items_list = []
            remained_items_list = []
            while True:
                pinfo, pdata = items_list.pop(0)
                pinfo = np.squeeze(pinfo)
                if pinfo[-1] == uid:
                    detached_items_list.append([pinfo, pdata])
                    item_counter += 1
                else:
                    remained_items_list.append([pinfo, pdata])
                if item_counter == nr_expected_items:
                    break
            # do this to ensure the ordering
            remained_items_list = remained_items_list + items_list
            return detached_items_list, remained_items_list

        proc_pool = None
        future_list = []
        if self.nr_post_proc_workers > 0:
            proc_pool = ProcessPoolExecutor(self.nr_post_proc_workers)

        # Outer loop: one pass per RAM-bounded cache of files.
        while len(file_path_list) > 0:

            hardware_stats = psutil.virtual_memory()
            available_ram = getattr(hardware_stats, "available")
            available_ram = int(available_ram * 0.6)
            # available_ram >> 20 for MB, >> 30 for GB

            # TODO: this portion looks clunky but seems hard to detach into separate func

            # * caching N-files into memory such that their expected (total) memory usage
            # * does not exceed the designated percentage of currently available memory
            # * the expected memory is a factor w.r.t original input file size and
            # * must be manually provided
            file_idx = 0
            use_path_list = []
            cache_image_list = []
            cache_patch_info_list = []
            cache_image_info_list = []
            while len(file_path_list) > 0:
                file_path = file_path_list.pop(0)

                img = cv2.imread(file_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                src_shape = img.shape

                img, patch_info, top_corner = _prepare_patching(
                    img, self.patch_input_shape, self.patch_output_shape, True)
                # Append the file index as an extra column so each patch
                # can be traced back to its source file after batching.
                self_idx = np.full(patch_info.shape[0],
                                   file_idx,
                                   dtype=np.int32)
                patch_info = np.concatenate([patch_info, self_idx[:, None]],
                                            axis=-1)
                # ? may be expensive op
                patch_info = np.split(patch_info, patch_info.shape[0], axis=0)
                patch_info = [np.squeeze(p) for p in patch_info]

                # * this factor=5 is only applicable for HoVerNet
                expected_usage = sys.getsizeof(img) * 5
                available_ram -= expected_usage
                if available_ram < 0:
                    break

                file_idx += 1
                # if file_idx == 4: break
                use_path_list.append(file_path)
                cache_image_list.append(img)
                cache_patch_info_list.extend(patch_info)
                # TODO: refactor to explicit protocol
                cache_image_info_list.append(
                    [src_shape, len(patch_info), top_corner])

            # * apply neural net on cached data
            dataset = SerializeFileList(cache_image_list,
                                        cache_patch_info_list,
                                        self.patch_input_shape)

            dataloader = data.DataLoader(
                dataset,
                num_workers=self.nr_inference_workers,
                batch_size=self.batch_size,
                drop_last=False,
            )

            pbar = tqdm.tqdm(
                desc="Process Patches",
                leave=True,
                total=int(len(cache_patch_info_list) / self.batch_size) + 1,
                ncols=80,
                ascii=True,
                position=0,
            )

            # Accumulate (patch_info, network_output) pairs per patch.
            accumulated_patch_output = []
            for batch_idx, batch_data in enumerate(dataloader):
                sample_data_list, sample_info_list = batch_data
                sample_output_list = self.run_step(sample_data_list)
                sample_info_list = sample_info_list.numpy()
                curr_batch_size = sample_output_list.shape[0]
                sample_output_list = np.split(sample_output_list,
                                              curr_batch_size,
                                              axis=0)
                sample_info_list = np.split(sample_info_list,
                                            curr_batch_size,
                                            axis=0)
                sample_output_list = list(
                    zip(sample_info_list, sample_output_list))
                accumulated_patch_output.extend(sample_output_list)
                pbar.update()
            pbar.close()

            # * parallely assemble the processed cache data for each file if possible
            for file_idx, file_path in enumerate(use_path_list):
                image_info = cache_image_info_list[file_idx]
                file_ouput_data, accumulated_patch_output = detach_items_of_uid(
                    accumulated_patch_output, file_idx, image_info[1])

                # * detach this into func and multiproc dispatch it
                src_pos = image_info[
                    2]  # src top left corner within padded image
                src_image = cache_image_list[file_idx]
                src_image = src_image[src_pos[0]:src_pos[0] + image_info[0][0],
                                      src_pos[1]:src_pos[1] +
                                      image_info[0][1], ]

                base_name = pathlib.Path(file_path).stem
                file_info = {
                    "src_shape": image_info[0],
                    "src_image": src_image,
                    "name": base_name,
                }

                post_proc_kwargs = {
                    "nr_types": self.nr_types,
                    "return_centroids": True,
                }  # dynamicalize this

                overlay_kwargs = {
                    "draw_dot": self.draw_dot,
                    "type_colour": self.type_info_dict,
                    "line_thickness": 1,
                }
                func_args = (
                    self.post_proc_func,
                    post_proc_kwargs,
                    file_ouput_data,
                    file_info,
                    overlay_kwargs,
                )

                # dispatch for parallel post-processing
                if proc_pool is not None:
                    proc_future = proc_pool.submit(_post_process_patches,
                                                   *func_args)
                    # ! manually poll future and call callback later as there is no guarantee
                    # ! that the callback is called from main thread
                    future_list.append(proc_future)
                else:
                    proc_output = _post_process_patches(*func_args)
                    proc_callback(proc_output)

        if proc_pool is not None:
            # loop over all to check state a.k.a polling
            for future in as_completed(future_list):
                # TODO: way to retrieve which file crashed ?
                # ! silent crash, cancel all and raise error
                if future.exception() is not None:
                    log_info("Silent Crash")
                    # ! cancel somehow leads to cascade error later
                    # ! so just poll it then crash once all future
                    # ! acquired for now
                    # for future in future_list:
                    #     future.cancel()
                    # break
                else:
                    file_path = proc_callback(future.result())
                    log_info("Done Assembling %s" % file_path)
        return
Ejemplo n.º 13
0
    def run(self):
        """Evaluate the classifier on the PCam test split and dump predictions.

        Computes accuracy/error (and AUC when `model_mode` is
        'class_pcam') over the HDF5-packed CamelyonPatch test split, then
        writes per-case probabilities to `<inf_output_dir>/predict.csv`.
        """
        if self.inf_auto_find_chkpt:
            print(
                '-----Auto Selecting Checkpoint Basing On "%s" Through "%s" Comparison'
                % (self.inf_auto_metric, self.inf_auto_comparator))
            model_path, stat = get_best_chkpts(self.save_dir,
                                               self.inf_auto_metric,
                                               self.inf_auto_comparator)
            print('Selecting: %s' % model_path)
            print('Having Following Statistics:')
            for key, value in stat.items():
                print('\t%s: %s' % (key, value))
        else:
            model_path = self.inf_model_path

        # Build the predictor. This was missing: `predictor` was used
        # below without ever being defined (NameError). Construction
        # mirrors the sibling runners in this file.
        model_constructor = self.get_model()
        pred_config = PredictConfig(
            model=model_constructor(),
            session_init=get_model_loader(model_path),
            input_names=self.eval_inf_input_tensor_names,
            output_names=self.eval_inf_output_tensor_names)
        predictor = OfflinePredictor(pred_config)

        ####
        save_dir = self.inf_output_dir
        predict_list = [['case', 'prediction']]

        file_load_img = HDF5Matrix(
            self.inf_data_list[0] + 'camelyonpatch_level_2_split_test_x.h5',
            'x')
        file_load_lab = HDF5Matrix(
            self.inf_data_list[0] + 'camelyonpatch_level_2_split_test_y.h5',
            'y')

        true_list = []
        prob_list = []
        pred_list = []

        num_ims = file_load_img.shape[0]
        last_step = math.floor(num_ims / self.inf_batch_size)
        last_step = self.inf_batch_size * last_step
        last_batch = num_ims - last_step  # size of the final partial batch
        count = 0
        for start_batch in range(0, last_step + 1, self.inf_batch_size):
            sys.stdout.write("\rProcessed (%d/%d)" % (start_batch, num_ims))
            sys.stdout.flush()
            # The final iteration processes the remainder batch.
            batch_size = (self.inf_batch_size
                          if start_batch != last_step else last_batch)
            img = file_load_img[start_batch:start_batch + batch_size]
            img = img.astype('uint8')
            lab = np.squeeze(file_load_lab[start_batch:start_batch +
                                           batch_size])

            prob, pred = self.__gen_prediction(img, predictor)

            for j in range(prob.shape[0]):
                predict_list.append([str(count), str(prob[j])])
                count += 1

            prob_list.extend(prob)
            pred_list.extend(pred)
            true_list.extend(lab)

        prob_list = np.array(prob_list)
        pred_list = np.array(pred_list)
        true_list = np.array(true_list)
        accuracy = (pred_list == true_list).sum() / np.size(true_list)
        error = (pred_list != true_list).sum() / np.size(true_list)

        print('Accuracy (%): ', 100 * accuracy)
        print('Error (%): ', 100 * error)
        if self.model_mode == 'class_pcam':
            auc = roc_auc_score(true_list, prob_list)
            print('AUC: ', auc)

        # Save predictions to csv. Use one file handle instead of
        # re-opening the file in append mode once per row; the directory
        # was just wiped, so "w" is equivalent to the old "a" behavior.
        rm_n_mkdir(save_dir)
        with open('%s/predict.csv' % save_dir, "w") as predict_file:
            for result in predict_list:
                predict_file.write(result[0])
                predict_file.write(',')
                predict_file.write(result[1])
                predict_file.write("\n")
Ejemplo n.º 14
0
import numpy as np
import os
import glob
import scipy.io as sio
from misc.utils import get_inst_centroid, rm_n_mkdir
from metrics.stats_utils import remap_label

# Summarize PanNuke ground-truth annotations: for every .npy label map,
# relabel instances by size, compute centroids, and derive one type label
# per nucleus instance by majority vote over its pixels.
# NOTE(review): the .npy files appear to hold at least two channels —
# channel 0 an instance-id map, channel 1 a per-pixel type map — TODO confirm.
ann_dir = '/home/test/GhulamMurtaza/panNuke/Test/Labels/'  # * directory contains .npy
filepath_list = glob.glob('%s/*.npy' % ann_dir)

save_dir = 'GroundTruth/dump/'  # directory to save summarized info about nuclei

# Recreate the output directory from scratch, then process every annotation.
rm_n_mkdir(save_dir)
for path in filepath_list:
    basename = os.path.basename(path).split('.')[0]

    true_map = np.load(path)
    true_inst = true_map[..., 0]  # instance-id map (0 treated as background below)
    true_type = true_map[..., 1]  # per-pixel nucleus-type map

    # Relabel instance ids contiguously (ordered by size) and get centroids.
    true_inst = remap_label(true_inst, by_size=True)
    true_inst_centroid = get_inst_centroid(true_inst)
    #### * Get class of each instance id, stored at index id-1
    # for ground truth instance blob
    true_id_list = list(np.unique(true_inst))[1:]  # exclude background
    true_inst_type = np.full(len(true_id_list), -1, dtype=np.int32)
    for idx, inst_id in enumerate(true_id_list):
        # Majority vote over the instance's pixels decides its type.
        inst_type = true_type[true_inst == inst_id]
        type_list, type_pixels = np.unique(inst_type, return_counts=True)
        inst_type = type_list[np.argmax(type_pixels)]
        if inst_type != 0:  # there are artifact nuclei (background types); snippet truncated here
Ejemplo n.º 15
0
    def run(self):
        """Run offline inference over the PCam HDF5 test split.

        Loads a checkpoint (auto-selected via ``get_best_chkpts`` when
        ``self.inf_auto_find_chkpt`` is set, otherwise ``self.inf_model_path``),
        predicts the whole test set in batches, prints accuracy and error
        (plus AUC for the ``class_pcam`` model mode), and writes per-case
        predictions to ``<inf_output_dir>/predict.csv``.
        """
        if self.inf_auto_find_chkpt:
            print(
                '-----Auto Selecting Checkpoint Basing On "%s" Through "%s" Comparison'
                % (self.inf_auto_metric, self.inf_auto_comparator))
            model_path, stat = get_best_chkpts(self.save_dir,
                                               self.inf_auto_metric,
                                               self.inf_auto_comparator)
            print("Selecting: %s" % model_path)
            print("Having Following Statistics:")
            for key, value in stat.items():
                print("\t%s: %s" % (key, value))
        else:
            model_path = self.inf_model_path

        model_constructor = self.get_model()
        pred_config = PredictConfig(
            model=model_constructor(),
            session_init=get_model_loader(model_path),
            input_names=self.eval_inf_input_tensor_names,
            output_names=self.eval_inf_output_tensor_names,
            create_graph=False)
        predictor = OfflinePredictor(pred_config)

        ####
        save_dir = self.inf_output_dir
        predict_list = [["case", "prediction"]]

        # PCam test split stored as HDF5: images under "x", labels under "y".
        file_load_img = HDF5Matrix(
            self.inf_data_list[0] + "camelyonpatch_level_2_split_test_x.h5",
            "x")
        file_load_lab = HDF5Matrix(
            self.inf_data_list[0] + "camelyonpatch_level_2_split_test_y.h5",
            "y")

        true_list = []
        prob_list = []
        pred_list = []

        num_ims = file_load_img.shape[0]
        count = 0
        # Iterate in batches; the final slice may be smaller than
        # inf_batch_size.  (The previous loop issued an extra *empty* batch
        # whenever num_ims was exactly divisible by the batch size.)
        for start in range(0, num_ims, self.inf_batch_size):
            sys.stdout.write("\rProcessed (%d/%d)" % (start, num_ims))
            sys.stdout.flush()
            stop = min(start + self.inf_batch_size, num_ims)
            img = file_load_img[start:stop].astype("uint8")
            lab = np.squeeze(file_load_lab[start:stop])

            prob, pred = self.__gen_prediction(img, predictor)

            # One CSV row per case: sequential case index and its probability.
            for j in range(prob.shape[0]):
                predict_list.append([str(count), str(prob[j])])
                count += 1

            prob_list.extend(prob)
            pred_list.extend(pred)
            true_list.extend(lab)

        prob_list = np.array(prob_list)
        pred_list = np.array(pred_list)
        true_list = np.array(true_list)
        accuracy = (pred_list == true_list).sum() / np.size(true_list)
        error = (pred_list != true_list).sum() / np.size(true_list)

        print("Accuracy (%): ", 100 * accuracy)
        print("Error (%): ", 100 * error)
        if self.model_mode == "class_pcam":
            # AUC is only meaningful for the binary PCam classification task.
            auc = roc_auc_score(true_list, prob_list)
            print("AUC: ", auc)

        # Save predictions to csv — open the file once instead of reopening
        # it in append mode for every single row.
        rm_n_mkdir(save_dir)
        with open("%s/predict.csv" % save_dir, "w") as predict_file:
            for case_id, prediction in predict_list:
                predict_file.write("%s,%s\n" % (case_id, prediction))
Ejemplo n.º 16
0
# Normalize the stain appearance of every image in file_list against one or
# more reference (target) images, writing results under a per-target folder.
stain_normalizer = staintools.StainNormalizer(method='vahadane')

# Each reference image path maps to the code naming its output sub-folder.
stain_norm_target = {
    '../data/TCGA-21-5784-01Z-00-DX1.tif' : '5784'
}

for target_path, target_code in stain_norm_target.items():
    # Fit the normalizer on the reference image (RGB order, optionally
    # brightness-standardized first).
    reference = cv2.cvtColor(cv2.imread(target_path), cv2.COLOR_BGR2RGB)
    if norm_brightness:
        reference = standardizer.transform(reference)
    stain_normalizer.fit(reference)

    norm_dir = "%s/%s/" % (save_dir, target_code)
    rm_n_mkdir(norm_dir)

    for img_path in file_list:
        basename = os.path.basename(img_path).split('.')[0]
        print(basename)

        # Load, optionally brightness-standardize, then stain-normalize.
        source = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        if norm_brightness:
            source = standardizer.transform(source)
        source = stain_normalizer.transform(source)

        # Convert back to BGR for OpenCV before writing the PNG.
        cv2.imwrite("%s/%s.png" % (norm_dir, basename),
                    cv2.cvtColor(source, cv2.COLOR_RGB2BGR))