Example #1
    def evaluate(self):
        if self._distributed:
            dist.synchronize()
            predictions = dist.gather(self._predictions, dst=0)
            predictions = list(itertools.chain(*predictions))

            if not dist.is_main_process(): return {}
        else:
            predictions = self._predictions

        if len(predictions) == 0:
            self._logger.error(
                "[COCOEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            file_path = os.path.join(self._output_dir,
                                     "instances_predictions.pth")
            with open(file_path, "wb") as f:
                torch.save(predictions, f)

        self._results = OrderedDict()
        if "instances" in predictions[0]: self._eval_instances(predictions)

        # Copy so the caller can do whatever with results
        return copy.deepcopy(self._results)
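
A minimal usage sketch (an assumption, not taken from the repository): evaluators of this shape are usually driven by an inference loop that calls a reset()/process() pair before evaluate(); the run_inference name and those method names are hypothetical here.

import torch

def run_inference(model, data_loader, evaluator):
    # Hypothetical driver: reset()/process() are assumed companion methods.
    evaluator.reset()
    model.eval()
    with torch.no_grad():
        for inputs in data_loader:
            outputs = model(inputs)
            evaluator.process(inputs, outputs)
    # evaluate() gathers predictions across ranks and returns {} on non-main processes.
    return evaluator.evaluate()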
Example #2
def _distributed_worker(local_rank, main_func, world_size,
                        num_gpus_per_machine, machine_rank, dist_url, args):
    assert torch.cuda.is_available(), "cuda is not available. Please check your installation."
    global_rank = machine_rank * num_gpus_per_machine + local_rank
    try:
        dist.init_process_group(backend="NCCL",
                                init_method=dist_url,
                                world_size=world_size,
                                rank=global_rank)
    except Exception as e:
        logger = setup_logger(__name__)
        logger.error("Process group URL: {}".format(dist_url))
        raise e

    dist.synchronize()

    assert num_gpus_per_machine <= torch.cuda.device_count()
    torch.cuda.set_device(local_rank)

    # Setup the local process group (which contains ranks within the same machine)
    assert dist._LOCAL_PROCESS_GROUP is None
    num_machines = world_size // num_gpus_per_machine
    for i in range(num_machines):
        ranks_on_i = list(
            range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine))
        pg = dist.new_group(ranks_on_i)
        if i == machine_rank:
            dist._LOCAL_PROCESS_GROUP = pg

    main_func(*args)
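
A sketch of the launcher side (an assumption based on the signature above): because _distributed_worker takes local_rank as its first argument, it can be spawned once per GPU with torch.multiprocessing.spawn; the launch() name and the default port are illustrative.

import torch.multiprocessing as mp

def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0,
           dist_url="tcp://127.0.0.1:29500", args=()):
    world_size = num_machines * num_gpus_per_machine
    if world_size > 1:
        # spawn() prepends the process index (local_rank) to args.
        mp.spawn(
            _distributed_worker,
            nprocs=num_gpus_per_machine,
            args=(main_func, world_size, num_gpus_per_machine,
                  machine_rank, dist_url, args),
            daemon=False,
        )
    else:
        main_func(*args)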
Example #3
    def __init__(self, cfg):
        self._logger = setup_logger(__name__, all_rank=True)
        
        if dist.is_main_process():
            self._logger.debug(f'Config File : \n{cfg}')
            if cfg.VISUALIZE_DIR and not os.path.isdir(cfg.VISUALIZE_DIR) : os.makedirs(cfg.VISUALIZE_DIR)
            self.visualize_dir = cfg.VISUALIZE_DIR
        dist.synchronize()
        
        self.test_loader = build_test_loader(cfg)

        self.model = build_model(cfg)
        self.model.eval()
        if dist.is_main_process():
            self._logger.debug(f"Model Structure\n{self.model}")
                
        if dist.get_world_size() > 1:
            self.model = DistributedDataParallel(self.model, device_ids=[dist.get_local_rank()], broadcast_buffers=False)

        self.checkpointer = Checkpointer(
            self.model,
            cfg.OUTPUT_DIR,
        )
        self.checkpointer.load(cfg.WEIGHTS)

        self.meta_data = MetadataCatalog.get(cfg.LOADER.TEST_DATASET)
        self.class_color = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]
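
A hypothetical visualization helper (an assumption suggested by class_color and VISUALIZE_DIR; the method name and box format are illustrative): draw one rectangle per predicted box, colored by class, and write the image to the visualization directory.

import os
import cv2

def _visualize(self, image, boxes, classes, file_name):
    # Boxes are assumed to be XYXY pixel coordinates; colors are used as BGR tuples.
    for box, cls in zip(boxes, classes):
        color = self.class_color[cls % len(self.class_color)]
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
    cv2.imwrite(os.path.join(self.visualize_dir, file_name), image)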
Example #4
    def _eval_instances(self):
        if self._distributed:
            dist.synchronize()
            all_predictions = dist.gather(self._pred_instances, dst=0)
            if not dist.is_main_process() : return {}

            predictions = defaultdict(list)
            for predictions_per_rank in all_predictions:
                for clsid, lines in predictions_per_rank.items():
                    predictions[clsid].extend(lines)
            del all_predictions

        else:
            predictions = self._pred_instances

        results = OrderedDict()

        with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname:
            res_file_template = os.path.join(dirname, "{}.txt")

            aps = defaultdict(float)  # class name -> AP at IoU 0.5
            for cls_id, cls_name in enumerate(self._category):
                pred_cls = predictions.get(cls_id, None)
                if pred_cls is None : continue

                with open(res_file_template.format(cls_name), "w") as f:
                    for pred in pred_cls:
                        line = f"{pred['image_id']} {pred['score']:.3f}"
                        if 'pred_box' in pred :
                            xmin, ymin, xmax, ymax = pred['pred_box']
                            # The inverse of data loading logic in `loader/data/pascal_voc/load_data.py`
                            xmin += 1
                            ymin += 1
                            line = f"{line} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}"

                        f.write(f'{line}\n')

                thresh = 50
                rec, prec, ap = voc_eval(
                    res_file_template,
                    self._anno_file_template,
                    self._image_set_path,
                    cls_name,
                    ovthresh=thresh / 100.0,
                    use_07_metric=self._is_2007,
                )
                aps[cls_name] = ap * 100
        
        mAP = np.mean(list(aps.values()))
        aps['mAP'] = mAP
        results["bbox"] = aps
        
        table = create_small_table(results['bbox'])
        self._logger.info(f"\n{table}")

        return results
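
A hypothetical process() counterpart (an assumption inferred from the fields read above): the writer expects _pred_instances to map a class id to records carrying image_id, score, and an optional XYXY pred_box; the instance field names below are illustrative.

def process(self, inputs, outputs):
    for inp, out in zip(inputs, outputs):
        image_id = inp["image_id"]
        instances = out["instances"].to("cpu")  # field names are assumptions
        boxes = instances.pred_boxes.tensor.numpy()
        scores = instances.scores.numpy()
        classes = instances.pred_classes.numpy()
        for box, score, cls in zip(boxes, scores, classes):
            self._pred_instances[int(cls)].append({
                "image_id": image_id,
                "score": float(score),
                "pred_box": box,
            })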
Example #5
    def __init__(self, cfg):
        super().__init__(cfg)

        if cfg.SEED < 0 : cfg.SEED = dist.shared_random_seed()
        self._seed = cfg.SEED
        seed_all_rng(self._seed)
        
        self._logger.debug(f'Config File : \n{cfg}')
        if dist.is_main_process():
            if cfg.OUTPUT_DIR and not os.path.isdir(cfg.OUTPUT_DIR) : os.makedirs(cfg.OUTPUT_DIR)
            with open(os.path.join(cfg.OUTPUT_DIR, 'config'), 'w') as f:
                f.write(cfg.dump())
        dist.synchronize()
        
        self.train_loader = build_train_loader(cfg)
        self.test_loader = build_test_loader(cfg)
        self.train_iter = iter(self.train_loader)

        self.model = build_model(cfg)
        self.model.train()
        if dist.is_main_process():
            self._logger.debug(f"Model Structure\n{self.model}")
        
        self.optimizer = build_optimizer(cfg, self.model)
        self.optimizer.zero_grad()
        self.scheduler = build_lr_scheduler(cfg, self.optimizer)
        self.accumulate = cfg.SOLVER.ACCUMULATE
        
        if dist.get_world_size() > 1:
            self.model = DistributedDataParallel(self.model, device_ids=[dist.get_local_rank()], broadcast_buffers=False)

        self.weight_path = cfg.WEIGHTS
        self.checkpointer = Checkpointer(
            self.model,
            cfg.OUTPUT_DIR,
            optimizer=self.optimizer,
            scheduler=self.scheduler,
        )

        self.evaluator = build_evaluator(cfg)

        hooks = build_hooks(cfg, self.model, self.optimizer, self.scheduler, self.checkpointer)
        self.register_hooks(hooks)
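
A hypothetical training-step sketch (an assumption based on the optimizer, scheduler, and cfg.SOLVER.ACCUMULATE attributes set up above): each loss is scaled by the accumulation factor and the optimizer only steps every `accumulate` iterations.

def run_step(self, iteration):
    data = next(self.train_iter)
    loss_dict = self.model(data)  # assumes the model returns a dict of losses in train mode
    losses = sum(loss_dict.values()) / self.accumulate
    losses.backward()
    if (iteration + 1) % self.accumulate == 0:
        self.optimizer.step()
        self.scheduler.step()
        self.optimizer.zero_grad()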
Example #6
    def _do_eval(self):
        results = self.trainer.test()

        if results:
            assert isinstance(
                results, dict
            ), f"Eval function must return a dict. Got {results} instead."

            flattened_results = self.flatten_results_dict(results)
            for k, v in flattened_results.items():
                try:
                    v = float(v)
                except Exception:
                    raise ValueError(
                        "[EvalHook] eval_function should return a nested dict of float. "
                        f"Got '{k}: {v}' instead.")
            self.trainer.storage.put_scalars(**flattened_results,
                                             smoothing_hint=False)

        dist.synchronize()
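
A sketch of the flatten_results_dict helper used above (an assumption; the actual implementation may differ): nested keys are joined with '/' so each leaf value can be logged as a scalar, e.g. {'bbox': {'AP50': 51.2}} becomes {'bbox/AP50': 51.2}.

def flatten_results_dict(results):
    flat = {}
    for k, v in results.items():
        if isinstance(v, dict):
            for kk, vv in flatten_results_dict(v).items():
                flat[f"{k}/{kk}"] = vv
        else:
            flat[k] = v
    return flat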
Example #7
    def _eval_proposals(self):
        if self._distributed:
            dist.synchronize()
            all_predictions = dist.gather(self._pred_proposals, dst=0)
            if not dist.is_main_process(): return {}

            predictions = list()
            for predictions_per_rank in all_predictions:
                predictions.extend(predictions_per_rank)
            del all_predictions

        else:
            predictions = self._pred_proposals

        results = OrderedDict()
        mAP = defaultdict(float)  # iou -> ap

        for thresh in range(50, 100, 5):
            rec, prec, ap = voc_eval(
                predictions,
                self._anno_file_template,
                self._image_set_path,
                ovthresh=thresh / 100.0,
                use_07_metric=self._is_2007,
            )
            mAP[thresh] = ap * 100

        results["proposal"] = {
            "AP": np.mean(list(mAP.values())),
            "AP50": mAP[50],
            "AP75": mAP[75]
        }

        table = create_small_table(results['proposal'])
        self._logger.info(f"\n{table}")

        return results