def evaluate(self):
    if self._distributed:
        dist.synchronize()
        predictions = dist.gather(self._predictions, dst=0)
        predictions = list(itertools.chain(*predictions))
        if not dist.is_main_process():
            return {}
    else:
        predictions = self._predictions

    if len(predictions) == 0:
        self._logger.error("[COCOEvaluator] Did not receive valid predictions.")
        return {}

    if self._output_dir:
        file_path = os.path.join(self._output_dir, "instances_predictions.pth")
        with open(file_path, "wb") as f:
            torch.save(predictions, f)

    self._results = OrderedDict()
    if "instances" in predictions[0]:
        self._eval_instances(predictions)
    # Copy so the caller can do whatever with results
    return copy.deepcopy(self._results)
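# Hedged usage sketch (not from this repo): how `evaluate()` is typically
# driven. It assumes the evaluator follows a detectron2-style
# reset/process/evaluate protocol; `reset`, `process`, and `run_inference`
# are assumptions for illustration, not confirmed by the source.
import torch

def run_inference(model, data_loader, evaluator):
    evaluator.reset()  # assumed: clears self._predictions on every rank
    with torch.no_grad():
        for inputs in data_loader:
            outputs = model(inputs)
            evaluator.process(inputs, outputs)  # assumed: accumulates per-rank predictions
    # After the gather inside evaluate(), non-main ranks return {};
    # only rank 0 carries the aggregated results.
    return evaluator.evaluate()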
def _distributed_worker(local_rank, main_func, world_size, num_gpus_per_machine,
                        machine_rank, dist_url, args):
    assert torch.cuda.is_available(), "cuda is not available. Please check your installation."
    global_rank = machine_rank * num_gpus_per_machine + local_rank
    try:
        dist.init_process_group(backend="NCCL", init_method=dist_url,
                                world_size=world_size, rank=global_rank)
    except Exception as e:
        logger = setup_logger(__name__)
        logger.error("Process group URL: {}".format(dist_url))
        raise e
    # Synchronize here to prevent a possible timeout after init_process_group.
    dist.synchronize()

    assert num_gpus_per_machine <= torch.cuda.device_count()
    torch.cuda.set_device(local_rank)

    # Setup the local process group (which contains ranks within the same machine)
    assert dist._LOCAL_PROCESS_GROUP is None
    num_machines = world_size // num_gpus_per_machine
    for i in range(num_machines):
        ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine))
        pg = dist.new_group(ranks_on_i)
        if i == machine_rank:
            dist._LOCAL_PROCESS_GROUP = pg

    main_func(*args)
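# Hedged sketch of a launcher for `_distributed_worker`, modeled on the common
# torch.multiprocessing.spawn pattern; the `launch` name and its defaults are
# assumptions, not code from this repo. spawn() passes the process index as
# the first argument, which becomes `local_rank` above.
import torch.multiprocessing as mp

def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0,
           dist_url="tcp://127.0.0.1:29500", args=()):
    world_size = num_machines * num_gpus_per_machine
    if world_size > 1:
        mp.spawn(
            _distributed_worker,
            nprocs=num_gpus_per_machine,
            args=(main_func, world_size, num_gpus_per_machine,
                  machine_rank, dist_url, args),
            daemon=False,
        )
    else:
        main_func(*args)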
def __init__(self, cfg):
    self._logger = setup_logger(__name__, all_rank=True)
    if dist.is_main_process():
        self._logger.debug(f'Config File : \n{cfg}')
        if cfg.VISUALIZE_DIR and not os.path.isdir(cfg.VISUALIZE_DIR):
            os.makedirs(cfg.VISUALIZE_DIR)
    self.visualize_dir = cfg.VISUALIZE_DIR
    dist.synchronize()

    self.test_loader = build_test_loader(cfg)
    self.model = build_model(cfg)
    self.model.eval()
    if dist.is_main_process():
        self._logger.debug(f"Model Structure\n{self.model}")
    if dist.get_world_size() > 1:
        self.model = DistributedDataParallel(self.model,
                                             device_ids=[dist.get_local_rank()],
                                             broadcast_buffers=False)

    self.checkpointer = Checkpointer(
        self.model,
        cfg.OUTPUT_DIR,
    )
    self.checkpointer.load(cfg.WEIGHTS)

    self.meta_data = MetadataCatalog.get(cfg.LOADER.TEST_DATASET)
    self.class_color = [(255, 0, 0), (0, 255, 0), (0, 0, 255),
                        (255, 255, 0), (255, 0, 255)]
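# Hedged sketch of driving this tester. The class name `Tester` and a `test()`
# method that iterates self.test_loader through self.model and feeds an
# evaluator are assumptions inferred from the attributes set up in __init__
# above, not confirmed by the source.
def main(cfg):
    tester = Tester(cfg)
    results = tester.test()  # hypothetical entry point; see assumptions above
    if dist.is_main_process():
        print(results)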
def _eval_instances(self):
    if self._distributed:
        dist.synchronize()
        all_predictions = dist.gather(self._pred_instances, dst=0)
        if not dist.is_main_process():
            return {}
        predictions = defaultdict(list)
        for predictions_per_rank in all_predictions:
            for clsid, lines in predictions_per_rank.items():
                predictions[clsid].extend(lines)
        del all_predictions
    else:
        predictions = self._pred_instances

    results = OrderedDict()
    with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname:
        res_file_template = os.path.join(dirname, "{}.txt")

        aps = defaultdict(float)  # class name -> AP at IoU 0.5
        for cls_id, cls_name in enumerate(self._category):
            pred_cls = predictions.get(cls_id, None)
            if pred_cls is None:
                continue
            # Write one detection per line: "image_id score [xmin ymin xmax ymax]".
            with open(res_file_template.format(cls_name), "w") as f:
                for pred in pred_cls:
                    line = f"{pred['image_id']} {pred['score']:.3f}"
                    if 'pred_box' in pred:
                        xmin, ymin, xmax, ymax = pred['pred_box']
                        # The inverse of data loading logic in `loader/data/pascal_voc/load_data.py`
                        xmin += 1
                        ymin += 1
                        line = f"{line} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}"
                    f.write(f'{line}\n')

            thresh = 50
            rec, prec, ap = voc_eval(
                res_file_template,
                self._anno_file_template,
                self._image_set_path,
                cls_name,
                ovthresh=thresh / 100.0,
                use_07_metric=self._is_2007,
            )
            aps[cls_name] = ap * 100

    mAP = np.mean(list(aps.values()))
    aps['mAP'] = mAP
    results["bbox"] = aps
    table = create_small_table(results['bbox'])
    self._logger.info(f"\n{table}")
    return results
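# For reference, a minimal sketch of how `voc_eval` turns precision/recall
# curves into AP. This mirrors the standard PASCAL VOC reference code
# (11-point interpolation for VOC 2007, all-point area otherwise); it is a
# sketch of the conventional metric, not the exact implementation used here.
import numpy as np

def voc_ap(rec, prec, use_07_metric=False):
    if use_07_metric:
        # 11-point metric: average max precision at recall = 0.0, 0.1, ..., 1.0.
        ap = 0.0
        for t in np.arange(0.0, 1.1, 0.1):
            p = np.max(prec[rec >= t]) if np.any(rec >= t) else 0.0
            ap += p / 11.0
        return ap
    # All-point metric: area under the monotonically decreasing precision envelope.
    mrec = np.concatenate(([0.0], rec, [1.0]))
    mpre = np.concatenate(([0.0], prec, [0.0]))
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])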
def __init__(self, cfg):
    super().__init__(cfg)
    if cfg.SEED < 0:
        cfg.SEED = dist.shared_random_seed()
    self._seed = cfg.SEED
    seed_all_rng(self._seed)

    self._logger.debug(f'Config File : \n{cfg}')
    if dist.is_main_process():
        if cfg.OUTPUT_DIR and not os.path.isdir(cfg.OUTPUT_DIR):
            os.makedirs(cfg.OUTPUT_DIR)
        with open(os.path.join(cfg.OUTPUT_DIR, 'config'), 'w') as f:
            f.write(cfg.dump())
    dist.synchronize()

    self.train_loader = build_train_loader(cfg)
    self.test_loader = build_test_loader(cfg)
    self.train_iter = iter(self.train_loader)

    self.model = build_model(cfg)
    self.model.train()
    if dist.is_main_process():
        self._logger.debug(f"Model Structure\n{self.model}")

    self.optimizer = build_optimizer(cfg, self.model)
    self.optimizer.zero_grad()
    self.scheduler = build_lr_scheduler(cfg, self.optimizer)
    self.accumulate = cfg.SOLVER.ACCUMULATE

    if dist.get_world_size() > 1:
        self.model = DistributedDataParallel(self.model,
                                             device_ids=[dist.get_local_rank()],
                                             broadcast_buffers=False)

    self.weight_path = cfg.WEIGHTS
    self.checkpointer = Checkpointer(
        self.model,
        cfg.OUTPUT_DIR,
        optimizer=self.optimizer,
        scheduler=self.scheduler,
    )

    self.evaluator = build_evaluator(cfg)
    hooks = build_hooks(cfg, self.model, self.optimizer, self.scheduler, self.checkpointer)
    self.register_hooks(hooks)
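# Hedged sketch of how `self.accumulate` (cfg.SOLVER.ACCUMULATE) might be
# consumed in a training step. `run_step`, the loss-dict convention, and
# `self.iter` are assumptions for illustration, not code from this repo.
def run_step(self):
    data = next(self.train_iter)
    loss_dict = self.model(data)
    # Scale the loss so that N accumulated backward passes emulate one
    # N-times-larger batch.
    losses = sum(loss_dict.values()) / self.accumulate
    losses.backward()
    # Step and zero the gradients only every `accumulate` iterations.
    if (self.iter + 1) % self.accumulate == 0:
        self.optimizer.step()
        self.optimizer.zero_grad()
        self.scheduler.step()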
def _do_eval(self):
    results = self.trainer.test()
    if results:
        assert isinstance(results, dict), \
            f"Eval function must return a dict. Got {results} instead."
        flattened_results = self.flatten_results_dict(results)
        for k, v in flattened_results.items():
            try:
                v = float(v)
            except Exception:
                raise ValueError(
                    "[EvalHook] eval_function should return a nested dict of float. "
                    f"Got '{k}: {v}' instead.")
        self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False)
    dist.synchronize()
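# Hedged sketch of the `flatten_results_dict` helper used above, following the
# detectron2 convention of joining nested keys with "/"; the repo's actual
# implementation (a method here, a free function in this sketch) may differ.
def flatten_results_dict(results):
    # {"bbox": {"AP": 40.0}} -> {"bbox/AP": 40.0}
    flat = {}
    for k, v in results.items():
        if isinstance(v, dict):
            for kk, vv in flatten_results_dict(v).items():
                flat[f"{k}/{kk}"] = vv
        else:
            flat[k] = v
    return flat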
def _eval_proposals(self):
    if self._distributed:
        dist.synchronize()
        all_predictions = dist.gather(self._pred_proposals, dst=0)
        if not dist.is_main_process():
            return {}
        predictions = list()
        for predictions_per_rank in all_predictions:
            predictions.extend(predictions_per_rank)
        del all_predictions
    else:
        predictions = self._pred_proposals

    results = OrderedDict()
    mAP = defaultdict(float)  # IoU threshold (in %) -> AP
    for thresh in range(50, 100, 5):
        rec, prec, ap = voc_eval(
            predictions,
            self._anno_file_template,
            self._image_set_path,
            ovthresh=thresh / 100.0,
            use_07_metric=self._is_2007,
        )
        mAP[thresh] = ap * 100
    results["proposal"] = {
        "AP": np.mean(list(mAP.values())),
        "AP50": mAP[50],
        "AP75": mAP[75],
    }
    table = create_small_table(results['proposal'])
    self._logger.info(f"\n{table}")
    return results
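# Hedged sketch of the per-rank accumulation that feeds the gather above.
# `process` and the output field names (`proposals`, `proposal_boxes`,
# `objectness_logits`) are assumptions modeled on detectron2's proposal
# format, not confirmed by this repo.
def process(self, inputs, outputs):
    for inp, out in zip(inputs, outputs):
        boxes = out["proposals"].proposal_boxes.tensor.cpu().numpy()
        scores = out["proposals"].objectness_logits.cpu().numpy()
        # Each rank appends its own records; _eval_proposals gathers them on rank 0.
        self._pred_proposals.append({
            "image_id": inp["image_id"],
            "boxes": boxes,
            "scores": scores,
        })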