def evaluate(self): if self._distributed: comm.synchronize() self._predictions = comm.gather(self._predictions, dst=0) self._predictions = list(itertools.chain(*self._predictions)) self._targets = comm.gather(self._targets, dst=0) self._targets = list(itertools.chain(*self._targets)) if not comm.is_main_process(): return {} if len(self._predictions) == 0: self._logger.warning("[ClassificationEvaluator] Did not receive valid predictions.") return {} if self._output_dir: PathManager.mkdirs(self._output_dir) file_path = os.path.join(self._output_dir, "instances_predictions.pth") with PathManager.open(file_path, "wb") as f: torch.save(self._predictions, f) self._results = OrderedDict() assert len(self._predictions) == len(self._targets) if self._predictions[0] is not None: self._eval_classification_accuracy() if self._dump: _dump_to_markdown(self._dump_infos) # Copy so the caller can do whatever with results return copy.deepcopy(self._results)
def _write_metrics(self, metrics_dict: dict): """ Args: metrics_dict (dict): dict of scalar metrics """ metrics_dict = { k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v) for k, v in metrics_dict.items() } # gather metrics among all workers for logging # This assumes we do DDP-style training, which is currently the only # supported method in cvpods. all_metrics_dict = comm.gather(metrics_dict) if comm.is_main_process(): if "data_time" in all_metrics_dict[0]: # data_time among workers can have high variance. The actual latency # caused by data_time is the maximum among workers. data_time = np.max( [x.pop("data_time") for x in all_metrics_dict]) self.storage.put_scalar("data_time", data_time) # average the rest metrics metrics_dict = { k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() } total_losses_reduced = sum(loss for key, loss in metrics_dict.items() if "loss" in key) self.storage.put_scalar("total_loss", total_losses_reduced) if len(metrics_dict) > 1: self.storage.put_scalars(**metrics_dict)
def evaluate(self): if self._distributed: comm.synchronize() self._predictions = comm.gather(self._predictions, dst=0) self._predictions = list(itertools.chain(*self._predictions)) if not comm.is_main_process(): return {} if len(self._predictions) == 0: logger.warning( "[COCOEvaluator] Did not receive valid predictions.") return {} if self._output_dir: ensure_dir(self._output_dir) file_path = os.path.join(self._output_dir, "instances_predictions.pth") with megfile.smart_open(file_path, "wb") as f: torch.save(self._predictions, f) self._results = OrderedDict() if "instances" in self._predictions[0]: self._eval_predictions(set(self._tasks)) if self._dump: _dump_to_markdown(self._dump_infos) # Copy so the caller can do whatever with results return copy.deepcopy(self._results)
def evaluate(self): """ Returns: dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75". """ all_predictions = comm.gather(self._predictions, dst=0) if not comm.is_main_process(): return predictions = defaultdict(list) for predictions_per_rank in all_predictions: for clsid, lines in predictions_per_rank.items(): predictions[clsid].extend(lines) del all_predictions self._logger.info( "Evaluating {} using {} metric. " "Note that results do not use the official Matlab API.".format( self._dataset_name, 2007 if self._is_2007 else 2012)) with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: res_file_template = os.path.join(dirname, "{}.txt") aps = defaultdict(list) # iou -> ap per class for cls_id, cls_name in enumerate(self._class_names): lines = predictions.get(cls_id, [""]) with open(res_file_template.format(cls_name), "w") as f: f.write("\n".join(lines)) for thresh in range(50, 100, 5): rec, prec, ap = voc_eval( res_file_template, self._anno_file_template, self._image_set_path, cls_name, ovthresh=thresh / 100.0, use_07_metric=self._is_2007, ) aps[thresh].append(ap * 100) ret = OrderedDict() mAP = {iou: np.mean(x) for iou, x in aps.items()} ret["bbox"] = { "AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75] } small_table = create_small_table(ret["bbox"]) self._logger.info("Evaluation results for bbox: \n" + small_table) if self._dump: dump_info_one_task = { "task": "bbox", "tables": [small_table], } _dump_to_markdown([dump_info_one_task]) return ret
def evaluate(self): comm.synchronize() self._predictions = comm.gather(self._predictions) self._predictions = list(itertools.chain(*self._predictions)) if not comm.is_main_process(): return # gt_json = PathManager.get_local_path(self._metadata.panoptic_json) gt_json = self._metadata.panoptic_json gt_folder = self._metadata.panoptic_root with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir: logger.info( "Writing all panoptic predictions to {} ...".format(pred_dir)) for p in self._predictions: with open(os.path.join(pred_dir, p["file_name"]), "wb") as f: f.write(p.pop("png_string")) with open(gt_json, "r") as f: json_data = json.load(f) json_data["annotations"] = self._predictions with megfile.smart_open(self._predictions_json, "w") as f: f.write(json.dumps(json_data)) from panopticapi.evaluation import pq_compute with contextlib.redirect_stdout(io.StringIO()): pq_res = pq_compute( gt_json, self._predictions_json, gt_folder=gt_folder, pred_folder=pred_dir, ) res = {} res["PQ"] = 100 * pq_res["All"]["pq"] res["SQ"] = 100 * pq_res["All"]["sq"] res["RQ"] = 100 * pq_res["All"]["rq"] res["PQ_th"] = 100 * pq_res["Things"]["pq"] res["SQ_th"] = 100 * pq_res["Things"]["sq"] res["RQ_th"] = 100 * pq_res["Things"]["rq"] res["PQ_st"] = 100 * pq_res["Stuff"]["pq"] res["SQ_st"] = 100 * pq_res["Stuff"]["sq"] res["RQ_st"] = 100 * pq_res["Stuff"]["rq"] results = OrderedDict({"panoptic_seg": res}) table = _print_panoptic_results(pq_res) if self._dump: dump_info_one_task = { "task": "panoptic_seg", "tables": [table], } _dump_to_markdown([dump_info_one_task]) return results
def _write_metrics( self, loss_dict: Dict[str, torch.Tensor], data_time: float, prefix: str = "", ): """ Args: loss_dict (dict): dict of scalar losses data_time (float): time taken by the dataloader iteration """ device = next(iter(loss_dict.values())).device # Use a new stream so these ops don't wait for DDP or backward with torch.cuda.stream(torch.cuda.Stream() if device.type == "cuda" else None): metrics_dict = { k: v.detach().cpu().item() for k, v in loss_dict.items() } metrics_dict["data_time"] = data_time # Gather metrics among all workers for logging # This assumes we do DDP-style training, which is currently the only # supported method in cvpods. all_metrics_dict = comm.gather(metrics_dict) if comm.is_main_process(): storage = get_event_storage() # data_time among workers can have high variance. The actual latency # caused by data_time is the maximum among workers. data_time = np.max([x.pop("data_time") for x in all_metrics_dict]) storage.put_scalar("data_time", data_time) # average the rest metrics metrics_dict = { k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() } total_losses_reduced = sum(loss for key, loss in metrics_dict.items() if "loss" in key) storage.put_scalar("{}total_loss".format(prefix), total_losses_reduced) if len(metrics_dict) > 1: storage.put_scalars(**metrics_dict)
def evaluate(self): if self._distributed: comm.synchronize() self._predictions = comm.gather(self._predictions, dst=0) self._predictions = list(itertools.chain(*self._predictions)) if not comm.is_main_process(): return {} if len(self._predictions) == 0: self._logger.warning( "[COCOEvaluator] Did not receive valid predictions.") return {} if self._output_dir: PathManager.mkdirs(self._output_dir) file_path = os.path.join(self._output_dir, "instances_predictions.pth") with PathManager.open(file_path, "wb") as f: torch.save(self._predictions, f) self._results = OrderedDict() if "proposals" in self._predictions[0]: self._eval_box_proposals() if "instances" in self._predictions[0]: self._eval_predictions(set(self._tasks)) if self._dump: extra_infos = { "title": os.path.basename(os.getcwd()), "seed": self.cfg.SEED, } _dump_to_markdown(extra_infos, self._dump_infos) # Copy so the caller can do whatever with results return copy.deepcopy(self._results)