def main(args):
    cfg = setup(args)

    if args.eval_only:
        model = Trainer.build_model(cfg)
        if args.eval_iter != -1:
            # load checkpoint at specified iteration
            ckpt_file = os.path.join(
                cfg.OUTPUT_DIR, 'model_{:07d}.pth'.format(args.eval_iter - 1))
            resume = False
        else:
            # load checkpoint at last iteration
            ckpt_file = cfg.MODEL.WEIGHTS
            resume = True
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            ckpt_file, resume=resume)
        res = Trainer.test(cfg, model)
        if comm.is_main_process():
            verify_results(cfg, res)
            # save evaluation results in json
            os.makedirs(os.path.join(cfg.OUTPUT_DIR, 'inference'),
                        exist_ok=True)
            with open(os.path.join(cfg.OUTPUT_DIR, 'inference',
                                   'res_final.json'), 'w') as fp:
                json.dump(res, fp)
        return res
    elif args.eval_all:
        tester = Tester(cfg)
        all_ckpts = sorted(tester.check_pointer.get_all_checkpoint_files())
        for i, ckpt in enumerate(all_ckpts):
            ckpt_iter = ckpt.split('model_')[-1].split('.pth')[0]
            if ckpt_iter.isnumeric() and int(ckpt_iter) + 1 < args.start_iter:
                # skip evaluation of checkpoints before start iteration
                continue
            if args.end_iter != -1:
                if not ckpt_iter.isnumeric() or int(ckpt_iter) + 1 > args.end_iter:
                    # skip evaluation of checkpoints after end iteration
                    break
            tester.test(ckpt)
        return tester.best_res
    elif args.eval_during_train:
        tester = Tester(cfg)
        saved_checkpoint = None
        while True:
            if tester.check_pointer.has_checkpoint():
                current_ckpt = tester.check_pointer.get_checkpoint_file()
                if saved_checkpoint is None or current_ckpt != saved_checkpoint:
                    saved_checkpoint = current_ckpt
                    tester.test(current_ckpt)
            time.sleep(10)
    else:
        if comm.is_main_process():
            print('Please specify --eval-only, --eval-all, or --eval-during-train')
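# Illustrative, standalone sketch (not part of the codebase): how the
# `--eval-all` branch above recovers the training iteration encoded in a
# checkpoint filename such as "model_0001234.pth" (the file index is
# iteration - 1, matching the `args.eval_iter - 1` used in the eval-only
# branch). The helper name and filenames below are made up for illustration.
def _ckpt_iter(ckpt_path):
    """Return the training iteration a checkpoint corresponds to, or None."""
    stem = ckpt_path.split('model_')[-1].split('.pth')[0]
    return int(stem) + 1 if stem.isnumeric() else None

for name in ['model_0001234.pth', 'model_final.pth']:
    print(name, '->', _ckpt_iter(name))  # prints 1235, then None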
def default_setup(cfg, args):
    """
    Perform some basic common setups at the beginning of a job, including:

    1. Set up the Fs3c logger
    2. Log basic information about environment, cmdline arguments, and config
    3. Backup the config to the output directory

    Args:
        cfg (CfgNode): the full config to be used
        args (argparse.Namespace): the command line arguments to be logged
    """
    output_dir = cfg.OUTPUT_DIR
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    setup_logger(output_dir, distributed_rank=rank, name="fvcore")
    logger = setup_logger(output_dir, distributed_rank=rank)

    logger.info("Rank of current process: {}. World size: {}".format(
        rank, comm.get_world_size()))
    if not cfg.MUTE_HEADER:
        logger.info("Environment info:\n" + collect_env_info())

    logger.info("Command line arguments: " + str(args))
    if hasattr(args, "config_file"):
        logger.info("Contents of args.config_file={}:\n{}".format(
            args.config_file,
            PathManager.open(args.config_file, "r").read()))

    if not cfg.MUTE_HEADER:
        logger.info("Running with full config:\n{}".format(cfg))
    if comm.is_main_process() and output_dir:
        # Note: some of our scripts may expect the existence of
        # config.yaml in output directory
        path = os.path.join(output_dir, "config.yaml")
        with PathManager.open(path, "w") as f:
            f.write(cfg.dump())
        logger.info("Full config saved to {}".format(os.path.abspath(path)))

    # make sure each worker has a different, yet deterministic seed if specified
    seed_all_rng(None if cfg.SEED < 0 else cfg.SEED + rank)

    # cudnn benchmark has large overhead. It shouldn't be used considering the
    # small size of typical validation set.
    if not (hasattr(args, "eval_only") and args.eval_only):
        torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK
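# Illustrative, standalone sketch (not part of the codebase): the seeding rule
# used above, `cfg.SEED + rank`, gives every distributed worker a different but
# reproducible seed, while a negative SEED disables explicit seeding. The
# helper below is a hypothetical stand-in for that one expression.
import random

def seed_for_rank(seed, rank):
    """Return the per-worker seed, or None if seeding is disabled (seed < 0)."""
    return None if seed < 0 else seed + rank

for rank in range(4):
    s = seed_for_rank(42, rank)
    rng = random.Random(s)
    print(rank, s, rng.random())  # same values on every run, different per rank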
def _write_metrics(self, metrics_dict: dict):
    """
    Args:
        metrics_dict (dict): dict of scalar metrics
    """
    metrics_dict = {
        k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
        for k, v in metrics_dict.items()
    }
    # gather metrics among all workers for logging
    # This assumes we do DDP-style training, which is currently the only
    # supported method in Fs3c.
    all_metrics_dict = comm.gather(metrics_dict)

    if comm.is_main_process():
        if "data_time" in all_metrics_dict[0]:
            # data_time among workers can have high variance. The actual latency
            # caused by data_time is the maximum among workers.
            data_time = np.max([x.pop("data_time") for x in all_metrics_dict])
            self.storage.put_scalar("data_time", data_time)

        # average the rest metrics
        metrics_dict = {
            k: np.mean([x[k] for x in all_metrics_dict])
            for k in all_metrics_dict[0].keys()
        }
        total_losses_reduced = sum(loss for loss in metrics_dict.values())

        self.storage.put_scalar("total_loss", total_losses_reduced)
        if len(metrics_dict) > 1:
            self.storage.put_scalars(**metrics_dict)
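# Illustrative, standalone sketch (not part of the codebase): the reduction
# performed above once per-worker metric dicts have been gathered on the main
# process -- `data_time` is reduced with max (the slowest worker determines the
# real latency), the remaining losses with mean, and the means are summed into
# a total loss. The metric values are made up.
import numpy as np

worker_metrics = [  # stand-in for the output of comm.gather(metrics_dict)
    {"data_time": 0.08, "loss_cls": 0.40, "loss_box_reg": 0.25},
    {"data_time": 0.15, "loss_cls": 0.44, "loss_box_reg": 0.23},
]

data_time = np.max([m.pop("data_time") for m in worker_metrics])
reduced = {k: np.mean([m[k] for m in worker_metrics]) for k in worker_metrics[0]}
total_loss = sum(reduced.values())
print(data_time, reduced, total_loss)  # 0.15, the per-loss means, and their sum (~0.66)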
def evaluate(self):
    if self._distributed:
        comm.synchronize()
        self._predictions = comm.gather(self._predictions, dst=0)
        self._predictions = list(itertools.chain(*self._predictions))

        if not comm.is_main_process():
            return {}

    if len(self._predictions) == 0:
        self._logger.warning(
            "[COCOEvaluator] Did not receive valid predictions.")
        return {}

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "instances_predictions.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(self._predictions, f)

    self._results = OrderedDict()
    if "instances" in self._predictions[0]:
        self._eval_predictions()
    # Copy so the caller can do whatever with results
    return copy.deepcopy(self._results)
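# Illustrative, standalone sketch (not part of the codebase): in the distributed
# branch above, comm.gather returns one prediction list per rank on the main
# process, and itertools.chain flattens them into a single list. The placeholder
# strings below stand in for real per-image prediction dicts.
import itertools

per_rank_predictions = [["img1_pred", "img3_pred"], ["img2_pred"]]
merged = list(itertools.chain(*per_rank_predictions))
print(merged)  # ['img1_pred', 'img3_pred', 'img2_pred']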
def evaluate(self):
    results = OrderedDict()
    for evaluator in self._evaluators:
        result = evaluator.evaluate()
        if is_main_process():
            for k, v in result.items():
                assert (
                    k not in results
                ), "Different evaluators produce results with the same key {}".format(k)
                results[k] = v
    return results
def train(self):
    """
    Run training.

    Returns:
        OrderedDict of results, if evaluation is enabled. Otherwise None.
    """
    super().train(self.start_iter, self.max_iter)
    if hasattr(self, "_last_eval_results") and comm.is_main_process():
        verify_results(self.cfg, self._last_eval_results)
        return self._last_eval_results
def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
    is_main_process = comm.is_main_process()
    super().__init__(
        model,
        save_dir,
        save_to_disk=is_main_process if save_to_disk is None else save_to_disk,
        **checkpointables,
    )
def test(cls, cfg, model, evaluators=None):
    """
    Args:
        cfg (CfgNode):
        model (nn.Module):
        evaluators (list[DatasetEvaluator] or None): if None, will call
            :meth:`build_evaluator`. Otherwise, must have the same length as
            `cfg.DATASETS.TEST`.

    Returns:
        dict: a dict of result metrics
    """
    logger = logging.getLogger(__name__)
    if isinstance(evaluators, DatasetEvaluator):
        evaluators = [evaluators]
    if evaluators is not None:
        assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
            len(cfg.DATASETS.TEST), len(evaluators))

    results = OrderedDict()
    for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
        data_loader = cls.build_test_loader(cfg, dataset_name)
        # When evaluators are passed in as arguments,
        # implicitly assume that evaluators can be created before data_loader.
        if evaluators is not None:
            evaluator = evaluators[idx]
        else:
            try:
                evaluator = cls.build_evaluator(cfg, dataset_name)
            except NotImplementedError:
                logger.warning(
                    "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
                    "or implement its `build_evaluator` method.")
                results[dataset_name] = {}
                continue
        results_i = inference_on_dataset(model, data_loader, evaluator)
        results[dataset_name] = results_i
        if comm.is_main_process():
            assert isinstance(results_i, dict), (
                "Evaluator must return a dict on the main process. "
                "Got {} instead.".format(results_i))
            logger.info("Evaluation results for {} in csv format:".format(
                dataset_name))
            print_csv_format(results_i)

    if len(results) == 1:
        results = list(results.values())[0]
    return results
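# Illustrative, standalone sketch (not part of the codebase): the shape of the
# dict `test` returns. With several test datasets the result is keyed by dataset
# name; with exactly one, the per-dataset metrics are returned directly (the
# `len(results) == 1` collapse above). Dataset name and numbers are made up.
from collections import OrderedDict

results = OrderedDict()
results["voc_2007_test_novel"] = {"bbox": {"AP": 31.2, "AP50": 55.0, "AP75": 30.1}}

if len(results) == 1:
    results = list(results.values())[0]
print(results["bbox"]["AP"])  # 31.2 -- callers such as Tester.test rely on this shape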
def main(args):
    cfg = setup(args)

    if args.eval_only:
        model = Trainer.build_model(cfg)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume)
        res = Trainer.test(cfg, model)
        if comm.is_main_process():
            verify_results(cfg, res)
        return res

    """
    If you'd like to do anything fancier than the standard training logic,
    consider writing your own training loop or subclassing the trainer.
    """
    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()
def after_step(self):
    next_iter = self.trainer.iter + 1
    is_final = next_iter == self.trainer.max_iter
    if is_final or (self._period > 0 and next_iter % self._period == 0):
        results = self._func()

        if results:
            assert isinstance(results, dict), (
                "Eval function must return a dict. "
                "Got {} instead.".format(results))

            flattened_results = flatten_results_dict(results)
            for k, v in flattened_results.items():
                try:
                    v = float(v)
                except Exception:
                    raise ValueError(
                        "[EvalHook] eval_function should return a nested dict of float. "
                        "Got '{}: {}' instead.".format(k, v))
            self.trainer.storage.put_scalars(**flattened_results,
                                             smoothing_hint=False)

        if comm.is_main_process() and results:
            # save evaluation results in json
            os.makedirs(os.path.join(global_cfg.OUTPUT_DIR, 'inference'),
                        exist_ok=True)
            output_file = ('res_final.json' if is_final else
                           'iter_{:07d}.json'.format(self.trainer.iter))
            with open(os.path.join(global_cfg.OUTPUT_DIR, 'inference',
                                   output_file), 'w') as fp:
                json.dump(results, fp)

        # Evaluation may take different time among workers.
        # A barrier makes them start the next iteration together.
        comm.synchronize()
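# Illustrative, standalone sketch (not part of the codebase): the kind of
# flattening the hook above expects from `flatten_results_dict` -- nested result
# dicts become flat scalar entries that can be logged with put_scalars. The
# "/"-joined key format and the helper below are assumptions for illustration,
# not a description of the library's implementation.
def flatten(results, prefix=""):
    flat = {}
    for k, v in results.items():
        key = prefix + str(k)
        if isinstance(v, dict):
            flat.update(flatten(v, key + "/"))
        else:
            flat[key] = v
    return flat

print(flatten({"bbox": {"AP": 31.2, "AP50": 55.0}}))
# {'bbox/AP': 31.2, 'bbox/AP50': 55.0}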
def test(self, ckpt):
    self.check_pointer._load_model(self.check_pointer._load_file(ckpt))
    print('evaluating checkpoint {}'.format(ckpt))
    res = Trainer.test(self.cfg, self.model)

    if comm.is_main_process():
        verify_results(self.cfg, res)
        print(res)

        if (self.best_res is None
                or self.best_res['bbox']['AP'] < res['bbox']['AP']):
            self.best_res = res
            self.best_file = ckpt
        print('best results from checkpoint {}'.format(self.best_file))
        print(self.best_res)
        self.all_res["best_file"] = self.best_file
        self.all_res["best_res"] = self.best_res
        self.all_res[ckpt] = res
        os.makedirs(os.path.join(self.cfg.OUTPUT_DIR, 'inference'),
                    exist_ok=True)
        with open(os.path.join(self.cfg.OUTPUT_DIR, 'inference',
                               'all_res.json'), 'w') as fp:
            json.dump(self.all_res, fp)
def evaluate(self): """ Returns: dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75". """ all_predictions = comm.gather(self._predictions, dst=0) if not comm.is_main_process(): return predictions = defaultdict(list) for predictions_per_rank in all_predictions: for clsid, lines in predictions_per_rank.items(): predictions[clsid].extend(lines) del all_predictions self._logger.info( "Evaluating {} using {} metric. " "Note that results do not use the official Matlab API.".format( self._dataset_name, 2007 if self._is_2007 else 2012)) with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: res_file_template = os.path.join(dirname, "{}.txt") aps = defaultdict(list) # iou -> ap per class aps_base = defaultdict(list) aps_novel = defaultdict(list) exist_base, exist_novel = False, False for cls_id, cls_name in enumerate(self._class_names): lines = predictions.get(cls_id, [""]) with open(res_file_template.format(cls_name), "w") as f: f.write("\n".join(lines)) for thresh in range(50, 100, 5): rec, prec, ap = voc_eval( res_file_template, self._anno_file_template, self._image_set_path, cls_name, ovthresh=thresh / 100.0, use_07_metric=self._is_2007, ) aps[thresh].append(ap * 100) if self._base_classes is not None and cls_name in self._base_classes: aps_base[thresh].append(ap * 100) exist_base = True if self._novel_classes is not None and cls_name in self._novel_classes: aps_novel[thresh].append(ap * 100) exist_novel = True ret = OrderedDict() mAP = {iou: np.mean(x) for iou, x in aps.items()} ret["bbox"] = { "AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75] } # adding evaluation of the base and novel classes if exist_base: mAP_base = {iou: np.mean(x) for iou, x in aps_base.items()} ret["bbox"].update({ "bAP": np.mean(list(mAP_base.values())), "bAP50": mAP_base[50], "bAP75": mAP_base[75] }) if exist_novel: mAP_novel = {iou: np.mean(x) for iou, x in aps_novel.items()} ret["bbox"].update({ "nAP": np.mean(list(mAP_novel.values())), "nAP50": mAP_novel[50], "nAP75": mAP_novel[75] }) # write per class AP to logger per_class_res = { self._class_names[idx]: ap for idx, ap in enumerate(aps[50]) } self._logger.info("Evaluate per-class mAP50:\n" + create_small_table(per_class_res)) self._logger.info("Evaluate overall bbox:\n" + create_small_table(ret["bbox"])) return ret
def build_hooks(self):
    """
    Build a list of default hooks, including timing, evaluation,
    checkpointing, lr scheduling, precise BN, writing events.

    Returns:
        list[HookBase]:
    """
    cfg = self.cfg.clone()
    cfg.defrost()
    cfg.DATALOADER.NUM_WORKERS = 0  # save some memory and time for PreciseBN

    if cfg.SSL:
        ret = [
            hooks.IterationTimer(),
            hooks.LRScheduler(self.optimizer, self.scheduler),
            hooks.PreciseBN(
                # Run at the same freq as (but before) evaluation.
                cfg.TEST.EVAL_PERIOD,
                self.model,
                # Build a new data loader to not affect training
                self.build_train_loader(cfg),
                cfg.TEST.PRECISE_BN.NUM_ITER,
                self.build_ssl_loader(cfg),
            ) if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model)
            else None,
        ]
    else:
        ret = [
            hooks.IterationTimer(),
            hooks.LRScheduler(self.optimizer, self.scheduler),
            hooks.PreciseBN(
                # Run at the same freq as (but before) evaluation.
                cfg.TEST.EVAL_PERIOD,
                self.model,
                # Build a new data loader to not affect training
                self.build_train_loader(cfg),
                cfg.TEST.PRECISE_BN.NUM_ITER,
            ) if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model)
            else None,
        ]

    # Do PreciseBN before checkpointer, because it updates the model and needs
    # to be saved by checkpointer.
    # This is not always the best: if checkpointing has a different frequency,
    # some checkpoints may have more precise statistics than others.
    if comm.is_main_process():
        ret.append(
            hooks.PeriodicCheckpointer(self.checkpointer,
                                       cfg.SOLVER.CHECKPOINT_PERIOD))

    def test_and_save_results():
        self._last_eval_results = self.test(self.cfg, self.model)
        return self._last_eval_results

    # Do evaluation after checkpointer, because then if it fails,
    # we can use the saved checkpoint to debug.
    ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))

    if comm.is_main_process():
        # run writers in the end, so that evaluation metrics are written
        ret.append(hooks.PeriodicWriter(self.build_writers()))
    return ret
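# Illustrative, standalone sketch (not part of the codebase): the hook list built
# above can contain None when PreciseBN is disabled (the `... if enabled else None`
# element). The assumption here is that hook registration drops None entries,
# which this sketch mimics with a simple filter over placeholder strings.
precise_bn_enabled = False
hooks_list = ["IterationTimer", "LRScheduler",
              "PreciseBN" if precise_bn_enabled else None]
registered = [h for h in hooks_list if h is not None]
print(registered)  # ['IterationTimer', 'LRScheduler']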