def default_setup(cfg, args): """ Perform some basic common setups at the beginning of a job, including: 1. Set up the detectron2 logger 2. Log basic information about environment, cmdline arguments, and config 3. Backup the config to the output directory Args: cfg (CfgNode or omegaconf.DictConfig): the full config to be used args (argparse.NameSpace): the command line arguments to be logged """ output_dir = _try_get_key(cfg, "OUTPUT_DIR", "output_dir", "train.output_dir") if comm.is_main_process() and output_dir: PathManager.mkdirs(output_dir) rank = comm.get_rank() setup_logger(output_dir, distributed_rank=rank, name="fvcore") logger = setup_logger(output_dir, distributed_rank=rank) logger.info("Rank of current process: {}. World size: {}".format( rank, comm.get_world_size())) logger.info("Environment info:\n" + collect_env_info()) logger.info("Command line arguments: " + str(args)) if hasattr(args, "config_file") and args.config_file != "": logger.info("Contents of args.config_file={}:\n{}".format( args.config_file, _highlight( PathManager.open(args.config_file, "r").read(), args.config_file), )) if comm.is_main_process() and output_dir: # Note: some of our scripts may expect the existence of # config.yaml in output directory path = os.path.join(output_dir, "config.yaml") if isinstance(cfg, CfgNode): logger.info("Running with full config:\n{}".format( _highlight(cfg.dump(), ".yaml"))) with PathManager.open(path, "w") as f: f.write(cfg.dump()) else: LazyConfig.save(cfg, path) logger.info("Full config saved to {}".format(path)) # make sure each worker has a different, yet deterministic seed if specified seed = _try_get_key(cfg, "SEED", "train.seed", default=-1) seed_all_rng(None if seed < 0 else seed + rank) # cudnn benchmark has large overhead. It shouldn't be used considering the small size of # typical validation set. if not (hasattr(args, "eval_only") and args.eval_only): torch.backends.cudnn.benchmark = _try_get_key(cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False)
def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None): """ Load a config file. Args: filename: absolute path or relative path w.r.t. the current working directory keys: keys to load and return. If not given, return all keys (whose values are config objects) in a dict. """ has_keys = keys is not None filename = filename.replace("/./", "/") # redundant if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]: raise ValueError(f"Config file {filename} has to be a python or yaml file.") if filename.endswith(".py"): _validate_py_syntax(filename) with _patch_import(): # Record the filename module_namespace = { "__file__": filename, "__package__": _random_package_name(filename), } with PathManager.open(filename) as f: content = f.read() # Compile first with filename to: # 1. make filename appears in stacktrace # 2. make load_rel able to find its parent's (possibly remote) location exec(compile(content, filename, "exec"), module_namespace) ret = module_namespace else: with PathManager.open(filename) as f: obj = yaml.unsafe_load(f) ret = OmegaConf.create(obj, flags={"allow_objects": True}) if has_keys: if isinstance(keys, str): return _cast_to_config(ret[keys]) else: return tuple(_cast_to_config(ret[a]) for a in keys) else: if filename.endswith(".py"): # when not specified, only load those that are config objects ret = DictConfig( { name: _cast_to_config(value) for name, value in ret.items() if isinstance(value, (DictConfig, ListConfig, dict)) and not name.startswith("_") }, flags={"allow_objects": True}, ) return ret
def _eval_predictions(self, predictions, img_ids=None): """ Evaluate predictions. Fill self._results with the metrics of the tasks. """ self._logger.info("Preparing results for COCO format ...") coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) tasks = self._tasks or self._tasks_from_predictions(coco_results) # unmap the category ids for COCO if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) num_classes = len(all_contiguous_ids) assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} for result in coco_results: category_id = result["category_id"] assert category_id < num_classes, ( f"A prediction has class={category_id}, " f"but the dataset only has {num_classes} classes and " f"predicted class id should be in [0, {num_classes - 1}]." ) result["category_id"] = reverse_id_mapping[category_id] if self._output_dir: file_path = os.path.join(self._output_dir, "coco_instances_results.json") self._logger.info("Saving results to {}".format(file_path)) with PathManager.open(file_path, "w") as f: f.write(json.dumps(coco_results)) f.flush() if not self._do_evaluation: self._logger.info("Annotations are not available for evaluation.") return self._logger.info( "Evaluating predictions with {} COCO API...".format( "unofficial" if self._use_fast_impl else "official" ) ) for task in sorted(tasks): assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!" coco_eval = ( _evaluate_predictions_on_coco( self._coco_api, coco_results, task, kpt_oks_sigmas=self._kpt_oks_sigmas, use_fast_impl=self._use_fast_impl, img_ids=img_ids, ) if len(coco_results) > 0 else None # cocoapi does not handle empty results very well ) res = self._derive_coco_results( coco_eval, task, class_names=self._metadata.get("thing_classes") ) self._results[task] = res
def convert_to_coco_json(dataset_name, output_file, allow_cached=True): """ Converts dataset into COCO format and saves it to a json file. dataset_name must be registered in DatasetCatalog and in detectron2's standard format. Args: dataset_name: reference from the config file to the catalogs must be registered in DatasetCatalog and in detectron2's standard format output_file: path of json file that will be saved to allow_cached: if json file is already present then skip conversion """ # TODO: The dataset or the conversion script *may* change, # a checksum would be useful for validating the cached data PathManager.mkdirs(os.path.dirname(output_file)) with file_lock(output_file): if PathManager.exists(output_file) and allow_cached: logger.warning( f"Using previously cached COCO format annotations at '{output_file}'. " "You need to clear the cache file if your dataset has been modified." ) else: logger.info( f"Converting annotations of dataset '{dataset_name}' to COCO format ...)" ) coco_dict = convert_to_coco_dict(dataset_name) logger.info( f"Caching COCO format annotations at '{output_file}' ...") tmp_file = output_file + ".tmp" with PathManager.open(tmp_file, "w") as f: json.dump(coco_dict, f) shutil.move(tmp_file, output_file)
def evaluate(self, img_ids=None): """ Args: img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset """ if self._distributed: comm.synchronize() predictions = comm.gather(self._predictions, dst=0) predictions = list(itertools.chain(*predictions)) if not comm.is_main_process(): return {} else: predictions = self._predictions if len(predictions) == 0: self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") return {} if self._output_dir: PathManager.mkdirs(self._output_dir) file_path = os.path.join(self._output_dir, "instances_predictions.pth") with PathManager.open(file_path, "wb") as f: torch.save(predictions, f) self._results = OrderedDict() if "proposals" in predictions[0]: self._eval_box_proposals(predictions) if "instances" in predictions[0]: self._eval_predictions(predictions, img_ids=img_ids) # Copy so the caller can do whatever with results return copy.deepcopy(self._results)
def _validate_py_syntax(filename): # see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py with PathManager.open(filename, "r") as f: content = f.read() try: ast.parse(content) except SyntaxError as e: raise SyntaxError(f"Config file {filename} has syntax error!") from e
def _load_file(self, filename): if filename.endswith(".pkl"): with PathManager.open(filename, "rb") as f: data = pickle.load(f, encoding="latin1") if "model" in data and "__author__" in data: # file is in Detectron2 model zoo format self.logger.info("Reading a file from '{}'".format( data["__author__"])) return data else: # assume file is from Caffe2 / Detectron1 model zoo if "blobs" in data: # Detection models have "blobs", but ImageNet models don't data = data["blobs"] data = { k: v for k, v in data.items() if not k.endswith("_momentum") } return { "model": data, "__author__": "Caffe2", "matching_heuristics": True } elif filename.endswith(".pyth"): # assume file is from pycls; no one else seems to use the ".pyth" extension with PathManager.open(filename, "rb") as f: data = torch.load(f) assert ( "model_state" in data ), f"Cannot load .pyth file {filename}; pycls checkpoints must contain 'model_state'." model_state = { k: v for k, v in data["model_state"].items() if not k.endswith("num_batches_tracked") } return { "model": model_state, "__author__": "pycls", "matching_heuristics": True } loaded = super()._load_file(filename) # load native pth checkpoint if "model" not in loaded: loaded = {"model": loaded} return loaded
def load_proposals_into_dataset(dataset_dicts, proposal_file): """ Load precomputed object proposals into the dataset. The proposal file should be a pickled dict with the following keys: - "ids": list[int] or list[str], the image ids - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores corresponding to the boxes. - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``. Args: dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. proposal_file (str): file path of pre-computed proposals, in pkl format. Returns: list[dict]: the same format as dataset_dicts, but added proposal field. """ logger = logging.getLogger(__name__) logger.info("Loading proposals from: {}".format(proposal_file)) with PathManager.open(proposal_file, "rb") as f: proposals = pickle.load(f, encoding="latin1") # Rename the key names in D1 proposal files rename_keys = {"indexes": "ids", "scores": "objectness_logits"} for key in rename_keys: if key in proposals: proposals[rename_keys[key]] = proposals.pop(key) # Fetch the indexes of all proposals that are in the dataset # Convert image_id to str since they could be int. img_ids = set({str(record["image_id"]) for record in dataset_dicts}) id_to_index = { str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids } # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS' bbox_mode = BoxMode(proposals["bbox_mode"] ) if "bbox_mode" in proposals else BoxMode.XYXY_ABS for record in dataset_dicts: # Get the index of the proposal i = id_to_index[str(record["image_id"])] boxes = proposals["boxes"][i] objectness_logits = proposals["objectness_logits"][i] # Sort the proposals in descending order of the scores inds = objectness_logits.argsort()[::-1] record["proposal_boxes"] = boxes[inds] record["proposal_objectness_logits"] = objectness_logits[inds] record["proposal_bbox_mode"] = bbox_mode return dataset_dicts
def save(cfg, filename: str): """ Save a config object to a yaml file. Note that when the config dictionary contains complex objects (e.g. lambda), it can't be saved to yaml. In that case we will print an error and attempt to save to a pkl file instead. Args: cfg: an omegaconf config object filename: yaml file name to save the config file """ logger = logging.getLogger(__name__) try: cfg = deepcopy(cfg) except Exception: pass else: # if it's deep-copyable, then... def _replace_type_by_name(x): if "_target_" in x and callable(x._target_): try: x._target_ = _convert_target_to_string(x._target_) except AttributeError: pass # not necessary, but makes yaml looks nicer _visit_dict_config(cfg, _replace_type_by_name) try: with PathManager.open(filename, "w") as f: dict = OmegaConf.to_container(cfg, resolve=False) dumped = yaml.dump(dict, default_flow_style=None, allow_unicode=True, width=9999) f.write(dumped) except Exception: logger.exception("Unable to serialize the config to yaml. Error:") new_filename = filename + ".pkl" try: # retry by pickle with PathManager.open(new_filename, "wb") as f: cloudpickle.dump(cfg, f) logger.warning(f"Config saved using cloudpickle at {new_filename} ...") except Exception: pass
def read_image(file_name, format=None): """ Read an image into the given format. Will apply rotation and flipping if the image has such exif information. Args: file_name (str): image file path format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". Returns: image (np.ndarray): an HWC image in the given format, which is 0-255, uint8 for supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. """ with PathManager.open(file_name, "rb") as f: image = Image.open(f) # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 image = _apply_exif_orientation(image) return convert_PIL_to_numpy(image, format)
def after_step(self): if self._profiler is None: return self._profiler.__exit__(None, None, None) PathManager.mkdirs(self._output_dir) out_file = os.path.join( self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter)) if "://" not in out_file: self._profiler.export_chrome_trace(out_file) else: # Support non-posix filesystems with tempfile.TemporaryDirectory( prefix="detectron2_profiler") as d: tmp_file = os.path.join(d, "tmp.json") self._profiler.export_chrome_trace(tmp_file) with open(tmp_file) as f: content = f.read() with PathManager.open(out_file, "w") as f: f.write(content)
def new_import(name, globals=None, locals=None, fromlist=(), level=0): if ( # Only deal with relative imports inside config files level != 0 and globals is not None and (globals.get("__package__", "") or "").startswith(_CFG_PACKAGE_NAME) ): cur_file = find_relative_file(globals["__file__"], name, level) _validate_py_syntax(cur_file) spec = importlib.machinery.ModuleSpec( _random_package_name(cur_file), None, origin=cur_file ) module = importlib.util.module_from_spec(spec) module.__file__ = cur_file with PathManager.open(cur_file) as f: content = f.read() exec(compile(content, cur_file, "exec"), module.__dict__) for name in fromlist: # turn imported dict into DictConfig automatically val = _cast_to_config(module.__dict__[name]) module.__dict__[name] = val return module return old_import(name, globals, locals, fromlist=fromlist, level=level)
def _eval_box_proposals(self, predictions): """ Evaluate the box proposals in predictions. Fill self._results with the metrics for "box_proposals" task. """ if self._output_dir: # Saving generated box proposals to file. # Predicted box_proposals are in XYXY_ABS mode. bbox_mode = BoxMode.XYXY_ABS.value ids, boxes, objectness_logits = [], [], [] for prediction in predictions: ids.append(prediction["image_id"]) boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) proposal_data = { "boxes": boxes, "objectness_logits": objectness_logits, "ids": ids, "bbox_mode": bbox_mode, } with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: pickle.dump(proposal_data, f) if not self._do_evaluation: self._logger.info("Annotations are not available for evaluation.") return self._logger.info("Evaluating bbox proposals ...") res = {} areas = {"all": "", "small": "s", "medium": "m", "large": "l"} for limit in [100, 1000]: for area, suffix in areas.items(): stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit) key = "AR{}@{:d}".format(suffix, limit) res[key] = float(stats["ar"].item() * 100) self._logger.info("Proposal metrics: \n" + create_small_table(res)) self._results["box_proposals"] = res
def _open_cfg(cls, filename): return PathManager.open(filename, "r")