def convert_to_coco_json(dataset_name, output_file, allow_cached=True):
    """
    Converts dataset into COCO format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and in detectron2's standard format.

    Args:
        dataset_name: reference from the config file to the catalogs
            must be registered in DatasetCatalog and in detectron2's standard format
        output_file: path of json file that will be saved to
        allow_cached: if json file is already present then skip conversion
    """
    # TODO: The dataset or the conversion script *may* change,
    # a checksum would be useful for validating the cached data
    PathManager.mkdirs(os.path.dirname(output_file))
    with file_lock(output_file):
        if PathManager.exists(output_file) and allow_cached:
            logger.warning(
                f"Using previously cached COCO format annotations at '{output_file}'. "
                "You need to clear the cache file if your dataset has been modified."
            )
        else:
            logger.info(
                f"Converting annotations of dataset '{dataset_name}' to COCO format ..."
            )
            coco_dict = convert_to_coco_dict(dataset_name)

            logger.info(f"Caching COCO format annotations at '{output_file}' ...")
            tmp_file = output_file + ".tmp"
            with PathManager.open(tmp_file, "w") as f:
                json.dump(coco_dict, f)
            shutil.move(tmp_file, output_file)
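# --- Usage sketch (illustration, not part of the snippet above). The dataset name and
# output path are hypothetical; the dataset must already be registered in DatasetCatalog
# in detectron2's standard dict format.
convert_to_coco_json(
    dataset_name="my_dataset_train",
    output_file="./output/my_dataset_train_coco_format.json",
    allow_cached=True,  # reuse an existing cached JSON instead of re-converting
)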
def default_export_predictor(cfg, pytorch_model, predictor_type, output_dir, data_loader):
    # The default implementation acts based on the PredictorExportConfig returned by
    # calling "prepare_for_export". It'll export all sub models in standard way
    # according to the "predictor_type".
    assert hasattr(pytorch_model, "prepare_for_export"), pytorch_model
    inputs = next(iter(data_loader))
    export_config = pytorch_model.prepare_for_export(cfg, inputs, predictor_type)
    model_inputs = (
        export_config.data_generator(inputs)
        if export_config.data_generator is not None
        else (inputs,)
    )

    predictor_path = os.path.join(output_dir, predictor_type)
    PathManager.mkdirs(predictor_path)

    predictor_init_kwargs = {
        "preprocess_info": export_config.preprocess_info,
        "postprocess_info": export_config.postprocess_info,
        "run_func_info": export_config.run_func_info,
    }

    if isinstance(export_config.model, dict):
        models_info = {}
        for name, model in export_config.model.items():
            save_path = os.path.join(predictor_path, name)
            model_info = _export_single_model(
                predictor_path=predictor_path,
                model=model,
                input_args=model_inputs[name] if model_inputs is not None else None,
                save_path=save_path,
                model_export_method=(
                    predictor_type
                    if export_config.model_export_method is None
                    else export_config.model_export_method[name]
                ),
                model_export_kwargs=(
                    {}
                    if export_config.model_export_kwargs is None
                    else export_config.model_export_kwargs[name]
                ),
            )
            models_info[name] = model_info
        predictor_init_kwargs["models"] = models_info
    else:
        # for single model, exported files are put under `predictor_path` together
        # with predictor_info.json
        save_path = predictor_path
        model_info = _export_single_model(
            predictor_path=predictor_path,
            model=export_config.model,
            input_args=model_inputs,
            save_path=save_path,
            model_export_method=export_config.model_export_method or predictor_type,
            model_export_kwargs=export_config.model_export_kwargs or {},
        )
        predictor_init_kwargs["model"] = model_info

    # assemble predictor
    predictor_info = PredictorInfo(**predictor_init_kwargs)
    with PathManager.open(os.path.join(predictor_path, "predictor_info.json"), "w") as f:
        json.dump(predictor_info.to_dict(), f, indent=4)

    return predictor_path
def evaluate(self): """ Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): * Mean intersection-over-union averaged across classes (mIoU) * Frequency Weighted IoU (fwIoU) * Mean pixel accuracy averaged across classes (mACC) * Pixel Accuracy (pACC) """ if self._distributed: synchronize() conf_matrix_list = all_gather(self._conf_matrix) self._predictions = all_gather(self._predictions) self._predictions = list(itertools.chain(*self._predictions)) if not is_main_process(): return self._conf_matrix = np.zeros_like(self._conf_matrix) for conf_matrix in conf_matrix_list: self._conf_matrix += conf_matrix if self._output_dir: PathManager.mkdirs(self._output_dir) file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") with PathManager.open(file_path, "w") as f: f.write(json.dumps(self._predictions)) acc = np.full(self._num_classes, np.nan, dtype=np.float) iou = np.full(self._num_classes, np.nan, dtype=np.float) tp = self._conf_matrix.diagonal()[:-1].astype(np.float) pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) class_weights = pos_gt / np.sum(pos_gt) pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) acc_valid = pos_gt > 0 acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] iou_valid = (pos_gt + pos_pred) > 0 union = pos_gt + pos_pred - tp iou[acc_valid] = tp[acc_valid] / union[acc_valid] macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) miou = np.sum(iou[acc_valid]) / np.sum(iou_valid) fiou = np.sum(iou[acc_valid] * class_weights[acc_valid]) pacc = np.sum(tp) / np.sum(pos_gt) res = {} res["mIoU"] = 100 * miou res["fwIoU"] = 100 * fiou for i, name in enumerate(self._class_names): res["IoU-{}".format(name)] = 100 * iou[i] res["mACC"] = 100 * macc res["pACC"] = 100 * pacc for i, name in enumerate(self._class_names): res["ACC-{}".format(name)] = 100 * acc[i] if self._output_dir: file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") with PathManager.open(file_path, "wb") as f: torch.save(res, f) results = OrderedDict({"sem_seg": res}) self._logger.info(results) return results
def trace_and_save_torchscript(
    model: nn.Module,
    inputs: Tuple[torch.Tensor],
    output_path: str,
    _extra_files: Optional[Dict[str, bytes]] = None,
):
    logger.info("Tracing and saving TorchScript to {} ...".format(output_path))

    # TODO: patch_builtin_len depends on D2, we should either copy the function
    # or dynamically register D2's version.
    from detectron2.export.torchscript_patch import patch_builtin_len

    with torch.no_grad(), patch_builtin_len():
        script_model = torch.jit.trace(model, inputs)

    if _extra_files is None:
        _extra_files = {}

    model_file = os.path.join(output_path, "model.jit")
    PathManager.mkdirs(output_path)
    with PathManager.open(model_file, "wb") as f:
        torch.jit.save(script_model, f, _extra_files=_extra_files)

    data_file = os.path.join(output_path, "data.pth")
    with PathManager.open(data_file, "wb") as f:
        torch.save(inputs, f)

    # NOTE: new API doesn't require return
    return model_file
def save_protobuf(self, output_dir): """ Save the model as caffe2's protobuf format. It saves the following files: * "model.pb": definition of the graph. Can be visualized with tools like `netron <https://github.com/lutzroeder/netron>`_. * "model_init.pb": model parameters * "model.pbtxt": human-readable definition of the graph. Not needed for deployment. Args: output_dir (str): the output directory to save protobuf files. """ logger = logging.getLogger(__name__) logger.info("Saving model to {} ...".format(output_dir)) if not PathManager.exists(output_dir): PathManager.mkdirs(output_dir) with PathManager.open(os.path.join(output_dir, "model.pb"), "wb") as f: f.write(self._predict_net.SerializeToString()) with PathManager.open(os.path.join(output_dir, "model.pbtxt"), "w") as f: f.write(str(self._predict_net)) with PathManager.open(os.path.join(output_dir, "model_init.pb"), "wb") as f: f.write(self._init_net.SerializeToString())
def evaluate(self):
    if self._distributed:
        comm.synchronize()
        predictions = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain(*predictions))

        if not comm.is_main_process():
            return
    else:
        predictions = self._predictions

    if len(predictions) == 0:
        self._logger.warning("[LVISEvaluator] Did not receive valid predictions.")
        return {}

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "instances_predictions.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(predictions, f)

    self._results = OrderedDict()
    if "proposals" in predictions[0]:
        self._eval_box_proposals(predictions)
    if "instances" in predictions[0]:
        self._eval_predictions(predictions)
    # Copy so the caller can do whatever with results
    return copy.deepcopy(self._results)
def _eval_predictions(self, predictions, multi_storage=None, img_ids=None):
    """
    Evaluate predictions on densepose.
    Return results with the metrics of the tasks.
    """
    self._logger.info("Preparing results for COCO format ...")

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "coco_densepose_predictions.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(predictions, f)

    self._logger.info("Evaluating predictions ...")
    res = OrderedDict()
    results_gps, results_gpsm, results_segm = _evaluate_predictions_on_coco(
        self._coco_api,
        predictions,
        multi_storage,
        self._embedder,
        class_names=self._metadata.get("thing_classes"),
        min_threshold=self._min_threshold,
        img_ids=img_ids,
    )
    res["densepose_gps"] = results_gps
    res["densepose_gpsm"] = results_gpsm
    res["densepose_segm"] = results_segm
    if self._should_evaluate_mesh_alignment:
        res["densepose_mesh_alignment"] = self._evaluate_mesh_alignment()
    return res
def _eval_predictions(self, predictions, img_ids=None):
    """
    Evaluate predictions on densepose.
    Return results with the metrics of the tasks.
    """
    self._logger.info("Preparing results for COCO format ...")

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "coco_densepose_predictions.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(predictions, f)

    self._logger.info("Evaluating predictions ...")
    res = OrderedDict()
    results_gps, results_gpsm, results_segm = _evaluate_predictions_on_coco(
        self._coco_api,
        predictions,
        min_threshold=self._min_threshold,
        img_ids=img_ids,
    )
    res["densepose_gps"] = results_gps
    res["densepose_gpsm"] = results_gpsm
    res["densepose_segm"] = results_segm
    return res
def evaluate(self, img_ids=None):
    """
    Args:
        img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset
    """
    if self._distributed:
        comm.synchronize()
        predictions = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain(*predictions))

        if not comm.is_main_process():
            return {}
    else:
        predictions = self._predictions

    if len(predictions) == 0:
        self._logger.warning("[COCOEvaluator] Did not receive valid predictions.")
        return {}

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "instances_predictions.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(predictions, f)

    lesion_detection_evaluation.compute_froc_lesion(predictions)

    self._results = OrderedDict()
    if "proposals" in predictions[0]:
        self._eval_box_proposals(predictions)
    if "instances" in predictions[0]:
        self._eval_predictions(predictions, img_ids=img_ids)
    # Copy so the caller can do whatever with results
    return copy.deepcopy(self._results)
def trace_and_save_torchscript(
    model: nn.Module,
    inputs: Tuple[torch.Tensor],
    output_path: str,
    mobile_optimization: Optional[MobileOptimizationConfig] = None,
    _extra_files: Optional[Dict[str, bytes]] = None,
):
    logger.info("Tracing and saving TorchScript to {} ...".format(output_path))
    PathManager.mkdirs(output_path)
    if _extra_files is None:
        _extra_files = {}

    # TODO: patch_builtin_len depends on D2, we should either copy the function
    # or dynamically register D2's version.
    from detectron2.export.torchscript_patch import patch_builtin_len

    with torch.no_grad(), patch_builtin_len():
        script_model = torch.jit.trace(model, inputs)

    with make_temp_directory("trace_and_save_torchscript") as tmp_dir:

        @contextlib.contextmanager
        def _synced_local_file(rel_path):
            remote_file = os.path.join(output_path, rel_path)
            local_file = os.path.join(tmp_dir, rel_path)
            yield local_file
            PathManager.copy_from_local(local_file, remote_file, overwrite=True)

        with _synced_local_file("model.jit") as model_file:
            torch.jit.save(script_model, model_file, _extra_files=_extra_files)

        with _synced_local_file("data.pth") as data_file:
            torch.save(inputs, data_file)

        if mobile_optimization is not None:
            logger.info("Applying optimize_for_mobile ...")
            liteopt_model = optimize_for_mobile(
                script_model,
                optimization_blocklist=mobile_optimization.optimization_blocklist,
                preserved_methods=mobile_optimization.preserved_methods,
                backend=mobile_optimization.backend,
            )
            with _synced_local_file("mobile_optimized.ptl") as lite_path:
                liteopt_model._save_for_lite_interpreter(lite_path)
            # liteopt_model(*inputs)  # sanity check
            op_names = torch.jit.export_opnames(liteopt_model)
            logger.info(
                "Operator names from lite interpreter:\n{}".format("\n".join(op_names))
            )

            logger.info("Applying augment_model_with_bundled_inputs ...")
            augment_model_with_bundled_inputs(liteopt_model, [inputs])
            liteopt_model.run_on_bundled_input(0)  # sanity check
            with _synced_local_file("mobile_optimized_bundled.ptl") as lite_path:
                liteopt_model._save_for_lite_interpreter(lite_path)
def convert_coco_text_to_coco_detection_json(
    source_json: str,
    target_json: str,
    set_type: Optional[str] = None,
    min_img_size: int = 100,
    text_cat_id: int = 1,
) -> Dict:
    """
    This function converts a COCOText style JSON to a COCODetection style JSON.
    For COCOText see: https://vision.cornell.edu/se3/coco-text-2/
    For COCODetection see: http://cocodataset.org/#overview
    """
    with PathManager.open(source_json, "r") as f:
        coco_text_json = json.load(f)

    coco_text_json["annotations"] = list(coco_text_json["anns"].values())
    coco_text_json["images"] = list(coco_text_json["imgs"].values())
    if set_type is not None:
        # COCO Text style JSONs often mix test, train, and val sets.
        # We need to make sure we only use the data type we want.
        coco_text_json["images"] = [
            x for x in coco_text_json["images"] if x["set"] == set_type
        ]
    coco_text_json["categories"] = [{"name": "text", "id": text_cat_id}]
    del coco_text_json["cats"]
    del coco_text_json["imgs"]
    del coco_text_json["anns"]
    for ann in coco_text_json["annotations"]:
        ann["category_id"] = text_cat_id
        ann["iscrowd"] = 0
        # Don't evaluate the model on illegible words
        if set_type == "val" and ann["legibility"] != "legible":
            ann["ignore"] = True
    # Some datasets seem to have extremely small images which break downstream
    # operations. If min_img_size is set, we can remove these.
    coco_text_json["images"] = [
        x
        for x in coco_text_json["images"]
        if x["height"] >= min_img_size and x["width"] >= min_img_size
    ]
    # Remap image_ids if necessary
    if isinstance(coco_text_json["images"][0]["id"], str):
        image_id_remap = {
            x["id"]: id_no for (id_no, x) in enumerate(coco_text_json["images"])
        }
        for x in coco_text_json["images"]:
            x["id"] = image_id_remap[x["id"]]
        for x in coco_text_json["annotations"]:
            if x["image_id"] in image_id_remap:
                x["image_id"] = image_id_remap[x["image_id"]]

    PathManager.mkdirs(os.path.dirname(target_json))
    if comm.get_local_rank() == 0:
        with PathManager.open(target_json, "w") as f:
            json.dump(coco_text_json, f)
    return coco_text_json
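# --- Usage sketch (illustration): both paths below are hypothetical; the source file
# must be a COCO-Text style JSON with "imgs"/"anns"/"cats" keys as described at
# https://vision.cornell.edu/se3/coco-text-2/.
coco_detection_dict = convert_coco_text_to_coco_detection_json(
    source_json="./datasets/cocotext.v2.json",
    target_json="./datasets/cocotext_v2_val_detection.json",
    set_type="val",     # keep only the validation images
    min_img_size=100,   # drop extremely small images
)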
def default_setup(cfg, args):
    """
    Perform some basic common setups at the beginning of a job, including:

    1. Set up the detectron2 logger
    2. Log basic information about environment, cmdline arguments, and config
    3. Backup the config to the output directory

    Args:
        cfg (CfgNode or omegaconf.DictConfig): the full config to be used
        args (argparse.NameSpace): the command line arguments to be logged
    """
    output_dir = _try_get_key(cfg, "OUTPUT_DIR", "output_dir", "train.output_dir")
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    setup_logger(output_dir, distributed_rank=rank, name="fvcore")
    logger = setup_logger(output_dir, distributed_rank=rank)

    logger.info(
        "Rank of current process: {}. World size: {}".format(rank, comm.get_world_size())
    )
    logger.info("Environment info:\n" + collect_env_info())

    logger.info("Command line arguments: " + str(args))
    if hasattr(args, "config_file") and args.config_file != "":
        logger.info(
            "Contents of args.config_file={}:\n{}".format(
                args.config_file,
                _highlight(PathManager.open(args.config_file, "r").read(), args.config_file),
            )
        )

    if comm.is_main_process() and output_dir:
        # Note: some of our scripts may expect the existence of
        # config.yaml in output directory
        path = os.path.join(output_dir, "config.yaml")
        if isinstance(cfg, CfgNode):
            logger.info("Running with full config:\n{}".format(_highlight(cfg.dump(), ".yaml")))
            with PathManager.open(path, "w") as f:
                f.write(cfg.dump())
        else:
            LazyConfig.save(cfg, path)
        logger.info("Full config saved to {}".format(path))

    # make sure each worker has a different, yet deterministic seed if specified
    seed = _try_get_key(cfg, "SEED", "train.seed", default=-1)
    seed_all_rng(None if seed < 0 else seed + rank)

    # cudnn benchmark has large overhead. It shouldn't be used considering the small size of
    # typical validation set.
    if not (hasattr(args, "eval_only") and args.eval_only):
        torch.backends.cudnn.benchmark = _try_get_key(
            cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False
        )
def default_setup(cfg, args):
    """
    Perform some basic common setups at the beginning of a job, including:

    1. Set up the detectron2 logger
    2. Log basic information about environment, cmdline arguments, and config
    3. Backup the config to the output directory

    Args:
        cfg (CfgNode): the full config to be used
        args (argparse.NameSpace): the command line arguments to be logged
    """
    output_dir = cfg.OUTPUT_DIR
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    setup_logger(output_dir, distributed_rank=rank, name="fvcore")
    logger = setup_logger(output_dir, distributed_rank=rank)

    logger.info(
        "Rank of current process: {}. World size: {}".format(rank, comm.get_world_size())
    )
    logger.info("Environment info:\n" + collect_env_info())

    logger.info("Command line arguments: " + str(args))
    if hasattr(args, "config_file") and args.config_file != "":
        logger.info(
            "Contents of args.config_file={}:\n{}".format(
                args.config_file, PathManager.open(args.config_file, "r").read()
            )
        )

    logger.info("Running with full config:\n{}".format(cfg))
    if comm.is_main_process() and output_dir:
        # Note: some of our scripts may expect the existence of
        # config.yaml in output directory
        path = os.path.join(output_dir, "config.yaml")
        with PathManager.open(path, "w") as f:
            f.write(cfg.dump())
        logger.info("Full config saved to {}".format(path))

    # make sure each worker has a different, yet deterministic seed if specified
    seed_all_rng(None if cfg.SEED < 0 else cfg.SEED + rank)

    # cudnn benchmark has large overhead. It shouldn't be used considering the small size of
    # typical validation set.
    if not (hasattr(args, "eval_only") and args.eval_only):
        torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK
def __init__(self, dataset_name, output_dir):
    """
    Args:
        dataset_name (str): name of the dataset
        output_dir (str): output directory to save results for evaluation
    """
    self._metadata = MetadataCatalog.get(dataset_name)
    self._thing_contiguous_id_to_dataset_id = {
        v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
    }
    self._stuff_contiguous_id_to_dataset_id = {
        v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items()
    }

    PathManager.mkdirs(output_dir)
    self._predictions_json = os.path.join(output_dir, "predictions.json")
def default_export_predictor(cfg, pytorch_model, predictor_type, output_dir, data_loader):
    # The default implementation acts based on the PredictorExportConfig returned by
    # calling "prepare_for_export". It'll export all sub models in standard way
    # according to the "predictor_type".
    assert hasattr(pytorch_model, "prepare_for_export"), pytorch_model
    inputs = next(iter(data_loader))
    export_config = pytorch_model.prepare_for_export(cfg, inputs, export_scheme=predictor_type)

    predictor_path = os.path.join(output_dir, predictor_type)
    PathManager.mkdirs(predictor_path)

    # TODO: also support multiple models from nested dict in the default implementation
    assert isinstance(export_config.model, nn.Module), "Currently support single model"
    model = export_config.model
    input_args = (
        export_config.data_generator(inputs)
        if export_config.data_generator is not None
        else None
    )
    model_export_kwargs = export_config.model_export_kwargs or {}
    # the default implementation assumes model type is the same as the predictor type
    model_type = predictor_type
    model_path = predictor_path  # may be a sub dir for multiple models

    standard_model_export(
        model,
        model_type=model_type,
        save_path=model_path,
        input_args=input_args,
        **model_export_kwargs,
    )
    model_rel_path = os.path.relpath(model_path, predictor_path)

    # assemble predictor
    predictor_info = PredictorInfo(
        model=ModelInfo(path=model_rel_path, type=model_type),
        preprocess_info=export_config.preprocess_info,
        postprocess_info=export_config.postprocess_info,
        run_func_info=export_config.run_func_info,
    )
    with PathManager.open(os.path.join(predictor_path, "predictor_info.json"), "w") as f:
        json.dump(predictor_info.to_dict(), f, indent=4)

    return predictor_path
def save_protobuf(self, output_dir):
    """
    Save the model as caffe2's protobuf format.

    Args:
        output_dir (str): the output directory to save protobuf files.
    """
    logger = logging.getLogger(__name__)
    logger.info("Saving model to {} ...".format(output_dir))
    if not PathManager.exists(output_dir):
        PathManager.mkdirs(output_dir)

    with PathManager.open(os.path.join(output_dir, "model.pb"), "wb") as f:
        f.write(self._predict_net.SerializeToString())
    with PathManager.open(os.path.join(output_dir, "model.pbtxt"), "w") as f:
        f.write(str(self._predict_net))
    with PathManager.open(os.path.join(output_dir, "model_init.pb"), "wb") as f:
        f.write(self._init_net.SerializeToString())
def after_step(self):
    if self._profiler is None:
        return
    self._profiler.__exit__(None, None, None)
    PathManager.mkdirs(self._output_dir)
    out_file = os.path.join(
        self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter)
    )
    if "://" not in out_file:
        self._profiler.export_chrome_trace(out_file)
    else:
        # Support non-posix filesystems
        with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d:
            tmp_file = os.path.join(d, "tmp.json")
            self._profiler.export_chrome_trace(tmp_file)
            with open(tmp_file) as f:
                content = f.read()
        with PathManager.open(out_file, "w") as f:
            f.write(content)
def default_writers(output_dir: str, max_iter: Optional[int] = None):
    """
    Build a list of :class:`EventWriter` to be used.
    It now consists of a :class:`CommonMetricPrinter`,
    :class:`TensorboardXWriter` and :class:`JSONWriter`.

    Args:
        output_dir: directory to store JSON metrics and tensorboard events
        max_iter: the total number of iterations

    Returns:
        list[EventWriter]: a list of :class:`EventWriter` objects.
    """
    PathManager.mkdirs(output_dir)
    return [
        # It may not always print what you want to see, since it prints "common" metrics only.
        CommonMetricPrinter(max_iter),
        JSONWriter(os.path.join(output_dir, "metrics.json")),
        TensorboardXWriter(output_dir),
    ]
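# --- Usage sketch (illustration): roughly how these writers are consumed via a periodic
# writer hook, similar to what detectron2's DefaultTrainer does internally. The output
# dir, max_iter and period are placeholder values, and `trainer` is assumed to be an
# already constructed detectron2 TrainerBase instance (not created here).
from detectron2.engine import hooks

writers = default_writers("./output", max_iter=90000)
trainer.register_hooks([hooks.PeriodicWriter(writers, period=20)])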
def __init__(self, dataset_name: str, output_dir: Optional[str] = None):
    """
    Args:
        dataset_name: name of the dataset
        output_dir: output directory to save results for evaluation.
    """
    self._metadata = MetadataCatalog.get(dataset_name)
    self._thing_contiguous_id_to_dataset_id = {
        v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
    }
    self._stuff_contiguous_id_to_dataset_id = {
        v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items()
    }

    self._output_dir = output_dir
    if self._output_dir is not None:
        PathManager.mkdirs(self._output_dir)
def build_densepose_evaluator_storage(cfg: CfgNode, output_folder: str):
    storage_spec = cfg.DENSEPOSE_EVALUATION.STORAGE
    if storage_spec == "none":
        return None
    evaluator_type = cfg.DENSEPOSE_EVALUATION.TYPE
    # common output tensor sizes
    hout = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
    wout = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
    n_csc = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
    # specific output tensors
    if evaluator_type == "iuv":
        n_fsc = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        schema = {
            "coarse_segm": SizeData(dtype="float32", shape=(n_csc, hout, wout)),
            "fine_segm": SizeData(dtype="float32", shape=(n_fsc, hout, wout)),
            "u": SizeData(dtype="float32", shape=(n_fsc, hout, wout)),
            "v": SizeData(dtype="float32", shape=(n_fsc, hout, wout)),
        }
    elif evaluator_type == "cse":
        embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
        schema = {
            "coarse_segm": SizeData(dtype="float32", shape=(n_csc, hout, wout)),
            "embedding": SizeData(dtype="float32", shape=(embed_size, hout, wout)),
        }
    else:
        raise ValueError(f"Unknown evaluator type: {evaluator_type}")
    # storage types
    if storage_spec == "ram":
        storage = SingleProcessRamTensorStorage(schema, io.BytesIO())
    elif storage_spec == "file":
        fpath = os.path.join(output_folder, f"DensePoseEvaluatorStorage.{get_rank()}.bin")
        PathManager.mkdirs(output_folder)
        storage = SingleProcessFileTensorStorage(schema, fpath, "wb")
    else:
        raise ValueError(f"Unknown storage specification: {storage_spec}")
    return storage
def dump_trained_model_configs(
    output_dir: str, trained_cfgs: Dict[str, CfgNode]
) -> Dict[str, str]:
    """Writes trained model config files to output_dir.

    Args:
        output_dir: output file directory.
        trained_cfgs: map from model name to the config of trained model.

    Returns:
        A map of model name to model config path.
    """
    trained_model_configs = {}
    trained_model_config_dir = os.path.join(output_dir, "trained_model_configs")
    PathManager.mkdirs(trained_model_config_dir)
    for name, trained_cfg in trained_cfgs.items():
        config_file = os.path.join(trained_model_config_dir, "{}.yaml".format(name))
        trained_model_configs[name] = config_file
        if comm.is_main_process():
            logger.info("Dump trained config file: {}".format(config_file))
            with PathManager.open(config_file, "w") as f:
                f.write(trained_cfg.dump())
    return trained_model_configs
def export_optimize_and_save_torchscript(
    model: nn.Module,
    inputs: Optional[Tuple[Any]],
    output_path: str,
    *,
    jit_mode: Optional[str] = DEFAULT_JIT_MODE,
    torchscript_filename: str = "model.jit",
    mobile_optimization: Optional[MobileOptimizationConfig] = None,
    _extra_files: Optional[Dict[str, bytes]] = None,
) -> str:
    """
    The primary function for exporting a PyTorch model to TorchScript.

    Args:
        model (nn.Module): the model to export. When given a ScriptModule, skip the export
            and only optimize and save the model.
        inputs (tuple or None): input arguments of model, can be called as model(*inputs).
            Will not be used when scripting the model.
        output_path (str): directory where the model will be saved.
        jit_mode (str): trace/script or None if the model is already a ScriptModule.
        torchscript_filename (str): the filename of the non-mobile-optimized model.
        mobile_optimization (MobileOptimizationConfig): when provided, the mobile
            optimization will be applied.
        _extra_files (Dict[str, bytes]): when provided, extra files will be saved.

    Returns:
        (str): filename of the final model, no matter optimized or not.
    """
    logger.info("Export, optimize and saving TorchScript to {} ...".format(output_path))
    PathManager.mkdirs(output_path)
    if _extra_files is None:
        _extra_files = {}

    if isinstance(model, torch.jit.ScriptModule):
        if jit_mode is not None:
            logger.info("The input model is already a ScriptModule, skip the jit step")
        script_model = model  # reuse the already-exported module for the save/optimize steps below
    elif jit_mode == "trace":
        logger.info("Tracing the model ...")
        with torch.no_grad():
            script_model = torch.jit.trace(model, inputs)
    elif jit_mode == "script":
        logger.info("Scripting the model ...")
        script_model = torch.jit.script(model)
    else:
        raise ValueError("Unsupported jit_mode: {}".format(jit_mode))

    with make_temp_directory("export_optimize_and_save_torchscript") as tmp_dir:

        @contextlib.contextmanager
        def _synced_local_file(rel_path):
            remote_file = os.path.join(output_path, rel_path)
            local_file = os.path.join(tmp_dir, rel_path)
            yield local_file
            PathManager.copy_from_local(local_file, remote_file, overwrite=True)

        with _synced_local_file(torchscript_filename) as model_file:
            logger.info(f"Saving torchscript model to: {torchscript_filename}")
            torch.jit.save(script_model, model_file, _extra_files=_extra_files)

        dump_torchscript_IR(script_model, os.path.join(output_path, "torchscript_IR"))

        data_filename = "data.pth"
        with _synced_local_file(data_filename) as data_file:
            logger.info(f"Saving example data to: {data_filename}")
            torch.save(inputs, data_file)

        if mobile_optimization is not None:
            logger.info("Applying optimize_for_mobile ...")
            liteopt_model = optimize_for_mobile(
                script_model,
                optimization_blocklist=mobile_optimization.optimization_blocklist,
                preserved_methods=mobile_optimization.preserved_methods,
                backend=mobile_optimization.backend,
            )
            torchscript_filename = mobile_optimization.torchscript_filename
            with _synced_local_file(torchscript_filename) as lite_path:
                logger.info(f"Saving mobile optimized model to: {torchscript_filename}")
                liteopt_model._save_for_lite_interpreter(lite_path, _extra_files=_extra_files)

            op_names = torch.jit.export_opnames(liteopt_model)
            logger.info(
                "Operator names from lite interpreter:\n{}".format("\n".join(op_names))
            )

            logger.info("Applying augment_model_with_bundled_inputs ...")
            # make all tensors zero-like to save storage
            iters = recursive_iterate(inputs)
            for x in iters:
                if isinstance(x, torch.Tensor):
                    iters.send(torch.zeros_like(x).contiguous())
            inputs = iters.value
            augment_model_with_bundled_inputs(liteopt_model, [inputs])

            # For non-cpu backends (e.g. Metal, Vulkan) the bundled inputs need to be
            # converted with `torch.to(<myDevice>)` in order to predict successfully.
            # This is a temporary bypass until PT Edge supports automatic backend
            # conversion in the bundled inputs interface, or we can auto-add an input
            # tensor conversion op to Metal and Vulkan models.
            target_backend = mobile_optimization.backend.lower()
            if target_backend == "cpu":
                # Sanity check by running
                logger.info("Running sanity check for the mobile optimized model ...")
                liteopt_model(*liteopt_model.get_all_bundled_inputs()[0])
            name, ext = os.path.splitext(torchscript_filename)
            input_bundled_path = name + "_bundled" + ext
            with _synced_local_file(input_bundled_path) as lite_path:
                logger.info(f"Saving input bundled model to: {input_bundled_path}")
                liteopt_model._save_for_lite_interpreter(lite_path)

    return torchscript_filename
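# --- Usage sketch (illustration): `model` and `example_tensor` are placeholders. With
# jit_mode="trace" the inputs are used for tracing; the return value is the filename of
# the saved TorchScript model inside output_path.
saved_filename = export_optimize_and_save_torchscript(
    model,
    inputs=(example_tensor,),
    output_path="./export/torchscript",
    jit_mode="trace",
)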
def setup_logger(
    output=None, distributed_rank=0, *, color=True, name="detectron2", abbrev_name=None
):
    """
    Initialize the detectron2 logger and set its verbosity level to "DEBUG".

    Args:
        output (str): a file name or a directory to save log. If None, will not save log file.
            If ends with ".txt" or ".log", assumed to be a file name.
            Otherwise, logs will be saved to `output/log.txt`.
        name (str): the root module name of this logger
        abbrev_name (str): an abbreviation of the module, to avoid long names in logs.
            Set to "" to not log the root module in logs.
            By default, will abbreviate "detectron2" to "d2" and leave other
            modules unchanged.

    Returns:
        logging.Logger: a logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    if abbrev_name is None:
        abbrev_name = "d2" if name == "detectron2" else name

    plain_formatter = logging.Formatter(
        "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S"
    )
    # stdout logging: master only
    if distributed_rank == 0:
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        if color:
            formatter = _ColorfulFormatter(
                colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s",
                datefmt="%m/%d %H:%M:%S",
                root_name=name,
                abbrev_name=str(abbrev_name),
            )
        else:
            formatter = plain_formatter
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # file logging: all workers
    if output is not None:
        if output.endswith(".txt") or output.endswith(".log"):
            filename = output
        else:
            filename = os.path.join(output, "log.txt")
        if distributed_rank > 0:
            filename = filename + ".rank{}".format(distributed_rank)
        PathManager.mkdirs(os.path.dirname(filename))

        fh = logging.StreamHandler(_cached_log_stream(filename))
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(plain_formatter)
        logger.addHandler(fh)

    return logger
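# --- Usage sketch (illustration): the output directory is a placeholder. On rank 0 this
# logs to stdout (optionally colored); every rank also writes to its own log file.
logger = setup_logger(output="./output", distributed_rank=0, name="detectron2")
logger.info("Logger is ready; file log at ./output/log.txt")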
def trace_and_save_torchscript(
    model: nn.Module,
    inputs: Tuple[torch.Tensor],
    output_path: str,
    torchscript_filename: str = "model.jit",
    mobile_optimization: Optional[MobileOptimizationConfig] = None,
    _extra_files: Optional[Dict[str, bytes]] = None,
):
    logger.info("Tracing and saving TorchScript to {} ...".format(output_path))
    PathManager.mkdirs(output_path)
    if _extra_files is None:
        _extra_files = {}

    with torch.no_grad():
        script_model = torch.jit.trace(model, inputs)

    with make_temp_directory("trace_and_save_torchscript") as tmp_dir:

        @contextlib.contextmanager
        def _synced_local_file(rel_path):
            remote_file = os.path.join(output_path, rel_path)
            local_file = os.path.join(tmp_dir, rel_path)
            yield local_file
            PathManager.copy_from_local(local_file, remote_file, overwrite=True)

        with _synced_local_file(torchscript_filename) as model_file:
            torch.jit.save(script_model, model_file, _extra_files=_extra_files)

        with _synced_local_file("data.pth") as data_file:
            torch.save(inputs, data_file)

        if mobile_optimization is not None:
            logger.info("Applying optimize_for_mobile ...")
            liteopt_model = optimize_for_mobile(
                script_model,
                optimization_blocklist=mobile_optimization.optimization_blocklist,
                preserved_methods=mobile_optimization.preserved_methods,
                backend=mobile_optimization.backend,
            )
            torchscript_filename = mobile_optimization.torchscript_filename
            with _synced_local_file(torchscript_filename) as lite_path:
                liteopt_model._save_for_lite_interpreter(lite_path, _extra_files=_extra_files)
            # liteopt_model(*inputs)  # sanity check
            op_names = torch.jit.export_opnames(liteopt_model)
            logger.info(
                "Operator names from lite interpreter:\n{}".format("\n".join(op_names))
            )

            logger.info("Applying augment_model_with_bundled_inputs ...")
            # make all tensors zero-like to save storage
            iters = recursive_iterate(inputs)
            for x in iters:
                if isinstance(x, torch.Tensor):
                    iters.send(torch.zeros_like(x).contiguous())
            inputs = iters.value
            augment_model_with_bundled_inputs(liteopt_model, [inputs])
            liteopt_model(*liteopt_model.get_all_bundled_inputs()[0])  # sanity check
            name, ext = os.path.splitext(torchscript_filename)
            with _synced_local_file(name + "_bundled" + ext) as lite_path:
                liteopt_model._save_for_lite_interpreter(lite_path)

    return torchscript_filename
default="", metavar="FILE", help="path to config file") parser.add_argument("--run-eval", action="store_true") parser.add_argument("--output", help="output directory for the converted model") parser.add_argument( "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER, ) args = parser.parse_args() logger = setup_logger() logger.info("Command line arguments: " + str(args)) PathManager.mkdirs(args.output) # Disable respecialization on new shapes. Otherwise --run-eval will be slow torch._C._jit_set_bailout_depth(1) cfg = setup_cfg(args) # create a torch model torch_model = build_model(cfg) DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS) torch_model.eval() # get a sample data data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) first_batch = next(iter(data_loader)) # convert and save model
def main(args):
    # utils.init_distributed_mode(args)
    if args.frozen_weights is not None:
        assert args.masks, "Frozen training is meant for segmentation only"
    print(args)

    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    model, criterion, postprocessors = build_model(args)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("number of params:", n_parameters)

    param_dicts = [
        {
            "params": [
                p
                for n, p in model_without_ddp.named_parameters()
                if "backbone" not in n and p.requires_grad
            ]
        },
        {
            "params": [
                p
                for n, p in model_without_ddp.named_parameters()
                if "backbone" in n and p.requires_grad
            ],
            "lr": args.lr_backbone,
        },
    ]
    optimizer = torch.optim.AdamW(param_dicts, lr=args.lr, weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)

    dataset_train = build_dataset(image_set="train", args=args)
    dataset_val = build_dataset(image_set="val", args=args)

    if args.distributed:
        sampler_train = DistributedSampler(dataset_train)
        sampler_val = DistributedSampler(dataset_val, shuffle=False)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)
        sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, args.batch_size, drop_last=True
    )

    data_loader_train = DataLoader(
        dataset_train,
        batch_sampler=batch_sampler_train,
        collate_fn=utils.collate_fn,
        num_workers=args.num_workers,
    )
    data_loader_val = DataLoader(
        dataset_val,
        args.batch_size,
        sampler=sampler_val,
        drop_last=False,
        collate_fn=utils.collate_fn,
        num_workers=args.num_workers,
    )

    if args.dataset_file == "coco_panoptic":
        # We also evaluate AP during panoptic training, on original coco DS
        coco_val = datasets.coco.build("val", args)
        base_ds = get_coco_api_from_dataset(coco_val)
    else:
        base_ds = get_coco_api_from_dataset(dataset_val)

    if args.frozen_weights is not None:
        checkpoint = torch.load(args.frozen_weights, map_location="cpu")
        model_without_ddp.detr.load_state_dict(checkpoint["model"])

    if args.resume:
        if args.resume.startswith("https"):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location="cpu", check_hash=True
            )
        else:
            checkpoint = torch.load(args.resume, map_location="cpu")
        model_without_ddp.load_state_dict(checkpoint["model"])
        if (
            not args.eval
            and "optimizer" in checkpoint
            and "lr_scheduler" in checkpoint
            and "epoch" in checkpoint
        ):
            optimizer.load_state_dict(checkpoint["optimizer"])
            lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
            args.start_epoch = checkpoint["epoch"] + 1

    if args.eval:
        test_stats, coco_evaluator = evaluate(
            model,
            criterion,
            postprocessors,
            data_loader_val,
            base_ds,
            device,
            args.output_dir,
        )
        if args.output_dir:
            with PathManager.open(os.path.join(args.output_dir, "eval.pth"), "wb") as f:
                utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, f)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(
            model,
            criterion,
            data_loader_train,
            optimizer,
            device,
            epoch,
            args.clip_max_norm,
        )
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = []  # [os.path.join(args.output_dir, 'checkpoint.pth')]
            # extra checkpoint before LR drop and every 10 epochs
            if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 10 == 0:
                checkpoint_paths.append(
                    os.path.join(args.output_dir, f"checkpoint{epoch:04}.pth")
                )
            for checkpoint_path in checkpoint_paths:
                with PathManager.open(checkpoint_path, "wb") as f:
                    if args.gpu == 0 and args.machine_rank == 0:
                        utils.save_on_master(
                            {
                                "model": model_without_ddp.state_dict(),
                                "optimizer": optimizer.state_dict(),
                                "lr_scheduler": lr_scheduler.state_dict(),
                                "epoch": epoch,
                                "args": args,
                            },
                            f,
                        )

        test_stats, coco_evaluator = evaluate(
            model,
            criterion,
            postprocessors,
            data_loader_val,
            base_ds,
            device,
            args.output_dir,
        )

        log_stats = {
            **{f"train_{k}": v for k, v in train_stats.items()},
            **{f"test_{k}": v for k, v in test_stats.items()},
            "epoch": epoch,
            "n_parameters": n_parameters,
        }

        if args.output_dir and utils.is_main_process():
            with PathManager.open(os.path.join(args.output_dir, "log.txt"), "w") as f:
                f.write(json.dumps(log_stats) + "\n")

            # for evaluation logs
            if coco_evaluator is not None:
                PathManager.mkdirs(os.path.join(args.output_dir, "eval"))
                if "bbox" in coco_evaluator.coco_eval:
                    filenames = ["latest.pth"]
                    if epoch % 50 == 0:
                        filenames.append(f"{epoch:03}.pth")
                    for name in filenames:
                        with PathManager.open(
                            os.path.join(args.output_dir, "eval", name), "wb"
                        ) as f:
                            torch.save(coco_evaluator.coco_eval["bbox"].eval, f)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))
def create_dir_on_local_main_process(dir):
    if get_local_rank() == 0 and dir:
        PathManager.mkdirs(dir)
def create_dir_on_global_main_process(dir):
    if comm.get_rank() == 0 and dir:
        PathManager.mkdirs(dir)
def dump_torchscript_IR(model, dir):
    """
    Dump IR of a TracedModule/ScriptModule at various levels. Useful for debugging.

    Args:
        model (TracedModule or ScriptModule): traced or scripted module
        dir (str): output directory to dump files.
    """
    PathManager.mkdirs(dir)

    def _get_script_mod(mod):
        if isinstance(mod, torch.jit.TracedModule):
            return mod._actual_script_module
        return mod

    # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code
    with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f:

        def get_code(mod):
            # Try a few ways to get code using private attributes.
            try:
                # This contains more information than just `mod.code`
                return _get_script_mod(mod)._c.code
            except AttributeError:
                pass
            try:
                return mod.code
            except AttributeError:
                return None

        def dump_code(prefix, mod):
            code = get_code(mod)
            name = prefix or "root model"
            if code is None:
                f.write(f"Could not find code for {name} (type={mod.original_name})\n")
                f.write("\n")
            else:
                f.write(f"\nCode for {name}, type={mod.original_name}:\n")
                f.write(code)
                f.write("\n")
                f.write("-" * 80)

            for name, m in mod.named_children():
                dump_code(prefix + "." + name, m)

        dump_code("", model)

    # Recursively dump IR of all modules
    with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f:
        try:
            f.write(_get_script_mod(model)._c.dump_to_str(True, False, False))
        except AttributeError:
            pass

    # Dump IR of the entire graph (all submodules inlined)
    with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f:
        f.write(str(model.inlined_graph))

    # Dump the model structure in pytorch style
    with PathManager.open(os.path.join(dir, "model.txt"), "w") as f:
        f.write(str(model))
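# --- Usage sketch (illustration) with a toy traced module; any TracedModule or
# ScriptModule works. The output directory is a placeholder.
import torch
from torch import nn

toy = torch.jit.trace(nn.Linear(4, 2), torch.randn(1, 4))
dump_torchscript_IR(toy, "./output/torchscript_IR")
# writes model_ts_code.txt, model_ts_IR.txt, model_ts_IR_inlined.txt and model.txt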
def create_dir_on_global_main_process(path: str) -> None:
    if comm.get_rank() == 0 and path:
        PathManager.mkdirs(path)
    # Add a barrier to make sure the dir exists before non-master processes proceed
    comm.synchronize()