Example #1
File: shape.py Project: zhDai/meshrcnn
def read_voxel(voxelfile):
    """
    Reads a voxel grid and transforms it into a centered, scale-normalized set of verts
    """
    with PathManager.open(voxelfile, "rb") as f:
        voxel = sio.loadmat(f)["voxel"]
    voxel = np.rot90(voxel, k=3, axes=(1, 2))
    verts = np.argwhere(voxel > 0).astype(np.float32, copy=False)

    # centering and normalization
    min_x = np.min(verts[:, 0])
    max_x = np.max(verts[:, 0])
    min_y = np.min(verts[:, 1])
    max_y = np.max(verts[:, 1])
    min_z = np.min(verts[:, 2])
    max_z = np.max(verts[:, 2])
    verts[:, 0] = verts[:, 0] - (max_x + min_x) / 2
    verts[:, 1] = verts[:, 1] - (max_y + min_y) / 2
    verts[:, 2] = verts[:, 2] - (max_z + min_z) / 2
    scale = np.sqrt(np.max(np.sum(verts ** 2, axis=1))) * 2
    verts /= scale
    verts = torch.tensor(verts, dtype=torch.float32)

    return verts
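
For context, a typical call site looks like this (hypothetical path; assumes a .mat file containing a "voxel" occupancy array, as loaded above):

# Hypothetical usage of read_voxel; "voxels/chair_0001.mat" is a placeholder.
verts = read_voxel("voxels/chair_0001.mat")
print(verts.shape)        # (N, 3) float32 tensor
print(verts.abs().max())  # <= 0.5 after centering and scaling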
Example #2
File: lazy.py Project: qilei123/detectron2
 def new_import(name, globals=None, locals=None, fromlist=(), level=0):
     if (
             # Only deal with relative imports inside config files
             level != 0 and globals is not None
             and (globals.get("__package__", "")
                  or "").startswith(_CFG_PACKAGE_NAME)):
         cur_file = find_relative_file(globals["__file__"], name, level)
         _validate_py_syntax(cur_file)
         spec = importlib.machinery.ModuleSpec(
             _random_package_name(cur_file), None, origin=cur_file)
         module = importlib.util.module_from_spec(spec)
         module.__file__ = cur_file
         with PathManager.open(cur_file) as f:
             content = f.read()
         exec(compile(content, cur_file, "exec"), module.__dict__)
         for name in fromlist:  # turn imported dict into DictConfig automatically
             val = _cast_to_config(module.__dict__[name])
             module.__dict__[name] = val
         return module
     return old_import(name,
                       globals,
                       locals,
                       fromlist=fromlist,
                       level=level)
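
The hook above only takes effect once it replaces the builtin importer. A minimal sketch of how that is typically done (the _patch_import name is hypothetical; detectron2's lazy.py wraps this in its own context manager):

import builtins
from contextlib import contextmanager

old_import = builtins.__import__  # the fallback used by new_import above

@contextmanager
def _patch_import():
    # route all imports through new_import while config files execute,
    # then restore the original importer
    builtins.__import__ = new_import
    try:
        yield
    finally:
        builtins.__import__ = old_import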
Example #3
    def evaluate(self, img_ids=None):
        """
        Args:
            img_ids: a list of image IDs to evaluate on. Defaults to None (the whole dataset)
        """
        if self._distributed:
            comm.synchronize()
            predictions = comm.gather(self._predictions, dst=0)
            predictions = list(itertools.chain(*predictions))

            if not comm.is_main_process():
                return {}
        else:
            predictions = self._predictions

        if len(predictions) == 0:
            self._logger.warning(
                "[COCOEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir,
                                     "instances_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(predictions, f)

        self._results = OrderedDict()
        if "proposals" in predictions[0]:
            self._eval_box_proposals(predictions)
        if "instances" in predictions[0]:
            self._eval_predictions(set(self._tasks),
                                   predictions,
                                   img_ids=img_ids)
        # Copy so the caller can do whatever with results
        return copy.deepcopy(self._results)
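
In practice this evaluator is driven by an inference loop. A hedged sketch using detectron2's standard entry points (the dataset name and output directory are placeholders):

from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

evaluator = COCOEvaluator("coco_2017_val", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "coco_2017_val")
# inference_on_dataset calls evaluator.process(...) per batch and then
# evaluator.evaluate() once all predictions are gathered
results = inference_on_dataset(model, val_loader, evaluator)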
Example #4
def prepare_for_launch(args):
    """
    Load config, figure out working directory, create runner.
        - when args.config_file is empty, the returned cfg will be the default one
        - the returned output_dir is always non-empty; args.output_dir takes
            priority over cfg.OUTPUT_DIR.
    """
    print(args)
    runner = create_runner(args.runner)

    cfg = runner.get_default_cfg()
    if args.config_file:
        with PathManager.open(reroute_config_path(args.config_file), "r") as f:
            print("Loaded config file {}:\n{}".format(args.config_file,
                                                      f.read()))
        cfg.merge_from_file(args.config_file)
        cfg.merge_from_list(args.opts)
    else:
        cfg = create_cfg_from_cli_args(args, default_cfg=cfg)
    cfg.freeze()

    assert args.output_dir or args.config_file
    output_dir = args.output_dir or cfg.OUTPUT_DIR
    return cfg, output_dir, runner
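
A hedged sketch of how the returned triple is typically consumed in a d2go-style main() (the runner methods shown are assumptions beyond this snippet):

def main(args):
    cfg, output_dir, runner = prepare_for_launch(args)
    model = runner.build_model(cfg)  # assumed runner API
    return runner.do_train(cfg, model, resume=False)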
Example #5
def dump_trained_model_configs(
        output_dir: str, trained_cfgs: Dict[str, CfgNode]) -> Dict[str, str]:
    """Writes trained model config files to output_dir.

    Args:
        output_dir: output file directory.
        trained_cfgs: map from model name to the config of trained model.

    Returns:
        A map of model name to model config path.
    """
    trained_model_configs = {}
    trained_model_config_dir = os.path.join(output_dir,
                                            "trained_model_configs")
    PathManager.mkdirs(trained_model_config_dir)
    for name, trained_cfg in trained_cfgs.items():
        config_file = os.path.join(trained_model_config_dir,
                                   "{}.yaml".format(name))
        trained_model_configs[name] = config_file
        if comm.is_main_process():
            logger.info("Dump trained config file: {}".format(config_file))
            with PathManager.open(config_file, "w") as f:
                f.write(trained_cfg.dump())
    return trained_model_configs
Example #6
def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name

    # first load gt
    # read list of images
    with PathManager.open(imagesetfile, "r") as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    # load annots
    recs = {}
    for imagename in imagenames:
        recs[imagename] = parse_rec(annopath.format(imagename))

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj["name"] == classname]
        bbox = np.array([x["bbox"] for x in R])
        difficult = np.array([x["difficult"] for x in R]).astype(bool)
        # difficult = np.array([False for x in R]).astype(bool)  # treat all "difficult" as GT
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, "r") as f:
        lines = f.readlines()

    splitlines = [x.strip().split(" ") for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4)

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R["bbox"].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
            ih = np.maximum(iymax - iymin + 1.0, 0.0)
            inters = iw * ih

            # union
            uni = (
                (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
                + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0)
                - inters
            )

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R["difficult"][jmax]:
                if not R["det"][jmax]:
                    tp[d] = 1.0
                    R["det"][jmax] = 1
                else:
                    fp[d] = 1.0
        else:
            fp[d] = 1.0

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
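
voc_ap is not shown above; for reference, the implementation this code lineage (py-faster-rcnn/detectron2) pairs with it computes either the VOC07 11-point average or the exact area under the precision envelope:

def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP given precision and recall arrays."""
    if use_07_metric:
        # 11-point metric: average the max precision at recall 0.0, 0.1, ..., 1.0
        ap = 0.0
        for t in np.arange(0.0, 1.1, 0.1):
            p = 0.0 if np.sum(rec >= t) == 0 else np.max(prec[rec >= t])
            ap += p / 11.0
    else:
        # exact AP: area under the monotonically decreasing precision envelope
        mrec = np.concatenate(([0.0], rec, [1.0]))
        mpre = np.concatenate(([0.0], prec, [0.0]))
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
        i = np.where(mrec[1:] != mrec[:-1])[0]
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap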
Example #7
def list_keyframes(video_fpath: str, video_stream_idx: int = 0) -> FrameTsList:
    """
    Traverses all keyframes of a video file. Returns a list of keyframe
    timestamps. Timestamps are counts in timebase units.

    Args:
       video_fpath (str): Video file path
       video_stream_idx (int): Video stream index (default: 0)
    Returns:
       List[int]: list of keyframe timestamps (timestamp is a count in timebase
           units)
    """
    try:
        with PathManager.open(video_fpath, "rb") as io:
            container = av.open(io, mode="r")
            stream = container.streams.video[video_stream_idx]
            keyframes = []
            pts = -1
            # Note: even though we request forward seeks for keyframes, sometimes
            # a keyframe in backwards direction is returned. We introduce tolerance
            # as a max count of ignored backward seeks
            tolerance_backward_seeks = 2
            while True:
                try:
                    container.seek(pts + 1, backward=False, any_frame=False, stream=stream)
                except av.AVError as e:
                    # the exception occurs when the video length is exceeded,
                    # we then return whatever data we've already collected
                    logger = logging.getLogger(__name__)
                    logger.debug(
                        f"List keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts + 1}, AV error: {e}"
                    )
                    return keyframes
                except OSError as e:
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"List keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts + 1}, OS error: {e}"
                    )
                    return []
                packet = next(container.demux(video=video_stream_idx))
                if packet.pts is not None and packet.pts <= pts:
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"Video file {video_fpath}, stream {video_stream_idx}: "
                        f"bad seek for packet {pts + 1} (got packet {packet.pts}), "
                        f"tolerance {tolerance_backward_seeks}."
                    )
                    tolerance_backward_seeks -= 1
                    if tolerance_backward_seeks == 0:
                        return []
                    pts += 1
                    continue
                tolerance_backward_seeks = 2
                pts = packet.pts
                if pts is None:
                    return keyframes
                if packet.is_keyframe:
                    keyframes.append(pts)
            return keyframes
    except OSError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"List keyframes: Error opening video file container {video_fpath}, " f"OS error: {e}"
        )
    except RuntimeError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"List keyframes: Error opening video file container {video_fpath}, "
            f"Runtime error: {e}"
        )
    return []
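
list_keyframes is typically paired with read_keyframes (Example #27), which decodes the frames at the returned timestamps. A hedged usage sketch with a placeholder path:

# Hypothetical usage; "video.mp4" is a placeholder.
keyframe_pts = list_keyframes("video.mp4")
frames = read_keyframes("video.mp4", keyframe_pts)
print(f"decoded {len(frames)} of {len(keyframe_pts)} keyframes")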
Example #8
    def _eval_predictions(self, predictions, img_ids=None):
        """
        Evaluate predictions. Fill self._results with the metrics of the tasks.
        """
        self._logger.info("Preparing results for COCO format ...")
        coco_results = list(
            itertools.chain(*[x["instances"] for x in predictions]))
        tasks = self._tasks or self._tasks_from_predictions(coco_results)

        # unmap the category ids for COCO
        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
            dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id
            all_contiguous_ids = list(dataset_id_to_contiguous_id.values())
            num_classes = len(all_contiguous_ids)
            assert min(all_contiguous_ids) == 0 and max(
                all_contiguous_ids) == num_classes - 1

            reverse_id_mapping = {
                v: k
                for k, v in dataset_id_to_contiguous_id.items()
            }
            for result in coco_results:
                category_id = result["category_id"]
                assert category_id < num_classes, (
                    f"A prediction has class={category_id}, "
                    f"but the dataset only has {num_classes} classes and "
                    f"predicted class id should be in [0, {num_classes - 1}].")
                result["category_id"] = reverse_id_mapping[category_id]

        if self._output_dir:
            file_path = os.path.join(self._output_dir,
                                     "coco_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(coco_results))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating predictions with {} COCO API...".format(
            "unofficial" if self._use_fast_impl else "official"))
        for task in sorted(tasks):
            coco_eval = (
                _evaluate_predictions_on_coco(
                    self._coco_api,
                    coco_results,
                    task,
                    kpt_oks_sigmas=self._kpt_oks_sigmas,
                    use_fast_impl=self._use_fast_impl,
                    img_ids=img_ids,
                ) if len(coco_results) > 0 else
                None  # cocoapi does not handle empty results very well
            )

            res = self._derive_coco_results(
                coco_eval,
                task,
                class_names=self._metadata.get("thing_classes"))
            self._results[task] = res
Example #9
    def evaluate(self):
        """
        Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval):
        * Mean intersection-over-union averaged across classes (mIoU)
        * Frequency Weighted IoU (fwIoU)
        * Mean pixel accuracy averaged across classes (mACC)
        * Pixel Accuracy (pACC)
        """
        if self._distributed:
            synchronize()
            conf_matrix_list = all_gather(self._conf_matrix)
            self._predictions = all_gather(self._predictions)
            self._predictions = list(itertools.chain(*self._predictions))
            if not is_main_process():
                return

            self._conf_matrix = np.zeros_like(self._conf_matrix)
            for conf_matrix in conf_matrix_list:
                self._conf_matrix += conf_matrix

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir,
                                     "sem_seg_predictions.json")
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(self._predictions))

        acc = np.full(self._num_classes, np.nan, dtype=float)
        iou = np.full(self._num_classes, np.nan, dtype=float)
        tp = self._conf_matrix.diagonal()[:-1].astype(float)
        pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(float)
        class_weights = pos_gt / np.sum(pos_gt)
        pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(float)
        acc_valid = pos_gt > 0
        acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid]
        iou_valid = (pos_gt + pos_pred) > 0
        union = pos_gt + pos_pred - tp
        # index with iou_valid (not acc_valid) so classes that are predicted
        # but absent from the ground truth do not leave NaNs in the averages
        iou[iou_valid] = tp[iou_valid] / union[iou_valid]
        macc = np.sum(acc[acc_valid]) / np.sum(acc_valid)
        miou = np.sum(iou[iou_valid]) / np.sum(iou_valid)
        fiou = np.sum(iou[iou_valid] * class_weights[iou_valid])
        pacc = np.sum(tp) / np.sum(pos_gt)

        res = {}
        res["mIoU"] = 100 * miou
        res["fwIoU"] = 100 * fiou
        for i, name in enumerate(self._class_names):
            res["IoU-{}".format(name)] = 100 * iou[i]
        res["mACC"] = 100 * macc
        res["pACC"] = 100 * pacc
        for i, name in enumerate(self._class_names):
            res["ACC-{}".format(name)] = 100 * acc[i]

        if self._output_dir:
            file_path = os.path.join(self._output_dir,
                                     "sem_seg_evaluation.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(res, f)
        results = OrderedDict({"sem_seg": res})
        self._logger.info(results)
        return results
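
A tiny worked example of the metric arithmetic above, using a hypothetical 2-class confusion matrix (rows are predictions, columns are ground truth, matching the axis=0/axis=1 sums in evaluate):

import numpy as np

conf = np.array([[8.0, 2.0],
                 [1.0, 9.0]])                  # conf[pred, gt]
tp = conf.diagonal()                           # [8, 9]
pos_gt = conf.sum(axis=0)                      # [9, 11]  pixels per gt class
pos_pred = conf.sum(axis=1)                    # [10, 10] pixels per predicted class
union = pos_gt + pos_pred - tp                 # [11, 12]
iou = tp / union                               # [0.727, 0.750]
print("mIoU:", 100 * iou.mean())               # ~73.9
print("pACC:", 100 * tp.sum() / pos_gt.sum())  # 85.0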
Example #10
def load_pix2pix_json(
    json_path,
    input_folder,
    gt_folder,
    mask_folder,
    real_json_path=None,
    real_folder=None,
    max_num=1e10,
):
    """
    Args:
        json_path (str): path to the json file that maps filenames to labels.
        input_folder (str): the directory for the input/source images
        gt_folder (str): the directory for the ground_truth/target images
        mask_folder (str): the directory for the masks
    Returns:
        list[dict]: a list of dicts
    """
    real_filenames = {}
    if real_json_path is not None:
        with PathManager.open(real_json_path, 'r') as f:
            real_filenames = json.load(f)

    data = []
    with PathManager.open(json_path, 'r') as f:
        filenames = json.load(f)

        in_len = len(filenames)
        real_len = len(real_filenames)
        total_len = min(max(in_len, real_len), max_num)

        real_keys = [*real_filenames.keys()]
        in_keys = [*filenames.keys()]

        cnt = 0
        # for fname in filenames.keys():
        while cnt < total_len:
            fname = in_keys[cnt % in_len]
            input_label = filenames[fname]
            if isinstance(input_label, tuple) or isinstance(input_label, list):
                assert len(input_label) == 2, (
                    "Save (real_name, label) as the value of the json dict for resampling"
                )
                fname, input_label = input_label

            f = {
                "file_name": fname,
                "input_folder": input_folder,
                "gt_folder": gt_folder,
                "mask_folder": mask_folder,
                "input_label": input_label,
                "real_folder": real_folder
            }
            if real_len > 0:
                real_fname = real_keys[cnt % real_len]
                f["real_file_name"] = real_fname
            data.append(f)
            cnt += 1
            # 5000 is the general number of images used to calculate FID in GANs
            # if max_num > 0 and len(data) == max_num:
            #     logger.info("Reach maxinum of test data: {} ".format(len(data)))
            #     return data
    logger.info("Total number of data dicts: {} ".format(len(data)))
    return data
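
For reference, a hypothetical json_path payload consistent with the parsing above: each value is either a plain label, or a (real_name, label) pair used for resampling, per the assert:

# Hypothetical contents of the file at json_path:
example_filenames = {
    "0001.png": 2,                     # filename -> label
    "0002.png": ["real_0002.png", 5],  # filename -> (real_name, label)
}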
Example #11
 def _open(self, path, mode="r", **kwargs):
     return PathManager.open(self._get_local_path(path), mode, **kwargs)
Example #12
 def _load_image(self, id: int) -> Image.Image:
     path = self.coco.loadImgs(id)[0]["file_name"]
     with PathManager.open(os.path.join(self.root, path), "rb") as f:
         image = Image.open(f).convert("RGB")
     return image
Example #13
def dump_flops_info(model, inputs, output_dir, use_eval_mode=True):
    """
    Dump flops information about the model, using the given model inputs.
    Information is dumped to output_dir in various formats using different
    flop counting tools. Only a simple table is printed to the terminal.

    Args:
        inputs: a tuple of positional arguments used to call model with.
        use_eval_mode: turn the model into eval mode for flop counting. Otherwise,
            will use the original mode. It's recommended to use eval mode, because
            training mode typically follows a different codepath.
    """
    if not comm.is_main_process():
        return
    logger.info("Evaluating model's number of parameters and FLOPS")

    try:
        model = copy.deepcopy(model)
    except Exception:
        logger.info("Failed to deepcopy the model and skip FlopsEstimation.")
        return

    # delete other forward_pre_hooks so they are not simultaneously called
    for k in model._forward_pre_hooks:
        del model._forward_pre_hooks[k]

    if use_eval_mode:
        model.eval()
    inputs = copy.deepcopy(inputs)

    # 1. using mobile_cv flop counter
    try:
        fest = flops_utils.FlopsEstimation(model)
        with fest.enable():
            model(*inputs)
            fest.add_flops_info()
            model_str = str(model)
        output_file = os.path.join(output_dir, "flops_str_mobilecv.txt")
        with PathManager.open(output_file, "w") as f:
            f.write(model_str)
            logger.info(f"Flops info written to {output_file}")
    except Exception:
        logger.exception(
            "Failed to estimate flops using mobile_cv's FlopsEstimation")

    # 2. using d2/fvcore's flop counter
    output_file = os.path.join(output_dir, "flops_str_fvcore.txt")
    try:
        flops = FlopCountAnalysis(model, inputs)

        # 2.1: dump as model str
        model_str = flop_count_str(flops)
        with PathManager.open(output_file, "w") as f:
            f.write(model_str)
            logger.info(f"Flops info written to {output_file}")

        # 2.2: dump as table
        flops_table = flop_count_table(flops, max_depth=10)
        output_file = os.path.join(output_dir, "flops_table_fvcore.txt")
        with PathManager.open(output_file, "w") as f:
            f.write(flops_table)
            logger.info(f"Flops table (full version) written to {output_file}")

        # 2.3: print a table with a shallow depth
        flops_table = flop_count_table(flops, max_depth=3)
        logger.info("Flops table:\n" + flops_table)
    except Exception:
        with PathManager.open(output_file, "w") as f:
            traceback.print_exc(file=f)
        logger.warning(
            "Failed to estimate flops using detectron2's FlopCountAnalysis. "
            f"Error written to {output_file}.")
        flops = float("nan")
    return flops
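
A hedged call-site sketch (assumes a detectron2-style model whose forward takes a single batched-inputs argument, as implied by model(*inputs) above):

# Hypothetical usage: count flops on one real batch from the data loader.
data = next(iter(data_loader))
flops = dump_flops_info(model, inputs=(data,), output_dir="./output")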
Example #14
    parser.add_argument("--input",
                        required=True,
                        help="JSON file produced by the model")
    parser.add_argument("--output", required=True, help="output directory")
    parser.add_argument("--dataset",
                        help="name of the dataset",
                        default="coco_2017_val")
    parser.add_argument("--conf-threshold",
                        default=0.5,
                        type=float,
                        help="confidence threshold")
    args = parser.parse_args()

    logger = setup_logger()

    with PathManager.open(args.input, "r") as f:
        predictions = json.load(f)

    pred_by_image = defaultdict(list)
    for p in predictions:
        pred_by_image[p["image_id"]].append(p)

    dicts = list(DatasetCatalog.get(args.dataset))
    metadata = MetadataCatalog.get(args.dataset)
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):

        def dataset_id_map(ds_id):
            return metadata.thing_dataset_id_to_contiguous_id[ds_id]

    elif "lvis" in args.dataset:
        # LVIS results are in the same format as COCO results, but have a different
Example #15
def load_coco_panoptic_json(json_file, image_dir, gt_dir, meta):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/coco/train2017".
        gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017".
        json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json".

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )
    """
    def _convert_category_id(segment_info, meta):
        if segment_info["category_id"] in meta[
                "thing_dataset_id_to_contiguous_id"]:
            segment_info["category_id"] = meta[
                "thing_dataset_id_to_contiguous_id"][
                    segment_info["category_id"]]
            segment_info["isthing"] = True
        else:
            segment_info["category_id"] = meta[
                "stuff_dataset_id_to_contiguous_id"][
                    segment_info["category_id"]]
            segment_info["isthing"] = False
        return segment_info

    with PathManager.open(json_file) as f:
        json_info = json.load(f)

    ret = []
    if not "fake" in gt_dir:
        for ann in json_info["annotations"]:
            image_id = int(ann["image_id"])
            # TODO: currently we assume image and label have the same filename but
            # different extensions, and images have extension ".jpg" for COCO. Need
            # to make image extension a user-provided argument if we extend this
            # function to support other COCO-like datasets.
            image_file = os.path.join(
                image_dir,
                os.path.splitext(ann["file_name"])[0] + ".jpg")
            assert PathManager.isfile(image_file), image_file
            label_file = os.path.join(gt_dir, ann["file_name"])
            assert PathManager.isfile(label_file), label_file
            segments_info = [
                _convert_category_id(x, meta) for x in ann["segments_info"]
            ]
            ret.append({
                "file_name": image_file,
                "image_id": image_id,
                "pan_seg_file_name": label_file,
                "segments_info": segments_info,
            })
    else:
        print('Assuming fake labels dataset.')
        for img in json_info["images"]:
            image_id = int(img["id"])
            # TODO: currently we assume image and label have the same filename but
            # different extensions, and images have extension ".jpg" for COCO. Need
            # to make image extension a user-provided argument if we extend this
            # function to support other COCO-like datasets.
            image_file = os.path.join(
                image_dir,
                os.path.splitext(img["file_name"])[0] + ".jpg")
            assert PathManager.isfile(image_file), image_file
            label_file = os.path.join(gt_dir,
                                      img["file_name"].replace(".jpg", ".png"))
            assert PathManager.isfile(label_file), label_file
            # fake dataset: no annotations, so nothing to pass through _convert_category_id
            segments_info = []
            ret.append({
                "file_name": image_file,
                "image_id": image_id,
                "pan_seg_file_name": label_file,
                "segments_info": segments_info,
            })

    assert len(ret), f"No images found in {image_dir}!"
    assert PathManager.isfile(
        ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"]
    return ret
Example #16
def evaluate_for_pix3d(
    predictions,
    dataset,
    metadata,
    filter_iou,
    mesh_models=None,
    iou_thresh=0.5,
    mask_thresh=0.5,
    device=None,
    vis_preds=False,
):
    from PIL import Image

    if device is None:
        device = torch.device("cpu")

    F1_TARGET = "[email protected]"

    # classes
    cat_ids = sorted(dataset.getCatIds())
    reverse_id_mapping = {
        v: k
        for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
    }

    # initialize tensors to record box & mask AP, number of gt positives
    box_apscores, box_aplabels = {}, {}
    mask_apscores, mask_aplabels = {}, {}
    mesh_apscores, mesh_aplabels = {}, {}
    npos = {}
    for cat_id in cat_ids:
        box_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        box_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        mask_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        mask_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        mesh_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        mesh_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        npos[cat_id] = 0.0
    box_covered = []
    mask_covered = []
    mesh_covered = []

    # number of gt positive instances per class
    for gt_ann in dataset.dataset["annotations"]:
        gt_label = gt_ann["category_id"]
        # examples with imgfiles = {img/table/1749.jpg, img/table/0045.png}
        # have a mismatch between images and masks. Thus, ignore
        image_file_name = dataset.loadImgs([gt_ann["image_id"]
                                            ])[0]["file_name"]
        if image_file_name in ["img/table/1749.jpg", "img/table/0045.png"]:
            continue
        npos[gt_label] += 1.0

    for prediction in predictions:

        original_id = prediction["image_id"]
        image_width = dataset.loadImgs([original_id])[0]["width"]
        image_height = dataset.loadImgs([original_id])[0]["height"]
        image_size = [image_height, image_width]
        image_file_name = dataset.loadImgs([original_id])[0]["file_name"]
        # examples with imgfiles = {img/table/1749.jpg, img/table/0045.png}
        # have a mismatch between images and masks. Thus, ignore
        if image_file_name in ["img/table/1749.jpg", "img/table/0045.png"]:
            continue

        if "instances" not in prediction:
            continue

        num_img_preds = len(prediction["instances"])
        if num_img_preds == 0:
            continue

        # predictions
        scores = prediction["instances"].scores
        boxes = prediction["instances"].pred_boxes.to(device)
        labels = prediction["instances"].pred_classes
        masks_rles = prediction["instances"].pred_masks_rle
        if hasattr(prediction["instances"], "pred_meshes"):
            meshes = prediction["instances"].pred_meshes  # preditected meshes
            verts = [mesh[0] for mesh in meshes]
            faces = [mesh[1] for mesh in meshes]
            meshes = Meshes(verts=verts, faces=faces).to(device)
        else:
            meshes = ico_sphere(4, device)
            meshes = meshes.extend(num_img_preds).to(device)
        if hasattr(prediction["instances"], "pred_dz"):
            pred_dz = prediction["instances"].pred_dz
            heights = boxes.tensor[:, 3] - boxes.tensor[:, 1]
            # NOTE see appendix for derivation of pred dz
            pred_dz = pred_dz[:, 0] * heights.cpu()
        else:
            raise ValueError("Z range of box not predicted")
        assert prediction["instances"].image_size[0] == image_height
        assert prediction["instances"].image_size[1] == image_width

        # ground truth
        # annotations corresponding to original_id (aka coco image_id)
        gt_ann_ids = dataset.getAnnIds(imgIds=[original_id])
        assert len(
            gt_ann_ids) == 1  # note that pix3d has one annotation per image
        gt_anns = dataset.loadAnns(gt_ann_ids)[0]
        assert gt_anns["image_id"] == original_id

        # get original ground truth mask, box, label & mesh
        maskfile = os.path.join(metadata.image_root, gt_anns["segmentation"])
        with PathManager.open(maskfile, "rb") as f:
            gt_mask = torch.tensor(
                np.asarray(Image.open(f), dtype=np.float32) / 255.0)
        assert gt_mask.shape[0] == image_height and gt_mask.shape[
            1] == image_width

        gt_mask = (gt_mask > 0).to(dtype=torch.uint8)  # binarize mask
        gt_mask_rle = [
            mask_util.encode(np.array(gt_mask[:, :, None], order="F"))[0]
        ]
        gt_box = np.array(gt_anns["bbox"]).reshape(-1, 4)  # xywh from coco
        gt_box = BoxMode.convert(gt_box, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        gt_label = gt_anns["category_id"]
        faux_gt_targets = Boxes(
            torch.tensor(gt_box, dtype=torch.float32, device=device))

        # load gt mesh and extrinsics/intrinsics
        gt_R = torch.tensor(gt_anns["rot_mat"]).to(device)
        gt_t = torch.tensor(gt_anns["trans_mat"]).to(device)
        gt_K = torch.tensor(gt_anns["K"]).to(device)
        if mesh_models is not None:
            modeltype = gt_anns["model"]
            gt_verts, gt_faces = (
                mesh_models[modeltype][0].clone(),
                mesh_models[modeltype][1].clone(),
            )
            gt_verts = gt_verts.to(device)
            gt_faces = gt_faces.to(device)
        else:
            # load from disc
            raise NotImplementedError
        gt_verts = shape_utils.transform_verts(gt_verts, gt_R, gt_t)
        gt_zrange = torch.stack([gt_verts[:, 2].min(), gt_verts[:, 2].max()])
        gt_mesh = Meshes(verts=[gt_verts], faces=[gt_faces])

        # box iou
        boxiou = pairwise_iou(boxes, faux_gt_targets)

        # filter predictions with iou > filter_iou
        valid_pred_ids = boxiou > filter_iou

        # mask iou
        miou = mask_util.iou(masks_rles, gt_mask_rle, [0])

        # # gt zrange (zrange stores min_z and max_z)
        # # zranges = torch.stack([gt_zrange] * len(meshes), dim=0)

        # predicted zrange (= pred_dz)
        assert hasattr(prediction["instances"], "pred_dz")
        # It's impossible to predict the center location in Z (=tc)
        # from the image. See appendix for more.
        tc = (gt_zrange[1] + gt_zrange[0]) / 2.0
        # Given a center location (tc) and a focal_length,
        # pred_dz = pred_dz * box_h * tc / focal_length
        # See appendix for more.
        zranges = torch.stack(
            [
                torch.stack([
                    tc - tc * pred_dz[i] / 2.0 / gt_K[0],
                    tc + tc * pred_dz[i] / 2.0 / gt_K[0]
                ]) for i in range(len(meshes))
            ],
            dim=0,
        )

        gt_Ks = gt_K.view(1, 3).expand(len(meshes), 3)
        meshes = transform_meshes_to_camera_coord_system(
            meshes, boxes.tensor, zranges, gt_Ks, image_size)

        if vis_preds:
            vis_utils.visualize_predictions(
                original_id,
                image_file_name,
                scores,
                labels,
                boxes.tensor,
                masks_rles,
                meshes,
                metadata,
                "/tmp/output",
            )

        shape_metrics = compare_meshes(meshes, gt_mesh, reduce=False)

        # sort predictions in descending order
        scores_sorted, idx_sorted = torch.sort(scores, descending=True)

        for pred_id in range(num_img_preds):
            # remember we only evaluate the preds that overlap more than
            # filter_iou with the ground truth
            if valid_pred_ids[idx_sorted[pred_id], 0] == 0:
                continue
            # map to dataset category id
            pred_label = reverse_id_mapping[labels[idx_sorted[pred_id]].item()]
            pred_miou = miou[idx_sorted[pred_id]].item()
            pred_biou = boxiou[idx_sorted[pred_id]].item()
            pred_score = scores[idx_sorted[pred_id]].view(1).to(device)
            # note that metrics returns f1 in % (=x100)
            pred_f1 = shape_metrics[F1_TARGET][
                idx_sorted[pred_id]].item() / 100.0

            # mask
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_miou > iou_thresh)
                    and (original_id not in mask_covered)):
                tpfp[0] = 1
                mask_covered.append(original_id)
            mask_apscores[pred_label].append(pred_score)
            mask_aplabels[pred_label].append(tpfp)

            # box
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_biou > iou_thresh)
                    and (original_id not in box_covered)):
                tpfp[0] = 1
                box_covered.append(original_id)
            box_apscores[pred_label].append(pred_score)
            box_aplabels[pred_label].append(tpfp)

            # mesh
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_f1 > iou_thresh)
                    and (original_id not in mesh_covered)):
                tpfp[0] = 1
                mesh_covered.append(original_id)
            mesh_apscores[pred_label].append(pred_score)
            mesh_aplabels[pred_label].append(tpfp)

    # check things for eval
    # assert npos.sum() == len(dataset.dataset["annotations"])
    # convert to tensors
    pix3d_metrics = {}
    boxap, maskap, meshap = 0.0, 0.0, 0.0
    valid = 0.0
    for cat_id in cat_ids:
        cat_name = dataset.loadCats([cat_id])[0]["name"]
        if npos[cat_id] == 0:
            continue
        valid += 1

        cat_box_ap = VOCap.compute_ap(torch.cat(box_apscores[cat_id]),
                                      torch.cat(box_aplabels[cat_id]),
                                      npos[cat_id])
        boxap += cat_box_ap
        pix3d_metrics["box_ap@%.1f - %s" % (iou_thresh, cat_name)] = cat_box_ap

        cat_mask_ap = VOCap.compute_ap(torch.cat(mask_apscores[cat_id]),
                                       torch.cat(mask_aplabels[cat_id]),
                                       npos[cat_id])
        maskap += cat_mask_ap
        pix3d_metrics["mask_ap@%.1f - %s" %
                      (iou_thresh, cat_name)] = cat_mask_ap

        cat_mesh_ap = VOCap.compute_ap(torch.cat(mesh_apscores[cat_id]),
                                       torch.cat(mesh_aplabels[cat_id]),
                                       npos[cat_id])
        meshap += cat_mesh_ap
        pix3d_metrics["mesh_ap@%.1f - %s" %
                      (iou_thresh, cat_name)] = cat_mesh_ap

    pix3d_metrics["box_ap@%.1f" % iou_thresh] = boxap / valid
    pix3d_metrics["mask_ap@%.1f" % iou_thresh] = maskap / valid
    pix3d_metrics["mesh_ap@%.1f" % iou_thresh] = meshap / valid

    # print test ground truth
    vis_utils.print_instances_class_histogram(
        [npos[cat_id] for cat_id in cat_ids],  # number of instances
        [dataset.loadCats([cat_id])[0]["name"]
         for cat_id in cat_ids],  # class names
        pix3d_metrics,
    )

    return pix3d_metrics
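
VOCap.compute_ap is not shown above. A minimal sketch of what such a helper computes, assuming the call signature used here (confidence scores, binary tp/fp labels, and the ground-truth count npos); meshrcnn ships its own implementation, which may differ in detail:

import torch

def compute_ap(scores, labels, npos):
    # sort detections by descending confidence
    order = torch.argsort(scores, descending=True)
    tp = labels[order].float()
    fp = 1.0 - tp
    tp, fp = torch.cumsum(tp, dim=0), torch.cumsum(fp, dim=0)
    rec = tp / max(float(npos), 1.0)
    prec = tp / (tp + fp)  # tp + fp at rank k equals k + 1, so never zero
    # all-points AP: area under the monotone precision envelope
    mrec = torch.cat([rec.new_zeros(1), rec, rec.new_ones(1)])
    mpre = torch.cat([prec.new_zeros(1), prec, prec.new_zeros(1)])
    for i in range(mpre.numel() - 2, -1, -1):
        mpre[i] = torch.max(mpre[i], mpre[i + 1])
    idx = torch.nonzero(mrec[1:] != mrec[:-1], as_tuple=False).flatten()
    return torch.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1]).item()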
Example #17
def dump_cfg(cfg, path):
    if comm.is_main_process():
        with PathManager.open(path, "w") as f:
            f.write(cfg.dump())
        logger.info("Full config saved to {}".format(path))
Example #18
    def _eval_predictions(self, predictions, img_ids=None):
        """
        Evaluate predictions. Fill self._results with the metrics of the tasks.
        """
        self._logger.info("Preparing results for COCO format ...")
        # list[dict] len=num_val_images
        coco_results = list(itertools.chain(*[x["instances"] for x in predictions]))
        # "bbox" for detection 
        tasks = self._tasks or self._tasks_from_predictions(coco_results)
        # unmap the category ids for COCO
        # (the id mapping is set up in __init__)
        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
            dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id
            all_contiguous_ids = list(dataset_id_to_contiguous_id.values())
            num_classes = len(all_contiguous_ids)
            assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1
            # invert the id mapping: map contiguous category ids back to the
            # original (possibly non-contiguous) dataset ids
            reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()}
            for result in coco_results:
                category_id = result["category_id"]
                assert category_id < num_classes, (
                    f"A prediction has class={category_id}, "
                    f"but the dataset only has {num_classes} classes and "
                    f"predicted class id should be in [0, {num_classes - 1}]."
                )
                result["category_id"] = reverse_id_mapping[category_id]

        if self._output_dir:
            file_path = os.path.join(self._output_dir, "coco_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            # at this point coco_results is a list[dict] with keys
            # "image_id", "category_id", "bbox", "score"
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(coco_results))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info(
            "Evaluating predictions with {} COCO API...".format(
                "unofficial" if self._use_fast_impl else "official"
            )
        )
        # there may be multiple tasks: bbox, segmentation, keypoints
        for task in sorted(tasks):

            coco_eval = (
                # evaluate predictions with the COCO API;
                # returns a COCOeval object
                _evaluate_predictions_on_coco(
                    self._coco_api,
                    coco_results,
                    task,
                    kpt_oks_sigmas=self._kpt_oks_sigmas,
                    use_fast_impl=self._use_fast_impl,
                    img_ids=img_ids, # img_ids is None
                )
                if len(coco_results) > 0
                else None  # cocoapi does not handle empty results very well
            )
            """
            关于COCOeval 对象有:
            self.eval = {
            'params': p,# 保存参数配置
            'counts': [T, R, K, A, M], # number of threshold, recThres, class, areaRng, maxDets
            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'precision': precision,[T,R,K,A,M]
            'recall':   recall, [T,K,A,M]
            'scores': scores, [T,R,K,A,M]
            }
            self.stats:list[12] # 保存了一些总结信息
            AP(default):iou=[0.5,1), recall=[0,1), num_class=80, area='all', maxdet=100
            0:AP(all)
            1:AP(iou>=0.5)
            2:AP(iou>=0.75)
            3:AP(area='small')
            4:AP(area='medium')
            5:AP(area='large')
            Recall(default):iou=[0.5,1),num_class=80, area='all', maxdet=100
            6:AR(maxdet=1)
            7:AR(maxdet=10)
            8:AR(maxdet=100)
            9:AR(area='small')
            10:AR(area='medium')
            11:AR(area='large')
            """
            res = self._derive_coco_results(
                coco_eval, task, class_names=self._metadata.get("thing_classes")
            )
            self._results[task] = res
Example #19
File: mesh.py Project: Oxyriser/detectron2
def load_mesh_auxiliary_data(fpath: str) -> Optional[torch.Tensor]:
    fpath_local = PathManager.get_local_path(fpath)
    with PathManager.open(fpath_local, "rb") as hFile:
        return torch.as_tensor(pickle.load(hFile), dtype=torch.float)
Example #20
    if args.type == "instance":
        dicts = load_cityscapes_instances(args.image_dir,
                                          args.gt_dir,
                                          from_json=True,
                                          to_polygons=True)
        logger.info("Done loading {} samples.".format(len(dicts)))

        thing_classes = [
            k.name for k in labels if k.hasInstances and not k.ignoreInEval
        ]
        meta = Metadata().set(thing_classes=thing_classes)

    else:
        dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir)
        logger.info("Done loading {} samples.".format(len(dicts)))

        stuff_names = [k.name for k in labels if k.trainId != 255]
        stuff_colors = [k.color for k in labels if k.trainId != 255]
        meta = Metadata().set(stuff_names=stuff_names,
                              stuff_colors=stuff_colors)

    for d in dicts:
        img = np.array(Image.open(PathManager.open(d["file_name"], "rb")))
        visualizer = Visualizer(img, metadata=meta)
        vis = visualizer.draw_dataset_dict(d)
        # cv2.imshow("a", vis.get_image()[:, :, ::-1])
        # cv2.waitKey()
        fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
        vis.save(fpath)
Example #21
def load_mesh_geodists(geodists_fpath: str) -> Optional[torch.Tensor]:
    geodists_fpath_local = PathManager.get_local_path(geodists_fpath, timeout_sec=600)
    with PathManager.open(geodists_fpath_local, "rb") as hFile:
        return torch.as_tensor(pickle.load(hFile), dtype=torch.float)
Example #22
def _cityscapes_files_to_dict(files, from_json, to_polygons):
    """
    Parse cityscapes annotation files into an instance segmentation dataset dict.

    Args:
        files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file)
        from_json (bool): whether to read annotations from the raw json file or the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        A dict in Detectron2 Dataset format.
    """
    from cityscapesscripts.helpers.labels import id2label, name2label

    image_file, instance_id_file, _, json_file = files

    annos = []

    if from_json:
        from shapely.geometry import MultiPolygon, Polygon

        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": jsonobj["imgHeight"],
            "width": jsonobj["imgWidth"],
        }

        # `polygons_union` contains the union of all valid polygons.
        polygons_union = Polygon()

        # CityscapesScripts draw the polygons in sequential order
        # and each polygon *overwrites* existing ones. See
        # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa
        # We use reverse order, and each polygon *avoids* early ones.
        # This will resolve the polygon overlaps in the same way as CityscapesScripts.
        for obj in jsonobj["objects"][::-1]:
            if "deleted" in obj:  # cityscapes data format specific
                continue
            label_name = obj["label"]

            try:
                label = name2label[label_name]
            except KeyError:
                if label_name.endswith("group"):  # crowd area
                    label = name2label[label_name[:-len("group")]]
                else:
                    raise
            if label.id < 0:  # cityscapes data format
                continue

            # Cityscapes's raw annotations uses integer coordinates
            # Therefore +0.5 here
            poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5
            # CityscapesScript uses PIL.ImageDraw.polygon to rasterize
            # polygons for evaluation. This function operates in integer space
            # and draws each pixel whose center falls into the polygon.
            # Therefore it draws a polygon which is 0.5 "fatter" in expectation.
            # We therefore dilate the input polygon by 0.5 as our input.
            poly = Polygon(poly_coord).buffer(0.5, resolution=4)

            if not label.hasInstances or label.ignoreInEval:
                # even if we won't store the polygon it still contributes to overlaps resolution
                polygons_union = polygons_union.union(poly)
                continue

            # Take non-overlapping part of the polygon
            poly_wo_overlaps = poly.difference(polygons_union)
            if poly_wo_overlaps.is_empty:
                continue
            polygons_union = polygons_union.union(poly)

            anno = {}
            anno["iscrowd"] = label_name.endswith("group")
            anno["category_id"] = label.id

            if isinstance(poly_wo_overlaps, Polygon):
                poly_list = [poly_wo_overlaps]
            elif isinstance(poly_wo_overlaps, MultiPolygon):
                poly_list = poly_wo_overlaps.geoms
            else:
                raise NotImplementedError(
                    "Unknown geometric structure {}".format(poly_wo_overlaps))

            poly_coord = []
            for poly_el in poly_list:
                # COCO API can work only with exterior boundaries now, hence we store only them.
                # TODO: store both exterior and interior boundaries once other parts of the
                # codebase support holes in polygons.
                poly_coord.append(list(chain(*poly_el.exterior.coords)))
            anno["segmentation"] = poly_coord
            (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds

            anno["bbox"] = (xmin, ymin, xmax, ymax)
            anno["bbox_mode"] = BoxMode.XYXY_ABS

            annos.append(anno)
    else:
        # See also the official annotation parsing scripts at
        # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py  # noqa
        with PathManager.open(instance_id_file, "rb") as f:
            inst_image = np.asarray(Image.open(f), order="F")
        # ids < 24 are stuff labels (filtering them first is about 5% faster)
        flattened_ids = np.unique(inst_image[inst_image >= 24])

        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": inst_image.shape[0],
            "width": inst_image.shape[1],
        }

        for instance_id in flattened_ids:
            # For non-crowd annotations, instance_id // 1000 is the label_id
            # Crowd annotations have <1000 instance ids
            label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
            label = id2label[label_id]
            if not label.hasInstances or label.ignoreInEval:
                continue

            anno = {}
            anno["iscrowd"] = instance_id < 1000
            anno["category_id"] = label.id

            mask = np.asarray(inst_image == instance_id,
                              dtype=np.uint8,
                              order="F")

            inds = np.nonzero(mask)
            ymin, ymax = inds[0].min(), inds[0].max()
            xmin, xmax = inds[1].min(), inds[1].max()
            anno["bbox"] = (xmin, ymin, xmax, ymax)
            if xmax <= xmin or ymax <= ymin:
                continue
            anno["bbox_mode"] = BoxMode.XYXY_ABS
            if to_polygons:
                # This conversion comes from D4809743 and D5171122,
                # when Mask-RCNN was first developed.
                contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
                                            cv2.CHAIN_APPROX_NONE)[-2]
                polygons = [
                    c.reshape(-1).tolist() for c in contours if len(c) >= 3
                ]
                # opencv can produce invalid polygons
                if len(polygons) == 0:
                    continue
                anno["segmentation"] = polygons
            else:
                anno["segmentation"] = mask_util.encode(mask[:, :, None])[0]
            annos.append(anno)
    ret["annotations"] = annos
    return ret
Example #23
 def _open_cfg(cls, filename):
     return PathManager.open(filename, "r")
Example #24
    def draw_dataset_dict(self, dic):
        """
        Draw annotations/segmentations in Detectron2 Dataset format.

        Args:
            dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.

        Returns:
            output (VisImage): image object with visualizations.
        """
        annos = dic.get("annotations", None)
        if annos:
            if "segmentation" in annos[0]:
                masks = [x["segmentation"] for x in annos]
            else:
                masks = None
            if "keypoints" in annos[0]:
                keypts = [x["keypoints"] for x in annos]
                keypts = np.array(keypts).reshape(len(annos), -1, 3)
            else:
                keypts = None

            boxes = [
                BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS)
                if len(x["bbox"]) == 4
                else x["bbox"]
                for x in annos
            ]

            colors = None
            category_ids = [x["category_id"] for x in annos]
            if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"):
                colors = [
                    self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
                    for c in category_ids
                ]
            names = self.metadata.get("thing_classes", None)
            labels = _create_text_labels(
                category_ids,
                scores=None,
                class_names=names,
                is_crowd=[x.get("iscrowd", 0) for x in annos],
            )
            self.overlay_instances(
                labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors
            )

        sem_seg = dic.get("sem_seg", None)
        if sem_seg is None and "sem_seg_file_name" in dic:
            with PathManager.open(dic["sem_seg_file_name"], "rb") as f:
                sem_seg = Image.open(f)
                sem_seg = np.asarray(sem_seg, dtype="uint8")
        if sem_seg is not None:
            self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)

        pan_seg = dic.get("pan_seg", None)
        if pan_seg is None and "pan_seg_file_name" in dic:
            with PathManager.open(dic["pan_seg_file_name"], "rb") as f:
                pan_seg = Image.open(f)
                pan_seg = np.asarray(pan_seg)
                from panopticapi.utils import rgb2id

                pan_seg = rgb2id(pan_seg)
        if pan_seg is not None:
            segments_info = dic["segments_info"]
            pan_seg = torch.tensor(pan_seg)
            self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0, alpha=0.5)
        return self.output
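
A hedged usage sketch (the dataset name and output path are placeholders):

import numpy as np
from PIL import Image
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.visualizer import Visualizer

dic = DatasetCatalog.get("coco_2017_val")[0]
img = np.array(Image.open(dic["file_name"]))
vis = Visualizer(img, metadata=MetadataCatalog.get("coco_2017_val"))
vis.draw_dataset_dict(dic).save("/tmp/vis.jpg")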
Example #25
File: api.py Project: yeonh2/d2go
def default_export_predictor(
    cfg, pytorch_model, predictor_type, output_dir, data_loader
):
    # The default implementation acts based on the PredictorExportConfig returned by
    # calling "prepare_for_export". It'll export all sub models in standard way
    # according to the "predictor_type".
    assert hasattr(pytorch_model, "prepare_for_export"), pytorch_model
    inputs = next(iter(data_loader))
    export_config = pytorch_model.prepare_for_export(cfg, inputs, predictor_type)
    model_inputs = (
        export_config.data_generator(inputs)
        if export_config.data_generator is not None
        else (inputs,)
    )

    predictor_path = os.path.join(output_dir, predictor_type)
    PathManager.mkdirs(predictor_path)

    predictor_init_kwargs = {
        "preprocess_info": export_config.preprocess_info,
        "postprocess_info": export_config.postprocess_info,
        "run_func_info": export_config.run_func_info,
    }

    if isinstance(export_config.model, dict):
        models_info = {}
        for name, model in export_config.model.items():
            save_path = os.path.join(predictor_path, name)
            model_info = _export_single_model(
                predictor_path=predictor_path,
                model=model,
                input_args=model_inputs[name] if model_inputs is not None else None,
                save_path=save_path,
                model_export_method=(
                    predictor_type
                    if export_config.model_export_method is None
                    else export_config.model_export_method[name]
                ),
                model_export_kwargs=(
                    {}
                    if export_config.model_export_kwargs is None
                    else export_config.model_export_kwargs[name]
                ),
            )
            models_info[name] = model_info
        predictor_init_kwargs["models"] = models_info
    else:
        # For a single model, exported files are placed under `predictor_path`
        # together with predictor_info.json.
        save_path = predictor_path
        model_info = _export_single_model(
            predictor_path=predictor_path,
            model=export_config.model,
            input_args=model_inputs,
            save_path=save_path,
            model_export_method=export_config.model_export_method or predictor_type,
            model_export_kwargs=export_config.model_export_kwargs or {},
        )
        predictor_init_kwargs["model"] = model_info

    # assemble predictor
    predictor_info = PredictorInfo(**predictor_init_kwargs)
    with PathManager.open(
        os.path.join(predictor_path, "predictor_info.json"), "w"
    ) as f:
        json.dump(predictor_info.to_dict(), f, indent=4)

    return predictor_path
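A minimal sketch of inspecting the export afterwards; "./export" and "torchscript" are hypothetical example values for output_dir and predictor_type:

import json
import os

# Read back the metadata written by default_export_predictor above.
predictor_path = os.path.join("./export", "torchscript")
with open(os.path.join(predictor_path, "predictor_info.json")) as f:
    info = json.load(f)
print(sorted(info.keys()))  # pre/post-process info plus the model (or models) entries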
Example #26
File: logger.py Project: njumagus/VROID
def _cached_log_stream(filename):
    # Open the log file in append mode and register it to be closed at exit.
    io = PathManager.open(filename, "a")
    atexit.register(io.close)
    return io
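A sketch of how such a cached stream is typically wired into Python logging; the file name and format string are illustrative:

import logging

# Attach the append-mode stream to a logger; it stays open until process exit.
handler = logging.StreamHandler(_cached_log_stream("./train.log"))
handler.setFormatter(logging.Formatter("%(asctime)s %(name)s: %(message)s"))
logging.getLogger().addHandler(handler)
logging.getLogger(__name__).info("logging to ./train.log")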
Example #27
def read_keyframes(
    video_fpath: str, keyframes: FrameTsList, video_stream_idx: int = 0
) -> FrameList:  # pyre-ignore[11]
    """
    Reads keyframe data from a video file.

    Args:
        video_fpath (str): Video file path
        keyframes (List[int]): List of keyframe timestamps (as counts in
            timebase units to be used in container seek operations)
        video_stream_idx (int): Video stream index (default: 0)
    Returns:
        List[Frame]: list of frames that correspond to the specified timestamps
    """
    logger = logging.getLogger(__name__)
    try:
        with PathManager.open(video_fpath, "rb") as io:
            container = av.open(io)
            stream = container.streams.video[video_stream_idx]
            frames = []
            for pts in keyframes:
                try:
                    container.seek(pts, any_frame=False, stream=stream)
                    frame = next(container.decode(video=0))
                    frames.append(frame)
                except av.AVError as e:
                    logger.warning(
                        f"Read keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts}, AV error: {e}"
                    )
                    container.close()
                    return frames
                except OSError as e:
                    logger.warning(
                        f"Read keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts}, OS error: {e}"
                    )
                    container.close()
                    return frames
                except StopIteration:
                    logger.warning(
                        f"Read keyframes: Error decoding frame from {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts}"
                    )
                    container.close()
                    return frames

            container.close()
            return frames
    except OSError as e:
        logger.warning(
            f"Read keyframes: Error opening video file container {video_fpath}, OS error: {e}"
        )
    except RuntimeError as e:
        logger.warning(
            f"Read keyframes: Error opening video file container {video_fpath}, Runtime error: {e}"
        )
    return []
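A minimal usage sketch: collect keyframe timestamps with PyAV, then decode them with read_keyframes above ("clip.mp4" is a hypothetical path):

import av

video_fpath = "clip.mp4"
with av.open(video_fpath) as container:
    stream = container.streams.video[0]
    stream.codec_context.skip_frame = "NONKEY"  # decoder yields only keyframes
    keyframe_pts = [frame.pts for frame in container.decode(stream)]

frames = read_keyframes(video_fpath, keyframe_pts)
print(f"decoded {len(frames)} keyframes")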
Example #28
def main(args):
    # utils.init_distributed_mode(args)

    if args.frozen_weights is not None:
        assert args.masks, "Frozen training is meant for segmentation only"
    print(args)

    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    model, criterion, postprocessors = build_model(args)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module
    n_parameters = sum(p.numel() for p in model.parameters()
                       if p.requires_grad)
    print("number of params:", n_parameters)

    param_dicts = [
        {
            "params": [
                p for n, p in model_without_ddp.named_parameters()
                if "backbone" not in n and p.requires_grad
            ]
        },
        {
            "params": [
                p for n, p in model_without_ddp.named_parameters()
                if "backbone" in n and p.requires_grad
            ],
            "lr":
            args.lr_backbone,
        },
    ]
    optimizer = torch.optim.AdamW(param_dicts,
                                  lr=args.lr,
                                  weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)

    dataset_train = build_dataset(image_set="train", args=args)
    dataset_val = build_dataset(image_set="val", args=args)

    if args.distributed:
        sampler_train = DistributedSampler(dataset_train)
        sampler_val = DistributedSampler(dataset_val, shuffle=False)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)
        sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    batch_sampler_train = torch.utils.data.BatchSampler(sampler_train,
                                                        args.batch_size,
                                                        drop_last=True)

    data_loader_train = DataLoader(
        dataset_train,
        batch_sampler=batch_sampler_train,
        collate_fn=utils.collate_fn,
        num_workers=args.num_workers,
    )
    data_loader_val = DataLoader(
        dataset_val,
        args.batch_size,
        sampler=sampler_val,
        drop_last=False,
        collate_fn=utils.collate_fn,
        num_workers=args.num_workers,
    )

    if args.dataset_file == "coco_panoptic":
        # We also evaluate AP during panoptic training, on original coco DS
        coco_val = datasets.coco.build("val", args)
        base_ds = get_coco_api_from_dataset(coco_val)
    else:
        base_ds = get_coco_api_from_dataset(dataset_val)

    if args.frozen_weights is not None:
        checkpoint = torch.load(args.frozen_weights, map_location="cpu")
        model_without_ddp.detr.load_state_dict(checkpoint["model"])

    if args.resume:
        if args.resume.startswith("https"):
            checkpoint = torch.hub.load_state_dict_from_url(args.resume,
                                                            map_location="cpu",
                                                            check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location="cpu")
        model_without_ddp.load_state_dict(checkpoint["model"])
        if (not args.eval and "optimizer" in checkpoint
                and "lr_scheduler" in checkpoint and "epoch" in checkpoint):
            optimizer.load_state_dict(checkpoint["optimizer"])
            lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
            args.start_epoch = checkpoint["epoch"] + 1

    if args.eval:
        test_stats, coco_evaluator = evaluate(
            model,
            criterion,
            postprocessors,
            data_loader_val,
            base_ds,
            device,
            args.output_dir,
        )
        if args.output_dir:
            with PathManager.open(os.path.join(args.output_dir, "eval.pth"),
                                  "wb") as f:
                utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, f)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(
            model,
            criterion,
            data_loader_train,
            optimizer,
            device,
            epoch,
            args.clip_max_norm,
        )
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = []  # originally: [os.path.join(args.output_dir, "checkpoint.pth")]
            # extra checkpoint before LR drop and every 10 epochs
            if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 10 == 0:
                checkpoint_paths.append(
                    os.path.join(args.output_dir, f"checkpoint{epoch:04}.pth"))
            for checkpoint_path in checkpoint_paths:
                with PathManager.open(checkpoint_path, "wb") as f:
                    if args.gpu == 0 and args.machine_rank == 0:
                        utils.save_on_master(
                            {
                                "model": model_without_ddp.state_dict(),
                                "optimizer": optimizer.state_dict(),
                                "lr_scheduler": lr_scheduler.state_dict(),
                                "epoch": epoch,
                                "args": args,
                            },
                            f,
                        )

        test_stats, coco_evaluator = evaluate(
            model,
            criterion,
            postprocessors,
            data_loader_val,
            base_ds,
            device,
            args.output_dir,
        )

        log_stats = {
            **{f"train_{k}": v
               for k, v in train_stats.items()},
            **{f"test_{k}": v
               for k, v in test_stats.items()},
            "epoch": epoch,
            "n_parameters": n_parameters,
        }

        if args.output_dir and utils.is_main_process():
            # Append so per-epoch stats accumulate instead of overwriting log.txt.
            with PathManager.open(os.path.join(args.output_dir, "log.txt"),
                                  "a") as f:
                f.write(json.dumps(log_stats) + "\n")

            # for evaluation logs
            if coco_evaluator is not None:
                PathManager.mkdirs(os.path.join(args.output_dir, "eval"))
                if "bbox" in coco_evaluator.coco_eval:
                    filenames = ["latest.pth"]
                    if epoch % 50 == 0:
                        filenames.append(f"{epoch:03}.pth")
                    for name in filenames:
                        with PathManager.open(
                                os.path.join(args.output_dir, "eval", name),
                                "wb") as f:
                            torch.save(coco_evaluator.coco_eval["bbox"].eval,
                                       f)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))
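The checkpoints written by the loop above can be reloaded outside training; a minimal sketch (the file name is hypothetical, and newer PyTorch versions may need weights_only=False since args is a pickled Namespace):

import torch

ckpt = torch.load("output/checkpoint0099.pth", map_location="cpu")
print(sorted(ckpt.keys()))  # expect: args, epoch, lr_scheduler, model, optimizer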
Example #29
def dump_torchscript_IR(model, dir):
    """
    Dump IR of a TracedModule/ScriptModule at various levels.
    Useful for debugging.

    Args:
        model (TracedModule or ScriptModule): traced or scripted module
        dir (str): output directory to dump files.
    """
    PathManager.mkdirs(dir)

    def _get_script_mod(mod):
        if isinstance(mod, torch.jit.TracedModule):
            return mod._actual_script_module
        return mod

    # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code
    with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f:

        def get_code(mod):
            # Try a few ways to get code using private attributes.
            try:
                # This contains more information than just `mod.code`
                return _get_script_mod(mod)._c.code
            except AttributeError:
                pass
            try:
                return mod.code
            except AttributeError:
                return None

        def dump_code(prefix, mod):
            code = get_code(mod)
            name = prefix or "root model"
            if code is None:
                f.write(f"Could not found code for {name} (type={mod.original_name})\n")
                f.write("\n")
            else:
                f.write(f"\nCode for {name}, type={mod.original_name}:\n")
                f.write(code)
                f.write("\n")
                f.write("-" * 80)

            for name, m in mod.named_children():
                dump_code(prefix + "." + name, m)

        dump_code("", model)

    # Recursively dump IR of all modules
    with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f:
        try:
            f.write(_get_script_mod(model)._c.dump_to_str(True, False, False))
        except AttributeError:
            pass

    # Dump IR of the entire graph (all submodules inlined)
    with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f:
        f.write(str(model.inlined_graph))

    # Dump the model structure in pytorch style
    with PathManager.open(os.path.join(dir, "model.txt"), "w") as f:
        f.write(str(model))
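A minimal usage sketch: script a toy module and dump its TorchScript IR with the function above (the module and output directory are illustrative):

import torch

class Tiny(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x + 1)

scripted = torch.jit.script(Tiny())
dump_torchscript_IR(scripted, "./ts_dump")
# writes model_ts_code.txt, model_ts_IR.txt, model_ts_IR_inlined.txt, model.txt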
Example #30
    def evaluate(self):
        comm.synchronize()
        self._predictions = comm.gather(self._predictions)
        self._predictions = list(itertools.chain(*self._predictions))
        if not comm.is_main_process():
            return

        # PanopticApi requires local files
        gt_json = PathManager.get_local_path(self._metadata.panoptic_json)
        gt_folder = PathManager.get_local_path(self._metadata.panoptic_root)

        with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir:
            logger.info("Writing all panoptic predictions to {} ...".format(pred_dir))
            for p in self._predictions:
                with open(os.path.join(pred_dir, p["file_name"]), "wb") as f:
                    f.write(p.pop("png_string"))

            with open(gt_json, "r") as f:
                json_data = json.load(f)
            json_data["annotations"] = self._predictions
            with PathManager.open(self._predictions_json, "w") as f:
                f.write(json.dumps(json_data))

            from panopticapi.evaluation import pq_compute

            with contextlib.redirect_stdout(io.StringIO()):
                pq_res, pq_per_image_res = pq_compute(
                    gt_json,
                    PathManager.get_local_path(self._predictions_json),
                    gt_folder=gt_folder,
                    pred_folder=pred_dir,
                )

        res = {}
        res["PQ"] = 100 * pq_res["All"]["pq"]
        res["SQ"] = 100 * pq_res["All"]["sq"]
        res["RQ"] = 100 * pq_res["All"]["rq"]
        res["PQ_th"] = 100 * pq_res["Things"]["pq"]
        res["SQ_th"] = 100 * pq_res["Things"]["sq"]
        res["RQ_th"] = 100 * pq_res["Things"]["rq"]
        res["PQ_st"] = 100 * pq_res["Stuff"]["pq"]
        res["SQ_st"] = 100 * pq_res["Stuff"]["sq"]
        res["RQ_st"] = 100 * pq_res["Stuff"]["rq"]
        
        results = OrderedDict({"panoptic_seg": res})
        # Convert class ids to names:
        per_class = pq_res["per_class"]
        new_per_class = {}
        for label in per_class.keys():
            isthing = label in self._metadata.thing_dataset_id_to_contiguous_id
            if isthing:
                contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id[label]
                class_name = self._metadata.thing_classes[contiguous_id]
            else:
                contiguous_id = self._metadata.stuff_dataset_id_to_contiguous_id[label]
                class_name = self._metadata.stuff_classes[contiguous_id]
            new_per_class[class_name] = per_class[label]
        pq_res["per_class"] = new_per_class
        _print_panoptic_results(pq_res)
        _print_panoptic_results_per_image(pq_per_image_res)
        return results
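For reference, the PQ/SQ/RQ numbers assembled above follow the standard panoptic quality decomposition (Kirillov et al., 2019), where TP, FP, and FN are the matched, unmatched predicted, and unmatched ground-truth segments:

PQ = \frac{\sum_{(p,g) \in TP} \mathrm{IoU}(p,g)}{|TP| + \tfrac{1}{2}|FP| + \tfrac{1}{2}|FN|}
   = \underbrace{\frac{\sum_{(p,g) \in TP} \mathrm{IoU}(p,g)}{|TP|}}_{\mathrm{SQ}}
     \times
     \underbrace{\frac{|TP|}{|TP| + \tfrac{1}{2}|FP| + \tfrac{1}{2}|FN|}}_{\mathrm{RQ}}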