Example 1
def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info):
    files = []
    # scan through the directory
    cities = PathManager.ls(image_dir)
    logger.info(f"{len(cities)} cities found in '{image_dir}'.")
    image_dict = {}
    for city in cities:
        city_img_dir = os.path.join(image_dir, city)
        for basename in PathManager.ls(city_img_dir):
            image_file = os.path.join(city_img_dir, basename)

            suffix = "_leftImg8bit.png"
            assert basename.endswith(suffix), basename
            basename = os.path.basename(basename)[: -len(suffix)]

            image_dict[basename] = image_file

    for ann in json_info["annotations"]:
        image_file = image_dict.get(ann["image_id"], None)
        assert image_file is not None, "No image {} found for annotation {}".format(
            ann["image_id"], ann["file_name"]
        )
        label_file = os.path.join(gt_dir, ann["file_name"])
        segments_info = ann["segments_info"]

        files.append((image_file, label_file, segments_info))

    assert len(files), "No images found in {}".format(image_dir)
    assert PathManager.isfile(files[0][0]), files[0][0]
    assert PathManager.isfile(files[0][1]), files[0][1]
    return files
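A minimal usage sketch; the Cityscapes directory layout and JSON path below are illustrative assumptions, not taken from the snippet above:

import json

# Hypothetical Cityscapes panoptic layout.
image_dir = "datasets/cityscapes/leftImg8bit/train"
gt_dir = "datasets/cityscapes/gtFine/cityscapes_panoptic_train"
json_file = "datasets/cityscapes/gtFine/cityscapes_panoptic_train.json"

with open(json_file) as f:
    json_info = json.load(f)

files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info)
# Each entry is an (image_file, label_file, segments_info) tuple.
image_file, label_file, segments_info = files[0]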
Example 2
def get_cityscapes_files(image_dir, gt_dir):
    files = []
    # scan through the directory
    cities = PathManager.ls(image_dir)
    logger.info(f"{len(cities)} cities found in '{image_dir}'.")
    for city in cities:
        city_img_dir = os.path.join(image_dir, city)
        city_gt_dir = os.path.join(gt_dir, city)
        for basename in PathManager.ls(city_img_dir):
            image_file = os.path.join(city_img_dir, basename)

            suffix = "leftImg8bit.png"
            assert basename.endswith(suffix)
            basename = basename[:-len(suffix)]

            instance_file = os.path.join(city_gt_dir,
                                         basename + "gtFine_instanceIds.png")
            label_file = os.path.join(city_gt_dir,
                                      basename + "gtFine_labelIds.png")
            json_file = os.path.join(city_gt_dir,
                                     basename + "gtFine_polygons.json")

            files.append((image_file, instance_file, label_file, json_file))
    assert len(files), "No images found in {}".format(image_dir)
    for f in files[0]:
        assert PathManager.isfile(f), f
    return files
Example 3
    def merge(self):
        """merge all clip features of a video into one/several 
           fix-size matrix(es)
        """
        if not PathManager.exists(self.merge_dir):
            PathManager.mkdirs(self.merge_dir)

        for video_name in PathManager.ls(self.save_dir):
            video_dir = os.path.join(self.save_dir, video_name)
            num_feats = len(PathManager.ls(video_dir))

            if self.min_length <= num_feats <= self.max_length:
                merged_feat = torch.zeros((num_feats, self.dim),
                                          dtype=torch.float32)

                for clip_idx in range(num_feats):
                    feat = torch.load(
                        os.path.join(video_dir, f'{clip_idx}.pth'))
                    merged_feat[clip_idx, :] = torch.from_numpy(feat)

                torch.save(merged_feat,
                           os.path.join(self.merge_dir, f'{video_name}.pth'))
            else:
                # TODO: handle videos whose clip count falls outside
                # [min_length, max_length]; for now just log the name.
                print(video_name)
Example 4
    def test_bad_args(self) -> None:
        with self.assertRaises(NotImplementedError):
            PathManager.copy(
                self._remote_uri,
                self._remote_uri,
                foo="foo"  # type: ignore
            )
        with self.assertRaises(NotImplementedError):
            PathManager.exists(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.get_local_path(
                self._remote_uri,
                foo="foo"  # type: ignore
            )
        with self.assertRaises(NotImplementedError):
            PathManager.isdir(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(NotImplementedError):
            PathManager.isfile(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(NotImplementedError):
            PathManager.ls(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(NotImplementedError):
            PathManager.mkdirs(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.open(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(NotImplementedError):
            PathManager.rm(self._remote_uri, foo="foo")  # type: ignore

        PathManager.set_strict_kwargs_checking(False)

        PathManager.get_local_path(self._remote_uri, foo="foo")  # type: ignore
        f = PathManager.open(self._remote_uri, foo="foo")  # type: ignore
        f.close()
        PathManager.set_strict_kwargs_checking(True)
Example 5
def load_pan_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"):
    # We match input images with ground truth based on their relative filepaths (without file
    # extensions) starting from 'image_root' and 'gt_root' respectively.
    def file2id(folder_path, file_path):
        # extract relative path starting from `folder_path`
        image_id = os.path.normpath(
            os.path.relpath(file_path, start=folder_path))
        # remove file extension
        image_id = os.path.splitext(image_id)[0]
        return image_id

    input_files = sorted(
        (os.path.join(image_root, f)
         for f in PathManager.ls(image_root) if f.endswith(image_ext)),
        key=lambda file_path: file2id(image_root, file_path),
    )
    gt_files = sorted(
        (os.path.join(gt_root, f)
         for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
        key=lambda file_path: file2id(gt_root, file_path),
    )

    assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)

    # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images
    if len(input_files) != len(gt_files):
        logger.warning(
            "Directories {} and {} have {} and {} files, respectively.".format(
                image_root, gt_root, len(input_files), len(gt_files)))
        input_basenames = [
            os.path.basename(f)[:-len(image_ext)] for f in input_files
        ]
        gt_basenames = [os.path.basename(f)[:-len(gt_ext)] for f in gt_files]
        intersect = list(set(input_basenames) & set(gt_basenames))
        # sort, otherwise each worker may obtain a list[dict] in different order
        intersect = sorted(intersect)
        logger.warn("Will use their intersection of {} files.".format(
            len(intersect)))
        input_files = [
            os.path.join(image_root, f + image_ext) for f in intersect
        ]
        gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]

    logger.info("Loaded {} images with semantic segmentation from {}".format(
        len(input_files), image_root))

    dataset_dicts = []
    for (img_path, gt_path) in zip(input_files, gt_files):
        record = {}
        record["file_name"] = img_path
        record["pan_seg_file_name"] = gt_path
        dataset_dicts.append(record)

    return dataset_dicts
Example 6
    def get_shard_file_names(input_dir: str, split: str,
                             layer: str) -> List[ExtractedFeaturesShardPaths]:
        """
        Get the list of files needed to load the extracted features
        """

        # List all the files that are containing the features for a given
        # dataset split and a given layer
        feature_regex = re.compile(rf"(.*)_{split}_{layer}_features.npy")
        prefixes = []
        for file_path in PathManager.ls(input_dir):
            match = feature_regex.match(file_path)
            if match is not None:
                prefixes.append(match.group(1))

        # Yield all the files needed to merge the features dumped on
        # the different GPUs
        shard_paths = []
        for prefix in prefixes:
            feat_file = os.path.join(input_dir,
                                     f"{prefix}_{split}_{layer}_features.npy")
            targets_file = os.path.join(
                input_dir, f"{prefix}_{split}_{layer}_targets.npy")
            indices_file = os.path.join(input_dir,
                                        f"{prefix}_{split}_{layer}_inds.npy")
            shard_paths.append(
                ExtractedFeaturesShardPaths(
                    feature_file=feat_file,
                    targets_file=targets_file,
                    indices_file=indices_file,
                ))
        return shard_paths
Example 7
def get_filelist_labels_images_paths(input_path):
    dataset_summary, metadata = {}, {}
    img_paths, gender_labels, race_labels, age_labels = [], [], [], []
    inp_image_names = PathManager.ls(input_path)
    print(f"{len(inp_image_names)} images found.")

    total_examples = 0
    # Populate img_paths and the label lists based on the file names.
    for img_name in inp_image_names:
        if not img_name.endswith(".jpg"):
            continue
        img_path = os.path.join(input_path, img_name)
        img_paths.append(img_path)

        img_age = int(str(img_name).split("_")[0])
        img_gender = GENDER_MAPPING[int(str(img_name).split("_")[1])]
        img_race = RACE_MAPPING[int(str(img_name).split("_")[2])]
        # import pdb; pdb.set_trace()
        age_labels.append(img_age)
        gender_labels.append(img_gender)
        race_labels.append(img_race)
        metadata[img_name] = {
            "age": img_age,
            "gender": img_gender,
            "race": img_race,
        }
        total_examples += 1

    # print the dataset summary
    print(f"Dataset has {total_examples} images")
    dataset_summary["num_images"] = total_examples
    return dataset_summary, metadata, img_paths, age_labels, gender_labels, race_labels
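The parsing above assumes a UTKFace-style naming scheme (age_gender_race_datetime.jpg) and that GENDER_MAPPING and RACE_MAPPING are defined elsewhere in the module. A small hedged sketch of what those lookups might look like; the file name and mapping values here are assumptions for illustration only:

# Illustrative mappings; the real constants live elsewhere in the source.
GENDER_MAPPING = {0: "male", 1: "female"}
RACE_MAPPING = {0: "white", 1: "black", 2: "asian", 3: "indian", 4: "other"}

img_name = "25_0_2_20170116174525125.jpg"  # hypothetical UTKFace-style name
age, gender_idx, race_idx = img_name.split("_")[:3]
print(int(age), GENDER_MAPPING[int(gender_idx)], RACE_MAPPING[int(race_idx)])
# -> 25 male asian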
Example 8
    def test_bad_args(self) -> None:
        # TODO (T58240718): Replace with dynamic checks
        with self.assertRaises(ValueError):
            PathManager.copy(
                self._tmpfile,
                self._tmpfile,
                foo="foo"  # type: ignore
            )
        with self.assertRaises(ValueError):
            PathManager.exists(self._tmpfile, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.get_local_path(self._tmpfile,
                                       foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.isdir(self._tmpfile, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.isfile(self._tmpfile, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.ls(self._tmpfile, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.mkdirs(self._tmpfile, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.open(self._tmpfile, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.rm(self._tmpfile, foo="foo")  # type: ignore

        PathManager.set_strict_kwargs_checking(False)

        PathManager.copy(
            self._tmpfile,
            self._tmpfile,
            foo="foo"  # type: ignore
        )
        PathManager.exists(self._tmpfile, foo="foo")  # type: ignore
        PathManager.get_local_path(self._tmpfile, foo="foo")  # type: ignore
        PathManager.isdir(self._tmpfile, foo="foo")  # type: ignore
        PathManager.isfile(self._tmpfile, foo="foo")  # type: ignore
        PathManager.ls(self._tmpdir, foo="foo")  # type: ignore
        PathManager.mkdirs(self._tmpdir, foo="foo")  # type: ignore
        f = PathManager.open(self._tmpfile, foo="foo")  # type: ignore
        f.close()
        # pyre-ignore
        with open(os.path.join(self._tmpdir, "test_rm.txt"), "w") as f:
            rm_file = f.name
            f.write(self._tmpfile_contents)
            f.flush()
        PathManager.rm(rm_file, foo="foo")  # type: ignore
Example 9
def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"):
    def file2id(folder_path, file_path):
        image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
        image_id = os.path.splitext(image_id)[0]
        return image_id

    input_files = sorted(
        (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)),
        key=lambda file_path: file2id(image_root, file_path),
    )
    gt_files = sorted(
        (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
        key=lambda file_path: file2id(gt_root, file_path),
    )

    assert len(gt_files) > 0, f"No annotations found in {gt_root}."

    if len(input_files) != len(gt_files):
        logger.warning(
            "Directories {} and {} have {} and {} files, respectively.".format(
                image_root,
                gt_root,
                len(input_files),
                len(gt_files)
            )
        )
        input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files]
        gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files]
        intersect = list(set(input_basenames) & set(gt_basenames))
        intersect = sorted(intersect)
        logger.warn(f"Will use their intersection of {len(intersect)} files.")
        input_files = [os.path.join(image_root, f + image_ext) for f in intersect]
        gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]

    logger.info(
        f"Loaded {len(input_files)} images with semantic segmentation from {image_root}"
    )

    dataset_dicts = []
    for (img_path, gt_path) in zip(input_files, gt_files):
        record = {}
        record["file_name"] = img_path
        record["sem_seg_file_name"] = gt_path
        dataset_dicts.append(record)

    return dataset_dicts
Example 10
def bias_pascal_voc(
    dirname: str,
    noise_ratio: float,
    bias_rule: Dict[str, str]
):
    """
    Add Noise to Pascal VOC detection annotations.

    Args:
        dirname: directory containing "Annotations", "ImageSets", "JPEGImages"
        noise_ratio: fraction of annotation files to bias
        bias_rule: asymmetric mislabeling rules between classes
    """

    annotation_dirname = PathManager.get_local_path(
        os.path.join(dirname, "Annotations/")
    )

    annotation_files = np.array(PathManager.ls(annotation_dirname))
    
    num_biased_files = round(len(annotation_files) * noise_ratio)
    np.random.shuffle(annotation_files)
    biased_files = set(annotation_files[:num_biased_files])

    bias_stats = dict.fromkeys(["total", "mislabeled", "skipped"], 0)

    for filename in tqdm(annotation_files):
        anno_file = os.path.join(annotation_dirname, filename)

        with PathManager.open(anno_file) as f:
            tree = ET.parse(f)
        
        instances = tree.findall("object")
        num_instances = len(instances)
        bias_stats['total'] += num_instances
       
        if filename in biased_files:
            # Mislabel 70% of the instances in this file.
            num_mislabel = round(num_instances * 0.7)
            np.random.shuffle(instances)
            biased_instances = instances[:num_mislabel]
            for instance in biased_instances:
                cls_name = instance.find("name")
                
                if cls_name.text in bias_rule.keys():
                    biased_cls_name = bias_rule[cls_name.text]
                    cls_name.text = biased_cls_name
                    mislabel_attr = ET.SubElement(instance, "mislabeled")
                    mislabel_attr.text = '1'
                    bias_stats['mislabeled'] += 1
                else:
                    tree.getroot().remove(instance)
                    bias_stats['skipped'] += 1
                  
            tree.write(anno_file)


    return bias_stats
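A hedged usage sketch; the VOC directory and the class pairs in bias_rule are placeholders chosen for illustration:

# Hypothetical asymmetric mislabeling rule: affected "cat" boxes become "dog", etc.
bias_rule = {"cat": "dog", "bicycle": "motorbike"}

stats = bias_pascal_voc(
    dirname="datasets/VOC2007",  # must contain Annotations/, ImageSets/, JPEGImages/
    noise_ratio=0.2,             # bias roughly 20% of the annotation files
    bias_rule=bias_rule,
)
print(stats)  # e.g. {'total': ..., 'mislabeled': ..., 'skipped': ...}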
Example 11
def has_checkpoint(path_to_job):
    """
    Determines if the given directory contains a checkpoint.
    Args:
        path_to_job (string): the path to the folder of the current job.
    """
    d = get_checkpoint_dir(path_to_job)
    files = PathManager.ls(d) if PathManager.exists(d) else []
    return any("checkpoint" in f for f in files)
def has_checkpoint(path_to_checkpoint):
    """
    Check whether a checkpoint exists.
    :param path_to_checkpoint:
    :return:
    """
    d = get_checkpoint_dir(path_to_checkpoint)
    files = PathManager.ls(d) if PathManager.exists(d) else []

    return any("checkpoint" in f for f in files)
Example 13
    def _get_filenames(self, data_path: str):
        fnames = []

        for fname in sorted(PathManager.ls(data_path)):
            # Only put images in fnames.
            if not fname.endswith(".jpg"):
                continue

            full_fname = os.path.join(data_path, fname)
            fnames.append(full_fname)

        return np.array(fnames)
Example 14
def get_checkpoint_resume_files(
    checkpoint_folder: str,
    config: AttrDict,
    skip_final: bool = False,
    latest_checkpoint_resume_num: int = 1,
):
    """
    Get the checkpoint file from which the model should be resumed. We look at all
    the checkpoints in checkpoint_folder and, if the final model checkpoint exists
    (its name starts with `model_final_`) and we are not overriding it, we return the
    final checkpoint. Otherwise we return the latest checkpoint.

    Args:
        checkpoint_folder (str): path to the checkpoint folder.
        config (AttrDict): root config
        skip_final (bool): whether the final model checkpoint should be skipped or not
        latest_checkpoint_resume_num (int): which Nth-latest checkpoint to resume from.
                   Sometimes the latest checkpoints can be corrupt, so this option
                   allows resuming from a checkpoint a few saves before the last one.
    """
    all_files = PathManager.ls(checkpoint_folder)
    all_iters = []
    replace_prefix = "model_phase"
    # if we also checkpoint at iterations, we start from an iteration checkpoint,
    # since that is more recent than the phase-end checkpoint. Sometimes it is also
    # possible that there is no phase.
    if config.CHECKPOINT.CHECKPOINT_ITER_FREQUENCY > 0:
        replace_prefix = "model_iteration"

    for f in all_files:
        # if training has finished, we pick the final-training file
        # (the checkpoint is saved as "model_final_checkpoint..."). Otherwise, we pick
        # the latest phase checkpoint
        if "model_final" in f and not skip_final:
            return f
        if replace_prefix in f:
            iter_num = f.replace(".torch", "").replace(replace_prefix, "")
            if iter_num.isdigit():
                all_iters.append(int(iter_num))

    # make sure the checkpoint resume number is in bounds
    checkpoint_resume_num = max(0, latest_checkpoint_resume_num - 1)
    # len(all_iters) - 1 is the last index, checkpoint_resume_num can't be beyond that.
    checkpoint_resume_num = min(len(all_iters) - 1, checkpoint_resume_num)
    logging.info(f"checkpoint_resume_num: {checkpoint_resume_num}")
    if len(all_iters) > 0:
        all_iters.sort(reverse=True)
        last_iter = int(all_iters[checkpoint_resume_num])
        filename = f"{replace_prefix}{last_iter}.torch"
        return filename
    else:
        return None
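To make the filename convention concrete, here is a small self-contained sketch that mirrors the selection logic above on a hypothetical folder listing (without the config plumbing); it is not the function itself:

# Hypothetical checkpoint folder contents.
all_files = [
    "model_phase5.torch",
    "model_phase10.torch",
    "model_final_checkpoint_phase10.torch",
]

# With skip_final=False the "model_final..." file is returned immediately;
# otherwise the highest-numbered "model_phase" checkpoint wins.
prefix = "model_phase"
iters = sorted(
    int(f.replace(".torch", "").replace(prefix, ""))
    for f in all_files
    if f.startswith(prefix) and f.replace(".torch", "").replace(prefix, "").isdigit()
)
print(f"{prefix}{iters[-1]}.torch")  # -> model_phase10.torch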
Example 15
    def get_all_checkpoint_files(self) -> List[str]:
        """
        Returns:
            list: All available checkpoint files (.pth files) in target
                directory.
        """
        all_model_checkpoints = [
            os.path.join(self.save_dir, file)
            for file in PathManager.ls(self.save_dir)
            if PathManager.isfile(os.path.join(self.save_dir, file))
            and file.endswith(".pth")
        ]
        return all_model_checkpoints
Example 16
    def test_ls(self):
        # Create some files in the tempdir to ls out.
        root_dir = os.path.join(self._tmpdir, "ls")
        os.makedirs(root_dir, exist_ok=True)
        files = sorted(["foo.txt", "bar.txt", "baz.txt"])
        for f in files:
            open(os.path.join(root_dir, f), "a").close()

        children = sorted(PathManager.ls(root_dir))
        self.assertListEqual(children, files)

        # Cleanup the tempdir
        shutil.rmtree(root_dir)
Example 17
def get_last_checkpoint(path_to_job):
    """
    Get the last checkpoint from the checkpointing folder.
    Args:
        path_to_job (string): the path to the folder of the current job.
    """

    d = get_checkpoint_dir(path_to_job)
    names = PathManager.ls(d) if PathManager.exists(d) else []
    names = [f for f in names if "checkpoint" in f]
    assert len(names), "No checkpoints found in '{}'.".format(d)
    # Sort the checkpoints by epoch.
    name = sorted(names)[-1]
    return os.path.join(d, name)
def get_special_checkpoint(path_to_checkpoint, special_epoch):
    """
    Get the checkpoint saved at a specific epoch.
    :param path_to_checkpoint:
    :return:
    """

    d = get_checkpoint_dir(path_to_checkpoint)

    names = PathManager.ls(d) if PathManager.exists(d) else []
    special_name = "checkpoint_epoch_{:05d}.pyth".format(special_epoch)
    names = [f for f in names if special_name in f]
    name = names[0]
    logger.info("load mode in special epoch : {}".format(os.path.join(d,
                                                                      name)))
    return os.path.join(d, name)
Example 19
def has_final_checkpoint(checkpoint_folder: str,
                         final_checkpoint_pattern: str = "model_final"):
    """
    Check whether the final checkpoint exists in the checkpoint folder. The
    final checkpoint is recognized by the prefix "model_final_" in VISSL.

    Args:
        checkpoint_folder (str): path to the checkpoint folder.
        final_checkpoint_pattern (str): what prefix is used to save the final checkpoint.

    Returns:
        has_final_checkpoint: whether the final checkpoint exists or not
    """
    checkpointed_files = PathManager.ls(checkpoint_folder)
    torch_files = filter(lambda x: x.endswith(".torch"), checkpointed_files)
    final_files = filter(lambda x: final_checkpoint_pattern in x, torch_files)
    return len(list(final_files)) > 0
Example 20
def has_checkpoint(checkpoint_folder: str, skip_final: bool = False):
    """
    Check whether there are any checkpoints at all in the checkpoint folder.

    Args:
        checkpoint_folder (str): path to the checkpoint folder
        skip_final (bool): if a checkpoint with the `model_final_` prefix exists, whether
                           to skip it and keep training.

    Returns:
        checkpoint_exists (bool): whether checkpoint exists or not
    """
    checkpointed_files = PathManager.ls(checkpoint_folder)
    checkpoint_exists = False
    for f in checkpointed_files:
        if f.endswith(".torch") and ("model_final" not in f or not skip_final):
            checkpoint_exists = True
            break
    return checkpoint_exists
Example 21
    def _construct_loader(self):
        """
        Construct the video loader.
        """
        # TODO: merge into the annotation file, and modify the action duration.
        with open(os.path.join(self.cfg.DATA.PATH_TO_DATA_DIR,
                               "duration.json")) as f:
            duration_dict = json.load(f)

        self.raw_video_path = os.path.join(self.cfg.DATA.PATH_TO_DATA_DIR,
                                           'raw')
        _video_names = PathManager.ls(self.raw_video_path)
        _video_durations = [duration_dict[k] for k in _video_names]

        self._video_names = list()
        self._clip_idx = list()
        self._video_durations = list()
        for i, duration in enumerate(_video_durations):
            if duration >= self._clip_size:
                _num_clips = int(duration - self._clip_size) + 1
                for j in range(_num_clips):
                    self._video_names.append(_video_names[i])
                    self._clip_idx.append(j)
                    self._video_durations.append(duration)
Example 22
    return parser


if __name__ == "__main__":
    """
    Example usage:

    buck-out/gen/deeplearning/projects/ssl_framework/extra_scripts/fb/convert_folder_to_filelist.par \  # NOQA
        -i "manifold://ssl_framework/tree/datasets/food_101/" \
        -o "manifold://ssl_framework/tree/datasets/food_101/"
    """
    args = get_argument_parser().parse_args()

    setup_path_manager()

    splits = PathManager.ls(args.input)
    print(f"The following splits are found: { ','.join(splits) }")

    dataset_summary = {}

    for split in ["train", "trainval", "val", "test"]:
        if not PathManager.exists(os.path.join(args.input, split)):
            continue

        dataset_summary[split] = {}
        img_paths = []
        img_labels = []

        split_path = os.path.join(args.input, split)
        label_paths = PathManager.ls(split_path)
        dataset_summary[split]["labels"] = label_paths
Example 23
    def load(self, num_samples=None):
        """
        Load the data ground truth and parse the data so it's ready to be used.
        """
        # Load the dataset GT
        self.lab_root = f"{self.path}/lab/"
        self.img_root = f"{self.path}/jpg/"
        logging.info(f"Loading data: {self.path}")
        lab_filenames = np.sort(PathManager.ls(self.lab_root))
        # Get the filenames without the extension
        self.img_filenames = [
            e[:-4] for e in np.sort(PathManager.ls(self.img_root))
            if e[:-4] not in self.blacklisted
        ]

        # Parse the label files. This is slightly tricky because filenames do
        # not correspond exactly to query names. Go through all the labels to:
        # i) map names to filenames and vice versa
        # ii) get the relevant regions of interest of the queries,
        # iii) get the indexes of the dataset images that are queries
        # iv) get the relevants / non-relevants list
        self.relevants = {}
        self.junk = {}
        self.non_relevants = {}

        self.filename_to_name = {}
        self.name_to_filename = OrderedDict()
        self.q_roi = {}
        for e in lab_filenames:
            if e.endswith("_query.txt"):
                q_name = e[:-len("_query.txt")]
                with PathManager.open(f"{self.lab_root}/{e}") as fopen:
                    q_data = fopen.readline().split(" ")
                if q_data[0].startswith("oxc1_"):
                    q_filename = q_data[0][5:]
                else:
                    q_filename = q_data[0]
                self.filename_to_name[q_filename] = q_name
                self.name_to_filename[q_name] = q_filename
                with PathManager.open(
                        f"{self.lab_root}/{q_name}_ok.txt") as fopen:
                    good = {e.strip() for e in fopen}
                with PathManager.open(
                        f"{self.lab_root}/{q_name}_good.txt") as fopen:
                    good = good.union({e.strip() for e in fopen})
                with PathManager.open(
                        f"{self.lab_root}/{q_name}_junk.txt") as fopen:
                    junk = {e.strip() for e in fopen}
                good_plus_junk = good.union(junk)
                self.relevants[q_name] = [
                    i for i in range(len(self.img_filenames))
                    if self.img_filenames[i] in good
                ]
                self.junk[q_name] = [
                    i for i in range(len(self.img_filenames))
                    if self.img_filenames[i] in junk
                ]
                self.non_relevants[q_name] = [
                    i for i in range(len(self.img_filenames))
                    if self.img_filenames[i] not in good_plus_junk
                ]
                self.q_roi[q_name] = np.array([float(q) for q in q_data[1:]],
                                              dtype=np.float32)

        self.q_names = list(self.name_to_filename.keys())
        self.q_index = np.array([
            self.img_filenames.index(self.name_to_filename[qn])
            for qn in self.q_names
        ])

        self.N_images = len(self.img_filenames)
        self.N_queries = len(self.q_index)

        if num_samples is not None:
            self.N_queries = min(self.N_queries, num_samples)
            self.N_images = min(self.N_images, num_samples)
Example 24
def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"):
    """
    Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are
    treated as ground truth annotations and all files under "image_root" with "image_ext" extension
    as input images. Ground truth and input images are matched using file paths relative to
    "gt_root" and "image_root" respectively without taking into account file extensions.
    This works for COCO as well as some other datasets.

    Args:
        gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation
            annotations are stored as images with integer values in pixels that represent
            corresponding semantic labels.
        image_root (str): the directory where the input images are.
        gt_ext (str): file extension for ground truth annotations.
        image_ext (str): file extension for input images.

    Returns:
        list[dict]:
            a list of dicts in detectron2 standard format without instance-level
            annotation.

    Notes:
        1. This function does not read the image and ground truth files.
           The results do not have the "image" and "sem_seg" fields.
    """

    # We match input images with ground truth based on their relative filepaths (without file
    # extensions) starting from 'image_root' and 'gt_root' respectively.
    def file2id(folder_path, file_path):
        # extract relative path starting from `folder_path`
        image_id = os.path.normpath(
            os.path.relpath(file_path, start=folder_path))
        # remove file extension
        image_id = os.path.splitext(image_id)[0]
        return image_id

    input_files = sorted(
        (os.path.join(image_root, f)
         for f in PathManager.ls(image_root) if f.endswith(image_ext)),
        key=lambda file_path: file2id(image_root, file_path),
    )
    gt_files = sorted(
        (os.path.join(gt_root, f)
         for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
        key=lambda file_path: file2id(gt_root, file_path),
    )

    assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)

    # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images
    if len(input_files) != len(gt_files):
        logger.warning(
            "Directories {} and {} have {} and {} files, respectively.".format(
                image_root, gt_root, len(input_files), len(gt_files)))
        input_basenames = [
            os.path.basename(f)[:-len(image_ext)] for f in input_files
        ]
        gt_basenames = [os.path.basename(f)[:-len(gt_ext)] for f in gt_files]
        intersect = list(set(input_basenames) & set(gt_basenames))
        # sort, otherwise each worker may obtain a list[dict] in different order
        intersect = sorted(intersect)
        logger.warn("Will use their intersection of {} files.".format(
            len(intersect)))
        input_files = [
            os.path.join(image_root, f + image_ext) for f in intersect
        ]
        gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]

    logger.info("Loaded {} images with semantic segmentation from {}".format(
        len(input_files), image_root))

    dataset_dicts = []
    for (img_path, gt_path) in zip(input_files, gt_files):
        record = {}
        record["file_name"] = img_path
        record["sem_seg_file_name"] = gt_path
        dataset_dicts.append(record)

    return dataset_dicts
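A minimal usage sketch with a hypothetical ADE20K-style layout; the paths are assumptions for illustration only:

dataset_dicts = load_sem_seg(
    gt_root="datasets/ADEChallengeData2016/annotations_detectron2/training",
    image_root="datasets/ADEChallengeData2016/images/training",
    gt_ext="png",
    image_ext="jpg",
)
print(len(dataset_dicts), dataset_dicts[0])
# Each record holds "file_name" (input image) and "sem_seg_file_name" (label map).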
Example 25
    def _get_lightning_checkpoints(path: str):
        return [
            os.path.join(path, x) for x in PathManager.ls(path)
            if x.endswith(ModelCheckpoint.FILE_EXTENSION)
            and not x.startswith(ModelCheckpoint.CHECKPOINT_NAME_LAST)
        ]
Example 26
    def test_PathManager(self):
        x = LazyPath(lambda: "./")
        output = PathManager.ls(x)
        output_gt = PathManager.ls("./")
        self.assertEqual(sorted(output), sorted(output_gt))
Example 27
    def ls(path: str) -> List[str]:
        if FVCorePathManager:
            return FVCorePathManager.ls(path)
        return os.listdir(path)
Example 28
def merge_features(input_dir: str, split: str, layer: str):
    """
    For multi-gpu feature extraction, each gpu saves features corresponding to its
    share of the data. We can merge the features across all gpus to get the features
    for the full data.

    The features are saved along with the data indices and labels. The data indices can
    be used to sort the samples and to ensure uniqueness.

    We group the features and targets by the data index of each feature, de-duplicate
    the entries, and return the result.

    Args:
        input_dir (str): input path where the features are dumped
        split (str): whether the features are train or test data features
        layer (str): the features correspond to what layer of the model

    Returns:
        output (Dict): contains features, targets, inds as the keys
    """
    logging.info(f"Merging features: {split} {layer}")

    feature_regex = re.compile(rf"(.*)_{split}_{layer}_features.npy")

    # List all the files that are containing the features for a given
    # dataset split and a given layer
    prefixes = []
    for file_path in PathManager.ls(input_dir):
        match = feature_regex.match(file_path)
        if match is not None:
            prefixes.append(match.group(1))

    # Reassemble each feature shard (dumped by a given rank)
    output_feats, output_targets = {}, {}
    for prefix in prefixes:
        feat_file = os.path.join(input_dir,
                                 f"{prefix}_{split}_{layer}_features.npy")
        targets_file = os.path.join(input_dir,
                                    f"{prefix}_{split}_{layer}_targets.npy")
        inds_file = os.path.join(input_dir,
                                 f"{prefix}_{split}_{layer}_inds.npy")
        logging.info(f"Loading:\n{feat_file}\n{targets_file}\n{inds_file}")
        feats = load_file(feat_file)
        targets = load_file(targets_file)
        indices = load_file(inds_file)
        num_samples = feats.shape[0]
        for idx in range(num_samples):
            index = indices[idx]
            if index not in output_feats:
                output_feats[index] = feats[idx]
                output_targets[index] = targets[idx]

    # Sort the entries by sample index
    indices = sorted(output_targets.keys())
    features = [output_feats[i] for i in indices]
    targets = [output_targets[i] for i in indices]

    # Cast the entries as numpy arrays
    N = len(indices)
    output = {
        "features": np.array(features).reshape(N, -1),
        "targets": np.array(targets),
        "inds": np.array(indices),
    }
    logging.info(f"Features: {output['features'].shape}")
    logging.info(f"Targets: {output['targets'].shape}")
    logging.info(f"Indices: {output['inds'].shape}")
    return output
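A hedged usage sketch; the dump directory and layer name are placeholders, not taken from the snippet above:

output = merge_features(
    input_dir="checkpoints/extracted_features",  # hypothetical feature dump directory
    split="train",
    layer="res5",
)
features, targets, inds = output["features"], output["targets"], output["inds"]
assert features.shape[0] == targets.shape[0] == inds.shape[0]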
Example 29
    def test_PathManager(self) -> None:
        x = LazyPath(lambda: "./")
        output = PathManager.ls(x)  # pyre-ignore
        output_gt = PathManager.ls("./")
        self.assertEqual(sorted(output), sorted(output_gt))