Example #1
def combine_detection_dataset_dicts(
    dataset_names: Collection[str],
    keep_instance_predicate: Optional[InstancePredicate] = None,
    proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
    """
    Load and prepare dataset dicts for training / testing

    Args:
        dataset_names (Collection[str]): a list of dataset names
        keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
            applied to instance dicts which defines whether to keep the instance
        proposal_files (Collection[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    if proposal_files is None:
        proposal_files = [None] * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)
    # load datasets and metadata
    dataset_name_to_dicts = {}
    for dataset_name in dataset_names:
        dataset_name_to_dicts[dataset_name] = DatasetCatalog.get(dataset_name)
        assert len(
            dataset_name_to_dicts[dataset_name]
        ), f"Dataset '{dataset_name}' is empty!"
    # merge categories, requires category metadata to be loaded
    # cat_id -> [(orig_cat_id, cat_name, dataset_name)]
    merged_categories = _merge_categories(dataset_names)
    _warn_if_merged_different_categories(merged_categories)
    merged_category_names = [
        merged_categories[cat_id][0].mapped_name
        for cat_id in sorted(merged_categories)
    ]
    # map to contiguous category IDs
    _add_category_id_to_contiguous_id_maps_to_metadata(merged_categories)
    # load annotations and dataset metadata
    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
        dataset_dicts = dataset_name_to_dicts[dataset_name]
        assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
        if proposal_file is not None:
            dataset_dicts = load_proposals_into_dataset(
                dataset_dicts, proposal_file)
        dataset_dicts = _maybe_filter_and_map_categories(
            dataset_name, dataset_dicts)
        print_instances_class_histogram(dataset_dicts, merged_category_names)
        dataset_name_to_dicts[dataset_name] = dataset_dicts

    if keep_instance_predicate is not None:
        all_datasets_dicts_plain = [
            d for d in itertools.chain.from_iterable(
                dataset_name_to_dicts.values()) if keep_instance_predicate(d)
        ]
    else:
        all_datasets_dicts_plain = list(
            itertools.chain.from_iterable(dataset_name_to_dicts.values()))
    return all_datasets_dicts_plain
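
A minimal usage sketch for the function above, assuming a detectron2
environment in which the named dataset is already registered via
DatasetCatalog (the dataset name and the predicate are illustrative, not
part of the original snippet):

    dicts = combine_detection_dataset_dicts(
        ["coco_2017_train"],
        # keep only dicts that carry at least one annotation
        keep_instance_predicate=lambda d: len(d.get("annotations", [])) > 0,
    )
    print(f"loaded {len(dicts)} dataset dicts")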
Example #2
def combine_detection_dataset_dicts(
    dataset_names: Collection[str],
    keep_instance_predicate: Optional[InstancePredicate] = None,
    proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
    """
    Load and prepare dataset dicts for training / testing

    Args:
        dataset_names (Collection[str]): a list of dataset names
        keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
            applied to instance dicts which defines whether to keep the instance
        proposal_files (Collection[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    if proposal_files is None:
        proposal_files = [None] * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)
    # load annotations and dataset metadata
    dataset_map = {}
    for dataset_name in dataset_names:
        dataset_dicts = DatasetCatalog.get(dataset_name)
        dataset_map[dataset_name] = dataset_dicts
    # initialize category maps
    _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
    # apply category maps
    all_datasets_dicts = []
    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
        dataset_dicts = dataset_map[dataset_name]
        assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
        if proposal_file is not None:
            dataset_dicts = load_proposals_into_dataset(
                dataset_dicts, proposal_file)
        dataset_dicts = _maybe_filter_and_map_categories(
            dataset_name, dataset_dicts)
        _map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
        print_instances_class_histogram(
            dataset_dicts,
            MetadataCatalog.get(dataset_name).thing_classes)
        all_datasets_dicts.append(dataset_dicts)

    if keep_instance_predicate is not None:
        all_datasets_dicts_plain = [
            d for d in itertools.chain.from_iterable(all_datasets_dicts)
            if keep_instance_predicate(d)
        ]
    else:
        all_datasets_dicts_plain = list(
            itertools.chain.from_iterable(all_datasets_dicts))
    return all_datasets_dicts_plain
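
Because `keep_instance_predicate` is just a Dict[str, Any] -> bool callable,
individual filters can be composed before being passed in. A hedged sketch
(the `_combine_predicates` helper is hypothetical, not part of the snippet
above; the dataset name is illustrative):

    def _combine_predicates(*predicates):
        # keep an instance dict only if every predicate accepts it
        def _combined(instance_dict):
            return all(p(instance_dict) for p in predicates)
        return _combined

    keep = _combine_predicates(
        lambda d: "annotations" in d,
        lambda d: any(a.get("iscrowd", 0) == 0 for a in d["annotations"]),
    )
    dicts = combine_detection_dataset_dicts(
        ["coco_2017_train"], keep_instance_predicate=keep)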
Example #3
def fsod_get_detection_dataset_dicts(dataset_names,
                                     filter_empty=True,
                                     min_keypoints=0,
                                     proposal_files=None):
    """
    Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.
    Args:
        dataset_names (list[str]): a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    dataset_dicts_original = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts_original):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts_original = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts_original,
                                                      proposal_files)
        ]

    if 'train' not in dataset_names[0]:
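        # no 'train' in the first dataset name: evaluation path, keep the
        # image-based records as-is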
        dataset_dicts = list(
            itertools.chain.from_iterable(dataset_dicts_original))
    else:
        dataset_dicts_original = list(
            itertools.chain.from_iterable(dataset_dicts_original))
        dataset_dicts_original = filter_images_with_only_crowd_annotations(
            dataset_dicts_original)
        # ---------------------------------------------------------------------
        # split image-based annotations into instance-based annotations for
        # few-shot learning: emit one record per (image, category) pair
        dataset_dicts = []
        for record in dataset_dicts_original:
            file_name = record['file_name']
            height = record['height']
            width = record['width']
            annotations = record['annotations']

            # group this image's annotations by category id
            category_dict = {}
            for ann in annotations:
                # segmentation and keypoint fields are not used downstream
                ann.pop("segmentation", None)
                ann.pop("keypoints", None)
                category_dict.setdefault(ann['category_id'], []).append(ann)

            # one new record per category present in the image
            for anns in category_dict.values():
                dataset_dicts.append({
                    'file_name': file_name,
                    'height': height,
                    'width': width,
                    'annotations': anns,
                })

    has_instances = "annotations" in dataset_dicts[0]
    # Keep images without instance-level GT if the dataset has semantic labels.
    if filter_empty and has_instances and "sem_seg_file_name" not in dataset_dicts[0]:
        dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)

    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(
            dataset_dicts, min_keypoints)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass
    return dataset_dicts
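
A minimal usage sketch, assuming the named datasets are registered in
DatasetCatalog (names illustrative). Note that the few-shot instance split
is triggered purely by the substring 'train' appearing in the first dataset
name:

    # training: records are split into one entry per (image, category) pair
    train_dicts = fsod_get_detection_dataset_dicts(
        ["coco_2017_train"], filter_empty=True, min_keypoints=0)

    # evaluation: records stay image-based and unsplit
    test_dicts = fsod_get_detection_dataset_dicts(
        ["coco_2017_val"], filter_empty=False)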