Example 1
def get_detection_dataset_dicts_support(dataset_name,
                                        filter_empty=True,
                                        min_keypoints=0,
                                        proposal_file=None):

    dataset_dict = DatasetCatalog.get(dataset_name)
    dataset_dict_flattened = []
    for id_class, annotations_class in dataset_dict.items():
        dataset_dict_flattened.extend(annotations_class)

    # Pre-extracted proposals are not handled yet; the disabled block below is kept for reference.
    """
    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
        ]
    """
    # data distribution
    class_names = MetadataCatalog.get(dataset_name).thing_classes
    check_metadata_consistency("thing_classes", [dataset_name])
    print_instances_class_histogram(dataset_dict_flattened, class_names)

    return dataset_dict
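
A minimal usage sketch for the function above. The dataset name is hypothetical; it must already be registered so that DatasetCatalog.get() returns a mapping from class ID to that class's annotation list, as the loop in the function assumes.

# Hypothetical support-set name, registered elsewhere with DatasetCatalog/MetadataCatalog.
support_dicts = get_detection_dataset_dicts_support("coco_support_train")
for class_id, annotations in support_dicts.items():
    print(class_id, len(annotations))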
Example 2
def main():
    parser = argparse.ArgumentParser(
        description='This script analyzes the class distribution of a detection dataset.')
    parser.add_argument('--data_dir',
                        type=str,
                        default=None,
                        required=True,
                        help='path to annotation files directory.')
    parser.add_argument('--format',
                        type=str,
                        default='coco',
                        help='annotation format to analyze (coco or voc).')

    args = parser.parse_args()

    logger = logging.getLogger('detectron2')
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler(stream=sys.stdout)
    ch.setLevel(logging.DEBUG)
    logger.addHandler(ch)
    if args.format == 'coco':
        dataset_dicts = load_coco_json(
            os.path.join(args.data_dir, 'annotations/train.json'),
            os.path.join(args.data_dir, 'images'), '1')
        class_names = MetadataCatalog.get('1').thing_classes
    elif args.format == 'voc':
        dataset_dicts, class_names = load_voc_instances(args.data_dir)
    else:
        raise ValueError("only coco or voc formats are supported")

    print_instances_class_histogram(dataset_dicts, class_names)
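
A hedged usage sketch, assuming the script above is saved as analyze_data.py (hypothetical file name) and that --data_dir contains annotations/train.json plus an images/ directory, matching the paths built in main().

# Hypothetical invocation:
#   python analyze_data.py --data_dir /path/to/dataset --format coco
if __name__ == '__main__':
    main()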
Example 3
def combine_detection_dataset_dicts(
    dataset_names: Collection[str],
    keep_instance_predicate: Optional[InstancePredicate] = None,
    proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
    """
    Load and prepare dataset dicts for training / testing

    Args:
        dataset_names (Collection[str]): a list of dataset names
        keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
            applied to instance dicts which defines whether to keep the instance
        proposal_files (Collection[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    if proposal_files is None:
        proposal_files = [None] * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)
    # load datasets and metadata
    dataset_name_to_dicts = {}
    for dataset_name in dataset_names:
        dataset_name_to_dicts[dataset_name] = DatasetCatalog.get(dataset_name)
        assert len(dataset_name_to_dicts[dataset_name]), \
            f"Dataset '{dataset_name}' is empty!"
    # merge categories, requires category metadata to be loaded
    # cat_id -> [(orig_cat_id, cat_name, dataset_name)]
    merged_categories = _merge_categories(dataset_names)
    _warn_if_merged_different_categories(merged_categories)
    merged_category_names = [
        merged_categories[cat_id][0].mapped_name
        for cat_id in sorted(merged_categories)
    ]
    # map to contiguous category IDs
    _add_category_id_to_contiguous_id_maps_to_metadata(merged_categories)
    # load annotations and dataset metadata
    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
        dataset_dicts = dataset_name_to_dicts[dataset_name]
        assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
        if proposal_file is not None:
            dataset_dicts = load_proposals_into_dataset(
                dataset_dicts, proposal_file)
        dataset_dicts = _maybe_filter_and_map_categories(
            dataset_name, dataset_dicts)
        print_instances_class_histogram(dataset_dicts, merged_category_names)
        dataset_name_to_dicts[dataset_name] = dataset_dicts

    if keep_instance_predicate is not None:
        all_datasets_dicts_plain = [
            d for d in itertools.chain.from_iterable(
                dataset_name_to_dicts.values()) if keep_instance_predicate(d)
        ]
    else:
        all_datasets_dicts_plain = list(
            itertools.chain.from_iterable(dataset_name_to_dicts.values()))
    return all_datasets_dicts_plain
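
A minimal sketch of calling this variant. The dataset names are hypothetical and must be registered in DatasetCatalog; the predicate simply keeps dicts that carry at least one annotation.

# Hypothetical dataset names registered elsewhere.
combined = combine_detection_dataset_dicts(
    ["densepose_coco_train", "densepose_chimps_train"],
    keep_instance_predicate=lambda d: len(d.get("annotations", [])) > 0,
)
print(len(combined), "dataset dicts after merging")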
Example 4
def get_detection_dataset_dicts(wogt,
                                dataset_names,
                                filter_empty=True,
                                min_keypoints=0,
                                proposal_files=None):
    """
    Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.
    Args:
        wogt (bool): if True, ignore ground-truth instance annotations
            (skips empty-image filtering and the class histogram)
        dataset_names (list[str]): a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    Returns:
        list[dict]: a list of dicts following the standard dataset dict format.
    """
    assert len(dataset_names)
    dataset_dicts = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts,
                                                      proposal_files)
        ]

    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    has_instances = "annotations" in dataset_dicts[0] and not wogt
    if filter_empty and has_instances:
        dataset_dicts = filter_images_with_only_crowd_annotations(
            dataset_dicts)
    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(
            dataset_dicts, min_keypoints)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass

    assert len(dataset_dicts), "No valid data found in {}.".format(
        ",".join(dataset_names))
    return dataset_dicts
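
A minimal sketch showing how the wogt flag switches ground-truth handling on and off; both dataset names are hypothetical placeholders for registered datasets.

# wogt=False: empty-image filtering and the class histogram run as usual.
labeled_dicts = get_detection_dataset_dicts(False, ["my_coco_train"])
# wogt=True: annotations are ignored (has_instances is False), so no filtering happens.
unlabeled_dicts = get_detection_dataset_dicts(True, ["my_unlabeled_train"])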
Example 5
def combine_detection_dataset_dicts(
    dataset_names: Collection[str],
    keep_instance_predicate: Optional[InstancePredicate] = None,
    proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
    """
    Load and prepare dataset dicts for training / testing

    Args:
        dataset_names (Collection[str]): a list of dataset names
        keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
            applied to instance dicts which defines whether to keep the instance
        proposal_files (Collection[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    if proposal_files is None:
        proposal_files = [None] * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)
    # load annotations and dataset metadata
    dataset_map = {}
    for dataset_name in dataset_names:
        dataset_dicts = DatasetCatalog.get(dataset_name)
        dataset_map[dataset_name] = dataset_dicts
    # initialize category maps
    _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
    # apply category maps
    all_datasets_dicts = []
    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
        dataset_dicts = dataset_map[dataset_name]
        assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
        if proposal_file is not None:
            dataset_dicts = load_proposals_into_dataset(
                dataset_dicts, proposal_file)
        dataset_dicts = _maybe_filter_and_map_categories(
            dataset_name, dataset_dicts)
        _map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
        print_instances_class_histogram(
            dataset_dicts,
            MetadataCatalog.get(dataset_name).thing_classes)
        all_datasets_dicts.append(dataset_dicts)

    if keep_instance_predicate is not None:
        all_datasets_dicts_plain = [
            d for d in itertools.chain.from_iterable(all_datasets_dicts)
            if keep_instance_predicate(d)
        ]
    else:
        all_datasets_dicts_plain = list(
            itertools.chain.from_iterable(all_datasets_dicts))
    return all_datasets_dicts_plain
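
A sketch of the same call with per-dataset proposal files; the dataset names and .pkl paths are hypothetical, and each proposal file must match its dataset as described in the docstring.

# Hypothetical names and paths; one proposal file per dataset.
dicts = combine_detection_dataset_dicts(
    ["dataset_a_train", "dataset_b_train"],
    proposal_files=["proposals_a.pkl", "proposals_b.pkl"],
)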
Example 6
def get_detection_dataset_dicts_with_source(dataset_names,
                                            filter_empty=True,
                                            min_keypoints=0,
                                            proposal_files=None):
    assert len(dataset_names)
    dataset_dicts = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    for source_id, (dataset_name, dicts) in \
        enumerate(zip(dataset_names, dataset_dicts)):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
        for d in dicts:
            d['dataset_source'] = source_id

        if "annotations" in dicts[0]:
            try:
                class_names = MetadataCatalog.get(dataset_name).thing_classes
                check_metadata_consistency("thing_classes", [dataset_name])
                print_instances_class_histogram(dicts, class_names)
            except AttributeError:  # class names are not available for this dataset
                pass

    assert proposal_files is None

    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    has_instances = "annotations" in dataset_dicts[0]
    if filter_empty and has_instances:
        dataset_dicts = filter_images_with_only_crowd_annotations(
            dataset_dicts)
    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(
            dataset_dicts, min_keypoints)

    return dataset_dicts
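
A minimal sketch, assuming two registered datasets (names hypothetical). Every returned dict carries a dataset_source index recording which dataset it came from.

# dataset_source is 0 for dicts from the first dataset, 1 for the second.
dicts = get_detection_dataset_dicts_with_source(
    ["dataset_a_train", "dataset_b_train"], filter_empty=True)
print({d["dataset_source"] for d in dicts})  # e.g. {0, 1}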
Example 7
def fsod_get_detection_dataset_dicts(dataset_names,
                                     filter_empty=True,
                                     min_keypoints=0,
                                     proposal_files=None):
    """
    Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.
    Args:
        dataset_names (list[str]): a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    dataset_dicts_original = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts_original):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts_original = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts_original,
                                                      proposal_files)
        ]

    if 'train' not in dataset_names[0]:
        dataset_dicts = list(
            itertools.chain.from_iterable(dataset_dicts_original))
    else:
        dataset_dicts_original = list(
            itertools.chain.from_iterable(dataset_dicts_original))
        dataset_dicts_original = filter_images_with_only_crowd_annotations(
            dataset_dicts_original)
        ###################################################################################
        # split image-based annotations to instance-based annotations for few-shot learning
        dataset_dicts = []
        index_dicts = []
        split_flag = True
        if split_flag:
            for record in dataset_dicts_original:
                file_name = record['file_name']
                height = record['height']
                width = record['width']
                image_id = record['image_id']
                annotations = record['annotations']
                category_dict = {}
                for ann_id, ann in enumerate(annotations):

                    ann.pop("segmentation", None)
                    ann.pop("keypoints", None)

                    category_id = ann['category_id']
                    if category_id not in category_dict.keys():
                        category_dict[category_id] = [ann]
                    else:
                        category_dict[category_id].append(ann)

                for key, item in category_dict.items():
                    instance_ann = {}
                    instance_ann['file_name'] = file_name
                    instance_ann['height'] = height
                    instance_ann['width'] = width

                    instance_ann['annotations'] = item

                    dataset_dicts.append(instance_ann)

    has_instances = "annotations" in dataset_dicts[0]
    # Keep images without instance-level GT if the dataset has semantic labels.
    if (filter_empty and has_instances
            and "sem_seg_file_name" not in dataset_dicts[0]):
        dataset_dicts = filter_images_with_only_crowd_annotations(
            dataset_dicts)

    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(
            dataset_dicts, min_keypoints)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass
    return dataset_dicts
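
A hedged usage sketch, assuming a registered few-shot split whose name contains 'train' (the name is hypothetical). In that branch each returned dict holds the annotations of exactly one category from one image.

# Hypothetical dataset name; 'train' in the name triggers the per-category split above.
support_dicts = fsod_get_detection_dataset_dicts(["my_fewshot_train"])
categories_per_record = {
    len({a["category_id"] for a in d["annotations"]}) for d in support_dicts
}
print(categories_per_record)  # expected: {1}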