Example 1
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.build import (check_metadata_consistency,
                                   print_instances_class_histogram)


def get_detection_dataset_dicts_support(dataset_name,
                                        filter_empty=True,
                                        min_keypoints=0,
                                        proposal_file=None):
    """
    Load support-set dataset dicts for few-shot detection.

    The catalog entry for `dataset_name` is expected to return a dict
    mapping class id to a list of annotation records; `filter_empty`,
    `min_keypoints` and `proposal_file` are currently unused.
    """
    dataset_dict = DatasetCatalog.get(dataset_name)
    dataset_dict_flattened = []
    for id_class, annotations_class in dataset_dict.items():
        dataset_dict_flattened.extend(annotations_class)

    # pre-extracted proposals: need to think about this later
    """
    if proposal_file is not None:
        # load precomputed proposals from the proposal file
        dataset_dict_flattened = load_proposals_into_dataset(
            dataset_dict_flattened, proposal_file)
    """
    # data distribution
    class_names = MetadataCatalog.get(dataset_name).thing_classes
    check_metadata_consistency("thing_classes", [dataset_name])
    print_instances_class_histogram(dataset_dict_flattened, class_names)

    # Return the per-class dict (support sets are looked up by class id);
    # the flattened copy above is only used for the histogram.
    return dataset_dict
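A minimal driving sketch for the support loader above. The dataset name and records are hypothetical; the only constraint taken from the code is that `DatasetCatalog.get` must return a `{class_id: [record, ...]}` mapping rather than the usual list of dicts.

from detectron2.data import DatasetCatalog, MetadataCatalog

def _toy_support():  # hypothetical loader
    return {
        0: [{"file_name": "cat_001.jpg",
             "annotations": [{"category_id": 0}]}],
        1: [{"file_name": "dog_001.jpg",
             "annotations": [{"category_id": 1}]}],
    }

DatasetCatalog.register("toy_support", _toy_support)  # hypothetical name
MetadataCatalog.get("toy_support").thing_classes = ["cat", "dog"]

support = get_detection_dataset_dicts_support("toy_support")
print(sorted(support))  # [0, 1] -- one support list per class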
Example 2
import itertools

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.build import (check_metadata_consistency,
                                   filter_images_with_only_crowd_annotations,
                                   print_instances_class_histogram)
# filter_images_without_any_hois is assumed to be a project-local helper.


def get_hoi_dataset_dicts(dataset_names, filter_empty=True):
    """
    Load and prepare dataset dicts for HOI (human-object interaction) detection.

    Args:
        dataset_names (list[str]): a list of dataset names
        filter_empty (bool): whether to filter out images without instance
            annotations and, for HOI datasets, images without any HOIs.
    """
    assert len(dataset_names)
    dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    has_instances = "annotations" in dataset_dicts[0]

    if filter_empty and has_instances:
        dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)

    if filter_empty and has_instances and "actions" in dataset_dicts[0]["annotations"][0]:
        dataset_dicts = filter_images_without_any_hois(dataset_dicts)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass
    return dataset_dicts
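The "actions" membership test above implies that HOI records carry an extra per-annotation field on top of the standard detection keys. A sketch of the assumed shape (everything beyond the presence of the key is a guess):

# Hypothetical HOI record: only the existence of an "actions" key per
# annotation is implied by the code above; its contents are assumptions.
record = {
    "file_name": "hoi_000001.jpg",
    "height": 480,
    "width": 640,
    "annotations": [
        {"category_id": 0, "bbox": [10, 10, 50, 90], "actions": [3, 7]},
    ],
}
print("actions" in record["annotations"][0])  # True -> HOI filtering runs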
Example 3
import itertools

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.build import (check_metadata_consistency,
                                   filter_images_with_few_keypoints,
                                   filter_images_with_only_crowd_annotations,
                                   load_proposals_into_dataset,
                                   print_instances_class_histogram)


def get_detection_dataset_dicts(dataset_names,
                                filter_empty=True,
                                min_keypoints=0,
                                proposal_files=None):
    """
    Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.

    Args:
        dataset_names (str or list[str]): a dataset name or a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.

    Returns:
        list[dict]: a list of dicts following the standard dataset dict format.
    """
    if isinstance(dataset_names, str):
        dataset_names = [dataset_names]
    assert len(dataset_names)
    dataset_dicts = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts,
                                                      proposal_files)
        ]

    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    has_instances = "annotations" in dataset_dicts[0]
    if filter_empty and has_instances:
        dataset_dicts = filter_images_with_only_crowd_annotations(
            dataset_dicts)
    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(
            dataset_dicts, min_keypoints)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass

    assert len(dataset_dicts), "No valid data found in {}.".format(
        ",".join(dataset_names))
    return dataset_dicts
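A minimal call sketch for the function above, registering a throwaway dataset on the fly; the dataset name and records are hypothetical:

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

def _toy_train():  # hypothetical loader
    return [{
        "file_name": "img_0.jpg", "image_id": 0, "height": 100, "width": 100,
        "annotations": [{"category_id": 0, "bbox": [0, 0, 50, 50],
                         "bbox_mode": BoxMode.XYXY_ABS, "iscrowd": 0}],
    }]

DatasetCatalog.register("toy_train", _toy_train)
MetadataCatalog.get("toy_train").thing_classes = ["widget"]

# This variant also accepts a single name thanks to the isinstance check.
dicts = get_detection_dataset_dicts("toy_train")
assert len(dicts) == 1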
Example 4
import itertools

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.build import (check_metadata_consistency,
                                   filter_images_with_few_keypoints,
                                   filter_images_with_only_crowd_annotations,
                                   load_proposals_into_dataset,
                                   print_instances_class_histogram)


def get_detection_dataset_dicts(dataset_names,
                                filter_empty=True,
                                min_keypoints=0,
                                proposal_files=None):
    """
    Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.
    Args:
        dataset_names (list[str]): a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    dataset_dicts = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts,
                                                      proposal_files)
        ]
    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
    has_instances = "annotations" in dataset_dicts[0]
    # Keep images without instance-level GT if the dataset has semantic labels.
    if (filter_empty and has_instances
            and "sem_seg_file_name" not in dataset_dicts[0]):
        dataset_dicts = filter_images_with_only_crowd_annotations(
            dataset_dicts)

    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(
            dataset_dicts, min_keypoints)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass
    return dataset_dicts
Example 5
import itertools

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.build import print_instances_class_histogram
import utils  # project-local module providing check_metadata_consistency


def get_classification_dataset_dicts(dataset_names):
    """
    Load and merge dataset dicts for image classification.

    Args:
        dataset_names (list[str]): a list of dataset names
    """
    # retrieve the datasets from the catalog
    assert len(dataset_names)
    dataset_dicts = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    # Merge all datasets into one
    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    try:
        class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
        utils.check_metadata_consistency("thing_classes", dataset_names)
        print_instances_class_histogram(dataset_dicts, class_names)
    except AttributeError:  # class names are not available for this dataset
        pass

    return dataset_dicts
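The same registration pattern works here; a hedged sketch, where the dataset name and record layout are hypothetical and the records carry an "annotations" list only so the shared histogram helper has something to count:

from detectron2.data import DatasetCatalog, MetadataCatalog

DatasetCatalog.register(  # hypothetical dataset
    "toy_cls",
    lambda: [{"file_name": "img_0.jpg",
              "annotations": [{"category_id": 1}]}])
MetadataCatalog.get("toy_cls").thing_classes = ["cat", "dog"]

dicts = get_classification_dataset_dicts(["toy_cls"])
print(len(dicts))  # 1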
Example 6
import itertools

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.build import (check_metadata_consistency,
                                   filter_images_with_few_keypoints,
                                   filter_images_with_only_crowd_annotations,
                                   load_proposals_into_dataset,
                                   print_instances_class_histogram)


def fsod_get_detection_dataset_dicts(dataset_names,
                                     filter_empty=True,
                                     min_keypoints=0,
                                     proposal_files=None):
    """
    Load and prepare dataset dicts for few-shot object detection.
    For training datasets, image-based annotations are split into
    instance-based (per-category) records.

    Args:
        dataset_names (list[str]): a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    dataset_dicts_original = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts_original):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts_original = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts_original,
                                                      proposal_files)
        ]

    if 'train' not in dataset_names[0]:
        dataset_dicts = list(
            itertools.chain.from_iterable(dataset_dicts_original))
    else:
        dataset_dicts_original = list(
            itertools.chain.from_iterable(dataset_dicts_original))
        dataset_dicts_original = filter_images_with_only_crowd_annotations(
            dataset_dicts_original)
        ###################################################################################
        # split image-based annotations to instance-based annotations for few-shot learning
        dataset_dicts = []
        for record in dataset_dicts_original:
            # group this image's annotations by category
            category_dict = {}
            for ann in record['annotations']:
                ann.pop("segmentation", None)
                ann.pop("keypoints", None)
                category_dict.setdefault(ann['category_id'], []).append(ann)

            # emit one instance-level record per category in the image
            for anns in category_dict.values():
                dataset_dicts.append({
                    'file_name': record['file_name'],
                    'height': record['height'],
                    'width': record['width'],
                    'image_id': record['image_id'],
                    'annotations': anns,
                })

    has_instances = "annotations" in dataset_dicts[0]
    # Keep images without instance-level GT if the dataset has semantic labels.
    if (filter_empty and has_instances
            and "sem_seg_file_name" not in dataset_dicts[0]):
        dataset_dicts = filter_images_with_only_crowd_annotations(
            dataset_dicts)

    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(
            dataset_dicts, min_keypoints)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass
    return dataset_dicts
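To make the split concrete: one training image containing two categories becomes two instance-level records, one per category. A self-contained illustration of just that loop, using toy data and no detectron2:

from collections import defaultdict

record = {
    "file_name": "img_7.jpg", "height": 100, "width": 100, "image_id": 7,
    "annotations": [
        {"category_id": 0, "bbox": [0, 0, 10, 10]},
        {"category_id": 1, "bbox": [20, 20, 40, 40]},
        {"category_id": 0, "bbox": [50, 50, 60, 60]},
    ],
}

by_class = defaultdict(list)
for ann in record["annotations"]:
    by_class[ann["category_id"]].append(ann)

split = [{"file_name": record["file_name"], "height": record["height"],
          "width": record["width"], "image_id": record["image_id"],
          "annotations": anns} for anns in by_class.values()]
print(len(split))  # 2 records: category 0 (two boxes) and category 1 (one)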