def get_detection_dataset_dicts_support(dataset_name, filter_empty=True, min_keypoints=0, proposal_file=None):
    """
    Load the support-set dataset dicts for ``dataset_name`` and print a
    per-class instance histogram.

    The catalog entry is expected to map class id -> list of annotation
    records; the mapping itself is returned unchanged — the flattened list
    is built only for the statistics printout.

    Args:
        dataset_name (str): a name registered in DatasetCatalog.
        filter_empty (bool): unused; kept for signature compatibility.
        min_keypoints (int): unused; kept for signature compatibility.
        proposal_file (str): unused; loading precomputed proposals for the
            support set is not implemented yet (see the TODO below).

    Returns:
        dict: the catalog entry, keyed by class id.
    """
    per_class = DatasetCatalog.get(dataset_name)

    # Flatten {class_id: [annotation, ...]} into one list for the histogram.
    flattened = list(itertools.chain.from_iterable(per_class.values()))

    # TODO(review): precomputed proposals (load_proposals_into_dataset) are
    # not wired up for the support set; see get_detection_dataset_dicts for
    # the intended shape of that logic.

    # Report the class distribution of the support data.
    class_names = MetadataCatalog.get(dataset_name).thing_classes
    check_metadata_consistency("thing_classes", [dataset_name])
    print_instances_class_histogram(flattened, class_names)

    return per_class
def get_hoi_dataset_dicts(dataset_names, filter_empty=True):
    """
    Load and prepare dataset dicts for HOI (human-object interaction) detection.

    Args:
        dataset_names (list[str]): a list of dataset names registered in
            DatasetCatalog.
        filter_empty (bool): whether to filter out images without instance
            annotations and, for HOI data, images without any interaction.

    Returns:
        list[dict]: a list of dicts following the standard dataset dict format.
    """
    assert len(dataset_names)
    dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    has_instances = "annotations" in dataset_dicts[0]
    if filter_empty and has_instances:
        dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)
    # HOI datasets mark interactions with an "actions" field on each annotation.
    # Guard the probe so an empty result set or an image with an empty
    # annotation list cannot raise IndexError.
    if (filter_empty and has_instances and dataset_dicts
            and dataset_dicts[0]["annotations"]
            and "actions" in dataset_dicts[0]["annotations"][0]):
        dataset_dicts = filter_images_without_any_hois(dataset_dicts)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:
            # class names are not available for this dataset
            pass

    # Fail loudly if filtering removed everything (consistent with
    # get_detection_dataset_dicts).
    assert len(dataset_dicts), "No valid data found in {}.".format(",".join(dataset_names))
    return dataset_dicts
def get_detection_dataset_dicts(dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None):
    """
    Load and prepare dataset dicts for instance detection/segmentation and
    semantic segmentation.

    Args:
        dataset_names (str or list[str]): a dataset name or a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.

    Returns:
        list[dict]: a list of dicts following the standard dataset dict format.
    """
    # Accept a bare name for convenience.
    if isinstance(dataset_names, str):
        dataset_names = [dataset_names]
    assert len(dataset_names)

    per_dataset = [DatasetCatalog.get(name) for name in dataset_names]
    for name, dicts in zip(dataset_names, per_dataset):
        assert len(dicts), "Dataset '{}' is empty!".format(name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # Attach precomputed proposals, one file per dataset.
        per_dataset = [
            load_proposals_into_dataset(dicts, path)
            for dicts, path in zip(per_dataset, proposal_files)
        ]

    merged = list(itertools.chain.from_iterable(per_dataset))

    has_instances = "annotations" in merged[0]
    if has_instances:
        if filter_empty:
            merged = filter_images_with_only_crowd_annotations(merged)
        if min_keypoints > 0:
            merged = filter_images_with_few_keypoints(merged, min_keypoints)
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(merged, class_names)
        except AttributeError:
            # class names are not available for this dataset
            pass

    assert len(merged), "No valid data found in {}.".format(",".join(dataset_names))
    return merged
def get_detection_dataset_dicts(dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None):
    """
    Load and prepare dataset dicts for instance detection/segmentation and
    semantic segmentation.

    Args:
        dataset_names (list[str]): a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.

    Returns:
        list[dict]: a list of dicts following the standard dataset dict format.
    """
    # NOTE(review): removed leftover `debug = 1` scratch assignments that had
    # no effect (likely breakpoint anchors from development).
    assert len(dataset_names)
    dataset_dicts = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
        ]

    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    has_instances = "annotations" in dataset_dicts[0]
    # Keep images without instance-level GT if the dataset has semantic labels.
    if filter_empty and has_instances and "sem_seg_file_name" not in dataset_dicts[0]:
        dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)
    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:
            # class names are not available for this dataset
            pass

    return dataset_dicts
def get_classification_dataset_dicts(dataset_names):
    """
    Load and merge classification dataset dicts registered in DatasetCatalog.

    Args:
        dataset_names (list[str]): names of registered datasets.

    Returns:
        list[dict]: the concatenation of all per-dataset dicts.
    """
    assert len(dataset_names)

    # Fetch every dataset from the catalog and verify none is empty.
    loaded = [DatasetCatalog.get(name) for name in dataset_names]
    for name, dicts in zip(dataset_names, loaded):
        assert len(dicts), "Dataset '{}' is empty!".format(name)

    # Concatenate all datasets into a single flat list.
    merged = list(itertools.chain.from_iterable(loaded))

    try:
        class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
        utils.check_metadata_consistency("thing_classes", dataset_names)
        print_instances_class_histogram(merged, class_names)
    except AttributeError:
        # class names are not available for this dataset
        pass

    return merged
def fsod_get_detection_dataset_dicts(dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None):
    """
    Load and prepare dataset dicts for few-shot instance detection.

    For training datasets (name contains 'train'), each image-level record is
    split into one record per category present in the image, so that a single
    training sample carries annotations of exactly one class (few-shot
    episode construction). Evaluation datasets are returned image-based.

    Args:
        dataset_names (list[str]): a list of dataset names; the first name
            decides train vs. eval handling ('train' substring check).
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.

    Returns:
        list[dict]: dataset dicts — per-category records in the train case,
        per-image records otherwise.
    """
    assert len(dataset_names)
    dataset_dicts_original = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts_original):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts_original = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts_original, proposal_files)
        ]

    if 'train' not in dataset_names[0]:
        # Evaluation: just concatenate the per-dataset lists; no splitting.
        dataset_dicts = list(
            itertools.chain.from_iterable(dataset_dicts_original))
    else:
        # Training: flatten, drop crowd-only images, then split per category.
        dataset_dicts_original = list(
            itertools.chain.from_iterable(dataset_dicts_original))
        dataset_dicts_original = filter_images_with_only_crowd_annotations(
            dataset_dicts_original)
        ###################################################################################
        # split image-based annotations to instance-based annotations for few-shot learning
        dataset_dicts = []
        index_dicts = []  # NOTE(review): never populated or read; dead variable
        split_flag = True  # NOTE(review): always True; splitting is effectively unconditional
        if split_flag:
            for record in dataset_dicts_original:
                file_name = record['file_name']
                height = record['height']
                width = record['width']
                image_id = record['image_id']
                annotations = record['annotations']
                # Group this image's annotations by category id.
                category_dict = {}
                for ann_id, ann in enumerate(annotations):
                    # Masks/keypoints are not needed for few-shot detection;
                    # drop them to keep records small.
                    ann.pop("segmentation", None)
                    ann.pop("keypoints", None)
                    category_id = ann['category_id']
                    if category_id not in category_dict.keys():
                        category_dict[category_id] = [ann]
                    else:
                        category_dict[category_id].append(ann)
                # Emit one record per category present in the image.
                # NOTE(review): 'image_id' is read above but not copied into
                # the per-category record — presumably intentional upstream;
                # confirm no consumer of train records needs it.
                for key, item in category_dict.items():
                    instance_ann = {}
                    instance_ann['file_name'] = file_name
                    instance_ann['height'] = height
                    instance_ann['width'] = width
                    instance_ann['annotations'] = item
                    dataset_dicts.append(instance_ann)

    has_instances = "annotations" in dataset_dicts[0]
    # Keep images without instance-level GT if the dataset has semantic labels.
    if filter_empty and has_instances and "sem_seg_file_name" not in dataset_dicts[
            0]:
        dataset_dicts = filter_images_with_only_crowd_annotations(
            dataset_dicts)
    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(
            dataset_dicts, min_keypoints)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:
            # class names are not available for this dataset
            pass

    return dataset_dicts