def test_meta_provider():
    """Check ``ds.meta.index_to_label`` against the outputs of the train subset.

    Builds mock tagged data for 2 subsets x 10 categories x 100 images laid
    out as ``<subset>/cat<N>/<M>.png`` (each carrying ``N`` as its data), runs
    the split / label / label-to-index operations over it, and then verifies
    that every train output pair is consistent with the dataset's
    index-to-label mapping.
    """
    tagged_items = [
        MockTaggedData(
            relative_path=f"{subset}/cat{category_index}/{img_index}.png",
            data=category_index,
        )
        for subset in ["train", "val"]
        for category_index in range(10)
        for img_index in range(100)
    ]

    path_template = "{{subset}}/{{label}}/*.png"
    labeller = LabelByTemplate(template=path_template)
    splitter = SplitByTemplate(template=path_template)
    indexer = LabelToIndex()

    # The operations are applied in this order: split, label, then index.
    ds = Dataset(
        operations=[splitter, labeller, indexer],
        output_function=classification_output,
    )
    ds.form(tagged_items)

    index_to_label = ds.meta.index_to_label
    # Each train output unpacks into two elements: the first is used as the
    # key into the mapping, the second is the number expected after "cat".
    for mapping_key, category_suffix in ds.train:
        assert f"cat{category_suffix}" == index_to_label[mapping_key]
def test_selective_operation():
    """Check that ``SelectiveSubsetOperation`` applies a different limit per subset.

    The mock data holds 100 images for each of 10 categories in both "train"
    and "val". A limit of 50 samples per label bin is attached to "train" and
    a limit of 10 to "val"; the resulting subset sizes must be the limit
    multiplied by the number of categories.
    """
    category_count = 10
    images_per_category = 100
    train_limit = 50
    val_limit = 10

    tagged_items = []
    for subset in ["train", "val"]:
        for category_index in range(category_count):
            tagged_items.extend(
                MockTaggedData(
                    relative_path=f"{subset}/cat{category_index}/{img_index}.png",
                    data=category_index,
                )
                for img_index in range(images_per_category)
            )

    path_template = "{{subset}}/{{label}}/*.png"
    labeller = LabelByTemplate(template=path_template)
    splitter = SplitByTemplate(template=path_template)

    # Both limiters bin samples by their ``label`` attribute.
    train_limiter = LimitSamplesByBin(
        sample_limit=train_limit, bin_creator=lambda item: item.label
    )
    val_limiter = LimitSamplesByBin(
        sample_limit=val_limit, bin_creator=lambda item: item.label
    )
    per_subset_limiter = SelectiveSubsetOperation(
        {"train": train_limiter, "val": val_limiter}
    )

    ds = Dataset(
        operations=[splitter, labeller, per_subset_limiter],
        output_function=classification_output,
    )
    ds.form(tagged_items)

    assert len(ds.train) == train_limit * category_count
    assert len(ds.val) == val_limit * category_count
def test_splitting(mask_dataset):
    """Check that splitting by template neither loses nor duplicates objects.

    After forming the dataset with a single ``SplitByTemplate`` operation,
    the train, test and val subset sizes must add up to the dataset size,
    and the dataset size must equal the size of the input collection.
    """
    ds = Dataset(operations=[SplitByTemplate("{{ subset }}/*")])
    ds.form(mask_dataset)

    subset_total = len(ds.train) + len(ds.test) + len(ds.val)
    assert subset_total == len(ds)
    assert len(ds) == len(mask_dataset)
def test_dropping(mask_dataset):
    """Check that ``DropByTemplate`` removes every object matching ``*/mask*.jpg``.

    The test expects exactly half of ``mask_dataset`` to survive the drop and
    none of the surviving objects to have "mask" in its relative path.
    """
    ds = Dataset(operations=[DropByTemplate("*/mask*.jpg")])
    ds.form(mask_dataset)

    # Twice the remaining count must equal the input size.
    assert len(ds) * 2 == len(mask_dataset)
    for kept in ds.objects:
        assert "mask" not in kept.relative_path
def test_different_input():
    """Check iteration over a dataset built with ``triple_output``.

    Ten mock items carrying the data values 0..9 are formed into a dataset
    with no operations. Every value yielded by iterating the dataset must be
    a distinct member of {0, 3, 6, ..., 27}.

    Note: the test only rejects unexpected or repeated values; it does not
    assert that every expected value was actually produced.
    """
    sample_count = 10
    tagged_items = [MockTaggedData(f"{i}.png", i) for i in range(sample_count)]

    ds = Dataset(output_function=triple_output)
    ds.form(tagged_items)

    # Multiples of three for the data values 0..9.
    remaining = set(range(0, 3 * sample_count, 3))
    for output in ds:
        assert output in remaining
        # Removing the value makes a repeated output fail the check above.
        remaining.remove(output)
def test_splitting_and_combining(mask_dataset):
    """Check dataset sizes when splitting and mask pairing are combined.

    The dataset is formed with ``SplitByTemplate`` followed by
    ``MaskByTemplate``; twice the summed size of the train, test and val
    subsets must equal the size of the whole dataset.
    """
    pairing_template = MaskTemplate(
        image="{{subset}}/image_{{img_id}}.jpg",
        mask="{{subset}}/mask_{{img_id}}.jpg",
    )
    splitter = SplitByTemplate("{{ subset }}/*")
    mask_pairer = MaskByTemplate(pairing_template)

    ds1 = Dataset(operations=[splitter, mask_pairer])
    ds1.form(mask_dataset)

    subset_total = len(ds1.train) + len(ds1.test) + len(ds1.val)
    assert subset_total * 2 == len(ds1)
def test_coco(detection_collection):
    """Check that ``SingleCoco`` yields one dataset entry per input image.

    The detection collection is converted to a COCO dictionary, which is
    wrapped in a mock tagged-data object at ``train.json`` and placed in
    front of the images. After forming with ``SingleCoco`` the dataset size
    must equal the number of images.
    """
    images, label_mapping = detection_collection
    coco_dict = detection_collection_to_coco_dict(images, label_mapping)

    annotation_name = "train.json"
    annotation_entry = MockTaggedData(relative_path=annotation_name, data=coco_dict)
    # The annotation file comes first, followed by every image in order.
    tagged_data = [annotation_entry, *images]

    coco_operation = SingleCoco(annotation_file=annotation_name, data_folder="")
    ds = Dataset(operations=[coco_operation])
    ds.form(tagged_data)

    assert len(ds) == len(images)
def test_adding():
    """Check that adding the train and val subsets reproduces the whole dataset.

    100 mock images per subset are split by the ``{{subset}}/*.png``
    template; ``ds.train + ds.val`` must have the same length and the same
    set of objects as ``ds`` itself.
    """

    def make_item(subset, img_index):
        # Each item's data is its own relative path.
        relative_path = f"{subset}/{img_index}.png"
        return MockTaggedData(relative_path=relative_path, data=relative_path)

    tagged_items = [
        make_item(subset, img_index)
        for subset in ["train", "val"]
        for img_index in range(100)
    ]

    splitter = SplitByTemplate(template="{{subset}}/*.png")
    ds = Dataset(operations=[splitter], output_function=classification_output)
    ds.form(tagged_items)

    # The sum is evaluated separately for each assertion.
    assert len(ds) == len(ds.train + ds.val)
    assert set(ds.objects) == set((ds.train + ds.val).objects)
def test_combining_mask_img(mask_dataset):
    """Check that ``MaskByTemplate`` pairs each image with its matching mask.

    After forming, the dataset must hold half as many objects as the input
    (integer division). For every object, ``output()`` must unpack into two
    numpy arrays, the object's own path must contain "image", its first
    annotation's path must contain "mask", and the two paths must be equal
    once those words are removed.
    """
    pairing_template = MaskTemplate(
        image="{{subset}}/image_{{img_id}}.jpg",
        mask="{{subset}}/mask_{{img_id}}.jpg",
    )
    ds = Dataset(
        operations=[MaskByTemplate(pairing_template)],
        output_function=single_mask_output,
    )
    ds.form(mask_dataset)

    assert len(ds.objects) == len(mask_dataset) // 2

    for paired in ds.objects:
        img, mask = paired.output()
        for array in (img, mask):
            assert isinstance(array, np.ndarray)

        img_path = paired.relative_path
        mask_path = paired.annotations[0].relative_path
        assert "image" in img_path
        assert "mask" in mask_path

        # With the distinguishing word stripped, both paths must coincide.
        assert img_path.replace("image", "") == mask_path.replace("mask", "")