def execute(parser, args):
    """Loads the dataset selected by the parsed arguments and opens it in the
    App.

    The sources are checked in this order, and the first one that is set
    wins: ``args.zoo_dataset``, ``args.dataset_dir``, ``args.json_path``.

    Args:
        parser: the argument parser (not used by this function)
        args: the parsed arguments

    Raises:
        ValueError: if none of the supported sources was provided
    """
    if args.zoo_dataset:
        # Zoo dataset, identified by name
        zoo_name = args.zoo_dataset
        dataset = foz.load_zoo_dataset(
            zoo_name, splits=args.splits, dataset_dir=args.dataset_dir
        )
    elif args.dataset_dir:
        # Dataset on disk; `args.type` names the dataset type class
        dataset_name = args.name
        source_dir = args.dataset_dir
        dataset = fod.Dataset.from_dir(
            source_dir, etau.get_class(args.type), name=dataset_name
        )
    elif args.json_path:
        # Dataset serialized as JSON
        dataset_name = args.name
        dataset = fod.Dataset.from_json(args.json_path, name=dataset_name)
    else:
        raise ValueError(
            "Either `zoo_dataset`, `dataset_dir`, or `json_path` must be "
            "provided"
        )

    is_remote = args.remote
    session = fos.launch_app(dataset=dataset, port=args.port, remote=is_remote)
    _watch_session(session, remote=args.remote)
def test_custom_classification_dataset_imports(basedir):
    """Runs `_run_custom_imports` for each classification dataset type.

    A 100-sample shuffled subset of the CIFAR-10 test split is loaded, the
    ``ground_truth`` label of 10 samples is cleared, and the result is handed
    to `_run_custom_imports` once per dataset type.

    Args:
        basedir: the directory under which a ``custom-imports/<type name>``
            directory is used for each dataset type
    """
    # Types of classification datasets to test
    dataset_types = (
        fo.types.FiftyOneImageClassificationDataset,
        fo.types.ImageClassificationDirectoryTree,
        fo.types.TFImageClassificationDataset,
    )

    # Load a small classification dataset
    cdataset = foz.load_zoo_dataset(
        "cifar10",
        split="test",
        dataset_name="classification-dataset",
        shuffle=True,
        max_samples=100,
    )

    # Remove labels from some samples
    for sample in cdataset.take(10):
        sample.ground_truth = None
        sample.save()

    # Test custom imports
    for dataset_type in dataset_types:
        type_name = dataset_type.__name__
        print(type_name)

        _run_custom_imports(
            cdataset,
            os.path.join(basedir, "custom-imports", type_name),
            dataset_type,
            num_unlabeled=10,
            max_samples=3,
        )
def test_custom_multitask_image_dataset_imports(basedir):
    """Runs `_run_custom_imports` for each multitask image dataset type.

    A 100-sample shuffled subset of the COCO-2017 validation split is loaded,
    the ``ground_truth`` label of 10 samples is cleared, and the result is
    handed to `_run_custom_imports` once per dataset type.

    Args:
        basedir: the directory under which a ``custom-imports/<type name>``
            directory is used for each dataset type
    """
    # Types of multitask datasets to test
    dataset_types = (
        fo.types.FiftyOneImageLabelsDataset,
        fo.types.BDDDataset,
    )

    # Load a small multitask image dataset
    idataset = foz.load_zoo_dataset(
        "coco-2017",
        split="validation",
        dataset_name="image-labels-dataset",
        shuffle=True,
        max_samples=100,
    )

    # Remove labels from some samples
    for sample in idataset.take(10):
        sample.ground_truth = None
        sample.save()

    # Test custom imports
    for dataset_type in dataset_types:
        type_name = dataset_type.__name__
        print(type_name)

        _run_custom_imports(
            idataset,
            os.path.join(basedir, "custom-imports", type_name),
            dataset_type,
            max_samples=3,
            label_prefix="",
        )
def execute(parser, args):
    """Loads the dataset selected by the parsed arguments and opens it in the
    App.

    The sources are checked in this order, and the first one that is set
    wins: ``args.zoo_dataset``, ``args.dataset_dir``, ``args.images_dir``,
    ``args.images_patt``, ``args.videos_dir``, ``args.videos_patt``,
    ``args.json_path``.

    Args:
        parser: the argument parser (not used by this function)
        args: the parsed arguments

    Raises:
        ValueError: if none of the supported sources was provided
    """
    if args.zoo_dataset:
        # View a zoo dataset
        name = args.zoo_dataset
        splits = args.splits
        dataset_dir = args.dataset_dir
        kwargs = _parse_dataset_import_kwargs(args)

        dataset = foz.load_zoo_dataset(
            name, splits=splits, dataset_dir=dataset_dir, **kwargs
        )
    elif args.dataset_dir:
        # View a dataset from a directory
        name = args.name
        dataset_dir = args.dataset_dir
        dataset_type = etau.get_class(args.type)
        kwargs = _parse_dataset_import_kwargs(args)

        dataset = fod.Dataset.from_dir(
            dataset_dir, dataset_type, name=name, **kwargs
        )
    elif args.images_dir:
        # View a directory of images
        dataset = fod.Dataset.from_images_dir(args.images_dir, name=args.name)
    elif args.images_patt:
        # View a glob pattern of images
        dataset = fod.Dataset.from_images_patt(
            args.images_patt, name=args.name
        )
    elif args.videos_dir:
        # View a directory of videos
        dataset = fod.Dataset.from_videos_dir(args.videos_dir, name=args.name)
    elif args.videos_patt:
        # View a glob pattern of videos
        dataset = fod.Dataset.from_videos_patt(
            args.videos_patt, name=args.name
        )
    elif args.json_path:
        # View a dataset from a JSON file
        dataset = fod.Dataset.from_json(args.json_path, name=args.name)
    else:
        # List every source this command accepts, not just the original three
        raise ValueError(
            "One of `zoo_dataset`, `dataset_dir`, `images_dir`, "
            "`images_patt`, `videos_dir`, `videos_patt`, or `json_path` must "
            "be provided"
        )

    session = fos.launch_app(
        dataset=dataset, port=args.port, remote=args.remote
    )

    _watch_session(session, remote=args.remote)
def execute(parser, args):
    """Loads the requested zoo dataset, marks it persistent, and reports its
    name.

    Args:
        parser: the argument parser (not used by this function)
        args: the parsed arguments; ``name``, ``splits`` and ``dataset_dir``
            are read
    """
    zoo_name = args.name
    dataset = foz.load_zoo_dataset(
        zoo_name, splits=args.splits, dataset_dir=args.dataset_dir
    )

    # Flag the dataset as persistent before announcing it
    dataset.persistent = True

    print("Dataset '%s' created" % dataset.name)
def _video_quickstart(interactive):
    """Launches the App on the "quickstart-video" zoo dataset.

    Args:
        interactive: whether to return control to the caller right after
            printing the guide

    Returns:
        a ``(dataset, session)`` tuple when ``interactive`` is truthy;
        otherwise ``None``, after ``session.wait()`` returns
    """
    dataset = foz.load_zoo_dataset("quickstart-video")
    session = fos.launch_app(dataset=dataset)

    # The same guide is shown in both modes
    print(_VIDEO_QUICKSTART_GUIDE)

    if not interactive:
        session.wait()
        return None

    return dataset, session
def _quickstart(interactive):
    """Launches the App on the "quickstart" zoo dataset.

    Args:
        interactive: whether to return control to the caller right after
            printing the guide

    Returns:
        a ``(dataset, session)`` tuple when ``interactive`` is truthy;
        otherwise ``None``, after ``session.wait()`` returns
    """
    dataset = foz.load_zoo_dataset("quickstart")
    session = fos.launch_app(dataset=dataset)

    # Only the interactive guide includes the extra Python snippet
    guide_extra = _FILTER_DETECTIONS_IN_PYTHON if interactive else ""
    print(_QUICKSTART_GUIDE % guide_extra)

    if not interactive:
        session.wait()
        return None

    return dataset, session
def test_custom_detection_dataset_imports(basedir):
    """Runs `_run_custom_imports` for each detection dataset type.

    A 100-sample shuffled subset of the COCO-2017 validation split is loaded,
    the ``ground_truth`` label of 10 samples is cleared, and the result is
    handed to `_run_custom_imports` once per dataset type.

    Args:
        basedir: the directory under which a ``custom-imports/<type name>``
            directory is used for each dataset type
    """
    # Types of detection datasets to test
    dataset_types = (
        fo.types.FiftyOneImageDetectionDataset,
        fo.types.COCODetectionDataset,
        fo.types.VOCDetectionDataset,
        fo.types.KITTIDetectionDataset,
        fo.types.TFObjectDetectionDataset,
        fo.types.CVATImageDataset,
    )

    # Load a small detection dataset
    ddataset = foz.load_zoo_dataset(
        "coco-2017",
        split="validation",
        dataset_name="detection-dataset",
        shuffle=True,
        max_samples=100,
    )

    # Remove labels from some samples
    for sample in ddataset.take(10):
        sample.ground_truth = None
        sample.save()

    # Test custom imports
    for dataset_type in dataset_types:
        type_name = dataset_type.__name__
        print(type_name)

        # COCODetectionDataset and TFObjectDetectionDataset formats cannot
        # distinguish between an unlabeled image and a labeled image with zero
        # detections, so no unlabeled count is passed for them
        is_ambiguous = dataset_type in (
            fo.types.COCODetectionDataset,
            fo.types.TFObjectDetectionDataset,
        )
        num_unlabeled = None if is_ambiguous else 10

        _run_custom_imports(
            ddataset,
            os.path.join(basedir, "custom-imports", type_name),
            dataset_type,
            num_unlabeled=num_unlabeled,
            max_samples=3,
        )
def execute(parser, args):
    """Loads the requested zoo dataset, marks it persistent, and reports its
    name.

    Args:
        parser: the argument parser (not used by this function)
        args: the parsed arguments; ``name``, ``splits``, ``dataset_name`` and
            ``dataset_dir`` are read directly, and ``args`` is also passed to
            `_parse_dataset_import_kwargs` for extra keyword arguments
    """
    zoo_name = args.name
    load_options = {
        "splits": args.splits,
        "dataset_name": args.dataset_name,
        "dataset_dir": args.dataset_dir,
    }
    import_options = _parse_dataset_import_kwargs(args)

    dataset = foz.load_zoo_dataset(zoo_name, **load_options, **import_options)

    # Flag the dataset as persistent before announcing it
    dataset.persistent = True

    print("Dataset '%s' created" % dataset.name)
def test_zoo():
    """Lists the zoo datasets, loads CIFAR-10, and prints a few of its
    samples.
    """
    # List available datasets
    available = foz.list_zoo_datasets()
    print(available)

    # Load a dataset, replacing any existing one
    dataset = foz.load_zoo_dataset("cifar10", drop_existing_dataset=True)

    # Print the dataset summary
    print(dataset)

    # Print the label and filepath of a few random samples
    for sample in dataset.take(5):
        print("%s: %s" % (sample.ground_truth.label, sample.filepath))
def test_custom_generic_dataset_imports(basedir):
    """Runs `_run_custom_imports` for each generic dataset type.

    A 100-sample shuffled subset of the "quickstart" zoo dataset is loaded and
    handed to `_run_custom_imports` once per dataset type.

    Args:
        basedir: the directory under which a ``custom-imports/<type name>``
            directory is used for each dataset type
    """
    # Types of generic datasets to test
    dataset_types = (fo.types.FiftyOneDataset,)

    # Load a small generic dataset
    gdataset = foz.load_zoo_dataset(
        "quickstart",
        dataset_name="generic-dataset",
        shuffle=True,
        max_samples=100,
    )

    for dataset_type in dataset_types:
        type_name = dataset_type.__name__
        print(type_name)

        _run_custom_imports(
            gdataset,
            os.path.join(basedir, "custom-imports", type_name),
            dataset_type,
            max_samples=3,
        )
def test_custom_unlabeled_image_dataset_imports(basedir):
    """Runs `_run_custom_imports` for each unlabeled image dataset type.

    A 100-sample shuffled subset of the CIFAR-10 test split is loaded, its
    ``ground_truth`` field is deleted, and the result is handed to
    `_run_custom_imports` once per dataset type.

    Args:
        basedir: the directory under which a ``custom-imports/<type name>``
            directory is used for each dataset type
    """
    # Types of unlabeled image datasets to test
    dataset_types = (fo.types.ImageDirectory,)

    # Load a small image dataset and strip its labels
    udataset = foz.load_zoo_dataset(
        "cifar10",
        split="test",
        dataset_name="unlabeled-dataset",
        shuffle=True,
        max_samples=100,
    )
    udataset.delete_sample_field("ground_truth")

    # Test custom imports
    for dataset_type in dataset_types:
        type_name = dataset_type.__name__
        print(type_name)

        _run_custom_imports(
            udataset,
            os.path.join(basedir, "custom-imports", type_name),
            dataset_type,
            max_samples=3,
        )
def app_test():
    """Opens the "quickstart" zoo dataset in the App and blocks until the App
    is closed.

    The ``if False`` / ``elif False`` branches are manual toggles: flip one to
    ``True`` to show the dataset itself, its ground truth object patches, or
    its evaluation patches. As written, none of them runs.
    """
    # Other zoo datasets that can be swapped in: "quickstart-video", "cifar10"
    dataset = foz.load_zoo_dataset("quickstart")

    session = fo.launch_app(dataset, port=5151)
    # session.show() can be called here if needed

    if False:
        # Show the dataset in the App
        session.dataset = dataset
    elif False:
        # Object patches built from the 'ground_truth' field
        patches_view = dataset.to_patches("ground_truth")
        print(patches_view)

        session.view = patches_view
    elif False:
        # Evaluation patches: evaluate 'predictions' against 'ground_truth',
        # storing the results under the 'eval' key
        dataset.evaluate_detections(
            "predictions", gt_field="ground_truth", eval_key="eval"
        )

        evaluation_view = dataset.to_evaluation_patches("eval")
        print(evaluation_view)
        print(evaluation_view.count_values("type"))

        session.view = evaluation_view

    # Blocks execution until the App is closed
    session.wait()
def test_accuracy_yolov4(capsys):
    """Checks that stored YOLOv4 predictions reach a mAP above 0.47 on the
    COCO-2017 validation split.

    For every sample, the CSV file ``<image name>.txt`` in the predictions
    directory is parsed into ``fo.Detection`` objects that are saved in the
    sample's ``yolov4`` field. The field is then evaluated against
    ``ground_truth``.

    Each CSV row is read as ``x, y, width, height, score, class index``, with
    the box given in pixels of the original image. A row whose first cell is
    ``x1`` is treated as the header and skipped.

    Args:
        capsys: the pytest capture fixture; capturing is disabled while the
            predictions are loaded and evaluated so the output is visible
    """
    detection_dir = "/home/Develop/Dataset/Coco2017/validation/val2017Pred/"

    # COCO annotations use 90 class ids while the model predicts only 80
    # classes, so `coco_ids[i]` is the COCO class id for model output class
    # `i` (e.g. model class 0 is COCO class 1)
    coco_ids = [
        1, 2, 3, 4, 5, 6, 7, 8,
        9, 10, 11, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25,
        27, 28, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 44,
        46, 47, 48, 49, 50, 51, 52, 53,
        54, 55, 56, 57, 58, 59, 60, 61,
        62, 63, 64, 65, 67, 70, 72, 73,
        74, 75, 76, 77, 78, 79, 80, 81,
        82, 84, 85, 86, 87, 88, 89, 90,
    ]

    dataset = foz.load_zoo_dataset(
        "coco-2017",
        split="validation",
        dataset_name="evaluate-detections-tutorial",
    )
    dataset.persistent = True

    with capsys.disabled():
        classes = dataset.default_classes

        with fo.ProgressBar() as pb:
            for sample in pb(dataset):
                # Only the image size is needed; close the file right away
                with Image.open(sample.filepath) as image:
                    w, h = image.size

                filename = os.path.splitext(
                    os.path.basename(sample.filepath)
                )[0]
                csv_path = os.path.join(detection_dir, filename + ".txt")

                # One image can have multiple detections
                detections = []

                if os.path.isfile(csv_path):
                    with open(csv_path, "r", newline="") as f:
                        for row in csv.reader(f):
                            # Skip blank lines and the header row
                            if not row or row[0] == "x1":
                                continue

                            x1 = float(row[0])
                            y1 = float(row[1])
                            width = float(row[2])
                            height = float(row[3])

                            # Normalize the box by the image size
                            rel_box = [x1 / w, y1 / h, width / w, height / h]

                            # The CSV yields strings; store a numeric score
                            score = float(row[4])

                            # FiftyOne needs the label of the COCO class id
                            label = classes[coco_ids[int(row[5])]]

                            detections.append(
                                fo.Detection(
                                    label=label,
                                    bounding_box=rel_box,
                                    confidence=score,
                                )
                            )
                else:
                    print("FILE NOT FOUND")

                # Save the predictions on the sample
                sample["yolov4"] = fo.Detections(detections=detections)
                sample.save()

        print("EVALUATION")

        # Always reuse the same `eval_key` for the yolov4 predictions
        results = dataset.evaluate_detections(
            "yolov4",
            gt_field="ground_truth",
            eval_key="yolov4_eval",
            compute_mAP=True,
        )

        print(results.mAP())
        print(results.metrics())

    assert results.mAP() > 0.47
return ( subprocess.check_output(["git", "rev-parse", "HEAD"]) .strip() .decode("utf-8") ) DATASET_NAME = "cifar10" RESULT = OrderedDict({"githash": get_git_revision_hash()}) # Ensure the dataset is downloaded foz.download_zoo_dataset(DATASET_NAME) # CREATE: load the dataset start_time = time.time() dataset = foz.load_zoo_dataset(DATASET_NAME, drop_existing_dataset=True) RESULT["load_dataset"] = time.time() - start_time # READ: load from view read_sample_times = [] for _ in range(9): view = dataset.take(1000) start_time = time.time() samples = [s for s in view] read_sample_times.append(time.time() - start_time) RESULT["read_samples"] = np.median(read_sample_times) # UPDATE: modify a field update_sample_times = [] for _ in range(9):
import eta.core.logging as etal

import fiftyone as fo
import fiftyone.zoo as foz


logger = logging.getLogger(__name__)

# Logs everything written by a `logger` in this benchmark to a `.log` file
# that sits next to this script
log_path = os.path.splitext(os.path.abspath(__file__))[0] + ".log"
etal.custom_setup(
    etal.LoggingConfig({"filename": log_path, "file_format": "%(message)s"}),
    verbose=False,
)

#
# Add samples benchmark
#

dataset = foz.load_zoo_dataset("cifar10", split="train")

# Copies of the samples, so they can be added to fresh datasets below
samples = [s.copy() for s in dataset]

logger.info("\nStarting test")

# Powers of two from 1 to 2048, followed by `None`
batch_sizes = [2 ** exponent for exponent in range(12)] + [None]

for batch_size in batch_sizes:
    logger.info("\nBatch size: %s" % batch_size)

    dataset2 = fo.Dataset()
    dataset2.add_samples(samples, _batch_size=batch_size)
import fiftyone as fo
import fiftyone.zoo as foz

# Blog: https://blog.csdn.net/fengbingchun/article/details/121284157
# reference: https://voxel51.com/docs/fiftyone/tutorials/evaluate_detections.html

# List the datasets that the zoo offers
datasets = foz.list_zoo_datasets()
print("available datasets:", datasets)

# Load the COCO-2017 validation split and mark the dataset persistent
dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    dataset_name="evaluate-detections-tutorial",
)
dataset.persistent = True

session = fo.launch_app(dataset)

# print some information about the dataset
print("dataset info:", dataset)

# print a ground truth detection
sample = dataset.first()
first_detection = sample.ground_truth.detections[0]
print("ground truth:", first_detection)

# Block until the App is closed
session.wait()
def dataset_test():
    """Creates or loads a dataset and prints basic information about it.

    The ``if True`` / ``elif False`` branches are manual toggles that select
    where the dataset comes from: the "quickstart" zoo dataset (the branch
    that runs as written), other zoo datasets, a directory on disk, or lists,
    directories and glob patterns of images or videos. The paths in the
    disabled branches are placeholders.
    """
    if True:
        my_dataset_name = "quickstart"
        # Reuse the dataset if it already exists, otherwise load it
        if my_dataset_name in fo.list_datasets():
            dataset = fo.load_dataset(my_dataset_name)
        else:
            dataset = foz.load_zoo_dataset(my_dataset_name)
    elif False:
        dataset = foz.load_zoo_dataset(
            "coco-2017",
            split="validation",
            dataset_name="evaluate-detections-tutorial",
        )
    elif False:
        dataset = foz.load_zoo_dataset(
            "open-images-v6",
            split="validation",
            max_samples=100,
            seed=51,
            shuffle=True,
        )
    elif True:
        print("Datasets = {}.".format(fo.list_datasets()))

        my_dataset_name = "my_dataset"
        try:
            # REF [site] >> https://voxel51.com/docs/fiftyone/user_guide/dataset_creation/datasets.html
            if True:
                dataset_dir_path = "/path/to/data"
                dataset = fo.Dataset.from_dir(
                    dataset_dir=dataset_dir_path,
                    dataset_type=fo.types.ImageClassificationDirectoryTree,
                    name=my_dataset_name,
                )
            elif False:
                # The directory containing the source images.
                data_dir_path = "/path/to/images"
                # The path to the COCO labels JSON file.
                label_filepath = "/path/to/coco-labels.json"
                dataset = fo.Dataset.from_dir(
                    dataset_type=fo.types.COCODetectionDataset,
                    data_path=data_dir_path,
                    labels_path=label_filepath,
                )
        except ValueError:
            # Fall back to loading the dataset by name
            dataset = fo.load_dataset(my_dataset_name)
        # fo.delete_dataset(my_dataset_name) removes the dataset again
    elif False:
        if True:
            # Create a dataset from a list of images.
            dataset = fo.Dataset.from_images(
                ["/path/to/image1.jpg", "/path/to/image2.jpg"]
            )
        elif False:
            # Create a dataset from a directory of images.
            dataset = fo.Dataset.from_images_dir("/path/to/images")
        elif False:
            # Create a dataset from a glob pattern of images.
            dataset = fo.Dataset.from_images_patt("/path/to/images/*.jpg")
    elif False:
        if True:
            # Create a dataset from a list of videos.
            dataset = fo.Dataset.from_videos(
                ["/path/to/video1.mp4", "/path/to/video2.mp4"]
            )
        elif False:
            # Create a dataset from a directory of videos.
            dataset = fo.Dataset.from_videos_dir("/path/to/videos")
        elif False:
            # Create a dataset from a glob pattern of videos.
            dataset = fo.Dataset.from_videos_patt("/path/to/videos/*.mp4")

    dataset.persistent = True

    print("Media type = {}.".format(dataset.media_type))
    # Read back the same `persistent` attribute that was set above
    print("Persistence = {}.".format(dataset.persistent))
    print("#examples = {}.".format(len(dataset)))
    # dataset.count() is an alternative to len(dataset)

    # Print some information about the dataset.
    print(dataset)

    # Print a ground truth detection, if the first sample has any.
    sample = dataset.first()
    ground_truth = sample.ground_truth
    detections = getattr(ground_truth, "detections", None) if ground_truth else None
    if detections:
        print(detections[0])
def dataset_view_test():
    """Walks through dataset view operations on the "quickstart" zoo dataset.

    The sections below cover, in order: basic view access, view stages,
    slicing and boolean indexing, sorting, shuffling, random sampling,
    filtering, field selection/exclusion, and date-based matching. Most
    results are only assigned to local variables or printed; the few
    ``assert`` statements check that looking a sample up by ID or filepath
    returns a matching sample.
    """
    dataset = foz.load_zoo_dataset("quickstart")

    view = dataset.view()
    print(view)

    print("Media type = {}.".format(view.media_type))
    print("#examples = {}.".format(len(view)))
    #print("#examples = {}.".format(view.count()))

    #--------------------
    #for sample in view:
    #    print(sample)

    #--------------------
    sample = view.take(1).first()
    print(type(sample))  # fiftyone.core.sample.SampleView.

    # A view can be indexed by sample ID or by filepath.
    same_sample = view[sample.id]
    also_same_sample = view[sample.filepath]
    #view[other_sample_id]  # KeyError: sample non-existent or not in view.

    # List available view operations on a dataset.
    print(dataset.list_view_stages())

    #--------------------
    # View stages.

    # Random set of 100 samples from the dataset
    random_view = dataset.take(100)
    print("#examples = {}.".format(len(random_view)))

    # Sort 'random_view' by filepath.
    sorted_random_view = random_view.sort_by("filepath")

    #--------------------
    # Slicing.

    # Skip the first 2 samples and take the next 3.
    range_view1 = dataset.skip(2).limit(3)

    # Equivalently, using array slicing.
    range_view2 = dataset[2:5]

    view = dataset[10:100]
    sample10 = view.first()
    # NOTE: despite its name, this is the last sample of the [10:100) slice.
    sample100 = view.last()

    also_sample10 = view[sample10.id]
    assert also_sample10.filepath == sample10.filepath

    also_sample100 = view[sample100.filepath]
    assert sample100.id == also_sample100.id

    # The lookup returns a distinct object, not the one from first().
    assert sample10 is not also_sample10

    # A boolean array encoding the samples to extract.
    bool_array = np.array(dataset.values("uniqueness")) > 0.7
    view = dataset[bool_array]
    print("#examples = {}.".format(len(view)))

    # The same selection, expressed as the IDs where 'bool_array' is True.
    ids = itertools.compress(dataset.values("id"), bool_array)
    view = dataset.select(ids)
    print("#examples = {}.".format(len(view)))

    # ViewExpression defining the samples to match.
    expr = fo.ViewField("uniqueness") > 0.7

    # Use a match() expression to define the view.
    view = dataset.match(expr)
    print("#examples = {}.".format(len(view)))

    # Equivalent: using boolean expression indexing is allowed too.
    view = dataset[expr]
    print("#examples = {}.".format(len(view)))

    #--------------------
    # Sorting.

    view = dataset.sort_by("filepath")
    view = dataset.sort_by("filepath", reverse=True)

    # Sort by number of detections in 'Detections' field 'ground_truth'.
    view = dataset.sort_by(fo.ViewField("ground_truth.detections").length(), reverse=True)

    print(len(view.first().ground_truth.detections))
    print(len(view.last().ground_truth.detections))

    #--------------------
    # Shuffling.

    # Randomly shuffle the order of the samples in the dataset.
    view1 = dataset.shuffle()

    # Randomly shuffle the samples in the dataset with a fixed seed.
    view2 = dataset.shuffle(seed=51)
    print(view2.first().id)

    # Shuffle again with the same seed and print the first ID for comparison.
    also_view2 = dataset.shuffle(seed=51)
    print(also_view2.first().id)

    #--------------------
    # Random sampling.

    # Take 5 random samples from the dataset.
    view1 = dataset.take(5)

    # Take 5 random samples from the dataset with a fixed seed.
    view2 = dataset.take(5, seed=51)
    print(view2.first().id)

    # Sample again with the same seed and print the first ID for comparison.
    also_view2 = dataset.take(5, seed=51)
    print(also_view2.first().id)

    #--------------------
    # Filtering.

    # Populate metadata on all samples.
    dataset.compute_metadata()

    # Samples whose image is less than 48 KB.
    small_images_view = dataset.match(fo.ViewField("metadata.size_bytes") < 48 * 1024)

    # Samples that contain at least one prediction with confidence above 0.99 or whose label is "cat" or "dog".
    match = (fo.ViewField("confidence") > 0.99) | (fo.ViewField("label").is_in(("cat", "dog")))
    matching_view = dataset.match(fo.ViewField("predictions.detections").filter(match).length() > 0)

    # The validation split of the dataset.
    val_view = dataset.match_tags("validation")

    # Union of the validation and test splits.
    val_test_view = dataset.match_tags(("validation", "test"))

    # The subset of samples where predictions have been computed.
    predictions_view = dataset.exists("predictions")

    # Get the IDs of two random samples.
    sample_ids = [
        dataset.take(1).first().id,
        dataset.take(1).first().id,
    ]

    # Include only samples with the given IDs in the view.
    selected_view = dataset.select(sample_ids)

    # Exclude samples with the given IDs from the view.
    excluded_view = dataset.exclude(sample_ids)

    for sample in dataset.select_fields("ground_truth"):
        print(sample.id)  # OKAY: 'id' is always available
        print(sample.ground_truth)  # OKAY: 'ground_truth' was selected
        #print(sample.predictions)  # AttributeError: 'predictions' was not selected

    for sample in dataset.exclude_fields("predictions"):
        print(sample.id)  # OKAY: 'id' is always available
        print(sample.ground_truth)  # OKAY: 'ground_truth' was not excluded
        #print(sample.predictions)  # AttributeError: 'predictions' was excluded

    #--------------------
    # Date-based views.

    # From here on, 'dataset' is a new dataset of three samples whose
    # 'capture_date' values are one hour apart.
    dataset = fo.Dataset()
    dataset.add_samples(
        [
            fo.Sample(
                filepath="image1.png",
                capture_date=datetime(2021, 8, 24, 1, 0, 0),
            ),
            fo.Sample(
                filepath="image2.png",
                capture_date=datetime(2021, 8, 24, 2, 0, 0),
            ),
            fo.Sample(
                filepath="image3.png",
                capture_date=datetime(2021, 8, 24, 3, 0, 0),
            ),
        ]
    )

    query_date = datetime(2021, 8, 24, 2, 1, 0)
    query_delta = timedelta(minutes=30)

    # Samples with capture date after 2021-08-24 02:01:00.
    view = dataset.match(fo.ViewField("capture_date") > query_date)
    print(view)

    # Samples with capture date within 30 minutes of 2021-08-24 02:01:00.
    view = dataset.match(abs(fo.ViewField("capture_date") - query_date) < query_delta)
    print(view)