def select_multi_class_train_eval_dataset(dataset_name, prediction_field, train_size):
    # NOTE: prediction_field and train_size are currently unused; the field
    # name and the 200-sample minimum / 100-sample train split are hardcoded
    dataset = fo.load_dataset(dataset_name)

    # Clear any existing split tags so the split starts fresh
    logging.info("Removing existing multi_class_train tags")
    train_view = dataset.match_tags("multi_class_train")
    for sample in train_view:
        sample.tags = [tag for tag in sample.tags if tag != "multi_class_train"]
        sample.save()

    logging.info("Removing existing multi_class_eval tags")
    eval_view = dataset.match_tags("multi_class_eval")
    for sample in eval_view:
        sample.tags = [tag for tag in sample.tags if tag != "multi_class_eval"]
        sample.save()

    norm_models = dataset.distinct("norm_model.label")
    for norm_model in norm_models:
        view = (
            dataset.filter_labels("norm_model", F("label") == norm_model)
            .match(F("auto_aug_predict_tiled.detections").length() > 0)
            .shuffle()
        )
        print("{}: {}".format(norm_model, len(view)))
        # Only split models with at least 200 usable samples: the first 100
        # go to train, the remainder to eval
        if len(view) >= 200:
            for sample in view[:100]:
                sample.tags.append("multi_class_train")
                sample.save()
            for sample in view[100:]:
                sample.tags.append("multi_class_eval")
                sample.save()
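# A usage sketch (not from the original source; the dataset name is
# hypothetical). As noted above, prediction_field and train_size are accepted
# but unused by the current body:
#
# select_multi_class_train_eval_dataset("plane-dataset", "auto_aug_predict_tiled", 100)
# train_view = fo.load_dataset("plane-dataset").match_tags("multi_class_train")
# eval_view = fo.load_dataset("plane-dataset").match_tags("multi_class_eval")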
def main(
    dataset_name,
    label_map_path,
    groundtruth_loc_field_name,
    groundtruth_img_labels_field_name,
    prediction_field_name,
    iou_threshold,
):
    dataset = fo.load_dataset(dataset_name)
    evaluate_dataset(
        dataset=dataset,
        label_map_path=label_map_path,
        groundtruth_loc_field_name=groundtruth_loc_field_name,
        groundtruth_img_labels_field_name=groundtruth_img_labels_field_name,
        prediction_field_name=prediction_field_name,
        iou_threshold=iou_threshold,
    )

    print("Cloning True Positives to a new field...")
    tp_view = dataset.filter_detections(prediction_field_name, F("eval") == "true_positive")
    tp_view.clone_sample_field(prediction_field_name, prediction_field_name + "_TP")

    print("Cloning False Positives to a new field...")
    fp_view = dataset.filter_detections(prediction_field_name, F("eval") == "false_positive")
    fp_view.clone_sample_field(prediction_field_name, prediction_field_name + "_FP")
def test_accuracy_resnet50(capsys):
    detectionDir = "/home/Develop/Dataset/Imagenet/Validation-2012/prediction"
    dataset = fo.load_dataset("imagenet_validation")
    classes = dataset.default_classes
    with capsys.disabled():
        with fo.ProgressBar() as pb:
            for sample in pb(dataset):
                head, tail = os.path.split(sample.filepath)
                filename, file_extension = os.path.splitext(tail)
                csvPath = detectionDir + "/" + filename + ".txt"
                with open(csvPath, "r") as file:
                    reader = csv.reader(file)
                    for row in reader:
                        cls_index = row[0]
                        sample["resnet50"] = fo.Classification(
                            label=classes[int(cls_index)],
                        )
                sample.save()

        results = dataset.evaluate_classifications(
            "resnet50",
            gt_field="ground_truth",
            eval_key="resnet50_eval",
        )
        print(results.metrics())
        assert results.metrics()["accuracy"] > 0.74
def normalize_model_values(dataset_name):
    """Standardize plane model string values.

    The plane model string values received from ADS-B broadcasts are not
    standardized. An A319 model, for instance, could be represented as
    A319-112, A319-115, or A319-132. This function standardizes all model
    strings.

    Args:
        dataset_name (str) - the name of a voxel51 dataset

    Returns:
        dataset - a voxel51 dataset object
    """
    # TODO: Need to add testing.
    dataset = fo.load_dataset(dataset_name)

    # JSON file storing raw plane model strings as keys and standardized
    # model names as values
    with open("plane_model_dict.json", "r") as file_path:
        plane_model_dict = json.load(file_path)

    # Loop through each sample that has a model_name field
    for sample in dataset.exists("model_name"):
        model = sample["model_name"].label
        norm_model = plane_model_dict.get(model, None)
        if norm_model is not None:
            sample["norm_model"] = fo.Classification(label=norm_model)
            sample.save()
        else:
            logging.info("Match not found for: %s", model)
    return dataset
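# For reference, plane_model_dict.json is a flat mapping from raw model
# strings to normalized names. A hypothetical excerpt (the real file ships
# with the project), followed by a sketch of the call:
#
# {
#     "A319-112": "A319",
#     "A319-115": "A319",
#     "737-823": "737-800"
# }
#
# dataset = normalize_model_values("plane-dataset")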
def load_fo_dataset(dir, name):
    if not fo.dataset_exists(name):
        dataset = fo.Dataset.from_dir(dir, fo.types.COCODetectionDataset, name)
        dataset.persistent = True
    else:
        dataset = fo.load_dataset(name)
    return dataset
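# Example usage (paths and names are placeholders): imports a COCO-format
# export on first run and reuses the persisted dataset afterwards.
#
# dataset = load_fo_dataset("/path/to/coco-export", "my_coco_dataset")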
def cleanup_fo_dataset():
    # Teardown-style generator (pytest fixture pattern): everything after the
    # yield runs once the test using this fixture completes
    dataset_test_names = ["_iv_test", "_iv_test_1"]
    yield
    for dataset_name in dataset_test_names:
        if dataset_name in list_datasets():
            ds = load_dataset(dataset_name)
            ds.delete()
            del ds
def add_normalized_model_to_plane_detection(dataset_name, prediction_field, output_field):
    dataset = fo.load_dataset(dataset_name)
    for sample in dataset.exists("norm_model"):
        # Copy the predicted detections and relabel each one with the
        # sample-level normalized model name
        new_detections = sample[prediction_field].copy()
        for detection in new_detections["detections"]:
            detection["label"] = sample["norm_model"]["label"]
        sample[output_field] = new_detections
        sample.save()
def build_multi_class_train_eval_dataset(dataset_name):
    dataset = fo.load_dataset(dataset_name)
    norm_models = dataset.distinct("norm_model.label")
    for norm_model in norm_models:
        view = dataset.filter_labels(
            "norm_model", F("label") == norm_model
        ).select_fields("icao24")
        unique_aircraft = view.distinct("icao24.label")
        num_unique_aircraft = len(unique_aircraft)
        if num_unique_aircraft > 1:
            # The first aircraft body goes to train; all others go to eval
            _tag_samples_by_icao24(dataset, unique_aircraft[0], "multi_class_train")
            for icao24 in unique_aircraft[1:]:
                _tag_samples_by_icao24(dataset, icao24, "multi_class_eval")
            print("{}: {}".format(norm_model, len(unique_aircraft)))
            print("\tTrain:{}".format(unique_aircraft[0]))
            print("\tEval:{}".format(unique_aircraft[1:]))
def evaluate_detection_model(dataset_name, prediction_field, evaluation_key, ground_truth_field):
    dataset = fo.load_dataset(dataset_name)
    view = dataset.match_tags("multi_class_eval")

    # Set an empty detections field where one is missing so evaluation does
    # not fail on unlabeled samples
    for sample in view:
        if sample[ground_truth_field] is None:
            sample[ground_truth_field] = fo.Detections(detections=[])
            sample.save()
        if sample[prediction_field] is None:
            sample[prediction_field] = fo.Detections(detections=[])
            sample.save()

    results = view.evaluate_detections(
        prediction_field,
        gt_field=ground_truth_field,
        eval_key=evaluation_key,
        compute_mAP=True,
    )

    # Get the 15 most common classes in the dataset
    counts = view.count_values("{}.detections.label".format(ground_truth_field))
    classes = sorted(counts, key=counts.get, reverse=True)[:15]

    # Print a classification report for the top-15 classes
    results.print_report(classes=classes)

    # Print some statistics about the total TP/FP/FN counts
    logging.info("TP: %d", dataset.sum(evaluation_key + "_tp"))
    logging.info("FP: %d", dataset.sum(evaluation_key + "_fp"))
    logging.info("FN: %d", dataset.sum(evaluation_key + "_fn"))

    # Create a view that has samples with the most false positives first, and
    # only includes false positive boxes in the predictions field
    eval_view = view.sort_by(evaluation_key + "_fp", reverse=True).filter_labels(
        prediction_field, F(evaluation_key) == "fp"
    )

    logging.info("mAP: %s", results.mAP())

    plot = results.plot_pr_curves(classes=classes, backend="matplotlib")
    plot.savefig("/tf/dataset-export/" + evaluation_key + "_pr_curves.png")

    plot = results.plot_confusion_matrix(classes=classes, backend="matplotlib")
    plot.savefig("/tf/dataset-export/" + evaluation_key + "_confusion_matrix.png")
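# Example invocation (a sketch; the dataset and prediction field names are
# hypothetical, though "multi_class_detections" matches the ground-truth
# field used elsewhere in this module):
#
# evaluate_detection_model(
#     "plane-dataset",
#     prediction_field="multi_class_predict",
#     evaluation_key="multi_eval",
#     ground_truth_field="multi_class_detections",
# )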
def random_multi_class_train_eval_dataset(dataset_name):
    """Splits the dataset into Training and Eval samples.

    For aircraft models with more than one example, the samples will be
    divided at random, 75% to Train and 25% to Eval. The samples are
    separated using tags.

    Args:
        dataset_name (str): the name of the Voxel51 dataset to use
    """
    dataset = fo.load_dataset(dataset_name)

    logging.info("Removing existing multi_class_train tags")
    train_view = dataset.match_tags("multi_class_train")
    for sample in train_view:
        sample.tags = [tag for tag in sample.tags if tag != "multi_class_train"]
        sample.save()

    logging.info("Removing existing multi_class_eval tags")
    eval_view = dataset.match_tags("multi_class_eval")
    for sample in eval_view:
        sample.tags = [tag for tag in sample.tags if tag != "multi_class_eval"]
        sample.save()

    norm_models = dataset.distinct("norm_model.label")
    for norm_model in norm_models:
        view = (
            dataset.filter_labels("norm_model", F("label") == norm_model)
            .match(F("multi_class_detections.detections").length() > 0)
            .shuffle()
        )
        train_count = math.floor(len(view) * 0.75)
        eval_count = len(view) - train_count
        for sample in view[:train_count]:
            sample.tags.append("multi_class_train")
            sample.save()
        for sample in view[train_count:]:
            sample.tags.append("multi_class_eval")
            sample.save()
        print(
            "{} Total: {} Train: {} Eval: {}".format(
                norm_model, len(view), train_count, eval_count
            )
        )

    # Also add 250 random background samples (no detections) to the train split
    view = dataset.match(F("multi_class_detections.detections").length() == 0).take(250)
    for sample in view:
        sample.tags.append("multi_class_train")
        sample.save()
def export_yolo_multi_class_dataset(dataset_name, label_field, tag, export_name):
    export_title = tag + "_" + export_name
    export_dir = "/tf/dataset-export/" + export_title + "/"
    logging.info(
        "Export samples tagged: %s to %s and labeling them using: %s",
        tag,
        export_dir,
        label_field,
    )
    dataset = fo.load_dataset(dataset_name)
    view = dataset.match_tags(tag).sort_by("norm_model.label")

    # The type of dataset to export
    # Any subclass of `fiftyone.types.Dataset` is supported
    dataset_type = fo.types.YOLOv4Dataset

    # Export the dataset!
    view.export(
        export_dir=export_dir,
        dataset_type=dataset_type,
        label_field=label_field,
    )

    export_file = "/tf/dataset-export/" + export_title + ".tar.gz"
    # nosemgrep:github.workflows.config.subprocess-shell-true
    subprocess.run("/bin/tar -zcvf {} {}".format(export_file, export_dir), shell=True)
def delete_dataset(name):
    """Deletes the FiftyOne dataset with the given name.

    If a reference to the dataset exists in memory, only ``Dataset.name`` and
    ``Dataset.deleted`` will be valid attributes. Accessing any other
    attributes or methods will raise a :class:`DatasetError`.

    If a reference to a sample exists in memory, the sample's dataset will be
    "unset" such that ``sample.in_dataset == False``.

    Args:
        name: the name of the dataset

    Raises:
        ValueError: if the dataset is not found
    """
    dataset = fo.load_dataset(name)
    dataset.delete()
def split_multi_class_train_eval_dataset(dataset_name):
    """Splits the dataset into Training and Eval samples.

    For aircraft models with more than one example, the aircraft bodies will
    be divided, 75% to Train and 25% to Eval. The samples are separated
    using tags.

    Args:
        dataset_name (str): The name of the Voxel51 dataset to use
    """
    dataset = fo.load_dataset(dataset_name)

    # Remove any existing tags from the dataset to ensure that you are
    # starting fresh
    logging.info("Removing existing multi_class_train tags")
    train_view = dataset.match_tags("multi_class_train")
    for sample in train_view:
        sample.tags = [tag for tag in sample.tags if tag != "multi_class_train"]
        sample.save()

    logging.info("Removing existing multi_class_eval tags")
    eval_view = dataset.match_tags("multi_class_eval")
    for sample in eval_view:
        sample.tags = [tag for tag in sample.tags if tag != "multi_class_eval"]
        sample.save()

    # Find all of the unique normalized aircraft models
    norm_models = dataset.distinct("norm_model.label")
    for norm_model in norm_models:
        view = (
            dataset.filter_labels("norm_model", F("label") == norm_model)
            .select_fields("icao24")
            .shuffle()
        )
        unique_aircraft = view.distinct("icao24.label")
        if len(unique_aircraft) > 1:
            split_index = math.floor(len(unique_aircraft) * 0.75)
            train_aircraft = unique_aircraft[:split_index]
            eval_aircraft = unique_aircraft[split_index:]
            print(
                "{} Total: {} Train: {} Eval: {}".format(
                    norm_model,
                    len(unique_aircraft),
                    len(train_aircraft),
                    len(eval_aircraft),
                )
            )
            for icao24 in train_aircraft[:1]:
                _tag_samples_by_icao24(dataset, icao24, "multi_class_train", False)
            for icao24 in train_aircraft[1:]:
                _tag_samples_by_icao24(dataset, icao24, "multi_class_train", True)
            for icao24 in eval_aircraft:
                _tag_samples_by_icao24(dataset, icao24, "multi_class_eval", True)
def load_sample(
    self, sample: Mapping[str, str]
) -> Mapping[str, Union[torch.Tensor, torch.Size]]:
    _fo_dataset = fo.load_dataset(self._fo_dataset_name)
    img_path = sample[DefaultDataKeys.INPUT]
    fo_sample = _fo_dataset[img_path]

    img: torch.Tensor = torchvision.io.read_image(img_path)  # CxHxW
    img_labels: torch.Tensor = torch.from_numpy(
        fo_sample[self.label_field].mask
    )  # HxW

    sample[DefaultDataKeys.INPUT] = img.float()
    sample[DefaultDataKeys.TARGET] = img_labels.float()
    sample[DefaultDataKeys.METADATA] = {
        "filepath": img_path,
        "size": img.shape,
    }
    return sample
def create_voxel51_dataset(dataset_name):
    """Create a voxel51 dataset or load an existing one.

    Args:
        dataset_name: name of the voxel51 dataset to create or load

    Returns:
        dataset (voxel51 dataset object)
    """
    # attempt to create the dataset
    try:
        dataset = fo.Dataset(name=dataset_name)
        dataset.persistent = True
        logging.info("Created %s dataset", dataset_name)
    # If the dataset already exists, load it instead
    except ValueError:
        dataset = fo.load_dataset(name=dataset_name)
        logging.info("Dataset already exists. Loaded %s dataset", dataset_name)
    return dataset
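# Example (the name is illustrative): safe to call repeatedly, since an
# existing dataset is loaded rather than recreated.
#
# dataset = create_voxel51_dataset("plane-dataset")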
def merge_labelbox_dataset_with_voxel51(
    voxel51_dataset_name, labelbox_json_path, labelbox_id_field="labelbox_id"
):
    """Merge the labels created via Labelbox with the voxel51 dataset.

    The JSON referenced by labelbox_json_path must be manually downloaded
    from the Labelbox website.

    Args:
        voxel51_dataset_name (str) - the voxel51 dataset name
        labelbox_json_path (str) - a path to the Labelbox label export JSON
        labelbox_id_field (str) - unique ID required for merging the dataset

    Returns:
        None
    """
    dataset = fo.load_dataset(voxel51_dataset_name)
    foul.import_from_labelbox(
        dataset, labelbox_json_path, labelbox_id_field=labelbox_id_field
    )
def test_accuracy_u2Squared(capsys):
    """Test U2Squared F-score on the DUTS-TE dataset."""
    dataset = fo.load_dataset("duts_te_validation")
    DatasetPathPrediction = "/home/Develop/Dataset/SemanticSegmentation/DUTS-TE/DUTS-TE-Image-Pred-Mask/"
    with capsys.disabled():
        with fo.ProgressBar() as pb:
            for sample in pb(dataset):
                head, tail = os.path.split(sample.filepath)
                maskPredPath = DatasetPathPrediction + tail
                print(maskPredPath)
                maskPred = cv2.imread(maskPredPath, cv2.IMREAD_UNCHANGED)
                # Binarize the predicted mask at the usual 127 threshold
                ret, maskPred = cv2.threshold(maskPred, 127, 255, cv2.THRESH_BINARY)
                sample["u2squared"] = fo.Segmentation(mask=maskPred)
                sample.save()

        results = dataset.evaluate_segmentations(
            "u2squared",
            gt_field="ground_truth",
            eval_key="eval_segmentation",
        )
        print(results.metrics())
        # 0.80 is the value reported in the paper as the weighted F-measure
        # for the DUTS-TE dataset
        assert results.metrics()["fscore"] > 0.80
def create_fo_dataset(
    detections: List[Any],
    dataset_name: str,
    exist_ok: bool = False,
    persistent=False,
    field_name: str = "icevision_record",
    transformations=None,
    # Undo postprocess via sample: icevision.models.inference.postprocess_bbox
    # or a custom undo function
    undo_bbox_tfms_fn: Callable[[Union[List[int], BBox]], List[int]] = None,
) -> Dataset:
    """Takes an iterable of either Predictions or records and creates a
    fo.Dataset or adds them to an existing one.

    A record contains a preprocessed image of your data. However, fiftyone
    expects a filepath to your image. Thus, we need to undo the
    pre-processing of the bounding box. Two methods are provided for this:
    - The postprocess_bbox() function from icevision.models.inference
    - A custom undo_bbox_tfms_fn

    If sample is not defined, a new sample is created.

    Parameters
    ----------
    detections: An iterable of iv Predictions or records
    dataset_name: Name of dataset
    exist_ok: Whether the data should be added to an existing dataset (active opt-in)
    persistent: Whether a dataset should be persistent on creation
    field_name: The field name that is used if records are in the iterable
    transformations: list of model-pre-processing transforms
    undo_bbox_tfms_fn: Custom function that undoes transformations

    Returns
    -------
    fo.Dataset
    """
    if not fiftyone_available:
        raise ImportError("Fiftyone is not installed on your system.")

    # Validate input
    if dataset_name in list_datasets():
        if exist_ok:
            _internal_dataset = load_dataset(dataset_name)
        else:
            raise ValueError(
                f"Dataset with name {dataset_name} already exists and exist_ok is {exist_ok}"
            )
    else:
        _internal_dataset = Dataset(name=dataset_name, persistent=persistent)

    _sample_list = []
    for element in detections:
        if isinstance(element, Prediction):
            _sample_list.append(
                convert_prediction_to_fo_sample(
                    element,
                    transformations=transformations,
                    undo_bbox_tfms_fn=undo_bbox_tfms_fn,
                )
            )
        else:
            _sample_list.append(
                convert_record_to_fo_sample(
                    element,
                    field_name=field_name,
                    transformations=transformations,
                    undo_bbox_tfms_fn=undo_bbox_tfms_fn,
                )
            )

    _internal_dataset.add_samples(_sample_list)
    return _internal_dataset
def load_zoo_dataset(
    name,
    split=None,
    splits=None,
    dataset_dir=None,
    download_if_necessary=True,
    drop_existing_dataset=False,
):
    """Loads the dataset of the given name from the FiftyOne Dataset Zoo as
    a :class:`fiftyone.core.dataset.Dataset`.

    By default, the dataset will be downloaded if it does not already exist
    in the specified directory.

    Args:
        name: the name of the zoo dataset to load. Call
            :func:`list_zoo_datasets` to see the available datasets
        split (None): a split to load, if applicable. Typical values are
            ``("train", "validation", "test")``. If neither ``split`` nor
            ``splits`` are provided, all available splits are loaded. Consult
            the documentation for the :class:`ZooDataset` you specified to
            see the supported splits
        splits (None): a list of splits to load, if applicable. Typical
            values are ``("train", "validation", "test")``. If neither
            ``split`` nor ``splits`` are provided, all available splits are
            loaded. Consult the documentation for the :class:`ZooDataset` you
            specified to see the supported splits
        dataset_dir (None): the directory in which the dataset is stored or
            will be downloaded. By default,
            :func:`fiftyone.core.dataset.get_default_dataset_dir` is used
        download_if_necessary (True): whether to download the dataset if it
            is not found in the specified dataset directory
        drop_existing_dataset (False): whether to drop an existing dataset
            with the same name if it exists

    Returns:
        a :class:`fiftyone.core.dataset.Dataset`
    """
    splits = _parse_splits(split, splits)

    if download_if_necessary:
        info, dataset_dir = download_zoo_dataset(
            name, splits=splits, dataset_dir=dataset_dir
        )
        zoo_dataset = info.get_zoo_dataset()
    else:
        zoo_dataset, dataset_dir = _parse_dataset_details(name, dataset_dir)
        info = zoo_dataset.load_info(dataset_dir)

    dataset_name = zoo_dataset.name
    if splits is not None:
        dataset_name += "-" + "-".join(splits)

    if fo.dataset_exists(dataset_name):
        if not drop_existing_dataset:
            logger.info(
                "Loading existing dataset '%s'. To reload from disk, first "
                "delete the existing dataset",
                dataset_name,
            )
            return fo.load_dataset(dataset_name)

        fo.delete_dataset(dataset_name)

    if splits is None and zoo_dataset.has_splits:
        splits = zoo_dataset.supported_splits

    dataset = fo.Dataset(dataset_name)
    dataset_type = info.get_dataset_type()

    if splits:
        for split in splits:
            split_dir = zoo_dataset.get_split_dir(dataset_dir, split)
            tags = [split]
            logger.info("Loading '%s' split '%s'", zoo_dataset.name, split)
            dataset.add_dir(split_dir, dataset_type, tags=tags)
    else:
        logger.info("Loading '%s'", zoo_dataset.name)
        dataset.add_dir(dataset_dir, dataset_type)

    if info.classes is not None:
        dataset.info["classes"] = info.classes

    dataset.save()
    return dataset
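# Example calls (a sketch; "quickstart" and "coco-2017" are zoo dataset names
# that also appear elsewhere in this file):
#
# dataset = load_zoo_dataset("quickstart")
# dataset = load_zoo_dataset("coco-2017", split="validation")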
def run_detection_model(dataset_name, training_name, prediction_field):
    model_path = (
        "/tf/model-export/" + training_name + "/image_tensor_saved_model/saved_model"
    )
    # This is the minimum score for adding a prediction. This helps keep out
    # bad predictions, but it may need to be adjusted if your model is not
    # that good yet.
    min_score = 0.5

    logging.info("Loading model...")
    start_time = time.time()
    tf.keras.backend.clear_session()
    detect_fn = tf.saved_model.load(model_path)
    infer = detect_fn.signatures["serving_default"]
    end_time = time.time()
    elapsed_time = end_time - start_time
    logging.info("Loading model took: " + str(elapsed_time) + "s")

    category_index = _load_label_map(training_name)
    dataset = fo.load_dataset(dataset_name)
    for sample in dataset.select_fields("filepath"):
        start_time = time.time()
        img = load_img(sample.filepath)
        img_array = img_to_array(img)
        input_tensor = np.expand_dims(img_array, 0)
        detections = detect_fn(input_tensor)

        exportDetections = []
        for i, detectScore in enumerate(detections["detection_scores"][0]):
            if detectScore > min_score:
                label = _find_class_name(
                    category_index, int(detections["detection_classes"][0][i])
                )
                confidence = detections["detection_scores"][0][i]
                print("\t- {}: {}".format(label, confidence))

                # TF Obj Detect bounding boxes are: [ymin, xmin, ymax, xmax]
                # For Voxel51, bounding box coordinates should be relative
                # values in [0, 1] in the following format:
                # [top-left-x, top-left-y, width, height]
                x1 = detections["detection_boxes"][0][i][1]
                y1 = detections["detection_boxes"][0][i][0]
                x2 = detections["detection_boxes"][0][i][3]
                y2 = detections["detection_boxes"][0][i][2]
                w = x2 - x1
                h = y2 - y1
                bbox = [x1, y1, w, h]

                exportDetections.append(
                    fo.Detection(label=label, bounding_box=bbox, confidence=confidence)
                )

        # Store detections in a field name of your choice
        sample[prediction_field] = fo.Detections(detections=exportDetections)
        sample.save()
        end_time = time.time()
        print("Processing {} took: {}s".format(sample.filepath, end_time - start_time))
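# A standalone sketch (not part of the original source) of the box conversion
# used above; handy for checking the geometry without TensorFlow:
def tf_box_to_voxel51(box):
    """Convert [ymin, xmin, ymax, xmax] to [top-left-x, top-left-y, width, height]."""
    ymin, xmin, ymax, xmax = box
    return [xmin, ymin, xmax - xmin, ymax - ymin]

# Values chosen to be exact in binary floating point:
assert tf_box_to_voxel51([0.25, 0.5, 0.75, 1.0]) == [0.5, 0.25, 0.5, 0.5]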
def dataset_test():
    if True:
        my_dataset_name = "quickstart"
        if my_dataset_name in fo.list_datasets():
            dataset = fo.load_dataset(my_dataset_name)
        else:
            dataset = foz.load_zoo_dataset(my_dataset_name)
    elif False:
        dataset = foz.load_zoo_dataset(
            "coco-2017",
            split="validation",
            dataset_name="evaluate-detections-tutorial",
        )
    elif False:
        dataset = foz.load_zoo_dataset(
            "open-images-v6",
            split="validation",
            max_samples=100,
            seed=51,
            shuffle=True,
        )
    elif True:
        print("Datasets = {}.".format(fo.list_datasets()))

        my_dataset_name = "my_dataset"
        try:
            # REF [site] >> https://voxel51.com/docs/fiftyone/user_guide/dataset_creation/datasets.html
            if True:
                dataset_dir_path = "/path/to/data"
                dataset = fo.Dataset.from_dir(
                    dataset_dir=dataset_dir_path,
                    dataset_type=fo.types.ImageClassificationDirectoryTree,
                    name=my_dataset_name,
                )
            elif False:
                # The directory containing the source images.
                data_dir_path = "/path/to/images"
                # The path to the COCO labels JSON file.
                label_filepath = "/path/to/coco-labels.json"
                dataset = fo.Dataset.from_dir(
                    dataset_type=fo.types.COCODetectionDataset,
                    data_path=data_dir_path,
                    labels_path=label_filepath,
                )
        except ValueError:
            dataset = fo.load_dataset(my_dataset_name)

        #fo.delete_dataset(my_dataset_name)
    elif False:
        if True:
            # Create a dataset from a list of images.
            dataset = fo.Dataset.from_images(
                ["/path/to/image1.jpg", "/path/to/image2.jpg"]
            )
        elif False:
            # Create a dataset from a directory of images.
            dataset = fo.Dataset.from_images_dir("/path/to/images")
        elif False:
            # Create a dataset from a glob pattern of images.
            dataset = fo.Dataset.from_images_patt("/path/to/images/*.jpg")
    elif False:
        if True:
            # Create a dataset from a list of videos.
            dataset = fo.Dataset.from_videos(
                ["/path/to/video1.mp4", "/path/to/video2.mp4"]
            )
        elif False:
            # Create a dataset from a directory of videos.
            dataset = fo.Dataset.from_videos_dir("/path/to/videos")
        elif False:
            # Create a dataset from a glob pattern of videos.
            dataset = fo.Dataset.from_videos_patt("/path/to/videos/*.mp4")

    dataset.persistent = True

    print("Media type = {}.".format(dataset.media_type))
    print("Persistent = {}.".format(dataset.persistent))
    print("#examples = {}.".format(len(dataset)))
    #print("#examples = {}.".format(dataset.count()))

    # Print some information about the dataset.
    print(dataset)

    # Print a ground truth detection.
    sample = dataset.first()
    if sample.ground_truth and hasattr(sample.ground_truth, "detections"):
        print(sample.ground_truth.detections[0])
def run_detection_model_tiled(
    dataset_name,
    training_name,
    prediction_field,
    sample_tag,
    tile_string,
    tile_overlap: int,
    iou_threshold: float,
):
    """Runs the detection model over the entire dataset using a tiling approach.

    Args:
        dataset_name: the Voxel51 dataset to process
        training_name: name of the training run whose exported model to load
        prediction_field: the sample field in which to store detections
        sample_tag: only samples with this tag are processed
        tile_string: comma-separated tile sizes, e.g. "512x512,1024x1024"
        tile_overlap (int): overlap, in pixels, between adjacent tiles
        iou_threshold (float): IOU threshold used for non-max suppression
    """
    model_path = (
        "/tf/model-export/" + training_name + "/image_tensor_saved_model/saved_model"
    )
    # This is the minimum score for adding a prediction. This helps keep out
    # bad predictions, but it may need to be adjusted if your model is not
    # that good yet.
    min_score = 0.50
    input_tensor_size = 512

    logging.info("Loading model...")
    start_time = time.time()
    tf.keras.backend.clear_session()
    detect_fn = tf.saved_model.load(model_path)
    infer = detect_fn.signatures["serving_default"]
    print(infer.structured_outputs)
    print(infer)
    end_time = time.time()
    elapsed_time = end_time - start_time
    logging.info("Loading model took: " + str(elapsed_time) + "s")

    category_index = _load_label_map(training_name)
    dataset = fo.load_dataset(dataset_name)

    # Go through all of the samples in the dataset
    for sample in dataset.match_tags(sample_tag).select_fields("filepath"):
        start_time = time.time()
        img = load_img(sample.filepath)
        img_size = img.size
        img_width, img_height = img_size
        objects_by_label = dict()
        exportDetections = []
        predicted_objects = []

        # Parse the tile sizes, e.g. "512x512,1024x1024"
        tile_sizes = []
        for tile_size in tile_string.split(","):
            tile_size = tile_size.split("x")
            tile_sizes.append([int(tile_size[0]), int(tile_size[1])])

        # Collect all of the detections for each tile size
        for tile_size in tile_sizes:
            tile_width, tile_height = tile_size
            # For tiles that are smaller than the image size, calculate all
            # of the different sub-images that are needed
            for tile_location in _tiles_location_gen(img_size, tile_size, tile_overlap):
                tile = img.crop(tile_location)
                old_size = tile.size  # old_size is in (width, height) format
                ratio = float(input_tensor_size) / max(old_size)
                # Skip tiles that would need to be upscaled to fill the
                # input tensor
                if ratio > 1:
                    continue
                new_size = tuple([int(x * ratio) for x in old_size])
                im = tile.resize(new_size, Image.ANTIALIAS)

                # Create a new image and paste the resized tile onto it
                new_im = Image.new("RGB", (input_tensor_size, input_tensor_size))
                new_im.paste(im, (0, 0))

                img_array = img_to_array(new_im, dtype="uint8")
                img_batch = np.array([img_array])
                detections = detect_fn(img_batch)

                for i, detectScore in enumerate(detections["detection_scores"][0]):
                    if detectScore > min_score:
                        # Boxes are relative to the padded input tensor;
                        # scale them back to tile pixels, then reposition
                        # them into full-image coordinates
                        x1 = detections["detection_boxes"][0][i][1].numpy() * input_tensor_size
                        y1 = detections["detection_boxes"][0][i][0].numpy() * input_tensor_size
                        x2 = detections["detection_boxes"][0][i][3].numpy() * input_tensor_size
                        y2 = detections["detection_boxes"][0][i][2].numpy() * input_tensor_size
                        bbox = [x1, y1, x2, y2]
                        scaled_bbox = [number / ratio for number in bbox]
                        repositioned_bbox = _reposition_bounding_box(
                            scaled_bbox, tile_location
                        )
                        confidence = detections["detection_scores"][0][i]
                        label = _find_class_name(
                            category_index, int(detections["detection_classes"][0][i])
                        )
                        objects_by_label.setdefault(label, []).append(
                            Object(label, confidence, repositioned_bbox)
                        )
                        predicted_objects.append(
                            Object(label, confidence, repositioned_bbox)
                        )
        # Alternative: run non-max suppression per label; kept for reference
        # for label, objects in objects_by_label.items():
        #     idxs = _non_max_suppression(objects, iou_threshold)
        #     for idx in idxs:
        #         x1 = objects[idx].bbox[0] / img_width
        #         y1 = objects[idx].bbox[1] / img_height
        #         x2 = objects[idx].bbox[2] / img_width
        #         y2 = objects[idx].bbox[3] / img_height
        #         w = x2 - x1
        #         h = y2 - y1
        #         bbox = [x1, y1, w, h]
        #         exportDetections.append(
        #             fo.Detection(
        #                 label=objects[idx].label,
        #                 bounding_box=bbox,
        #                 confidence=objects[idx].score,
        #             )
        #         )

        # Run non-max suppression across all labels at once, converting the
        # surviving boxes to Voxel51's relative [x, y, w, h] format
        objects = predicted_objects
        idxs = _non_max_suppression(objects, iou_threshold)
        for idx in idxs:
            x1 = objects[idx].bbox[0] / img_width
            y1 = objects[idx].bbox[1] / img_height
            x2 = objects[idx].bbox[2] / img_width
            y2 = objects[idx].bbox[3] / img_height
            w = x2 - x1
            h = y2 - y1
            bbox = [x1, y1, w, h]
            exportDetections.append(
                fo.Detection(
                    label=objects[idx].label,
                    bounding_box=bbox,
                    confidence=objects[idx].score,
                )
            )

        # Store detections in a field name of your choice
        sample[prediction_field] = fo.Detections(detections=exportDetections)
        sample.save()
        end_time = time.time()
        print(
            "{} - Processing {} took: {}s".format(
                len(exportDetections), sample.filepath, end_time - start_time
            )
        )
        for detect in exportDetections:
            print("\t - {} {}%".format(detect.label, detect.confidence))
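# The _non_max_suppression helper is not shown in this file. Below is a
# minimal sketch of the greedy, score-ordered algorithm it could plausibly
# follow (the Object tuples above carry .score and .bbox as [x1, y1, x2, y2]);
# this is an assumption, not the project's actual helper:
def _non_max_suppression_sketch(objects, iou_threshold):
    """Return indices of objects to keep, greedily suppressing overlaps."""
    order = sorted(range(len(objects)), key=lambda i: objects[i].score, reverse=True)
    keep = []
    for i in order:
        # Keep this box only if it does not overlap a higher-scoring keeper
        if all(_iou(objects[i].bbox, objects[j].bbox) <= iou_threshold for j in keep):
            keep.append(i)
    return keep

def _iou(a, b):
    """Intersection-over-union of two [x1, y1, x2, y2] boxes."""
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0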
    export_voxel51_dataset_to_tfrecords,
    get_num_classes_from_label_map,
    load_base_models_json,
    save_mapping_to_file,
    set_filenames,
    _create_list_of_class_names,
)

# from labelbox_utils import merge_labelbox_dataset_with_voxel51
from main import read_config

# pylint: disable=C0103, W0107

# delete datasets first to create repeatable test environment
try:
    fo.load_dataset("test").delete()
except ValueError:
    pass

try:
    fo.load_dataset("test_detection_mapping").delete()
except ValueError:
    pass


def test_build_image_list():
    """Test build_image_list()."""
    output = build_image_list("test")
    assert output[0]["bearing"] == "194"
    assert output[0]["distance"] == "11882"
    assert output[0]["elevation"] == "50"
    assert output[0][
def add_faa_data_to_voxel51_dataset(
    voxel51_dataset_name, faa_master_dataset_path, faa_reference_dataset_path
):
    """Add FAA data to each entry in voxel51 dataset.

    Args:
        voxel51_dataset_name (str) - the voxel51 dataset name
        faa_master_dataset_path - path to FAA master dataset .txt
        faa_reference_dataset_path - path to FAA reference dataset .txt

    Returns:
        dataset (voxel51 dataset object)
    """
    subprocess.run("./install_faa_data.sh", check=True)

    # import master dataset and strip whitespace from the beacon column
    planes_master = pd.read_csv(faa_master_dataset_path, index_col="MODE S CODE HEX")
    planes_master.index = planes_master.index.str.strip()
    planes_reference = pd.read_csv(
        faa_reference_dataset_path, index_col="CODE", encoding="utf-8-sig"
    )
    dataset = fo.load_dataset(voxel51_dataset_name)
    for row in dataset:
        # render plane_id in uppercase letters
        plane_icao24 = row["icao24"].label.upper()
        # find the plane model code associated with the icao24 code,
        # i.e. the mode s code hex
        try:
            model_code = planes_master.loc[plane_icao24, "MFR MDL CODE"]
        except (IndexError, KeyError):
            logging.info(
                "Plane ID not found in master dataset. Plane ID: %s", plane_icao24
            )
            continue
        # find reference row with all relevant model data
        plane_reference_row = planes_reference.loc[model_code]
        # extract all relevant data from plane_reference_row,
        # converting all fields to strings
        manufacturer = str(plane_reference_row["MFR"]).rstrip()
        model_name = str(plane_reference_row["MODEL"]).rstrip()
        aircraft_type = str(plane_reference_row["TYPE-ACFT"])
        engine_type = str(plane_reference_row["TYPE-ENG"])
        num_engines = str(plane_reference_row["NO-ENG"])
        num_seats = str(plane_reference_row["NO-SEATS"])
        aircraft_weight = str(plane_reference_row["AC-WEIGHT"])
        # norm_model = normalize_single_model_value(model_name)

        # store values in voxel51 dataset row
        row["model_code"] = fo.Classification(label=model_code)
        row["manufacturer"] = fo.Classification(label=manufacturer)
        row["model_name"] = fo.Classification(label=model_name)
        row["aircraft_type"] = fo.Classification(label=aircraft_type)
        row["engine_type"] = fo.Classification(label=engine_type)
        row["num_engines"] = fo.Classification(label=num_engines)
        row["num_seats"] = fo.Classification(label=num_seats)
        row["aircraft_weight"] = fo.Classification(label=aircraft_weight)
        # if norm_model is not None:
        #     sample["norm_model"] = fo.Classification(label=norm_model)
        row.save()
    return dataset
def upload_vox51_dataset_to_labelbox(
    labelbox_api_key,
    labelbox_dataset_name,
    labelbox_project_name,
    voxel51_dataset_name,
    upload_num_samples: int = 500,
    upload_tag="train",
    avoid_tag="eval",
    resume: bool = False,
    labelbox_id_field="labelbox_id",
):
    """Upload a voxel51 dataset to Labelbox.

    Args:
        labelbox_api_key (str)
        labelbox_dataset_name (str)
        labelbox_project_name (str)
        voxel51_dataset_name (str)
        labelbox_id_field (str) - unique ID required for upload of dataset
        upload_num_samples (int) - number of images to randomly choose for upload
        upload_tag (str) - tag that is added to all of the samples selected for upload
        avoid_tag (str) - do not select samples with this tag
        resume (bool) - continue an upload to Labelbox if a prior one failed to complete

    Returns:
        None
    """
    # TODO: Some sort of problem related to labelbox ID
    logging.info("Uploading voxel51 dataset to Labelbox.")
    client = Client(labelbox_api_key)

    # must convert PaginatedCollection to list in order to count length
    projects = list(client.get_projects(where=Project.name == labelbox_project_name))

    # ensure there is only one labelbox project of the specified name
    num_labelbox_projects = len(projects)
    if num_labelbox_projects != 1:
        logging.error(
            "Expected a single project named: %s but found %s projects",
            labelbox_project_name,
            num_labelbox_projects,
        )
        sys.exit(1)
    project = list(projects)[0]

    # select proper labelbox dataset
    # must convert PaginatedCollection to list in order to count length
    labelbox_datasets = list(project.datasets(where=Dataset.name == labelbox_dataset_name))

    # ensure there is only one labelbox dataset of the specified name
    num_labelbox_datasets = len(labelbox_datasets)
    if num_labelbox_datasets != 1:
        logging.error(
            "Expected a single dataset named: %s but found %s datasets",
            labelbox_dataset_name,
            num_labelbox_datasets,
        )
        sys.exit(1)
    labelbox_dataset = list(labelbox_datasets)[0]

    # set up voxel51 and labelbox connections
    dataset = fo.load_dataset(voxel51_dataset_name)

    if resume:
        # continue a previous upload that failed: create a view with the
        # previously selected samples; samples that were successfully
        # uploaded will be skipped
        view = dataset.match_tags(upload_tag)
    else:
        # take a random sample of images to upload to labelbox
        stage = fo.MatchTags(avoid_tag, bool=False)
        view = dataset.add_stage(stage).shuffle().take(upload_num_samples)

    # add upload_tag to all of the samples being sent to labelbox
    for sample in view:
        sample.tags.append(upload_tag)
        sample.save()

    foul.upload_media_to_labelbox(labelbox_dataset, view, labelbox_id_field)
        ret, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
        sample = fo.Sample(
            filepath=maskPath,
            ground_truth=fo.Segmentation(mask=mask),
        )
        samples.append(sample)

dataset.add_samples(samples)

# ADD PREDICTIONS
# To add predictions you need to:
# 1) Cycle over every ground-truth sample in the dataset
# 2) For each sample, add a custom field "u2squared"
dataset = fo.load_dataset("duts_te_validation")
DatasetPathPrediction = "/home/Develop/Dataset/SemanticSegmentation/DUTS-TE/DUTS-TE-Image-Pred-Mask/"
with fo.ProgressBar() as pb:
    for sample in pb(dataset):
        head, tail = os.path.split(sample.filepath)
        maskPredPath = DatasetPathPrediction + tail
        maskPred = cv2.imread(maskPredPath, cv2.IMREAD_UNCHANGED)
        ret, maskPred = cv2.threshold(maskPred, 127, 255, cv2.THRESH_BINARY)
        sample["u2squared"] = fo.Segmentation(mask=maskPred)
        sample.save()