import logging
import math

import fiftyone as fo
from fiftyone import ViewField as F


def select_multi_class_train_eval_dataset(
    dataset_name, prediction_field="auto_aug_predict_tiled", train_size=100
):
    """Tags a fixed-size Train/Eval split for each normalized aircraft model.

    Models with at least ``2 * train_size`` samples that have predictions are
    split: the first ``train_size`` shuffled samples are tagged for training
    and the remainder for eval.

    Args:
        dataset_name (str): name of the Voxel51 dataset to use
        prediction_field (str): detections field that must be non-empty
        train_size (int): number of training samples to tag per model
    """
    dataset = fo.load_dataset(dataset_name)

    # Remove any existing split tags so the selection starts fresh
    logging.info("Removing existing multi_class_train tags")
    for sample in dataset.match_tags("multi_class_train"):
        sample.tags = [t for t in sample.tags if t != "multi_class_train"]
        sample.save()

    logging.info("Removing existing multi_class_eval tags")
    for sample in dataset.match_tags("multi_class_eval"):
        sample.tags = [t for t in sample.tags if t != "multi_class_eval"]
        sample.save()

    norm_models = dataset.distinct("norm_model.label")
    for norm_model in norm_models:
        # Shuffled samples of this model that have at least one prediction
        view = (
            dataset.filter_labels("norm_model", F("label") == norm_model)
            .match(F(prediction_field + ".detections").length() > 0)
            .shuffle()
        )
        print("{}: {}".format(norm_model, len(view)))
        if len(view) >= 2 * train_size:
            for sample in view[:train_size]:
                sample.tags.append("multi_class_train")
                sample.save()
            for sample in view[train_size:]:
                sample.tags.append("multi_class_eval")
                sample.save()
def main(
    dataset_name,
    label_map_path,
    groundtruth_loc_field_name,
    groundtruth_img_labels_field_name,
    prediction_field_name,
    iou_threshold,
):
    dataset = fo.load_dataset(dataset_name)
    evaluate_dataset(
        dataset=dataset,
        label_map_path=label_map_path,
        groundtruth_loc_field_name=groundtruth_loc_field_name,
        groundtruth_img_labels_field_name=groundtruth_img_labels_field_name,
        prediction_field_name=prediction_field_name,
        iou_threshold=iou_threshold,
    )

    # Clone the evaluated detections into dedicated TP/FP fields so they can
    # be browsed side by side in the App
    print("Cloning True Positives to a new field...")
    tp_view = dataset.filter_detections(
        prediction_field_name, F("eval") == "true_positive"
    )
    tp_view.clone_sample_field(prediction_field_name, prediction_field_name + "_TP")

    print("Cloning False Positives to a new field...")
    fp_view = dataset.filter_detections(
        prediction_field_name, F("eval") == "false_positive"
    )
    fp_view.clone_sample_field(prediction_field_name, prediction_field_name + "_FP")
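# A hedged driver sketch for main() above. Every argument value here is an
# assumption for illustration (dataset name, label map path, field names);
# substitute the values from your own pipeline.
if __name__ == "__main__":
    main(
        dataset_name="aircraft_dataset",              # assumed dataset name
        label_map_path="/tf/models/label_map.pbtxt",  # assumed label map path
        groundtruth_loc_field_name="detections",      # assumed GT boxes field
        groundtruth_img_labels_field_name="image_labels",  # assumed GT labels field
        prediction_field_name="predictions",          # assumed predictions field
        iou_threshold=0.5,                            # common IoU threshold
    )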
def test_re_match(self):
    result = list(self.dataset.match(F("filepath").re_match(r"two\.png$")))
    self.assertEqual(len(result), 1)
    self.assertTrue(result[0].filepath.endswith("two.png"))

    # case-insensitive match
    result = list(
        self.dataset.match(F("filepath").re_match(r"TWO\.PNG$", options="i"))
    )
    self.assertEqual(len(result), 1)
    self.assertTrue(result[0].filepath.endswith("two.png"))
def test_filter_detections(self):
    self.sample1["test_dets"] = fo.Detections(
        detections=[
            fo.Detection(
                label="friend",
                confidence=0.9,
                bounding_box=[0, 0, 0.5, 0.5],
            ),
            fo.Detection(
                label="friend",
                confidence=0.3,
                bounding_box=[0.25, 0, 0.5, 0.1],
            ),
            fo.Detection(
                label="stopper",
                confidence=0.1,
                bounding_box=[0, 0, 0.5, 0.5],
            ),
            fo.Detection(
                label="big bro",
                confidence=0.6,
                bounding_box=[0, 0, 0.1, 0.5],
            ),
        ]
    )
    self.sample1.save()

    self.sample2["test_dets"] = fo.Detections(
        detections=[
            fo.Detection(
                label="friend",
                confidence=0.99,
                bounding_box=[0, 0, 1, 1],
            ),
            fo.Detection(
                label="tricam",
                confidence=0.2,
                bounding_box=[0, 0, 0.5, 0.5],
            ),
            fo.Detection(
                label="hex",
                confidence=0.8,
                bounding_box=[0.35, 0, 0.2, 0.25],
            ),
        ]
    )
    self.sample2.save()

    view = self.dataset.filter_detections(
        "test_dets", (F("confidence") > 0.5) & (F("label") == "friend")
    )

    for sv in view:
        for det in sv.test_dets.detections:
            self.assertGreater(det.confidence, 0.5)
            self.assertEqual(det.label, "friend")
def _tag_samples_by_icao24(dataset, icao24, tag, only_aircraft_detected=True):
    """Adds a tag to all samples with a matching ICAO24 identifier.

    Args:
        dataset (Voxel51 Dataset): dataset to work with
        icao24 (str): the ICAO24 identifier to search for
        tag (str): tag to add
        only_aircraft_detected (bool): if True, only tag samples that have at
            least one detection in the ``multi_class_detections`` field
    """
    if only_aircraft_detected:
        view = dataset.filter_labels("icao24", F("label") == icao24).match(
            F("multi_class_detections.detections").length() > 0
        )
    else:
        view = dataset.filter_labels("icao24", F("label") == icao24)

    logging.info("\t[{}] - Tagging {} aircraft as {}".format(icao24, len(view), tag))
    for sample in view:
        sample.tags.append(tag)
        sample.save()
def random_multi_class_train_eval_dataset(dataset_name):
    """Randomly splits the dataset into Training and Eval samples.

    For each normalized aircraft model, the samples with detections are
    shuffled and divided, 75% to Train and 25% to Eval. Up to 250 samples
    without detections are added to Train as negative examples. The samples
    are separated using tags.

    Args:
        dataset_name (str): name of the Voxel51 dataset to use
    """
    dataset = fo.load_dataset(dataset_name)

    # Remove any existing split tags so the split starts fresh
    logging.info("Removing existing multi_class_train tags")
    for sample in dataset.match_tags("multi_class_train"):
        sample.tags = [t for t in sample.tags if t != "multi_class_train"]
        sample.save()

    logging.info("Removing existing multi_class_eval tags")
    for sample in dataset.match_tags("multi_class_eval"):
        sample.tags = [t for t in sample.tags if t != "multi_class_eval"]
        sample.save()

    norm_models = dataset.distinct("norm_model.label")
    for norm_model in norm_models:
        view = (
            dataset.filter_labels("norm_model", F("label") == norm_model)
            .match(F("multi_class_detections.detections").length() > 0)
            .shuffle()
        )
        train_count = math.floor(len(view) * 0.75)
        eval_count = len(view) - train_count
        for sample in view[:train_count]:
            sample.tags.append("multi_class_train")
            sample.save()
        for sample in view[train_count:]:
            sample.tags.append("multi_class_eval")
            sample.save()
        print(
            "{} Total: {} Train: {} Eval: {}".format(
                norm_model, len(view), train_count, eval_count
            )
        )

    # Add up to 250 samples without any detections to Train as negatives
    view = dataset.match(F("multi_class_detections.detections").length() == 0).take(250)
    for sample in view:
        sample.tags.append("multi_class_train")
        sample.save()
def test_filter_classifications(self):
    self.sample1["test_clfs"] = fo.Classifications(
        classifications=[
            fo.Classification(
                label="friend",
                confidence=0.9,
            ),
            fo.Classification(
                label="friend",
                confidence=0.3,
            ),
            fo.Classification(
                label="stopper",
                confidence=0.1,
            ),
            fo.Classification(
                label="big bro",
                confidence=0.6,
            ),
        ]
    )
    self.sample1.save()

    self.sample2["test_clfs"] = fo.Classifications(
        classifications=[
            fo.Classification(
                label="friend",
                confidence=0.99,
            ),
            fo.Classification(
                label="tricam",
                confidence=0.2,
            ),
            fo.Classification(
                label="hex",
                confidence=0.8,
            ),
        ]
    )
    self.sample2.save()

    view = self.dataset.filter_classifications(
        "test_clfs", (F("confidence") > 0.5) & (F("label") == "friend")
    )

    for sv in view:
        for clf in sv.test_clfs.classifications:
            self.assertGreater(clf.confidence, 0.5)
            self.assertEqual(clf.label, "friend")
def build_multi_class_train_eval_dataset(dataset_name):
    """Splits each aircraft model into Train/Eval by aircraft body.

    For models with more than one unique aircraft (ICAO24), the first
    aircraft is tagged for training and all remaining aircraft for eval.

    Args:
        dataset_name (str): name of the Voxel51 dataset to use
    """
    dataset = fo.load_dataset(dataset_name)
    norm_models = dataset.distinct("norm_model.label")
    for norm_model in norm_models:
        view = dataset.filter_labels(
            "norm_model", F("label") == norm_model
        ).select_fields("icao24")
        unique_aircraft = view.distinct("icao24.label")
        num_unique_aircraft = len(unique_aircraft)
        if num_unique_aircraft > 1:
            _tag_samples_by_icao24(dataset, unique_aircraft[0], "multi_class_train")
            for icao24 in unique_aircraft[1:]:
                _tag_samples_by_icao24(dataset, icao24, "multi_class_eval")
            print("{}: {}".format(norm_model, len(unique_aircraft)))
            print("\tTrain: {}".format(unique_aircraft[0]))
            print("\tEval: {}".format(unique_aircraft[1:]))
def evaluate_detection_model(
    dataset_name, prediction_field, evaluation_key, ground_truth_field
):
    dataset = fo.load_dataset(dataset_name)
    view = dataset.match_tags("multi_class_eval")

    # Set an empty Detections field where one is missing so evaluation can run
    for sample in view:
        if sample[ground_truth_field] is None:
            sample[ground_truth_field] = fo.Detections(detections=[])
            sample.save()
        if sample[prediction_field] is None:
            sample[prediction_field] = fo.Detections(detections=[])
            sample.save()

    results = view.evaluate_detections(
        prediction_field,
        gt_field=ground_truth_field,
        eval_key=evaluation_key,
        compute_mAP=True,
    )

    # Get the 15 most common classes in the dataset
    counts = view.count_values("{}.detections.label".format(ground_truth_field))
    classes = sorted(counts, key=counts.get, reverse=True)[:15]

    # Print a classification report for the top-15 classes
    results.print_report(classes=classes)

    # Print some statistics about the total TP/FP/FN counts
    logging.info("TP: %d" % dataset.sum(evaluation_key + "_tp"))
    logging.info("FP: %d" % dataset.sum(evaluation_key + "_fp"))
    logging.info("FN: %d" % dataset.sum(evaluation_key + "_fn"))

    # A view with the samples with the most false positives first, and only
    # false positive boxes in the predictions field, for browsing in the App
    eval_view = view.sort_by(evaluation_key + "_fp", reverse=True).filter_labels(
        prediction_field, F(evaluation_key) == "fp"
    )

    logging.info("mAP: {}".format(results.mAP()))

    plot = results.plot_pr_curves(classes=classes, backend="matplotlib")
    plot.savefig("/tf/dataset-export/" + evaluation_key + "_pr_curves.png")

    plot = results.plot_confusion_matrix(classes=classes, backend="matplotlib")
    plot.savefig("/tf/dataset-export/" + evaluation_key + "_confusion_matrix.png")
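# A minimal sketch of driving the evaluation above. The dataset name and the
# ground-truth field are assumptions for illustration; "multi_class_detections"
# follows the prediction field used elsewhere in this module.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    evaluate_detection_model(
        dataset_name="aircraft_dataset",           # assumed dataset name
        prediction_field="multi_class_detections",
        evaluation_key="multi_class_eval_run",     # prefix for eval fields and plot files
        ground_truth_field="detections",           # assumed ground-truth field
    )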
def split_multi_class_train_eval_dataset(dataset_name):
    """Splits the dataset into Training and Eval samples.

    For aircraft models with more than one example, the aircraft bodies will
    be divided, 75% to Train and 25% to Eval. The samples are separated using
    tags.

    Args:
        dataset_name (str): name of the Voxel51 dataset to use
    """
    dataset = fo.load_dataset(dataset_name)

    # Remove any existing tags from the dataset to ensure a fresh start
    logging.info("Removing existing multi_class_train tags")
    for sample in dataset.match_tags("multi_class_train"):
        sample.tags = [t for t in sample.tags if t != "multi_class_train"]
        sample.save()

    logging.info("Removing existing multi_class_eval tags")
    for sample in dataset.match_tags("multi_class_eval"):
        sample.tags = [t for t in sample.tags if t != "multi_class_eval"]
        sample.save()

    # Find all of the unique normalized aircraft models
    norm_models = dataset.distinct("norm_model.label")
    for norm_model in norm_models:
        view = (
            dataset.filter_labels("norm_model", F("label") == norm_model)
            .select_fields("icao24")
            .shuffle()
        )
        unique_aircraft = view.distinct("icao24.label")
        if len(unique_aircraft) > 1:
            split = math.floor(len(unique_aircraft) * 0.75)
            train_aircraft = unique_aircraft[:split]
            eval_aircraft = unique_aircraft[split:]
            print(
                "{} Total: {} Train: {} Eval: {}".format(
                    norm_model,
                    len(unique_aircraft),
                    len(train_aircraft),
                    len(eval_aircraft),
                )
            )
            # Tag the first training aircraft even if it has no detections,
            # so every model has at least one training example
            for icao24 in train_aircraft[:1]:
                _tag_samples_by_icao24(dataset, icao24, "multi_class_train", False)
            for icao24 in train_aircraft[1:]:
                _tag_samples_by_icao24(dataset, icao24, "multi_class_train", True)
            for icao24 in eval_aircraft:
                _tag_samples_by_icao24(dataset, icao24, "multi_class_eval", True)
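# A quick sanity check to run after splitting (a sketch; "aircraft_dataset" is
# an assumed dataset name, and check_split is a hypothetical helper, not part
# of the original module). No sample should carry both split tags.
def check_split(dataset_name="aircraft_dataset"):
    dataset = fo.load_dataset(dataset_name)
    n_train = len(dataset.match_tags("multi_class_train"))
    n_eval = len(dataset.match_tags("multi_class_eval"))
    # Chained match_tags() stages intersect, so this counts double-tagged samples
    n_overlap = len(
        dataset.match_tags("multi_class_train").match_tags("multi_class_eval")
    )
    print("train={} eval={} overlap={}".format(n_train, n_eval, n_overlap))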
    # Save predictions to dataset
    sample["mask_rcnn"] = fo.Detections(detections=detections)
    sample.save()

print("Finished adding predictions")


# In[ ]:


session.view = predictions_view


# In[ ]:


# Only contains detections with confidence >= 0.15
high_conf_view = predictions_view.filter_labels("mask_rcnn", F("confidence") > 0.15)


# In[ ]:


# Evaluate the predictions in the `mask_rcnn` field of our `high_conf_view`
# with respect to the objects in the `ground_truth` field
results = high_conf_view.evaluate_detections(
    "mask_rcnn",
    gt_field="ground_truth",
    eval_key="eval",
    compute_mAP=True,
)


# In[ ]:


counts = dataset.count_values("ground_truth.detections.label")
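# In[ ]:


# A possible follow-up cell (a sketch): print a per-class report for the most
# common ground-truth classes, plus the COCO-style mAP computed above
classes = sorted(counts, key=counts.get, reverse=True)[:10]
results.print_report(classes=classes)
print("mAP: {:.4f}".format(results.mAP()))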