def add_sample_images_to_voxel51_dataset(image_list, dataset, datasource_name=None):
    """Append one sample per image dict to a voxel51 dataset.

    # TODO: Add check to make sure you can't add the same image twice

    Args:
        image_list: list of image data dicts
        dataset: a voxel51 dataset object
        datasource_name: optional string; when non-empty it is appended as a
            tag to every sample so the imported batch can be identified later

    Returns:
        dataset (voxel51 dataset object)
    """
    # Columns copied verbatim from each image dict into Classification fields.
    classification_fields = ("external_id", "bearing", "elevation", "distance", "icao24")

    for record in image_list:
        # One voxel51 row/sample keyed on the image's path on disk.
        sample = fo.Sample(filepath=record["file_path"])

        # Populate the additional columns for this row.
        for field in classification_fields:
            sample[field] = fo.Classification(label=record[field])

        # Only tag when a non-empty datasource name was supplied.
        if datasource_name is not None and len(datasource_name) > 0:
            sample.tags.append(datasource_name)

        dataset.add_sample(sample)

    # return modified dataset
    return dataset
def _make_detection_dataset(
    img, images_dir, num_samples=4, num_objects_per_sample=3
):
    """Write ``num_samples`` copies of ``img`` to ``images_dir`` (alternating
    .jpg/.png extensions) and return a FiftyOne dataset whose samples carry
    ``num_objects_per_sample`` random ground-truth detections each.
    """
    extensions = [".jpg", ".png"]
    label_pool = ["cat", "dog", "bird", "rabbit"]

    samples = []
    for sample_idx in range(num_samples):
        ext = extensions[sample_idx % len(extensions)]
        filepath = os.path.join(images_dir, "%06d%s" % (sample_idx, ext))
        etai.write(img, filepath)

        # Random 0.2 x 0.2 boxes whose top-left corner lies in [0, 0.8).
        detections = [
            fo.Detection(
                label=random.choice(label_pool),
                bounding_box=[
                    0.8 * random.random(),
                    0.8 * random.random(),
                    0.2,
                    0.2,
                ],
            )
            for _ in range(num_objects_per_sample)
        ]

        samples.append(
            fo.Sample(
                filepath=filepath,
                ground_truth=fo.Detections(detections=detections),
            )
        )

    dataset = fo.Dataset()
    dataset.add_samples(samples)
    return dataset
def test_create_detection_mapping():
    """Test create_detection_mapping()."""
    # credit to voxel51 crew for a helpful test suite from which this test borrows
    # https://github.com/voxel51/fiftyone/blob/a7c2b36a4f101330fa8edec35a9bdee841886f96/tests/unittests/view_tests.py#L59
    dataset = fo.Dataset(name="test_detection_mapping")

    # One "train"-tagged sample holding two labeled detections.
    labeled_dets = fo.Detections(detections=[
        fo.Detection(
            label="friend",
            confidence=0.9,
            bounding_box=[0, 0, 0.5, 0.5],
        ),
        fo.Detection(
            label="stopper",
            confidence=0.1,
            bounding_box=[0, 0, 0.5, 0.5],
        ),
    ])
    dataset.add_sample(
        fo.Sample(
            filepath="filepath1.jpg",
            tags=["train"],
            test_dets=labeled_dets,
            another_field=51,
        )
    )

    result = create_detection_mapping(
        "test_detection_mapping", label_field="test_dets", training_tag="train"
    )

    # The mapping is rendered as a protobuf-style label-map string.
    assert isinstance(result, str)
    expected = 'item {\n name: "friend"\n id: 1\n}\nitem {\n name: "stopper"\n id: 2\n}\n'
    assert result == expected
def test__create_list_of_class_names():
    """Test _create_list_of_class_names."""
    # credit to voxel51 crew for a helpful test suite from which this test borrows
    # https://github.com/voxel51/fiftyone/blob/a7c2b36a4f101330fa8edec35a9bdee841886f96/tests/unittests/view_tests.py#L59
    dataset = fo.Dataset()

    # (label, confidence, bounding_box) for each detection on the one sample.
    detection_specs = [
        ("friend", 0.9, [0, 0, 0.5, 0.5]),
        ("stopper", 0.1, [0, 0, 0.5, 0.5]),
        ("big bro", 0.6, [0, 0, 0.1, 0.5]),
    ]
    dets = [
        fo.Detection(label=name, confidence=conf, bounding_box=box)
        for name, conf, box in detection_specs
    ]

    dataset.add_sample(
        fo.Sample(
            filepath="filepath1.jpg",
            tags=["test"],
            test_dets=fo.Detections(detections=dets),
            another_field=51,
        )
    )

    names = _create_list_of_class_names(dataset, label_field="test_dets")

    # Order is not part of the contract; compare as sets.
    assert set(names) == {"friend", "stopper", "big bro"}
def _make_image_labels_dataset(img, images_dir, num_samples=4, num_objects_per_sample=3):
    """Write ``num_samples`` copies of ``img`` and attach random eta
    ``ImageLabels`` (one frame-level categorical attribute plus
    ``num_objects_per_sample`` random detected objects) to each sample.
    """
    extensions = [".jpg", ".png"]
    samples = []

    for idx in range(num_samples):
        filepath = os.path.join(
            images_dir, "%06d%s" % (idx, extensions[idx % len(extensions)])
        )
        etai.write(img, filepath)

        labels = etai.ImageLabels()
        # Frame-level attribute stored under the name "label".
        labels.add_attribute(
            etad.CategoricalAttribute("label", random.choice(["sun", "rain", "snow"]))
        )

        for _ in range(num_objects_per_sample):
            object_label = random.choice(["cat", "dog", "bird", "rabbit"])
            # Top-left corner in [0, 0.8) so the 0.2-wide box stays in-bounds.
            x_tl = 0.8 * random.random()
            y_tl = 0.8 * random.random()
            box = etag.BoundingBox.from_coords(x_tl, y_tl, x_tl + 0.2, y_tl + 0.2)
            labels.add_object(
                etao.DetectedObject(label=object_label, bounding_box=box)
            )

        samples.append(
            fo.Sample(
                filepath=filepath,
                ground_truth=fo.ImageLabels(labels=labels),
            )
        )

    dataset = fo.Dataset()
    dataset.add_samples(samples)
    return dataset
def _make_labeled_dataset_with_no_labels(img, images_dir):
    """Build a one-sample dataset that declares a ``ground_truth`` label field
    without populating it, and stocks ``dataset.info`` with the metadata keys
    consumed by the various exporters.
    """
    filepath = os.path.join(images_dir, "test.png")
    etai.write(img, filepath)

    dataset = fo.Dataset()
    dataset.add_sample(fo.Sample(filepath=filepath))
    # Declare the label field even though no sample carries a value for it.
    dataset.add_sample_field(
        "ground_truth", fo.EmbeddedDocumentField, embedded_doc_type=fo.Label
    )

    info = {}
    # FiftyOneImageClassificationDataset / FiftyOneImageDetectionDataset
    info["classes"] = ["cat", "dog"]
    # COCODetectionDataset
    info["year"] = "5151"
    info["version"] = "5151"
    info["description"] = "Brian's Dataset"
    info["contributor"] = "Brian Moore"
    info["url"] = "https://github.com/brimoor"
    info["date_created"] = "5151-51-51T51:51:51"
    info["licenses"] = ["license1", "license2"]
    # CVATImageDataset
    info["task_labels"] = [
        {
            "name": "cat",
            "attributes": [{"name": "fluffy", "categories": ["yes", "no"]}],
        },
        {
            "name": "dog",
            "attributes": [{"name": "awesome", "categories": ["yes", "of course"]}],
        },
    ]
    dataset.info = info

    dataset.save()
    return dataset
def create_fo_sample(image: Image, labels, boxes):
    """Build a FiftyOne sample with ground-truth detections for one image.

    Args:
        image: PIL image; its ``filename`` attribute is used as the sample
            filepath, so the image must have been opened from disk.
        labels: sequence of label-name strings, one per box (the original
            annotation ``labels: str`` was wrong -- this is a sequence).
        boxes: sequence of bounding boxes, one per label.
            NOTE(review): the original docstring said "xyxy format", but
            ``fo.Detection`` expects relative ``[x, y, width, height]`` --
            confirm callers convert before calling.

    Returns:
        fo.Sample with a "ground_truth" fo.Detections field.

    Raises:
        ValueError: if ``labels`` and ``boxes`` differ in length.
    """
    # Validate with a real exception rather than `assert`, which is silently
    # stripped when Python runs with -O.
    if len(labels) != len(boxes):
        raise ValueError(
            "labels and boxes must have the same length "
            "(%d != %d)" % (len(labels), len(boxes))
        )

    # Pair labels with boxes directly instead of indexing by position.
    detections = [
        fo.Detection(label=label, bounding_box=box)
        for label, box in zip(labels, boxes)
    ]

    sample = fo.Sample(filepath=image.filename)
    sample["ground_truth"] = fo.Detections(detections=detections)
    return sample
def _make_classification_dataset(img, images_dir, num_samples=4):
    """Write ``num_samples`` copies of ``img`` to disk (alternating .jpg/.png)
    and return a FiftyOne dataset labeling each with a random weather class.
    """
    extensions = [".jpg", ".png"]
    samples = []

    for idx in range(num_samples):
        filepath = os.path.join(
            images_dir, "%06d%s" % (idx, extensions[idx % len(extensions)])
        )
        etai.write(img, filepath)

        weather = random.choice(["sun", "rain", "snow"])
        samples.append(
            fo.Sample(
                filepath=filepath,
                ground_truth=fo.Classification(label=weather),
            )
        )

    dataset = fo.Dataset()
    dataset.add_samples(samples)
    return dataset
def setUp(self):
    """Create a fresh dataset holding two placeholder PNG samples."""
    self.dataset = fo.Dataset()
    self.sample1 = fo.Sample(filepath="test_one.png")
    self.sample2 = fo.Sample(filepath="test_two.png")
    # Add in order: sample1 first, then sample2.
    for sample in (self.sample1, self.sample2):
        self.dataset.add_sample(sample)
def dataset_view_test():
    """Walk through FiftyOne dataset-view features on the zoo "quickstart"
    dataset: view stages, slicing, sorting, shuffling, random sampling,
    filtering, field selection/exclusion, and date-based matching.

    Results are printed to stdout; nothing is returned.
    """
    dataset = foz.load_zoo_dataset("quickstart")
    view = dataset.view()
    print(view)
    print("Media type = {}.".format(view.media_type))
    print("#examples = {}.".format(len(view)))
    #print("#examples = {}.".format(view.count()))

    #--------------------
    #for sample in view:
    #    print(sample)
    #--------------------

    sample = view.take(1).first()
    print(type(sample))  # fiftyone.core.sample.SampleView.
    # Views can be indexed by sample id or by filepath.
    same_sample = view[sample.id]
    also_same_sample = view[sample.filepath]
    #view[other_sample_id]  # KeyError: sample non-existent or not in view.

    # List available view operations on a dataset.
    print(dataset.list_view_stages())

    #--------------------
    # View stages.

    # Random set of 100 samples from the dataset
    random_view = dataset.take(100)
    print("#examples = {}.".format(len(random_view)))
    # Sort 'random_view' by filepath.
    sorted_random_view = random_view.sort_by("filepath")

    #--------------------
    # Slicing.

    # Skip the first 2 samples and take the next 3.
    range_view1 = dataset.skip(2).limit(3)
    # Equivalently, using array slicing.
    range_view2 = dataset[2:5]

    view = dataset[10:100]
    sample10 = view.first()
    sample100 = view.last()
    also_sample10 = view[sample10.id]
    assert also_sample10.filepath == sample10.filepath
    also_sample100 = view[sample100.filepath]
    assert sample100.id == also_sample100.id
    # Each access materializes a fresh SampleView object.
    assert sample10 is not also_sample10

    # A boolean array encoding the samples to extract.
    bool_array = np.array(dataset.values("uniqueness")) > 0.7
    view = dataset[bool_array]
    print("#examples = {}.".format(len(view)))
    # Same selection expressed via ids + select().
    ids = itertools.compress(dataset.values("id"), bool_array)
    view = dataset.select(ids)
    print("#examples = {}.".format(len(view)))

    # ViewExpression defining the samples to match.
    expr = fo.ViewField("uniqueness") > 0.7
    # Use a match() expression to define the view.
    view = dataset.match(expr)
    print("#examples = {}.".format(len(view)))
    # Equivalent: using boolean expression indexing is allowed too.
    view = dataset[expr]
    print("#examples = {}.".format(len(view)))

    #--------------------
    # Sorting.

    view = dataset.sort_by("filepath")
    view = dataset.sort_by("filepath", reverse=True)

    # Sort by number of detections in 'Detections' field 'ground_truth'.
    view = dataset.sort_by(fo.ViewField("ground_truth.detections").length(), reverse=True)
    print(len(view.first().ground_truth.detections))
    print(len(view.last().ground_truth.detections))

    #--------------------
    # Shuffling.

    # Randomly shuffle the order of the samples in the dataset.
    view1 = dataset.shuffle()
    # Randomly shuffle the samples in the dataset with a fixed seed;
    # the same seed reproduces the same order.
    view2 = dataset.shuffle(seed=51)
    print(view2.first().id)
    also_view2 = dataset.shuffle(seed=51)
    print(also_view2.first().id)

    #--------------------
    # Random sampling.

    # Take 5 random samples from the dataset.
    view1 = dataset.take(5)
    # Take 5 random samples from the dataset with a fixed seed.
    view2 = dataset.take(5, seed=51)
    print(view2.first().id)
    also_view2 = dataset.take(5, seed=51)
    print(also_view2.first().id)

    #--------------------
    # Filtering.

    # Populate metadata on all samples.
    dataset.compute_metadata()
    # Samples whose image is less than 48 KB.
    small_images_view = dataset.match(fo.ViewField("metadata.size_bytes") < 48 * 1024)

    # Samples that contain at least one prediction with confidence above 0.99
    # or whose label is "cat" or "dog".
    match = (fo.ViewField("confidence") > 0.99) | (fo.ViewField("label").is_in(("cat", "dog")))
    matching_view = dataset.match(fo.ViewField("predictions.detections").filter(match).length() > 0)

    # The validation split of the dataset.
    val_view = dataset.match_tags("validation")
    # Union of the validation and test splits.
    val_test_view = dataset.match_tags(("validation", "test"))
    # The subset of samples where predictions have been computed.
    predictions_view = dataset.exists("predictions")

    # Get the IDs of two random samples.
    sample_ids = [
        dataset.take(1).first().id,
        dataset.take(1).first().id,
    ]
    # Include only samples with the given IDs in the view.
    selected_view = dataset.select(sample_ids)
    # Exclude samples with the given IDs from the view.
    excluded_view = dataset.exclude(sample_ids)

    for sample in dataset.select_fields("ground_truth"):
        print(sample.id)            # OKAY: 'id' is always available
        print(sample.ground_truth)  # OKAY: 'ground_truth' was selected
        #print(sample.predictions)  # AttributeError: 'predictions' was not selected
    for sample in dataset.exclude_fields("predictions"):
        print(sample.id)            # OKAY: 'id' is always available
        print(sample.ground_truth)  # OKAY: 'ground_truth' was not excluded
        #print(sample.predictions)  # AttributeError: 'predictions' was excluded

    #--------------------
    # Date-based views.

    # Rebinds `dataset` to a fresh in-memory dataset for the date examples.
    dataset = fo.Dataset()
    dataset.add_samples(
        [
            fo.Sample(
                filepath="image1.png",
                capture_date=datetime(2021, 8, 24, 1, 0, 0),
            ),
            fo.Sample(
                filepath="image2.png",
                capture_date=datetime(2021, 8, 24, 2, 0, 0),
            ),
            fo.Sample(
                filepath="image3.png",
                capture_date=datetime(2021, 8, 24, 3, 0, 0),
            ),
        ]
    )

    query_date = datetime(2021, 8, 24, 2, 1, 0)
    query_delta = timedelta(minutes=30)

    # Samples with capture date after 2021-08-24 02:01:00.
    view = dataset.match(fo.ViewField("capture_date") > query_date)
    print(view)
    # Samples with capture date within 30 minutes of 2021-08-24 02:01:00.
    view = dataset.match(abs(fo.ViewField("capture_date") - query_date) < query_delta)
    print(view)
class ServerServiceTests(unittest.TestCase):
    """Tests for ServerService.

    The step_* methods are not discovered individually by unittest; they are
    driven in order by ``test_steps`` so they share one live session/socket.
    """

    # Remote fixture image; downloaded once in setUpClass and copied so the
    # dataset has two distinct local files.
    image_url = "https://user-images.githubusercontent.com/3719547/74191434-8fe4f500-4c21-11ea-8d73-555edfce0854.png"
    test_one = os.path.abspath("./test_one.png")
    test_two = os.path.abspath("./test_two.png")
    # Shared dataset + samples used by every step.
    dataset = fo.Dataset("test")
    sample1 = fo.Sample(filepath=test_one)
    sample2 = fo.Sample(filepath=test_two)
    # Live session and a raw socketio client wired to the app namespace at
    # class-definition time (module import starts the connection).
    session = Session(remote=True)
    sio_client = socketio.Client()
    sio_client.eio.start_background_task = foc._start_background_task
    client = AppClient()
    sio_client.register_namespace(client)
    foc._connect(sio_client, SERVER_ADDR % 5151)
    _tmp = None

    @classmethod
    def setUpClass(cls):
        """Download the fixture image, add both samples, and give sample1
        extra fields (scalar, label, tag, non-finite floats) that the
        distribution/encoding steps assert against."""
        urllib.request.urlretrieve(cls.image_url, cls.test_one)
        etau.copy_file(cls.test_one, cls.test_two)
        cls.dataset.add_sample(cls.sample1)
        cls.dataset.add_sample(cls.sample2)
        cls.sample1["scalar"] = 1
        cls.sample1["label"] = fo.Classification(label="test")
        cls.sample1.tags.append("tag")
        # Non-finite floats exercise the custom JSON encoding path.
        cls.sample1["floats"] = [
            0.5,
            float("nan"),
            float("inf"),
            float("-inf"),
        ]
        cls.sample1.save()

    @classmethod
    def tearDownClass(cls):
        # Remove the downloaded image and its copy.
        etau.delete_file(cls.test_one)
        etau.delete_file(cls.test_two)

    def step_connect(self):
        # Both the session's internal client and the raw socket client
        # must report a live connection.
        self.assertIs(self.session._hc_client.connected, True)
        self.assertIs(self.client.connected, True)

    def step_update(self):
        # Assigning a dataset to the session must push matching state
        # to the app client.
        self.session.dataset = self.dataset
        self.wait_for_response()
        session = _serialize(self.session.state)
        client = self.client.data.serialize()
        self.assertEqual(_normalize_session(session), _normalize_session(client))

    def step_get_current_state(self):
        self.maxDiff = None
        # Limit the view to 1 of the 2 samples so view_count != dataset size.
        self.session.view = self.dataset.limit(1)
        self.wait_for_response()
        session = _serialize(self.session.state)
        self.client.emit("get_current_state", "", callback=self.client_callback)
        client = self.wait_for_response()
        self.assertEqual(_normalize_session(session), _normalize_session(client))
        self.assertEqual(
            sorted(client["tags"]),
            sorted(self.dataset.get_tags()),
        )
        self.assertEqual(client["view_count"], len(self.session.view))
        self.assertNotEqual(client["view_count"], len(self.dataset))

    def step_selection(self):
        # Adding then removing a selection round-trips through the server.
        self.client.emit("add_selection", self.sample1.id)
        self.wait_for_response(session=True)
        self.assertIs(len(self.session.selected), 1)
        self.assertEqual(self.session.selected[0], self.sample1.id)
        self.client.emit("remove_selection", self.sample1.id)
        self.wait_for_response(session=True)
        self.assertIs(len(self.session.selected), 0)

    def step_page(self):
        # Page 1 of the 2-sample dataset returns both samples.
        self.session.dataset = self.dataset
        self.wait_for_response()
        self.client.emit("page", 1, callback=self.client_callback)
        client = self.wait_for_response()
        results = client["results"]
        self.assertIs(len(results), 2)

    def step_get_distributions(self):
        self.session.dataset = self.dataset
        self.wait_for_response()
        # Only sample1 carries the "tag" tag.
        self.client.emit("get_distributions", "tags", callback=self.client_callback)
        client = self.wait_for_response()
        self.assertIs(len(client), 1)
        self.assertEqual(client[0]["data"], [{"key": "tag", "count": 1}])
        # Only sample1 carries the "test" classification label.
        self.client.emit("get_distributions", "labels", callback=self.client_callback)
        client = self.wait_for_response()
        self.assertIs(len(client), 1)
        self.assertEqual(client[0]["data"], [{"key": "test", "count": 1}])
        # NOTE(review): scalars reports both samples bucketed under "null" --
        # presumably the distribution is over a field neither sample sets.
        self.client.emit("get_distributions", "scalars", callback=self.client_callback)
        client = self.wait_for_response()
        self.assertIs(len(client), 1)
        self.assertEqual(client[0]["data"], [{"key": "null", "count": 2}])

    def step_sessions(self):
        # A second session attached to the same server observes the same
        # dataset and view state.
        other_session = Session(remote=True)
        other_session.dataset = self.dataset
        self.wait_for_response(session=True)
        self.assertEqual(str(self.session.dataset), str(other_session.dataset))
        other_session.view = self.dataset.limit(1)
        self.wait_for_response(session=True)
        self.assertEqual(str(self.session.view), str(other_session.view))

    def step_server_services(self):
        # Two sessions on one port share a single server service; deleting
        # sessions unsubscribes them and finally tears the service down.
        port = 5252
        session_one = Session(port=port, remote=True)
        session_two = Session(port=port, remote=True)
        self.assertEqual(len(_subscribed_sessions[port]), 2)
        self.assertEqual(len(_subscribed_sessions), 2)
        self.assertEqual(len(_server_services), 2)
        session_two.__del__()
        self.assertEqual(len(_subscribed_sessions[port]), 1)
        self.assertEqual(len(_subscribed_sessions), 2)
        self.assertEqual(len(_server_services), 2)
        session_one.__del__()
        self.assertEqual(len(_subscribed_sessions[port]), 0)
        self.assertEqual(len(_server_services), 1)

    def step_empty_derivables(self):
        # An empty dataset still produces consistent serialized state.
        self.session.dataset = fo.Dataset()
        response = self.wait_for_response()
        self.assertEqual(_serialize(self.session.state), _serialize(self.client.data))

    def step_json_encoder(self):
        # The custom encoder must handle ObjectIds and non-finite floats.
        enc = FiftyOneJSONEncoder
        oid = "aaaaaaaaaaaaaaaaaaaaaaaa"
        self.assertEqual(enc.dumps(ObjectId(oid)), '{"$oid": "%s"}' % oid)
        self.assertEqual(enc.dumps(float("nan")), '"NaN"')
        self.assertEqual(enc.dumps(float("inf")), '"Infinity"')

    def test_steps(self):
        """Run every step_* method in turn, resetting the session dataset
        between steps; any exception fails the whole test with the step name."""
        for name, step in self.steps():
            try:
                step()
                self.session.dataset = None
                self.wait_for_response()
            except Exception as e:
                self.fail("{} failed ({}: {})".format(step, type(e), e))

    def wait_for_response(self, timeout=3, session=False):
        """Poll for a server response.

        Args:
            timeout: seconds to wait before giving up
            session: if True, wait for the session client's `updated` flag
                (and clear it); otherwise wait for and return the app
                client's response payload

        Raises:
            RuntimeError: if nothing arrives within `timeout` seconds
        """
        start_time = time.time()
        while time.time() < start_time + timeout:
            if session:
                if self.session._hc_client.updated:
                    self.session._hc_client.updated = False
                    return
            elif self.client.response:
                response = self.client.response
                self.client.response = None
                return response
            time.sleep(0.2)
        raise RuntimeError("No response after %f" % timeout)

    def client_callback(self, data):
        # Stash the callback payload for wait_for_response() to pick up.
        self.client.response = data

    def steps(self):
        # Yield (name, bound method) for every step_* method, in dir() order.
        for name in dir(self):
            if name.startswith("step_"):
                yield name, getattr(self, name)
def train_with_hydra(cfg: DictConfig):
    """Run batch-1 inference with a checkpointed image classifier and,
    depending on ``cfg.inference`` flags, produce Captum explanations (CSV),
    a confusion matrix + metrics report (image/CSV via FiftyOne and sklearn),
    and a calibration reliability diagram.
    """
    # setup inference path: everything after ".../train/" is replaced
    # with "inference/".
    cfg.inference.base_path = cfg.inference.model_path_to_load.split(
        "train/", 1)[0] + "inference/"
    print("INFERENCE RESULTS WILL BE SAVED {}".format(cfg.inference.base_path))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # For inference always set batch_size = 1
    cfg.inference.batch_size = 1

    createFolderForExplanation(cfg)

    # Dataclass for custom Image transform. See dataset configuration in .yaml
    @dataclass
    class ImageClassificationInputTransform(InputTransform):

        # transforms added to input training data
        def train_input_per_sample_transform(self):
            return instantiate(cfg.dataset.train_transform, _convert_="all")

        # transform label to tensor
        def target_per_sample_transform(self) -> Callable:
            return torch.as_tensor

        # transforms added to input validation data
        def val_input_per_sample_transform(self):
            return instantiate(cfg.dataset.val_transform, _convert_="all")

        def predict_input_per_sample_transform(self):
            return instantiate(cfg.dataset.test_transform, _convert_="all")

    # ----------
    # INSTANTIATE DATASET FROM HYDRA CONF
    # -----------

    # Check for empty folder: every class sub-folder must contain files.
    for dirpath, dirnames, files in os.walk(cfg.inference.dataset_path):
        if (dirpath == cfg.inference.dataset_path):
            # Root directory has no file
            pass
        else:
            if files:
                pass
            else:
                raise Exception(
                    "Test folder cannot be empty. Otherwise target label are not correct"
                )

    datamodule = ImageClassificationData.from_folders(
        predict_folder=cfg.inference.dataset_path,
        predict_transform=ImageClassificationInputTransform,
        batch_size=cfg.inference.batch_size)

    # ----------
    # INSTANTIATE MODEL AND TRAINER
    # -----------

    model = instantiate(cfg.model.image_classifier)
    model = model.load_from_checkpoint(cfg.inference.model_path_to_load)

    # instantiate trainer
    trainer = instantiate(cfg.trainer.default)

    # ----------
    # RUN PREDICTION
    # -----------

    predictions = trainer.predict(model, datamodule=datamodule)

    # model needs to be put on gpu after trainer.predict in order to run
    # the explanation on gpu
    if (torch.cuda.is_available()):
        modeladapter = model.to(device)
    # NOTE(review): `modeladapter` is only bound when CUDA is available;
    # on a CPU-only host the next line raises NameError -- confirm intended.
    modeladapter.eval()

    # ----------
    # RUN MODEL INSPECTION
    # -----------

    # Accumulators are only created for the enabled inspection modes.
    if (cfg.inference.captum.enable):
        print("SAVE EXPLANATION FILES ")
        # CSV write or append
        explanation_list = []
    if (cfg.inference.confusion_matrix.enable):
        print("SAVE EXPLANATION FILES ")
        # CSV write or append
        samples = []
        y_pred = []
        y_true = []
    if (cfg.inference.calibration.enable):
        preds_caliration = []
        labels_oneh_calibration = []

    for prediction in predictions:
        # value must be in float32; flatten logits to shape (1, num_classes)
        out32 = torch.tensor(prediction[0][DataKeys.PREDS].detach().view(
            1, -1).contiguous(),
                             dtype=torch.float32)
        inputImage = prediction[0][DataKeys.INPUT]
        if (torch.cuda.is_available()):
            out32 = out32.cuda()
            inputImage = inputImage.cuda()

        # Softmax over classes, then keep the top-1 prediction.
        output = F.softmax(out32, dim=1)
        prediction_score, pred_label_idx = torch.topk(output, 1)
        pred_label_idx.squeeze_()
        pred_label_num = pred_label_idx.cpu().item()
        gt_label_num = prediction[0][DataKeys.TARGET].item()

        filepath = prediction[0][DataKeys.METADATA]["filepath"]
        filename = os.path.basename(os.path.normpath(filepath))
        filename_without_ext, file_extension = os.path.splitext(filename)

        # EXPLANATION
        if (cfg.inference.captum.enable):
            explanation_list.append(
                save_explanation(inputImage, modeladapter, cfg, pred_label_idx,
                                 pred_label_num, gt_label_num, filename, filepath,
                                 filename_without_ext, prediction_score))

        # CONFUSION MATRIX
        if (cfg.inference.confusion_matrix.enable):
            y_true.extend([gt_label_num])
            y_pred.extend([pred_label_num])
            samples.append(
                fo.Sample(filepath=filepath,
                          ground_truth=fo.Classification(
                              label=cfg.inference.class_name[gt_label_num]),
                          prediction=fo.Classification(
                              label=cfg.inference.class_name[pred_label_num])))

        # CALIBRATION
        if (cfg.inference.calibration.enable):
            pred_calib = output.cpu().detach().numpy()
            preds_caliration.extend(pred_calib)
            # WARNING class_name must be configured
            label_oneh = torch.nn.functional.one_hot(
                torch.tensor([gt_label_num]).to(torch.long),
                num_classes=len(cfg.inference.class_name))
            label_oneh = label_oneh.cpu().detach().numpy()
            labels_oneh_calibration.extend(label_oneh)

    # Save Explanation CSV for further analysis
    if (cfg.inference.captum.enable):
        explanation_dataframe = pd.DataFrame(
            explanation_list,
            columns=["pred", "GT", "predict_score", "image_path"])
        # csv file could be imported on Ai4Prod explainability software
        explanation_dataframe.to_csv(cfg.inference.captum.csv_result,
                                     index=False)

    # Save confusion Matrix and show other stats
    if (cfg.inference.confusion_matrix.enable):
        dataset = fo.Dataset("custom_evaluation")
        dataset.add_samples(samples)
        results = dataset.evaluate_classifications(
            "prediction",
            gt_field="ground_truth",
            eval_key="custom_eval",
        )
        plot = results.plot_confusion_matrix(classes=cfg.inference.class_name,
                                             backend="matplotlib",
                                             figsize=(6, 6))
        plot.savefig(
            cfg.inference.confusion_matrix.path_to_confusion_matrix_image)

        dict_report = results.report()
        df_metric = pd.DataFrame(dict_report).transpose()
        df_metric.to_csv(cfg.inference.confusion_matrix.path_to_metrics_csv)

        # save cf matrix as csv. You can use this in C++
        cf_matrix = confusion_matrix(y_true, y_pred, normalize="true")
        df_cm = pd.DataFrame(cf_matrix,
                             index=[i for i in cfg.inference.class_name],
                             columns=[i for i in cfg.inference.class_name])
        df_cm.to_csv(
            cfg.inference.confusion_matrix.path_to_confusion_matrix_csv,
            index=False,
            header=False)

    if (cfg.inference.calibration.enable):
        preds_caliration = np.array(preds_caliration).flatten()
        labels_oneh_calibration = np.array(labels_oneh_calibration).flatten()
        draw_reliability_graph(
            preds_caliration,
            cfg.inference.calibration.path_to_creliability_diagram,
            labels_oneh_calibration)
# Build the "duts_te_validation" FiftyOne dataset from the DUTS-TE
# ground-truth segmentation masks on disk.
DatasetPath = "/home/Develop/Dataset/SemanticSegmentation/DUTS-TE/DUTS-TE-Mask/"

maskList = os.listdir(DatasetPath)

samples = []

dataset = fo.Dataset("duts_te_validation")
# Persist so the dataset survives this process and can be reloaded below.
dataset.persistent = True

for maskName in maskList:

    maskPath = DatasetPath + maskName
    # load mask with opencv
    mask = cv2.imread(maskPath, cv2.IMREAD_UNCHANGED)
    # Binarize the mask: pixels > 127 become 255, everything else 0.
    ret, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

    # NOTE(review): the sample's filepath is the MASK path, not the source
    # image path -- confirm this is intended for the validation workflow.
    sample = fo.Sample(filepath=maskPath, ground_truth=fo.Segmentation(mask=mask))

    samples.append(sample)

dataset.add_samples(samples)

# ADD PREDICTION
# To add prediction you need
# 1) Cycle over all dataset sample GT
# 2) For each sample add a custom field u2squared
dataset = fo.load_dataset("duts_te_validation")

DatasetPathPrediction = "/home/Develop/Dataset/SemanticSegmentation/DUTS-TE/DUTS-TE-Image-Pred-Mask/"