def add_sample_images_to_voxel51_dataset(image_list, dataset, datasource_name=None):
    """Add sample images to a voxel51 dataset.

    # TODO: Add check to make sure you can't add the same image twice

    Args:
        image_list - list of image data dicts
        dataset - a voxel51 dataset object
        datasource_name - an optional string that allows an identifying tag
            to be added to the batch of images being imported

    Returns:
        dataset (voxel51 dataset object)
    """
    for image in image_list:
        # create a voxel51 row/sample based on the path to the image
        sample = fo.Sample(filepath=image["file_path"])
        # add additional columns to the voxel51 dataset row
        sample["external_id"] = fo.Classification(label=image["external_id"])
        sample["bearing"] = fo.Classification(label=image["bearing"])
        sample["elevation"] = fo.Classification(label=image["elevation"])
        sample["distance"] = fo.Classification(label=image["distance"])
        sample["icao24"] = fo.Classification(label=image["icao24"])

        if datasource_name is not None and len(datasource_name) > 0:
            sample.tags.append(datasource_name)

        dataset.add_sample(sample)

    # return modified dataset
    return dataset
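# A minimal usage sketch for add_sample_images_to_voxel51_dataset. The
# image_list entries, file paths, and dataset name below are hypothetical
# assumptions for illustration; the dict keys match those the function reads.
import fiftyone as fo

sample_images = [
    {
        "file_path": "/data/images/plane_001.jpg",  # assumed path
        "external_id": "ext-001",
        "bearing": "110",
        "elevation": "45",
        "distance": "1200",
        "icao24": "a1b2c3",
    },
]
demo_dataset = fo.Dataset("demo_planes")  # hypothetical dataset name
demo_dataset = add_sample_images_to_voxel51_dataset(
    sample_images, demo_dataset, datasource_name="batch_2021_06"
)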
def test_classification_fiftyone(tmpdir):
    tmpdir = Path(tmpdir)
    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()
    _rand_image().save(tmpdir / "a_1.png")
    _rand_image().save(tmpdir / "b_1.png")

    train_images = [
        str(tmpdir / "a_1.png"),
        str(tmpdir / "b_1.png"),
    ]

    train_dataset = fo.Dataset.from_dir(str(tmpdir), dataset_type=fo.types.ImageDirectory)

    s1 = train_dataset[train_images[0]]
    s2 = train_dataset[train_images[1]]
    s1["test"] = fo.Classification(label="1")
    s2["test"] = fo.Classification(label="2")
    s1.save()
    s2.save()

    data = ImageClassificationData.from_fiftyone(
        train_dataset=train_dataset,
        label_field="test",
        batch_size=2,
        num_workers=0,
        image_size=(64, 64),
    )

    model = ImageClassifier(num_classes=2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
def test_from_fiftyone(tmpdir):
    tmpdir = Path(tmpdir)
    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()
    _rand_image().save(tmpdir / "a_1.png")
    _rand_image().save(tmpdir / "b_1.png")

    train_images = [
        str(tmpdir / "a_1.png"),
        str(tmpdir / "b_1.png"),
    ]

    dataset = fo.Dataset.from_dir(str(tmpdir), dataset_type=fo.types.ImageDirectory)

    s1 = dataset[train_images[0]]
    s2 = dataset[train_images[1]]
    s1["test"] = fo.Classification(label="1")
    s2["test"] = fo.Classification(label="2")
    s1.save()
    s2.save()

    img_data = ImageClassificationData.from_fiftyone(
        train_dataset=dataset,
        test_dataset=dataset,
        val_dataset=dataset,
        label_field="test",
        batch_size=2,
        num_workers=0,
    )

    assert img_data.train_dataloader() is not None
    assert img_data.val_dataloader() is not None
    assert img_data.test_dataloader() is not None

    # check train data
    data = next(iter(img_data.train_dataloader()))
    imgs, labels = data["input"], data["target"]
    assert imgs.shape == (2, 3, 196, 196)
    assert labels.shape == (2,)
    assert sorted(list(labels.numpy())) == [0, 1]

    # check val data
    data = next(iter(img_data.val_dataloader()))
    imgs, labels = data["input"], data["target"]
    assert imgs.shape == (2, 3, 196, 196)
    assert labels.shape == (2,)
    assert sorted(list(labels.numpy())) == [0, 1]

    # check test data
    data = next(iter(img_data.test_dataloader()))
    imgs, labels = data["input"], data["target"]
    assert imgs.shape == (2, 3, 196, 196)
    assert labels.shape == (2,)
    assert sorted(list(labels.numpy())) == [0, 1]
def normalize_model_values(dataset_name):
    """Standardize plane model string values.

    The plane model string values received from ADS-B broadcasts are not
    standardized. An A319 model, for instance, could be represented as
    A319-112 or A319-115 or A319-132. This function helps standardize all
    model strings.

    Args:
        dataset_name - the name of a voxel51 dataset

    Returns:
        dataset - a voxel51 dataset object
    """
    # TODO: Need to add testing.
    dataset = fo.load_dataset(dataset_name)

    # json file storing plane model strings as keys and standardized models
    # as values
    with open("plane_model_dict.json", "r") as file_path:
        plane_model_dict = json.load(file_path)

    # loop through each row of the model column
    for sample in dataset.exists("model_name"):
        model = sample["model_name"].label
        norm_model = plane_model_dict.get(model, None)
        if norm_model is not None:
            sample["norm_model"] = fo.Classification(label=norm_model)
            sample.save()
        else:
            logging.info("Match not found for: %s", model)

    return dataset
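# A minimal sketch of the plane_model_dict.json mapping that
# normalize_model_values expects: raw ADS-B model strings as keys and
# standardized model names as values. The entries and dataset name below
# are illustrative assumptions, not the project's actual mapping file.
import json

example_plane_model_dict = {
    "A319-112": "A319",
    "A319-115": "A319",
    "A319-132": "A319",
}
with open("plane_model_dict.json", "w") as fp:
    json.dump(example_plane_model_dict, fp)

normalized_dataset = normalize_model_values("plane_detection")  # assumed name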
def test_accuracy_resnet50(capsys):
    detection_dir = "/home/Develop/Dataset/Imagenet/Validation-2012/prediction"

    dataset = fo.load_dataset("imagenet_validation")
    classes = dataset.default_classes

    with capsys.disabled():
        with fo.ProgressBar() as pb:
            detections = []
            for sample in pb(dataset):
                head, tail = os.path.split(sample.filepath)
                filename, file_extension = os.path.splitext(tail)
                csv_path = detection_dir + "/" + filename + ".txt"
                with open(csv_path, "r") as file:
                    reader = csv.reader(file)
                    for row in reader:
                        cls_index = row[0]
                        sample["resnet50"] = fo.Classification(
                            label=classes[int(cls_index)],
                        )
                        sample.save()

    results = dataset.evaluate_classifications(
        "resnet50",
        gt_field="ground_truth",
        eval_key="resnet50_eval",
    )
    print(results.metrics())
    assert results.metrics()["accuracy"] > 0.74
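# The test above assumes one .txt file per validation image, named after the
# image and containing CSV rows whose first column is the predicted class
# index. A sketch of writing such a file; the path, file name, and class
# index are assumptions for illustration:
import csv

prediction_path = ("/home/Develop/Dataset/Imagenet/Validation-2012/prediction/"
                   "ILSVRC2012_val_00000001.txt")  # hypothetical file
with open(prediction_path, "w", newline="") as fp:
    writer = csv.writer(fp)
    writer.writerow(["65"])  # predicted class index as the first column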
def setUpClass(cls):
    urllib.request.urlretrieve(cls.image_url, cls.test_one)
    etau.copy_file(cls.test_one, cls.test_two)
    cls.dataset.add_sample(cls.sample1)
    cls.dataset.add_sample(cls.sample2)
    cls.sample1["scalar"] = 1
    cls.sample1["label"] = fo.Classification(label="test")
    cls.sample1.tags.append("tag")
    cls.sample1["floats"] = [
        0.5,
        float("nan"),
        float("inf"),
        float("-inf"),
    ]
    cls.sample1.save()
def _make_classification_dataset(img, images_dir, num_samples=4):
    exts = [".jpg", ".png"]

    samples = []
    for idx in range(num_samples):
        filepath = os.path.join(images_dir, "%06d%s" % (idx, exts[idx % len(exts)]))
        etai.write(img, filepath)

        label = random.choice(["sun", "rain", "snow"])
        samples.append(
            fo.Sample(filepath=filepath, ground_truth=fo.Classification(label=label))
        )

    dataset = fo.Dataset()
    dataset.add_samples(samples)
    return dataset
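# A minimal usage sketch for _make_classification_dataset. The random image
# and temporary directory below are assumptions for illustration; etai is
# eta.core.image from the ETA package that the helper already relies on.
import tempfile

import numpy as np

img = np.random.randint(255, size=(32, 32, 3), dtype=np.uint8)
with tempfile.TemporaryDirectory() as images_dir:
    dataset = _make_classification_dataset(img, images_dir, num_samples=4)
    print(dataset.count())  # 4 samples with random weather labels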
def test_filter_classifications(self):
    self.sample1["test_clfs"] = fo.Classifications(
        classifications=[
            fo.Classification(label="friend", confidence=0.9),
            fo.Classification(label="friend", confidence=0.3),
            fo.Classification(label="stopper", confidence=0.1),
            fo.Classification(label="big bro", confidence=0.6),
        ]
    )
    self.sample1.save()

    self.sample2["test_clfs"] = fo.Classifications(
        classifications=[
            fo.Classification(label="friend", confidence=0.99),
            fo.Classification(label="tricam", confidence=0.2),
            fo.Classification(label="hex", confidence=0.8),
        ]
    )
    self.sample2.save()

    view = self.dataset.filter_classifications(
        "test_clfs", (F("confidence") > 0.5) & (F("label") == "friend")
    )

    for sv in view:
        for clf in sv.test_clfs.classifications:
            self.assertGreater(clf.confidence, 0.5)
            self.assertEqual(clf.label, "friend")
def add_faa_data_to_voxel51_dataset(
    voxel51_dataset_name, faa_master_dataset_path, faa_reference_dataset_path
):
    """Add FAA data to each entry in voxel51 dataset.

    Args:
        voxel51_dataset_name (str) - the voxel51 dataset name
        faa_master_dataset_path - path to FAA master dataset .txt
        faa_reference_dataset_path - path to FAA reference dataset .txt

    Returns:
        dataset (voxel51 dataset object)
    """
    subprocess.run("./install_faa_data.sh", check=True)
    # import master dataset and strip white space from beacon column
    planes_master = pd.read_csv(faa_master_dataset_path, index_col="MODE S CODE HEX")
    planes_master.index = planes_master.index.str.strip()
    planes_reference = pd.read_csv(
        faa_reference_dataset_path, index_col="CODE", encoding="utf-8-sig"
    )
    dataset = fo.load_dataset(voxel51_dataset_name)
    for row in dataset:
        # render plane id in uppercase letters
        plane_icao24 = row["icao24"].label.upper()
        # find plane model code associated with the icao24 code, i.e. mode s code hex
        try:
            model_code = planes_master.loc[plane_icao24, "MFR MDL CODE"]
        except (IndexError, KeyError):
            logging.info(
                "Plane ID not found in master dataset. Plane ID: %s", plane_icao24
            )
            continue
        # find reference row with all relevant model data
        plane_reference_row = planes_reference.loc[model_code]
        # extract all relevant data from plane_reference_row;
        # convert all fields to strings
        manufacturer = str(plane_reference_row["MFR"]).rstrip()
        model_name = str(plane_reference_row["MODEL"]).rstrip()
        aircraft_type = str(plane_reference_row["TYPE-ACFT"])
        engine_type = str(plane_reference_row["TYPE-ENG"])
        num_engines = str(plane_reference_row["NO-ENG"])
        num_seats = str(plane_reference_row["NO-SEATS"])
        aircraft_weight = str(plane_reference_row["AC-WEIGHT"])
        # norm_model = normalize_single_model_value(model_name)

        # store values in voxel51 dataset row
        row["model_code"] = fo.Classification(label=model_code)
        row["manufacturer"] = fo.Classification(label=manufacturer)
        row["model_name"] = fo.Classification(label=model_name)
        row["aircraft_type"] = fo.Classification(label=aircraft_type)
        row["engine_type"] = fo.Classification(label=engine_type)
        row["num_engines"] = fo.Classification(label=num_engines)
        row["num_seats"] = fo.Classification(label=num_seats)
        row["aircraft_weight"] = fo.Classification(label=aircraft_weight)
        # if norm_model is not None:
        #     sample["norm_model"] = fo.Classification(label=norm_model)
        row.save()
    return dataset
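# A minimal usage sketch for add_faa_data_to_voxel51_dataset. The dataset
# name and file paths below are hypothetical; MASTER.txt and ACFTREF.txt are
# the file names used by the FAA releasable aircraft registration database,
# which install_faa_data.sh is expected to fetch.
enriched = add_faa_data_to_voxel51_dataset(
    "plane_detection",      # assumed voxel51 dataset name
    "faa_data/MASTER.txt",  # assumed path to the FAA master file
    "faa_data/ACFTREF.txt", # assumed path to the FAA reference file
)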
def train_with_hydra(cfg: DictConfig):
    # setup inference path
    cfg.inference.base_path = (
        cfg.inference.model_path_to_load.split("train/", 1)[0] + "inference/"
    )
    print("INFERENCE RESULTS WILL BE SAVED {}".format(cfg.inference.base_path))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # for inference, always set batch_size = 1
    cfg.inference.batch_size = 1

    createFolderForExplanation(cfg)

    # dataclass for custom image transforms; see the dataset configuration .yaml
    @dataclass
    class ImageClassificationInputTransform(InputTransform):
        # transforms applied to training input data
        def train_input_per_sample_transform(self):
            return instantiate(cfg.dataset.train_transform, _convert_="all")

        # transform label to tensor
        def target_per_sample_transform(self) -> Callable:
            return torch.as_tensor

        # transforms applied to validation input data
        def val_input_per_sample_transform(self):
            return instantiate(cfg.dataset.val_transform, _convert_="all")

        def predict_input_per_sample_transform(self):
            return instantiate(cfg.dataset.test_transform, _convert_="all")

    # ----------
    # INSTANTIATE DATASET FROM HYDRA CONF
    # ----------
    # check for empty class folders
    for dirpath, dirnames, files in os.walk(cfg.inference.dataset_path):
        if dirpath == cfg.inference.dataset_path:
            # the root directory holds no files
            pass
        elif not files:
            raise Exception(
                "Test folder cannot be empty. Otherwise target labels are not correct"
            )

    datamodule = ImageClassificationData.from_folders(
        predict_folder=cfg.inference.dataset_path,
        predict_transform=ImageClassificationInputTransform,
        batch_size=cfg.inference.batch_size,
    )

    # ----------
    # INSTANTIATE MODEL AND TRAINER
    # ----------
    model = instantiate(cfg.model.image_classifier)
    model = model.load_from_checkpoint(cfg.inference.model_path_to_load)

    # instantiate trainer
    trainer = instantiate(cfg.trainer.default)

    # ----------
    # RUN PREDICTION
    # ----------
    predictions = trainer.predict(model, datamodule=datamodule)

    # the model needs to be moved to the GPU after trainer.predict in order
    # to run the explanation on the GPU
    if torch.cuda.is_available():
        modeladapter = model.to(device)
        modeladapter.eval()

    # ----------
    # RUN MODEL INSPECTION
    # ----------
    if cfg.inference.captum.enable:
        print("SAVE EXPLANATION FILES")
        explanation_list = []

    if cfg.inference.confusion_matrix.enable:
        samples = []
        y_pred = []
        y_true = []

    if cfg.inference.calibration.enable:
        preds_calibration = []
        labels_oneh_calibration = []

    for prediction in predictions:
        # values must be float32
        out32 = torch.tensor(
            prediction[0][DataKeys.PREDS].detach().view(1, -1).contiguous(),
            dtype=torch.float32,
        )
        inputImage = prediction[0][DataKeys.INPUT]

        if torch.cuda.is_available():
            out32 = out32.cuda()
            inputImage = inputImage.cuda()

        output = F.softmax(out32, dim=1)
        prediction_score, pred_label_idx = torch.topk(output, 1)
        pred_label_idx.squeeze_()
        pred_label_num = pred_label_idx.cpu().item()
        gt_label_num = prediction[0][DataKeys.TARGET].item()

        filepath = prediction[0][DataKeys.METADATA]["filepath"]
        filename = os.path.basename(os.path.normpath(filepath))
        filename_without_ext, file_extension = os.path.splitext(filename)

        # EXPLANATION
        if cfg.inference.captum.enable:
            explanation_list.append(
                save_explanation(
                    inputImage,
                    modeladapter,
                    cfg,
                    pred_label_idx,
                    pred_label_num,
                    gt_label_num,
                    filename,
                    filepath,
                    filename_without_ext,
                    prediction_score,
                )
            )

        # CONFUSION MATRIX
        if cfg.inference.confusion_matrix.enable:
            y_true.extend([gt_label_num])
            y_pred.extend([pred_label_num])
            samples.append(
                fo.Sample(
                    filepath=filepath,
                    ground_truth=fo.Classification(
                        label=cfg.inference.class_name[gt_label_num]
                    ),
                    prediction=fo.Classification(
                        label=cfg.inference.class_name[pred_label_num]
                    ),
                )
            )

        # CALIBRATION
        if cfg.inference.calibration.enable:
            pred_calib = output.cpu().detach().numpy()
            preds_calibration.extend(pred_calib)

            # WARNING: class_name must be configured
            label_oneh = torch.nn.functional.one_hot(
                torch.tensor([gt_label_num]).to(torch.long),
                num_classes=len(cfg.inference.class_name),
            )
            label_oneh = label_oneh.cpu().detach().numpy()
            labels_oneh_calibration.extend(label_oneh)

    # save explanation CSV for further analysis
    if cfg.inference.captum.enable:
        explanation_dataframe = pd.DataFrame(
            explanation_list,
            columns=["pred", "GT", "predict_score", "image_path"],
        )
        # the csv file can be imported into the Ai4Prod explainability software
        explanation_dataframe.to_csv(cfg.inference.captum.csv_result, index=False)

    # save confusion matrix and report other stats
    if cfg.inference.confusion_matrix.enable:
        dataset = fo.Dataset("custom_evaluation")
        dataset.add_samples(samples)
        results = dataset.evaluate_classifications(
            "prediction",
            gt_field="ground_truth",
            eval_key="custom_eval",
        )
        plot = results.plot_confusion_matrix(
            classes=cfg.inference.class_name,
            backend="matplotlib",
            figsize=(6, 6),
        )
        plot.savefig(cfg.inference.confusion_matrix.path_to_confusion_matrix_image)

        dict_report = results.report()
        df_metric = pd.DataFrame(dict_report).transpose()
        df_metric.to_csv(cfg.inference.confusion_matrix.path_to_metrics_csv)

        # save confusion matrix as csv; you can use this in C++
        cf_matrix = confusion_matrix(y_true, y_pred, normalize="true")
        df_cm = pd.DataFrame(
            cf_matrix,
            index=[i for i in cfg.inference.class_name],
            columns=[i for i in cfg.inference.class_name],
        )
        df_cm.to_csv(
            cfg.inference.confusion_matrix.path_to_confusion_matrix_csv,
            index=False,
            header=False,
        )

    if cfg.inference.calibration.enable:
        preds_calibration = np.array(preds_calibration).flatten()
        labels_oneh_calibration = np.array(labels_oneh_calibration).flatten()
        draw_reliability_graph(
            preds_calibration,
            cfg.inference.calibration.path_to_creliability_diagram,
            labels_oneh_calibration,
        )
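# A minimal sketch of the Hydra/OmegaConf config shape that train_with_hydra
# reads above. Every key and value here is an assumption reconstructed from
# the attribute accesses in the function, not the project's real .yaml files;
# only the inference section is sketched (dataset/model/trainer groups omitted).
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "inference": {
        "model_path_to_load": "outputs/train/checkpoints/best.ckpt",
        "dataset_path": "data/test",
        "batch_size": 1,
        "class_name": ["cat", "dog"],
        "captum": {"enable": False, "csv_result": "explanation.csv"},
        "confusion_matrix": {
            "enable": True,
            "path_to_confusion_matrix_image": "cm.png",
            "path_to_metrics_csv": "metrics.csv",
            "path_to_confusion_matrix_csv": "cm.csv",
        },
        "calibration": {
            "enable": False,
            "path_to_creliability_diagram": "reliability.png",
        },
    },
})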