Code example #1
File: customvox51.py Project: mchadwick-iqt/SkyScan
def add_sample_images_to_voxel51_dataset(image_list, dataset, datasource_name=None):
    """Add sample images to a voxel51 dataset.

    # TODO: Add check to make sure you can't add the same image twice

    Args:
        image_list - list of image data dicts
        dataset - a voxel51 dataset object
        datasource_name - an optional string that allows an identifying
                tag to be added to the batch of images being imported
    Returns:
        dataset (voxel51 dataset object)
    """
    for image in image_list:
        # create a voxel51 row/sample based on the path to the image
        sample = fo.Sample(filepath=image["file_path"])
        # add additional columns to the voxel51 dataset row
        sample["external_id"] = fo.Classification(label=image["external_id"])
        sample["bearing"] = fo.Classification(label=image["bearing"])
        sample["elevation"] = fo.Classification(label=image["elevation"])
        sample["distance"] = fo.Classification(label=image["distance"])
        sample["icao24"] = fo.Classification(label=image["icao24"])
        if datasource_name is not None and len(datasource_name) > 0:
            sample.tags.append(datasource_name)
        dataset.add_sample(sample)

    # return modified dataset
    return dataset
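For context, here is a minimal usage sketch for the function above; the dataset name, file path, and metadata values are hypothetical placeholders, not values from the SkyScan project.

import fiftyone as fo

# hypothetical metadata dicts containing the keys the function reads
image_list = [
    {
        "file_path": "/data/images/capture_0001.jpg",
        "external_id": "capture_0001",
        "bearing": "112.4",
        "elevation": "23.1",
        "distance": "1875",
        "icao24": "a1b2c3",
    },
]

dataset = fo.Dataset("plane_images")  # hypothetical dataset name
dataset = add_sample_images_to_voxel51_dataset(
    image_list, dataset, datasource_name="test_batch"
)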
Code example #2
def test_classification_fiftyone(tmpdir):
    tmpdir = Path(tmpdir)

    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()
    _rand_image().save(tmpdir / "a_1.png")
    _rand_image().save(tmpdir / "b_1.png")

    train_images = [
        str(tmpdir / "a_1.png"),
        str(tmpdir / "b_1.png"),
    ]

    train_dataset = fo.Dataset.from_dir(str(tmpdir),
                                        dataset_type=fo.types.ImageDirectory)
    s1 = train_dataset[train_images[0]]
    s2 = train_dataset[train_images[1]]
    s1["test"] = fo.Classification(label="1")
    s2["test"] = fo.Classification(label="2")
    s1.save()
    s2.save()

    data = ImageClassificationData.from_fiftyone(
        train_dataset=train_dataset,
        label_field="test",
        batch_size=2,
        num_workers=0,
        image_size=(64, 64),
    )

    model = ImageClassifier(num_classes=2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
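This test (and the one in the next example) relies on a _rand_image helper that is not shown in the snippet. A minimal stand-in, assuming the helper simply produces a small random PIL image, could look like this:

import numpy as np
from PIL import Image

def _rand_image(size=(64, 64)):
    # random RGB image returned as a PIL Image, so .save() works as used above
    return Image.fromarray(np.random.randint(0, 255, (*size, 3), dtype=np.uint8))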
Code example #3
def test_from_fiftyone(tmpdir):
    tmpdir = Path(tmpdir)

    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()
    _rand_image().save(tmpdir / "a_1.png")
    _rand_image().save(tmpdir / "b_1.png")

    train_images = [
        str(tmpdir / "a_1.png"),
        str(tmpdir / "b_1.png"),
    ]

    dataset = fo.Dataset.from_dir(str(tmpdir),
                                  dataset_type=fo.types.ImageDirectory)
    s1 = dataset[train_images[0]]
    s2 = dataset[train_images[1]]
    s1["test"] = fo.Classification(label="1")
    s2["test"] = fo.Classification(label="2")
    s1.save()
    s2.save()

    img_data = ImageClassificationData.from_fiftyone(
        train_dataset=dataset,
        test_dataset=dataset,
        val_dataset=dataset,
        label_field="test",
        batch_size=2,
        num_workers=0,
    )
    assert img_data.train_dataloader() is not None
    assert img_data.val_dataloader() is not None
    assert img_data.test_dataloader() is not None

    # check train data
    data = next(iter(img_data.train_dataloader()))
    imgs, labels = data['input'], data['target']
    assert imgs.shape == (2, 3, 196, 196)
    assert labels.shape == (2, )
    assert sorted(list(labels.numpy())) == [0, 1]

    # check val data
    data = next(iter(img_data.val_dataloader()))
    imgs, labels = data['input'], data['target']
    assert imgs.shape == (2, 3, 196, 196)
    assert labels.shape == (2, )
    assert sorted(list(labels.numpy())) == [0, 1]

    # check test data
    data = next(iter(img_data.test_dataloader()))
    imgs, labels = data['input'], data['target']
    assert imgs.shape == (2, 3, 196, 196)
    assert labels.shape == (2, )
    assert sorted(list(labels.numpy())) == [0, 1]
Code example #4
File: customvox51.py Project: mchadwick-iqt/SkyScan
def normalize_model_values(dataset_name):
    """Standardize plane model string values.

    The plane model string values received from ADS-B broadcasts
    are not standardized. An A319 model, for instance, could be
    represented as A319-112 or A319-115 or A39-132. This function
    helps standardize all model strings.

    Args:
        dataset_name (str) - the name of a voxel51 dataset

    Returns:
        dataset - a voxel51 dataset object
    """
    # TODO: Need to add testing.

    dataset = fo.load_dataset(dataset_name)

    # JSON file storing raw plane model strings as keys and the standardized
    # model names as values
    with open("plane_model_dict.json", "r") as file_path:
        plane_model_dict = json.load(file_path)

    # Loop through each sample that has a model_name value
    for sample in dataset.exists("model_name"):
        model = sample["model_name"].label
        norm_model = plane_model_dict.get(model, None)
        #print("{} = {}".format(model, norm_model))
        if norm_model is not None:
            sample["norm_model"] = fo.Classification(label=norm_model)
            sample.save()
        else:
            logging.info("Match not found for: %s", model)

    return dataset
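The function assumes a plane_model_dict.json file in the working directory that maps raw model strings to normalized names. A sketch of how such a file could be produced is shown below; the mappings are illustrative, not the project's actual data.

import json

# illustrative mapping from raw ADS-B model strings to a normalized value
plane_model_dict = {
    "A319-112": "A319",
    "A319-115": "A319",
    "A319-132": "A319",
}

with open("plane_model_dict.json", "w") as file_path:
    json.dump(plane_model_dict, file_path, indent=2)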
Code example #5
def test_accuracy_resnet50(capsys):
    detectionDir = "/home/Develop/Dataset/Imagenet/Validation-2012/prediction"

    dataset = fo.load_dataset("imagenet_validation")
    classes = dataset.default_classes

    with capsys.disabled():
        with fo.ProgressBar() as pb:

            detections = []
            for sample in pb(dataset):

                head, tail = os.path.split(sample.filepath)
                filename, file_extension = os.path.splitext(tail)
                csvPath = detectionDir + "/" + filename + ".txt"

                with open(csvPath, "r") as file:

                    reader = csv.reader(file)
                    for row in reader:

                        cls_index = row[0]

                        sample["resnet50"] = fo.Classification(
                            label=classes[int(cls_index)])
                        sample.save()

        results = dataset.evaluate_classifications(
            "resnet50",
            gt_field="ground_truth",
            eval_key="resnet50_eval",
        )
        print(results.metrics())
        assert results.metrics()["accuracy"] > 0.74
Code example #6
    @classmethod
    def setUpClass(cls):
        urllib.request.urlretrieve(cls.image_url, cls.test_one)
        etau.copy_file(cls.test_one, cls.test_two)
        cls.dataset.add_sample(cls.sample1)
        cls.dataset.add_sample(cls.sample2)
        cls.sample1["scalar"] = 1
        cls.sample1["label"] = fo.Classification(label="test")
        cls.sample1.tags.append("tag")
        cls.sample1["floats"] = [
            0.5,
            float("nan"),
            float("inf"),
            float("-inf"),
        ]
        cls.sample1.save()
Code example #7
File: dataset_tests.py Project: xibeiwind/fiftyone
def _make_classification_dataset(img, images_dir, num_samples=4):
    exts = [".jpg", ".png"]

    samples = []
    for idx in range(num_samples):
        filepath = os.path.join(images_dir,
                                "%06d%s" % (idx, exts[idx % len(exts)]))
        etai.write(img, filepath)

        label = random.choice(["sun", "rain", "snow"])
        samples.append(
            fo.Sample(filepath=filepath,
                      ground_truth=fo.Classification(label=label)))

    dataset = fo.Dataset()
    dataset.add_samples(samples)
    return dataset
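A possible way to call this helper, assuming img is a numpy image array that etai.write (eta.core.image) can serialize; the output directory is a placeholder.

import numpy as np

# random image array passed to the helper defined above
img = np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8)
dataset = _make_classification_dataset(img, "/tmp/classification_images")
print(len(dataset))  # 4 samples with random weather labels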
Code example #8
File: stage_tests.py Project: zfyong/fiftyone
    def test_filter_classifications(self):
        self.sample1["test_clfs"] = fo.Classifications(classifications=[
            fo.Classification(
                label="friend",
                confidence=0.9,
            ),
            fo.Classification(
                label="friend",
                confidence=0.3,
            ),
            fo.Classification(
                label="stopper",
                confidence=0.1,
            ),
            fo.Classification(
                label="big bro",
                confidence=0.6,
            ),
        ])
        self.sample1.save()
        self.sample2["test_clfs"] = fo.Classifications(classifications=[
            fo.Classification(
                label="friend",
                confidence=0.99,
            ),
            fo.Classification(
                label="tricam",
                confidence=0.2,
            ),
            fo.Classification(
                label="hex",
                confidence=0.8,
            ),
        ])
        self.sample2.save()

        view = self.dataset.filter_classifications(
            "test_clfs", (F("confidence") > 0.5) & (F("label") == "friend"))

        for sv in view:
            for clf in sv.test_clfs.classifications:
                self.assertGreater(clf.confidence, 0.5)
                self.assertEqual(clf.label, "friend")
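The F used in the filter expression is FiftyOne's ViewField; in the full test module it is typically imported as:

from fiftyone import ViewField as F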
Code example #9
File: customvox51.py Project: mchadwick-iqt/SkyScan
def add_faa_data_to_voxel51_dataset(
    voxel51_dataset_name, faa_master_dataset_path, faa_reference_dataset_path
):
    """Add FAA data to each entry in voxel51 dataset.

    Args:
        voxel51_dataset_name (str) - the voxel51 dataset name
        faa_master_dataset_path - path to FAA master dataset .txt
        faa_reference_dataset_path - path to FAA reference dataset .txt

    Returns:
        dataset (voxel51 dataset object)
    """
    subprocess.run("./install_faa_data.sh", check=True)

    # import master dataset and strip white space from beacon column
    planes_master = pd.read_csv(faa_master_dataset_path, index_col="MODE S CODE HEX")
    planes_master.index = planes_master.index.str.strip()

    planes_reference = pd.read_csv(
        faa_reference_dataset_path, index_col="CODE", encoding="utf-8-sig"
    )

    dataset = fo.load_dataset(voxel51_dataset_name)

    for row in dataset:
        # render the plane's icao24 code in uppercase to match the FAA index
        plane_icao24 = row["icao24"].label.upper()
        # find plane model code associated with the icao24 code, i.e. mode s code hex
        try:
            model_code = planes_master.loc[plane_icao24, "MFR MDL CODE"]
        except (IndexError, KeyError):
            logging.info(
                "Plane ID not found in master dataset. Plane ID: %s", plane_icao24
            )
            continue
        # find reference row with all relevant model data
        plane_reference_row = planes_reference.loc[model_code]
        # extract all relevant data from plane_reference_row
        # convert all fields to string
        manufacturer = str(plane_reference_row["MFR"]).rstrip()
        model_name = str(plane_reference_row["MODEL"]).rstrip()
        aircraft_type = str(plane_reference_row["TYPE-ACFT"])
        engine_type = str(plane_reference_row["TYPE-ENG"])
        num_engines = str(plane_reference_row["NO-ENG"])
        num_seats = str(plane_reference_row["NO-SEATS"])
        aircraft_weight = str(plane_reference_row["AC-WEIGHT"])
        # norm_model = normalize_single_model_value(model_name)

        # store values in voxel51 dataset row
        row["model_code"] = fo.Classification(label=model_code)
        row["manufacturer"] = fo.Classification(label=manufacturer)
        row["model_name"] = fo.Classification(label=model_name)
        row["aircraft_type"] = fo.Classification(label=aircraft_type)
        row["engine_type"] = fo.Classification(label=engine_type)
        row["num_engines"] = fo.Classification(label=num_engines)
        row["num_seats"] = fo.Classification(label=num_seats)
        row["aircraft_weight"] = fo.Classification(label=aircraft_weight)

        # if norm_model is not None:
        #    sample["norm_model"] = fo.Classification(label=norm_model)
        row.save()

    return dataset
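A hypothetical invocation of the function above, assuming the FAA registry files have already been downloaded by install_faa_data.sh; the dataset name and file paths are placeholders.

dataset = add_faa_data_to_voxel51_dataset(
    "plane_images",          # name of an existing voxel51 dataset
    "faa_data/MASTER.txt",   # FAA master aircraft registry file
    "faa_data/ACFTREF.txt",  # FAA aircraft reference file
)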
Code example #10
File: inference.py Project: ai4prod/ai4prod_python
def train_with_hydra(cfg: DictConfig):

    # setup inference path

    cfg.inference.base_path = cfg.inference.model_path_to_load.split(
        "train/", 1)[0] + "inference/"
    print("INFERENCE RESULTS WILL BE SAVED {}".format(cfg.inference.base_path))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # For inference, always set batch_size = 1
    cfg.inference.batch_size = 1

    createFolderForExplanation(cfg)

    # Dataclass for custom image transforms; see the dataset configuration in the .yaml
    @dataclass
    class ImageClassificationInputTransform(InputTransform):

        # transforms added to input training data
        def train_input_per_sample_transform(self):
            return instantiate(cfg.dataset.train_transform, _convert_="all")

        # transform label to tensor

        def target_per_sample_transform(self) -> Callable:
            return torch.as_tensor

        # transforms added to input validation data
        def val_input_per_sample_transform(self):
            return instantiate(cfg.dataset.val_transform, _convert_="all")

        def predict_input_per_sample_transform(self):
            return instantiate(cfg.dataset.test_transform, _convert_="all")

    # ----------
    # INSTANTIATE DATASET FROM HYDRA CONF
    # -----------

    # Check that no class folder under the dataset path is empty
    for dirpath, dirnames, files in os.walk(cfg.inference.dataset_path):

        if dirpath == cfg.inference.dataset_path:
            # the root directory itself is expected to contain no files
            continue

        if not files:
            raise Exception(
                "Test folder cannot be empty. Otherwise target labels are not correct"
            )

    datamodule = ImageClassificationData.from_folders(
        predict_folder=cfg.inference.dataset_path,
        predict_transform=ImageClassificationInputTransform,
        batch_size=cfg.inference.batch_size)

    # ----------
    # INSTANTIATE MODEL AND TRAINER
    # -----------

    model = instantiate(cfg.model.image_classifier)

    model = model.load_from_checkpoint(cfg.inference.model_path_to_load)

    # instantiate trainer

    trainer = instantiate(cfg.trainer.default)

    # ----------
    # RUN PREDICTION
    # -----------

    predictions = trainer.predict(model, datamodule=datamodule)

    # move the model to the target device after trainer.predict so the
    # explanation step runs on the GPU when one is available
    modeladapter = model.to(device)
    modeladapter.eval()

    # ----------
    # RUN MODEL INSPECTION
    # -----------

    if (cfg.inference.captum.enable):
        print("SAVE EXPLANATION FILES ")
        # CSV write or append
        explanation_list = []

    if (cfg.inference.confusion_matrix.enable):
        print("SAVE CONFUSION MATRIX FILES")
        # accumulators for the confusion matrix and evaluation report
        samples = []
        y_pred = []
        y_true = []

    if (cfg.inference.calibration.enable):
        preds_caliration = []
        labels_oneh_calibration = []

    for prediction in predictions:

        # value must be in float32
        out32 = torch.tensor(prediction[0][DataKeys.PREDS].detach().view(
            1, -1).contiguous(),
                             dtype=torch.float32)
        inputImage = prediction[0][DataKeys.INPUT]

        if (torch.cuda.is_available()):
            out32 = out32.cuda()
            inputImage = inputImage.cuda()

        output = F.softmax(out32, dim=1)
        prediction_score, pred_label_idx = torch.topk(output, 1)
        pred_label_idx.squeeze_()

        pred_label_num = pred_label_idx.cpu().item()

        gt_label_num = prediction[0][DataKeys.TARGET].item()

        filepath = prediction[0][DataKeys.METADATA]["filepath"]
        filename = os.path.basename(os.path.normpath(filepath))

        filename_without_ext, file_extension = os.path.splitext(filename)

        # EXPLANATION
        if (cfg.inference.captum.enable):

            explanation_list.append(
                save_explanation(inputImage, modeladapter, cfg, pred_label_idx,
                                 pred_label_num, gt_label_num, filename,
                                 filepath, filename_without_ext,
                                 prediction_score))

        # CONFUSION MATRIX
        if (cfg.inference.confusion_matrix.enable):

            y_true.extend([gt_label_num])
            y_pred.extend([pred_label_num])

            samples.append(
                fo.Sample(filepath=filepath,
                          ground_truth=fo.Classification(
                              label=cfg.inference.class_name[gt_label_num]),
                          prediction=fo.Classification(
                              label=cfg.inference.class_name[pred_label_num])))

        # CALIBRATION

        if (cfg.inference.calibration.enable):
            pred_calib = output.cpu().detach().numpy()
            preds_caliration.extend(pred_calib)

            # WARNING class_name must be configured
            label_oneh = torch.nn.functional.one_hot(
                torch.tensor([gt_label_num]).to(torch.long),
                num_classes=len(cfg.inference.class_name))
            label_oneh = label_oneh.cpu().detach().numpy()
            labels_oneh_calibration.extend(label_oneh)

    # Save Explanation CSV for further analysis

    if (cfg.inference.captum.enable):

        explanation_dataframe = pd.DataFrame(
            explanation_list,
            columns=["pred", "GT", "predict_score", "image_path"])
        # the CSV file can be imported into the Ai4Prod explainability software
        explanation_dataframe.to_csv(cfg.inference.captum.csv_result,
                                     index=False)

    # Save the confusion matrix and report other statistics

    if (cfg.inference.confusion_matrix.enable):
        dataset = fo.Dataset("custom_evaluation")
        dataset.add_samples(samples)

        results = dataset.evaluate_classifications(
            "prediction",
            gt_field="ground_truth",
            eval_key="custom_eval",
        )

        plot = results.plot_confusion_matrix(classes=cfg.inference.class_name,
                                             backend="matplotlib",
                                             figsize=(6, 6))

        plot.savefig(
            cfg.inference.confusion_matrix.path_to_confusion_matrix_image)

        dict_report = results.report()

        df_metric = pd.DataFrame(dict_report).transpose()
        df_metric.to_csv(cfg.inference.confusion_matrix.path_to_metrics_csv)

        # save the confusion matrix as CSV so it can be reused from C++

        cf_matrix = confusion_matrix(y_true, y_pred, normalize="true")

        df_cm = pd.DataFrame(cf_matrix,
                             index=[i for i in cfg.inference.class_name],
                             columns=[i for i in cfg.inference.class_name])

        df_cm.to_csv(
            cfg.inference.confusion_matrix.path_to_confusion_matrix_csv,
            index=False,
            header=False)

    if (cfg.inference.calibration.enable):
        preds_caliration = np.array(preds_caliration).flatten()
        labels_oneh_calibration = np.array(labels_oneh_calibration).flatten()
        draw_reliability_graph(
            preds_caliration,
            cfg.inference.calibration.path_to_creliability_diagram,
            labels_oneh_calibration)
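Not shown in this snippet is how train_with_hydra receives its DictConfig. A typical Hydra entry point is sketched below; the config_path and config_name are assumptions that depend on the project's actual Hydra layout.

import hydra
from omegaconf import DictConfig

# hypothetical wiring; adjust config_path/config_name to the real config tree
@hydra.main(config_path="conf", config_name="config")
def main(cfg: DictConfig) -> None:
    train_with_hydra(cfg)

if __name__ == "__main__":
    main()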