Esempio n. 1
0
def test__create_list_of_class_names():
    """Check that _create_list_of_class_names reports every detection label.

    A dataset with a single sample carrying three detections in the
    ``test_dets`` field is built, and the returned names are compared with
    the inserted labels without regard to order.
    """
    # credit to voxel51 crew for a helpful test suite from which this test borrows
    # https://github.com/voxel51/fiftyone/blob/a7c2b36a4f101330fa8edec35a9bdee841886f96/tests/unittests/view_tests.py#L59
    dataset = fo.Dataset()

    # (label, confidence, bounding_box) for each detection on the sample
    detection_specs = (
        ("friend", 0.9, [0, 0, 0.5, 0.5]),
        ("stopper", 0.1, [0, 0, 0.5, 0.5]),
        ("big bro", 0.6, [0, 0, 0.1, 0.5]),
    )
    detections = [
        fo.Detection(label=name, confidence=score, bounding_box=box)
        for name, score, box in detection_specs
    ]
    sample = fo.Sample(
        filepath="filepath1.jpg",
        tags=["test"],
        test_dets=fo.Detections(detections=detections),
        another_field=51,
    )
    dataset.add_sample(sample)

    class_names = _create_list_of_class_names(dataset, label_field="test_dets")
    assert set(class_names) == {"friend", "stopper", "big bro"}
Esempio n. 2
0
def test_create_detection_mapping():
    """Check the mapping text produced by create_detection_mapping().

    A named dataset with one ``train``-tagged sample holding two detections
    is created; the mapping generated for the ``test_dets`` field must be a
    string containing one ``item`` entry per label, with ids 1 and 2.
    """
    # credit to voxel51 crew for a helpful test suite from which this test borrows
    # https://github.com/voxel51/fiftyone/blob/a7c2b36a4f101330fa8edec35a9bdee841886f96/tests/unittests/view_tests.py#L59
    dataset = fo.Dataset(name="test_detection_mapping")

    detections = [
        fo.Detection(
            label=name,
            confidence=score,
            bounding_box=[0, 0, 0.5, 0.5],
        )
        for name, score in (("friend", 0.9), ("stopper", 0.1))
    ]
    sample = fo.Sample(
        filepath="filepath1.jpg",
        tags=["train"],
        test_dets=fo.Detections(detections=detections),
        another_field=51,
    )
    dataset.add_sample(sample)

    mapping = create_detection_mapping(
        "test_detection_mapping",
        label_field="test_dets",
        training_tag="train",
    )
    expected = 'item {\n  name: "friend"\n  id: 1\n}\nitem {\n  name: "stopper"\n  id: 2\n}\n'

    assert isinstance(mapping, str)
    assert mapping == expected
Esempio n. 3
0
def _make_image_labels_dataset(img,
                               images_dir,
                               num_samples=4,
                               num_objects_per_sample=3):
    """Build a dataset of images with randomly generated ``ImageLabels``.

    Args:
        img: the image written to disk for every sample
        images_dir: directory in which the image files are written
        num_samples (4): number of samples to create; file extensions
            alternate between ``.jpg`` and ``.png``
        num_objects_per_sample (3): number of randomly labeled detected
            objects added to each sample

    Returns:
        a new ``fo.Dataset`` whose samples store their labels in the
        ``ground_truth`` field
    """
    extensions = [".jpg", ".png"]
    frame_labels = ["sun", "rain", "snow"]
    object_labels = ["cat", "dog", "bird", "rabbit"]

    generated = []
    for sample_idx in range(num_samples):
        ext = extensions[sample_idx % len(extensions)]
        image_path = os.path.join(images_dir, "%06d%s" % (sample_idx, ext))
        etai.write(img, image_path)

        labels = etai.ImageLabels()

        # One frame-level categorical attribute named "label"
        frame_label = random.choice(frame_labels)
        labels.add_attribute(etad.CategoricalAttribute("label", frame_label))

        for _ in range(num_objects_per_sample):
            object_label = random.choice(object_labels)
            # Keep the first corner below 0.8 so that adding 0.2 stays <= 1
            x0 = 0.8 * random.random()
            y0 = 0.8 * random.random()
            box = etag.BoundingBox.from_coords(x0, y0, x0 + 0.2, y0 + 0.2)
            detected = etao.DetectedObject(label=object_label,
                                           bounding_box=box)
            labels.add_object(detected)

        sample = fo.Sample(
            filepath=image_path,
            ground_truth=fo.ImageLabels(labels=labels),
        )
        generated.append(sample)

    dataset = fo.Dataset()
    dataset.add_samples(generated)
    return dataset
Esempio n. 4
0
def _make_detection_dataset(
    img, images_dir, num_samples=4, num_objects_per_sample=3
):
    """Build a dataset of images with randomly generated detections.

    Args:
        img: the image written to disk for every sample
        images_dir: directory in which the image files are written
        num_samples (4): number of samples to create; file extensions
            alternate between ``.jpg`` and ``.png``
        num_objects_per_sample (3): number of detections per sample

    Returns:
        a new ``fo.Dataset`` whose samples store ``fo.Detections`` in the
        ``ground_truth`` field
    """
    extensions = [".jpg", ".png"]
    class_names = ["cat", "dog", "bird", "rabbit"]

    generated = []
    for sample_idx in range(num_samples):
        ext = extensions[sample_idx % len(extensions)]
        image_path = os.path.join(images_dir, "%06d%s" % (sample_idx, ext))
        etai.write(img, image_path)

        # Keyword arguments are evaluated left to right, so the label is
        # drawn before the two box coordinates for every detection.
        objects = [
            fo.Detection(
                label=random.choice(class_names),
                bounding_box=[
                    0.8 * random.random(),
                    0.8 * random.random(),
                    0.2,
                    0.2,
                ],
            )
            for _ in range(num_objects_per_sample)
        ]

        generated.append(
            fo.Sample(
                filepath=image_path,
                ground_truth=fo.Detections(detections=objects),
            )
        )

    dataset = fo.Dataset()
    dataset.add_samples(generated)
    return dataset
Esempio n. 5
0
def _make_multilabel_dataset(img, images_dir):
    """Build a one-sample dataset carrying several label fields.

    The image is written to ``image.jpg`` inside ``images_dir`` and a sample
    is deserialized from a dictionary holding image metadata, three
    classification fields, one detections field and a ``uniqueness`` value.

    Args:
        img: the image to write to disk
        images_dir: directory in which ``image.jpg`` is written

    Returns:
        a new ``fo.Dataset`` containing the single sample
    """
    image_path = os.path.join(images_dir, "image.jpg")
    etai.write(img, image_path)

    metadata = {
        "_cls": "ImageMetadata",
        "size_bytes": 53219,
        "mime_type": "image/jpeg",
        "width": 1280,
        "height": 720,
        "num_channels": 3,
    }

    traffic_sign = {
        "_cls": "Detection",
        "label": "traffic sign",
        "bounding_box": [
            0.7817958921875,
            0.39165613194444443,
            0.031193851562499986,
            0.06238770138888894,
        ],
        "attributes": {
            "occluded": {"_cls": "BooleanAttribute", "value": False},
            "truncated": {"_cls": "BooleanAttribute", "value": False},
            "trafficLightColor": {
                "_cls": "CategoricalAttribute",
                "value": "none",
            },
        },
    }

    sample_dict = {
        "filepath": image_path,
        "tags": [],
        "metadata": metadata,
        "gt_weather": {"_cls": "Classification", "label": "overcast"},
        "gt_scene": {"_cls": "Classification", "label": "city street"},
        "gt_timeofday": {"_cls": "Classification", "label": "daytime"},
        "gt_objs": {"_cls": "Detections", "detections": [traffic_sign]},
        "uniqueness": 0.5432120379367298,
    }

    dataset = fo.Dataset()
    dataset.add_sample(fo.Sample.from_dict(sample_dict))
    return dataset
Esempio n. 6
0
def _make_labeled_dataset_with_no_labels(img, images_dir):
    """Build a dataset that declares a label field but has no label values.

    One sample (``test.png`` written into ``images_dir``) is added, a
    ``ground_truth`` embedded-document field of type ``fo.Label`` is declared
    on the dataset, and ``dataset.info`` is populated with the entries noted
    in the comments below before the dataset is saved.

    Args:
        img: the image to write to disk
        images_dir: directory in which ``test.png`` is written

    Returns:
        the saved ``fo.Dataset``
    """
    image_path = os.path.join(images_dir, "test.png")
    etai.write(img, image_path)

    # CVATImageDataset
    task_labels = [
        {
            "name": "cat",
            "attributes": [
                {"name": "fluffy", "categories": ["yes", "no"]},
            ],
        },
        {
            "name": "dog",
            "attributes": [
                {"name": "awesome", "categories": ["yes", "of course"]},
            ],
        },
    ]

    info = {
        # FiftyOneImageClassificationDataset
        # FiftyOneImageDetectionDataset
        "classes": ["cat", "dog"],
        # COCODetectionDataset
        "year": "5151",
        "version": "5151",
        "description": "Brian's Dataset",
        "contributor": "Brian Moore",
        "url": "https://github.com/brimoor",
        "date_created": "5151-51-51T51:51:51",
        "licenses": ["license1", "license2"],
        # CVATImageDataset
        "task_labels": task_labels,
    }

    dataset = fo.Dataset()
    dataset.add_sample(fo.Sample(filepath=image_path))
    dataset.add_sample_field(
        "ground_truth",
        fo.EmbeddedDocumentField,
        embedded_doc_type=fo.Label,
    )
    dataset.info = info
    dataset.save()

    return dataset
Esempio n. 7
0
def visualize(labels: Union[List[Label], List[Dict[str, Label]]],
              filepaths: Optional[List[str]] = None,
              wait: Optional[bool] = True,
              label_field: Optional[str] = "predictions",
              **kwargs) -> Optional[Session]:
    """Display the output of a FiftyOne serializer in the FiftyOne App.

    Args:
        labels: Either a list of FiftyOne labels matching the entries of
            `filepaths`, or a list of dictionaries that each hold a
            ``"filepath"`` and its ``"predictions"``. A list of such lists
            (one per batch) is flattened first.
        filepaths: A list of filepaths to images or videos corresponding to
            the provided `labels`. Ignored when `labels` contains
            dictionaries, since the paths are read from them instead.
        wait: Whether to block until the launched session is closed
            (``True``) or to return immediately (``False``).
        label_field: Name of the dataset field in which the predictions are
            stored.
        **kwargs: Forwarded unchanged to ``fo.launch_app``.

    Returns:
        The session returned by ``fo.launch_app``, or ``None`` when
        ``flash._IS_TESTING`` is set.

    Raises:
        ModuleNotFoundError: If FiftyOne is not available.
        ValueError: If no filepaths were given, neither through `filepaths`
            nor inside `labels`.
    """
    if not _FIFTYONE_AVAILABLE:
        raise ModuleNotFoundError("Please, `pip install fiftyone`.")
    if flash._IS_TESTING:
        return None

    # Batched output arrives as a list of lists; merge it into one flat list
    came_in_batches = all(isinstance(item, list) for item in labels)
    if came_in_batches:
        labels = list(chain.from_iterable(labels))

    # Dictionary entries carry their own filepath next to the prediction
    carries_filepaths = all(isinstance(item, dict) for item in labels)
    if carries_filepaths:
        records = labels
        filepaths = [record["filepath"] for record in records]
        labels = [record["predictions"] for record in records]

    if filepaths is None:
        raise ValueError(
            "The `filepaths` argument is required if filepaths are not provided in `labels`."
        )

    dataset = fo.Dataset()
    if filepaths:
        labeled_images = list(zip(filepaths, labels))
        dataset.add_labeled_images(
            labeled_images,
            LabeledImageTupleSampleParser(),
            label_field=label_field,
        )

    session = fo.launch_app(dataset, **kwargs)
    if wait:
        session.wait()
    return session
Esempio n. 8
0
def _make_classification_dataset(img, images_dir, num_samples=4):
    """Build a dataset of images with a random classification label each.

    Args:
        img: the image written to disk for every sample
        images_dir: directory in which the image files are written
        num_samples (4): number of samples to create; file extensions
            alternate between ``.jpg`` and ``.png``

    Returns:
        a new ``fo.Dataset`` whose samples store an ``fo.Classification`` in
        the ``ground_truth`` field
    """
    extensions = [".jpg", ".png"]
    class_names = ["sun", "rain", "snow"]

    generated = []
    for sample_idx in range(num_samples):
        ext = extensions[sample_idx % len(extensions)]
        image_path = os.path.join(images_dir, "%06d%s" % (sample_idx, ext))
        etai.write(img, image_path)

        chosen = random.choice(class_names)
        classification = fo.Classification(label=chosen)
        generated.append(
            fo.Sample(filepath=image_path, ground_truth=classification))

    dataset = fo.Dataset()
    dataset.add_samples(generated)
    return dataset
Esempio n. 9
0
def create_voxel51_dataset(dataset_name):
    """Return the voxel51 dataset called ``dataset_name``, creating it if needed.

    Creation is attempted first and the new dataset is marked persistent.
    If that attempt raises ``ValueError``, the dataset is loaded by name
    instead.

    Args:
        dataset_name: name of the voxel51 dataset to create or load

    Returns:
        dataset (voxel51 dataset object)
    """
    try:
        created = fo.Dataset(name=dataset_name)
        created.persistent = True
        logging.info("Created %s dataset", dataset_name)
        return created
    except ValueError:
        # Creation failed with ValueError: fall back to loading by name
        existing = fo.load_dataset(name=dataset_name)
        logging.info("Dataset already exists. Loaded %s dataset", dataset_name)
        return existing
Esempio n. 10
0
 def setUp(self):
     """Create a dataset with two samples and keep all three on ``self``."""
     self.dataset = fo.Dataset()
     self.sample1 = fo.Sample(filepath="test_one.png")
     self.sample2 = fo.Sample(filepath="test_two.png")
     for sample in (self.sample1, self.sample2):
         self.dataset.add_sample(sample)
Esempio n. 11
0
def load_zoo_dataset(
    name,
    split=None,
    splits=None,
    dataset_dir=None,
    download_if_necessary=True,
    drop_existing_dataset=False,
):
    """Loads the dataset of the given name from the FiftyOne Dataset Zoo as
    a :class:`fiftyone.core.dataset.Dataset`.

    By default, the dataset will be downloaded if it does not already exist in
    the specified directory.

    The resulting dataset is named after the zoo dataset, with the requested
    splits appended using ``-`` as separator. If a dataset of that name
    already exists it is returned as-is, unless ``drop_existing_dataset`` is
    set, in which case it is deleted and rebuilt from disk.

    Args:
        name: the name of the zoo dataset to load. Call
            :func:`list_zoo_datasets` to see the available datasets
        split (None): a split to load, if applicable. Typical values are
            ``("train", "validation", "test")``. If neither ``split`` nor
            ``splits`` are provided, all available splits are loaded. Consult
            the documentation for the :class:`ZooDataset` you specified to see
            the supported splits
        splits (None): a list of splits to load, if applicable. Typical values
            are ``("train", "validation", "test")``. If neither ``split`` nor
            ``splits`` are provided, all available splits are loaded. Consult
            the documentation for the :class:`ZooDataset` you specified to see
            the supported splits
        dataset_dir (None): the directory in which the dataset is stored or
            will be downloaded. By default,
            :func:`fiftyone.core.dataset.get_default_dataset_dir` is used
        download_if_necessary (True): whether to download the dataset if it is
            not found in the specified dataset directory
        drop_existing_dataset (False): whether to drop an existing dataset
            with the same name if it exists

    Returns:
        a :class:`fiftyone.core.dataset.Dataset`
    """
    splits = _parse_splits(split, splits)

    if not download_if_necessary:
        zoo_dataset, dataset_dir = _parse_dataset_details(name, dataset_dir)
        info = zoo_dataset.load_info(dataset_dir)
    else:
        info, dataset_dir = download_zoo_dataset(
            name,
            splits=splits,
            dataset_dir=dataset_dir,
        )
        zoo_dataset = info.get_zoo_dataset()

    # The dataset name encodes which splits were explicitly requested
    dataset_name = zoo_dataset.name
    if splits is not None:
        dataset_name = dataset_name + "-" + "-".join(splits)

    if fo.dataset_exists(dataset_name):
        if drop_existing_dataset:
            fo.delete_dataset(dataset_name)
        else:
            logger.info(
                "Loading existing dataset '%s'. To reload from disk, first "
                "delete the existing dataset",
                dataset_name,
            )
            return fo.load_dataset(dataset_name)

    # No explicit request: fall back to every split the zoo dataset supports
    if splits is None and zoo_dataset.has_splits:
        splits = zoo_dataset.supported_splits

    dataset = fo.Dataset(dataset_name)
    dataset_type = info.get_dataset_type()

    if not splits:
        logger.info("Loading '%s'", zoo_dataset.name)
        dataset.add_dir(dataset_dir, dataset_type)
    else:
        # Each split is imported from its own directory and tagged by name
        for split_name in splits:
            split_dir = zoo_dataset.get_split_dir(dataset_dir, split_name)

            logger.info("Loading '%s' split '%s'", zoo_dataset.name,
                        split_name)
            dataset.add_dir(split_dir, dataset_type, tags=[split_name])

    classes = info.classes
    if classes is not None:
        dataset.info["classes"] = classes
        dataset.save()

    return dataset
Esempio n. 12
0
def dataset_view_test():
	"""Walk through dataset-view operations on the "quickstart" zoo dataset.

	The dataset is loaded with foz.load_zoo_dataset() and then used to
	exercise view creation, sample lookup, slicing, boolean and expression
	indexing, sorting, shuffling, random sampling, filtering and
	field selection/exclusion. The last section builds a small separate
	dataset with a 'capture_date' field to demonstrate date-based matching.
	Results are printed; a few identities are checked with assert.
	"""
	dataset = foz.load_zoo_dataset("quickstart")

	view = dataset.view()

	print(view)

	print("Media type = {}.".format(view.media_type))
	print("#examples = {}.".format(len(view)))
	#print("#examples = {}.".format(view.count()))

	#--------------------
	#for sample in view:
	#	print(sample)

	#--------------------
	sample = view.take(1).first()

	print(type(sample))  # fiftyone.core.sample.SampleView.

	same_sample = view[sample.id]
	also_same_sample = view[sample.filepath]

	#view[other_sample_id]  # KeyError: sample non-existent or not in view.

	# List available view operations on a dataset.
	print(dataset.list_view_stages())

	#--------------------
	# View stages.

	# Random set of 100 samples from the dataset
	random_view = dataset.take(100)

	print("#examples = {}.".format(len(random_view)))

	# Sort 'random_view' by filepath.
	sorted_random_view = random_view.sort_by("filepath")

	#--------------------
	# Slicing.

	# Skip the first 2 samples and take the next 3.
	range_view1 = dataset.skip(2).limit(3)

	# Equivalently, using array slicing.
	range_view2 = dataset[2:5]

	view = dataset[10:100]

	sample10 = view.first()
	sample100 = view.last()

	also_sample10 = view[sample10.id]
	assert also_sample10.filepath == sample10.filepath

	also_sample100 = view[sample100.filepath]
	assert sample100.id == also_sample100.id

	assert sample10 is not also_sample10

	# A boolean array encoding the samples to extract.
	bool_array = np.array(dataset.values("uniqueness")) > 0.7
	view = dataset[bool_array]
	print("#examples = {}.".format(len(view)))

	# Same selection expressed through the IDs of the samples flagged in 'bool_array'.
	ids = itertools.compress(dataset.values("id"), bool_array)
	view = dataset.select(ids)
	print("#examples = {}.".format(len(view)))

	# ViewExpression defining the samples to match.
	expr = fo.ViewField("uniqueness") > 0.7

	# Use a match() expression to define the view.
	view = dataset.match(expr)
	print("#examples = {}.".format(len(view)))

	# Equivalent: using boolean expression indexing is allowed too.
	view = dataset[expr]
	print("#examples = {}.".format(len(view)))

	#--------------------
	# Sorting.

	view = dataset.sort_by("filepath")
	view = dataset.sort_by("filepath", reverse=True)

	# Sort by number of detections in 'Detections' field 'ground_truth'.
	view = dataset.sort_by(fo.ViewField("ground_truth.detections").length(), reverse=True)

	print(len(view.first().ground_truth.detections))
	print(len(view.last().ground_truth.detections))

	#--------------------
	# Shuffling.

	# Randomly shuffle the order of the samples in the dataset.
	view1 = dataset.shuffle()

	# Randomly shuffle the samples in the dataset with a fixed seed.
	view2 = dataset.shuffle(seed=51)
	print(view2.first().id)

	also_view2 = dataset.shuffle(seed=51)
	print(also_view2.first().id)

	#--------------------
	# Random sampling.

	# Take 5 random samples from the dataset.
	view1 = dataset.take(5)

	# Take 5 random samples from the dataset with a fixed seed.
	view2 = dataset.take(5, seed=51)
	print(view2.first().id)

	also_view2 = dataset.take(5, seed=51)
	print(also_view2.first().id)

	#--------------------
	# Filtering.

	# Populate metadata on all samples.
	dataset.compute_metadata()

	# Samples whose image is less than 48 KB.
	small_images_view = dataset.match(fo.ViewField("metadata.size_bytes") < 48 * 1024)

	# Samples that contain at least one prediction with confidence above 0.99 or whose label is "cat" or "dog".
	match = (fo.ViewField("confidence") > 0.99) | (fo.ViewField("label").is_in(("cat", "dog")))
	matching_view = dataset.match(fo.ViewField("predictions.detections").filter(match).length() > 0)

	# The validation split of the dataset.
	val_view = dataset.match_tags("validation")
	# Union of the validation and test splits.
	val_test_view = dataset.match_tags(("validation", "test"))
	# The subset of samples where predictions have been computed.
	predictions_view = dataset.exists("predictions")

	# Get the IDs of two random samples.
	sample_ids = [
		dataset.take(1).first().id,
		dataset.take(1).first().id,
	]
	# Include only samples with the given IDs in the view.
	selected_view = dataset.select(sample_ids)
	# Exclude samples with the given IDs from the view.
	excluded_view = dataset.exclude(sample_ids)

	for sample in dataset.select_fields("ground_truth"):
		print(sample.id)            # OKAY: 'id' is always available
		print(sample.ground_truth)  # OKAY: 'ground_truth' was selected
		#print(sample.predictions)   # AttributeError: 'predictions' was not selected

	for sample in dataset.exclude_fields("predictions"):
		print(sample.id)            # OKAY: 'id' is always available
		print(sample.ground_truth)  # OKAY: 'ground_truth' was not excluded
		#print(sample.predictions)   # AttributeError: 'predictions' was excluded

	#--------------------
	# Date-based views.

	# A separate, new dataset whose samples carry a 'capture_date' field.
	dataset = fo.Dataset()
	dataset.add_samples(
		[
			fo.Sample(
				filepath="image1.png",
				capture_date=datetime(2021, 8, 24, 1, 0, 0),
			),
			fo.Sample(
				filepath="image2.png",
				capture_date=datetime(2021, 8, 24, 2, 0, 0),
			),
			fo.Sample(
				filepath="image3.png",
				capture_date=datetime(2021, 8, 24, 3, 0, 0),
			),
		]
	)

	query_date = datetime(2021, 8, 24, 2, 1, 0)
	query_delta = timedelta(minutes=30)

	# Samples with capture date after 2021-08-24 02:01:00.
	view = dataset.match(fo.ViewField("capture_date") > query_date)
	print(view)

	# Samples with capture date within 30 minutes of 2021-08-24 02:01:00.
	view = dataset.match(abs(fo.ViewField("capture_date") - query_date) < query_delta)
	print(view)
Esempio n. 13
0
class ServerServiceTests(unittest.TestCase):
    """Tests for ServerService.

    The individual checks live in ``step_*`` methods, which are discovered
    by :meth:`steps` and executed one after another by the single test
    method :meth:`test_steps`.

    NOTE: the class body itself creates the dataset, the samples, a remote
    ``Session`` and a connected socket.io client, so these side effects
    happen when the class is defined, not when a test starts.
    """

    image_url = "https://user-images.githubusercontent.com/3719547/74191434-8fe4f500-4c21-11ea-8d73-555edfce0854.png"
    test_one = os.path.abspath("./test_one.png")
    test_two = os.path.abspath("./test_two.png")
    dataset = fo.Dataset("test")
    sample1 = fo.Sample(filepath=test_one)
    sample2 = fo.Sample(filepath=test_two)
    session = Session(remote=True)
    sio_client = socketio.Client()
    sio_client.eio.start_background_task = foc._start_background_task
    client = AppClient()
    sio_client.register_namespace(client)
    foc._connect(sio_client, SERVER_ADDR % 5151)
    _tmp = None

    @classmethod
    def setUpClass(cls):
        """Download the test image, copy it, and populate the dataset."""
        urllib.request.urlretrieve(cls.image_url, cls.test_one)
        etau.copy_file(cls.test_one, cls.test_two)
        cls.dataset.add_sample(cls.sample1)
        cls.dataset.add_sample(cls.sample2)
        cls.sample1["scalar"] = 1
        cls.sample1["label"] = fo.Classification(label="test")
        cls.sample1.tags.append("tag")
        # Non-finite floats are stored on the sample as well
        cls.sample1["floats"] = [
            0.5,
            float("nan"),
            float("inf"),
            float("-inf"),
        ]
        cls.sample1.save()

    @classmethod
    def tearDownClass(cls):
        """Remove the two image files created by ``setUpClass``."""
        etau.delete_file(cls.test_one)
        etau.delete_file(cls.test_two)

    def step_connect(self):
        """Both the session's client and the test client report connected."""
        self.assertIs(self.session._hc_client.connected, True)
        self.assertIs(self.client.connected, True)

    def step_update(self):
        """Setting the session dataset yields matching session/client state."""
        self.session.dataset = self.dataset
        self.wait_for_response()
        session = _serialize(self.session.state)
        client = self.client.data.serialize()
        self.assertEqual(_normalize_session(session),
                         _normalize_session(client))

    def step_get_current_state(self):
        """``get_current_state`` returns the state of the current view."""
        self.maxDiff = None
        self.session.view = self.dataset.limit(1)
        self.wait_for_response()
        session = _serialize(self.session.state)
        self.client.emit("get_current_state",
                         "",
                         callback=self.client_callback)
        client = self.wait_for_response()
        self.assertEqual(_normalize_session(session),
                         _normalize_session(client))
        self.assertEqual(
            sorted(client["tags"]),
            sorted(self.dataset.get_tags()),
        )
        self.assertEqual(client["view_count"], len(self.session.view))
        self.assertNotEqual(client["view_count"], len(self.dataset))

    def step_selection(self):
        """Adding and removing a selection is reflected in the session."""
        self.client.emit("add_selection", self.sample1.id)
        self.wait_for_response(session=True)
        # Lengths are compared by value; identity of ints is an
        # implementation detail and must not be relied upon
        self.assertEqual(len(self.session.selected), 1)
        self.assertEqual(self.session.selected[0], self.sample1.id)

        self.client.emit("remove_selection", self.sample1.id)
        self.wait_for_response(session=True)
        self.assertEqual(len(self.session.selected), 0)

    def step_page(self):
        """Requesting page 1 returns both samples of the dataset."""
        self.session.dataset = self.dataset
        self.wait_for_response()
        self.client.emit("page", 1, callback=self.client_callback)
        client = self.wait_for_response()
        results = client["results"]
        self.assertEqual(len(results), 2)

    def step_get_distributions(self):
        """``get_distributions`` reports tags, labels and scalars."""
        self.session.dataset = self.dataset
        self.wait_for_response()

        self.client.emit("get_distributions",
                         "tags",
                         callback=self.client_callback)
        client = self.wait_for_response()
        self.assertEqual(len(client), 1)
        self.assertEqual(client[0]["data"], [{"key": "tag", "count": 1}])

        self.client.emit("get_distributions",
                         "labels",
                         callback=self.client_callback)
        client = self.wait_for_response()
        self.assertEqual(len(client), 1)
        self.assertEqual(client[0]["data"], [{"key": "test", "count": 1}])

        self.client.emit("get_distributions",
                         "scalars",
                         callback=self.client_callback)
        client = self.wait_for_response()
        self.assertEqual(len(client), 1)
        self.assertEqual(client[0]["data"], [{"key": "null", "count": 2}])

    def step_sessions(self):
        """A second session's dataset and view show up in the first one."""
        other_session = Session(remote=True)
        other_session.dataset = self.dataset
        self.wait_for_response(session=True)
        self.assertEqual(str(self.session.dataset), str(other_session.dataset))
        other_session.view = self.dataset.limit(1)
        self.wait_for_response(session=True)
        self.assertEqual(str(self.session.view), str(other_session.view))

    def step_server_services(self):
        """Session bookkeeping per port follows session creation/deletion."""
        port = 5252
        session_one = Session(port=port, remote=True)
        session_two = Session(port=port, remote=True)
        self.assertEqual(len(_subscribed_sessions[port]), 2)
        self.assertEqual(len(_subscribed_sessions), 2)
        self.assertEqual(len(_server_services), 2)
        session_two.__del__()
        self.assertEqual(len(_subscribed_sessions[port]), 1)
        self.assertEqual(len(_subscribed_sessions), 2)
        self.assertEqual(len(_server_services), 2)
        session_one.__del__()
        self.assertEqual(len(_subscribed_sessions[port]), 0)
        self.assertEqual(len(_server_services), 1)

    def step_empty_derivables(self):
        """An empty dataset still yields matching session/client state."""
        self.session.dataset = fo.Dataset()
        # Only the arrival of the response matters here, not its content
        self.wait_for_response()
        self.assertEqual(_serialize(self.session.state),
                         _serialize(self.client.data))

    def step_json_encoder(self):
        """``FiftyOneJSONEncoder`` encodes ObjectIds and non-finite floats."""
        enc = FiftyOneJSONEncoder
        oid = "aaaaaaaaaaaaaaaaaaaaaaaa"
        self.assertEqual(enc.dumps(ObjectId(oid)), '{"$oid": "%s"}' % oid)
        self.assertEqual(enc.dumps(float("nan")), '"NaN"')
        self.assertEqual(enc.dumps(float("inf")), '"Infinity"')

    def test_steps(self):
        """Run every ``step_*`` method, resetting the session in between.

        Any exception raised by a step (or by the reset that follows it) is
        converted into a test failure that names the step.
        """
        for name, step in self.steps():
            try:
                step()
                # Clear the session's dataset so the next step starts clean
                self.session.dataset = None
                self.wait_for_response()
            except Exception as e:
                self.fail("{} failed ({}: {})".format(name, type(e), e))

    def wait_for_response(self, timeout=3, session=False):
        """Poll until a response arrives or ``timeout`` seconds elapse.

        Args:
            timeout (3): maximum number of seconds to keep polling
            session (False): if True, wait for the session client's
                ``updated`` flag (which is then cleared) instead of a
                response stored on the test client

        Returns:
            the response stored on the test client (which is then cleared),
            or None when ``session`` is True

        Raises:
            RuntimeError: if nothing arrives before the timeout
        """
        start_time = time.time()
        while time.time() < start_time + timeout:
            if session:
                if self.session._hc_client.updated:
                    self.session._hc_client.updated = False
                    return
            elif self.client.response:
                response = self.client.response
                self.client.response = None

                return response
            time.sleep(0.2)

        raise RuntimeError("No response after %f" % timeout)

    def client_callback(self, data):
        """Store ``data`` on the client so ``wait_for_response`` can return it."""
        self.client.response = data

    def steps(self):
        """Yield ``(name, bound method)`` for every ``step_*`` attribute.

        ``dir()`` returns names in alphabetical order, so the steps are
        yielded (and therefore run) alphabetically.
        """
        for name in dir(self):
            if name.startswith("step_"):
                yield name, getattr(self, name)
Esempio n. 14
0
import eta.core.logging as etal

import fiftyone as fo
import fiftyone.zoo as foz

logger = logging.getLogger(__name__)

# Logs everything written by a `logger` in this benchmark
# The log file sits next to this script and shares its base name
etal.custom_setup(
    etal.LoggingConfig(
        dict(
            filename=os.path.splitext(os.path.abspath(__file__))[0] + ".log",
            file_format="%(message)s",
        )),
    verbose=False,
)

#
# Add samples benchmark
#

dataset = foz.load_zoo_dataset("cifar10", split="train")

# Copies of the samples, so they can be added to other datasets below
samples = [s.copy() for s in dataset]

logger.info("\nStarting test")
# Add the same samples to a new dataset once per batch size
for batch_size in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, None]:
    # Pass the value as a logging argument so formatting is deferred to the
    # logging framework instead of being done eagerly with `%`
    logger.info("\nBatch size: %s", batch_size)
    dataset2 = fo.Dataset()
    dataset2.add_samples(samples, _batch_size=batch_size)
Esempio n. 15
0
def train_with_hydra(cfg: DictConfig):
    """Run inference with a trained image classifier and save reports.

    Despite its name, this entry point does not train. It loads the
    checkpoint at ``cfg.inference.model_path_to_load``, predicts on the images
    under ``cfg.inference.dataset_path`` and then, depending on the ``enable``
    flags under ``cfg.inference``, writes:

    * ``captum``: one explanation per image (via ``save_explanation``) and a
      summary CSV at ``cfg.inference.captum.csv_result``
    * ``confusion_matrix``: a confusion-matrix image, a metrics CSV and a
      confusion-matrix CSV computed with ``normalize="true"``
    * ``calibration``: a reliability diagram (via ``draw_reliability_graph``)

    Args:
        cfg: the run configuration. ``cfg.inference.base_path`` and
            ``cfg.inference.batch_size`` are overwritten in place.

    Raises:
        Exception: if any sub-folder of ``cfg.inference.dataset_path``
            contains no files
    """
    # Results go to "<everything before the first 'train/' of the checkpoint
    # path>inference/"
    checkpoint_root = cfg.inference.model_path_to_load.split("train/", 1)[0]
    cfg.inference.base_path = checkpoint_root + "inference/"
    print("INFERENCE RESULTS WILL BE SAVED {}".format(cfg.inference.base_path))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Inference always runs with batch_size = 1; the prediction loop below
    # reads a single item (index 0) from every batch
    cfg.inference.batch_size = 1

    createFolderForExplanation(cfg)

    # Dataclass for the custom image transforms; see the dataset
    # configuration in the .yaml files
    @dataclass
    class ImageClassificationInputTransform(InputTransform):

        # transforms added to input training data
        def train_input_per_sample_transform(self):
            return instantiate(cfg.dataset.train_transform, _convert_="all")

        # transform label to tensor
        def target_per_sample_transform(self) -> Callable:
            return torch.as_tensor

        # transforms added to input validation data
        def val_input_per_sample_transform(self):
            return instantiate(cfg.dataset.val_transform, _convert_="all")

        # transforms added to input prediction data
        def predict_input_per_sample_transform(self):
            return instantiate(cfg.dataset.test_transform, _convert_="all")

    # ----------
    # INSTANTIATE DATASET FROM HYDRA CONF
    # -----------

    # Every sub-folder must contain at least one file; only the root folder
    # itself is allowed to hold no files
    for dirpath, _dirnames, files in os.walk(cfg.inference.dataset_path):
        if dirpath != cfg.inference.dataset_path and not files:
            raise Exception(
                "Test folder cannot be empty. Otherwise target label are not correct"
            )

    datamodule = ImageClassificationData.from_folders(
        predict_folder=cfg.inference.dataset_path,
        predict_transform=ImageClassificationInputTransform,
        batch_size=cfg.inference.batch_size)

    # ----------
    # INSTANTIATE MODEL AND TRAINER
    # -----------

    model = instantiate(cfg.model.image_classifier)
    model = model.load_from_checkpoint(cfg.inference.model_path_to_load)

    trainer = instantiate(cfg.trainer.default)

    # ----------
    # RUN PREDICTION
    # -----------

    predictions = trainer.predict(model, datamodule=datamodule)

    # The model has to be moved to the target device after trainer.predict so
    # that explanations run there. This is done unconditionally so that
    # `modeladapter` is also defined on CPU-only machines (previously it was
    # only assigned when CUDA was available, and `modeladapter.eval()` raised
    # NameError otherwise).
    modeladapter = model.to(device)
    modeladapter.eval()

    # ----------
    # RUN MODEL INSPECTION
    # -----------

    # Accumulators for the optional reports; each one is only filled when the
    # corresponding feature is enabled
    explanation_list = []
    samples = []
    y_pred = []
    y_true = []
    preds_calibration = []
    labels_oneh_calibration = []

    if cfg.inference.captum.enable:
        print("SAVE EXPLANATION FILES ")

    if cfg.inference.confusion_matrix.enable:
        print("SAVE EXPLANATION FILES ")

    for prediction in predictions:
        item = prediction[0]

        # Scores must be float32. Converting with `.to` avoids the warning
        # raised when torch.tensor() is called on an existing tensor.
        out32 = item[DataKeys.PREDS].detach().view(
            1, -1).contiguous().to(torch.float32)

        # Keep the inputs on the same device as the model
        out32 = out32.to(device)
        inputImage = item[DataKeys.INPUT].to(device)

        output = F.softmax(out32, dim=1)
        prediction_score, pred_label_idx = torch.topk(output, 1)
        pred_label_idx.squeeze_()

        pred_label_num = pred_label_idx.cpu().item()
        gt_label_num = item[DataKeys.TARGET].item()

        filepath = item[DataKeys.METADATA]["filepath"]
        filename = os.path.basename(os.path.normpath(filepath))
        filename_without_ext, _ = os.path.splitext(filename)

        # EXPLANATION
        if cfg.inference.captum.enable:
            explanation_list.append(
                save_explanation(inputImage, modeladapter, cfg, pred_label_idx,
                                 pred_label_num, gt_label_num, filename,
                                 filepath, filename_without_ext,
                                 prediction_score))

        # CONFUSION MATRIX
        if cfg.inference.confusion_matrix.enable:
            y_true.append(gt_label_num)
            y_pred.append(pred_label_num)

            samples.append(
                fo.Sample(filepath=filepath,
                          ground_truth=fo.Classification(
                              label=cfg.inference.class_name[gt_label_num]),
                          prediction=fo.Classification(
                              label=cfg.inference.class_name[pred_label_num])))

        # CALIBRATION
        if cfg.inference.calibration.enable:
            pred_calib = output.cpu().detach().numpy()
            preds_calibration.extend(pred_calib)

            # WARNING: cfg.inference.class_name must be configured, since its
            # length sets the width of the one-hot label
            label_oneh = torch.nn.functional.one_hot(
                torch.tensor([gt_label_num]).to(torch.long),
                num_classes=len(cfg.inference.class_name))
            label_oneh = label_oneh.cpu().detach().numpy()
            labels_oneh_calibration.extend(label_oneh)

    # Save the explanation CSV for further analysis
    if cfg.inference.captum.enable:
        explanation_dataframe = pd.DataFrame(
            explanation_list,
            columns=["pred", "GT", "predict_score", "image_path"])
        # csv file could be imported on Ai4Prod explainability software
        explanation_dataframe.to_csv(cfg.inference.captum.csv_result,
                                     index=False)

    # Save the confusion matrix and the other classification statistics
    if cfg.inference.confusion_matrix.enable:
        dataset = fo.Dataset("custom_evaluation")
        dataset.add_samples(samples)

        results = dataset.evaluate_classifications(
            "prediction",
            gt_field="ground_truth",
            eval_key="custom_eval",
        )

        plot = results.plot_confusion_matrix(classes=cfg.inference.class_name,
                                             backend="matplotlib",
                                             figsize=(6, 6))
        plot.savefig(
            cfg.inference.confusion_matrix.path_to_confusion_matrix_image)

        dict_report = results.report()
        df_metric = pd.DataFrame(dict_report).transpose()
        df_metric.to_csv(cfg.inference.confusion_matrix.path_to_metrics_csv)

        # Save the confusion matrix as a bare CSV (no header, no index) so it
        # can be consumed from C++
        cf_matrix = confusion_matrix(y_true, y_pred, normalize="true")
        class_names = list(cfg.inference.class_name)
        df_cm = pd.DataFrame(cf_matrix,
                             index=class_names,
                             columns=class_names)
        df_cm.to_csv(
            cfg.inference.confusion_matrix.path_to_confusion_matrix_csv,
            index=False,
            header=False)

    if cfg.inference.calibration.enable:
        preds_calibration = np.array(preds_calibration).flatten()
        labels_oneh_calibration = np.array(labels_oneh_calibration).flatten()
        draw_reliability_graph(
            preds_calibration,
            cfg.inference.calibration.path_to_creliability_diagram,
            labels_oneh_calibration)
Esempio n. 16
0
 def step_empty_derivables(self):
     """Attach a new ``fo.Dataset()`` to the session and compare states.

     After the response arrives, the serialized session state must equal the
     serialized client data.
     """
     self.session.dataset = fo.Dataset()

     # Only the arrival of the response matters here, not its payload
     self.wait_for_response()

     session_state = _serialize(self.session.state)
     client_state = _serialize(self.client.data)
     self.assertEqual(session_state, client_state)
Esempio n. 17
0
# Directory that FiftyOne is configured to use for zoo datasets
fo.config.dataset_zoo_dir = "/home/Develop/Dataset/fiftyone"



#------------------ SEGMENTATION DATASET ----------------------------------------

# CREATE DATASET
# A sample must be added for every .png binary mask that you want to verify

# Create the DUTS-TE segmentation dataset

# Folder holding the mask images; the trailing slash matters because file
# names are appended to it by plain string concatenation below
DatasetPath= "/home/Develop/Dataset/SemanticSegmentation/DUTS-TE/DUTS-TE-Mask/"
maskList= os.listdir(DatasetPath)

samples=[]
dataset = fo.Dataset("duts_te_validation")
# Mark the dataset as persistent
# NOTE(review): presumably so it outlives this script's session - confirm
dataset.persistent=True

for maskName in maskList:
    
    maskPath= DatasetPath + maskName

    # Load the mask with OpenCV as stored on disk (IMREAD_UNCHANGED), then
    # binarize it: values above 127 become 255, everything else becomes 0
    mask = cv2.imread(maskPath, cv2.IMREAD_UNCHANGED)
    ret,mask= cv2.threshold(mask, 127,255,cv2.THRESH_BINARY)
    
    # The mask file serves both as the sample's filepath and as the source of
    # its ground-truth segmentation
    sample = fo.Sample(filepath=maskPath,
                    ground_truth=fo.Segmentation(mask=mask))
    
    samples.append(sample)