Exemple #1
0
def _write_generic_sample_dataset(dataset_exporter, samples, num_samples):
    """Feeds every sample in ``samples`` to the given exporter.

    Args:
        dataset_exporter: the exporter to use. It is entered as a context
            manager for the duration of the export and receives each sample
            via ``export_sample(sample)``
        samples: an iterable of samples. If it is a ``foc.SampleCollection``,
            it is passed to ``dataset_exporter.log_collection()`` before any
            sample is exported
        num_samples: the total passed to the progress bar
    """
    # The progress bar is entered first and exited last, as with nested
    # ``with`` statements
    with fou.ProgressBar(total=num_samples) as pb, dataset_exporter:
        if isinstance(samples, foc.SampleCollection):
            dataset_exporter.log_collection(samples)

        for item in pb(samples):
            dataset_exporter.export_sample(item)
Exemple #2
0
    def compute_metadata(self, overwrite=False):
        """Populates the ``metadata`` field of the samples in the collection.

        Samples whose ``metadata`` is already set are left untouched unless
        ``overwrite`` is True.

        Args:
            overwrite (False): whether to recompute the metadata of samples
                that already have it
        """
        with fou.ProgressBar() as pb:
            for sample in pb(self):
                # Nothing to do when metadata exists and we may not replace it
                if sample.metadata is not None and not overwrite:
                    continue

                sample.compute_metadata()
Exemple #3
0
def save_tp_fp_fn_counts(samples, pred_field, gt_field, iou):
    """Copies the per-sample true positive (TP), false positive (FP), and
    false negative (FN) counts at the given IoU into top-level sample fields.

    The counts are written to the following fields::

        TP: sample.tp_iou_<iou>
        FP: sample.fp_iou_<iou>
        FN: sample.fn_iou_<iou>

    where ``<iou> = str(iou).replace(".", "_")``.

    The counts are read from ``sample[pred_field]["<gt_field>_eval"]``, so the
    samples must have been previously evaluated by passing them to
    :meth:`evaluate_detections`.

    If ``iou`` is not one of ``IOU_THRESHOLDS``, a message is logged and
    nothing is saved.

    Args:
        samples: an iterable of :class:`fiftyone.core.sample.Sample` instances.
            For example, this may be a :class:`fiftyone.core.dataset.Dataset`
            or a :class:`fiftyone.core.view.DatasetView`
        pred_field: the name of the field containing the predicted
            :class:`fiftyone.core.labels.Detections` that were evaluated
        gt_field: the name of the field containing the ground truth
            :class:`fiftyone.core.labels.Detections`
        iou: the IoU value for which to save the TP/FP/FN counts
    """
    pred_key = "%s_eval" % gt_field
    save_iou_str = str(iou).replace(".", "_")

    try:
        iou_str = _IOU_THRESHOLD_STRS[IOU_THRESHOLDS.index(iou)]
    except ValueError:
        logger.info(
            "IoU %f is not an available IoU threshold: %s", iou, IOU_THRESHOLDS
        )
        return

    # (destination sample field, key in the stored evaluation results)
    field_keys = (
        ("tp_iou_%s" % save_iou_str, "true_positives"),
        ("fp_iou_%s" % save_iou_str, "false_positives"),
        ("fn_iou_%s" % save_iou_str, "false_negatives"),
    )

    logger.info("Saving TP/FP/FN counts for IoU %f...", iou)
    with fou.ProgressBar() as pb:
        for sample in pb(samples):
            results = sample[pred_field][pred_key]

            # Read all three counts before touching the sample so that a
            # missing result leaves the sample unmodified
            counts = [
                (field, results[key][iou_str]) for field, key in field_keys
            ]
            for field, count in counts:
                sample[field] = count

            sample.save()
Exemple #4
0
def condense_image_labels_field(
    dataset,
    label_field,
    prefix=None,
    labels_dict=None,
    keep_label_fields=False,
):
    """Condenses multiple :class:`fiftyone.core.labels.Label` fields into a
    single :class:`fiftyone.core.labels.ImageLabels` field.

    Provide either ``prefix`` or ``labels_dict`` to customize the fields that
    are condensed. If you provide neither, all
    :class:`fiftyone.core.labels.Label` fields are condensed.

    Label fields whose value is ``None`` on a given sample contribute nothing
    to that sample's condensed labels.

    Args:
        dataset: a :class:`fiftyone.core.dataset.Dataset`
        label_field: the name of the :class:`fiftyone.core.labels.ImageLabels`
            field to create
        prefix (None): a label field prefix; all
            :class:`fiftyone.core.labels.Label` fields matching this prefix are
            merged into ``label_field``, with the prefix removed from the names
            of the labels
        labels_dict (None): a dictionary mapping names of
            :class:`fiftyone.core.labels.Label` fields to names to give them in
            the condensed :class:`fiftyone.core.labels.ImageLabels`
        keep_label_fields (False): whether to keep the input label fields after
            ``label_field`` is created. By default, the fields are deleted
    """
    if prefix is None:
        prefix = ""

    if labels_dict is None:
        labels_dict = _get_label_dict_for_prefix(dataset, prefix)

    logger.info("Condensing image labels into field '%s'", label_field)
    with fou.ProgressBar() as pb:
        for sample in pb(dataset):
            image_labels = etai.ImageLabels()
            for field_name, name in labels_dict.items():
                label = sample[field_name]

                # A sample may have no value for a given label field; skip it
                # rather than failing on `None.to_image_labels()`
                if label is not None:
                    image_labels.merge_labels(
                        label.to_image_labels(name=name)
                    )

                if not keep_label_fields:
                    sample.clear_field(field_name)

            sample[label_field] = fol.ImageLabels(labels=image_labels)
            sample.save()

    # Once every sample is condensed, drop the source fields from the dataset
    if not keep_label_fields:
        for field_name in labels_dict:
            dataset.delete_sample_field(field_name)
Exemple #5
0
def draw_labeled_images(samples,
                        label_fields,
                        anno_dir,
                        annotation_config=None):
    """Writes annotated copies of the image samples, with the given label
    field(s) overlaid, to the given directory.

    Each output image keeps the filename of its sample image unless that would
    cause a name conflict in ``anno_dir``, in which case an index of the form
    ``"-%d" % count`` is appended to the base filename.

    The images are written in format ``fo.config.default_image_ext``.

    Args:
        samples: an iterable of :class:`fiftyone.core.sample.Sample` instances
        label_fields: the list of :class:`fiftyone.core.labels.ImageLabel`
            fields to render
        anno_dir: the directory to write the annotated images
        annotation_config (None): an :class:`AnnotationConfig` specifying how
            to render the annotations. If omitted,
            ``_DEFAULT_ANNOTATION_CONFIG`` is used

    Returns:
        the list of paths to the labeled images
    """
    config = annotation_config
    if config is None:
        config = _DEFAULT_ANNOTATION_CONFIG

    path_maker = fou.UniqueFilenameMaker(output_dir=anno_dir)
    image_ext = fo.config.default_image_ext

    rendered_paths = []
    with fou.ProgressBar() as pb:
        for sample in pb(samples):
            rendered_path = path_maker.get_output_path(
                sample.filepath, output_ext=image_ext
            )
            draw_labeled_image(
                sample, label_fields, rendered_path, annotation_config=config
            )
            rendered_paths.append(rendered_path)

    return rendered_paths
Exemple #6
0
    def to_dict(self, rel_dir=None):
        """Serializes the collection into a JSON dictionary.

        Args:
            rel_dir (None): a relative directory to strip from the start of
                each sample's ``filepath``, when the filepath lies inside it.
                The directory is first made absolute via
                ``os.path.abspath(os.path.expanduser(rel_dir))``. This is
                useful when your source data lives in a single directory and
                you want to serialize paths relative to that directory rather
                than absolute paths

        Returns:
            a JSON dict

        Raises:
            ValueError: if the collection's media type is video
        """
        # @todo support serializing video datasets?
        # That would be a lot of labels to store in one JSON......
        if self.media_type == fom.VIDEO:
            raise ValueError("Serializing video datasets is not supported")

        # Prefix (with trailing separator) to strip from filepaths, if any
        prefix = None
        if rel_dir is not None:
            prefix = os.path.abspath(os.path.expanduser(rel_dir))
            prefix += os.path.sep

        # Serialize samples
        sample_dicts = []
        with fou.ProgressBar() as pb:
            for sample in pb(self):
                sample_dict = sample.to_dict()
                if prefix is not None and sample_dict["filepath"].startswith(
                    prefix
                ):
                    relative = sample_dict["filepath"][len(prefix):]
                    sample_dict["filepath"] = relative

                sample_dicts.append(sample_dict)

        result = {}
        result["name"] = self.name
        result["num_samples"] = len(self)
        result["tags"] = self.get_tags()
        result["info"] = self.info
        result["sample_fields"] = self._serialize_field_schema()
        result["samples"] = sample_dicts
        return result
Exemple #7
0
def _make_images_list(images_dir):
    """Builds one image record per image found in the given directory.

    The image paths come from ``foud.parse_images_dir(images_dir)`` and the
    dimensions from ``fom.ImageMetadata.build_for()``.

    Args:
        images_dir: the directory of images to describe

    Returns:
        a list of dicts with keys ``id`` (the index of the image in the parsed
        listing), ``file_name`` (the basename of the image path), ``height``,
        ``width``, ``license`` and ``coco_url`` (the last two are always
        ``None``)
    """
    logger.info("Computing image metadata for '%s'", images_dir)

    image_paths = foud.parse_images_dir(images_dir)

    images = []
    with fou.ProgressBar() as pb:
        # Wrap the paths themselves rather than the `enumerate` object (which
        # has no length) so the progress bar sees the underlying collection.
        # NOTE(review): presumably the bar infers its total from
        # `len(iterable)`; confirm against the ProgressBar implementation
        for idx, image_path in enumerate(pb(image_paths)):
            metadata = fom.ImageMetadata.build_for(image_path)
            images.append({
                "id": idx,
                "file_name": os.path.basename(image_path),
                "height": metadata.height,
                "width": metadata.width,
                "license": None,
                "coco_url": None,
            })

    return images
Exemple #8
0
    def add_samples(self, samples, expand_schema=True, num_samples=None):
        """Adds the given samples to the dataset.

        Any sample instances that do not belong to a dataset are updated
        in-place to reflect membership in this dataset. Any sample instances
        that belong to other datasets are not modified.

        The samples are added in batches of ``self._BATCH_SIZE``.

        Args:
            samples: an iterable of :class:`fiftyone.core.sample.Sample`
                instances. For example, ``samples`` may be a :class:`Dataset`
                or a :class:`fiftyone.core.views.DatasetView`
            expand_schema (True): whether to dynamically add new sample fields
                encountered to the dataset schema. If False, an error is raised
                if a sample's schema is not a subset of the dataset schema
            num_samples (None): the number of samples in ``samples``. If not
                provided, this is computed via ``len(samples)``, if possible.
                This value is optional and is used only for optimization and
                progress tracking

        Returns:
            a list of IDs of the samples in the dataset

        Raises:
            :class:`mongoengine.errors.ValidationError` if a field of a sample
            has a type that is inconsistent with the dataset schema, or if
            ``expand_schema == False`` and a new field is encountered
        """
        if num_samples is None:
            try:
                num_samples = len(samples)
            except Exception:
                # Best effort only: e.g. generators have no length. Unlike a
                # bare `except`, this lets KeyboardInterrupt and SystemExit
                # propagate
                pass

        sample_ids = []
        with fou.ProgressBar(total=num_samples) as pb:
            for batch in fou.iter_batches(samples, self._BATCH_SIZE):
                sample_ids.extend(
                    self._add_samples_batch(batch, expand_schema)
                )
                # Progress is tracked per sample, not per batch
                pb.update(count=len(batch))

        return sample_ids
Exemple #9
0
    def to_dict(self, rel_dir=None):
        """Serializes the collection into a JSON dictionary.

        Args:
            rel_dir (None): a relative directory to strip from the start of
                each sample's ``filepath``, when the filepath lies inside it.
                The directory is first made absolute via
                ``os.path.abspath(os.path.expanduser(rel_dir))``. This is
                useful when your source data lives in a single directory and
                you want to serialize paths relative to that directory rather
                than absolute paths

        Returns:
            a JSON dict
        """
        # Prefix (with trailing separator) to strip from filepaths, if any
        prefix = None
        if rel_dir is not None:
            prefix = os.path.abspath(os.path.expanduser(rel_dir))
            prefix += os.path.sep

        # Get field schema
        schema = self.get_field_schema()

        # Serialize samples
        sample_dicts = []
        with fou.ProgressBar() as pb:
            for sample in pb(self):
                sample_dict = sample.to_dict()
                if prefix is not None and sample_dict["filepath"].startswith(
                    prefix
                ):
                    relative = sample_dict["filepath"][len(prefix):]
                    sample_dict["filepath"] = relative

                sample_dicts.append(sample_dict)

        result = {}
        result["name"] = self.name
        result["num_samples"] = len(self)
        result["tags"] = self.get_tags()
        # Fields are serialized via their string representation
        result["sample_fields"] = {
            name: str(field) for name, field in schema.items()
        }
        result["samples"] = sample_dicts
        return result
Exemple #10
0
def _write_video_dataset(
    dataset_exporter, samples, sample_parser, num_samples
):
    """Parses each sample with ``sample_parser`` and exports the resulting
    video via ``dataset_exporter``.

    Args:
        dataset_exporter: the exporter to use. It is entered as a context
            manager for the duration of the export. If it is a
            ``LabeledVideoDatasetExporter``, the frame labels of each sample
            are exported as well
        samples: an iterable of samples. If it is a ``foc.SampleCollection``,
            it is passed to ``dataset_exporter.log_collection()`` before any
            sample is exported
        sample_parser: the parser used to extract the video path, metadata and
            frame labels from each sample
        num_samples: the total passed to the progress bar
    """
    labeled_videos = isinstance(dataset_exporter, LabeledVideoDatasetExporter)

    with fou.ProgressBar(total=num_samples) as pb, dataset_exporter:
        if isinstance(samples, foc.SampleCollection):
            dataset_exporter.log_collection(samples)

        for sample in pb(samples):
            sample_parser.with_sample(sample)

            video_path = sample_parser.get_video_path()

            # Metadata is only gathered when the exporter asks for it; the
            # parser's metadata is preferred and is otherwise built from the
            # video itself
            metadata = None
            if dataset_exporter.requires_video_metadata:
                if sample_parser.has_video_metadata:
                    metadata = sample_parser.get_video_metadata()

                if metadata is None:
                    metadata = fom.VideoMetadata.build_for(video_path)

            if not labeled_videos:
                dataset_exporter.export_sample(video_path, metadata=metadata)
                continue

            frames = sample_parser.get_frame_labels()
            dataset_exporter.export_sample(
                video_path, frames, metadata=metadata
            )
Exemple #11
0
    def clone_field(self, field_name, new_field_name, samples=None):
        """Copies the values of a field of the given samples into a new field
        of the corresponding samples of this dataset.

        Samples in ``samples`` whose ID does not match any sample in this
        dataset are skipped.

        The field values are **deep copied**.

        Args:
            field_name: the field name to clone
            new_field_name: the new field name to populate
            samples (None): an iterable of :class:`fiftyone.core.sample.Sample`
                instances whose fields to clone. For example, ``samples`` may
                be a :class:`fiftyone.core.views.DatasetView`. By default, this
                dataset itself is used

        Returns:
            tuple of

            -   num_cloned: the number of samples that were cloned
            -   num_skipped: the number of samples that were skipped
        """
        source = self if samples is None else samples

        counts = {"cloned": 0, "skipped": 0}
        with fou.ProgressBar() as pb:
            for sample in pb(source):
                try:
                    target = self[sample.id]
                except KeyError:
                    # Not a member of this dataset
                    counts["skipped"] += 1
                else:
                    target[new_field_name] = deepcopy(sample[field_name])
                    target.save()
                    counts["cloned"] += 1

        return counts["cloned"], counts["skipped"]
Exemple #12
0
def add_open_images_predictions(
    dataset,
    predictions_path,
    class_descriptions_path=None,
    prediction_field_name="predicted_detections",
):
    """Adds TF Object Detection API format predictions to a
    :class:`fiftyone.core.dataset.Dataset`.

    The predictions of a sample are the rows of the CSV whose ``ImageID``
    equals the sample's ``OPEN_IMAGES_ID`` field. Samples without any matching
    rows are left unmodified.

    Args:
        dataset: the :class:`fiftyone.core.dataset.Dataset` instance to add
            the predictions to
        predictions_path: path to a TF Object Detection API format
            predictions CSV
        class_descriptions_path (None): optional metadata file. If provided,
            the MID labels are mapped to descriptive labels
        prediction_field_name ("predicted_detections"): the name of the field
            to save the predictions under
    """
    all_predictions = pd.read_csv(predictions_path)
    all_predictions.rename(columns={"Score": "Confidence"}, inplace=True)

    # Map label MID to descriptive label
    if class_descriptions_path is not None:
        class_descriptions = pd.read_csv(
            class_descriptions_path, header=None, index_col=0
        )

        # Look up the description (column 1) of each prediction's MID, then
        # realign the result with the predictions' own index
        temp = class_descriptions.loc[all_predictions["LabelName"], 1]
        temp.index = all_predictions.index
        all_predictions["LabelName"] = temp

    # Loop-invariant column lookup
    image_ids = all_predictions["ImageID"]

    # The progress bar takes no positional dataset argument elsewhere in this
    # codebase; the iterable is supplied via `pb(dataset)` below
    with fou.ProgressBar() as pb:
        for sample in pb(dataset):
            # Parse prediction bounding boxes
            cur_preds = all_predictions.loc[
                image_ids == sample[OPEN_IMAGES_ID]
            ]
            if not cur_preds.empty:
                sample[prediction_field_name] = df2detections(cur_preds)
                sample.save()
Exemple #13
0
def convert_classification_field_to_detections(
    dataset,
    classification_field,
    detections_field=None,
    keep_classification_field=False,
):
    """Converts a :class:`fiftyone.core.labels.Classification` field of the
    dataset into a :class:`fiftyone.core.labels.Detections` field that holds
    the classification label as a single detection.

    Each detection is given a bounding box that spans the entire image.
    Samples whose classification is ``None`` are skipped.

    Args:
        dataset: a :class:`fiftyone.core.dataset.Dataset`
        classification_field: the name of the
            :class:`fiftyone.core.labels.Classification` field to convert to
            detections
        detections_field (None): the name of the
            :class:`fiftyone.core.labels.Detections` field to create. By
            default, ``classification_field`` is overwritten
        keep_classification_field (False): whether to keep
            ``classification_field`` after the conversion is completed. By
            default, the field is deleted from the dataset. If
            ``classification_field`` is being overwritten, this flag has no
            effect
    """
    dataset.validate_field_type(
        classification_field,
        fof.EmbeddedDocumentField,
        embedded_doc_type=fol.Classification,
    )

    overwrite = (
        detections_field is None or detections_field == classification_field
    )

    if not overwrite:
        logger.info(
            "Converting Classification field '%s' to Detections format in "
            "field '%s'",
            classification_field,
            detections_field,
        )
    else:
        logger.info(
            "Converting Classification field '%s' to Detections format",
            classification_field,
        )

        # Build the detections in a temporary field that takes over the
        # original field's name once the conversion is finished
        keep_classification_field = False
        detections_field = dataset.make_unique_field_name(
            root=classification_field)

    whole_image = [0, 0, 1, 1]

    with fou.ProgressBar() as pb:
        for sample in pb(dataset):
            classification = sample[classification_field]
            if classification is None:
                continue

            sample[detections_field] = fol.Detections(detections=[
                fol.Detection(
                    label=classification.label,
                    bounding_box=list(whole_image),
                    confidence=classification.confidence,
                )
            ])

            if not keep_classification_field:
                sample.clear_field(classification_field)

            sample.save()

    if not keep_classification_field:
        dataset.delete_sample_field(classification_field)

    if overwrite:
        dataset.rename_field(detections_field, classification_field)
Exemple #14
0
def _write_image_dataset(dataset_exporter, samples, sample_parser,
                         num_samples):
    """Parses each sample with ``sample_parser`` and exports the resulting
    image via ``dataset_exporter``.

    Args:
        dataset_exporter: the exporter to use. It is entered as a context
            manager for the duration of the export. If it is a
            ``LabeledImageDatasetExporter``, the label of each sample is
            exported as well
        samples: an iterable of samples. If it is a ``foc.SampleCollection``,
            it is passed to ``dataset_exporter.log_collection()`` before any
            sample is exported
        sample_parser: the parser used to extract the image (or its path),
            metadata and label from each sample
        num_samples: the total passed to the progress bar
    """
    labeled_images = isinstance(dataset_exporter, LabeledImageDatasetExporter)

    with fou.ProgressBar(total=num_samples) as pb:
        with dataset_exporter:
            if isinstance(samples, foc.SampleCollection):
                dataset_exporter.log_collection(samples)

            for sample in pb(samples):
                sample_parser.with_sample(sample)

                # Parse image
                if sample_parser.has_image_path:
                    try:
                        image_or_path = sample_parser.get_image_path()
                    except Exception:
                        # Best-effort fallback to the image itself when the
                        # path cannot be obtained. Unlike a bare `except`,
                        # this lets KeyboardInterrupt and SystemExit propagate
                        image_or_path = sample_parser.get_image()
                else:
                    image_or_path = sample_parser.get_image()

                # Parse metadata, only if the exporter asks for it; the
                # parser's metadata is preferred and is otherwise built from
                # the image
                if dataset_exporter.requires_image_metadata:
                    if sample_parser.has_image_metadata:
                        metadata = sample_parser.get_image_metadata()
                    else:
                        metadata = None

                    if metadata is None:
                        metadata = fom.ImageMetadata.build_for(image_or_path)
                else:
                    metadata = None

                if labeled_images:
                    # Parse label
                    label = sample_parser.get_label()

                    #
                    # SPECIAL CASE
                    #
                    # Convert `Classification` labels to `Detections` format,
                    # if necessary
                    #
                    if (dataset_exporter.label_cls is fol.Detections
                            and isinstance(label, fol.Classification)):
                        msg = ("Dataset exporter expects labels in %s format, "
                               "but found %s. Converting labels to detections "
                               "whose bounding boxes span the entire image" %
                               (fol.Detections, label.__class__))
                        warnings.warn(msg)
                        label = fol.Detections(detections=[
                            fol.Detection(
                                label=label.label,
                                bounding_box=[0, 0, 1, 1],  # entire image
                                confidence=label.confidence,
                            )
                        ])

                    # Export sample
                    dataset_exporter.export_sample(image_or_path,
                                                   label,
                                                   metadata=metadata)
                else:
                    # Export sample
                    dataset_exporter.export_sample(image_or_path,
                                                   metadata=metadata)
Exemple #15
0
def evaluate_detections(
    samples, pred_field, gt_field="ground_truth", save_iou=0.75
):
    """Evaluates the predicted detections in the given samples with respect to
    the specified ground truth detections for each of the following
    Intersection over Union (IoU) thresholds::

        [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95]

    Predictions are only matched to ground truth detections with the same
    label, in order of decreasing confidence.

    It should be noted that if a :class:`fiftyone.core.labels.Detection` in the
    ground truth field has a true-valued attribute called ``iscrowd``, then
    this detection may be matched to multiple predictions and result in them
    all being true positives, as per the evaluation strategy used by the COCO
    authors.

    A dictionary is added to each predicted
    :class:`fiftyone.core.labels.Detections` instance in the field listed
    below; it tabulates the true positive (TP), false positive (FP), and false
    negative (FN) counts for the sample at each IoU::

        Predictions:    detections.<gt_field>_eval

    Dictionaries are also added to each individual
    :class:`fiftyone.core.labels.Detection` instance in the fields listed
    below; these fields tabulate the IDs of the matching ground
    truth/prediction for the detection at each IoU::

        Ground truth:   detection.<pred_field>_eval
        Predictions:    detection.<gt_field>_eval

    In addition, true positive (TP), false positive (FP), and false negative
    (FN) counts at the specified ``save_iou`` are saved in the following
    top-level fields of each sample::

        TP: sample.tp_iou_<save_iou>
        FP: sample.fp_iou_<save_iou>
        FN: sample.fn_iou_<save_iou>

    where ``<save_iou>`` is the entry of ``_IOU_THRESHOLD_STRS`` that
    corresponds to ``save_iou``. If ``save_iou`` is not an available IoU
    threshold, a message is logged and these top-level fields are not written.

    Args:
        samples: an iterable of :class:`fiftyone.core.sample.Sample` instances.
            For example, this may be a :class:`fiftyone.core.dataset.Dataset`
            or a :class:`fiftyone.core.view.DatasetView`
        pred_field: the name of the field containing the predicted
            :class:`fiftyone.core.labels.Detections` to evaluate
        gt_field ("ground_truth"): the name of the field containing the ground
            truth :class:`fiftyone.core.labels.Detections`
        save_iou (0.75): an IoU value for which to save per-sample TP/FP/FN
            counts as top-level sample fields
    """
    gt_key = "%s_eval" % pred_field
    pred_key = "%s_eval" % gt_field

    # Running counter that gives every prediction and ground truth detection
    # a unique evaluation ID across all samples
    eval_id = 0

    try:
        save_iou_ind = IOU_THRESHOLDS.index(save_iou)
        save_iou_str = _IOU_THRESHOLD_STRS[save_iou_ind]
    except ValueError:
        logger.info(
            "IoU %f is not in the list of available IoU thresholds: %s",
            save_iou,
            IOU_THRESHOLDS,
        )
        save_iou_str = None

    logger.info("Evaluating detections...")
    with fou.ProgressBar() as pb:
        for sample in pb(samples):
            preds = sample[pred_field]
            gts = sample[gt_field]

            # Group preds and gt detections by category label, initializing
            # their per-detection evaluation dicts along the way
            sample_cats = {}
            for det in preds.detections:
                det[pred_key] = {}
                det[pred_key]["ious"] = {}
                det[pred_key]["matches"] = {
                    iou_str: {"gt_id": -1, "iou": -1}
                    for iou_str in _IOU_THRESHOLD_STRS
                }
                det[pred_key]["pred_id"] = eval_id
                eval_id += 1
                cat_dets = sample_cats.setdefault(
                    det.label, {"preds": [], "gts": []}
                )
                cat_dets["preds"].append(det)

            for det in gts.detections:
                det[gt_key] = {}
                det[gt_key]["matches"] = {
                    iou_str: {"pred_id": -1, "iou": -1}
                    for iou_str in _IOU_THRESHOLD_STRS
                }

                det[gt_key]["gt_id"] = eval_id
                eval_id += 1
                cat_dets = sample_cats.setdefault(
                    det.label, {"preds": [], "gts": []}
                )
                cat_dets["gts"].append(det)

            # Compute IoU for every detection and gt
            for cat, dets in sample_cats.items():
                cat_gts = dets["gts"]
                cat_preds = dets["preds"]

                # Stable sort of the predictions by decreasing confidence;
                # missing confidences are treated as 0
                inds = np.argsort(
                    [-(p.confidence or 0.0) for p in cat_preds],
                    kind="mergesort",
                )
                cat_preds = [cat_preds[i] for i in inds]
                sample_cats[cat]["preds"] = cat_preds

                gt_ids = [g[gt_key]["gt_id"] for g in cat_gts]

                gt_boxes = [list(g.bounding_box) for g in cat_gts]
                pred_boxes = [list(p.bounding_box) for p in cat_preds]

                crowd_flags = [False] * len(gt_boxes)
                for gind, g in enumerate(cat_gts):
                    if "iscrowd" in g.attributes:
                        crowd_flags[gind] = bool(g.attributes["iscrowd"].value)

                # Get the IoU of every prediction with every ground truth
                # shape = [num_preds, num_gts]
                ious = _compute_iou(pred_boxes, gt_boxes, crowd_flags)

                for pind, gt_ious in enumerate(ious):
                    cat_preds[pind][pred_key]["ious"][cat] = list(
                        zip(gt_ids, gt_ious)
                    )

            #
            # Starting with highest confidence prediction, match all with gts
            # Store true and false positives
            #
            # Reference implementation:
            # https://github.com/cocodataset/cocoapi/blob/8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9/PythonAPI/pycocotools/cocoeval.py#L273
            #
            result_dict = {
                "true_positives": {},
                "false_positives": {},
                "false_negatives": {},
            }

            for iou_ind, iou_thresh in enumerate(IOU_THRESHOLDS):
                iou_str = _IOU_THRESHOLD_STRS[iou_ind]
                true_positives = 0
                false_positives = 0
                for cat, dets in sample_cats.items():
                    gt_by_id = {g[gt_key]["gt_id"]: g for g in dets["gts"]}

                    # Note: predictions were sorted by confidence in the
                    # previous step
                    cat_preds = dets["preds"]

                    # Match each prediction to the highest IoU ground truth
                    # available
                    for pred in cat_preds:
                        if cat in pred[pred_key]["ious"]:
                            best_match = -1
                            best_match_iou = min([iou_thresh, 1 - 1e-10])
                            for gt_id, iou in pred[pred_key]["ious"][cat]:
                                gt = gt_by_id[gt_id]
                                curr_gt_match = gt[gt_key]["matches"][iou_str][
                                    "pred_id"
                                ]

                                if "iscrowd" in gt.attributes:
                                    gt_iscrowd = bool(
                                        gt.attributes["iscrowd"].value
                                    )
                                else:
                                    gt_iscrowd = False

                                # Cannot match two preds to the same gt unless
                                # the gt is a crowd
                                if curr_gt_match > -1 and not gt_iscrowd:
                                    continue

                                # Ignore gts with an IoU lower than what was
                                # already found
                                if iou < best_match_iou:
                                    continue

                                best_match_iou = iou
                                best_match = gt_id

                            if best_match > -1:
                                # If the prediction was matched, store the eval
                                # id of the pred in the gt and of the gt in the
                                # pred
                                gt_to_store = gt_by_id[best_match][gt_key]
                                gt_to_store["matches"][iou_str] = {
                                    "pred_id": pred[pred_key]["pred_id"],
                                    "iou": best_match_iou,
                                }
                                pred[pred_key]["matches"][iou_str] = {
                                    "gt_id": best_match,
                                    "iou": best_match_iou,
                                }
                                true_positives += 1
                            else:
                                false_positives += 1

                        elif pred.label == cat:
                            # No IoUs were recorded for this prediction
                            false_positives += 1

                result_dict["true_positives"][iou_str] = true_positives
                result_dict["false_positives"][iou_str] = false_positives

                # Count the unmatched ground truths of EVERY category. Relying
                # on the loop variable left over from the matching loop above
                # would only count the last category (and would be stale or
                # undefined for samples with no detections)
                false_negatives = sum(
                    1
                    for cat_dets in sample_cats.values()
                    for g in cat_dets["gts"]
                    if g[gt_key]["matches"][iou_str]["pred_id"] == -1
                )

                result_dict["false_negatives"][iou_str] = false_negatives

                if iou_str == save_iou_str:
                    sample["tp_iou_%s" % save_iou_str] = true_positives
                    sample["fp_iou_%s" % save_iou_str] = false_positives
                    sample["fn_iou_%s" % save_iou_str] = false_negatives

            sample[pred_field][pred_key] = result_dict

            # @todo compute sample-wise AP

            sample.save()
Exemple #16
0
def write_dataset(samples,
                  sample_parser,
                  dataset_dir=None,
                  dataset_type=None,
                  dataset_exporter=None,
                  num_samples=None,
                  **kwargs):
    """Writes the samples to disk as a dataset in the specified format.

    Provide either ``dataset_dir`` and ``dataset_type`` or ``dataset_exporter``
    to perform the write.

    Args:
        samples: an iterable of samples
        sample_parser: a :class:`fiftyone.utils.data.parsers.SampleParser` to
            use to parse the samples. It is not used when the exporter is a
            ``GenericSampleDatasetExporter``; in that case each sample is
            handed to the exporter as-is
        dataset_dir (None): the directory to which to write the dataset in
            format ``dataset_type``
        dataset_type (None): the :class:`fiftyone.types.dataset_types.Dataset`
            type to write. May be a class or an instance
        dataset_exporter (None): a
            :class:`fiftyone.utils.data.exporters.DatasetExporter` to use to
            write the dataset
        num_samples (None): the number of samples in ``samples``. If omitted,
            this is computed (if possible) via ``len(samples)``
        **kwargs: optional keyword arguments to pass to the constructor of the
            exporter class returned by
            ``dataset_type.get_dataset_exporter_cls()``, i.e.,
            ``dataset_exporter_cls(dataset_dir, **kwargs)``. Only used when no
            ``dataset_exporter`` is provided

    Raises:
        ValueError: if ``dataset_type`` is not a supported type, if neither
            ``dataset_type`` nor ``dataset_exporter`` is provided, or if the
            exporter is not of a supported type
    """
    if dataset_type is not None:
        if inspect.isclass(dataset_type):
            dataset_type = dataset_type()

        if not isinstance(
                dataset_type,
            (fot.UnlabeledImageDataset, fot.LabeledImageDataset),
        ):
            raise ValueError("Unsupported `dataset_type` %s" %
                             type(dataset_type))

    if dataset_exporter is None:
        if dataset_type is None:
            # Fail with an actionable message rather than an AttributeError
            # on `None`
            raise ValueError(
                "Either `dataset_type` or `dataset_exporter` must be provided")

        dataset_exporter_cls = dataset_type.get_dataset_exporter_cls()
        dataset_exporter = dataset_exporter_cls(dataset_dir, **kwargs)

    if num_samples is None:
        # Best effort: not every iterable supports `len()`, in which case the
        # progress bar simply runs without a known total
        try:
            num_samples = len(samples)
        except Exception:
            pass

    raw_samples = False
    labeled_images = False
    if isinstance(dataset_exporter, GenericSampleDatasetExporter):
        raw_samples = True
    elif isinstance(dataset_exporter, UnlabeledImageDatasetExporter):
        labeled_images = False
    elif isinstance(dataset_exporter, LabeledImageDatasetExporter):
        labeled_images = True
    else:
        raise ValueError("Unsupported DatasetExporter %s" %
                         type(dataset_exporter))

    with fou.ProgressBar(total=num_samples) as pb:
        with dataset_exporter:
            for sample in pb(samples):
                # GenericSampleDatasetExporter
                if raw_samples:
                    dataset_exporter.export_sample(sample)
                    continue

                # UnlabeledImageDatasetExporter and LabeledImageDatasetExporter
                sample_parser.with_sample(sample)

                if sample_parser.has_image_path:
                    # Prefer the path, but fall back to the image itself if
                    # the parser cannot produce a path for this sample
                    try:
                        image_or_path = sample_parser.get_image_path()
                    except Exception:
                        image_or_path = sample_parser.get_image()
                else:
                    image_or_path = sample_parser.get_image()

                metadata = None
                if dataset_exporter.requires_image_metadata:
                    if sample_parser.has_image_metadata:
                        metadata = sample_parser.get_image_metadata()

                    # Compute the metadata when the parser has none to offer
                    if metadata is None:
                        metadata = fom.ImageMetadata.build_for(image_or_path)

                if labeled_images:
                    label = sample_parser.get_label()

                    dataset_exporter.export_sample(image_or_path,
                                                   label,
                                                   metadata=metadata)
                else:
                    dataset_exporter.export_sample(image_or_path,
                                                   metadata=metadata)
Exemple #17
0
def evaluate_detections(
    samples,
    pred_field,
    gt_field="ground_truth",
    iou=0.75,
):
    """Evaluates the predicted detections in the given samples with respect to
    the specified ground truth detections using the specified Intersection over
    Union (IoU) threshold to determine matches.

    This method uses COCO-style evaluation. In particular, this means that if a
    :class:`fiftyone.core.labels.Detection` in the ground truth field has an
    attribute called ``iscrowd`` whose value is truthy, then this detection can
    have multiple true positive predictions matched to it.

    Predictions are only matched against ground truth detections that have the
    same ``label``. Within each label, predictions are processed in order of
    decreasing confidence, and each is matched to the available ground truth
    with the highest IoU that is at least ``iou``.

    A ``matches`` dictionary is added to each individual
    :class:`fiftyone.core.labels.Detection` instance in the fields listed
    below::

        Ground truth:   detection.<pred_field>_eval
        Predictions:    detection.<gt_field>_eval

    The entry ``matches[<iou>]`` records the ID of the matching
    prediction/ground truth and the IoU of the match::

        Ground truth:   {"pred_id": <id>, "iou": <iou of the match>}
        Predictions:    {"gt_id": <id>, "iou": <iou of the match>}

    where both values are ``-1`` for unmatched detections. Entries that were
    previously recorded for other IoU values are preserved.

    In addition, true positive (TP), false positive (FP), and false negative
    (FN) counts at the specified IoU are saved in the following top-level
    fields of each image sample, or of each frame of a video sample::

        TP: sample.tp_iou_<iou>
        FP: sample.fp_iou_<iou>
        FN: sample.fn_iou_<iou>

    where ``<iou> = str(iou).replace(".", "_")``. For video samples, the sums
    of the per-frame counts are also saved in the same fields of the sample
    itself.

    Args:
        samples: an iterable of :class:`fiftyone.core.sample.Sample` instances.
            For example, this may be a :class:`fiftyone.core.dataset.Dataset`
            or a :class:`fiftyone.core.view.DatasetView`
        pred_field: the name of the field containing the predicted
            :class:`fiftyone.core.labels.Detections` to evaluate
        gt_field ("ground_truth"): the name of the field containing the ground
            truth :class:`fiftyone.core.labels.Detections`
        iou (0.75): an IoU value for which to compute
            per-detection and per-image TP/FP/FN
    """
    gt_key = "%s_eval" % pred_field
    pred_key = "%s_eval" % gt_field

    iou_str = str(iou).replace(".", "_")
    tp_field = "tp_iou_%s" % iou_str
    fp_field = "fp_iou_%s" % iou_str
    fn_field = "fn_iou_%s" % iou_str

    logger.info("Evaluating detections...")
    with fou.ProgressBar() as pb:
        for sample in pb(samples):
            # Get image(s) to process
            if sample.media_type == fom.VIDEO:
                images = sample.frames.values()
                has_frames = True
            else:
                images = [sample]
                has_frames = False

            # Running totals over all images of the sample
            sample_result_dict = {
                "true_positives": 0,
                "false_positives": 0,
                "false_negatives": 0,
            }

            for image in images:
                # Reset the match records at this IoU and group the
                # predictions and ground truths by category label
                image_cats = {}
                for det in image[pred_field].detections:
                    _reset_match_record(det, pred_key, "gt_id", iou_str)
                    image_cats.setdefault(det.label, {
                        "preds": [],
                        "gts": []
                    })["preds"].append(det)

                for det in image[gt_field].detections:
                    _reset_match_record(det, gt_key, "pred_id", iou_str)
                    image_cats.setdefault(det.label, {
                        "preds": [],
                        "gts": []
                    })["gts"].append(det)

                # Compute IoU for every detection and gt
                pred_ious = {}

                for dets in image_cats.values():
                    cat_gts = dets["gts"]
                    cat_preds = dets["preds"]

                    # Stable sort by decreasing confidence; predictions with
                    # no confidence are treated as having confidence 0
                    inds = np.argsort(
                        [-(p.confidence or 0.0) for p in cat_preds],
                        kind="mergesort",
                    )
                    cat_preds = [cat_preds[i] for i in inds]
                    dets["preds"] = cat_preds

                    gt_ids = [g.id for g in cat_gts]

                    gt_boxes = [list(g.bounding_box) for g in cat_gts]
                    pred_boxes = [list(p.bounding_box) for p in cat_preds]
                    crowd_flags = [_is_crowd_detection(g) for g in cat_gts]

                    # Get the IoU of every prediction with every ground truth
                    # shape = [num_preds, num_gts]
                    ious = _compute_iou(pred_boxes, gt_boxes, crowd_flags)

                    for pred, gt_ious in zip(cat_preds, ious):
                        pred_ious[pred.id] = list(zip(gt_ids, gt_ious))

                #
                # Starting with highest confidence prediction, match all with
                # GTs. Store true and false positives
                #
                # Reference implementation:
                # https://github.com/cocodataset/cocoapi/blob/8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9/PythonAPI/pycocotools/cocoeval.py#L273
                #
                true_pos = 0
                false_pos = 0
                for cat, dets in image_cats.items():
                    gt_by_id = {g.id: g for g in dets["gts"]}

                    # Note: predictions were sorted by confidence in the
                    # previous step
                    for pred in dets["preds"]:
                        if pred.id not in pred_ious:
                            # No IoUs are available for this prediction, so
                            # it cannot be matched
                            if pred.label == cat:
                                false_pos += 1

                            continue

                        # Match the prediction to the highest IoU ground
                        # truth available
                        best_match = -1
                        best_match_iou = min(iou, 1 - 1e-10)
                        for gt_id, det_iou in pred_ious[pred.id]:
                            gt = gt_by_id[gt_id]
                            curr_gt_match = gt[gt_key]["matches"][iou_str][
                                "pred_id"]

                            # Cannot match two preds to the same gt unless
                            # the gt is a crowd
                            if (curr_gt_match != -1
                                    and not _is_crowd_detection(gt)):
                                continue

                            # Ignore gts with an IoU lower than what was
                            # already found
                            if det_iou < best_match_iou:
                                continue

                            best_match_iou = det_iou
                            best_match = gt_id

                        if best_match != -1:
                            # If the prediction was matched, store the id of
                            # the pred in the gt and of the gt in the pred
                            gt_to_store = gt_by_id[best_match][gt_key]
                            gt_to_store["matches"][iou_str] = {
                                "pred_id": pred.id,
                                "iou": best_match_iou,
                            }
                            pred[pred_key]["matches"][iou_str] = {
                                "gt_id": best_match,
                                "iou": best_match_iou,
                            }
                            true_pos += 1
                        else:
                            false_pos += 1

                # Every ground truth that was left unmatched is a false
                # negative. This must be counted over *all* categories of the
                # image, just like the true/false positives above
                false_neg = sum(
                    1 for dets in image_cats.values() for gt in dets["gts"]
                    if gt[gt_key]["matches"][iou_str]["pred_id"] == -1)

                sample_result_dict["true_positives"] += true_pos
                sample_result_dict["false_positives"] += false_pos
                sample_result_dict["false_negatives"] += false_neg

                image[tp_field] = true_pos
                image[fp_field] = false_pos
                image[fn_field] = false_neg

            if has_frames:
                # For videos, also store the totals over all frames on the
                # sample itself
                sample[tp_field] = sample_result_dict["true_positives"]
                sample[fp_field] = sample_result_dict["false_positives"]
                sample[fn_field] = sample_result_dict["false_negatives"]

            sample.save()


def _reset_match_record(det, eval_key, id_key, iou_str):
    """Marks the detection as unmatched at the given IoU.

    Sets ``det[eval_key]["matches"][iou_str]`` to
    ``{id_key: -1, "iou": -1}``, creating ``det[eval_key]`` and its
    ``matches`` dict if necessary. Entries for other IoU values are kept.
    """
    if eval_key not in det:
        det[eval_key] = {}

    if "matches" in det[eval_key]:
        # Build a new dict and reassign it rather than mutating the stored
        # one in place
        matches = dict(det[eval_key]["matches"])
    else:
        matches = {}

    matches[iou_str] = {id_key: -1, "iou": -1}
    det[eval_key]["matches"] = matches


def _is_crowd_detection(det):
    """Returns whether the detection has a truthy ``iscrowd`` attribute."""
    if "iscrowd" in det.attributes:
        return bool(det.attributes["iscrowd"].value)

    return False
Exemple #18
0
def evaluate_dataset(
    dataset,
    label_map_path,
    groundtruth_loc_field_name="groundtruth_detections",
    groundtruth_img_labels_field_name="groundtruth_image_labels",
    prediction_field_name="predicted_detections",
    iou_threshold=0.5,
):
    """Runs the Tensorflow Object Detection API evaluation on every sample of
    a FiftyOne dataset, one image at a time, and stores the results on the
    samples.

    The following is written to each sample before it is saved:

    -   ``mAP``: the mAP of the image, unless it is NaN
    -   ``AP_per_class``: a dict mapping class display names to the AP of the
        class for the image, omitting NaN values
    -   an ``eval`` attribute on each predicted detection that the evaluator
        reports as a false positive (``"false_positive"``) or a true positive
        (``"true_positive"``)

    Args:
        dataset: the :class:`fiftyone.core.dataset.Dataset` to evaluate
        label_map_path: path to the label map .pbtxt file
        groundtruth_loc_field_name: the name of the groundtruth
            :class:`fiftyone.core.labels.Detections` field
        groundtruth_img_labels_field_name: the name of the groundtruth
            :class:`fiftyone.core.labels.Classifications` field
        prediction_field_name: the name of the predicted
            :class:`fiftyone.core.labels.Detections` field
        iou_threshold: the intersection-over-union bounding box matching
            threshold
    """
    _, categories = TensorflowObjectDetectionAPIEvaluator.load_labelmap(
        label_map_path)

    # Lookup tables between the display names and the names of the label map
    display2name_map = {}
    name2display_map = {}
    for category in categories:
        display2name_map[category["display_name"]] = category["name"]
        name2display_map[category["name"]] = category["display_name"]

    evaluator = TensorflowObjectDetectionAPIEvaluator(
        label_map_path, iou_threshold=iou_threshold)

    # Result key -> value of the `eval` attribute to set on the detections.
    # False positives are tagged before true positives
    eval_tags = (
        ("false_positive_indexes", "false_positive"),
        ("true_positive_indexes", "true_positive"),
    )

    with fou.ProgressBar(dataset) as pb:
        for sample in pb(dataset):
            image_id = sample.open_images_id

            # Ground truth boxes and image-level labels as one dataframe
            gt_boxes_df = detections2df(
                image_id,
                sample[groundtruth_loc_field_name],
                display2name_map=display2name_map,
                is_groundtruth=True,
            )
            gt_labels_df = classifications2df(
                image_id,
                sample[groundtruth_img_labels_field_name],
                display2name_map=display2name_map,
            )
            groundtruth = pd.concat([gt_boxes_df, gt_labels_df])

            # Predicted boxes as a dataframe
            predictions = detections2df(
                image_id,
                sample[prediction_field_name],
                display2name_map=display2name_map,
            )

            result = evaluator.evaluate_image(image_id, groundtruth,
                                              predictions)

            # mAP is only stored when it is a number
            mean_ap = result["mAP"]
            if not np.isnan(mean_ap):
                sample["mAP"] = mean_ap

            # Per-class AP, keyed by display name, without the NaN entries
            ap_per_class = {}
            for class_name, class_ap in result["AP_per_class"].items():
                if np.isnan(class_ap):
                    continue

                ap_per_class[name2display_map[class_name]] = class_ap

            sample["AP_per_class"] = ap_per_class

            # Tag the predicted detections with their evaluation outcome
            for result_key, tag in eval_tags:
                for idx in result[result_key]:
                    det = sample[prediction_field_name].detections[idx]
                    det["eval"] = tag

            sample.save()
def load_open_images_dataset(
    dataset_name,
    images_dir,
    bounding_boxes_path=None,
    image_labels_path=None,
    predictions_path=None,
    prediction_field_name="predicted_detections",
    class_descriptions_path=None,
    load_images_with_preds=False,
    max_num_images=-1,
):
    """Loads an Open Images format dataset into FiftyOne.

    **Note** If this takes a long time it is highly recommended to save the
    dataset via:

        dataset.persistent = True

    such that this function only needs to be called once!

    Args:
        dataset_name: the name of the dataset to create in FiftyOne.
        images_dir: directory where images are stored. Images should be in
            <open-images-id>.jpg format
        bounding_boxes_path: path to the expanded-hierarchy annotation bounding
            boxes CSV
        image_labels_path: path to the expanded-hierarchy annotation image
            labels CSV
        predictions_path: path to the predicted bounding boxes CSV. A
            ``Score`` column, if present, is renamed to ``Confidence``
        prediction_field_name: the name of the field to save the predictions
            under. Useful if other predictions may be added later
        class_descriptions_path: optional metadata file. if provided, the
            MID labels are mapped to descriptive labels
        load_images_with_preds: if True, only the images that are listed in
            the predictions CSV are loaded, rather than all ``*.jpg`` files in
            ``images_dir``. Requires ``predictions_path``
        max_num_images: the maximum number of images to load. By default
            (-1), all images are loaded

    Returns:
        a :class:`fiftyone.core.dataset.Dataset` instance

    Raises:
        ValueError: if ``load_images_with_preds`` is True but no
            ``predictions_path`` is provided
    """
    # pylint: disable=unsubscriptable-object
    if load_images_with_preds and not predictions_path:
        # Fail before any CSV is read, rather than with a TypeError later on
        raise ValueError(
            "`predictions_path` must be provided when "
            "`load_images_with_preds` is True")

    # read data from disk
    all_location_annotations = (pd.read_csv(bounding_boxes_path)
                                if bounding_boxes_path else None)
    all_label_annotations = (pd.read_csv(image_labels_path)
                             if image_labels_path else None)
    if predictions_path:
        all_predictions = pd.read_csv(predictions_path)
        all_predictions.rename(columns={"Score": "Confidence"}, inplace=True)
    else:
        all_predictions = None
    class_descriptions = (pd.read_csv(
        class_descriptions_path, header=None, index_col=0)
                          if class_descriptions_path else None)

    # map label MID to descriptive label
    if class_descriptions is not None:
        for df in [
                all_location_annotations,
                all_label_annotations,
                all_predictions,
        ]:
            if df is None:
                continue

            # Look up the description (column 1) of each MID, then realign
            # the result with the rows of `df`
            temp = class_descriptions.loc[df["LabelName"], 1]
            temp.index = df.index
            df["LabelName"] = temp

    if load_images_with_preds:
        img_paths = [
            os.path.join(images_dir, image_id + ".jpg")
            for image_id in set(all_predictions["ImageID"])
        ]
    else:
        img_paths = glob.glob(os.path.join(images_dir, "*.jpg"))

    if max_num_images != -1:
        img_paths = img_paths[:max_num_images]

    # Group each table by image ID once, so that the rows of an image can be
    # looked up directly instead of scanning the whole table for every image.
    # Each entry is (groups, sample field name, dataframe -> label converter)
    label_sources = [
        (
            _group_by_image_id(all_label_annotations),
            GT_IMAGE_LABELS,
            df2classifications,
        ),
        (
            _group_by_image_id(all_location_annotations),
            GT_DETECTIONS,
            df2detections,
        ),
        (
            _group_by_image_id(all_predictions),
            prediction_field_name,
            df2detections,
        ),
    ]

    print("Parsing CSV labels...")
    _samples = []
    with fou.ProgressBar(img_paths) as pb:
        for image_path in pb(img_paths):
            image_id = os.path.splitext(os.path.basename(image_path))[0]

            kwargs = {"filepath": image_path, OPEN_IMAGES_ID: image_id}

            # parse the ground truth image labels, ground truth bounding
            # boxes, and prediction bounding boxes of the image, if any
            for groups, field_name, to_label in label_sources:
                if groups is None or image_id not in groups.groups:
                    continue

                kwargs[field_name] = to_label(groups.get_group(image_id))

            _samples.append(fos.Sample(**kwargs))

    print("Creating FiftyOne Dataset...")
    dataset = fod.Dataset(dataset_name)
    dataset.add_samples(_samples)

    return dataset


def _group_by_image_id(df):
    """Groups the rows of the dataframe by their ``ImageID`` column.

    Returns None if no dataframe is provided.
    """
    if df is None:
        return None

    # No need to sort the group keys; groups are only looked up by ID
    return df.groupby("ImageID", sort=False)
Exemple #20
0
def expand_image_labels_field(
    dataset,
    label_field,
    prefix=None,
    labels_dict=None,
    multilabel=False,
    skip_non_categorical=False,
    keep_label_field=False,
):
    """Splits the :class:`fiftyone.core.labels.ImageLabels` field of each
    sample in the dataset into separate per-label fields.

    For every sample whose ``label_field`` is populated, the labels are
    expanded via their ``expand()`` method and the resulting fields are set on
    the sample. Samples whose ``label_field`` is None are left untouched.

    By default, all objects/attributes are expanded as described below. Pass
    ``labels_dict`` to customize which components of the labels are expanded.

    When ``multilabel`` is False, each frame attribute is stored in its own
    :class:`fiftyone.core.labels.Classification` field named
    ``prefix + attr.name``.

    When ``multilabel`` is True, all frame attributes are stored in a single
    :class:`fiftyone.core.labels.Classifications` field named
    ``prefix + "attrs"``.

    Objects are stored in :class:`fiftyone.core.labels.Detections` fields
    named ``prefix + obj.name``, or ``prefix + "objs"`` for objects whose
    ``name`` field is not populated.

    Args:
        dataset: a :class:`fiftyone.core.dataset.Dataset`
        label_field: the name of the :class:`fiftyone.core.labels.ImageLabels`
            field to expand
        prefix (None): a string prefix to prepend to each expanded field name
        labels_dict (None): a dictionary mapping names of attributes/objects
            in ``label_field`` to field names into which to expand them
        multilabel (False): whether to store frame attributes in a single
            :class:`fiftyone.core.labels.Classifications` field
        skip_non_categorical (False): whether to skip non-categorical frame
            attributes (True) or cast them to strings (False)
        keep_label_field (False): whether to keep ``label_field`` after the
            expansion is completed. By default, the field is cleared on each
            expanded sample and then deleted from the dataset
    """
    logger.info("Expanding image labels field '%s'", label_field)

    # The expansion options are the same for every sample
    expand_kwargs = dict(
        prefix=prefix,
        labels_dict=labels_dict,
        multilabel=multilabel,
        skip_non_categorical=skip_non_categorical,
    )
    drop_label_field = not keep_label_field

    with fou.ProgressBar() as pb:
        for sample in pb(dataset):
            image_labels = sample[label_field]
            if image_labels is not None:
                expanded_fields = image_labels.expand(**expand_kwargs)
                sample.update_fields(expanded_fields)

                if drop_label_field:
                    sample.clear_field(label_field)

                sample.save()

    if drop_label_field:
        dataset.delete_sample_field(label_field)