class DatasetSampleDocument(DatasetMixin, Document, SampleDocument): """Base class for sample documents backing samples in datasets. All ``fiftyone.core.dataset.Dataset._sample_doc_cls`` classes inherit from this class. """ meta = {"abstract": True} media_type = fof.StringField() # The path to the data on disk filepath = fof.StringField(unique=True) # The set of tags associated with the sample tags = fof.ListField(fof.StringField()) # Metadata about the sample media metadata = fof.EmbeddedDocumentField(fom.Metadata, null=True) # Random float used for random dataset operations (e.g. shuffle) _rand = fof.FloatField(default=_generate_rand) def set_field(self, field_name, value, create=True): if field_name == "frames" and isinstance(value, fofr.Frames): value = value.doc.frames super().set_field(field_name, value, create=create)
class Metadata(DynamicEmbeddedDocument): """Base class for storing metadata about generic samples. Args: size_bytes (None): the size of the media, in bytes mime_type (None): the MIME type of the media """ meta = {"allow_inheritance": True} size_bytes = fof.IntField() mime_type = fof.StringField() @classmethod def build_for(cls, filepath): """Builds a :class:`Metadata` object for the given filepath. Args: filepath: the path to the data on disk Returns: a :class:`Metadata` """ return cls( size_bytes=os.path.getsize(filepath), mime_type=etau.guess_mime_type(filepath), )
class CategoricalAttribute(Attribute): """A categorical attribute. Args: value (None): the attribute value confidence (None): a confidence in ``[0, 1]`` for the value logits (None): logits associated with the attribute """ value = fof.StringField() confidence = fof.FloatField() logits = fof.VectorField()
class Classification(ImageLabel, _HasID): """A classification label. Args: label (None): the label string confidence (None): a confidence in ``[0, 1]`` for the label logits (None): logits associated with the labels """ meta = {"allow_inheritance": True} label = fof.StringField() confidence = fof.FloatField() logits = fof.VectorField() def to_image_labels(self, name=None): """Returns an ``eta.core.image.ImageLabels`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.image.ImageLabels`` """ image_labels = etai.ImageLabels() image_labels.add_attribute( etad.CategoricalAttribute(name, self.label, confidence=self.confidence)) return image_labels @classmethod def from_attribute(cls, attr): """Creates a :class:`Classification` instance from an attribute. The attribute value is cast to a string, if necessary. Args: attr: an :class:`Attribute` or ``eta.core.data.Attribute`` Returns: a :class:`Classification` """ classification = cls(label=str(attr.value)) try: classification.confidence = attr.confidence except: pass return classification
class Classification(ImageLabel): """A classification label. Args: label (None): the label string confidence (None): a confidence in ``[0, 1]`` for the label logits (None): logits associated with the labels """ meta = {"allow_inheritance": True} _id = fof.ObjectIdField(required=True, default=ObjectId, unique=True, primary_key=True) label = fof.StringField() confidence = fof.FloatField() logits = fof.VectorField() @property def id(self): """The ID of the document.""" return str(self._id) def to_image_labels(self, name=None): """Returns an ``eta.core.image.ImageLabels`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.image.ImageLabels`` """ image_labels = etai.ImageLabels() image_labels.add_attribute( etad.CategoricalAttribute(name, self.label, confidence=self.confidence)) return image_labels def _get_repr_fields(self): # pylint: disable=no-member return ("id", ) + self._fields_ordered
class VideoMetadata(Metadata): """Class for storing metadata about video samples. Args: size_bytes (None): the size of the video on disk, in bytes mime_type (None): the MIME type of the video frame_width (None): the width of the video frames, in pixels frame_height (None): the height of the video frames, in pixels frame_rate (None): the frame rate of the video total_frame_count (None): the total number of frames in the video duration (None): the duration of the video, in seconds encoding_str (None): the encoding string for the video """ frame_width = fof.IntField() frame_height = fof.IntField() frame_rate = fof.FloatField() total_frame_count = fof.IntField() duration = fof.FloatField() encoding_str = fof.StringField() @classmethod def build_for(cls, video_path): """Builds an :class:`VideoMetadata` object for the given video. Args: video_path: the path to a video on disk Returns: a :class:`VideoMetadata` """ m = etav.VideoMetadata.build_for(video_path) return cls( size_bytes=m.size_bytes, mime_type=m.mime_type, frame_width=m.frame_size[0], frame_height=m.frame_size[1], frame_rate=m.frame_rate, total_frame_count=m.total_frame_count, duration=m.duration, encoding_str=m.encoding_str, )
class DatasetSampleDocument(Document, SampleDocument): """Base class for sample documents backing samples in datasets. All ``fiftyone.core.dataset.Dataset._sample_doc_cls`` classes inherit from this class. """ meta = {"abstract": True} # The path to the data on disk filepath = fof.StringField(unique=True) # The set of tags associated with the sample tags = fof.ListField(fof.StringField()) # Metadata about the sample media metadata = fof.EmbeddedDocumentField(fom.Metadata, null=True) # Random float used for random dataset operations (e.g. shuffle) _rand = fof.FloatField(default=_generate_rand) def __setattr__(self, name, value): # pylint: disable=no-member has_field = self.has_field(name) if name.startswith("_") or (hasattr(self, name) and not has_field): super().__setattr__(name, value) return if not has_field: raise ValueError( "Adding sample fields using the `sample.field = value` syntax " "is not allowed; use `sample['field'] = value` instead" ) if value is not None: self._fields[name].validate(value) super().__setattr__(name, value) @property def dataset_name(self): """The name of the dataset to which this sample belongs, or ``None`` if it has not been added to a dataset. """ return self.__class__.__name__ @property def field_names(self): return tuple( f for f in self._get_fields_ordered(include_private=False) if f != "id" ) @classmethod def get_field_schema( cls, ftype=None, embedded_doc_type=None, include_private=False ): """Returns a schema dictionary describing the fields of this sample. If the sample belongs to a dataset, the schema will apply to all samples in the dataset. Args: ftype (None): an optional field type to which to restrict the returned schema. Must be a subclass of :class:`fiftyone.core.fields.Field` embedded_doc_type (None): an optional embedded document type to which to restrict the returned schema. Must be a subclass of :class:`fiftyone.core.odm.BaseEmbeddedDocument` include_private (False): a boolean indicating whether to return fields that start with the character "_" Returns: a dictionary mapping field names to field types """ if ftype is None: ftype = fof.Field if not issubclass(ftype, fof.Field): raise ValueError( "Field type %s must be subclass of %s" % (ftype, fof.Field) ) if embedded_doc_type and not issubclass( ftype, fof.EmbeddedDocumentField ): raise ValueError( "embedded_doc_type should only be specified if ftype is a" " subclass of %s" % fof.EmbeddedDocumentField ) d = OrderedDict() field_names = cls._get_fields_ordered(include_private=include_private) for field_name in field_names: # pylint: disable=no-member field = cls._fields[field_name] if not isinstance(cls._fields[field_name], ftype): continue if embedded_doc_type and not issubclass( field.document_type, embedded_doc_type ): continue d[field_name] = field return d def has_field(self, field_name): # pylint: disable=no-member return field_name in self._fields def get_field(self, field_name): if not self.has_field(field_name): raise AttributeError("Sample has no field '%s'" % field_name) return getattr(self, field_name) @classmethod def add_field( cls, field_name, ftype, embedded_doc_type=None, subfield=None, save=True, ): """Adds a new field to the sample. Args: field_name: the field name ftype: the field type to create. Must be a subclass of :class:`fiftyone.core.fields.Field` embedded_doc_type (None): the :class:`fiftyone.core.odm.BaseEmbeddedDocument` type of the field. Used only when ``ftype`` is :class:`fiftyone.core.fields.EmbeddedDocumentField` subfield (None): the type of the contained field. Used only when ``ftype`` is a list or dict type """ # Additional arg `save` is to prevent saving the fields when reloading # a dataset from the database. # pylint: disable=no-member if field_name in cls._fields: raise ValueError("Field '%s' already exists" % field_name) field = _create_field( field_name, ftype, embedded_doc_type=embedded_doc_type, subfield=subfield, ) cls._fields[field_name] = field cls._fields_ordered += (field_name,) try: if issubclass(cls, DatasetSampleDocument): # Only set the attribute if it is a class setattr(cls, field_name, field) except TypeError: # Instance, not class, so do not `setattr` pass if save: # Update dataset meta class # @todo(Tyler) refactor to avoid local import here import fiftyone.core.dataset as fod dataset = fod.load_dataset(cls.__name__) field = cls._fields[field_name] sample_field = SampleFieldDocument.from_field(field) dataset._meta.sample_fields.append(sample_field) dataset._meta.save() @classmethod def add_implied_field(cls, field_name, value): """Adds the field to the sample, inferring the field type from the provided value. Args: field_name: the field name value: the field value """ # pylint: disable=no-member if field_name in cls._fields: raise ValueError("Field '%s' already exists" % field_name) cls.add_field(field_name, **_get_implied_field_kwargs(value)) def set_field(self, field_name, value, create=False): if field_name.startswith("_"): raise ValueError( "Invalid field name: '%s'. Field names cannot start with '_'" % field_name ) if hasattr(self, field_name) and not self.has_field(field_name): raise ValueError("Cannot use reserved keyword '%s'" % field_name) if not self.has_field(field_name): if create: self.add_implied_field(field_name, value) else: msg = "Sample does not have field '%s'." % field_name if value is not None: # don't report this when clearing a field. msg += " Use `create=True` to create a new field." raise ValueError(msg) self.__setattr__(field_name, value) def clear_field(self, field_name): self.set_field(field_name, None, create=False) @classmethod @no_delete_default_field def delete_field(cls, field_name): """Deletes the field from the sample. If the sample is in a dataset, the field will be removed from all samples in the dataset. Args: field_name: the field name Raises: AttributeError: if the field does not exist """ try: # Delete from all samples # pylint: disable=no-member cls.objects.update(**{"unset__%s" % field_name: None}) except InvalidQueryError: raise AttributeError("Sample has no field '%s'" % field_name) # Remove from dataset # pylint: disable=no-member del cls._fields[field_name] cls._fields_ordered = tuple( fn for fn in cls._fields_ordered if fn != field_name ) delattr(cls, field_name) # Update dataset meta class # @todo(Tyler) refactor to avoid local import here import fiftyone.core.dataset as fod dataset = fod.load_dataset(cls.__name__) dataset._meta.sample_fields = [ sf for sf in dataset._meta.sample_fields if sf.name != field_name ] dataset._meta.save() def _get_repr_fields(self): return ("dataset_name",) + super()._get_repr_fields() def _update(self, object_id, update_doc, filtered_fields=None, **kwargs): """Updates an existing document. Helper method; should only be used inside :meth:`DatasetSampleDocument.save`. """ updated_existing = True collection = self._get_collection() select_dict = {"_id": object_id} extra_updates = self._extract_extra_updates( update_doc, filtered_fields ) if update_doc: result = collection.update_one( select_dict, update_doc, upsert=True ).raw_result if result is not None: updated_existing = result.get("updatedExisting") for update, element_id in extra_updates: result = collection.update_one( select_dict, update, array_filters=[{"element._id": element_id}], upsert=True, ).raw_result if result is not None: updated_existing = updated_existing and result.get( "updatedExisting" ) return updated_existing def _extract_extra_updates(self, update_doc, filtered_fields): """Extracts updates for filtered list fields that need to be updated by ID, not relative position (index). """ extra_updates = [] # # Check for illegal modifications # Match the list, or an indexed item in the list, but not a field # of an indexed item of the list: # my_detections.detections <- MATCH # my_detections.detections.1 <- MATCH # my_detections.detections.1.label <- NO MATCH # if filtered_fields: for d in update_doc.values(): for k in d.keys(): for ff in filtered_fields: if k.startswith(ff) and not k.lstrip(ff).count("."): raise ValueError( "Modifying root of filtered list field '%s' " "is not allowed" % k ) if filtered_fields and "$set" in update_doc: d = update_doc["$set"] del_keys = [] for k, v in d.items(): filtered_field = None for ff in filtered_fields: if k.startswith(ff): filtered_field = ff break if filtered_field: element_id, el_filter = self._parse_id_and_array_filter( k, filtered_field ) extra_updates.append( ({"$set": {el_filter: v}}, element_id) ) del_keys.append(k) for k in del_keys: del d[k] if not update_doc["$set"]: del update_doc["$set"] return extra_updates def _parse_id_and_array_filter(self, list_element_field, filtered_field): """Converts the ``list_element_field`` and ``filtered_field`` to an element object ID and array filter. Example:: Input: list_element_field = "test_dets.detections.1.label" filtered_field = "test_dets.detections" Output: ObjectID("5f2062bf27c024654f5286a0") "test_dets.detections.$[element].label" """ el = self for field_name in filtered_field.split("."): el = el[field_name] el_fields = list_element_field.lstrip(filtered_field).split(".") idx = int(el_fields.pop(0)) el = el[idx] el_filter = ".".join([filtered_field, "$[element]"] + el_fields) return el._id, el_filter @classmethod def _get_fields_ordered(cls, include_private=False): if include_private: return cls._fields_ordered return tuple(f for f in cls._fields_ordered if not f.startswith("_"))
class Detection(ImageLabel): """An object detection. Args: label (None): the label string bounding_box (None): a list of relative bounding box coordinates in ``[0, 1]`` in the following format:: [<top-left-x>, <top-left-y>, <width>, <height>] confidence (None): a confidence in ``[0, 1]`` for the label attributes ({}): a dict mapping attribute names to :class:`Attribute` instances """ meta = {"allow_inheritance": True} _id = fof.ObjectIdField( required=True, default=ObjectId, unique=True, primary_key=True ) label = fof.StringField() bounding_box = fof.ListField() confidence = fof.FloatField() attributes = fof.DictField(fof.EmbeddedDocumentField(Attribute)) @property def id(self): """The ID of the document.""" return str(self._id) def has_attribute(self, name): """Determines whether the detection has an attribute with the given name. Args: name: the attribute name Returns: True/False """ # pylint: disable=unsupported-membership-test return name in self.attributes def get_attribute_value(self, name, default=no_default): """Gets the value of the attribute with the given name. Args: name: the attribute name default (no_default): the default value to return if the attribute does not exist. Can be ``None``. If no default value is provided, an exception is raised if the attribute does not exist Returns: the attribute value Raises: KeyError: if the attribute does not exist and no default value was provided """ try: # pylint: disable=unsubscriptable-object return self.attributes[name].value except KeyError: if default is not no_default: return default raise def to_detected_object(self, name=None): """Returns an ``eta.core.objects.DetectedObject`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.objects.DetectedObject`` """ label = self.label # pylint: disable=unpacking-non-sequence tlx, tly, w, h = self.bounding_box brx = tlx + w bry = tly + h bounding_box = etag.BoundingBox.from_coords(tlx, tly, brx, bry) confidence = self.confidence # pylint: disable=no-member attrs = etad.AttributeContainer() for attr_name, attr in self.attributes.items(): attr_value = attr.value if isinstance(attr_value, bool): _attr = etad.BooleanAttribute(attr_name, attr_value) elif etau.is_numeric(attr_value): _attr = etad.NumericAttribute(attr_name, attr_value) else: _attr = etad.CategoricalAttribute(attr_name, str(attr_value)) attrs.add(_attr) return etao.DetectedObject( label=label, bounding_box=bounding_box, confidence=confidence, name=name, attrs=attrs, ) def to_image_labels(self, name=None): """Returns an ``eta.core.image.ImageLabels`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.image.ImageLabels`` """ image_labels = etai.ImageLabels() image_labels.add_object(self.to_detected_object(name=name)) return image_labels @classmethod def from_detected_object(cls, dobj): """Creates a :class:`Detection` instance from an ``eta.core.objects.DetectedObject``. Args: dobj: a ``eta.core.objects.DetectedObject`` Returns: a :class:`Detection` """ # Bounding box xtl, ytl, xbr, ybr = dobj.bounding_box.to_coords() bounding_box = [xtl, ytl, (xbr - xtl), (ybr - ytl)] # Atrributes attributes = {} for attr in dobj.attrs: if isinstance(attr, etad.NumericAttribute): _attr = NumericAttribute(value=attr.value) elif isinstance(attr, etad.BooleanAttribute): _attr = BooleanAttribute(value=attr.value) else: _attr = CategoricalAttribute(value=str(attr.value)) if attr.confidence is not None: _attr.confidence = attr.confidence attributes[attr.name] = _attr return Detection( label=dobj.label, confidence=dobj.confidence, bounding_box=bounding_box, attributes=attributes, ) def _get_repr_fields(self): # pylint: disable=no-member return ("id",) + self._fields_ordered
class Classification(ImageLabel): """A classification label. Args: label (None): the label string confidence (None): a confidence in ``[0, 1]`` for the label logits (None): logits associated with the labels """ meta = {"allow_inheritance": True} _id = fof.ObjectIdField( required=True, default=ObjectId, unique=True, primary_key=True ) label = fof.StringField() confidence = fof.FloatField() logits = fof.VectorField() @property def id(self): """The ID of the document.""" return str(self._id) def to_image_labels(self, name=None): """Returns an ``eta.core.image.ImageLabels`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.image.ImageLabels`` """ image_labels = etai.ImageLabels() image_labels.add_attribute( etad.CategoricalAttribute( name, self.label, confidence=self.confidence ) ) return image_labels @classmethod def from_attribute(cls, attr): """Creates a :class:`Classification` instance from an attribute. The attribute value is cast to a string, if necessary. Args: attr: an :class:`Attribute` or ``eta.core.data.Attribute`` Returns: a :class:`Classification` """ classification = cls(label=str(attr.value)) try: classification.confidence = attr.confidence except: pass return classification def _get_repr_fields(self): # pylint: disable=no-member return ("id",) + self._fields_ordered
class Keypoint(ImageLabel, _HasID, _HasAttributes): """A list of keypoints in an image. Args: label (None): a label for the points points (None): a list of ``(x, y)`` keypoints in ``[0, 1] x [0, 1]`` attributes ({}): a dict mapping attribute names to :class:`Attribute` instances """ meta = {"allow_inheritance": True} label = fof.StringField() points = fof.ListField() def to_eta_keypoints(self, name=None): """Returns an ``eta.core.keypoints.Keypoints`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.keypoints.Keypoints`` """ # pylint: disable=no-member attrs = _to_eta_attributes(self.attributes) return etak.Keypoints( name=name, label=self.label, points=self.points, attrs=attrs, ) def to_image_labels(self, name=None): """Returns an ``eta.core.image.ImageLabels`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.image.ImageLabels`` """ image_labels = etai.ImageLabels() image_labels.add_keypoints(self.to_eta_keypoints(name=name)) return image_labels @classmethod def from_eta_keypoints(cls, keypoints): """Creates a :class:`Keypoint` instance from an ``eta.core.keypoints.Keypoints``. Args: keypoints: an ``eta.core.keypoints.Keypoints`` Returns: a :class:`Keypoint` """ attributes = _from_eta_attributes(keypoints.attrs) return cls( label=keypoints.label, points=keypoints.points, attributes=attributes, )
class Polyline(ImageLabel, _HasID, _HasAttributes): """A polyline or polygon. Args: label (None): a label for the shape points (None): a list of ``(x, y)`` points in ``[0, 1] x [0, 1]`` describing the vertexes of a polyline closed (False): whether the polyline is closed, i.e., and edge should be drawn from the last vertex to the first vertex filled (False): whether the polyline represents a shape that can be filled when rendering it attributes ({}): a dict mapping attribute names to :class:`Attribute` instances """ meta = {"allow_inheritance": True} label = fof.StringField() points = fof.ListField() closed = fof.BooleanField(default=False) filled = fof.BooleanField(default=False) def to_eta_polyline(self, name=None): """Returns an ``eta.core.polylines.Polyline`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.polylines.Polyline`` """ # pylint: disable=no-member attrs = _to_eta_attributes(self.attributes) return etap.Polyline( label=self.label, name=name, points=self.points, closed=self.closed, filled=self.filled, attrs=attrs, ) def to_image_labels(self, name=None): """Returns an ``eta.core.image.ImageLabels`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.image.ImageLabels`` """ image_labels = etai.ImageLabels() image_labels.add_polyline(self.to_eta_polyline(name=name)) return image_labels @classmethod def from_eta_polyline(cls, polyline): """Creates a :class:`Polyline` instance from an ``eta.core.polylines.Polyline``. Args: polyline: an ``eta.core.polylines.Polyline`` Returns: a :class:`Polyline` """ attributes = _from_eta_attributes(polyline.attrs) return cls( label=polyline.label, points=polyline.points, closed=polyline.closed, filled=polyline.filled, attributes=attributes, )
class Detection(ImageLabel, _HasID, _HasAttributes): """An object detection. Args: label (None): the label string bounding_box (None): a list of relative bounding box coordinates in ``[0, 1]`` in the following format:: [<top-left-x>, <top-left-y>, <width>, <height>] mask (None): an instance segmentation mask for the detection within its bounding box, which should be a 2D binary or 0/1 integer NumPy array confidence (None): a confidence in ``[0, 1]`` for the label index (None): an index for the object attributes ({}): a dict mapping attribute names to :class:`Attribute` instances """ meta = {"allow_inheritance": True} label = fof.StringField() bounding_box = fof.ListField() mask = fof.ArrayField() confidence = fof.FloatField() index = fof.IntField() def to_detected_object(self, name=None): """Returns an ``eta.core.objects.DetectedObject`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.objects.DetectedObject`` """ label = self.label index = self.index # pylint: disable=unpacking-non-sequence tlx, tly, w, h = self.bounding_box brx = tlx + w bry = tly + h bounding_box = etag.BoundingBox.from_coords(tlx, tly, brx, bry) mask = self.mask confidence = self.confidence # pylint: disable=no-member attrs = _to_eta_attributes(self.attributes) return etao.DetectedObject( label=label, index=index, bounding_box=bounding_box, mask=mask, confidence=confidence, name=name, attrs=attrs, ) def to_image_labels(self, name=None): """Returns an ``eta.core.image.ImageLabels`` representation of this instance. Args: name (None): the name of the label field Returns: an ``eta.core.image.ImageLabels`` """ image_labels = etai.ImageLabels() image_labels.add_object(self.to_detected_object(name=name)) return image_labels @classmethod def from_detected_object(cls, dobj): """Creates a :class:`Detection` instance from an ``eta.core.objects.DetectedObject``. Args: dobj: a ``eta.core.objects.DetectedObject`` Returns: a :class:`Detection` """ xtl, ytl, xbr, ybr = dobj.bounding_box.to_coords() bounding_box = [xtl, ytl, (xbr - xtl), (ybr - ytl)] attributes = _from_eta_attributes(dobj.attrs) return cls( label=dobj.label, bounding_box=bounding_box, confidence=dobj.confidence, index=dobj.index, mask=dobj.mask, attributes=attributes, )