def matches_field(self, field):
    """Determines whether this sample field matches the given field.

    Args:
        field: a :class:``fiftyone.core.fields.Field`` instance

    Returns:
        True/False
    """
    # Name and fully-qualified field type must agree
    if self.name != field.name or self.ftype != etau.get_class_name(field):
        return False

    # Only compare the subfield type when one is declared (short-circuits so
    # ``field.field`` is not touched otherwise)
    subfield_ok = (
        not self.subfield
        or self.subfield == etau.get_class_name(field.field)
    )
    if not subfield_ok:
        return False

    # Likewise for the embedded document type
    return (
        not self.embedded_doc_type
        or self.embedded_doc_type == etau.get_class_name(field.document_type)
    )
def __str__(self):
    # Render as "ClassName(FieldClassName)" when a field is attached;
    # otherwise just the class name
    name = etau.get_class_name(self)
    if self.field is None:
        return name

    return "%s(%s)" % (name, etau.get_class_name(self.field))
def add_images(dataset, samples, sample_parser, tags=None):
    """Adds the given images to the dataset.

    This operation does not read the images.

    See :ref:`this guide <custom-sample-parser>` for more details about
    adding images to a dataset by defining your own
    :class:`UnlabeledImageSampleParser <fiftyone.utils.data.parsers.UnlabeledImageSampleParser>`.

    Args:
        dataset: a :class:`fiftyone.core.dataset.Dataset`
        samples: an iterable of samples
        sample_parser: a
            :class:`fiftyone.utils.data.parsers.UnlabeledImageSampleParser`
            instance to use to parse the samples
        tags (None): an optional list of tags to attach to each sample

    Returns:
        a list of IDs of the samples that were added to the dataset

    Raises:
        ValueError: if ``sample_parser`` has the wrong type or does not
            provide image paths
    """
    # Validate the parser type first so users get the intended ValueError
    # rather than an AttributeError from accessing `has_image_path` on an
    # arbitrary object
    if not isinstance(sample_parser, UnlabeledImageSampleParser):
        raise ValueError(
            "`sample_parser` must be a subclass of %s; found %s"
            % (
                etau.get_class_name(UnlabeledImageSampleParser),
                etau.get_class_name(sample_parser),
            )
        )

    if not sample_parser.has_image_path:
        raise ValueError(
            "Sample parser must have `has_image_path == True` to add its "
            "samples to the dataset"
        )

    def parse_sample(sample):
        # Converts one raw sample into a `fos.Sample`
        sample_parser.with_sample(sample)

        image_path = sample_parser.get_image_path()

        if sample_parser.has_image_metadata:
            metadata = sample_parser.get_image_metadata()
        else:
            metadata = None

        return fos.Sample(filepath=image_path, metadata=metadata, tags=tags)

    try:
        num_samples = len(samples)
    except TypeError:
        # `samples` is an iterable with no length (e.g., a generator)
        num_samples = None

    _samples = map(parse_sample, samples)
    return dataset.add_samples(
        _samples, num_samples=num_samples, expand_schema=False
    )
def parse_serializable(obj, cls):
    """Parses the given object as an instance of the given
    ``eta.core.serial.Serializable`` class.

    Args:
        obj: an instance of ``cls``, or a serialized string or dictionary
            representation of one
        cls: a ``eta.core.serial.Serializable`` class

    Returns:
        an instance of ``cls``
    """
    # Already the right type; nothing to do
    if isinstance(obj, cls):
        return obj

    # Dispatch to the appropriate deserializer based on input form
    loader = None
    if etau.is_str(obj):
        loader = cls.from_str
    elif isinstance(obj, dict):
        loader = cls.from_dict

    if loader is not None:
        return loader(obj)

    raise ValueError(
        "Unable to load '%s' as an instance of '%s'"
        % (obj, etau.get_class_name(cls))
    )
def __len__(self):
    """The total number of samples that will be imported.

    Raises:
        TypeError: if the total number is not known
    """
    # Importers for streaming sources cannot know their size up front
    msg = (
        "The number of samples in a '%s' is not known a priori"
        % etau.get_class_name(self)
    )
    raise TypeError(msg)
def __init__(self, config):
    """Initializes a ModelManager instance.

    Args:
        config: a Config for the ModelManager subclass
    """
    self.validate(config)

    # Record the fully-qualified class name of this manager alongside its
    # validated config
    self.type = etau.get_class_name(self)
    self.config = config
def find_port():
    # Search the (normalized) child process tree for a TCP port that the
    # service is listening on, optionally restricted to ``port``
    root = fosu.normalize_wrapper_process(self.child)
    for proc in root.children(recursive=True):
        try:
            for candidate in fosu.get_listening_tcp_ports(proc):
                if port is None or port == candidate:
                    return candidate
        except psutil.Error:
            # Process may have exited or be inaccessible; skip it
            pass

    raise ServiceListenTimeout(etau.get_class_name(self), port)
def get_dataset_exporter_cls(self):
    """Returns the :class:`fiftyone.utils.data.exporters.DatasetExporter`
    class for exporting datasets of this type to disk.

    Returns:
        a :class:`fiftyone.utils.data.exporters.DatasetExporter` class
    """
    # Subclasses that support export must override this method
    msg = (
        "Dataset type '%s' does not provide a default DatasetExporter"
        % etau.get_class_name(self)
    )
    raise TypeError(msg)
def _serialize(self):
    """Returns a JSON dict representation of the :class:`ViewStage`.

    Returns:
        a JSON dict
    """
    # Embed the stage's class name so it can be reconstructed reflectively
    d = {"kwargs": self._kwargs()}
    d["_cls"] = etau.get_class_name(self)
    return d
def find_port():
    # Gather the TCP connections of every process in the child tree
    children = self.child.children(recursive=True)
    all_conns = itertools.chain.from_iterable(
        proc.connections(kind="tcp") for proc in children
    )

    for conn in all_conns:
        # A listening socket has no remote address and LISTEN status
        if conn.raddr or conn.status != psutil.CONN_LISTEN:
            continue

        local_port = conn.laddr[1]
        if port is None or port == local_port:
            return local_port

    raise ServiceListenTimeout(etau.get_class_name(self), port)
def get_image_path(self):
    """Returns the image path for the current sample.

    Returns:
        the path to the image on disk
    """
    # Parsers that don't expose image paths must say so explicitly
    if not self.has_image_path:
        raise ValueError(
            "This '%s' does not provide image paths"
            % etau.get_class_name(self)
        )

    raise NotImplementedError("subclass must implement get_image_path()")
def get_video_metadata(self):
    """Returns the video metadata for the current sample.

    Returns:
        a :class:`fiftyone.core.metadata.VideoMetadata` instance
    """
    # Fixed docstring: this returns VideoMetadata, not ImageMetadata
    if not self.has_video_metadata:
        raise ValueError(
            "This '%s' does not provide video metadata"
            % etau.get_class_name(self)
        )

    raise NotImplementedError(
        "subclass must implement get_video_metadata()"
    )
def _serialize(self):
    """Returns a JSON dict representation of the :class:`ViewStage`.

    Returns:
        a JSON dict
    """
    # Lazily assign a UUID the first time this stage is serialized so the
    # same stage always serializes with the same identifier
    if self._uuid is None:
        self._uuid = str(uuid.uuid4())

    d = {"kwargs": self._kwargs()}
    d["_cls"] = etau.get_class_name(self)
    d["_uuid"] = self._uuid
    return d
def get_dataset_info(self):
    """Returns the dataset info for the dataset.

    By convention, this method should be called after all samples in the
    dataset have been imported.

    Returns:
        a dict of dataset info
    """
    # Importers that expose no dataset info must say so explicitly
    if not self.has_dataset_info:
        msg = (
            "This '%s' does not provide dataset info"
            % etau.get_class_name(self)
        )
        raise ValueError(msg)

    raise NotImplementedError("subclass must implement get_dataset_info()")
def from_field(cls, field):
    """Creates a :class:`SampleFieldDocument` from a MongoEngine field.

    Args:
        field: a :class:``fiftyone.core.fields.Field`` instance

    Returns:
        a :class:`SampleFieldDocument`
    """
    # Subfield and embedded document type are optional; `_get_attr_repr`
    # yields None when the attribute is absent
    kwargs = {
        "name": field.name,
        "ftype": etau.get_class_name(field),
        "subfield": cls._get_attr_repr(field, "field"),
        "embedded_doc_type": cls._get_attr_repr(field, "document_type"),
    }
    return cls(**kwargs)
def get_sample_field_schema(self):
    """Returns dictionary describing the field schema of the samples loaded
    by this importer.

    The returned dictionary should map field names to string
    representations of :class:`fiftyone.core.fields.Field` instances
    generated by ``str(field)``.

    Returns:
        a dict
    """
    # Importers that support a field schema must override this method
    if self.has_sample_field_schema:
        raise NotImplementedError(
            "subclass must implement get_sample_field_schema()"
        )

    raise ValueError(
        "This '%s' does not provide a sample field schema"
        % etau.get_class_name(self)
    )
def serialize(self, reflective=True):
    """Serializes the container into a dictionary.

    Containers have a custom serialization implementation that optionally
    embeds the class name and element class name into the JSON, which
    enables reflective parsing when reading from disk.

    Args:
        reflective: whether to include the reflective attributes in the
            JSON representation. By default, this is True

    Returns:
        a JSON dictionary representation of the container
    """
    out = OrderedDict()

    if reflective:
        # Embed class metadata so the container can be parsed reflectively
        out["_CLS"] = self.get_class_name()
        out[self._ELE_CLS_FIELD] = etau.get_class_name(self._ELE_CLS)

    serialized_elements = []
    for element in self.__elements__:
        serialized_elements.append(element.serialize())

    out[self._ELE_ATTR] = serialized_elements
    return out
def dataset_type(self):
    """The fully-qualified class string of the
    :class:`fiftyone.types.dataset_types.Dataset` type.
    """
    type_name = etau.get_class_name(self._dataset_type)
    return type_name
def zoo_dataset(self):
    """The fully-qualified class string for the :class:`ZooDataset` of the
    dataset.
    """
    class_name = etau.get_class_name(self._zoo_dataset)
    return class_name
def get_config_class_name(cls):
    """Returns the fully-qualified class name string of the Config
    instances in this container.
    """
    config_cls = cls._ELE_CLS
    return etau.get_class_name(config_cls)
def get_class_name(cls):
    """Returns the fully-qualified class name string of this container."""
    name = etau.get_class_name(cls)
    return name
def add_labeled_images(
    dataset,
    samples,
    sample_parser,
    label_field="ground_truth",
    tags=None,
    expand_schema=True,
):
    """Adds the given labeled images to the dataset.

    This operation will iterate over all provided samples, but the images
    will not be read (unless the sample parser requires it in order to
    compute image metadata).

    See :ref:`this guide <custom-sample-parser>` for more details about
    adding labeled images to a dataset by defining your own
    :class:`LabeledImageSampleParser <fiftyone.utils.data.parsers.LabeledImageSampleParser>`.

    Args:
        dataset: a :class:`fiftyone.core.dataset.Dataset`
        samples: an iterable of samples
        sample_parser: a
            :class:`fiftyone.utils.data.parsers.LabeledImageSampleParser`
            instance to use to parse the samples
        label_field ("ground_truth"): the name of the field to use for the
            labels
        tags (None): an optional list of tags to attach to each sample
        expand_schema (True): whether to dynamically add new sample fields
            encountered to the dataset schema. If False, an error is raised
            if a sample's schema is not a subset of the dataset schema

    Returns:
        a list of IDs of the samples that were added to the dataset

    Raises:
        ValueError: if ``sample_parser`` has the wrong type or does not
            provide image paths
    """
    # Validate the parser type first so users get the intended ValueError
    # rather than an AttributeError from accessing `has_image_path` on an
    # arbitrary object
    if not isinstance(sample_parser, LabeledImageSampleParser):
        raise ValueError(
            "`sample_parser` must be a subclass of %s; found %s"
            % (
                etau.get_class_name(LabeledImageSampleParser),
                etau.get_class_name(sample_parser),
            )
        )

    if not sample_parser.has_image_path:
        raise ValueError(
            "Sample parser must have `has_image_path == True` to add its "
            "samples to the dataset"
        )

    if expand_schema and sample_parser.label_cls is not None:
        # This has the benefit of ensuring that `label_field` exists, even
        # if all of the parsed samples are unlabeled (i.e., return labels
        # that are all `None`)
        dataset._ensure_label_field(label_field, sample_parser.label_cls)

        # The schema now never needs expanding, because we already ensured
        # that `label_field` exists, if necessary
        expand_schema = False

    def parse_sample(sample):
        # Converts one raw sample into a labeled `fos.Sample`
        sample_parser.with_sample(sample)

        image_path = sample_parser.get_image_path()

        if sample_parser.has_image_metadata:
            metadata = sample_parser.get_image_metadata()
        else:
            metadata = None

        label = sample_parser.get_label()

        sample = fos.Sample(filepath=image_path, metadata=metadata, tags=tags)

        # A dict label maps field names -> labels; otherwise store the
        # single label under `label_field`
        if isinstance(label, dict):
            sample.update_fields(label)
        elif label is not None:
            sample[label_field] = label

        return sample

    try:
        num_samples = len(samples)
    except TypeError:
        # `samples` is an iterable with no length (e.g., a generator)
        num_samples = None

    _samples = map(parse_sample, samples)
    return dataset.add_samples(
        _samples, expand_schema=expand_schema, num_samples=num_samples
    )
def export(
    self,
    export_dir=None,
    dataset_type=None,
    dataset_exporter=None,
    label_field=None,
    label_prefix=None,
    labels_dict=None,
    overwrite=False,
    **kwargs,
):
    """Exports the samples in the collection to disk.

    Provide either ``export_dir`` and ``dataset_type`` or
    ``dataset_exporter`` to perform an export.

    See :ref:`this guide <custom-dataset-exporter>` for more details about
    exporting datasets in custom formats by defining your own
    :class:`DatasetExporter <fiftyone.utils.data.exporters.DatasetExporter>`.

    Args:
        export_dir (None): the directory to which to export the samples in
            format ``dataset_type``
        dataset_type (None): the
            :class:`fiftyone.types.dataset_types.Dataset` type to write. If
            not specified, the default type for ``label_field`` is used
        dataset_exporter (None): a
            :class:`fiftyone.utils.data.exporters.DatasetExporter` to use
            to export the samples
        label_field (None): the name of the label field to export, if
            applicable. If none of ``label_field``, ``label_prefix``, and
            ``labels_dict`` are specified and the requested output type is
            a labeled dataset, the first field of compatible type for the
            output format is used
        label_prefix (None): a label field prefix; all fields whose name
            starts with the given prefix will be exported (with the prefix
            removed when constructing the label dicts). This parameter can
            only be used when the exporter can handle dictionaries of
            labels
        labels_dict (None): a dictionary mapping label field names to keys
            to use when constructing the label dict to pass to the
            exporter. This parameter can only be used when the exporter can
            handle dictionaries of labels
        overwrite (False): when an ``export_dir`` is provided, whether to
            delete the existing directory before performing the export
        **kwargs: optional keyword arguments to pass to
            ``dataset_type.get_dataset_exporter_cls(export_dir, **kwargs)``

    Raises:
        ValueError: if insufficient arguments are provided to define the
            export, or if the exporter could not be constructed
    """
    if dataset_type is None and dataset_exporter is None:
        raise ValueError(
            "Either `dataset_type` or `dataset_exporter` must be provided"
        )

    # Allow callers to pass the type class itself rather than an instance
    if dataset_type is not None and inspect.isclass(dataset_type):
        dataset_type = dataset_type()

    # If no dataset exporter was provided, construct one based on the
    # dataset type
    if dataset_exporter is None:
        # Fail fast with a clear message rather than an opaque TypeError
        # from `os.path.isdir(None)` below
        if export_dir is None:
            raise ValueError(
                "`export_dir` must be provided when no `dataset_exporter` "
                "is supplied"
            )

        if os.path.isdir(export_dir):
            if overwrite:
                etau.delete_dir(export_dir)
            else:
                logger.warning(
                    "Directory '%s' already exists; export will be merged "
                    "with existing files",
                    export_dir,
                )

        dataset_exporter_cls = dataset_type.get_dataset_exporter_cls()

        try:
            dataset_exporter = dataset_exporter_cls(export_dir, **kwargs)
        except Exception as e:
            exporter_name = dataset_exporter_cls.__name__
            raise ValueError(
                "Failed to construct exporter using syntax "
                "%s(export_dir, **kwargs); you may need to supply "
                "mandatory arguments to the constructor via `kwargs`. "
                "Please consult the documentation of `%s` to learn more"
                % (
                    exporter_name,
                    etau.get_class_name(dataset_exporter_cls),
                )
            ) from e

    if label_prefix is not None:
        labels_dict = _get_labels_dict_for_prefix(self, label_prefix)

    if labels_dict is not None:
        label_field_or_dict = labels_dict
    elif label_field is None:
        # Choose the first label field that is compatible with the dataset
        # exporter (if any)
        label_field_or_dict = _get_default_label_field_for_exporter(
            self, dataset_exporter
        )
    else:
        label_field_or_dict = label_field

    # Export the dataset
    foud.export_samples(
        self,
        dataset_exporter=dataset_exporter,
        label_field_or_dict=label_field_or_dict,
    )
def add_labeled_videos(
    dataset,
    samples,
    sample_parser,
    tags=None,
    expand_schema=True,
):
    """Adds the given labeled videos to the dataset.

    This operation will iterate over all provided samples, but the videos
    will not be read/decoded/etc.

    See :ref:`this guide <custom-sample-parser>` for more details about
    adding labeled videos to a dataset by defining your own
    :class:`LabeledVideoSampleParser <fiftyone.utils.data.parsers.LabeledVideoSampleParser>`.

    Args:
        dataset: a :class:`fiftyone.core.dataset.Dataset`
        samples: an iterable of samples
        sample_parser: a
            :class:`fiftyone.utils.data.parsers.LabeledVideoSampleParser`
            instance to use to parse the samples
        tags (None): an optional list of tags to attach to each sample
        expand_schema (True): whether to dynamically add new sample fields
            encountered to the dataset schema. If False, an error is raised
            if a sample's schema is not a subset of the dataset schema

    Returns:
        a list of IDs of the samples that were added to the dataset

    Raises:
        ValueError: if ``sample_parser`` has the wrong type
    """
    if not isinstance(sample_parser, LabeledVideoSampleParser):
        raise ValueError(
            "`sample_parser` must be a subclass of %s; found %s"
            % (
                etau.get_class_name(LabeledVideoSampleParser),
                etau.get_class_name(sample_parser),
            )
        )

    def parse_sample(sample):
        # Converts one raw sample into a `fos.Sample` with frame labels
        sample_parser.with_sample(sample)

        video_path = sample_parser.get_video_path()

        if sample_parser.has_video_metadata:
            metadata = sample_parser.get_video_metadata()
        else:
            metadata = None

        sample = fos.Sample(filepath=video_path, metadata=metadata, tags=tags)

        frames = sample_parser.get_frame_labels()
        if frames is not None:
            sample.frames.update(frames)

        return sample

    try:
        num_samples = len(samples)
    except TypeError:
        # `samples` is an iterable with no length (e.g., a generator)
        num_samples = None

    _samples = map(parse_sample, samples)
    return dataset.add_samples(
        _samples, expand_schema=expand_schema, num_samples=num_samples
    )
def __str__(self):
    # Render as "ClassName(DocumentTypeClassName)"
    cls_name = etau.get_class_name(self)
    doc_name = etau.get_class_name(self.document_type)
    return "%s(%s)" % (cls_name, doc_name)
def convert_dataset(
    input_dir=None,
    input_type=None,
    dataset_importer=None,
    output_dir=None,
    output_type=None,
    dataset_exporter=None,
):
    """Converts a dataset stored on disk to another format on disk.

    The input dataset may be specified by providing either an ``input_dir``
    and a corresponding ``input_type`` or by providing a
    ``dataset_importer``.

    The output dataset may be specified by providing either an
    ``output_dir`` and a corresponding ``output_type`` or by providing a
    ``dataset_exporter``.

    The conversion round-trips through a temporary FiftyOne dataset, which
    is deleted when the export completes.

    Args:
        input_dir (None): the input dataset directory
        input_type (None): the
            :class:`fiftyone.types.dataset_types.Dataset` type of the
            dataset in ``input_dir``
        dataset_importer (None): a
            :class:`fiftyone.utils.data.importers.DatasetImporter` to use
            to import the input dataset
        output_dir (None): the directory to which to write the output
            dataset
        output_type (None): the
            :class:`fiftyone.types.dataset_types.Dataset` type to write to
            ``output_dir``
        dataset_exporter (None): a
            :class:`fiftyone.utils.data.exporters.DatasetExporter` to use
            to export the dataset

    Raises:
        ValueError: if neither an input type/importer or an output
            type/exporter is provided
    """
    if input_type is None and dataset_importer is None:
        raise ValueError(
            "Either `input_type` or `dataset_importer` must be provided")

    if output_type is None and dataset_exporter is None:
        raise ValueError(
            "Either `output_type` or `dataset_exporter` must be provided")

    # Label field used (if necessary) when converting labeled datasets
    label_field = "label"

    # Import dataset
    if dataset_importer is not None:
        # Import via ``dataset_importer``
        logger.info(
            "Loading dataset from '%s'", dataset_importer.dataset_dir)
        logger.info(
            "Using DatasetImporter '%s'",
            etau.get_class_name(dataset_importer))
        dataset = fo.Dataset.from_importer(
            dataset_importer, label_field=label_field)
        logger.info("Import complete")
    else:
        # Import via ``input_type``
        if inspect.isclass(input_type):
            input_type = input_type()

        # If the input dataset contains TFRecords, they must be unpacked
        # into a temporary directory during conversion
        if isinstance(
            input_type,
            (fot.TFImageClassificationDataset, fot.TFObjectDetectionDataset),
        ):
            with etau.TempDir() as images_dir:
                dataset_importer_cls = input_type.get_dataset_importer_cls()
                dataset_importer = dataset_importer_cls(
                    input_dir, images_dir)
                # Recurse with the concrete importer; the temp dir must
                # remain alive for the duration of the conversion
                convert_dataset(
                    dataset_importer=dataset_importer,
                    output_dir=output_dir,
                    output_type=output_type,
                    dataset_exporter=dataset_exporter,
                )
                return

        logger.info("Loading dataset from '%s'", input_dir)
        logger.info("Input format '%s'", etau.get_class_name(input_type))
        dataset = fo.Dataset.from_dir(
            input_dir, input_type, label_field=label_field)
        logger.info("Import complete")

    # Export dataset
    if dataset_exporter is not None:
        # Export via ``dataset_exporter``
        logger.info(
            "Exporting dataset to '%s'", dataset_exporter.export_dir)
        logger.info(
            "Using DatasetExporter '%s'",
            etau.get_class_name(dataset_exporter))
        dataset.export(
            dataset_exporter=dataset_exporter, label_field=label_field)
        logger.info("Export complete")
    else:
        # Export via ``output_type``
        if inspect.isclass(output_type):
            output_type = output_type()

        logger.info("Exporting dataset to '%s'", output_dir)
        logger.info("Export format '%s'", etau.get_class_name(output_type))
        dataset.export(
            export_dir=output_dir,
            dataset_type=output_type,
            label_field=label_field,
        )
        logger.info("Export complete")

    # Cleanup the temporary dataset used for the conversion
    dataset.delete()
def __str__(self):
    # The string representation is simply the fully-qualified class name
    name = etau.get_class_name(self)
    return name
def _get_attr_repr(field, attr_name):
    # Returns the fully-qualified class name of ``getattr(field,
    # attr_name)``, or None when the attribute is missing or falsy
    attr = getattr(field, attr_name, None)
    if not attr:
        return None

    return etau.get_class_name(attr)