Example #1
    def setUp(self):
        self.id = 0
        self.factory = FormatterFactory()

        # Dummy FileDescriptor for testing getFormatter
        self.fileDescriptor = FileDescriptor(Location("/a/b/c", "d"),
                                             StorageClass("DummyStorageClass", dict, None))
Example #2
    @classmethod
    def fromMetadata(cls, metadata, obsInfo=None, storageClass=None, location=None):
        """Construct a possibly-limited formatter from known metadata.

        Parameters
        ----------
        metadata : `lsst.daf.base.PropertyList`
            Raw header metadata, with any fixes (see
            `astro_metadata_translator.fix_header`) applied but nothing
            stripped.
        obsInfo : `astro_metadata_translator.ObservationInfo`, optional
            Structured information already extracted from ``metadata``.
            If not provided, will be read from ``metadata`` on first use.
        storageClass : `lsst.daf.butler.StorageClass`, optional
            StorageClass for this file.  If not provided, the formatter will
            only support `makeWcs`, `makeVisitInfo`, `makeFilter`, and other
            operations that operate purely on metadata and not the actual file.
        location : `lsst.daf.butler.Location`, optional
            Location of the file.  If not provided, the formatter will only
            support `makeWcs`, `makeVisitInfo`, `makeFilter`, and other
            operations that operate purely on metadata and not the actual file.

        Returns
        -------
        formatter : `FitsRawFormatterBase`
            An instance of ``cls``.
        """
        self = cls(FileDescriptor(location, storageClass))
        self._metadata = metadata
        self._observationInfo = obsInfo
        return self
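
For illustration, a minimal usage sketch, assuming the LSST stack is
installed; MyCameraRawFormatter and the raw file name are hypothetical,
and only the metadata-driven helpers named in the docstring are used:

    from astro_metadata_translator import fix_header
    from lsst.afw.fits import readMetadata

    metadata = readMetadata("raw.fits")  # hypothetical raw file
    fix_header(metadata)  # apply fixes but strip nothing, per the docstring

    # No storageClass or location: file-reading operations are unsupported,
    # but metadata-only helpers still work.
    formatter = MyCameraRawFormatter.fromMetadata(metadata)
    visitInfo = formatter.makeVisitInfo()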
Example #3
    def setUp(self):
        self.id = 0
        self.factory = FormatterFactory()
        self.universe = DimensionUniverse()
        self.dataId = DataCoordinate.makeEmpty(self.universe)

        # Dummy FileDescriptor for testing getFormatter
        self.fileDescriptor = FileDescriptor(
            Location("/a/b/c", "d"),
            StorageClass("DummyStorageClass", dict, None))
Example #4
    def _prepare_for_put(self, inMemoryDataset, ref):
        """Check the arguments for ``put`` and obtain formatter and
        location.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Returns
        -------
        location : `Location`
            The location to write the dataset.
        formatter : `Formatter`
            The `Formatter` to use to write the dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.
        """
        self._validate_put_parameters(inMemoryDataset, ref)

        # Work out output file name
        try:
            template = self.templates.getTemplate(ref)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(
                f"Unable to find template for {ref}") from e

        location = self.locationFactory.fromPath(template.format(ref))

        # Get the formatter based on the storage class
        storageClass = ref.datasetType.storageClass
        try:
            formatter = self.formatterFactory.getFormatter(
                ref, FileDescriptor(location, storageClass=storageClass),
                ref.dataId)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(
                f"Unable to find formatter for {ref}") from e

        return location, formatter
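
Factoring the checks out keeps the write path short. A caller sketch
(registry bookkeeping is elided; here the formatter already holds its
FileDescriptor, so write is assumed to take only the in-memory dataset):

    def put(self, inMemoryDataset, ref):
        location, formatter = self._prepare_for_put(inMemoryDataset, ref)
        # The formatter knows its destination from the FileDescriptor;
        # recording the resulting file with the datastore is elided.
        path = formatter.write(inMemoryDataset)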
Example #5
    def _prepare_for_get(self, ref, parameters=None):
        """Check parameters for ``get`` and obtain formatter and
        location.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify, for example,
            a slice of the Dataset to be loaded.

        Returns
        -------
        getInfo : `DatastoreFileGetInformation`
            Parameters needed to retrieve the file.
        """
        log.debug("Retrieve %s from %s with parameters %s", ref, self.name,
                  parameters)

        # Get file metadata and internal metadata
        location, storedFileInfo = self._get_dataset_location_info(ref)
        if location is None:
            raise FileNotFoundError(f"Could not retrieve Dataset {ref}.")

        # We have a write storage class and a read storage class and they
        # can be different for concrete composites.
        readStorageClass = ref.datasetType.storageClass
        writeStorageClass = storedFileInfo.storageClass

        # Check that the supplied parameters are suitable for the type read
        readStorageClass.validateParameters(parameters)

        # Is this a component request?
        component = ref.datasetType.component()

        formatter = getInstanceOf(
            storedFileInfo.formatter,
            FileDescriptor(location,
                           readStorageClass=readStorageClass,
                           storageClass=writeStorageClass,
                           parameters=parameters), ref.dataId)
        formatterParams, assemblerParams = formatter.segregateParameters()

        return DatastoreFileGetInformation(location, formatter, storedFileInfo,
                                           assemblerParams, component,
                                           readStorageClass)
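
The attribute names below are assumed from the constructor call above; a
consumer of the returned bundle might look like this sketch:

    getInfo = self._prepare_for_get(ref, parameters)
    result = getInfo.formatter.read(component=getInfo.component)
    # Apply any parameters the formatter could not handle itself.
    if parameters:
        result = getInfo.readStorageClass.assembler().handleParameters(
            result, getInfo.assemblerParams)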
Example #6
    def put(self, inMemoryDataset, ref):
        """Write a InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        If the datastore is configured to reject certain dataset types it
        is possible that the put will fail and raise a
        `DatasetTypeNotSupportedError`.  The main use case for this is to
        allow `ChainedDatastore` to put to multiple datastores without
        requiring that every datastore accepts the dataset.
        """
        datasetType = ref.datasetType
        storageClass = datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError("Inconsistency between supplied object ({}) "
                            "and storage class type ({})".format(
                                type(inMemoryDataset), storageClass.pytype))

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via"
                " configuration.")

        # Work out output file name
        try:
            template = self.templates.getTemplate(ref)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(
                f"Unable to find template for {ref}") from e

        location = self.locationFactory.fromPath(template.format(ref))

        # Get the formatter based on the storage class
        try:
            formatter = self.formatterFactory.getFormatter(ref)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(
                f"Unable to find formatter for {ref}") from e

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            with self._transaction.undoWith("mkdir", os.rmdir, storageDir):
                safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root,
                                         formatter.predictPath(location))

        if os.path.exists(predictedFullPath):
            raise FileExistsError(
                f"Cannot write file for ref {ref} as "
                f"output file {predictedFullPath} already exists")

        with self._transaction.undoWith("write", os.remove, predictedFullPath):
            path = formatter.write(
                inMemoryDataset,
                FileDescriptor(location, storageClass=storageClass))
            assert predictedFullPath == os.path.join(self.root, path)
            log.debug("Wrote file to %s", path)

        self.ingest(path, ref, formatter=formatter)
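
A hedged call-site sketch; building the datastore and the DatasetRef is
elided, and the payload must match the storage class pytype:

    # For a DatasetType whose storage class pytype is dict:
    datastore.put({"mean": 5, "sigma": 2}, ref)
    assert datastore.exists(ref)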
Example #7
    def get(self, ref, parameters=None):
        """Load an InMemoryDataset from the store.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify, for example,
            a slice of the Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset cannot be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """
        log.debug("Retrieve %s from %s with parameters %s", ref, self.name,
                  parameters)

        # Get file metadata and internal metadata
        try:
            storedFileInfo = self.getStoredFileInfo(ref)
        except KeyError:
            raise FileNotFoundError(
                "Could not retrieve Dataset {}".format(ref))

        # Use the path to determine the location
        location = self.locationFactory.fromPath(storedFileInfo.path)

        # Too expensive to recalculate the checksum on fetch
        # but we can check size and existence
        if not os.path.exists(location.path):
            raise FileNotFoundError(
                "Dataset with Id {} does not seem to exist at"
                " expected location of {}".format(ref.id, location.path))
        stat = os.stat(location.path)
        size = stat.st_size
        if size != storedFileInfo.size:
            raise RuntimeError(
                "Integrity failure in Datastore. Size of file {} ({}) does not"
                " match recorded size of {}".format(location.path, size,
                                                    storedFileInfo.size))

        # We have a write storage class and a read storage class and they
        # can be different for concrete composites.
        readStorageClass = ref.datasetType.storageClass
        writeStorageClass = storedFileInfo.storageClass

        # Check that the supplied parameters are suitable for the type read
        readStorageClass.validateParameters(parameters)

        # Is this a component request?
        component = ref.datasetType.component()

        formatter = getInstanceOf(storedFileInfo.formatter)
        formatterParams, assemblerParams = formatter.segregateParameters(
            parameters)
        try:
            result = formatter.read(FileDescriptor(
                location,
                readStorageClass=readStorageClass,
                storageClass=writeStorageClass,
                parameters=parameters),
                                    component=component)
        except Exception as e:
            raise ValueError(
                "Failure from formatter for Dataset {}: {}".format(
                    ref.id, e)) from e

        # Process any left over parameters
        if parameters:
            result = readStorageClass.assembler().handleParameters(
                result, assemblerParams)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype
        if pytype and not isinstance(result, pytype):
            raise TypeError(
                "Got type {} from formatter but expected {}".format(
                    type(result), pytype))

        return result
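
Valid parameter names come from the read storage class definition; the
"bbox" key below is purely illustrative:

    # Full retrieval.
    dataset = datastore.get(ref)
    # Partial retrieval: validateParameters() rejects unknown keys, and
    # the assembler applies whatever the formatter did not handle.
    subset = datastore.get(ref, parameters={"bbox": bbox})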
Example #8
    def getUri(self, ref, predict=False):
        """URI to the Dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `str`
            URI string pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        When a predicted URI is requested an attempt will be made to form
        a reasonable URI based on file templates and the expected formatter.
        """
        # If this has never been written then we have to guess
        if not self.exists(ref):
            if not predict:
                raise FileNotFoundError(
                    "Dataset {} not in this datastore".format(ref))

            template = self.templates.getTemplate(ref)
            location = self.locationFactory.fromPath(template.format(ref))
            storageClass = ref.datasetType.storageClass
            formatter = self.formatterFactory.getFormatter(
                ref, FileDescriptor(location, storageClass=storageClass))
            # Try to use the extension attribute but ignore problems if the
            # formatter does not define one.
            try:
                location = formatter.makeUpdatedLocation(location)
            except Exception:
                # Use the default extension
                pass

            # Add a URI fragment to indicate this is a guess
            return location.uri + "#predicted"

        # If this is a ref that we have written we can get the path.
        # Get file metadata and internal metadata
        storedFileInfo = self.getStoredItemInfo(ref)

        # Use the path to determine the location
        location = self.locationFactory.fromPath(storedFileInfo.path)

        return location.uri
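
Both branches in a short sketch; unwrittenRef is a hypothetical ref with
no stored file:

    uri = datastore.getUri(ref)  # dataset already written
    guess = datastore.getUri(unwrittenRef, predict=True)
    assert guess.endswith("#predicted")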
Example #9
    def checkInstrumentWithRegistry(self, cls, testRaw):
        """Check that an Instrument class, its Camera, and its Detectors
        round-trip through a Butler registry and datastore.
        """
        Butler.makeRepo(self.root)
        butler = Butler(self.root, run="tests")
        instrument = cls()
        scFactory = StorageClassFactory()

        # Check instrument class and metadata translator agree on
        # instrument name -- use the raw formatter to do the file reading
        rawFormatterClass = instrument.getRawFormatter({})
        formatter = rawFormatterClass(
            FileDescriptor(Location(DATAROOT, testRaw), StorageClass("x")))
        obsInfo = formatter.observationInfo
        self.assertEqual(instrument.getName(), obsInfo.instrument)

        # Add Instrument, Detector, and PhysicalFilter entries to the
        # Butler Registry.
        instrument.register(butler.registry)

        # Define a DatasetType for the cameraGeom.Camera, which can be
        # accessed just by identifying its Instrument.
        # A real-world Camera DatasetType should be identified by a
        # validity range as well.
        cameraDatasetType = DatasetType(
            "camera",
            dimensions=["instrument"],
            storageClass=scFactory.getStorageClass("Camera"),
            universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(cameraDatasetType)

        # Define a DatasetType for cameraGeom.Detectors, which can be
        # accessed by identifying its Instrument and (Butler) Detector.
        # A real-world Detector DatasetType probably doesn't need to exist,
        # as it would just duplicate information in the Camera, and
        # reading a full Camera just to get a single Detector should be
        # plenty efficient.
        detectorDatasetType = DatasetType(
            "detector",
            dimensions=["instrument", "detector"],
            storageClass=scFactory.getStorageClass("Detector"),
            universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(detectorDatasetType)

        # Put and get the Camera.
        dataId = dict(instrument=instrument.instrument)
        butler.put(instrument.getCamera(), "camera", dataId=dataId)
        camera = butler.get("camera", dataId)
        # Full camera comparisons are *slow*; just compare names.
        self.assertEqual(instrument.getCamera().getName(), camera.getName())

        # Put and get a random subset of the Detectors.
        allDetectors = list(instrument.getCamera())
        numDetectors = min(3, len(allDetectors))
        someDetectors = [
            allDetectors[i] for i in self.rng.choice(
                len(allDetectors), size=numDetectors, replace=False)
        ]
        for cameraGeomDetector in someDetectors:
            # Right now we only support integer detector IDs in data IDs;
            # support for detector names and groups (i.e. rafts) is
            # definitely planned but not yet implemented.
            dataId = dict(instrument=instrument.instrument,
                          detector=cameraGeomDetector.getId())
            butler.put(cameraGeomDetector, "detector", dataId=dataId)
            cameraGeomDetector2 = butler.get("detector", dataId=dataId)
            # Full detector comparisons are *slow*; just compare names and
            # serials.
            self.assertEqual(cameraGeomDetector.getName(),
                             cameraGeomDetector2.getName())
            self.assertEqual(cameraGeomDetector.getSerial(),
                             cameraGeomDetector2.getSerial())
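
A sketch of a concrete test case driving this check; the InstrumentTests
mixin name, the MyCam instrument class, and the raw file name are all
hypothetical:

    import tempfile
    import unittest

    import numpy

    class MyCamTestCase(InstrumentTests, unittest.TestCase):
        def setUp(self):
            self.root = tempfile.mkdtemp()
            # checkInstrumentWithRegistry samples detectors via self.rng.
            self.rng = numpy.random.RandomState(50)

        def testInstrument(self):
            self.checkInstrumentWithRegistry(MyCam, "mycam-raw-0001.fits")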