Example #1
def makeSimpleButler(root: str,
                     run: str = "test",
                     inMemory: bool = True) -> Butler:
    """Create new data butler instance.

    Parameters
    ----------
    root : `str`
        Path or URI to the root location of the new repository.
    run : `str`, optional
        Run collection name.
    inMemory : `bool`, optional
        If `True`, make an in-memory repository.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    """
    root_path = ResourcePath(root, forceDirectory=True)
    if not root_path.isLocal:
        raise ValueError(f"Only works with local root not {root_path}")
    config = Config()
    if not inMemory:
        config["registry", "db"] = f"sqlite:///{root_path.ospath}/gen3.sqlite"
        config["datastore", "cls"] = "lsst.daf.butler.datastores.fileDatastore.FileDatastore"
    repo = butlerTests.makeTestRepo(str(root_path), {}, config=config)
    butler = Butler(butler=repo, run=run)
    return butler
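
For illustration, a minimal usage sketch; the temporary directory and run name here are assumptions:

# Hypothetical usage: build an in-memory test butler rooted in a temporary
# directory, then confirm the requested run collection is active.
import tempfile

with tempfile.TemporaryDirectory() as tmpdir:
    butler = makeSimpleButler(tmpdir, run="demo", inMemory=True)
    print(butler.run)  # -> "demo"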
Example #2
    def saveUri(self, uri: ResourcePathExpression) -> None:
        """Save `QuantumGraph` to the specified URI.

        Parameters
        ----------
        uri : convertible to `ResourcePath`
            URI to where the graph should be saved.
        """
        buffer = self._buildSaveObject()
        path = ResourcePath(uri)
        if path.getExtension() not in (".qgraph",):
            raise TypeError(
                f"Can currently only save a graph in qgraph format, not {uri}")
        path.write(
            buffer
        )  # type: ignore  # Ignore because bytearray is safe to use in place of bytes
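
A hedged sketch of calling `saveUri`; the `qgraph` instance and destinations are assumptions, and any URI scheme supported by `ResourcePath` should work:

# Hypothetical usage: persist an existing QuantumGraph.  The extension must
# be ".qgraph", otherwise a TypeError is raised.
qgraph.saveUri("/tmp/pipeline.qgraph")              # local file
qgraph.saveUri("s3://some-bucket/pipeline.qgraph")  # remote object store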
Example #3
    def addFileOverride(self, filename):
        """Add overrides from a specified file.

        Parameters
        ----------
        filename : convertible to `ResourcePath`
            Path or URI to the override file.  All URI schemes supported by
            `ResourcePath` are supported.
        """
        self._overrides.append((OverrideTypes.File, ResourcePath(filename)))
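
A brief sketch of how `addFileOverride` might be used; the `overrides` instance and file paths are hypothetical:

# Hypothetical usage: queue override files from a local path and a remote
# URI; both are wrapped in ResourcePath when queued and resolved later.
overrides.addFileOverride("config/myOverrides.py")
overrides.addFileOverride("s3://some-bucket/moreOverrides.py")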
Example #4
def _setupNewButler(butler: Butler, outputLocation: ResourcePath, dirExists: bool) -> Butler:
    # Set up the new butler object at the specified location
    if dirExists:
        # Remove the existing registry database; if the code got this far and
        # it exists, clobber must be True.
        executionRegistry = outputLocation.join("gen3.sqlite3")
        if executionRegistry.exists():
            executionRegistry.remove()
    else:
        outputLocation.mkdir()

    # Copy the existing butler config, modifying the location of the
    # registry to the specified location.
    # Preserve the root path from the existing butler so things like
    # file data stores continue to look at the old location.
    config = Config(butler._config)
    config["root"] = outputLocation.geturl()
    config["allow_put_of_predefined_dataset"] = True
    config["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"

    # Remove any namespace that may be set in main registry.
    config.pop(("registry", "namespace"), None)

    # Record the current root of the datastore if it is specified relative
    # to the butler root.
    if config.get(("datastore", "root")) == BUTLER_ROOT_TAG and butler._config.configDir is not None:
        config["datastore", "root"] = butler._config.configDir.geturl()
    config["datastore", "trust_get_request"] = True

    # Requires that we use the dimension configuration from the original
    # butler and not use the defaults.
    config = Butler.makeRepo(
        root=outputLocation,
        config=config,
        dimensionConfig=butler.registry.dimensions.dimensionConfig,
        overwrite=True,
        forceConfigRoot=False,
    )

    # Return a newly created butler
    return Butler(config, writeable=True)
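
`_setupNewButler` is a private helper; a sketch of how a caller might drive it, assuming `butler` is an existing writeable `Butler` (names here are illustrative):

# Hypothetical driver: pick an output directory, record whether it already
# exists, and build the stripped-down execution butler there.
outputLocation = ResourcePath("/tmp/execution_butler/", forceDirectory=True)
dirExists = outputLocation.exists()
newButler = _setupNewButler(butler, outputLocation, dirExists)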
Example #5
    @classmethod
    def readHeader(cls,
                   uri: ResourcePathExpression,
                   minimumVersion: int = 3) -> Optional[str]:
        """Read the header of a `QuantumGraph` pointed to by the uri parameter
        and return it as a string.

        Parameters
        ----------
        uri : convertible to `ResourcePath`
            The location of the `QuantumGraph` to load. If the argument is a
            string, it must correspond to a valid `ResourcePath` path.
        minimumVersion : int
            Minimum version of a save file to load. Set to -1 to load all
            versions. Older versions may need to be loaded, and re-saved
            to upgrade them to the latest format before they can be used in
            production.

        Returns
        -------
        header : `str` or `None`
            The header associated with the specified `QuantumGraph` if there
            is one, else `None`.

        Raises
        ------
        ValueError
            Raised if the `QuantumGraph` was saved as a pickle.
            Raised if the extension of the file specified by uri is not a
            `QuantumGraph` extension.
        """
        uri = ResourcePath(uri)
        if uri.getExtension() in (".pickle", ".pkl"):
            raise ValueError(
                "Reading a header from a pickle save is not supported")
        elif uri.getExtension() in (".qgraph",):
            return LoadHelper(uri, minimumVersion).readHeader()
        else:
            raise ValueError("Only know how to handle files saved as `qgraph`")
Example #6
    @classmethod
    def loadUri(
        cls,
        uri: ResourcePathExpression,
        universe: DimensionUniverse,
        nodes: Optional[Iterable[uuid.UUID]] = None,
        graphID: Optional[BuildId] = None,
        minimumVersion: int = 3,
    ) -> QuantumGraph:
        """Read `QuantumGraph` from a URI.

        Parameters
        ----------
        uri : convertible to `ResourcePath`
            URI from where to load the graph.
        universe : `~lsst.daf.butler.DimensionUniverse`
            DimensionUniverse instance, not used by the method itself but
            needed to ensure that registry data structures are initialized.
        nodes : iterable of `uuid.UUID` or `None`
            UUIDs that correspond to nodes in the graph. If specified, only
            these nodes will be loaded. Defaults to `None`, in which case all
            nodes will be loaded.
        graphID : `str` or `None`
            If specified this ID is verified against the loaded graph prior
            to loading any nodes. Defaults to `None`, in which case no
            validation is done.
        minimumVersion : int
            Minimum version of a save file to load. Set to -1 to load all
            versions. Older versions may need to be loaded, and re-saved
            to upgrade them to the latest format before they can be used in
            production.

        Returns
        -------
        graph : `QuantumGraph`
            Resulting QuantumGraph instance.

        Raises
        ------
        TypeError
            Raised if pickle contains instance of a type other than
            QuantumGraph.
        ValueError
            Raised if one or more of the requested nodes is not in the
            `QuantumGraph`, if the graphID parameter does not match the graph
            being loaded, or if the supplied uri does not point at a valid
            `QuantumGraph` save file.


        Notes
        -----
        Reading Quanta from pickle requires the existence of a singleton
        DimensionUniverse which is usually instantiated during Registry
        initialization. To make sure that a DimensionUniverse exists, this
        method accepts a dummy DimensionUniverse argument.
        """
        uri = ResourcePath(uri)
        # With ResourcePath we have the choice of always using a local file
        # or reading in the bytes directly. Reading in bytes can be more
        # efficient for reasonably-sized pickle files when the resource
        # is remote. For now use the local file variant. For a local file
        # as_local() does nothing.

        if uri.getExtension() in (".pickle", ".pkl"):
            with uri.as_local() as local, open(local.ospath, "rb") as fd:
                warnings.warn(
                    "Pickle graphs are deprecated, please re-save your graph with the save method"
                )
                qgraph = pickle.load(fd)
        elif uri.getExtension() in (".qgraph",):
            with LoadHelper(uri, minimumVersion) as loader:
                qgraph = loader.load(universe, nodes, graphID)
        else:
            raise ValueError(
                "Only know how to handle files saved as `pickle`, `pkl`, or `qgraph`"
            )
        if not isinstance(qgraph, QuantumGraph):
            raise TypeError(
                f"QuantumGraph save file contains unexpected object type: {type(qgraph)}"
            )
        return qgraph
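
A hedged sketch of loading a saved graph; the path is an assumption, and constructing a default `DimensionUniverse` is assumed to be acceptable here:

# Hypothetical usage: load all nodes of a previously saved graph.  The
# DimensionUniverse argument only ensures registry data structures exist
# before the graph is deserialized.
universe = DimensionUniverse()
qgraph = QuantumGraph.loadUri("/tmp/pipeline.qgraph", universe)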
Example #7
def buildExecutionButler(
    butler: Butler,
    graph: QuantumGraph,
    outputLocation: ResourcePathExpression,
    run: Optional[str],
    *,
    clobber: bool = False,
    butlerModifier: Optional[Callable[[Butler], Butler]] = None,
    collections: Optional[Iterable[str]] = None,
) -> Butler:
    r"""buildExecutionButler is a function that is responsible for exporting
    input `QuantumGraphs` into a new minimal `~lsst.daf.butler.Butler` which
    only contains datasets specified by the `QuantumGraph`. These datasets are
    both those that already exist in the input `~lsst.daf.butler.Butler`, and
    those that are expected to be produced during the execution of the
    `QuantumGraph`.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        This is the existing `~lsst.daf.butler.Butler` instance from which
        existing datasets will be exported. This should be the
        `~lsst.daf.butler.Butler` which was used to create any `QuantumGraphs`
        that will be converted with this object.
    graph : `QuantumGraph`
        Graph containing nodes that are to be exported into an execution
        butler.
    outputLocation : convertible to `ResourcePath`
        URI Location at which the execution butler is to be exported. May be
        specified as a string or a `ResourcePath` instance.
    run : `str`, optional
        The run collection that the exported datasets are to be placed in. If
        `None`, the default value in registry.defaults will be used.
    clobber : `bool`, optional
        By default a butler will not be created if a file or directory
        already exists at the output location. If this is set to `True`
        what is at the location will be deleted prior to running the
        export. Defaults to `False`.
    butlerModifier : `~typing.Callable`, optional
        If supplied this should be a callable that accepts a
        `~lsst.daf.butler.Butler`, and returns an instantiated
        `~lsst.daf.butler.Butler`. This callable may be used to make any
        modifications to the `~lsst.daf.butler.Butler` desired. This
        will be called after importing all datasets that exist in the input
        `~lsst.daf.butler.Butler` but prior to inserting Datasets expected
        to be produced. Examples of what this method could do include
        things such as creating collections, runs, etc.
    collections : `~typing.Iterable` of `str`, optional
        An iterable of collection names that will be exported from the input
        `~lsst.daf.butler.Butler` when creating the execution butler. If not
        supplied the `~lsst.daf.butler.Butler`\ 's `~lsst.daf.butler.Registry`
        default collections will be used.

    Returns
    -------
    executionButler : `lsst.daf.butler.Butler`
        An instance of the newly created execution butler.

    Raises
    ------
    FileExistsError
        Raised if something exists in the filesystem at the specified output
        location and clobber is `False`.
    NotADirectoryError
        Raised if the specified output URI does not correspond to a
        directory.
    """
    # We know this must refer to a directory.
    outputLocation = ResourcePath(outputLocation, forceDirectory=True)

    # Do this first to fail fast if the output exists.
    if (dirExists := outputLocation.exists()) and not clobber:
        raise FileExistsError("Cannot create a butler at specified location, location exists")
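
A sketch of invoking `buildExecutionButler`; the output location and run collection name are assumptions, and `butler` and `qgraph` are assumed to exist:

# Hypothetical usage: export an execution butler for a previously built
# graph, overwriting any earlier export at the same location.
execButler = buildExecutionButler(
    butler,
    qgraph,
    "/tmp/execution_butler/",
    run="u/demo/run",
    clobber=True,
)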
Example #8
    def run(self,
            locations,
            run=None,
            file_filter=r".*Photodiode_Readings.*txt",
            track_file_attrs=None):
        """Ingest photodiode data into a Butler data repository.

        Parameters
        ----------
        locations : iterable over `lsst.resources.ResourcePath` or `str`
            URIs or paths to the files to be ingested, or to
            directories to be searched for matching files.
        run : `str`, optional
            Name of the RUN-type collection to write to,
            overriding the default derived from the instrument
            name.
        file_filter : `str`, optional
            Regex pattern used to select photodiode files when a
            location refers to a directory.
        track_file_attrs : `bool`, optional
            Control whether file attributes such as the size or
            checksum should be tracked by the datastore.  Whether
            this parameter is honored depends on the specific
            datastore implementation.

        Returns
        -------
        refs : `list` [`lsst.daf.butler.DatasetRef`]
            Dataset references for the ingested photodiode data.

        Raises
        ------
        RuntimeError
            Raised if the number of exposures found for a photodiode
            file is not one.
        """
        files = ResourcePath.findFileResources(locations, file_filter)

        registry = self.butler.registry
        registry.registerDatasetType(self.datasetType)

        # Find and register run that we will ingest to.
        if run is None:
            run = self.instrument.makeCollectionName("calib", "photodiode")
        registry.registerCollection(run, type=CollectionType.RUN)

        # Use datasetIds that match the raw exposure data.
        if self.butler.registry.supportsIdGenerationMode(
                DatasetIdGenEnum.DATAID_TYPE_RUN):
            mode = DatasetIdGenEnum.DATAID_TYPE_RUN
        else:
            mode = DatasetIdGenEnum.UNIQUE

        refs = []
        numExisting = 0
        numFailed = 0
        for inputFile in files:
            # Convert the file into the right class.
            with inputFile.as_local() as localFile:
                calib = PhotodiodeCalib.readTwoColumnPhotodiodeData(
                    localFile.ospath)

            dayObs = calib.getMetadata()['day_obs']
            seqNum = calib.getMetadata()['seq_num']

            # Find the associated exposure information.
            whereClause = "exposure.day_obs=dayObs and exposure.seq_num=seqNum"
            instrumentName = self.instrument.getName()
            exposureRecords = [
                rec for rec in registry.queryDimensionRecords(
                    "exposure",
                    instrument=instrumentName,
                    where=whereClause,
                    bind={
                        "dayObs": dayObs,
                        "seqNum": seqNum
                    })
            ]

            nRecords = len(exposureRecords)
            if nRecords == 1:
                exposureId = exposureRecords[0].id
                calib.updateMetadata(camera=self.camera, exposure=exposureId)
            elif nRecords == 0:
                numFailed += 1
                self.log.warning(
                    "Skipping instrument %s and dayObs/seqNum %d %d: no exposures found.",
                    instrumentName, dayObs, seqNum)
                continue
            else:
                numFailed += 1
                self.log.warning(
                    "Multiple exposure entries found for instrument %s and "
                    "dayObs/seqNum %d %d.", instrumentName, dayObs, seqNum)
                continue

            # Generate the dataId for this file.
            dataId = DataCoordinate.standardize(
                instrument=self.instrument.getName(),
                exposure=exposureId,
                universe=self.universe,
            )

            # If this already exists, we should skip it and continue.
            existing = {
                ref.dataId
                for ref in self.butler.registry.queryDatasets(
                    self.datasetType, collections=[run], dataId=dataId)
            }
            if existing:
                self.log.debug(
                    "Skipping instrument %s and dayObs/seqNum %d %d: already exists in run %s.",
                    instrumentName, dayObs, seqNum, run)
                numExisting += 1
                continue

            # Ingest must work from a file, but we can't use the
            # original, as we've added new metadata and reformatted
            # it.  Write it to a temp file that we can use to ingest.
            # If we can have the files written appropriately, this
            # will be a direct ingest of those files.
            with ResourcePath.temporary_uri(suffix=".fits") as tempFile:
                calib.writeFits(tempFile.ospath)

                ref = DatasetRef(self.datasetType, dataId)
                dataset = FileDataset(path=tempFile,
                                      refs=ref,
                                      formatter=FitsGenericFormatter)

                # No try, as if this fails, we should stop.
                self.butler.ingest(dataset,
                                   transfer=self.config.transfer,
                                   run=run,
                                   idGenerationMode=mode,
                                   record_validation_info=track_file_attrs)
                self.log.info("Photodiode %s:%d (%d/%d) ingested successfully",
                              instrumentName, exposureId, dayObs, seqNum)
                # Collect the resolved dataset refs, matching the documented
                # return type.
                refs.extend(dataset.refs)

        if numExisting != 0:
            self.log.warning(
                "Skipped %d entries that already existed in run %s",
                numExisting, run)
        if numFailed != 0:
            raise RuntimeError(
                f"Failed to ingest {numFailed} entries due to missing exposure information."
            )
        return refs
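
A sketch of driving `run` from a configured task instance; the `task` object, input directory, and run collection name are assumptions:

# Hypothetical usage: ingest every matching photodiode file found under a
# directory tree into an explicit RUN collection.
refs = task.run(["/data/photodiode/"],
                run="LSSTCam/calib/photodiode")
print(f"Ingested {len(refs)} photodiode datasets")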