def makeSimpleButler(root: str, run: str = "test", inMemory: bool = True) -> Butler:
    """Create new data butler instance.

    Parameters
    ----------
    root : `str`
        Path or URI to the root location of the new repository.
    run : `str`, optional
        Run collection name.
    inMemory : `bool`, optional
        If `True`, make an in-memory repository.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    """
    root_path = ResourcePath(root, forceDirectory=True)
    if not root_path.isLocal:
        raise ValueError(f"Only works with local root, not {root_path}")
    config = Config()
    if not inMemory:
        config["registry", "db"] = f"sqlite:///{root_path.ospath}/gen3.sqlite"
        config["datastore", "cls"] = "lsst.daf.butler.datastores.fileDatastore.FileDatastore"
    repo = butlerTests.makeTestRepo(str(root_path), {}, config=config)
    butler = Butler(butler=repo, run=run)
    return butler
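A minimal usage sketch (not part of the original module), assuming the names used above (`ResourcePath`, `Config`, `butlerTests`, `Butler`) are importable in the calling environment:

import tempfile

with tempfile.TemporaryDirectory() as tmpdir:
    # File-backed SQLite repository rooted at the temporary directory.
    butler = makeSimpleButler(tmpdir, run="my_test_run", inMemory=False)
    print(butler.run)  # "my_test_run"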
def saveUri(self, uri: ResourcePathExpression) -> None:
    """Save `QuantumGraph` to the specified URI.

    Parameters
    ----------
    uri : convertible to `ResourcePath`
        URI to where the graph should be saved.
    """
    buffer = self._buildSaveObject()
    path = ResourcePath(uri)
    if path.getExtension() not in (".qgraph",):
        raise TypeError(f"Can currently only save a graph in qgraph format, not {uri}")
    # bytearray is safe to use in place of bytes, so silence the type checker.
    path.write(buffer)  # type: ignore
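Illustrative calls, assuming `qgraph` is an in-memory `QuantumGraph` instance; anything other than a `.qgraph` extension raises `TypeError`:

qgraph.saveUri("/tmp/pipeline.qgraph")            # accepted: .qgraph extension
# qgraph.saveUri("s3://bucket/pipeline.qgraph")   # any ResourcePath scheme should work
# qgraph.saveUri("/tmp/pipeline.pickle")          # rejected: raises TypeError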
def addFileOverride(self, filename):
    """Add overrides from a specified file.

    Parameters
    ----------
    filename : convertible to `ResourcePath`
        Path or URI to the override file. All URI schemes supported by
        `ResourcePath` are supported.
    """
    self._overrides.append((OverrideTypes.File, ResourcePath(filename)))
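A sketch of typical use, assuming this method belongs to a config-overrides helper such as `ConfigOverrides`; the instance name and file paths below are hypothetical:

overrides = ConfigOverrides()
overrides.addFileOverride("/home/user/isr_override.py")
overrides.addFileOverride("s3://my-bucket/configs/shared_override.py")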
def _setupNewButler(butler: Butler, outputLocation: ResourcePath, dirExists: bool) -> Butler:
    # Set up the new butler object at the specified location.
    if dirExists:
        # Remove the existing registry database; if the code got this far
        # and this exists, clobber must be true.
        executionRegistry = outputLocation.join("gen3.sqlite3")
        if executionRegistry.exists():
            executionRegistry.remove()
    else:
        outputLocation.mkdir()

    # Copy the existing butler config, modifying the location of the
    # registry to the specified location.
    # Preserve the root path from the existing butler so things like
    # file data stores continue to look at the old location.
    config = Config(butler._config)
    config["root"] = outputLocation.geturl()
    config["allow_put_of_predefined_dataset"] = True
    config["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"

    # Remove any namespace that may be set in the main registry.
    config.pop(("registry", "namespace"), None)

    # Record the current root of the datastore if it is specified relative
    # to the butler root.
    if config.get(("datastore", "root")) == BUTLER_ROOT_TAG and butler._config.configDir is not None:
        config["datastore", "root"] = butler._config.configDir.geturl()
    config["datastore", "trust_get_request"] = True

    # Requires that we use the dimension configuration from the original
    # butler and not use the defaults.
    config = Butler.makeRepo(
        root=outputLocation,
        config=config,
        dimensionConfig=butler.registry.dimensions.dimensionConfig,
        overwrite=True,
        forceConfigRoot=False,
    )

    # Return a newly created butler.
    return Butler(config, writeable=True)
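Sketch of how this private helper might be driven, assuming `butler` is an existing writeable `Butler` and the output path is hypothetical:

outputLocation = ResourcePath("/scratch/execution_butler/", forceDirectory=True)
newButler = _setupNewButler(butler, outputLocation, dirExists=outputLocation.exists())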
def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> Optional[str]:
    """Read the header of a `QuantumGraph` pointed to by the uri
    parameter and return it as a string.

    Parameters
    ----------
    uri : convertible to `ResourcePath`
        The location of the `QuantumGraph` to load. If the argument is a
        string, it must correspond to a valid `ResourcePath` path.
    minimumVersion : `int`
        Minimum version of a save file to load. Set to -1 to load all
        versions. Older versions may need to be loaded, and re-saved
        to upgrade them to the latest format before they can be used in
        production.

    Returns
    -------
    header : `str` or `None`
        The header associated with the specified `QuantumGraph` if there
        is one, else `None`.

    Raises
    ------
    ValueError
        Raised if the `QuantumGraph` was saved as a pickle.
        Raised if the extension of the file specified by uri is not a
        `QuantumGraph` extension.
    """
    uri = ResourcePath(uri)
    if uri.getExtension() in (".pickle", ".pkl"):
        raise ValueError("Reading a header from a pickle save is not supported")
    elif uri.getExtension() in (".qgraph",):
        return LoadHelper(uri, minimumVersion).readHeader()
    else:
        raise ValueError("Only know how to handle files saved as `qgraph`")
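Usage sketch, assuming this is the `QuantumGraph.readHeader` classmethod (as the `cls` parameter suggests); the graph path is hypothetical:

header = QuantumGraph.readHeader("/data/graphs/nightly.qgraph")
if header is not None:
    print(header)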
def loadUri(
    cls,
    uri: ResourcePathExpression,
    universe: DimensionUniverse,
    nodes: Optional[Iterable[uuid.UUID]] = None,
    graphID: Optional[BuildId] = None,
    minimumVersion: int = 3,
) -> QuantumGraph:
    """Read `QuantumGraph` from a URI.

    Parameters
    ----------
    uri : convertible to `ResourcePath`
        URI from where to load the graph.
    universe : `~lsst.daf.butler.DimensionUniverse`
        DimensionUniverse instance, not used by the method itself but
        needed to ensure that registry data structures are initialized.
    nodes : iterable of `uuid.UUID` or `None`
        UUIDs that correspond to nodes in the graph. If specified, only
        these nodes will be loaded. Defaults to None, in which case all
        nodes will be loaded.
    graphID : `str` or `None`
        If specified this ID is verified against the loaded graph prior to
        loading any Nodes. This defaults to None in which case no
        validation is done.
    minimumVersion : `int`
        Minimum version of a save file to load. Set to -1 to load all
        versions. Older versions may need to be loaded, and re-saved
        to upgrade them to the latest format before they can be used in
        production.

    Returns
    -------
    graph : `QuantumGraph`
        Resulting QuantumGraph instance.

    Raises
    ------
    TypeError
        Raised if pickle contains instance of a type other than
        QuantumGraph.
    ValueError
        Raised if one or more of the nodes requested is not in the
        `QuantumGraph`, if the graphID parameter does not match the graph
        being loaded, or if the supplied uri does not point at a valid
        `QuantumGraph` save file.

    Notes
    -----
    Reading Quanta from pickle requires existence of a singleton
    DimensionUniverse which is usually instantiated during Registry
    initialization. To make sure that DimensionUniverse exists this method
    accepts a dummy DimensionUniverse argument.
    """
    uri = ResourcePath(uri)
    # With ResourcePath we have the choice of always using a local file
    # or reading in the bytes directly. Reading in bytes can be more
    # efficient for reasonably-sized pickle files when the resource
    # is remote. For now use the local file variant. For a local file
    # as_local() does nothing.
    if uri.getExtension() in (".pickle", ".pkl"):
        with uri.as_local() as local, open(local.ospath, "rb") as fd:
            warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method")
            qgraph = pickle.load(fd)
    elif uri.getExtension() in (".qgraph",):
        with LoadHelper(uri, minimumVersion) as loader:
            qgraph = loader.load(universe, nodes, graphID)
    else:
        raise ValueError("Only know how to handle files saved as `pickle`, `pkl`, or `qgraph`")
    if not isinstance(qgraph, QuantumGraph):
        raise TypeError(f"QuantumGraph save file contains unexpected object type: {type(qgraph)}")
    return qgraph
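Usage sketch, assuming this is the `QuantumGraph.loadUri` classmethod; the graph path and node UUID below are placeholders:

import uuid

from lsst.daf.butler import DimensionUniverse

universe = DimensionUniverse()
full_graph = QuantumGraph.loadUri("/data/graphs/nightly.qgraph", universe)

# Load only selected nodes, identified by UUID.
subset = QuantumGraph.loadUri(
    "/data/graphs/nightly.qgraph",
    universe,
    nodes=[uuid.UUID("00000000-0000-0000-0000-000000000000")],
)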
def buildExecutionButler(
    butler: Butler,
    graph: QuantumGraph,
    outputLocation: ResourcePathExpression,
    run: Optional[str],
    *,
    clobber: bool = False,
    butlerModifier: Optional[Callable[[Butler], Butler]] = None,
    collections: Optional[Iterable[str]] = None,
) -> Butler:
    r"""buildExecutionButler is a function that is responsible for exporting
    input `QuantumGraphs` into a new minimal `~lsst.daf.butler.Butler` which
    only contains datasets specified by the `QuantumGraph`.

    These datasets are both those that already exist in the input
    `~lsst.daf.butler.Butler`, and those that are expected to be produced
    during the execution of the `QuantumGraph`.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        This is the existing `~lsst.daf.butler.Butler` instance from which
        existing datasets will be exported. This should be the
        `~lsst.daf.butler.Butler` which was used to create any
        `QuantumGraphs` that will be converted with this object.
    graph : `QuantumGraph`
        Graph containing nodes that are to be exported into an execution
        butler.
    outputLocation : convertible to `ResourcePath`
        URI location at which the execution butler is to be exported. May be
        specified as a string or a `ResourcePath` instance.
    run : `str`, optional
        The run collection that the exported datasets are to be placed in.
        If None, the default value in registry.defaults will be used.
    clobber : `bool`, optional
        By default a butler will not be created if a file or directory
        already exists at the output location. If this is set to `True`
        what is at the location will be deleted prior to running the
        export. Defaults to `False`.
    butlerModifier : `~typing.Callable`, optional
        If supplied this should be a callable that accepts a
        `~lsst.daf.butler.Butler`, and returns an instantiated
        `~lsst.daf.butler.Butler`. This callable may be used to make any
        modifications to the `~lsst.daf.butler.Butler` desired. This will
        be called after importing all datasets that exist in the input
        `~lsst.daf.butler.Butler` but prior to inserting Datasets expected
        to be produced. Examples of what this method could do include
        things such as creating collections/runs/etc.
    collections : `~typing.Iterable` of `str`, optional
        An iterable of collection names that will be exported from the input
        `~lsst.daf.butler.Butler` when creating the execution butler. If not
        supplied the `~lsst.daf.butler.Butler`\ 's `~lsst.daf.butler.Registry`
        default collections will be used.

    Returns
    -------
    executionButler : `lsst.daf.butler.Butler`
        An instance of the newly created execution butler.

    Raises
    ------
    FileExistsError
        Raised if something exists in the filesystem at the specified output
        location and clobber is `False`.
    NotADirectoryError
        Raised if specified output URI does not correspond to a directory.
    """
    # We know this must refer to a directory.
    outputLocation = ResourcePath(outputLocation, forceDirectory=True)

    # Do this first to fail fast if the output exists.
    if (dirExists := outputLocation.exists()) and not clobber:
        raise FileExistsError("Cannot create a butler at specified location, location exists")
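End-to-end sketch, assuming a readable source repository and a previously saved graph; all paths and collection names below are hypothetical:

butler = Butler("/repo/main", writeable=False)
qgraph = QuantumGraph.loadUri("/data/graphs/nightly.qgraph", butler.registry.dimensions)
execButler = buildExecutionButler(
    butler,
    qgraph,
    "/scratch/execution_butler/",
    run="u/someuser/exec_run",
    clobber=True,
)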
def run(self, locations, run=None, file_filter=r".*Photodiode_Readings.*txt", track_file_attrs=None):
    """Ingest photodiode data into a Butler data repository.

    Parameters
    ----------
    locations : iterable over `lsst.resources.ResourcePath`
        URIs to the files to be ingested.
    run : `str`, optional
        Name of the RUN-type collection to write to, overriding the
        default derived from the instrument name.
    file_filter : `str`, optional
        Regular expression used to select photodiode data files when a
        location refers to a directory.
    track_file_attrs : `bool`, optional
        Control whether file attributes such as the size or checksum should
        be tracked by the datastore. Whether this parameter is honored
        depends on the specific datastore implementation.

    Returns
    -------
    refs : `list` [`lsst.daf.butler.DatasetRef`]
        Dataset references for ingested raws.

    Raises
    ------
    RuntimeError
        Raised if the number of exposures found for a photodiode file is
        not one.
    """
    files = ResourcePath.findFileResources(locations, file_filter)

    registry = self.butler.registry
    registry.registerDatasetType(self.datasetType)

    # Find and register run that we will ingest to.
    if run is None:
        run = self.instrument.makeCollectionName("calib", "photodiode")
    registry.registerCollection(run, type=CollectionType.RUN)

    # Use datasetIds that match the raw exposure data.
    if self.butler.registry.supportsIdGenerationMode(DatasetIdGenEnum.DATAID_TYPE_RUN):
        mode = DatasetIdGenEnum.DATAID_TYPE_RUN
    else:
        mode = DatasetIdGenEnum.UNIQUE

    refs = []
    numExisting = 0
    numFailed = 0
    for inputFile in files:
        # Convert the file into the right class.
        with inputFile.as_local() as localFile:
            calib = PhotodiodeCalib.readTwoColumnPhotodiodeData(localFile.ospath)

        dayObs = calib.getMetadata()['day_obs']
        seqNum = calib.getMetadata()['seq_num']

        # Find the associated exposure information.
        whereClause = "exposure.day_obs=dayObs and exposure.seq_num=seqNum"
        instrumentName = self.instrument.getName()
        exposureRecords = [
            rec for rec in registry.queryDimensionRecords(
                "exposure",
                instrument=instrumentName,
                where=whereClause,
                bind={"dayObs": dayObs, "seqNum": seqNum},
            )
        ]

        nRecords = len(exposureRecords)
        if nRecords == 1:
            exposureId = exposureRecords[0].id
            calib.updateMetadata(camera=self.camera, exposure=exposureId)
        elif nRecords == 0:
            numFailed += 1
            self.log.warning(
                "Skipping instrument %s and dayObs/seqNum %d %d: no exposures found.",
                instrumentName, dayObs, seqNum)
            continue
        else:
            numFailed += 1
            self.log.warning(
                "Multiple exposure entries found for instrument %s and "
                "dayObs/seqNum %d %d.", instrumentName, dayObs, seqNum)
            continue

        # Generate the dataId for this file.
        dataId = DataCoordinate.standardize(
            instrument=self.instrument.getName(),
            exposure=exposureId,
            universe=self.universe,
        )

        # If this already exists, we should skip it and continue.
        existing = {
            ref.dataId for ref in self.butler.registry.queryDatasets(
                self.datasetType, collections=[run], dataId=dataId)
        }
        if existing:
            self.log.debug(
                "Skipping instrument %s and dayObs/seqNum %d %d: already exists in run %s.",
                instrumentName, dayObs, seqNum, run)
            numExisting += 1
            continue

        # Ingest must work from a file, but we can't use the original, as
        # we've added new metadata and reformatted it. Write it to a temp
        # file that we can use to ingest. If we can have the files written
        # appropriately, this will be a direct ingest of those files.
        with ResourcePath.temporary_uri(suffix=".fits") as tempFile:
            calib.writeFits(tempFile.ospath)
            ref = DatasetRef(self.datasetType, dataId)
            dataset = FileDataset(path=tempFile, refs=ref,
                                  formatter=FitsGenericFormatter)

            # No try, as if this fails, we should stop.
            self.butler.ingest(dataset, transfer=self.config.transfer,
                               run=run, idGenerationMode=mode,
                               record_validation_info=track_file_attrs)
            self.log.info("Photodiode %s:%d (%d/%d) ingested successfully",
                          instrumentName, exposureId, dayObs, seqNum)
            refs.append(dataset)

    if numExisting != 0:
        self.log.warning("Skipped %d entries that already existed in run %s",
                         numExisting, run)
    if numFailed != 0:
        raise RuntimeError(
            f"Failed to ingest {numFailed} entries due to missing exposure information.")

    return refs
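Usage sketch: `task` is assumed to be an already-constructed instance of the ingest task that owns this `run` method (its constructor is not shown in this excerpt), and the directory below is a hypothetical location containing Photodiode_Readings*.txt files:

locations = [ResourcePath("/data/photodiode/", forceDirectory=True)]
refs = task.run(locations, run="LSSTCam/calib/photodiode")
print(f"Ingested {len(refs)} photodiode datasets")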