def makeSQLiteRegistry(create=True):
    """Yield the configuration of a SQLite registry in a temporary directory.

    This is a generator meant to be driven as a context manager: the
    configuration is yielded from inside the ``temporaryDirectory()``
    context, so the directory is presumably only usable until the caller
    resumes the generator.

    Parameters
    ----------
    create : `bool`, optional
        If true (the default), initialize the registry database with
        `Registry.createFromConfig` before yielding. Otherwise only the
        configuration is prepared.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration whose ``"db"`` entry points at a
        ``gen3.sqlite`` file inside the temporary directory.
    """
    with temporaryDirectory() as tmpdir:
        config = RegistryConfig()
        config["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(config)
        yield config
def loadDimensionData() -> DataCoordinateSequence:
    """Load dimension data from an export file included in the code
    repository.

    Returns
    -------
    dataIds : `DataCoordinateSequence`
        A sequence containing the data IDs found in the export file for the
        ``visit``, ``detector``, ``tract`` and ``patch`` dimensions. It is
        constructed with ``hasFull=True`` and ``hasRecords=True`` from an
        expanded registry query.
    """
    # Create an in-memory SQLite database and Registry just to import the YAML
    # data and retrieve it as a sequence of DataCoordinate objects.
    config = RegistryConfig()
    config["db"] = "sqlite://"
    registry = Registry.fromConfig(config, create=True)

    # The backend is driven while the stream is still open so that it works
    # whether or not the backend reads the file eagerly in its constructor.
    with open(DIMENSION_DATA_FILE, 'r') as stream:
        backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        # No datastore is involved; only the registry content is wanted.
        backend.load(datastore=None)

    dimensions = DimensionGraph(registry.dimensions, names=["visit", "detector", "tract", "patch"])
    dataIds = tuple(registry.queryDimensions(dimensions, expand=True))
    return DataCoordinateSequence(
        dataIds=dataIds,
        graph=dimensions,
        hasFull=True,
        hasRecords=True,
    )
def test_makeGraph(self):
    """Check that makeGraph() without a user query returns both tasks, in
    pipeline order.
    """
    taskFactory = TaskFactoryMock()
    registry = Registry.fromConfig(RegistryConfig(), SchemaConfig())
    builder = GraphBuilder(taskFactory, registry)
    pipeline = self._makePipeline()

    # The same empty collection name is passed in both argument positions.
    collection = ""
    originInfo = DatasetOriginInfoDef([collection], collection)
    graph = builder.makeGraph(pipeline, originInfo, None)

    self.assertEqual(len(graph), 2)

    # TODO: temporary until we add some content to registry; the quanta of
    # the first node are then expected to be checked like this:
    # quanta = graph[0].quanta
    # self.assertEqual(len(quanta), 10)
    # for quantum in quanta:
    #     self._checkQuantum(quantum.inputs, Dataset1, range(10))
    #     self._checkQuantum(quantum.outputs, Dataset2, range(10))

    expectedTasks = (("TaskOne", TaskOne), ("TaskTwo", TaskTwo))
    for index, (taskName, taskClass) in enumerate(expectedTasks):
        taskDef = graph[index].taskDef
        self.assertEqual(taskDef.taskName, taskName)
        self.assertEqual(taskDef.taskClass, taskClass)
def test_register(self):
    """Verify that register() populates the instrument, detector and
    physical_filter dimensions of an initially empty registry.
    """
    configPath = os.path.join(getPackageDir("daf_butler"), "tests/config/basic/butler.yaml")
    registry = Registry.fromConfig(ButlerConfig(configPath))

    # Nothing may be present before the instrument is registered.
    for dimension in ("instrument", "detector", "physical_filter"):
        self.assertEqual(list(registry.queryDimensions([dimension])), [])

    self.instrument.register(registry)

    # Exactly one instrument, carrying the expected name.
    instrumentDataIds = list(registry.queryDimensions(["instrument"]))
    self.assertEqual(len(instrumentDataIds), 1)
    instrumentNames = {dataId["instrument"] for dataId in instrumentDataIds}
    self.assertEqual(instrumentNames, {self.data.name})

    # All detectors are present, including the first one by full name.
    detectorDataIds = list(registry.queryDimensions(["detector"]))
    self.assertEqual(len(detectorDataIds), self.data.nDetectors)
    detectorNames = {dataId.records["detector"].full_name for dataId in detectorDataIds}
    self.assertIn(self.data.firstDetectorName, detectorNames)

    # The registered filters must be a superset of the expected ones.
    filterDataIds = list(registry.queryDimensions(["physical_filter"]))
    filterNames = {dataId['physical_filter'] for dataId in filterDataIds}
    self.assertGreaterEqual(filterNames, self.data.physical_filters)
def test_makeGraphSelect(self):
    """Check that makeGraph() with a user query returns both tasks, in
    pipeline order.
    """
    taskFactory = TaskFactoryMock()
    registry = Registry.fromConfig(RegistryConfig(), SchemaConfig())
    builder = GraphBuilder(taskFactory, registry)
    pipeline = self._makePipeline()

    # The same empty collection name is passed in both argument positions.
    collection = ""
    originInfo = DatasetOriginInfoDef([collection], collection)
    graph = builder.makeGraph(pipeline, originInfo, "1 = 1")

    self.assertEqual(len(graph), 2)

    # TODO: temporary until we implement makeGraph(); the quanta of the
    # first node are then expected to be checked like this:
    # quanta = graph[0].quanta
    # self.assertEqual(len(quanta), 3)
    # for quantum in quanta:
    #     self._checkQuantum(quantum.inputs, Dataset1, [1, 5, 9])
    #     self._checkQuantum(quantum.outputs, Dataset2, [1, 5, 9])

    expectedTasks = (("TaskOne", TaskOne), ("TaskTwo", TaskTwo))
    for index, (taskName, taskClass) in enumerate(expectedTasks):
        taskDef = graph[index].taskDef
        self.assertEqual(taskDef.taskName, taskName)
        self.assertEqual(taskDef.taskClass, taskClass)
def testTransfer(self):
    """Put example metrics into one datastore, transfer them to a second
    datastore and check that they read back equal.
    """
    metrics = makeExampleMetrics()
    dimensions = frozenset(("visit", "filter"))
    dataId = {"visit": 2048, "filter": "Uprime"}
    storageClass = self.storageClassFactory.getStorageClass("StructuredData")
    ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

    # Source datastore: shares this test's registry.
    sourceConfig = DatastoreConfig(self.configFile)
    sourceConfig['datastore.root'] = os.path.join(self.testDir, "./test_input_datastore")
    source = PosixDatastore(config=sourceConfig, registry=self.registry)

    # Destination datastore: same settings but its own root and a registry
    # built separately from the same configuration file.
    destConfig = sourceConfig.copy()
    destConfig['datastore.root'] = os.path.join(self.testDir, "./test_output_datastore")
    destRegistry = Registry.fromConfig(self.configFile)
    destination = PosixDatastore(config=destConfig, registry=destRegistry)

    source.put(metrics, ref)
    destination.transfer(source, ref)

    retrieved = destination.get(ref)
    self.assertEqual(metrics, retrieved)
def test_register(self):
    """Verify instrument registration, lookup by name and importAll()."""
    config = RegistryConfig()
    config["db"] = "sqlite://"
    registry = Registry.createFromConfig(config)

    # importAll() is exercised on a registry with no instruments, then the
    # registry is confirmed to be empty.
    self.instrument.importAll(registry)
    self.assertFalse(list(registry.queryDimensionRecords("instrument")))

    # After registration exactly one record with the expected content exists.
    self.instrument.register(registry)
    records = list(registry.queryDimensionRecords("instrument"))
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].name, self.name)
    self.assertEqual(records[0].detector_max, 2)
    self.assertIn("DummyInstrument", records[0].class_name)

    self.instrument.importAll(registry)

    # A registered name resolves to an Instrument; an unknown one does not.
    fromRegistry = DummyInstrument.fromName("DummyInstrument", registry)
    self.assertIsInstance(fromRegistry, Instrument)
    with self.assertRaises(LookupError):
        Instrument.fromName("NotThrere", registry)

    # Instruments with unusable class names can be registered, but looking
    # them up fails with the matching exception type.
    badCases = (
        (BadInstrument, TypeError, "BadInstrument"),
        (UnimportableInstrument, ImportError, "NoImportInstr"),
    )
    for instrumentClass, expectedError, instrumentName in badCases:
        instrumentClass().register(registry)
        with self.assertRaises(expectedError):
            Instrument.fromName(instrumentName, registry)

    # This should work even with the bad class names present.
    self.instrument.importAll(registry)
def __init__(self, registry: Registry, name: str):
    """Record the name, child chain and existence of a collection.

    Parameters
    ----------
    registry : `Registry`
        Registry asked for the collection chain via ``getCollectionChain``.
    name : `str`
        Name of the collection; stored as ``self.name``.

    Notes
    -----
    If the registry raises `MissingCollectionError`, ``self.chain`` is an
    empty tuple and ``self.exists`` is `False`. Otherwise ``self.chain``
    holds the chain as a tuple and ``self.exists`` is `True`.
    """
    self.name = name
    try:
        chain = tuple(registry.getCollectionChain(name))
    except MissingCollectionError:
        chain = ()
        exists = False
    else:
        exists = True
    self.chain = chain
    self.exists = exists
def makeButler(self, **kwargs: Any) -> Butler:
    """Build a fresh Butler backed by its own SQLite registry.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to the `Butler` constructor.

    Returns
    -------
    butler : `Butler`
        New instance with `DatastoreMock` applied to it.
    """
    config = ButlerConfig()

    # Each instance gets a separate temporary directory, created under
    # ``self.root``, to hold its registry database.
    registryDir = tempfile.mkdtemp(dir=self.root)
    config["registry", "db"] = f"sqlite:///{registryDir}/gen3.sqlite3"
    config["root"] = self.root

    # The registry has to be created before a Butler can be constructed.
    Registry.createFromConfig(RegistryConfig(config.get("registry")))

    butler = Butler(config, **kwargs)
    DatastoreMock.apply(butler)
    return butler
def __init__(self, registry: Registry, name: str):
    """Record the name and existence of a RUN collection.

    Parameters
    ----------
    registry : `Registry`
        Registry asked for the collection type via ``getCollectionType``.
    name : `str`
        Name of the collection; stored as ``self.name``.

    Raises
    ------
    TypeError
        Raised if the collection exists but its type is not
        ``CollectionType.RUN``. ``self.exists`` is not set in that case.

    Notes
    -----
    ``self.exists`` is `False` if the registry raises
    `MissingCollectionError` and `True` if the collection is a RUN.
    """
    self.name = name
    try:
        collectionType = registry.getCollectionType(name)
    except MissingCollectionError:
        self.exists = False
    else:
        if collectionType is not CollectionType.RUN:
            raise TypeError(f"Collection '{name}' exists but has type {collectionType.name}, not RUN.")
        self.exists = True
def fromName(name: str, registry: Registry, collection_prefix: Optional[str] = None) -> Instrument:
    """Look up an instrument by name in a butler registry and return an
    instance of its class.

    Parameters
    ----------
    name : `str`
        Name of the instrument (must match the return value of `getName`).
    registry : `lsst.daf.butler.Registry`
        Butler registry to query for the instrument dimension record.
    collection_prefix : `str`, optional
        Prefix for collection names to use instead of the instrument's own
        name. This is primarily for use in simulated-data repositories,
        where the instrument name may not be necessary and/or sufficient
        to distinguish between collections. Passed to the instrument class
        constructor.

    Returns
    -------
    instrument : `Instrument`
        An instance of the relevant `Instrument`.

    Raises
    ------
    LookupError
        Raised if the instrument is not known to the supplied registry,
        including when the query raises `DataIdError`.
    ModuleNotFoundError
        Presumably propagated from `doImportType` if the class could not
        be imported. This could mean that the relevant obs package has not
        been setup.
    TypeError
        Raised if the class name retrieved is not a string or the imported
        symbol is not an `Instrument` subclass.

    Notes
    -----
    The instrument must be registered in the corresponding butler.
    """
    try:
        matches = list(registry.queryDimensionRecords("instrument", instrument=name))
    except DataIdError:
        # A rejected data ID is handled exactly like "no such instrument".
        matches = []
    if not matches:
        raise LookupError(f"No registered instrument with name '{name}'.")

    className = matches[0].class_name
    if not isinstance(className, str):
        raise TypeError(f"Unexpected class name retrieved from {name} instrument dimension (got {className})")

    instrumentClass: type = doImportType(className)
    if not issubclass(instrumentClass, Instrument):
        raise TypeError(
            f"{instrumentClass!r}, obtained from importing {className}, is not an Instrument subclass."
        )
    return instrumentClass(collection_prefix=collection_prefix)
def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[TaskDef]]) -> None:
    """Register all dataset types used by tasks in a registry.

    Copied and modified from `PreExecInit.initializeDatasetTypes`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry instance.
    pipeline : `~lsst.pipe.base.Pipeline` or `typing.Iterable` of `TaskDef`
        Anything that yields `TaskDef` instances when iterated, likely the
        output of the method toExpandedPipeline on a
        `~lsst.pipe.base.Pipeline` object.

    Notes
    -----
    For every task its init-inputs, init-outputs, inputs, outputs and
    prerequisites are registered, together with the task's config dataset
    type and the shared ``packages`` dataset type. Component dataset types
    are logged but not registered.
    """
    # The "packages" dataset type does not depend on the task, so it is
    # built once rather than once per task.
    packagesDatasetType = DatasetType("packages", {}, storageClass="Packages",
                                      universe=registry.dimensions)
    for taskDef in pipeline:
        configDatasetType = DatasetType(taskDef.configDatasetName, {}, storageClass="Config",
                                        universe=registry.dimensions)
        datasetTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
        for datasetType in itertools.chain(
            datasetTypes.initInputs,
            datasetTypes.initOutputs,
            datasetTypes.inputs,
            datasetTypes.outputs,
            datasetTypes.prerequisites,
            [configDatasetType, packagesDatasetType],
        ):
            _LOG.info("Registering %s with registry", datasetType)
            # this is a no-op if it already exists and is consistent,
            # and it raises if it is inconsistent. But components must be
            # skipped
            if not datasetType.isComponent():
                registry.registerDatasetType(datasetType)
def test_register(self):
    """Verify that register() populates the expected dimensions, that the
    instrument class can be looked up afterwards, and that registering
    twice is allowed.
    """
    config = RegistryConfig()
    config["db"] = "sqlite://"
    registry = Registry.createFromConfig(config)

    # Nothing may be present before the instrument is registered.
    for dimension in ("instrument", "detector", "physical_filter"):
        self.assertFalse(registry.queryDataIds([dimension]).toSequence())

    self.instrument.register(registry)

    # Exactly one instrument, carrying the expected name.
    instrumentDataIds = registry.queryDataIds(["instrument"]).toSequence()
    self.assertEqual(len(instrumentDataIds), 1)
    instrumentNames = {dataId["instrument"] for dataId in instrumentDataIds}
    self.assertEqual(instrumentNames, {self.data.name})

    # Detector data IDs are expanded so their records can be inspected.
    detectorDataIds = registry.queryDataIds(["detector"]).expanded().toSequence()
    self.assertEqual(len(detectorDataIds), self.data.nDetectors)
    detectorNames = {dataId.records["detector"].full_name for dataId in detectorDataIds}
    self.assertIn(self.data.firstDetectorName, detectorNames)

    # The registered filters must be a superset of the expected ones.
    filterDataIds = registry.queryDataIds(["physical_filter"]).toSequence()
    filterNames = {dataId['physical_filter'] for dataId in filterDataIds}
    self.assertGreaterEqual(filterNames, self.data.physical_filters)

    # The instrument class can be retrieved by name from the registry.
    retrieved = Instrument.fromName(self.instrument.getName(), registry)
    self.assertEqual(type(retrieved), type(self.instrument))

    # Re-registration is not an error.
    self.instrument.register(registry)
def test_makeFullIODatasetTypes(self):
    """Check that _makeFullIODatasetTypes() merges per-task dataset types
    into the expected overall sets.
    """
    taskFactory = TaskFactoryMock()
    registry = Registry.fromConfig(RegistryConfig(), SchemaConfig())
    builder = GraphBuilder(taskFactory, registry)

    # build a pipeline
    pipeline = self._makePipeline()

    def datasetTypesFrom(getter, taskDef):
        # A getter may return nothing at all; treat that as "no dataset types".
        descriptors = getter(taskDef.config) or {}
        return [descriptor.datasetType for descriptor in descriptors.values()]

    # collect inputs/outputs from each task
    taskDatasets = []
    for taskDef in pipeline:
        taskClass = taskDef.taskClass
        taskDatasets.append(_TaskDatasetTypes(
            taskDef=taskDef,
            inputs=datasetTypesFrom(taskClass.getInputDatasetTypes, taskDef),
            outputs=datasetTypesFrom(taskClass.getOutputDatasetTypes, taskDef),
            initInputs=datasetTypesFrom(taskClass.getInitInputDatasetTypes, taskDef),
            initOutputs=datasetTypesFrom(taskClass.getInitOutputDatasetTypes, taskDef),
        ))

    # make inputs and outputs from per-task dataset types
    inputs, outputs, initInputs, initOutputs = builder._makeFullIODatasetTypes(taskDatasets)

    for merged in (inputs, outputs, initInputs, initOutputs):
        self.assertIsInstance(merged, set)

    self.assertEqual([datasetType.name for datasetType in inputs], ["input"])
    self.assertEqual({datasetType.name for datasetType in outputs}, {"output", "output2"})
    self.assertEqual([datasetType.name for datasetType in initInputs], ['initInput'])
    self.assertEqual([datasetType.name for datasetType in initOutputs], ['initOutputs'])
def _fillRelationships(dimension: Dimension, dimensionInfo: Mapping[str, Any], existing: Registry) -> Mapping[str, Any]:
    """Create arbitrary mappings from one dimension to all dimensions it
    depends on.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate relationships.
    dimensionInfo : `~collections.abc.Mapping` [`str`]
        A mapping of dimension keys to values. It is not modified.
    existing : `lsst.daf.butler.Registry`
        The registry with all previously registered dimensions.

    Returns
    -------
    filledInfo : `dict` [`str`]
        A version of ``dimensionInfo`` with extra mappings for any
        relationships required by ``dimension``. Any relationships already
        defined in ``dimensionInfo`` are preserved.

    Raises
    ------
    RuntimeError
        Raised (by `_matchAnyDataId`) if ``dimension`` depends on a
        dimension for which no values exist yet.
    """
    # ``dict(...)`` rather than ``.copy()`` so that any Mapping is accepted,
    # as the annotation promises.
    filledInfo = dict(dimensionInfo)

    for other in dimension.required:
        if other != dimension and other.name not in filledInfo:
            _matchAnyDataId(filledInfo, existing, other)

    # Do not recurse, to keep the user from having to provide
    # irrelevant dimensions.
    for other in dimension.implied:
        toUpdate = other != dimension and other.name not in filledInfo
        updatable = other.viewOf is None
        # Do not run query if either toUpdate or updatable is false. An
        # implied dimension with no records at all is silently left unset.
        if toUpdate and updatable and list(existing.queryDimensionRecords(other)):
            _matchAnyDataId(filledInfo, existing, other)

    return filledInfo
def addSkyPix(self, registry: Registry, dimension: SkyPixDimension):
    """Populate the included skypix IDs for the given dimension from those
    that overlap the visits the `ConversionSubset` was initialized with.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Registry that can be queried for visit regions.
    dimension : `lsst.daf.butler.SkyPixDimension`
        SkyPix dimension whose ``pixelization`` is used to compute the
        envelope of each visit region. It is also the key under which the
        resulting ranges are stored in ``self.skypix``.
    """
    if self.regions is None:
        # Build the complete list before caching it, so that a failed
        # lookup cannot leave a partial list behind for later calls to
        # reuse.
        self.regions = [
            registry.expandDataId(instrument=self.instrument, visit=visit).region
            for visit in self.visits
        ]

    # Accumulate the union of the envelopes of all visit regions.
    ranges = RangeSet()
    for region in self.regions:
        ranges = ranges.union(dimension.pixelization.envelope(region))
    self.skypix[dimension] = ranges
def testTransfer(self):
    """Put an example catalog into one datastore, transfer it to a second
    datastore and check that it reads back equal.
    """
    catalog = self.makeExampleCatalog()
    dimensions = frozenset(("visit", "filter"))
    dataId = {"visit": 12345, "filter": "red"}
    catalogStorageClass = self.storageClassFactory.getStorageClass("SourceCatalog")
    ref = self.makeDatasetRef("calexp", dimensions, catalogStorageClass, dataId)

    # Source datastore: shares this test's registry.
    sourceConfig = DatastoreConfig(self.configFile)
    sourceConfig['datastore.root'] = os.path.join(self.testDir, "./test_input_datastore")
    source = PosixDatastore(config=sourceConfig, registry=self.registry)

    # Destination datastore: same settings but its own root and a registry
    # built separately from the same configuration file.
    destConfig = sourceConfig.copy()
    destConfig['datastore.root'] = os.path.join(self.testDir, "./test_output_datastore")
    destRegistry = Registry.fromConfig(self.configFile)
    destination = PosixDatastore(config=destConfig, registry=destRegistry)

    source.put(catalog, ref)
    destination.transfer(source, ref)

    retrieved = destination.get(ref)
    self.assertCatalogEqual(catalog, retrieved)
def addSkyMap(self, registry: Registry, name: str):
    """Populate the included tract IDs for the given skymap from those that
    overlap the visits the `ConversionSubset` was initialized with.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Registry that can be queried for visit/tract overlaps.
    name : `str`
        SkyMap name used in Gen3 data IDs. It is also the key under which
        the resulting set of tract IDs is stored in ``self.tracts``.

    Notes
    -----
    ``self.tracts[name]`` is assigned only after every visit has been
    queried, so an exception from the registry leaves any previous entry
    untouched instead of storing a partially filled set.
    """
    tracts = set()
    for visit in self.visits:
        matches = registry.queryDataIds(
            ["tract"],
            dataId={"skymap": name, "instrument": self.instrument, "visit": visit},
        )
        tracts.update(dataId["tract"] for dataId in matches)
    self.tracts[name] = tracts
def importAll(registry: Registry) -> None:
    """Import all the instruments known to this registry.

    This will ensure that all metadata translators have been registered.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry to query to find the information.

    Notes
    -----
    It is allowed for a particular instrument class to fail on import.
    This might simply indicate that a particular obs package has
    not been setup. Such failures are reported at DEBUG level and
    otherwise ignored.
    """
    import logging

    log = logging.getLogger(__name__)

    # Materialize the records first so the query is finished before any
    # import is attempted.
    records = list(registry.queryDimensionRecords("instrument"))
    for record in records:
        className = record.class_name
        try:
            doImportType(className)
        except Exception as err:
            # Deliberately best-effort: a failed import must not stop the
            # remaining instruments from being imported, but the reason is
            # kept available for debugging.
            log.debug("Could not import instrument class %s: %r", className, err)
def test_register(self):
    """Verify that register() populates the instrument, detector and
    physical_filter dimension entries of an initially empty registry.
    """
    configPath = os.path.join(getPackageDir("daf_butler"), "tests/config/basic/butler.yaml")
    registry = Registry.fromConfig(ButlerConfig(configPath))

    # Nothing may be present before the instrument is registered.
    for dimension in ('instrument', 'detector', 'physical_filter'):
        self.assertEqual(registry.findDimensionEntries(dimension), [])

    self.instrument.register(registry)

    # Exactly one instrument entry, carrying the expected name.
    self.assertEqual(len(registry.findDimensionEntries('instrument')), 1)
    firstInstrument = registry.findDimensionEntries('instrument')[0]
    self.assertEqual(firstInstrument['instrument'], self.data.name)

    # One entry per detector.
    self.assertEqual(len(registry.findDimensionEntries('detector')), self.data.nDetectors)

    # The registered filters must be a superset of the expected ones.
    filterNames = {entry['physical_filter'] for entry in registry.findDimensionEntries('physical_filter')}
    self.assertGreaterEqual(filterNames, self.data.physical_filters)
def _matchAnyDataId(record: Mapping[str, Any], registry: Registry, dimension: Dimension):
    """Fill in one dimension of a partial record with a value taken from an
    existing record.

    Parameters
    ----------
    record : `dict` [`str`]
        A mapping representing the record to be matched. It is modified in
        place: the entry keyed by ``dimension.name`` is set.
    registry : `lsst.daf.butler.Registry`
        The registry with all known dimension records.
    dimension : `lsst.daf.butler.Dimension`
        The dimension on which to find a match for ``record``.

    Raises
    ------
    RuntimeError
        Raised if there are no existing records for ``dimension``.
    """
    candidates = list(registry.queryDimensionRecords(dimension.name))
    if not candidates:
        raise RuntimeError(f"No matching values for {dimension.name} found.")
    # Any existing record will do; the first one returned is used.
    record[dimension.name] = candidates[0].dataId[dimension.name]
def setUp(self):
    """Create the registry and the dataset ID counter used by the tests."""
    registry = Registry.fromConfig(self.configFile)
    self.registry = registry

    # There is no butler in these tests, so an ID for each datasetRef
    # has to be tracked here.
    self.id = 1
def makeRegistry(self):
    """Create a registry from the basic test configuration with the
    ``("registry", "limited")`` option switched on.

    Returns
    -------
    registry : `Registry`
        Result of `Registry.fromConfig` called with ``create=True``.
    """
    # The configuration file lives next to this test module.
    configPath = os.path.join(os.path.dirname(__file__), "config/basic/butler.yaml")
    config = ButlerConfig(configPath)
    config["registry", "limited"] = True
    registry = Registry.fromConfig(config, create=True)
    return registry
def getRegistry(self):
    """Build a registry from ``self.butlerConfig`` rooted at ``self.root``.

    Returns
    -------
    registry : `Registry`
        Result of `Registry.fromConfig`; a new call is made each time.
    """
    registry = Registry.fromConfig(self.butlerConfig, butlerRoot=self.root)
    return registry
def setUp(self):
    """Locate the basic test configuration and build a registry from it."""
    # The configuration file lives next to this test module.
    here = os.path.dirname(__file__)
    self.testDir = here
    self.configFile = os.path.join(here, "config/basic/butler.yaml")

    butlerConfig = ButlerConfig(self.configFile)
    self.butlerConfig = butlerConfig
    self.registry = Registry.fromConfig(butlerConfig)