Example 1
@contextmanager  # from contextlib; needed because this generator is documented as a context manager
def makeSQLiteRegistry(create=True):
    """Context manager to create new empty registry database.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            Registry.createFromConfig(config)
        yield config
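
A minimal usage sketch for the helper above (assuming the `@contextmanager` decoration shown):

# Hypothetical caller: connect to the freshly created registry.
with makeSQLiteRegistry() as config:
    registry = Registry.fromConfig(config)
    print(registry.dimensions)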
Example 2
def loadDimensionData() -> DataCoordinateSequence:
    """Load dimension data from an export file included in the code repository.

    Returns
    -------
    dataIds : `DataCoordinateSequence`
        A sequence containing all data IDs in the export file.
    """
    # Create an in-memory SQLite database and Registry just to import the YAML
    # data and retrieve it as a set of DataCoordinate objects.
    config = RegistryConfig()
    config["db"] = "sqlite://"
    registry = Registry.fromConfig(config, create=True)
    with open(DIMENSION_DATA_FILE, 'r') as stream:
        backend = YamlRepoImportBackend(stream, registry)
    backend.register()
    backend.load(datastore=None)
    dimensions = DimensionGraph(registry.dimensions,
                                names=["visit", "detector", "tract", "patch"])
    return DataCoordinateSequence(
        dataIds=tuple(registry.queryDimensions(dimensions, expand=True)),
        graph=dimensions,
        hasFull=True,
        hasRecords=True,
    )
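
A brief usage sketch (assuming DIMENSION_DATA_FILE points at a valid export file; the fields printed are illustrative):

dataIds = loadDimensionData()
# Slicing a DataCoordinateSequence yields another sequence of expanded IDs.
for dataId in dataIds[:5]:
    print(dataId["visit"], dataId.records["detector"].full_name)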
Example 3
    def test_makeGraph(self):
        """Test for makeGraph() implementation.
        """
        taskFactory = TaskFactoryMock()
        reg = Registry.fromConfig(RegistryConfig(), SchemaConfig())
        gbuilder = GraphBuilder(taskFactory, reg)

        pipeline = self._makePipeline()
        collection = ""
        userQuery = None
        coll = DatasetOriginInfoDef([collection], collection)
        graph = gbuilder.makeGraph(pipeline, coll, userQuery)

        self.assertEqual(len(graph), 2)
        taskDef = graph[0].taskDef
        self.assertEqual(taskDef.taskName, "TaskOne")
        self.assertEqual(taskDef.taskClass, TaskOne)
        # TODO: temporary until we add some content to registry
        # quanta = graph[0].quanta
        # self.assertEqual(len(quanta), 10)
        # for quantum in quanta:
        #     self._checkQuantum(quantum.inputs, Dataset1, range(10))
        #     self._checkQuantum(quantum.outputs, Dataset2, range(10))

        taskDef = graph[1].taskDef
        self.assertEqual(taskDef.taskName, "TaskTwo")
        self.assertEqual(taskDef.taskClass, TaskTwo)
Example 4
    def test_register(self):
        """Test that register() sets appropriate Dimensions.
        """
        registryConfigPath = os.path.join(getPackageDir("daf_butler"),
                                          "tests/config/basic/butler.yaml")
        registry = Registry.fromConfig(ButlerConfig(registryConfigPath))
        # check that the registry starts out empty
        self.assertEqual(list(registry.queryDimensions(["instrument"])), [])
        self.assertEqual(list(registry.queryDimensions(["detector"])), [])
        self.assertEqual(list(registry.queryDimensions(["physical_filter"])),
                         [])

        # register the instrument and check that certain dimensions appear
        self.instrument.register(registry)
        instrumentDataIds = list(registry.queryDimensions(["instrument"]))
        self.assertEqual(len(instrumentDataIds), 1)
        instrumentNames = {
            dataId["instrument"]
            for dataId in instrumentDataIds
        }
        self.assertEqual(instrumentNames, {self.data.name})
        detectorDataIds = list(registry.queryDimensions(["detector"]))
        self.assertEqual(len(detectorDataIds), self.data.nDetectors)
        detectorNames = {
            dataId.records["detector"].full_name
            for dataId in detectorDataIds
        }
        self.assertIn(self.data.firstDetectorName, detectorNames)
        physicalFilterDataIds = list(
            registry.queryDimensions(["physical_filter"]))
        filterNames = {
            dataId['physical_filter']
            for dataId in physicalFilterDataIds
        }
        self.assertGreaterEqual(filterNames, self.data.physical_filters)
Example 5
    def test_makeGraphSelect(self):
        """Test for makeGraph() implementation with subset of data.
        """
        taskFactory = TaskFactoryMock()
        reg = Registry.fromConfig(RegistryConfig(), SchemaConfig())
        gbuilder = GraphBuilder(taskFactory, reg)

        pipeline = self._makePipeline()
        collection = ""
        userQuery = "1 = 1"
        coll = DatasetOriginInfoDef([collection], collection)
        graph = gbuilder.makeGraph(pipeline, coll, userQuery)

        self.assertEqual(len(graph), 2)
        taskDef = graph[0].taskDef
        self.assertEqual(taskDef.taskName, "TaskOne")
        self.assertEqual(taskDef.taskClass, TaskOne)
        # TODO: temporary until we implement makeGraph()
        # quanta = graph[0].quanta
        # self.assertEqual(len(quanta), 3)
        # for quantum in quanta:
        #     self._checkQuantum(quantum.inputs, Dataset1, [1, 5, 9])
        #     self._checkQuantum(quantum.outputs, Dataset2, [1, 5, 9])

        taskDef = graph[1].taskDef
        self.assertEqual(taskDef.taskName, "TaskTwo")
        self.assertEqual(taskDef.taskClass, TaskTwo)
Example 6
    def testTransfer(self):
        metrics = makeExampleMetrics()

        dataUnits = frozenset(("visit", "filter"))
        dataId = {"visit": 2048, "filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dataUnits, sc, dataId)

        inputConfig = DatastoreConfig(self.configFile)
        inputConfig['datastore.root'] = os.path.join(self.testDir,
                                                     "./test_input_datastore")
        inputPosixDatastore = PosixDatastore(config=inputConfig,
                                             registry=self.registry)
        outputConfig = inputConfig.copy()
        outputConfig['datastore.root'] = os.path.join(
            self.testDir, "./test_output_datastore")
        outputPosixDatastore = PosixDatastore(config=outputConfig,
                                              registry=Registry.fromConfig(
                                                  self.configFile))

        inputPosixDatastore.put(metrics, ref)
        outputPosixDatastore.transfer(inputPosixDatastore, ref)

        metricsOut = outputPosixDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)
Example 7
    def test_register(self):
        """Test that register() sets appropriate Dimensions."""
        registryConfig = RegistryConfig()
        registryConfig["db"] = "sqlite://"
        registry = Registry.createFromConfig(registryConfig)
        # Check that the registry starts out empty.
        self.instrument.importAll(registry)
        self.assertFalse(list(registry.queryDimensionRecords("instrument")))

        # Register and check again.
        self.instrument.register(registry)
        instruments = list(registry.queryDimensionRecords("instrument"))
        self.assertEqual(len(instruments), 1)
        self.assertEqual(instruments[0].name, self.name)
        self.assertEqual(instruments[0].detector_max, 2)
        self.assertIn("DummyInstrument", instruments[0].class_name)

        self.instrument.importAll(registry)
        from_registry = DummyInstrument.fromName("DummyInstrument", registry)
        self.assertIsInstance(from_registry, Instrument)
        with self.assertRaises(LookupError):
            Instrument.fromName("NotThrere", registry)

        # Register a bad instrument.
        BadInstrument().register(registry)
        with self.assertRaises(TypeError):
            Instrument.fromName("BadInstrument", registry)

        UnimportableInstrument().register(registry)
        with self.assertRaises(ImportError):
            Instrument.fromName("NoImportInstr", registry)

        # This should work even with the bad class name.
        self.instrument.importAll(registry)
Example 8
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False
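
Usage sketch; the enclosing class is not named in the snippet, so _ChainedCollectionInfo below is a hypothetical stand-in:

# Hypothetical wrapper and collection names.
info = _ChainedCollectionInfo(registry, "refcats/chained")
if info.exists:
    print("chain members:", info.chain)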
Example 9
    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        Registry.createFromConfig(registryConfig)

        butler = Butler(config, **kwargs)
        DatastoreMock.apply(butler)
        return butler
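
A sketch of how a test built on this helper might use it (the test method name and writeable flag are illustrative):

def testSomething(self):  # hypothetical test method
    # Each call gets its own temporary SQLite registry under self.root.
    butler = self.makeButler(writeable=True)
    self.assertTrue(butler.isWriteable())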
Example 10
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False
Example 11
    def fromName(name: str,
                 registry: Registry,
                 collection_prefix: Optional[str] = None) -> Instrument:
        """Given an instrument name and a butler registry, retrieve a
        corresponding instantiated instrument object.

        Parameters
        ----------
        name : `str`
            Name of the instrument (must match the return value of `getName`).
        registry : `lsst.daf.butler.Registry`
            Butler registry to query to find the information.
        collection_prefix : `str`, optional
            Prefix for collection names to use instead of the instrument's own
            name.  This is primarily for use in simulated-data repositories,
            where the instrument name may not be necessary and/or sufficient to
            distinguish between collections.

        Returns
        -------
        instrument : `Instrument`
            An instance of the relevant `Instrument`.

        Raises
        ------
        LookupError
            Raised if the instrument is not known to the supplied registry.
        ModuleNotFoundError
            Raised if the class could not be imported.  This could mean
            that the relevant obs package has not been set up.
        TypeError
            Raised if the class name retrieved is not a string or the imported
            symbol is not an `Instrument` subclass.

        Notes
        -----
        The instrument must be registered in the corresponding butler.
        """
        try:
            records = list(
                registry.queryDimensionRecords("instrument", instrument=name))
        except DataIdError:
            records = None
        if not records:
            raise LookupError(f"No registered instrument with name '{name}'.")
        cls_name = records[0].class_name
        if not isinstance(cls_name, str):
            raise TypeError(
                f"Unexpected class name retrieved from {name} instrument dimension (got {cls_name})"
            )
        instrument_cls: type = doImportType(cls_name)
        if not issubclass(instrument_cls, Instrument):
            raise TypeError(
                f"{instrument_cls!r}, obtained from importing {cls_name}, is not an Instrument subclass."
            )
        return instrument_cls(collection_prefix=collection_prefix)
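
A minimal usage sketch (the repository path and instrument name are illustrative):

from lsst.daf.butler import Butler

butler = Butler("/path/to/repo")  # hypothetical repository
instrument = Instrument.fromName("HSC", butler.registry)
print(instrument.getName())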
Example 12
def registerDatasetTypes(registry: Registry,
                         pipeline: Union[Pipeline, Iterable[TaskDef]]) -> None:
    """Register all dataset types used by tasks in a registry.

    Copied and modified from `PreExecInit.initializeDatasetTypes`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry instance.
    pipeline : `Pipeline` or `typing.Iterable` of `TaskDef`
        Iterable of TaskDef instances, likely the output of the method
        `toExpandedPipeline` on a `~lsst.pipe.base.Pipeline` object.
    """
    for taskDef in pipeline:
        configDatasetType = DatasetType(taskDef.configDatasetName, {},
                                        storageClass="Config",
                                        universe=registry.dimensions)
        storageClass = "Packages"
        packagesDatasetType = DatasetType("packages", {},
                                          storageClass=storageClass,
                                          universe=registry.dimensions)
        datasetTypes = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)
        for datasetType in itertools.chain(
                datasetTypes.initInputs,
                datasetTypes.initOutputs,
                datasetTypes.inputs,
                datasetTypes.outputs,
                datasetTypes.prerequisites,
                [configDatasetType, packagesDatasetType],
        ):
            _LOG.info("Registering %s with registry", datasetType)
            # Registering is a no-op if the dataset type already exists and is
            # consistent, and raises if it is inconsistent; component dataset
            # types must be skipped.
            if not datasetType.isComponent():
                registry.registerDatasetType(datasetType)
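
A hedged usage sketch (the pipeline file name is illustrative):

# Register everything a pipeline will read or write before execution.
pipeline = Pipeline.fromFile("pipeline.yaml")  # hypothetical file
registerDatasetTypes(registry, pipeline.toExpandedPipeline())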
Example 13
    def test_register(self):
        """Test that register() sets appropriate Dimensions.
        """
        registryConfig = RegistryConfig()
        registryConfig["db"] = "sqlite://"
        registry = Registry.createFromConfig(registryConfig)
        # check that the registry starts out empty
        self.assertFalse(registry.queryDataIds(["instrument"]).toSequence())
        self.assertFalse(registry.queryDataIds(["detector"]).toSequence())
        self.assertFalse(
            registry.queryDataIds(["physical_filter"]).toSequence())

        # register the instrument and check that certain dimensions appear
        self.instrument.register(registry)
        instrumentDataIds = registry.queryDataIds(["instrument"]).toSequence()
        self.assertEqual(len(instrumentDataIds), 1)
        instrumentNames = {
            dataId["instrument"]
            for dataId in instrumentDataIds
        }
        self.assertEqual(instrumentNames, {self.data.name})
        detectorDataIds = registry.queryDataIds(
            ["detector"]).expanded().toSequence()
        self.assertEqual(len(detectorDataIds), self.data.nDetectors)
        detectorNames = {
            dataId.records["detector"].full_name
            for dataId in detectorDataIds
        }
        self.assertIn(self.data.firstDetectorName, detectorNames)
        physicalFilterDataIds = registry.queryDataIds(
            ["physical_filter"]).toSequence()
        filterNames = {
            dataId['physical_filter']
            for dataId in physicalFilterDataIds
        }
        self.assertGreaterEqual(filterNames, self.data.physical_filters)

        # Check that the instrument class can be retrieved
        registeredInstrument = Instrument.fromName(self.instrument.getName(),
                                                   registry)
        self.assertEqual(type(registeredInstrument), type(self.instrument))

        # Check that re-registration is not an error.
        self.instrument.register(registry)
Example 14
    def test_makeFullIODatasetTypes(self):
        """Test for _makeFullIODatasetTypes() implementation.
        """
        taskFactory = TaskFactoryMock()
        reg = Registry.fromConfig(RegistryConfig(), SchemaConfig())
        gbuilder = GraphBuilder(taskFactory, reg)

        # build a pipeline
        tasks = self._makePipeline()

        # collect inputs/outputs from each task
        taskDatasets = []
        for taskDef in tasks:
            taskClass = taskDef.taskClass
            taskInputs = taskClass.getInputDatasetTypes(taskDef.config) or {}
            taskInputs = [dsTypeDescr.datasetType for dsTypeDescr in taskInputs.values()]
            taskOutputs = taskClass.getOutputDatasetTypes(taskDef.config) or {}
            taskOutputs = [dsTypeDescr.datasetType for dsTypeDescr in taskOutputs.values()]

            taskInitInputs = taskClass.getInitInputDatasetTypes(taskDef.config) or {}
            taskInitInputs = [dsTypeDescr.datasetType for dsTypeDescr in taskInitInputs.values()]

            taskInitOutputs = taskClass.getInitOutputDatasetTypes(taskDef.config) or {}
            taskInitOutputs = [dsTypeDescr.datasetType for dsTypeDescr in taskInitOutputs.values()]
            taskDatasets.append(_TaskDatasetTypes(taskDef=taskDef,
                                                  inputs=taskInputs,
                                                  outputs=taskOutputs,
                                                  initInputs=taskInitInputs,
                                                  initOutputs=taskInitOutputs))

        # make inputs and outputs from per-task dataset types
        inputs, outputs, initInputs, initOutputs = gbuilder._makeFullIODatasetTypes(taskDatasets)

        self.assertIsInstance(inputs, set)
        self.assertIsInstance(outputs, set)
        self.assertIsInstance(initInputs, set)
        self.assertIsInstance(initOutputs, set)
        self.assertEqual([x.name for x in inputs], ["input"])
        self.assertEqual(set(x.name for x in outputs), set(["output", "output2"]))
        self.assertEqual([x.name for x in initInputs], ['initInput'])
        self.assertEqual([x.name for x in initOutputs], ['initOutputs'])
Example 15
def _fillRelationships(dimension: Dimension, dimensionInfo: Mapping[str, Any],
                       existing: Registry) -> Mapping[str, Any]:
    """Create arbitrary mappings from one dimension to all dimensions it
    depends on.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate relationships.
    dimensionInfo : `dict` [`str`]
        A mapping of dimension keys to values.
    existing : `lsst.daf.butler.Registry`
        The registry with all previously registered dimensions.

    Returns
    -------
    filledInfo : `dict` [`str`]
        A version of ``dimensionInfo`` with extra mappings for any
        relationships required by ``dimension``. Any relationships already
        defined in ``dimensionInfo`` are preserved.

    Raises
    ------
    ValueError
        Raised if ``dimension`` depends on a dimension for which no values
        exist yet.
    """
    filledInfo = dimensionInfo.copy()
    for other in dimension.required:
        if other != dimension and other.name not in filledInfo:
            _matchAnyDataId(filledInfo, existing, other)
    # Do not recurse, to keep the user from having to provide
    # irrelevant dimensions.
    for other in dimension.implied:
        toUpdate = other != dimension and other.name not in filledInfo
        updatable = other.viewOf is None
        # Only query the registry when both toUpdate and updatable are true.
        if toUpdate and updatable and list(
                existing.queryDimensionRecords(other)):
            _matchAnyDataId(filledInfo, existing, other)
    return filledInfo
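
A sketch of a possible call, assuming a "visit" dimension and an already-populated registry (the record values are illustrative):

visit = registry.dimensions["visit"]
info = {"id": 42, "name": "42"}
filled = _fillRelationships(visit, info, registry)
# filled now also maps e.g. "instrument" to a value matched from the registry.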
Example 16
    def addSkyPix(self, registry: Registry, dimension: SkyPixDimension):
        """Populate the included skypix IDs for the given dimension from those
        that overlap the visits the `ConversionSubset` was initialized with.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry that can be queried for visit regions.
        dimension : `lsst.daf.butler.SkyPixDimension`
            SkyPix dimension whose overlapping pixel IDs should be added.
        """
        if self.regions is None:
            self.regions = []
            for visit in self.visits:
                dataId = registry.expandDataId(instrument=self.instrument,
                                               visit=visit)
                self.regions.append(dataId.region)
        ranges = RangeSet()
        for region in self.regions:
            ranges = ranges.union(dimension.pixelization.envelope(region))
        self.skypix[dimension] = ranges
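
Usage sketch (assuming the default dimension universe, where "htm7" is a skypix dimension):

# Hypothetical: collect HTM level-7 pixels overlapping the subset's visits.
subset.addSkyPix(registry, registry.dimensions["htm7"])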
Example 17
    def testTransfer(self):
        catalog = self.makeExampleCatalog()
        dataUnits = frozenset(("visit", "filter"))
        dataId = {"visit": 12345, "filter": "red"}

        storageClass = self.storageClassFactory.getStorageClass("SourceCatalog")
        ref = self.makeDatasetRef("calexp", dataUnits, storageClass, dataId)

        inputConfig = DatastoreConfig(self.configFile)
        inputConfig['datastore.root'] = os.path.join(self.testDir, "./test_input_datastore")
        inputPosixDatastore = PosixDatastore(config=inputConfig, registry=self.registry)
        outputConfig = inputConfig.copy()
        outputConfig['datastore.root'] = os.path.join(self.testDir, "./test_output_datastore")
        outputPosixDatastore = PosixDatastore(config=outputConfig,
                                              registry=Registry.fromConfig(self.configFile))

        inputPosixDatastore.put(catalog, ref)
        outputPosixDatastore.transfer(inputPosixDatastore, ref)

        catalogOut = outputPosixDatastore.get(ref)
        self.assertCatalogEqual(catalog, catalogOut)
Example 18
    def addSkyMap(self, registry: Registry, name: str):
        """Populate the included tract IDs for the given skymap from those that
        overlap the visits the `ConversionSubset` was initialized with.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Registry that can be queried for visit/tract overlaps.
        name : `str`
            SkyMap name used in Gen3 data IDs.
        """
        tracts = set()
        self.tracts[name] = tracts
        for visit in self.visits:
            for dataId in registry.queryDataIds(["tract"],
                                                dataId={
                                                    "skymap": name,
                                                    "instrument":
                                                    self.instrument,
                                                    "visit": visit
                                                }):
                tracts.add(dataId["tract"])
Example 19
    def importAll(registry: Registry) -> None:
        """Import all the instruments known to this registry.

        This will ensure that all metadata translators have been registered.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            Butler registry to query to find the information.

        Notes
        -----
        It is allowed for a particular instrument class to fail on import.
        This might simply indicate that a particular obs package has
        not been setup.
        """
        records = list(registry.queryDimensionRecords("instrument"))
        for record in records:
            cls = record.class_name
            try:
                doImportType(cls)
            except Exception:
                pass
Example 20
    def test_register(self):
        """Test that register() sets appropriate Dimensions.
        """
        registryConfigPath = os.path.join(getPackageDir("daf_butler"),
                                          "tests/config/basic/butler.yaml")
        registry = Registry.fromConfig(ButlerConfig(registryConfigPath))
        # check that the registry starts out empty
        self.assertEqual(registry.findDimensionEntries('instrument'), [])
        self.assertEqual(registry.findDimensionEntries('detector'), [])
        self.assertEqual(registry.findDimensionEntries('physical_filter'), [])

        # register the instrument and check that certain dimensions appear
        self.instrument.register(registry)
        self.assertEqual(len(registry.findDimensionEntries('instrument')), 1)
        self.assertEqual(
            registry.findDimensionEntries('instrument')[0]['instrument'],
            self.data.name)
        self.assertEqual(len(registry.findDimensionEntries('detector')),
                         self.data.nDetectors)
        filterNames = {
            x['physical_filter']
            for x in registry.findDimensionEntries('physical_filter')
        }
        self.assertGreaterEqual(filterNames, self.data.physical_filters)
Example 21
def _matchAnyDataId(record: Mapping[str, Any], registry: Registry,
                    dimension: Dimension):
    """Matches a partial dimension record to an existing record along a
    specific dimension.

    Parameters
    ----------
    record : `dict` [`str`]
        A mapping representing the record to be matched.
    registry : `lsst.daf.butler.Registry`
        The registry with all known dimension records.
    dimension : `lsst.daf.butler.Dimension`
        The dimension on which to find a match for ``record``.

    Raises
    ------
    RuntimeError
        Raised if there are no existing records for ``dimension``.
    """
    matches = list(registry.queryDimensionRecords(dimension.name))
    if matches:
        record[dimension.name] = matches[0].dataId[dimension.name]
    else:
        raise RuntimeError(f"No matching values for {dimension.name} found.")
Example 22
    def setUp(self):
        self.registry = Registry.fromConfig(self.configFile)

        # Need to keep ID for each datasetRef since we have no butler
        # for these tests
        self.id = 1
Example 23
    def makeRegistry(self):
        testDir = os.path.dirname(__file__)
        configFile = os.path.join(testDir, "config/basic/butler.yaml")
        butlerConfig = ButlerConfig(configFile)
        butlerConfig["registry", "limited"] = True
        return Registry.fromConfig(butlerConfig, create=True)
Example 24
    def getRegistry(self):
        return Registry.fromConfig(self.butlerConfig, butlerRoot=self.root)
Example 25
    def setUp(self):
        self.testDir = os.path.dirname(__file__)
        self.configFile = os.path.join(self.testDir, "config/basic/butler.yaml")
        self.butlerConfig = ButlerConfig(self.configFile)
        self.registry = Registry.fromConfig(self.butlerConfig)