Example #1
def registerDatasetTypes(registry, pipeline):
    """Register all dataset types used by tasks in a registry.

    Copied and modified from `PreExecInit.initializeDatasetTypes`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry instance.
    pipeline : `typing.Iterable` of `TaskDef`
        Iterable of `TaskDef` instances, likely the output of the
        ``toExpandedPipeline`` method on a `~lsst.pipe.base.Pipeline` object.
    """
    for taskDef in pipeline:
        configDatasetType = DatasetType(taskDef.configDatasetName, {},
                                        storageClass="Config",
                                        universe=registry.dimensions)
        packagesDatasetType = DatasetType("packages", {},
                                          storageClass="Packages",
                                          universe=registry.dimensions)
        datasetTypes = pipeBase.TaskDatasetTypes.fromTaskDef(taskDef,
                                                             registry=registry)
        for datasetType in itertools.chain(
                datasetTypes.initInputs, datasetTypes.initOutputs,
                datasetTypes.inputs, datasetTypes.outputs,
                datasetTypes.prerequisites,
                [configDatasetType, packagesDatasetType]):
            _LOG.info("Registering %s with registry", datasetType)
            # Registering is a no-op if the dataset type already exists and is
            # consistent, and raises if it is inconsistent; components must be
            # skipped.
            if not datasetType.isComponent():
                registry.registerDatasetType(datasetType)
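A minimal usage sketch for the helper above (not from the original): the ``butler`` and ``pipeline`` objects are assumed to exist already, and ``toExpandedPipeline`` is the `~lsst.pipe.base.Pipeline` method referenced in the docstring.

# Hedged sketch: assumes an existing Butler and an expanded Pipeline.
registerDatasetTypes(butler.registry, pipeline.toExpandedPipeline())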
Example #2
    def testConstructor(self):
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass.
        This can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = StorageClass("test_StructuredData")
        dimensions = self.universe.extract(("instrument", "visit"))
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dimensions, dimensions)

        with self.assertRaises(
                ValueError,
                msg="Construct component without parent storage class"):
            DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                        dimensions, storageClass)
        with self.assertRaises(
                ValueError,
                msg="Construct non-component with parent storage class"):
            DatasetType(datasetTypeName,
                        dimensions,
                        storageClass,
                        parentStorageClass="NotAllowed")
Example #3
    def registerDatasetTypes(datasetTypeName, dimensions, storageClass, registry):
        """Bulk register DatasetTypes
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)

        for compName, compStorageClass in storageClass.components.items():
            compType = DatasetType(datasetType.componentTypeName(compName), dimensions, compStorageClass)
            registry.registerDatasetType(compType)
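A hedged usage sketch for the bulk helper above; the storage class, dataset type name, and dimensions are illustrative values mirroring the composite pattern from Example #5, not part of the original.

# Illustrative only: registering a composite also registers "example.a".
sc = StorageClass("ExampleComposite", components={"a": StorageClass("ExampleComponent")})
registerDatasetTypes("example", registry.dimensions.extract(("instrument",)), sc, registry)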
Example #4
 def setUp(self):
     """Create a new butler root for each test."""
     self.root = makeTestTempDir(TESTDIR)
     Butler.makeRepo(self.root)
     self.butler = Butler(self.root, run="test_run")
     # No dimensions in dataset type so we don't have to worry about
     # inserting dimension data or defining data IDs.
     self.datasetType = DatasetType("data", dimensions=(), storageClass="DataFrame",
                                    universe=self.butler.registry.dimensions)
     self.butler.registry.registerDatasetType(self.datasetType)
Example #5
 def setUp(self):
     self.universe = DimensionUniverse()
     datasetTypeName = "test"
     self.componentStorageClass1 = StorageClass("Component1")
     self.componentStorageClass2 = StorageClass("Component2")
     self.parentStorageClass = StorageClass("Parent", components={"a": self.componentStorageClass1,
                                                                  "b": self.componentStorageClass2})
     dimensions = self.universe.extract(("instrument", "visit"))
     self.dataId = dict(instrument="DummyCam", visit=42)
     self.datasetType = DatasetType(datasetTypeName, dimensions, self.parentStorageClass)
Example #6
 def testDatasetLocations(self):
     registry = self.makeRegistry()
     storageClass = StorageClass("testStorageInfo")
     registry.storageClasses.registerStorageClass(storageClass)
     datasetType = DatasetType(name="test",
                               dimensions=registry.dimensions.extract(
                                   ("instrument", )),
                               storageClass=storageClass)
     datasetType2 = DatasetType(name="test2",
                                dimensions=registry.dimensions.extract(
                                    ("instrument", )),
                                storageClass=storageClass)
     registry.registerDatasetType(datasetType)
     registry.registerDatasetType(datasetType2)
     if not registry.limited:
         registry.addDimensionEntry("instrument",
                                    {"instrument": "DummyCam"})
     run = registry.makeRun(collection="test")
     ref = registry.addDataset(datasetType,
                               dataId={"instrument": "DummyCam"},
                               run=run)
     ref2 = registry.addDataset(datasetType2,
                                dataId={"instrument": "DummyCam"},
                                run=run)
     datastoreName = "dummystore"
     datastoreName2 = "dummystore2"
     # Test adding information about a new dataset
     registry.addDatasetLocation(ref, datastoreName)
     addresses = registry.getDatasetLocations(ref)
     self.assertIn(datastoreName, addresses)
     self.assertEqual(len(addresses), 1)
     registry.addDatasetLocation(ref, datastoreName2)
     registry.addDatasetLocation(ref2, datastoreName2)
     addresses = registry.getDatasetLocations(ref)
     self.assertEqual(len(addresses), 2)
     self.assertIn(datastoreName, addresses)
     self.assertIn(datastoreName2, addresses)
     registry.removeDatasetLocation(datastoreName, ref)
     addresses = registry.getDatasetLocations(ref)
     self.assertEqual(len(addresses), 1)
     self.assertNotIn(datastoreName, addresses)
     self.assertIn(datastoreName2, addresses)
     with self.assertRaises(OrphanedRecordError):
         registry.removeDataset(ref)
     registry.removeDatasetLocation(datastoreName2, ref)
     addresses = registry.getDatasetLocations(ref)
     self.assertEqual(len(addresses), 0)
     self.assertNotIn(datastoreName2, addresses)
     registry.removeDataset(ref)  # should not raise
     addresses = registry.getDatasetLocations(ref2)
     self.assertEqual(len(addresses), 1)
     self.assertIn(datastoreName2, addresses)
Example #7
 def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
     # Docstring inherited from DatasetRecordStorageManager.
     if datasetType.isComponent():
         raise ValueError("Component dataset types can not be stored in registry."
                          f" Rejecting {datasetType.name}")
     storage = self._byName.get(datasetType.name)
     if storage is None:
         dimensionsKey = self._dimensions.saveDimensionGraph(datasetType.dimensions)
         tagTableName = makeTagTableName(datasetType, dimensionsKey)
         calibTableName = (makeCalibTableName(datasetType, dimensionsKey)
                           if datasetType.isCalibration() else None)
         row, inserted = self._db.sync(
             self._static.dataset_type,
             keys={"name": datasetType.name},
             compared={
                 "dimensions_key": dimensionsKey,
                 "storage_class": datasetType.storageClass.name,
             },
             extra={
                 "tag_association_table": tagTableName,
                 "calibration_association_table": calibTableName,
             },
             returning=["id", "tag_association_table"],
         )
         assert row is not None
         tags = self._db.ensureTableExists(
             tagTableName,
             makeTagTableSpec(datasetType, type(self._collections)),
         )
         if calibTableName is not None:
             calibs = self._db.ensureTableExists(
                 calibTableName,
                 makeCalibTableSpec(datasetType, type(self._collections),
                                    self._db.getTimespanRepresentation()),
             )
         else:
             calibs = None
         storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                    static=self._static, summaries=self._summaries,
                                                    tags=tags, calibs=calibs,
                                                    dataset_type_id=row["id"],
                                                    collections=self._collections)
         self._byName[datasetType.name] = storage
         self._byId[storage._dataset_type_id] = storage
     else:
         if datasetType != storage.datasetType:
             raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                              f"with database definition {storage.datasetType}.")
         inserted = False
     return storage, inserted
Example #8
    def makeDatasetRef(self, datasetTypeName, dataId=None, storageClassName="DefaultStorageClass",
                       run="run2", conform=True):
        """Make a simple DatasetRef"""
        if dataId is None:
            dataId = self.dataId

        # Pretend we have a parent if this looks like a composite
        compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
        parentStorageClass = DatasetType.PlaceholderParentStorageClass if componentName else None

        datasetType = DatasetType(datasetTypeName, DimensionGraph(self.universe, names=dataId.keys()),
                                  StorageClass(storageClassName),
                                  parentStorageClass=parentStorageClass)
        return DatasetRef(datasetType, dataId, id=1, run=run, conform=conform)
Example #9
 def testComponents(self):
     registry = self.makeRegistry()
     childStorageClass = StorageClass("testComponentsChild")
     registry.storageClasses.registerStorageClass(childStorageClass)
     parentStorageClass = StorageClass("testComponentsParent",
                                       components={
                                           "child1": childStorageClass,
                                           "child2": childStorageClass
                                       })
     registry.storageClasses.registerStorageClass(parentStorageClass)
     parentDatasetType = DatasetType(name="parent",
                                     dimensions=registry.dimensions.extract(
                                         ("instrument", )),
                                     storageClass=parentStorageClass)
     childDatasetType1 = DatasetType(name="parent.child1",
                                     dimensions=registry.dimensions.extract(
                                         ("instrument", )),
                                     storageClass=childStorageClass)
     childDatasetType2 = DatasetType(name="parent.child2",
                                     dimensions=registry.dimensions.extract(
                                         ("instrument", )),
                                     storageClass=childStorageClass)
     registry.registerDatasetType(parentDatasetType)
     registry.registerDatasetType(childDatasetType1)
     registry.registerDatasetType(childDatasetType2)
     dataId = {"instrument": "DummyCam"}
     if not registry.limited:
         registry.addDimensionEntry("instrument", dataId)
     run = registry.makeRun(collection="test")
     parent = registry.addDataset(parentDatasetType, dataId=dataId, run=run)
     children = {
         "child1":
         registry.addDataset(childDatasetType1, dataId=dataId, run=run),
         "child2":
         registry.addDataset(childDatasetType2, dataId=dataId, run=run)
     }
     for name, child in children.items():
         registry.attachComponent(name, parent, child)
     self.assertEqual(parent.components, children)
     outParent = registry.getDataset(parent.id)
     self.assertEqual(outParent.components, children)
     # Remove the parent; this should remove both children.
     registry.removeDataset(parent)
     self.assertIsNone(
         registry.find(run.collection, parentDatasetType, dataId))
     self.assertIsNone(
         registry.find(run.collection, childDatasetType1, dataId))
     self.assertIsNone(
         registry.find(run.collection, childDatasetType2, dataId))
Example #10
    def testSorting(self):
        """Can we sort a DatasetType"""
        storage = StorageClass("test_a")
        dimensions = self.universe.extract(["instrument"])

        d_a = DatasetType("a", dimensions, storage)
        d_f = DatasetType("f", dimensions, storage)
        d_p = DatasetType("p", dimensions, storage)

        sort = sorted([d_p, d_f, d_a])
        self.assertEqual(sort, [d_a, d_f, d_p])

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", d_p, "c", d_f, d_a, "d"])
Example #11
 def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
     # Docstring inherited from DatasetRecordStorageManager.
     if datasetType.isComponent():
         raise ValueError("Component dataset types can not be stored in registry."
                          f" Rejecting {datasetType.name}")
     storage = self._byName.get(datasetType.name)
     if storage is None:
         row, inserted = self._db.sync(
             self._static.dataset_type,
             keys={"name": datasetType.name},
             compared={
                 "dimensions_encoded": datasetType.dimensions.encode(),
                 "storage_class": datasetType.storageClass.name,
             },
             returning=["id"],
         )
         assert row is not None
         dynamic = self._db.ensureTableExists(
             makeDynamicTableName(datasetType),
             makeDynamicTableSpec(datasetType, type(self._collections)),
         )
         storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                    static=self._static, dynamic=dynamic,
                                                    dataset_type_id=row["id"],
                                                    collections=self._collections)
         self._byName[datasetType.name] = storage
         self._byId[storage._dataset_type_id] = storage
     else:
         if datasetType != storage.datasetType:
             raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                              f"with database definition {storage.datasetType}.")
         inserted = False
     return storage, inserted
Example #12
    def run(self, butler):
        """Construct and optionally save a SkyMap into a gen3 repository
        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler repository to which the new skymap will be written
        """
        skyMap = self.config.skyMap.apply()
        skyMap.logSkyMapInfo(self.log)
        skyMapHash = skyMap.getSha1()
        try:
            existing, = butler.registry.query("SELECT skymap FROM skymap WHERE hash=:hash",
                                              hash=skyMapHash)
            raise RuntimeError(
                f"SkyMap with name {existing.name} and hash {skyMapHash} already exists in "
                f"the butler collection {self.collection}; SkyMaps must be unique within "
                "a collection."
            )
        except ValueError:
            self.log.info(f"Inserting SkyMap {self.config.name} with hash={skyMapHash}")
            with butler.registry.transaction():
                skyMap.register(self.config.name, butler.registry)
                butler.registry.registerDatasetType(DatasetType(name=self.config.datasetTypeName,
                                                                dimensions=["skymap"],
                                                                storageClass="SkyMap",
                                                                universe=butler.registry.dimensions))
                butler.put(skyMap, self.config.datasetTypeName, {"skymap": self.config.name})

        return pipeBase.Struct(
            skyMap=skyMap
        )
Example #13
 def assertGetComponents(self, butler, datasetTypeName, dataId, components,
                         reference):
     for component in components:
         compTypeName = DatasetType.nameWithComponent(
             datasetTypeName, component)
         result = butler.get(compTypeName, dataId)
         self.assertEqual(result, getattr(reference, component))
Example #14
 def refresh(self) -> None:
     # Docstring inherited from DatasetRecordStorageManager.
     byName = {}
     byId = {}
     c = self._static.dataset_type.columns
     for row in self._db.query(self._static.dataset_type.select()).fetchall():
         name = row[c.name]
         dimensions = self._dimensions.loadDimensionGraph(row[c.dimensions_key])
         calibTableName = row[c.calibration_association_table]
         datasetType = DatasetType(name, dimensions, row[c.storage_class],
                                   isCalibration=(calibTableName is not None))
         tags = self._db.getExistingTable(row[c.tag_association_table],
                                          makeTagTableSpec(datasetType, type(self._collections)))
         if calibTableName is not None:
             calibs = self._db.getExistingTable(row[c.calibration_association_table],
                                                makeCalibTableSpec(datasetType, type(self._collections),
                                                                   self._db.getTimespanRepresentation()))
         else:
             calibs = None
         storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                    static=self._static, summaries=self._summaries,
                                                    tags=tags, calibs=calibs,
                                                    dataset_type_id=row["id"],
                                                    collections=self._collections)
         byName[datasetType.name] = storage
         byId[storage._dataset_type_id] = storage
     self._byName = byName
     self._byId = byId
     self._summaries.refresh(lambda dataset_type_id: self._byId[dataset_type_id].datasetType)
Example #15
    def testRegistryWithStorageClass(self):
        """Test that the registry can be given a StorageClass object.
        """
        formatterTypeName = "lsst.daf.butler.formatters.yamlFormatter.YamlFormatter"
        storageClassName = "TestClass"
        sc = StorageClass(storageClassName, dict, None)

        universe = DimensionUniverse.fromConfig()
        datasetType = DatasetType("calexp", universe.extract([]), sc)

        # Store using an instance
        self.factory.registerFormatter(sc, formatterTypeName)

        # Retrieve using the class
        f = self.factory.getFormatter(sc, self.fileDescriptor)
        self.assertIsFormatter(f)
        self.assertEqual(f.fileDescriptor, self.fileDescriptor)

        # Retrieve using the DatasetType
        f2 = self.factory.getFormatter(datasetType, self.fileDescriptor)
        self.assertIsFormatter(f2)
        self.assertEqual(f.name(), f2.name())

        # Class directly
        f2cls = self.factory.getFormatterClass(datasetType)
        self.assertIsFormatter(f2cls)

        # This might defer the import; pytest may have already loaded it.
        from lsst.daf.butler.formatters.yamlFormatter import YamlFormatter
        self.assertEqual(type(f), YamlFormatter)

        with self.assertRaises(KeyError):
            # Attempt to overwrite using a different value
            self.factory.registerFormatter(storageClassName,
                                           "lsst.daf.butler.formatters.jsonFormatter.JsonFormatter")
Example #16
 def __init__(self,
              *,
              datasetTypeName: str,
              template: str,
              keys: Dict[str, type],
              storageClass: StorageClass,
              universe: DimensionUniverse,
              formatter: FormatterParameter,
              translatorFactory: TranslatorFactory,
              targetHandler: Optional[PathElementHandler] = None,
              **kwargs: Any):
     # strip off [%HDU] identifiers from e.g. DECAM Community Pipeline
     # products
     template = template.split('[%(')[0]
     super().__init__(template=template, keys=keys)
     self._translator = translatorFactory.makeMatching(
         datasetTypeName, keys, **kwargs)
     self.datasetType = DatasetType(
         datasetTypeName,
         dimensions=self._translator.dimensionNames,
         storageClass=storageClass,
         universe=universe,
         isCalibration=("calibDate" in keys))
     self._formatter = formatter
     if targetHandler is None:
         targetHandler = TargetFileHandler
     self._handler = targetHandler
Example #17
    def testHealSparseMapFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("map", [],
                                  "HealSparseMap",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        ref = butler.put(self.hspMap, datasetType)
        uri = butler.getURI(ref)
        self.assertEqual(uri.getExtension(), '.hsp')

        # Retrieve the full map.
        hspMap = butler.get('map')
        self.assertTrue(np.all(hspMap._sparse_map == self.hspMap._sparse_map))

        # Retrieve the coverage map
        coverage = butler.get('map.coverage')
        self.assertTrue(
            np.all(coverage.coverage_mask == self.hspMap.coverage_mask))

        # Retrieve a partial map
        pixels = [0, 6]
        partialMap = butler.get('map', parameters={'pixels': pixels})

        self.assertTrue(
            np.all(np.where(partialMap.coverage_mask)[0] == np.array(pixels)))
        self.assertTrue(np.all(partialMap[0:10000] == self.hspMap[0:10000]))
        self.assertTrue(
            np.all(partialMap[100000:110000] == self.hspMap[100000:110000]))

        # Retrieve a degraded map
        degradedMapRead = butler.get('map', parameters={'degrade_nside': 512})
        degradedMap = self.hspMap.degrade(512)

        self.assertTrue(
            np.all(degradedMapRead._sparse_map == degradedMap._sparse_map))
Example #18
    def makeTask(self, taskClass, config, overrides, butler):
        """Create new PipelineTask instance from its class.

        Parameters
        ----------
        taskClass : type
            PipelineTask class.
        config : `pex.Config` or None
            Configuration object; if ``None``, the task-defined configuration
            class is used to create a new instance.
        overrides : `ConfigOverrides` or None
            Configuration overrides; this should contain all overrides to be
            applied to a default task config, including instrument-specific,
            obs-package-specific, and possibly command-line overrides.
        butler : `lsst.daf.butler.Butler` or None
            Butler instance used to obtain initialization inputs for
            PipelineTasks.  If None, some PipelineTasks will not be usable.

        Returns
        -------
        Instance of a PipelineTask class or None on errors.

        Raises
        ------
        Any exceptions raised by the PipelineTask constructor or its
        configuration class are propagated back to the caller.
        """

        # configuration
        if config is None:
            config = taskClass.ConfigClass()
            if overrides:
                overrides.applyTo(config)
        elif overrides is not None:
            _LOG.warning(
                "Both config and overrides are specified for task %s, overrides are ignored",
                taskClass.__name__)

        # if we don't have a butler, try to construct without initInputs;
        # let PipelineTasks raise if that's impossible
        if butler is None:
            initInputs = None
        else:
            connections = config.connections.ConnectionsClass(config=config)
            descriptorMap = {}
            for name in connections.initInputs:
                attribute = getattr(connections, name)
                dsType = DatasetType(attribute.name,
                                     butler.registry.dimensions.extract(set()),
                                     attribute.storageClass)
                descriptorMap[name] = dsType
            initInputs = {k: butler.get(v) for k, v in descriptorMap.items()}

        # Freeze the config
        config.freeze()

        # make task instance
        task = taskClass(config=config, initInputs=initInputs)

        return task
Example #19
def getInitInputs(butler: Butler,
                  config: PipelineTaskConfig) -> Dict[str, Any]:
    """Return the initInputs object that would have been passed to a
    `~lsst.pipe.base.PipelineTask` constructor.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to search for input datasets. Must have
        pre-configured collections.
    config : `lsst.pipe.base.PipelineTaskConfig`
        The config for the task to be constructed.

    Returns
    -------
    initInputs : `dict` [`str`]
        A dictionary of objects in the format of the ``initInputs`` parameter
        to `lsst.pipe.base.PipelineTask`.
    """
    connections = config.connections.ConnectionsClass(config=config)
    initInputs = {}
    for name in connections.initInputs:
        attribute = getattr(connections, name)
        # Get full dataset type to check for consistency problems
        dsType = DatasetType(attribute.name,
                             butler.registry.dimensions.extract(set()),
                             attribute.storageClass)
        # All initInputs have empty data IDs
        initInputs[name] = butler.get(dsType)

    return initInputs
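A hedged usage sketch for `getInitInputs`; the ``MyPipelineTask`` class and the pre-existing ``butler`` and ``config`` objects are illustrative assumptions, not part of the original.

# Illustrative only: construct a task with the initInputs gathered above.
initInputs = getInitInputs(butler, config)
task = MyPipelineTask(config=config, initInputs=initInputs)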
Example #20
 def testMatplotlibFormatter(self):
     butler = Butler(self.root, run="testrun")
     datasetType = DatasetType("test_plot", [],
                               "Plot",
                               universe=butler.registry.dimensions)
     butler.registry.registerDatasetType(datasetType)
     # Does not have to be a random image
     pyplot.imshow([
         self.rng.sample(range(50), 10),
         self.rng.sample(range(50), 10),
         self.rng.sample(range(50), 10),
     ])
     ref = butler.put(pyplot.gcf(), datasetType)
     uri = butler.getURI(ref)
     # The test after this will not work if we don't have a local file.
     self.assertEqual(uri.scheme, "file", f"Testing returned URI: {uri}")
     with tempfile.NamedTemporaryFile(suffix=".png") as file:
         pyplot.gcf().savefig(file.name)
         self.assertTrue(filecmp.cmp(uri.path, file.name, shallow=True))
     self.assertTrue(butler.datasetExists(ref))
     with self.assertRaises(ValueError):
         butler.get(ref)
     butler.pruneDatasets([ref], unstore=True, purge=True)
     with self.assertRaises(LookupError):
         butler.datasetExists(ref)
Example #21
def put_values(repo, visit, detector, instrument, out_collection,
               ra=None, dec=None, size=None, filename=None):
    butler = Butler(repo, writeable=True, run=out_collection)
    # This does not strictly need to be done every time, but it is harmless
    # if the dataset type already exists.
    position_dataset_type = DatasetType('cutout_positions', dimensions=['visit', 'detector', 'instrument'],
                                        universe=butler.registry.dimensions,
                                        storageClass='AstropyQTable')
    butler.registry.registerDatasetType(position_dataset_type)

    if filename:
        poslist = numpy.genfromtxt(filename, dtype=None, delimiter=',')
    else:
        poslist = [(ra, dec, size), ]
    ident = []
    pos = []
    size = []
    for i, rec in enumerate(poslist):
        pt = SkyCoord(rec[0], rec[1], frame='icrs', unit=u.deg)
        pos.append(pt)
        ident.append(i*u.dimensionless_unscaled)
        size.append(float(rec[2])*u.dimensionless_unscaled)
    out_table = QTable([ident, pos, size], names=['id', 'position', 'size'])
    butler.put(out_table, 'cutout_positions', visit=visit, detector=detector, instrument=instrument)
Example #22
 def run(self, butler):
     """Construct and optionally save a SkyMap into a gen3 repository
     Parameters
     ----------
     butler : `lsst.daf.butler.Butler`
         Butler repository to which the new skymap will be written
     """
     skyMap = self.config.skyMap.apply()
     skyMap.logSkyMapInfo(self.log)
     skyMapHash = skyMap.getSha1()
     self.log.info(
         f"Inserting SkyMap {self.config.name} with hash={skyMapHash}")
     with butler.registry.transaction():
         try:
             skyMap.register(self.config.name, butler.registry)
         except IntegrityError as err:
             raise RuntimeError(
                 "A skymap with the same name or hash already exists."
             ) from err
         butler.registry.registerDatasetType(
             DatasetType(name=self.config.datasetTypeName,
                         dimensions=["skymap"],
                         storageClass="SkyMap",
                         universe=butler.registry.dimensions))
         butler.put(skyMap, self.config.datasetTypeName,
                    {"skymap": self.config.name})
     return pipeBase.Struct(skyMap=skyMap)
Example #23
 def refresh(self, *, universe: DimensionUniverse) -> None:
     # Docstring inherited from DatasetRecordStorageManager.
     byName = {}
     byId = {}
     c = self._static.dataset_type.columns
     for row in self._db.query(
             self._static.dataset_type.select()).fetchall():
         name = row[c.name]
         dimensions = DimensionGraph.decode(row[c.dimensions_encoded],
                                            universe=universe)
         datasetType = DatasetType(name, dimensions, row[c.storage_class])
         dynamic = self._db.getExistingTable(
             makeDynamicTableName(datasetType),
             makeDynamicTableSpec(datasetType, type(self._collections)))
         storage = ByDimensionsDatasetRecordStorage(
             db=self._db,
             datasetType=datasetType,
             static=self._static,
             dynamic=dynamic,
             dataset_type_id=row["id"],
             collections=self._collections)
         byName[datasetType.name] = storage
         byId[storage._dataset_type_id] = storage
     self._byName = byName
     self._byId = byId
Example #24
    def testCollections(self):
        registry = self.makeRegistry()
        storageClass = StorageClass("testCollections")
        registry.storageClasses.registerStorageClass(storageClass)
        datasetType = DatasetType(name="dummytype",
                                  dimensions=registry.dimensions.extract(
                                      ("instrument", "visit")),
                                  storageClass=storageClass)
        registry.registerDatasetType(datasetType)
        if not registry.limited:
            registry.addDimensionEntry("instrument",
                                       {"instrument": "DummyCam"})
            registry.addDimensionEntry("physical_filter", {
                "instrument": "DummyCam",
                "physical_filter": "d-r"
            })
            registry.addDimensionEntry("visit", {
                "instrument": "DummyCam",
                "visit": 0,
                "physical_filter": "d-r"
            })
            registry.addDimensionEntry("visit", {
                "instrument": "DummyCam",
                "visit": 1,
                "physical_filter": "d-r"
            })
        collection = "ingest"
        run = registry.makeRun(collection=collection)
        # Dataset.physical_filter should be populated as well here from the
        # visit Dimension values, if the Registry isn't limited.
        dataId1 = {"instrument": "DummyCam", "visit": 0}
        if registry.limited:
            dataId1.update(physical_filter="d-r", abstract_filter=None)
        inputRef1 = registry.addDataset(datasetType, dataId=dataId1, run=run)
        dataId2 = {"instrument": "DummyCam", "visit": 1}
        if registry.limited:
            dataId2.update(physical_filter="d-r", abstract_filter=None)
        inputRef2 = registry.addDataset(datasetType, dataId=dataId2, run=run)
        # We should be able to find both datasets in their Run.collection
        outputRef = registry.find(run.collection, datasetType, dataId1)
        self.assertEqual(outputRef, inputRef1)
        outputRef = registry.find(run.collection, datasetType, dataId2)
        self.assertEqual(outputRef, inputRef2)
        # and with the associated collection
        newCollection = "something"
        registry.associate(newCollection, [inputRef1, inputRef2])
        outputRef = registry.find(newCollection, datasetType, dataId1)
        self.assertEqual(outputRef, inputRef1)
        outputRef = registry.find(newCollection, datasetType, dataId2)
        self.assertEqual(outputRef, inputRef2)
        # but no more after disassociation
        registry.disassociate(newCollection, [
            inputRef1,
        ])
        self.assertIsNone(registry.find(newCollection, datasetType, dataId1))
        outputRef = registry.find(newCollection, datasetType, dataId2)
        self.assertEqual(outputRef, inputRef2)

        collections = registry.getAllCollections()
        self.assertEqual(collections, {"something", "ingest"})
Example #25
 def prep(self):
     # Docstring inherited from RepoConverter.
     self.task.log.info(f"Looking for skymaps in root {self.root}.")
     for coaddName, datasetTypeName in SKYMAP_DATASET_TYPES.items():
         if not self.task.isDatasetTypeIncluded(datasetTypeName):
             continue
         try:
             exists = self.butler2.datasetExists(datasetTypeName)
         except AttributeError:
             # This mapper doesn't even define this dataset type.
             continue
         if not exists:
             continue
         instance = self.butler2.get(datasetTypeName)
         name = self.task.useSkyMap(instance, datasetTypeName)
         datasetType = DatasetType(datasetTypeName,
                                   dimensions=["skymap"],
                                   storageClass="SkyMap",
                                   universe=self.task.universe)
         dataId = DataCoordinate.standardize(skymap=name,
                                             universe=self.task.universe)
         struct = FoundSkyMap(name=name,
                              instance=instance,
                              coaddName=coaddName,
                              ref=DatasetRef(datasetType, dataId),
                              filename=self.butler2.getUri(datasetTypeName))
         self._foundSkyMapsByCoaddName[coaddName] = struct
         self.task.log.info("Found skymap %s in %s in %s.", name,
                            datasetTypeName, self.root)
     super().prep()
Example #26
    def _makeDatasetRef(self, datasetTypeName, dimensions, storageClass, dataId, *, id=None, run=None,
                        conform=True):
        # helper for makeDatasetRef

        # Pretend we have a parent if this looks like a composite
        compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
        parentStorageClass = StorageClass("component") if componentName else None

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass,
                                  parentStorageClass=parentStorageClass)
        if id is None:
            self.id += 1
            id = self.id
        if run is None:
            run = "dummy"
        return DatasetRef(datasetType, dataId, id=id, run=run, conform=conform)
Example #27
    def makeDatasetType(
        self, universe: DimensionUniverse, parentStorageClass: Optional[Union[StorageClass, str]] = None
    ) -> DatasetType:
        """Construct a true `DatasetType` instance with normalized dimensions.

        Parameters
        ----------
        universe : `lsst.daf.butler.DimensionUniverse`
            Set of all known dimensions to be used to normalize the dimension
            names specified in config.
        parentStorageClass : `lsst.daf.butler.StorageClass` or `str`, optional
            Parent storage class for component datasets; `None` otherwise.

        Returns
        -------
        datasetType : `DatasetType`
            The `DatasetType` defined by this connection.
        """
        return DatasetType(
            self.name,
            universe.extract(self.dimensions),
            self.storageClass,
            isCalibration=self.isCalibration,
            parentStorageClass=parentStorageClass,
        )
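A hedged usage sketch; ``connection`` is assumed to be an instance of the class defining `makeDatasetType`, and the default `DimensionUniverse()` constructor is used as in Example #5.

# Illustrative only: build the concrete DatasetType described by a connection.
universe = DimensionUniverse()
datasetType = connection.makeDatasetType(universe)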
Example #28
 def makeDatasetRef(self, datasetTypeName, dataUnits, storageClass, dataId, id=None):
     """Make a DatasetType and wrap it in a DatasetRef for a test"""
     datasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
     if id is None:
         self.id += 1
         id = self.id
     return DatasetRef(datasetType, dataId, id=id)
Example #29
    def testMatplotlibFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("test_plot", [], "Plot",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        # Does not have to be a random image
        pyplot.imshow([self.rng.sample(range(50), 10),
                       self.rng.sample(range(50), 10),
                       self.rng.sample(range(50), 10),
                       ])
        ref = butler.put(pyplot.gcf(), datasetType)
        uri = butler.getURI(ref)

        # Following test needs a local file
        with uri.as_local() as local:
            with tempfile.NamedTemporaryFile(suffix=".png") as file:
                pyplot.gcf().savefig(file.name)
                self.assertTrue(
                    filecmp.cmp(
                        local.ospath,
                        file.name,
                        shallow=True
                    )
                )
        self.assertTrue(butler.datasetExists(ref))
        with self.assertRaises(ValueError):
            butler.get(ref)
        butler.pruneDatasets([ref], unstore=True, purge=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref)
Example #30
def make_dataset_type(butler, name, dimensions, storageClass):
    """Create a dataset type in a particular repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    dataset_type : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class are invalid.
    ConflictingDefinitionError
        Raised if another dataset type with the same name already exists.
    """
    dataset_type = DatasetType(name,
                               dimensions,
                               storageClass,
                               universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(dataset_type)
    return dataset_type
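A hedged usage sketch for `make_dataset_type`; the dataset type name, dimensions, and storage class below are illustrative values only, not taken from the original.

# Illustrative values: any valid dimension names and storage class would do.
dataset_type = make_dataset_type(butler, "sourceTable", {"instrument", "visit"},
                                 "DataFrame")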