Beispiel #1
0
    def testPickle(self):
        """Check that `DatasetType` instances survive a pickle round trip.

        The scenarios exercised are: a plain dataset type, a calibration
        dataset type, a component dataset type with a parent storage class,
        a dataset type whose storage classes are given only by name, and one
        whose storage classes come from
        ``StorageClassFactory.makeNewStorageClass``.
        """
        def roundTrip(original):
            # Serialize the object and immediately load it back.
            return pickle.loads(pickle.dumps(original))

        name = "test"
        parentSC = StorageClass("test_pickle")
        dims = self.universe.extract(("instrument", "visit"))
        # The storage class has to be registered with the factory before a
        # DatasetType that refers to it can be un-pickled.
        StorageClassFactory().registerStorageClass(parentSC)

        plain = DatasetType(name, dims, parentSC)
        restored = roundTrip(plain)
        self.assertIsInstance(restored, DatasetType)
        self.assertEqual(plain.name, restored.name)
        self.assertEqual(plain.dimensions.names, restored.dimensions.names)
        self.assertEqual(plain.storageClass, restored.storageClass)
        self.assertIsNone(restored.parentStorageClass)
        self.assertIs(plain.isCalibration(), restored.isCalibration())
        self.assertFalse(restored.isCalibration())

        calibration = DatasetType(name, dims, parentSC, isCalibration=True)
        restored = roundTrip(calibration)
        self.assertIs(calibration.isCalibration(), restored.isCalibration())
        self.assertTrue(restored.isCalibration())

        # Same checks for a component of a composite.
        componentSC = StorageClass("pickle_component")
        StorageClassFactory().registerStorageClass(componentSC)
        component = DatasetType(DatasetType.nameWithComponent(name, "comp"),
                                dims, componentSC,
                                parentStorageClass=parentSC)
        restored = roundTrip(component)
        self.assertIsInstance(restored, DatasetType)
        self.assertEqual(component.name, restored.name)
        self.assertEqual(component.dimensions.names, restored.dimensions.names)
        self.assertEqual(component.storageClass, restored.storageClass)
        self.assertEqual(component.parentStorageClass, restored.parentStorageClass)
        self.assertEqual(restored.parentStorageClass.name, parentSC.name)
        self.assertEqual(restored, component)

        # Storage classes given as plain strings: pickling must not force
        # the StorageClass to be resolved.
        component = DatasetType(DatasetType.nameWithComponent(name, "comp"),
                                dims, "StrangeComponent",
                                parentStorageClass="UnknownParent")
        restored = roundTrip(component)
        self.assertEqual(restored, component)
        self.assertEqual(restored._parentStorageClassName,
                         component._parentStorageClassName)

        # Storage classes whose Python classes are made by the factory.
        factoryParentClass = StorageClassFactory.makeNewStorageClass("ParentClass")
        factoryComponentClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
        component = DatasetType(DatasetType.nameWithComponent(name, "comp"),
                                dims, factoryComponentClass(),
                                parentStorageClass=factoryParentClass())
        restored = roundTrip(component)
        self.assertEqual(restored, component)
        self.assertEqual(restored._parentStorageClassName,
                         component._parentStorageClassName)
Beispiel #2
0
    def __init__(self, config, registry, butlerRoot=None):
        """Set up the datastore from its configuration.

        Parameters
        ----------
        config
            Datastore configuration; forwarded to the parent constructor and
            read back afterwards through ``self.config``.
        registry
            Registry forwarded to the parent constructor and to the records
            table.
        butlerRoot : optional
            Handed to ``replaceRoot`` together with the configured root to
            support repository relocation.

        Raises
        ------
        ValueError
            Raised if the configuration has no ``root`` entry, or if the
            resolved root is not a directory and ``create`` is missing or
            false in the configuration.
        """
        super().__init__(config, registry)
        if "root" not in self.config:
            raise ValueError("No root directory specified in configuration")

        # An explicit name wins; otherwise derive one from the root as
        # written in the configuration (i.e. before it is expanded).
        if "name" in self.config:
            self.name = self.config["name"]
        else:
            self.name = f"POSIXDatastore@{self.config['root']}"

        # Repository relocation is handled through the config root.
        self.root = replaceRoot(self.config["root"], butlerRoot)

        if not os.path.isdir(self.root):
            creationAllowed = "create" in self.config and self.config["create"]
            if not creationAllowed:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)

        self.locationFactory = LocationFactory(self.root)
        self.formatterFactory = FormatterFactory()
        self.storageClassFactory = StorageClassFactory()

        universe = self.registry.dimensions

        # Associate formatters with storage classes.
        self.formatterFactory.registerFormatters(self.config["formatters"],
                                                 universe=universe)

        # File naming templates.
        self.templates = FileTemplates(self.config["templates"], universe=universe)

        # Constraints list.
        self.constraints = Constraints(self.config.get("constraints"),
                                       universe=universe)

        # Paths and formatters are stored in a table keyed by dataset_id.
        columnTypes = {
            "path": str,
            "formatter": str,
            "storage_class": str,
            "file_size": int,
            "checksum": str,
            "dataset_id": int,
        }
        columnLengths = {
            "path": 256,
            "formatter": 128,
            "storage_class": 64,
            "checksum": 128,
        }
        self.records = DatabaseDict.fromConfig(
            self.config["records"],
            types=columnTypes,
            value=self.RecordTuple,
            key="dataset_id",
            lengths=columnLengths,
            registry=registry,
        )
    def setUpClass(cls):
        """Build a single butler test repository shared by the whole class."""
        cls.storageClassFactory = StorageClassFactory()
        cls.root = tempfile.mkdtemp(dir=TESTDIR)

        data_ids = {
            "instrument": [INSTRUMENT_NAME],
            "detector": [0, 1, 2, 3, 4, 5],
            "exposure": [11, 22],
        }

        # Butler and dimension configuration both come from the same
        # resource directory.
        configDir = ButlerURI("resource://spherex/configs", forceDirectory=True)
        repoConfig = Config(configDir.join("butler.yaml"))
        # in-memory db is being phased out
        # repoConfig["registry", "db"] = 'sqlite:///:memory:'
        cls.creatorButler = makeTestRepo(cls.root,
                                         data_ids,
                                         config=repoConfig,
                                         dimensionConfig=configDir.join("dimensions.yaml"))

        # Register one dataset type per formatter entry, each using every
        # dimension named in data_ids.
        for entry in FORMATTERS:
            typeName = entry["dataset_type"]
            scName = entry["storage_class"]
            resolved = cls.storageClassFactory.getStorageClass(scName)
            addDatasetType(cls.creatorButler, typeName, set(data_ids), resolved)
Beispiel #4
0
 def setUpClass(cls):
     """Skip when afw is unavailable; otherwise load the storage classes
     from the basic butler configuration next to this file.
     """
     if lsst.afw.table is None:
         raise unittest.SkipTest("afw not available.")
     here = os.path.dirname(__file__)
     cls.testDir = here
     factory = cls.storageClassFactory = StorageClassFactory()
     cls.configFile = os.path.join(here, "config/basic/butler.yaml")
     factory.addFromConfig(cls.configFile)
Beispiel #5
0
    def __init__(self, config=None, root=REPO_ROOT):
        """Create the butler configuration and register its storage classes.

        Parameters
        ----------
        config : optional
            Passed to `ButlerConfig`; when `None`, ``root`` is used instead.
        root : optional
            Repository root; also becomes the configuration directory.
        """
        self.root = root
        # Without an explicit config the root itself serves as the source.
        configSource = self.root if config is None else config
        self.butlerConfig = ButlerConfig(configSource, searchPaths=searchPaths)
        StorageClassFactory().addFromConfig(self.butlerConfig)

        # Make the configuration directory point at the root given above.
        self.butlerConfig.configDir = self.root
Beispiel #6
0
 def testConstructor2(self):
     """Check that a `DatasetType` can be built from a storage class name."""
     typeName = "test"
     registered = StorageClass("test_constructor2")
     # The name can only be resolved once the factory knows the class.
     StorageClassFactory().registerStorageClass(registered)
     dims = self.universe.extract(("instrument", "visit"))
     built = DatasetType(typeName, dims, "test_constructor2")
     self.assertEqual(built.name, typeName)
     self.assertEqual(built.storageClass, registered)
     self.assertEqual(built.dimensions, dims)
Beispiel #7
0
 def _translateDatasetType(self, datasetType):
     """Rewrite an underscore-suffixed dataset type name in dotted form.

     A trailing ``_md`` becomes ``.metadata``.  A trailing ``_<component>``,
     for any component of the ``Exposure`` storage class, becomes
     ``.<component>``.  Every other name is returned unchanged.
     """
     # Names without an underscore can never carry a suffix.
     if "_" not in datasetType:
         return datasetType
     if datasetType.endswith("_md"):
         return f"{datasetType[:-3]}.metadata"
     exposureComponents = StorageClassFactory().getStorageClass("Exposure").components
     for componentName in exposureComponents:
         tail = f"_{componentName}"
         if datasetType.endswith(tail):
             stem = datasetType[:-len(tail)]
             return f"{stem}.{componentName}"
     return datasetType
    def setUpClass(cls):
        """Load the shared storage classes and resolve the datastore class
        named in ``cls.configFile``.
        """
        # One fixed set of storage classes serves every datastore here.
        storageClassConfig = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        factory = cls.storageClassFactory = StorageClassFactory()
        factory.addFromConfig(storageClassConfig)

        # The datastore class is taken from the configuration file rather
        # than assumed, so the constructor name is never hard-coded here.
        cls.datastoreType = doImport(DatastoreConfig(cls.configFile)["cls"])
        cls.universe = DimensionUniverse.fromConfig()
Beispiel #9
0
 def testPickle(self):
     """Check that a `DatasetType` survives a pickle round trip."""
     typeName = "test"
     pickleSC = StorageClass("test_pickle")
     dims = self.universe.extract(("instrument", "visit"))
     # The factory has to know the storage class before un-pickling.
     StorageClassFactory().registerStorageClass(pickleSC)
     original = DatasetType(typeName, dims, pickleSC)
     payload = pickle.dumps(original)
     restored = pickle.loads(payload)
     self.assertIsInstance(restored, DatasetType)
     self.assertEqual(original.name, restored.name)
     self.assertEqual(original.dimensions.names, restored.dimensions.names)
     self.assertEqual(original.storageClass, restored.storageClass)
Beispiel #10
0
    def setUpClass(cls):
        """Build a single butler test repository shared by the whole class
        and register the dataset types the tests use.
        """
        cls.storageClassFactory = StorageClassFactory()
        cls.root = tempfile.mkdtemp(dir=TESTDIR)

        dataIds = {
            "instrument": ["DummyCam"],
            "physical_filter": ["d-r"],
            "visit": [42, 43, 44],
        }

        # The repository lives in a directory whose name contains
        # metacharacters, so that such paths are exercised by the tests.
        butlerRoot = os.path.join(cls.root, "sub?#dir")
        cls.creatorButler = makeTestRepo(butlerRoot,
                                         dataIds,
                                         config=Config.fromYaml(BUTLER_CONFIG))

        # Dataset types that use every dimension named in dataIds.
        withDimensions = (
            ("calexp", "ExposureF"),
            ("unknown", "ExposureCompositeF"),
            ("testCatalog", "SourceCatalog"),
            ("lossless", "ExposureF"),
            ("uncompressed", "ExposureF"),
            ("lossy", "ExposureF"),
        )
        for typeName, scName in withDimensions:
            resolved = cls.storageClassFactory.getStorageClass(scName)
            addDatasetType(cls.creatorButler, typeName, set(dataIds), resolved)

        # Dataset types without dimensions, for easy testing.
        withoutDimensions = (
            ("ps", "PropertySet"),
            ("pl", "PropertyList"),
            ("pkg", "Packages"),
            ("config", "Config"),
        )
        for typeName, scName in withoutDimensions:
            resolved = cls.storageClassFactory.getStorageClass(scName)
            addDatasetType(cls.creatorButler, typeName, {}, resolved)
    def setUpClass(cls):
        """Skip when afw is unavailable; otherwise load the shared storage
        classes and resolve the datastore class named in ``cls.configFile``.
        """
        if lsst.afw is None:
            raise unittest.SkipTest("afw not available.")

        # Base classes look up the test directory through this attribute.
        cls.testDir = TESTDIR

        # One fixed set of storage classes serves every datastore here.
        storageClassConfig = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        factory = cls.storageClassFactory = StorageClassFactory()
        factory.addFromConfig(storageClassConfig)

        # The datastore class is taken from the configuration file rather
        # than assumed, so the constructor name is never hard-coded here.
        cls.datastoreType = doImport(DatastoreConfig(cls.configFile)["cls"])
Beispiel #12
0
def assertValidOutput(task, result):
    """Test that the output of a call to ``run`` conforms to its own
    connections.

    Parameters
    ----------
    task : `lsst.pipe.base.PipelineTask`
        The task whose connections need validation. This is a fully-configured
        task object to support features such as optional outputs.
    result : `lsst.pipe.base.Struct`
        A result object produced by calling ``task.run``.

    Raises
    ------
    AssertionError
        Raised if ``result`` does not match what's expected from ``task's``
        connections.
    """
    connections = task.config.ConnectionsClass(config=task.config)
    recoveredOutputs = result.getDict()

    for name in connections.outputs:
        # Use getattr rather than calling ``__getattribute__`` directly so
        # that a ``__getattr__`` fallback on the connections object is
        # honoured as well.
        connection = getattr(connections, name)
        # name
        try:
            output = recoveredOutputs[name]
        except KeyError as err:
            # Chain explicitly so the traceback reports the KeyError as the
            # cause instead of as an error raised while handling another.
            raise AssertionError(f"No such output: {name}") from err
        # multiple
        if connection.multiple:
            if not isinstance(output, collections.abc.Sequence):
                raise AssertionError(
                    f"Expected {name} to be a sequence, got {output} instead.")
        else:
            # use lazy evaluation to not use StorageClassFactory unless
            # necessary
            if isinstance(output, collections.abc.Sequence) \
                    and not issubclass(
                        StorageClassFactory().getStorageClass(connection.storageClass).pytype,
                        collections.abc.Sequence):
                raise AssertionError(
                    f"Expected {name} to be a single value, got {output} instead."
                )
Beispiel #13
0
    def __init__(self, config, registry=None, butlerRoot=None):
        """Set up an empty in-memory datastore.

        Parameters
        ----------
        config
            Datastore configuration; forwarded to the parent constructor and
            read back afterwards through ``self.config``.
        registry : optional
            Forwarded to the parent constructor.
        butlerRoot : optional
            Not used by this constructor.
        """
        super().__init__(config, registry)

        self.storageClassFactory = StorageClassFactory()

        # The name embeds the creation timestamp of this datastore.
        self.name = f"InMemoryDatastore@{time.time()}"
        log.debug("Creating datastore %s", self.name)

        # Datasets are stored keyed by dataset_id.
        self.datasets = dict()

        # Records are kept apart from the datasets so that concrete
        # composite components can be tracked when several components are
        # registered for a single dataset.
        self.records = dict()

        # Constraints list.
        self.constraints = Constraints(self.config.get("constraints"),
                                       universe=self.registry.dimensions)
Beispiel #14
0
def _assertAttributeMatchesConnection(obj: Any, attrName: str,
                                      connection: BaseConnection) -> None:
    """Test that an attribute on an object matches the specification given in
    a connection.

    Parameters
    ----------
    obj
        An object expected to contain the attribute ``attrName``.
    attrName : `str`
        The name of the attribute to be tested.
    connection : `lsst.pipe.base.connectionTypes.BaseConnection`
        The connection, usually some type of output, specifying ``attrName``.

    Raises
    ------
    AssertionError
        Raised if ``obj.attrName`` does not match what's expected
        from ``connection``.
    """
    # name
    try:
        # getattr, unlike a direct ``__getattribute__`` call, also honours a
        # ``__getattr__`` fallback defined on the object.
        attrValue = getattr(obj, attrName)
    except AttributeError as err:
        # Chain explicitly so the traceback reports the AttributeError as the
        # cause instead of as an error raised while handling another.
        raise AssertionError(f"No such attribute on {obj!r}: {attrName}") from err
    # multiple
    if connection.multiple:
        if not isinstance(attrValue, collections.abc.Sequence):
            raise AssertionError(
                f"Expected {attrName} to be a sequence, got {attrValue!r} instead."
            )
    else:
        # use lazy evaluation to not use StorageClassFactory unless
        # necessary
        if isinstance(attrValue, collections.abc.Sequence) and not issubclass(
                StorageClassFactory().getStorageClass(
                    connection.storageClass).pytype, collections.abc.Sequence):
            raise AssertionError(
                f"Expected {attrName} to be a single value, got {attrValue!r} instead."
            )
 def setUpClass(cls):
     """Load the storage classes from the basic butler configuration that
     sits next to this file.
     """
     here = os.path.dirname(__file__)
     cls.testDir = here
     factory = cls.storageClassFactory = StorageClassFactory()
     cls.configFile = os.path.join(here, "config/basic/butler.yaml")
     factory.addFromConfig(cls.configFile)
Beispiel #16
0
class QueryDatasetsTest(unittest.TestCase, ButlerTestHelper):
    """Tests for the ``queryDatasets`` script.

    Every test starts from a fresh `MetricTestRepo` created in ``setUp`` and
    compares the tables returned by ``script.queryDatasets`` with explicitly
    specified expectations.
    """

    mockFuncName = "lsst.daf.butler.cli.cmd.commands.script.queryDatasets"

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    storageClassFactory = StorageClassFactory()

    # Column names of the expected tables, without and with the URI column.
    _columns = ("type", "run", "id", "band", "instrument", "physical_filter", "visit_system", "visit")
    _uriColumns = _columns + ("URI",)

    # Components for which a separate table is expected when URIs are shown.
    _components = ("data", "output", "summary")

    @staticmethod
    def _queryDatasets(repo, glob=(), collections=(), where="", find_first=False, show_uri=False):
        return script.queryDatasets(repo, glob, collections, where, find_first, show_uri)

    @staticmethod
    def _row(datasetTypeName, run, datasetId, visit):
        """Return the expected table row (without URI) for one dataset.

        Band, instrument, physical filter and visit system are the same in
        every expected row of these tests.
        """
        return (datasetTypeName, run, datasetId, "R", "DummyCamComp", "d-r", "1", visit)

    def _uriRow(self, component, run, datasetId, visit):
        """Return the expected table row, URI included, for one component of
        a ``test_metric_comp`` dataset.
        """
        uri = self.testRepo.butler.datastore.root.join(
            f"{run}/test_metric_comp.{component}/"
            f"test_metric_comp_v00000{visit}_fDummyCamComp_{component}.yaml")
        return self._row(f"test_metric_comp.{component}", run, datasetId, visit) + (uri,)

    def _uriTables(self, *datasets):
        """Return the expected tables, one per component, for the given
        ``(run, id, visit)`` triples.
        """
        return tuple(
            AstropyTable(array(tuple(self._uriRow(component, *dataset) for dataset in datasets)),
                         names=self._uriColumns)
            for component in self._components)

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        # Use the class-level config path instead of spelling it out again.
        self.testRepo = MetricTestRepo(self.root, configFile=self.configFile)

    def tearDown(self):
        removeTestTempDir(self.root)

    def testShowURI(self):
        """Test for expected output with show_uri=True."""
        tables = self._queryDatasets(repo=self.root, show_uri=True)

        expectedTables = self._uriTables(("ingest/run", "1", "423"),
                                         ("ingest/run", "2", "424"))

        self.assertAstropyTablesEqual(tables, expectedTables)

    def testNoShowURI(self):
        """Test for expected output without show_uri (default is False)."""
        tables = self._queryDatasets(repo=self.root)

        expectedTables = (
            AstropyTable(array((
                self._row("test_metric_comp", "ingest/run", "1", "423"),
                self._row("test_metric_comp", "ingest/run", "2", "424"))),
                names=self._columns
            ),
        )

        self.assertAstropyTablesEqual(tables, expectedTables)

    def testWhere(self):
        """Test using the where clause to reduce the number of rows returned.
        """
        tables = self._queryDatasets(repo=self.root, where="instrument='DummyCamComp' AND visit=423")

        expectedTables = (
            AstropyTable(array(
                self._row("test_metric_comp", "ingest/run", "1", "423")),
                names=self._columns,
            ),
        )

        self.assertAstropyTablesEqual(tables, expectedTables)

    def testGlobDatasetType(self):
        """Test specifying dataset type."""
        # Create and register an additional DatasetType

        self.testRepo.butler.registry.insertDimensionData("visit",
                                                          {"instrument": "DummyCamComp", "id": 425,
                                                           "name": "fourtwentyfive", "physical_filter": "d-r",
                                                           "visit_system": 1})

        datasetType = addDatasetType(self.testRepo.butler,
                                     "alt_test_metric_comp",
                                     ("instrument", "visit"),
                                     "StructuredCompositeReadComp")

        self.testRepo.addDataset(dataId={"instrument": "DummyCamComp", "visit": 425}, datasetType=datasetType)

        # verify the new dataset type increases the number of tables found:
        tables = self._queryDatasets(repo=self.root)

        expectedTables = (
            AstropyTable(array((
                self._row("test_metric_comp", "ingest/run", "1", "423"),
                self._row("test_metric_comp", "ingest/run", "2", "424"))),
                names=self._columns
            ),
            AstropyTable(array(
                self._row("alt_test_metric_comp", "ingest/run", "3", "425")),
                names=self._columns
            )
        )

        self.assertAstropyTablesEqual(tables, expectedTables)

    def testFindFirstAndCollections(self):
        """Test the find-first option, and the collections option, since it
        is required for find-first."""

        # Add a new run, and add a dataset to shadow an existing dataset.
        self.testRepo.addDataset(run="foo",
                                 dataId={"instrument": "DummyCamComp", "visit": 424})

        # Verify that without find-first, duplicate datasets are returned
        tables = self._queryDatasets(repo=self.root,
                                     collections=["foo", "ingest/run"],
                                     show_uri=True)

        expectedTables = self._uriTables(("foo", "3", "424"),
                                         ("ingest/run", "1", "423"),
                                         ("ingest/run", "2", "424"))

        self.assertAstropyTablesEqual(tables, expectedTables)

        # Verify that with find first the duplicate dataset is eliminated and
        # the more recent dataset is returned.
        tables = self._queryDatasets(repo=self.root,
                                     collections=["foo", "ingest/run"],
                                     show_uri=True,
                                     find_first=True)

        expectedTables = self._uriTables(("foo", "3", "424"),
                                         ("ingest/run", "1", "423"))

        self.assertAstropyTablesEqual(tables, expectedTables)
 def setUpClass(cls):
     """Load the storage classes defined in ``cls.configFile``."""
     factory = cls.storageClassFactory = StorageClassFactory()
     factory.addFromConfig(cls.configFile)
Beispiel #18
0
class PosixDatastore(Datastore):
    """Basic POSIX filesystem backed Datastore.

    Datasets are written as files underneath a single root directory and
    a per-dataset record (formatter, path, storage class, checksum and file
    size) is kept so that they can be located and read back later.

    Attributes
    ----------
    config : `DatastoreConfig`
        Configuration used to create Datastore.
    registry : `Registry`
        `Registry` to use when recording the writing of Datasets.
    root : `str`
        Root directory of this `Datastore`.
    locationFactory : `LocationFactory`
        Factory for creating locations relative to this root.
    formatterFactory : `FormatterFactory`
        Factory for creating instances of formatters.
    storageClassFactory : `StorageClassFactory`
        Factory for creating storage class instances from name.
    templates : `FileTemplates`
        File templates that can be used by this `Datastore`.
    constraints : `Constraints`
        Constraints, read from the ``constraints`` configuration entry, that
        decide whether a dataset is accepted by this `Datastore`.
    records : `DatabaseDict`
        Per-dataset file records, keyed by dataset ID.
    name : `str`
        Label associated with this Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration.
    registry : `Registry`
        Registry passed to the base class and used for the record storage.
    butlerRoot : `str`, optional
        Passed to ``replaceRoot`` along with the configured root in order to
        support repository relocation.

    Raises
    ------
    ValueError
        If no ``root`` is specified in the configuration, or if the root
        location does not exist and ``create`` is `False` in the
        configuration.
    """

    defaultConfigFile = "datastores/posixDatastore.yaml"
    """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
    absolute path. Can be None if no defaults specified.
    """

    # Value type stored in ``records`` for each dataset ID; the field names
    # match the column names declared when ``records`` is created.
    RecordTuple = namedtuple(
        "PosixDatastoreRecord",
        ["formatter", "path", "storage_class", "checksum", "file_size"])

    @classmethod
    def setConfigRoot(cls, root, config, full, overwrite=True):
        """Point a datastore configuration at a new repository root.

        Intended for a new, empty repository: the filesystem-dependent
        options of this Datastore are set so that they are appropriate for
        the given ``root``.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            The `Config` to update.  Only the subset understood by this
            component is updated and defaults are not expanded.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`.  It is read-only and is not
            modified here.  Repository-specific options that should not be
            obtained from defaults when Butler instances are constructed
            are copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, a value that already exists in ``config`` is not
            modified.  The default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        With ``overwrite`` set to `False`, a keyword that is explicitly
        defined in the supplied ``config`` is not overridden, which allows
        explicit values set in external configs to be retained.
        """
        # Values imposed on ``config`` by this call.
        forcedValues = {"root": root}
        # Keys whose values are carried over from ``full``.
        copiedKeys = ("cls", ("records", "table"))
        Config.updateParameters(DatastoreConfig, config, full,
                                toUpdate=forcedValues,
                                toCopy=copiedKeys,
                                overwrite=overwrite)

    def __init__(self, config, registry, butlerRoot=None):
        """Configure the datastore, creating its root directory if allowed.

        Parameters
        ----------
        config : `DatastoreConfig` or `str`
            Configuration.
        registry : `Registry`
            Registry passed to the base class and to the record storage.
        butlerRoot : `str`, optional
            Passed to ``replaceRoot`` together with the configured root to
            support repository relocation.

        Raises
        ------
        ValueError
            If the configuration has no ``root`` entry, or if the root
            directory does not exist and ``create`` is missing or false in
            the configuration.
        """
        super().__init__(config, registry)
        if "root" not in self.config:
            raise ValueError("No root directory specified in configuration")

        # An explicit name wins; otherwise the label is derived from the
        # root exactly as written in the configuration (unexpanded).
        if "name" in self.config:
            self.name = self.config["name"]
        else:
            self.name = f"POSIXDatastore@{self.config['root']}"

        # Resolve the actual root, allowing for a relocated repository
        self.root = replaceRoot(self.config["root"], butlerRoot)

        rootMissing = not os.path.isdir(self.root)
        if rootMissing:
            createAllowed = "create" in self.config and self.config["create"]
            if not createAllowed:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)

        self.locationFactory = LocationFactory(self.root)
        self.formatterFactory = FormatterFactory()
        self.storageClassFactory = StorageClassFactory()

        # Associate formatters with storage classes
        self.formatterFactory.registerFormatters(self.config["formatters"],
                                                 universe=self.registry.dimensions)

        # File naming templates
        self.templates = FileTemplates(self.config["templates"],
                                       universe=self.registry.dimensions)

        # Constraints deciding which datasets are accepted
        self.constraints = Constraints(self.config.get("constraints"),
                                       universe=self.registry.dimensions)

        # Per-dataset records (path, formatter, ...), keyed by dataset_id
        columnTypes = dict(path=str,
                           formatter=str,
                           storage_class=str,
                           file_size=int,
                           checksum=str,
                           dataset_id=int)
        columnLengths = dict(path=256,
                             formatter=128,
                             storage_class=64,
                             checksum=128)
        self.records = DatabaseDict.fromConfig(self.config["records"],
                                               types=columnTypes,
                                               value=self.RecordTuple,
                                               key="dataset_id",
                                               lengths=columnLengths,
                                               registry=registry)

    def __str__(self):
        """Return the root directory of this datastore as its string form."""
        return self.root

    def addStoredFileInfo(self, ref, info):
        """Store the internal file record for a dataset.

        The record is saved in ``self.records`` under the ID of ``ref``.

        Parameters
        ----------
        ref : `DatasetRef`
            The Dataset that has been stored.
        info : `StoredFileInfo`
            Metadata associated with the stored Dataset.
        """
        # Only the name of the storage class is recorded, not the instance
        record = self.RecordTuple(formatter=info.formatter,
                                  path=info.path,
                                  storage_class=info.storageClass.name,
                                  checksum=info.checksum,
                                  file_size=info.size)
        self.records[ref.id] = record

    def removeStoredFileInfo(self, ref):
        """Forget the internal file record kept for a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The Dataset that has been removed.
        """
        datasetId = ref.id
        del self.records[datasetId]

    def getStoredFileInfo(self, ref):
        """Look up the file record kept by this `Datastore` for a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The Dataset that is to be queried.

        Returns
        -------
        info : `StoredFileInfo`
            Stored information about this file and its formatter.

        Raises
        ------
        KeyError
            Dataset with that id can not be found.
        """
        record = self.records.get(ref.id, None)
        if record is not None:
            # The record only holds the storage class name; turn it back
            # into an instance before handing it out.
            storageClass = self.storageClassFactory.getStorageClass(
                record.storage_class)
            return StoredFileInfo(record.formatter,
                                  record.path,
                                  storageClass,
                                  checksum=record.checksum,
                                  size=record.file_size)

        raise KeyError("Unable to retrieve formatter associated with "
                       f"Dataset {ref.id}")

    def exists(self, ref):
        """Report whether a dataset is present in this datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`, that is a record
            is held for it and the recorded file is present on disk.
        """
        try:
            fileInfo = self.getStoredFileInfo(ref)
        except KeyError:
            # No record is held for this dataset
            return False
        else:
            # The recorded path tells us where the file should be
            location = self.locationFactory.fromPath(fileInfo.path)
            return os.path.exists(location.path)

    def get(self, ref, parameters=None):
        """Read a dataset back from the store as an in-memory object.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset can not be retrieved, either because no record
            is held for it or because the recorded file is missing.
        RuntimeError
            The size of the file on disk differs from the recorded size.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """
        log.debug("Retrieve %s from %s with parameters %s", ref, self.name,
                  parameters)

        # Internal record for the file backing this dataset
        try:
            fileInfo = self.getStoredFileInfo(ref)
        except KeyError:
            raise FileNotFoundError(f"Could not retrieve Dataset {ref}")

        location = self.locationFactory.fromPath(fileInfo.path)
        diskPath = location.path

        # Recomputing the checksum on every fetch is too expensive, but
        # existence and size are cheap to verify.
        if not os.path.exists(diskPath):
            raise FileNotFoundError(
                f"Dataset with Id {ref.id} does not seem to exist at"
                f" expected location of {diskPath}")
        sizeOnDisk = os.stat(diskPath).st_size
        if sizeOnDisk != fileInfo.size:
            raise RuntimeError(
                f"Integrity failure in Datastore. Size of file {diskPath} "
                f"({sizeOnDisk}) does not"
                f" match recorded size of {fileInfo.size}")

        # The storage class used for writing can differ from the one
        # requested for reading (concrete composites).
        readStorageClass = ref.datasetType.storageClass
        writeStorageClass = fileInfo.storageClass

        # The parameters have to make sense for the type being read
        readStorageClass.validateParameters(parameters)

        # Non-None when a component of the dataset is being requested
        component = ref.datasetType.component()

        formatter = getInstanceOf(fileInfo.formatter)
        _, assemblerParams = formatter.segregateParameters(parameters)
        try:
            descriptor = FileDescriptor(location,
                                        readStorageClass=readStorageClass,
                                        storageClass=writeStorageClass,
                                        parameters=parameters)
            result = formatter.read(descriptor, component=component)
        except Exception as e:
            raise ValueError(
                f"Failure from formatter for Dataset {ref.id}: {e}")

        # Let the assembler deal with whatever the formatter left over
        if parameters:
            assembler = readStorageClass.assembler()
            result = assembler.handleParameters(result, assemblerParams)

        # The formatter must hand back the type the storage class promises
        expectedType = readStorageClass.pytype
        if expectedType and not isinstance(result, expectedType):
            raise TypeError(
                f"Got type {type(result)} from formatter but expected "
                f"{expectedType}")

        return result

    @transactional
    def put(self, inMemoryDataset, ref):
        """Write an in-memory dataset to a file and record it in the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.
        FileExistsError
            The predicted output file already exists.

        Notes
        -----
        A datastore can be configured to reject certain dataset types, in
        which case the put fails with `DatasetTypeNotSupportedError`.  The
        main use case for this is to allow `ChainedDatastore` to put to
        multiple datastores without requiring that every datastore accepts
        the dataset.
        """
        storageClass = ref.datasetType.storageClass

        # The object must be of the python type the storage class expects
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError(
                f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
                f"and storage class type ({storageClass.pytype})")

        # Rejection by configuration is reported with an exception rather
        # than a boolean return value.
        if not self.constraints.isAcceptable(ref):
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via"
                " configuration.")

        # Output location comes from the file template
        try:
            template = self.templates.getTemplate(ref)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(
                f"Unable to find template for {ref}") from e
        location = self.locationFactory.fromPath(template.format(ref))

        # Formatter lookup for this dataset
        try:
            formatter = self.formatterFactory.getFormatter(ref)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(
                f"Unable to find formatter for {ref}") from e

        # Create the output directory, registering its removal as the undo
        outputDir = os.path.dirname(location.path)
        if not os.path.isdir(outputDir):
            with self._transaction.undoWith("mkdir", os.rmdir, outputDir):
                safeMakeDir(outputDir)

        # Refuse to clobber an existing file
        expectedPath = os.path.join(self.root,
                                    formatter.predictPath(location))
        if os.path.exists(expectedPath):
            raise FileExistsError(
                f"Cannot write file for ref {ref} as "
                f"output file {expectedPath} already exists")

        # Write the file, registering its removal as the undo action
        with self._transaction.undoWith("write", os.remove, expectedPath):
            descriptor = FileDescriptor(location, storageClass=storageClass)
            path = formatter.write(inMemoryDataset, descriptor)
            assert expectedPath == os.path.join(self.root, path)
            log.debug("Wrote file to %s", path)

        # Registration of the written file is shared with ingest
        self.ingest(path, ref, formatter=formatter)

    @transactional
    def ingest(self, path, ref, formatter=None, transfer=None):
        """Add an on-disk file with the given `DatasetRef` to the store,
        possibly transferring it.

        The caller is responsible for ensuring that the given (or predicted)
        Formatter is consistent with how the file was written; `ingest` will
        in general silently ignore incorrect formatters (as it cannot
        efficiently verify their correctness), deferring errors until ``get``
        is first called on the ingested dataset.

        Parameters
        ----------
        path : `str`
            File path.  Treated as relative to the repository root if not
            absolute.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        formatter : `Formatter` (optional)
            Formatter that should be used to retrieve the Dataset.  If not
            provided, the formatter will be constructed according to
            Datastore configuration.
        transfer : str (optional)
            If not None, must be one of 'move', 'copy', 'hardlink', or
            'symlink' indicating how to transfer the file.  The new
            filename and location will be determined via template substitution,
            as with ``put``.  If the file is outside the datastore root, it
            must be transferred somehow.

        Raises
        ------
        RuntimeError
            Raised if ``transfer is None`` and path is outside the repository
            root.
        FileNotFoundError
            Raised if the file at ``path`` does not exist.
        FileExistsError
            Raised if ``transfer is not None`` but a file already exists at the
            location computed from the template.
        NotImplementedError
            Raised if ``transfer`` is not one of the supported modes.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        When ``transfer is None`` the location of the file is compared with
        the repository root using normalized absolute paths, so ``..``
        components anywhere in ``path`` are taken into account and a file
        name that merely begins with ``..`` is not mistaken for a parent
        directory reference.
        """

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via"
                " configuration.")

        if formatter is None:
            formatter = self.formatterFactory.getFormatter(ref)

        # os.path.join discards self.root when ``path`` is absolute, so this
        # single expression covers both relative and absolute inputs.
        fullPath = os.path.normpath(os.path.join(self.root, path))
        if not os.path.exists(fullPath):
            raise FileNotFoundError(
                "File at '{}' does not exist; note that paths to ingest are "
                "assumed to be relative to self.root unless they are absolute."
                .format(fullPath))

        if transfer is None:
            # The file stays where it is, so it has to live under the root.
            # Compare normalized absolute paths: a textual prefix test on
            # the supplied path is fooled both by embedded ".." components
            # ("a/../../b") and by names that only start with ".."
            # ("..cache/file").
            absRoot = os.path.abspath(self.root)
            absPath = os.path.abspath(fullPath)
            try:
                insideRoot = os.path.commonpath([absRoot, absPath]) == absRoot
            except ValueError:
                # No common path can exist (e.g. paths on different drives)
                insideRoot = False

            if os.path.isabs(path):
                if not insideRoot:
                    raise RuntimeError(
                        "'{}' is not inside repository root '{}'".format(
                            path, self.root))
                # Records hold paths relative to the root
                path = os.path.relpath(absPath, absRoot)
            elif not insideRoot:
                raise RuntimeError(
                    f"'{path}' is outside repository root '{self.root}'")
        else:
            # The destination is determined by the file template, as in put
            template = self.templates.getTemplate(ref)
            location = self.locationFactory.fromPath(template.format(ref))
            newPath = formatter.predictPath(location)
            newFullPath = os.path.join(self.root, newPath)
            if os.path.exists(newFullPath):
                raise FileExistsError(
                    "File '{}' already exists".format(newFullPath))
            storageDir = os.path.dirname(newFullPath)
            if not os.path.isdir(storageDir):
                with self._transaction.undoWith("mkdir", os.rmdir, storageDir):
                    safeMakeDir(storageDir)
            # Each transfer registers the action that undoes it
            if transfer == "move":
                with self._transaction.undoWith("move", shutil.move,
                                                newFullPath, fullPath):
                    shutil.move(fullPath, newFullPath)
            elif transfer == "copy":
                with self._transaction.undoWith("copy", os.remove,
                                                newFullPath):
                    shutil.copy(fullPath, newFullPath)
            elif transfer == "hardlink":
                with self._transaction.undoWith("hardlink", os.unlink,
                                                newFullPath):
                    os.link(fullPath, newFullPath)
            elif transfer == "symlink":
                with self._transaction.undoWith("symlink", os.unlink,
                                                newFullPath):
                    os.symlink(fullPath, newFullPath)
            else:
                raise NotImplementedError(
                    "Transfer type '{}' not supported.".format(transfer))
            path = newPath
            fullPath = newFullPath

        # Create Storage information in the registry
        checksum = self.computeChecksum(fullPath)
        stat = os.stat(fullPath)
        size = stat.st_size
        self.registry.addDatasetLocation(ref, self.name)

        # Associate this dataset with the formatter for later read.
        fileInfo = StoredFileInfo(formatter,
                                  path,
                                  ref.datasetType.storageClass,
                                  size=size,
                                  checksum=checksum)
        # TODO: this is only transactional if the DatabaseDict uses
        #       self.registry internally.  Probably need to add
        #       transactions to DatabaseDict to do better than that.
        self.addStoredFileInfo(ref, fileInfo)

        # Register all components with same information
        for compRef in ref.components.values():
            self.registry.addDatasetLocation(compRef, self.name)
            self.addStoredFileInfo(compRef, fileInfo)

    def getUri(self, ref, predict=False):
        """Return the URI of a dataset, optionally predicting it.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `str`
            URI string pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        """
        if self.exists(ref):
            # A dataset we hold: the recorded path gives the location
            fileInfo = self.getStoredFileInfo(ref)
            return self.locationFactory.fromPath(fileInfo.path).uri

        # Never written here, so the best we can do is guess
        if not predict:
            raise FileNotFoundError(f"Dataset {ref} not in this datastore")

        template = self.templates.getTemplate(ref)
        predictedPath = template.format(ref) + "#predicted"
        return self.locationFactory.fromPath(predictedPath).uri

    def remove(self, ref):
        """Delete the file of a dataset and the records that refer to it.

        .. warning::

            This method does not support transactions; removals are
            immediate, cannot be undone, and are not guaranteed to
            be atomic if deleting either the file or the internal
            database records fails.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.
        """
        # Internal record for the file backing this dataset
        try:
            fileInfo = self.getStoredFileInfo(ref)
        except KeyError:
            raise FileNotFoundError(
                f"Requested dataset ({ref}) does not exist")

        location = self.locationFactory.fromPath(fileInfo.path)
        if not os.path.exists(location.path):
            raise FileNotFoundError(f"No such file: {location.uri}")
        os.remove(location.path)

        # The file is gone; now drop the rows that point at it, first for
        # the dataset itself and then for each of its components.
        self.removeStoredFileInfo(ref)
        self.registry.removeDatasetLocation(self.name, ref)
        for componentRef in ref.components.values():
            self.registry.removeDatasetLocation(self.name, componentRef)
            self.removeStoredFileInfo(componentRef)

    def transfer(self, inputDatastore, ref):
        """Copy a Dataset from another `Datastore` into this one.

        The Dataset is read from ``inputDatastore`` with ``get`` and written
        to this `Datastore` with ``put``.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required Dataset in the input data store.

        """
        assert inputDatastore is not self  # unless we want it for renames?
        return self.put(inputDatastore.get(ref), ref)

    def validateConfiguration(self, entities, logFailures=False):
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration.  Can be differing
            types.  Any iterable is accepted, including one-shot iterators
            such as generators.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method checks that all the supplied entities have valid file
        templates and also have formatters defined.
        """
        # The entities are traversed twice (templates, then formatters).
        # Take a snapshot so that a one-shot iterator is not already
        # exhausted when the formatter checks run.
        entities = list(entities)

        templateFailed = None
        try:
            self.templates.validateTemplates(entities, logFailures=logFailures)
        except FileTemplateValidationError as e:
            templateFailed = str(e)

        formatterFailed = []
        for entity in entities:
            try:
                self.formatterFactory.getFormatter(entity)
            except KeyError as e:
                formatterFailed.append(str(e))
                if logFailures:
                    log.fatal("Formatter failure: %s", e)

        # Report template and formatter problems together
        messages = []
        if templateFailed:
            messages.append(templateFailed)
        if formatterFailed:
            messages.append(",".join(formatterFailed))
        if messages:
            raise DatastoreValidationError(";\n".join(messages))

    def getLookupKeys(self):
        # Docstring is inherited from base class
        # The result is the union of the keys known to the templates, the
        # formatter factory and the constraints.
        templateKeys = self.templates.getLookupKeys()
        formatterKeys = self.formatterFactory.getLookupKeys()
        constraintKeys = self.constraints.getLookupKeys()
        return templateKeys | formatterKeys | constraintKeys

    def validateKey(self, lookupKey, entity):
        # Docstring is inherited from base class
        # A key may be valid for the formatters rather than the templates,
        # so there is only something to check here when a template is
        # registered under it.
        if lookupKey not in self.templates:
            return

        try:
            self.templates[lookupKey].validateTemplate(entity)
        except FileTemplateValidationError as e:
            raise DatastoreValidationError(e) from e

    @staticmethod
    def computeChecksum(filename, algorithm="blake2b", block_size=8192):
        """Compute the checksum of the supplied file.

        Parameters
        ----------
        filename : `str`
            Name of file to calculate checksum from.
        algorithm : `str`, optional
            Name of algorithm to use. Must be one of the algorithms supported
            by :py:class`hashlib`.
        block_size : `int`
            Number of bytes to read from file at one time.

        Returns
        -------
        hexdigest : `str`
            Hex digest of the file.
        """
        if algorithm not in hashlib.algorithms_guaranteed:
            raise NameError(
                "The specified algorithm '{}' is not supported by hashlib".
                format(algorithm))

        hasher = hashlib.new(algorithm)

        with open(filename, "rb") as f:
            for chunk in iter(lambda: f.read(block_size), b""):
                hasher.update(chunk)

        return hasher.hexdigest()
Beispiel #19
0
    def checkInstrumentWithRegistry(self, cls, testRaw):
        """Register an instrument in a new repository and round-trip its
        camera and a few of its detectors through the butler.

        Parameters
        ----------
        cls : `type`
            Instrument class under test.  It is instantiated without
            arguments and its ``translatorClass`` is used to read metadata.
        testRaw : `str`
            Name of a raw file, joined to ``DATAROOT``, whose header is read.
        """
        Butler.makeRepo(self.root)
        butler = Butler(self.root, run="tests")
        instrument = cls()
        scFactory = StorageClassFactory()

        # The instrument class and the metadata translator have to agree on
        # the instrument name; readRawFitsHeader supplies the metadata.
        rawPath = os.path.join(DATAROOT, testRaw)
        header = readRawFitsHeader(rawPath, translator_class=cls.translatorClass)
        obsInfo = ObservationInfo(header,
                                  translator_class=cls.translatorClass,
                                  filename=rawPath)
        self.assertEqual(instrument.getName(), obsInfo.instrument)

        # Add Instrument, Detector, and PhysicalFilter entries to the
        # Butler Registry.
        instrument.register(butler.registry)

        # Two DatasetTypes are registered, in this order:
        #  * "camera" for the cameraGeom.Camera, which can be accessed just
        #    by identifying its Instrument.  A real-world Camera DatasetType
        #    should be identified by a validity range as well.
        #  * "detector" for cameraGeom.Detectors, accessed by identifying
        #    the Instrument and (Butler) Detector.  A real-world Detector
        #    DatasetType probably doesn't need to exist, as it would just
        #    duplicate information in the Camera, and reading a full Camera
        #    just to get a single Detector should be plenty efficient.
        datasetTypeSpecs = (
            ("camera", ["instrument"], "Camera"),
            ("detector", ["instrument", "detector"], "Detector"),
        )
        for typeName, typeDimensions, storageClassName in datasetTypeSpecs:
            datasetType = DatasetType(
                typeName,
                dimensions=typeDimensions,
                storageClass=scFactory.getStorageClass(storageClassName),
                universe=butler.registry.dimensions)
            butler.registry.registerDatasetType(datasetType)

        # Put and get the Camera.
        cameraDataId = {"instrument": instrument.instrument}
        butler.put(instrument.getCamera(), "camera", dataId=cameraDataId)
        camera = butler.get("camera", cameraDataId)
        # Full camera comparisons are *slow*; just compare names.
        self.assertEqual(instrument.getCamera().getName(), camera.getName())

        # Put and get a random subset of the Detectors.
        allDetectors = list(instrument.getCamera())
        numDetectors = min(3, len(allDetectors))
        chosenIndices = self.rng.choice(len(allDetectors), size=numDetectors,
                                        replace=False)
        for index in chosenIndices:
            original = allDetectors[index]
            # Right now we only support integer detector IDs in data IDs;
            # support for detector names and groups (i.e. rafts) is
            # definitely planned but not yet implemented.
            detectorDataId = {"instrument": instrument.instrument,
                              "detector": original.getId()}
            butler.put(original, "detector", dataId=detectorDataId)
            retrieved = butler.get("detector", dataId=detectorDataId)
            # Full detector comparisons are *slow*; just compare names and
            # serials.
            self.assertEqual(original.getName(), retrieved.getName())
            self.assertEqual(original.getSerial(), retrieved.getSerial())
Beispiel #20
0
 def getDatasetType(cls):
     """Build the ``raw`` `DatasetType` describing the Datasets ingested
     by this Task.
     """
     dimensionNames = ("Instrument", "Detector", "Exposure")
     exposureStorageClass = StorageClassFactory().getStorageClass("Exposure")
     return DatasetType("raw", dimensionNames, exposureStorageClass)
 def setUpClass(cls):
     """Skip the tests when afw is unavailable; otherwise create the shared
     storage class factory and feed it ``cls.configFile``.
     """
     afwMissing = lsst.afw is None
     if afwMissing:
         raise unittest.SkipTest("afw not available.")
     factory = StorageClassFactory()
     cls.storageClassFactory = factory
     factory.addFromConfig(cls.configFile)
    def testRegistry(self):
        """Exercise registration, lookup, membership tests and removal of
        storage classes that are created at run time."""
        imageName = "TestImage"
        factory = StorageClassFactory()
        imageClass = StorageClass(imageName, pytype=PythonType)
        factory.registerStorageClass(imageClass)

        # What comes back from the factory matches what was registered
        fetched = factory.getStorageClass(imageName)
        self.assertIsInstance(fetched, StorageClass)
        self.assertEqual(fetched.name, imageName)
        self.assertFalse(fetched.components)
        self.assertEqual(fetched.pytype, PythonType)
        self.assertIn(fetched, factory)

        # Membership works for instances and for names, and copes with
        # objects that are neither
        stringClass = StorageClass("Temporary2", pytype=str)
        self.assertNotIn(stringClass, factory)
        factory.registerStorageClass(stringClass)
        self.assertIn(stringClass, factory)
        self.assertIn("Temporary2", factory)
        self.assertNotIn("Temporary3", factory)
        self.assertNotIn({}, factory)

        # The same name with a different definition must be refused
        conflictingClass = StorageClass("Temporary2", pytype=dict)
        with self.assertRaises(ValueError):
            factory.registerStorageClass(conflictingClass)

        # Once the name has been unregistered the new definition is accepted
        factory._unregisterStorageClass(conflictingClass.name)
        for key in (conflictingClass, conflictingClass.name):
            self.assertNotIn(key, factory)
        factory.registerStorageClass(conflictingClass)
        for key in (conflictingClass, conflictingClass.name):
            self.assertIn(key, factory)

        # Registering something that is already there is silently accepted
        factory.registerStorageClass(conflictingClass)
Beispiel #23
0
class QueryDimensionRecordsTest(unittest.TestCase, ButlerTestHelper):
    """Tests for the ``query-dimension-records`` butler subcommand.

    Every test runs against a fresh `MetricTestRepo` created in a temporary
    directory and compares the table printed by the command with an
    expected table of ``visit`` records.
    """

    mockFuncName = "lsst.daf.butler.cli.cmd.commands.script.queryDimensionRecords"

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    storageClassFactory = StorageClassFactory()

    expectedColumnNames = ("instrument", "id", "physical_filter",
                           "visit_system", "name", "day_obs", "exposure_time",
                           "target_name", "observation_reason",
                           "science_program", "zenith_angle", "region",
                           "timespan [2]")

    @staticmethod
    def _visitRow(visitId, name):
        """Return the expected printed row for one ``DummyCamComp`` visit.

        Parameters
        ----------
        visitId : `str`
            Visit ID as it appears in the printed table.
        name : `str`
            Visit name as it appears in the printed table.

        Returns
        -------
        row : `tuple` of `str`
            One value per entry of ``expectedColumnNames``; every column
            after ``name`` holds the placeholder text these tests expect.
        """
        return ("DummyCamComp", visitId, "d-r", "1", name,
                "None", "None", "None", "None", "None", "None", "None",
                "None .. None")

    def _assertVisitRecords(self, result, *rows):
        """Assert that a CLI invocation succeeded and printed ``rows``.

        Parameters
        ----------
        result
            Object returned by ``self.runner.invoke``.
        *rows : `tuple` of `str`
            Expected table rows, in order, as built by `_visitRow`.
        """
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        expected = AstropyTable(array(rows), names=self.expectedColumnNames)
        self.assertAstropyTablesEqual(readTable(result.output), expected)

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        # Use the class-level config path rather than rebuilding it here so
        # the two cannot drift apart.
        self.testRepo = MetricTestRepo(self.root, configFile=self.configFile)
        self.runner = LogCliRunner()

    def tearDown(self):
        removeTestTempDir(self.root)

    def testBasic(self):
        """Query all visit records without any constraint."""
        result = self.runner.invoke(
            butlerCli, ["query-dimension-records", self.root, "visit"])
        self._assertVisitRecords(result,
                                 self._visitRow("423", "fourtwentythree"),
                                 self._visitRow("424", "fourtwentyfour"))

    def testWhere(self):
        """Restrict the visit records with a ``--where`` expression."""
        result = self.runner.invoke(butlerCli, [
            "query-dimension-records", self.root, "visit", "--where",
            "instrument='DummyCamComp' AND visit.name='fourtwentythree'"
        ])
        self._assertVisitRecords(result,
                                 self._visitRow("423", "fourtwentythree"))

    def testCollection(self):
        """Restrict the visit records by collection and dataset type."""

        butler = Butler(self.root, run="foo")

        # try replacing the testRepo's butler with the one with the "foo" run.
        self.testRepo.butler = butler

        self.testRepo.butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 425,
                "name": "fourtwentyfive",
                "physical_filter": "d-r",
                "visit_system": 1
            })
        self.testRepo.addDataset(
            dataId={"instrument": "DummyCamComp", "visit": 425},
            run="foo")

        # verify getting records from the "ingest/run" collection
        result = self.runner.invoke(butlerCli, [
            "query-dimension-records", self.root, "visit", "--collections",
            "ingest/run", "--datasets", "test_metric_comp"
        ])
        self._assertVisitRecords(result,
                                 self._visitRow("423", "fourtwentythree"),
                                 self._visitRow("424", "fourtwentyfour"))

        # verify getting records from the "foo" collection
        result = self.runner.invoke(butlerCli, [
            "query-dimension-records", self.root, "visit", "--collections",
            "foo", "--datasets", "test_metric_comp"
        ])
        self._assertVisitRecords(result,
                                 self._visitRow("425", "fourtwentyfive"))
    def testFactoryConfig(self):
        """Check storage classes that the factory builds from configuration.

        Covers the relationship between related storage classes, their
        components and parameters, and creation of new storage class types.
        """
        registry = StorageClassFactory()
        registry.addFromConfig(StorageClassConfig())

        # ImageF is an instance of Image's type but is not equal to Image.
        imageSc = registry.getStorageClass("Image")
        imageFSc = registry.getStorageClass("ImageF")
        self.assertIsInstance(imageFSc, type(imageSc))
        self.assertNotEqual(imageFSc, imageSc)

        # Check component inheritance
        exposureSc = registry.getStorageClass("Exposure")
        exposureFSc = registry.getStorageClass("ExposureF")
        self.assertIsInstance(exposureFSc, type(exposureSc))
        self.assertIsInstance(exposureSc.components["image"], type(imageSc))
        self.assertNotIsInstance(exposureSc.components["image"],
                                 type(imageFSc))
        for imageType in (type(imageSc), type(imageFSc)):
            self.assertIsInstance(exposureFSc.components["image"], imageType)
        for exposureLike in (exposureSc, exposureFSc):
            self.assertIn("wcs", exposureLike.components)

        # Check parameters
        extraConfig = os.path.join(TESTDIR, "config", "basic",
                                   "storageClasses.yaml")
        registry.addFromConfig(extraConfig)
        thingOne = registry.getStorageClass("ThingOne")
        thingTwo = registry.getStorageClass("ThingTwo")
        self.assertIsInstance(thingTwo, type(thingOne))
        baseParams = thingOne.parameters
        extendedParams = thingTwo.parameters
        self.assertIn("param3", thingTwo.parameters)
        self.assertNotIn("param3", thingOne.parameters)
        # Apart from "param3" the two parameter collections must match.
        extendedParams.remove("param3")
        self.assertEqual(baseParams, extendedParams)

        # A new StorageClass type must inherit from StorageClass; any
        # other base class is rejected.
        with self.assertRaises(ValueError):
            registry.makeNewStorageClass("ClassName",
                                         baseClass=StorageClassFactory)

        newType = registry.makeNewStorageClass("ClassName")
        self.assertIsInstance(newType(), StorageClass)
Beispiel #25
0
 def setUpClass(cls):
     """Register name-only storage classes SCA, SCB, SCC, SCX and SCY."""
     storageClassNames = ("SCA", "SCB", "SCC", "SCX", "SCY")
     for scName in storageClassNames:
         placeholder = StorageClass(scName)
         StorageClassFactory().registerStorageClass(placeholder)
Beispiel #26
0
class CliRetrieveArtifactsTest(unittest.TestCase, ButlerTestHelper):
    """Tests for the ``retrieve-artifacts`` command-line subcommand.

    Every test runs against a fresh `MetricTestRepo` created in a temporary
    directory and retrieves artifacts into a directory inside the runner's
    isolated filesystem.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    storageClassFactory = StorageClassFactory()

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.testRepo = MetricTestRepo(self.root, configFile=self.configFile)

    def tearDown(self):
        removeTestTempDir(self.root)

    @staticmethod
    def find_files(root: str) -> List[ButlerURI]:
        """Return every file resource found under ``root`` as a list."""
        resources = ButlerURI.findFileResources([root])
        return list(resources)

    def testRetrieveAll(self):
        """Retrieve everything, with and without path preservation."""
        # When preserving the path the run will be in the directory along
        # with a . in the component name.  When not preserving paths the
        # filename will have an underscore rather than dot.
        modes = (("--preserve-path", "ingest/run/test_metric_comp."),
                 ("--no-preserve-path", "test_metric_comp_"))

        runner = LogCliRunner()
        with runner.isolated_filesystem():
            for index, (pathOption, namePrefix) in enumerate(modes):
                destdir = f"tmp{index}/"
                command = ["retrieve-artifacts", self.root, destdir,
                           pathOption]
                result = runner.invoke(cli, command)
                self.assertEqual(result.exit_code, 0, clickResultMsg(result))
                reportedSix = result.stdout.endswith(": 6\n")
                self.assertTrue(reportedSix,
                                f"Expected 6 got: {result.stdout}")

                artifacts = self.find_files(destdir)
                self.assertEqual(len(artifacts), 6,
                                 f"Expected 6 artifacts: {artifacts}")
                self.assertIn(f"{destdir}{namePrefix}", str(artifacts[1]))

    def testRetrieveSubset(self):
        """Retrieve only the artifacts selected by a ``--where`` clause."""
        runner = LogCliRunner()
        with runner.isolated_filesystem():
            destdir = "tmp1/"
            command = ["retrieve-artifacts", self.root, destdir,
                       "--where", "instrument='DummyCamComp' AND visit=423"]
            result = runner.invoke(cli, command)
            self.assertEqual(result.exit_code, 0, clickResultMsg(result))
            reportedThree = result.stdout.endswith(": 3\n")
            self.assertTrue(reportedThree,
                            f"Expected 3 got: {result.stdout}")
            artifacts = self.find_files(destdir)
            self.assertEqual(len(artifacts), 3,
                             f"Expected 3 artifacts: {artifacts}")

    def testClobber(self):
        """Retrieving twice into one directory needs ``--clobber``."""
        runner = LogCliRunner()
        with runner.isolated_filesystem():
            destdir = "tmp2/"

            def retrieve(*extraArgs):
                # Build a fresh argument list for every invocation.
                return runner.invoke(
                    cli,
                    ["retrieve-artifacts", self.root, destdir, *extraArgs])

            firstRun = retrieve()
            self.assertEqual(firstRun.exit_code, 0, clickResultMsg(firstRun))

            # Running again should fail
            secondRun = retrieve()
            self.assertNotEqual(secondRun.exit_code, 0,
                                clickResultMsg(secondRun))

            # But with clobber should pass
            clobberRun = retrieve("--clobber")
            self.assertEqual(clobberRun.exit_code, 0,
                             clickResultMsg(clobberRun))