Example 1
from astropy.coordinates import SkyCoord
from astropy.table import QTable
import astropy.units as u
import numpy

from lsst.daf.butler import Butler, DatasetType


def put_values(repo, visit, detector, instrument, out_collection,
               ra=None, dec=None, size=None, filename=None):
    butler = Butler(repo, writeable=True, run=out_collection)
    # Registering the dataset type doesn't strictly need to be done on every
    # call, and it is harmless if the dataset type already exists.
    position_dataset_type = DatasetType('cutout_positions', dimensions=['visit', 'detector', 'instrument'],
                                        universe=butler.registry.dimensions,
                                        storageClass='AstropyQTable')
    butler.registry.registerDatasetType(position_dataset_type)

    if filename:
        poslist = numpy.genfromtxt(filename, dtype=None, delimiter=',')
    else:
        poslist = [(ra, dec, size), ]
    ident = []
    pos = []
    sizes = []  # a distinct name, so the ``size`` argument is not shadowed
    for i, rec in enumerate(poslist):
        pt = SkyCoord(rec[0], rec[1], frame='icrs', unit=u.deg)
        pos.append(pt)
        ident.append(i*u.dimensionless_unscaled)
        sizes.append(float(rec[2])*u.dimensionless_unscaled)
    out_table = QTable([ident, pos, sizes], names=['id', 'position', 'size'])
    butler.put(out_table, 'cutout_positions', visit=visit, detector=detector, instrument=instrument)
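
A minimal usage sketch for put_values, assuming a Gen3 repo already exists
at the given path (repository path, collection, instrument, and coordinates
below are illustrative placeholders, not values from the source):

put_values("./repo",                    # hypothetical repository path
           visit=192350, detector=175,  # hypothetical data ID values
           instrument="LSSTCam-imSim",  # hypothetical instrument name
           out_collection="u/demo/cutout_positions",
           ra=55.75, dec=-32.27, size=30.0)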
Example 2
    def runExposureCompositePutGetTest(self, storageClass, datasetTypeName):
        example = os.path.join(TESTDIR, "data", "basic", "small.fits")
        exposure = lsst.afw.image.ExposureF(example)
        butler = Butler(self.tmpConfigFile)
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])
        self.registerDatasetTypes(datasetTypeName, dimensions, storageClass,
                                  butler.registry)
        dataId = {
            "visit": 42,
            "instrument": "DummyCam",
            "physical_filter": "d-r"
        }
        # Add needed Dimensions
        butler.registry.addDimensionEntry("instrument",
                                          {"instrument": "DummyCam"})
        butler.registry.addDimensionEntry("physical_filter", {
            "instrument": "DummyCam",
            "physical_filter": "d-r"
        })
        butler.registry.addDimensionEntry("visit", {
            "instrument": "DummyCam",
            "visit": 42,
            "physical_filter": "d-r"
        })
        butler.put(exposure, datasetTypeName, dataId)
        # Get the full thing
        butler.get(datasetTypeName, dataId)
        # TODO enable check for equality (fix for Exposure type)
        # self.assertEqual(full, exposure)
        # Get a component
        compsRead = {}
        for compName in ("wcs", "image", "mask", "coaddInputs", "psf"):
            compTypeName = DatasetType.nameWithComponent(
                datasetTypeName, compName)
            component = butler.get(compTypeName, dataId)
            # TODO enable check for component instance types
            # compRef = butler.registry.find(butler.run.collection,
            #                                f"calexp.{compName}", dataId)
            # self.assertIsInstance(component,
            #                       compRef.datasetType.storageClass.pytype)
            compsRead[compName] = component
        # Simple check of WCS
        bbox = lsst.afw.geom.Box2I(lsst.afw.geom.Point2I(0, 0),
                                   lsst.afw.geom.Extent2I(9, 9))
        self.assertWcsAlmostEqualOverBBox(compsRead["wcs"], exposure.getWcs(),
                                          bbox)

        # With parameters
        inBBox = Box2I(minimum=Point2I(0, 0), maximum=Point2I(3, 3))
        parameters = dict(bbox=inBBox, origin=LOCAL)
        subset = butler.get(datasetTypeName, dataId, parameters=parameters)
        outBBox = subset.getBBox()
        self.assertEqual(inBBox, outBBox)
Example 3
class PexConfigFormatterTestCase(unittest.TestCase):
    """Tests for PexConfigFormatter, using local file datastore."""
    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "config",
            dimensions=(),
            storageClass="Config",
            universe=self.butler.registry.dimensions)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        removeTestTempDir(self.root)

    def testPexConfig(self) -> None:
        """Test that we can put and get pex_config Configs"""
        c = SimpleConfig(i=10, c="hello")
        self.assertEqual(c.i, 10)
        ref = self.butler.put(c, "config")
        butler_c = self.butler.get(ref)
        self.assertEqual(c, butler_c)
        self.assertIsInstance(butler_c, SimpleConfig)
Example 4
    def testBasicPutGet(self):
        butler = Butler(self.configFile)
        # Create and register a DatasetType
        datasetTypeName = "test_metric"
        dataUnits = ("Camera", "Visit")
        storageClass = self.storageClassFactory.getStorageClass(
            "StructuredData")
        self.registerDatasetTypes(datasetTypeName, dataUnits, storageClass,
                                  butler.registry)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"camera": "DummyCam", "visit": 42}
        ref = butler.put(metric, datasetTypeName, dataId)
        self.assertIsInstance(ref, DatasetRef)
        # Test getDirect
        metricOut = butler.getDirect(ref)
        self.assertEqual(metric, metricOut)
        # Test get
        metricOut = butler.get(datasetTypeName, dataId)
        self.assertEqual(metric, metricOut)

        # Check we can get components
        self.assertGetComponents(butler, datasetTypeName, dataId,
                                 ("summary", "data", "output"), metric)
Example 5
    def testHealSparseMapFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("map", [],
                                  "HealSparseMap",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        ref = butler.put(self.hspMap, datasetType)
        uri = butler.getURI(ref)
        self.assertEqual(uri.getExtension(), '.hsp')

        # Retrieve the full map.
        hspMap = butler.get('map')
        self.assertTrue(np.all(hspMap._sparse_map == self.hspMap._sparse_map))

        # Retrieve the coverage map
        coverage = butler.get('map.coverage')
        self.assertTrue(
            np.all(coverage.coverage_mask == self.hspMap.coverage_mask))

        # Retrieve a partial map
        pixels = [0, 6]
        partialMap = butler.get('map', parameters={'pixels': pixels})

        self.assertTrue(
            np.all(np.where(partialMap.coverage_mask)[0] == np.array(pixels)))
        self.assertTrue(np.all(partialMap[0:10000] == self.hspMap[0:10000]))
        self.assertTrue(
            np.all(partialMap[100000:110000] == self.hspMap[100000:110000]))

        # Retrieve a degraded map
        degradedMapRead = butler.get('map', parameters={'degrade_nside': 512})
        degradedMap = self.hspMap.degrade(512)

        self.assertTrue(
            np.all(degradedMapRead._sparse_map == degradedMap._sparse_map))
Example 6
    def testMatplotlibFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("test_plot", [],
                                  "Plot",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        # Does not have to be a random image
        pyplot.imshow([
            self.rng.sample(range(50), 10),
            self.rng.sample(range(50), 10),
            self.rng.sample(range(50), 10),
        ])
        ref = butler.put(pyplot.gcf(), datasetType)
        uri = butler.getURI(ref)
        # The test after this will not work without a local file.
        self.assertEqual(uri.scheme, "file", f"Testing returned URI: {uri}")
        with tempfile.NamedTemporaryFile(suffix=".png") as file:
            pyplot.gcf().savefig(file.name)
            self.assertTrue(filecmp.cmp(uri.path, file.name, shallow=True))
        self.assertTrue(butler.datasetExists(ref))
        with self.assertRaises(ValueError):
            butler.get(ref)
        butler.pruneDatasets([ref], unstore=True, purge=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref)
Example 7
    def testMatplotlibFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("test_plot", [], "Plot",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        # Does not have to be a random image
        pyplot.imshow([self.rng.sample(range(50), 10),
                       self.rng.sample(range(50), 10),
                       self.rng.sample(range(50), 10),
                       ])
        ref = butler.put(pyplot.gcf(), datasetType)
        uri = butler.getURI(ref)

        # The following test needs a local file.
        with uri.as_local() as local:
            with tempfile.NamedTemporaryFile(suffix=".png") as file:
                pyplot.gcf().savefig(file.name)
                self.assertTrue(
                    filecmp.cmp(
                        local.ospath,
                        file.name,
                        shallow=True
                    )
                )
        self.assertTrue(butler.datasetExists(ref))
        with self.assertRaises(ValueError):
            butler.get(ref)
        butler.pruneDatasets([ref], unstore=True, purge=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref)
Example 8
    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])
        dimensionEntries = (("instrument", {
            "instrument": "DummyCam"
        }), ("physical_filter", {
            "instrument": "DummyCam",
            "name": "d-r",
            "abstract_filter": "R"
        }), ("visit", {
            "instrument": "DummyCam",
            "id": 42,
            "name": "fortytwo",
            "physical_filter": "d-r"
        }))
        storageClass = self.storageClassFactory.getStorageClass(
            "StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                                  storageClass,
                                                  butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError(
                    "This should roll back the entire transaction")

        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(
            butler.registry.find(butler.collection, datasetType, dataId))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)
Example 9
    def testAstropyTableFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("table", [],
                                  "AstropyTable",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        ref = butler.put(self.table, datasetType)
        uri = butler.getURI(ref)
        self.assertEqual(uri.getExtension(), '.ecsv')
        table = butler.get('table')
        self.assertTrue(numpy.all(table == self.table))
Example 10
    def testMatplotlibFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("test_plot", [],
                                  "Plot",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        pyplot.imshow(np.random.randn(3, 4))
        ref = butler.put(pyplot.gcf(), datasetType)
        parsed = urllib.parse.urlparse(butler.getUri(ref))
        with tempfile.NamedTemporaryFile(suffix=".png") as file:
            pyplot.gcf().savefig(file.name)
            self.assertTrue(filecmp.cmp(parsed.path, file.name, shallow=True))
        self.assertTrue(butler.datasetExists(ref))
        with self.assertRaises(ValueError):
            butler.get(ref)
        butler.remove(ref)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref)
Example 11
    def checkInstrumentWithRegistry(self, cls, testRaw):
        Butler.makeRepo(self.root)
        butler = Butler(self.root, run="tests")
        instrument = cls()
        scFactory = StorageClassFactory()

        # Check instrument class and metadata translator agree on
        # instrument name, using readRawFitsHeader to read the metadata.
        filename = os.path.join(DATAROOT, testRaw)
        md = readRawFitsHeader(filename, translator_class=cls.translatorClass)
        obsInfo = ObservationInfo(md,
                                  translator_class=cls.translatorClass,
                                  filename=filename)
        self.assertEqual(instrument.getName(), obsInfo.instrument)

        # Add Instrument, Detector, and PhysicalFilter entries to the
        # Butler Registry.
        instrument.register(butler.registry)

        # Define a DatasetType for the cameraGeom.Camera, which can be
        # accessed just by identifying its Instrument.
        # A real-world Camera DatasetType should be identified by a
        # validity range as well.
        cameraDatasetType = DatasetType(
            "camera",
            dimensions=["instrument"],
            storageClass=scFactory.getStorageClass("Camera"),
            universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(cameraDatasetType)

        # Define a DatasetType for cameraGeom.Detectors, which can be
        # accessed by identifying its Instrument and (Butler) Detector.
        # A real-world Detector DatasetType probably doesn't need to exist,
        # as it would just duplicate information in the Camera, and
        # reading a full Camera just to get a single Detector should be
        # plenty efficient.
        detectorDatasetType = DatasetType(
            "detector",
            dimensions=["instrument", "detector"],
            storageClass=scFactory.getStorageClass("Detector"),
            universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(detectorDatasetType)

        # Put and get the Camera.
        dataId = dict(instrument=instrument.instrument)
        butler.put(instrument.getCamera(), "camera", dataId=dataId)
        camera = butler.get("camera", dataId)
        # Full camera comparisons are *slow*; just compare names.
        self.assertEqual(instrument.getCamera().getName(), camera.getName())

        # Put and get a random subset of the Detectors.
        allDetectors = list(instrument.getCamera())
        numDetectors = min(3, len(allDetectors))
        someDetectors = [
            allDetectors[i] for i in self.rng.choice(
                len(allDetectors), size=numDetectors, replace=False)
        ]
        for cameraGeomDetector in someDetectors:
            # Right now we only support integer detector IDs in data IDs;
            # support for detector names and groups (i.e. rafts) is
            # definitely planned but not yet implemented.
            dataId = dict(instrument=instrument.instrument,
                          detector=cameraGeomDetector.getId())
            butler.put(cameraGeomDetector, "detector", dataId=dataId)
            cameraGeomDetector2 = butler.get("detector", dataId=dataId)
            # Full detector comparisons are *slow*; just compare names and
            # serials.
            self.assertEqual(cameraGeomDetector.getName(),
                             cameraGeomDetector2.getName())
            self.assertEqual(cameraGeomDetector.getSerial(),
                             cameraGeomDetector2.getSerial())
Example 12
    def runPutGetTest(self, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                          storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument",
                                            {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {
            "instrument": "DummyCamComp",
            "name": "d-r",
            "abstract_filter": "R"
        })
        butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r"
            })

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn, ), (datasetTypeName, dataId), (datasetType,
                                                            dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name,
                                               dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"),
                                             metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={
            "slice": slice(stop)
        }).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName,
                                                      "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            butler.remove(compNameS, dataId, remember=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(
                ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {
            "ingest",
        })

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.remove(ref.datasetType.name, dataId)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    # Separate snippet: a distinct test method begins here (the method name
    # below is assumed; it is not part of the original excerpt).
    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions,
            self.storageClassFactory.getStorageClass("StructuredData"),
            butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument",
                                            {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {
            "instrument": "DummyCamComp",
            "name": "d-r",
            "abstract_filter": "R"
        })
        butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r"
            })
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(
            butler.datasetExists(datasetType, dataId, collection=run))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType,
                                            dataId,
                                            collection=run))
        self.assertEqual(
            metric,
            butler.getDeferred(datasetType, dataId, collection=run).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.remove(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection but without a Datastore entry.
        butler.remove(datasetType, dataId, collection="tagged")
        self.assertFalse(
            butler.datasetExists(datasetType, dataId, collection=run))
Example 13
class FormattersTests(DatasetTestHelper, lsst.utils.tests.TestCase):
    root = None
    storageClassFactory = None

    @classmethod
    def setUpClass(cls):
        """Create a new butler once only."""

        cls.storageClassFactory = StorageClassFactory()

        cls.root = tempfile.mkdtemp(dir=TESTDIR)

        data_ids = {
            "instrument": [INSTRUMENT_NAME],
            "detector": [0, 1, 2, 3, 4, 5],
            "exposure": [11, 22],
        }

        configURI = ButlerURI("resource://spherex/configs",
                              forceDirectory=True)
        butlerConfig = Config(configURI.join("butler.yaml"))
        # in-memory db is being phased out
        # butlerConfig["registry", "db"] = 'sqlite:///:memory:'
        cls.creatorButler = makeTestRepo(
            cls.root,
            data_ids,
            config=butlerConfig,
            dimensionConfig=configURI.join("dimensions.yaml"))
        for formatter in FORMATTERS:
            datasetTypeName, storageClassName = (formatter["dataset_type"],
                                                 formatter["storage_class"])
            storageClass = cls.storageClassFactory.getStorageClass(
                storageClassName)
            addDatasetType(cls.creatorButler, datasetTypeName, set(data_ids),
                           storageClass)

    @classmethod
    def tearDownClass(cls):
        if cls.root is not None:
            shutil.rmtree(cls.root, ignore_errors=True)

    def setUp(self):
        # make test collection
        # self.butler = makeTestCollection(self.creatorButler)
        self.collection = self._testMethodName
        self.butler = Butler(butler=self.creatorButler, run=self.collection)

    def test_putget(self):
        fitsPath = os.path.join(TESTDIR, "data", "small.fits")
        dataid = {"exposure": 11, "detector": 0, "instrument": INSTRUMENT_NAME}
        for formatter in FORMATTERS:
            # in-memory object, representing fits
            inmemobj = formatter["reader"](fitsPath)

            # save in-memory object into butler dataset
            datasetTypeName = formatter["dataset_type"]
            self.butler.put(inmemobj, datasetTypeName, dataid)

            # get butler dataset
            retrievedobj = self.butler.get(datasetTypeName, dataid)
            self.assertTrue(isinstance(retrievedobj, formatter["inmem_cls"]))
            self.assertEqual(retrievedobj.__class__.__name__,
                             inmemobj.__class__.__name__)

    def test_ingest(self):

        fitsPath = os.path.join(TESTDIR, "data", "small.fits")

        formatter = FORMATTERS[0]
        datasetTypeName, formatterCls = (formatter["dataset_type"],
                                         formatter["formatter_cls"])

        datasetType = self.butler.registry.getDatasetType(datasetTypeName)
        datasets = []
        for exposure in range(3, 5):
            for detector in range(6):
                # use the same fits to test ingest
                if not os.path.exists(fitsPath):
                    log.warning(
                        f"No data found for detector {detector}, exposure {exposure} @ {fitsPath}."
                    )
                    continue
                ref = DatasetRef(datasetType,
                                 dataId={
                                     "instrument": INSTRUMENT_NAME,
                                     "detector": detector,
                                     "exposure": exposure * 11
                                 })
                datasets.append(
                    FileDataset(refs=ref,
                                path=fitsPath,
                                formatter=formatterCls))

        # register new collection
        # run = "rawIngestedRun"
        # self.butler.registry.registerCollection(run, type=CollectionType.RUN)

        # collection is registered as a part of setUp
        run = self.collection

        with self.butler.transaction():
            for exposure in range(3, 5):
                expid = exposure * 11
                self.butler.registry.insertDimensionData(
                    "exposure", {
                        "instrument": INSTRUMENT_NAME,
                        "id": expid,
                        "name": f"{expid}",
                        "group_name": "day1",
                        "timespan": Timespan(begin=None, end=None)
                    })
            # transfer can be 'auto', 'move', 'copy', 'hardlink', 'relsymlink'
            # or 'symlink'
            self.butler.ingest(*datasets, transfer="symlink", run=run)

        # verify that 12 files were ingested (2 exposures for each detector)
        refsSet = set(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run]))
        self.assertEqual(
            len(refsSet), 12,
            f"Collection {run} should have 12 elements after ingest")

        # verify that data id is present
        dataid = {"exposure": 44, "detector": 5, "instrument": INSTRUMENT_NAME}
        refsList = list(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run],
                                               dataId=dataid))
        self.assertEqual(
            len(refsList), 1,
            f"Collection {run} should have 1 element with {dataid}")
Example 14
class ParquetFormatterTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, using PosixDatastore.
    """
    def setUp(self):
        """Create a new butler root for each test."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "data",
            dimensions=(),
            storageClass="DataFrame",
            universe=self.butler.registry.dimensions)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def testSingleIndexDataFrame(self):
        columns1 = pd.Index(["a", "b", "c"])
        df1 = pd.DataFrame(np.random.randn(5, 3),
                           index=np.arange(5, dtype=int),
                           columns=columns1)
        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(df1.columns.equals(columns2))
        # Read just some columns a few different ways.
        df3 = self.butler.get(self.datasetType,
                              dataId={},
                              parameters={"columns": ["a", "c"]})
        self.assertTrue(df1.loc[:, ["a", "c"]].equals(df3))
        df4 = self.butler.get(self.datasetType,
                              dataId={},
                              parameters={"columns": "a"})
        self.assertTrue(df1.loc[:, ["a"]].equals(df4))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType,
                            dataId={},
                            parameters={"columns": ["d"]})

    def testMultiIndexDataFrame(self):
        columns1 = pd.MultiIndex.from_tuples(
            [
                ("g", "a"),
                ("g", "b"),
                ("g", "c"),
                ("r", "a"),
                ("r", "b"),
                ("r", "c"),
            ],
            names=["filter", "column"],
        )
        df1 = pd.DataFrame(np.random.randn(5, 6),
                           index=np.arange(5, dtype=int),
                           columns=columns1)
        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(df1.columns.equals(columns2))
        # Read just some columns a few different ways.
        df3 = self.butler.get(self.datasetType,
                              dataId={},
                              parameters={"columns": {
                                  "filter": "g"
                              }})
        self.assertTrue(df1.loc[:, ["g"]].equals(df3))
        df4 = self.butler.get(
            self.datasetType,
            dataId={},
            parameters={"columns": {
                "filter": ["r"],
                "column": "a"
            }})
        self.assertTrue(df1.loc[:, [("r", "a")]].equals(df4))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType,
                            dataId={},
                            parameters={"columns": ["d"]})
Example 15
    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass(
            "StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile)

        # Add needed Dimensions
        butler.registry.addDimensionEntry("instrument",
                                          {"instrument": "DummyCamComp"})
        butler.registry.addDimensionEntry("physical_filter", {
            "instrument": "DummyCamComp",
            "physical_filter": "d-r"
        })
        butler.registry.addDimensionEntry("visit", {
            "instrument": "DummyCamComp",
            "visit": 423,
            "physical_filter": "d-r"
        })
        butler.registry.addDimensionEntry("visit", {
            "instrument": "DummyCamComp",
            "visit": 425,
            "physical_filter": "d-r"
        })

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])
        butler.registry.registerDatasetType(
            DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(
            DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(
            DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {
            "instrument": "DummyCamComp",
            "visit": 423,
            "physical_filter": "d-r"
        }
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(
            os.path.exists(
                os.path.join(butler.datastore.root,
                             "ingest/metric1/DummyCamComp_423.pickle")))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(
            os.path.exists(
                os.path.join(butler.datastore.root,
                             "ingest/metric2/DummyCamComp_423.pickle")))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)
Example 16
    def runPutGetTest(self, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile)

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                          storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.addDimensionEntry("instrument",
                                          {"instrument": "DummyCamComp"})
        butler.registry.addDimensionEntry("physical_filter", {
            "instrument": "DummyCamComp",
            "physical_filter": "d-r"
        })
        butler.registry.addDimensionEntry("visit", {
            "instrument": "DummyCamComp",
            "visit": 423,
            "physical_filter": "d-r"
        })

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn, ), (datasetTypeName, dataId), (datasetType,
                                                            dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"),
                                             metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {
            "ingest",
        })
Example 17
def makeDiscreteSkyMap(repo,
                       config_file,
                       collections,
                       instrument,
                       skymap_id='discrete',
                       old_skymap_id=None):
    """Implements the command line interface `butler make-discrete-skymap` subcommand,
    should only be called by command line tools and unit test code that tests
    this function.

    Constructs a skymap from calibrated exposure in the butler repository

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo to read.
    config_file : `str` or `None`
        Path to a config file that contains overrides to the skymap config.
    collections : `list` [`str`]
        An expression specifying the collections to be searched (in order) when
        reading datasets, and optionally dataset type restrictions on them.
        At least one collection must be specified.  This is the collection
        with the calibrated exposures.
    instrument : `str`
        The name or fully-qualified class name of an instrument.
    skymap_id : `str`, optional
        The identifier of the skymap to save.  Default is 'discrete'.
    old_skymap_id : `str`, optional
        The identifier of the skymap to append to.  Must differ from
        ``skymap_id``.  Ignored unless ``config.doAppend=True``.
    """
    butler = Butler(repo, collections=collections, writeable=True)
    instr = getInstrument(instrument, butler.registry)
    config = MakeDiscreteSkyMapConfig()
    instr.applyConfigOverrides(MakeDiscreteSkyMapTask._DefaultName, config)

    if config_file is not None:
        config.load(config_file)
    # The coaddName for a SkyMap is only relevant in Gen2, and we completely
    # ignore it here; once Gen2 is gone it can be removed.
    oldSkyMap = None
    if config.doAppend:
        if old_skymap_id is None:
            raise ValueError(
                "old_skymap_id must be provided if config.doAppend is True.")
        dataId = {'skymap': old_skymap_id}
        try:
            oldSkyMap = butler.get(BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
                                   collections=collections,
                                   dataId=dataId)
        except LookupError as e:
            msg = (
                f"Could not find seed skymap with dataId {dataId} "
                f"in collections {collections} but doAppend is {config.doAppend}.  Aborting..."
            )
            raise LookupError(msg, *e.args[1:])

    datasets = butler.registry.queryDatasets('calexp', collections=collections)
    wcs_md_tuple_list = [(butler.getDirect('calexp.metadata', ref),
                          butler.getDirect('calexp.wcs', ref))
                         for ref in datasets]
    task = MakeDiscreteSkyMapTask(config=config)
    result = task.run(wcs_md_tuple_list, oldSkyMap)
    result.skyMap.register(skymap_id, butler)
    butler.put(result.skyMap,
               BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
               dataId={'skymap': skymap_id},
               run=BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
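
A hedged invocation sketch for makeDiscreteSkyMap (the repository URI,
collection name, and instrument class below are placeholders, not values
from the source):

makeDiscreteSkyMap(
    "/repo/main",                                  # hypothetical repo URI
    config_file=None,                              # no skymap config overrides
    collections=["HSC/runs/RC2"],                  # hypothetical calexp collection
    instrument="lsst.obs.subaru.HyperSuprimeCam",  # hypothetical instrument class
)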
Example 18
def populateButler(
        pipeline: Pipeline,
        butler: Butler,
        datasetTypes: Optional[Dict[Optional[str], List[str]]] = None) -> None:
    """Populate data butler with data needed for test.

    Initializes the data butler with a number of items:
    - registers the dataset types defined by the pipeline
    - creates dimension data for (instrument, detector)
    - adds datasets based on the ``datasetTypes`` dictionary; if the
      dictionary is missing, a single dataset of type "add_dataset0" is added

    All datasets added to the butler have ``dataId={instrument=instrument,
    detector=0}``, where ``instrument`` is extracted from the pipeline
    ("INSTR" is used if the pipeline has no instrument definition). The type
    of each dataset is guessed from the dataset type name (this assumes the
    pipeline is made of `AddTask` tasks).

    Parameters
    ----------
    pipeline : `~lsst.pipe.base.Pipeline`
        Pipeline instance.
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    datasetTypes : `dict` [ `str` or `None`, `list` ], optional
        Dictionary whose keys are collection names and values are lists of
        dataset type names. By default a single dataset of type "add_dataset0"
        is added to the ``butler.run`` collection.
    """

    # Add dataset types to registry
    taskDefs = list(pipeline.toExpandedPipeline())
    registerDatasetTypes(butler.registry, taskDefs)

    instrument = pipeline.getInstrument()
    if instrument is not None:
        instrument_class = doImportType(instrument)
        instrumentName = instrument_class.getName()
    else:
        instrumentName = "INSTR"

    # Add all needed dimensions to registry
    butler.registry.insertDimensionData("instrument",
                                        dict(name=instrumentName))
    butler.registry.insertDimensionData(
        "detector", dict(instrument=instrumentName, id=0, full_name="det0"))

    taskDefMap = dict((taskDef.label, taskDef) for taskDef in taskDefs)
    # Add inputs to butler
    if not datasetTypes:
        datasetTypes = {None: ["add_dataset0"]}
    for run, dsTypes in datasetTypes.items():
        if run is not None:
            butler.registry.registerRun(run)
        for dsType in dsTypes:
            if dsType == "packages":
                # Version is intentionally inconsistent.
                # Dict is convertible to Packages if Packages is installed.
                data: Any = {"python": "9.9.99"}
                butler.put(data, dsType, run=run)
            else:
                if dsType.endswith("_config"):
                    # find a config from the matching task name or make a new one
                    taskLabel, _, _ = dsType.rpartition("_")
                    taskDef = taskDefMap.get(taskLabel)
                    if taskDef is not None:
                        data = taskDef.config
                    else:
                        data = AddTaskConfig()
                elif dsType.endswith("_metadata"):
                    data = _TASK_FULL_METADATA_TYPE()
                elif dsType.endswith("_log"):
                    data = ButlerLogRecords.from_records([])
                else:
                    data = numpy.array([0.0, 1.0, 2.0, 5.0])
                butler.put(data,
                           dsType,
                           run=run,
                           instrument=instrumentName,
                           detector=0)
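
A usage sketch, assuming a writeable test butler and the makeSimplePipeline
helper referenced in the next example (the run and dataset type names are
illustrative):

pipeline = makeSimplePipeline(nQuanta=3)  # assumed helper from these test utilities
populateButler(pipeline, butler)          # default: "add_dataset0" into butler.run
populateButler(pipeline, butler,
               datasetTypes={"input_run": ["add_dataset0", "task0_config"]})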
Example 19
def makeSimpleQGraph(nQuanta=5,
                     pipeline=None,
                     butler=None,
                     root=None,
                     skipExisting=False,
                     inMemory=True,
                     userQuery=""):
    """Make simple QuantumGraph for tests.

    Makes simple one-task pipeline with AddTask, sets up in-memory
    registry and butler, fills them with minimal data, and generates
    QuantumGraph with all of that.

    Parameters
    ----------
    nQuanta : `int`
        Number of quanta in a graph.
    pipeline : `~lsst.pipe.base.Pipeline`, optional
        If `None`, a one-task pipeline is made with `AddTask` and
        default `AddTaskConfig`.
    butler : `~lsst.daf.butler.Butler`, optional
        Data butler instance, this should be an instance returned from a
        previous call to this method.
    root : `str`
        Path or URI to the root location of the new repository. Only used if
        ``butler`` is None.
    skipExisting : `bool`, optional
        If `True`, a Quantum is not created if all its outputs already
        exist. Default is `False`.
    inMemory : `bool`, optional
        If `True`, make an in-memory repository.
    userQuery : `str`, optional
        The user query to pass to ``makeGraph``, by default an empty string.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Butler instance.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Quantum graph instance.
    """

    if pipeline is None:
        pipeline = makeSimplePipeline(nQuanta=nQuanta)

    if butler is None:

        if root is None:
            raise ValueError("Must provide `root` when `butler` is None")

        config = Config()
        if not inMemory:
            config["registry", "db"] = f"sqlite:///{root}/gen3.sqlite"
            config[
                "datastore",
                "cls"] = "lsst.daf.butler.datastores.posixDatastore.PosixDatastore"
        repo = butlerTests.makeTestRepo(root, {}, config=config)
        collection = "test"
        butler = Butler(butler=repo, run=collection)

        # Add dataset types to registry
        registerDatasetTypes(butler.registry, pipeline.toExpandedPipeline())

        instrument = pipeline.getInstrument()
        if instrument is not None:
            if isinstance(instrument, str):
                instrument = doImport(instrument)
            instrumentName = instrument.getName()
        else:
            instrumentName = "INSTR"

        # Add all needed dimensions to registry
        butler.registry.insertDimensionData("instrument",
                                            dict(name=instrumentName))
        butler.registry.insertDimensionData(
            "detector", dict(instrument=instrumentName, id=0,
                             full_name="det0"))

        # Add inputs to butler
        data = numpy.array([0., 1., 2., 5.])
        butler.put(data, "add_dataset0", instrument=instrumentName, detector=0)

    # Make the graph
    builder = pipeBase.GraphBuilder(registry=butler.registry,
                                    skipExisting=skipExisting)
    qgraph = builder.makeGraph(pipeline,
                               collections=[butler.run],
                               run=butler.run,
                               userQuery=userQuery)

    return butler, qgraph
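
A minimal usage sketch (the temporary root directory is illustrative):

import tempfile

with tempfile.TemporaryDirectory() as root:
    butler, qgraph = makeSimpleQGraph(nQuanta=3, root=root)
    # The graph should contain one quantum per AddTask instance.
    print(qgraph)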