def testNameValidation(self):
    """Test that dataset type names only contain certain characters in
    certain positions.
    """
    dims = self.universe.extract(("instrument", "visit"))
    valid = ("a", "A", "z1", "Z1", "a_1B", "A_1b")
    invalid = ("1", "_", "a%b", "B+Z", "T[0]")

    # Build a storage class whose components carry every valid name so
    # the component-name checks below are internally consistent.
    componentClasses = {n: StorageClass("component") for n in valid}
    parentClass = StorageClass("test_StructuredData", components=componentClasses)

    for name in valid:
        composite = DatasetType(name, dims, parentClass)
        self.assertEqual(composite.name, name)
        # Every valid name is also acceptable as a component suffix.
        for suffix in valid:
            expected = DatasetType.nameWithComponent(name, suffix)
            comp = composite.makeComponentDatasetType(suffix)
            self.assertEqual(comp.name, expected)
            self.assertEqual(comp.parentStorageClass.name, "test_StructuredData")
        # A bad suffix must be rejected even when the parent name is good.
        for suffix in invalid:
            full = DatasetType.nameWithComponent(name, suffix)
            with self.subTest(full=full), self.assertRaises(ValueError):
                DatasetType(full, dims, parentClass)

    # Bad top-level names are rejected outright.
    for name in invalid:
        with self.subTest(name=name), self.assertRaises(ValueError):
            DatasetType(name, dims, parentClass)
def testPickle(self):
    """Test pickle support.
    """
    storageClass = StorageClass("test_pickle")
    datasetTypeName = "test"
    dimensions = self.universe.extract(("instrument", "visit"))
    # Un-pickling requires that storage class is registered with factory.
    StorageClassFactory().registerStorageClass(storageClass)
    datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
    datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
    self.assertIsInstance(datasetTypeOut, DatasetType)
    self.assertEqual(datasetType.name, datasetTypeOut.name)
    self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names)
    self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass)
    # A non-component dataset type has no parent storage class.
    self.assertIsNone(datasetTypeOut.parentStorageClass)
    self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
    self.assertFalse(datasetTypeOut.isCalibration())

    # The calibration flag must survive the pickle round trip as well.
    datasetType = DatasetType(datasetTypeName, dimensions, storageClass,
                              isCalibration=True)
    datasetTypeOut = pickle.loads(pickle.dumps(datasetType))
    self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration())
    self.assertTrue(datasetTypeOut.isCalibration())

    # And again with a composite
    componentStorageClass = StorageClass("pickle_component")
    StorageClassFactory().registerStorageClass(componentStorageClass)
    componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                       dimensions, componentStorageClass,
                                       parentStorageClass=storageClass)
    datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
    self.assertIsInstance(datasetTypeOut, DatasetType)
    self.assertEqual(componentDatasetType.name, datasetTypeOut.name)
    self.assertEqual(componentDatasetType.dimensions.names, datasetTypeOut.dimensions.names)
    self.assertEqual(componentDatasetType.storageClass, datasetTypeOut.storageClass)
    self.assertEqual(componentDatasetType.parentStorageClass,
                     datasetTypeOut.parentStorageClass)
    self.assertEqual(datasetTypeOut.parentStorageClass.name,
                     storageClass.name)
    self.assertEqual(datasetTypeOut, componentDatasetType)

    # Now with a string and not a real storage class to test that
    # pickling doesn't force the StorageClass to be resolved
    componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                       dimensions, "StrangeComponent",
                                       parentStorageClass="UnknownParent")
    datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
    self.assertEqual(datasetTypeOut, componentDatasetType)
    # Compare the stored *name* so the class is never resolved here.
    self.assertEqual(datasetTypeOut._parentStorageClassName,
                     componentDatasetType._parentStorageClassName)

    # Now with a storage class that is created by the factory
    factoryStorageClassClass = StorageClassFactory.makeNewStorageClass("ParentClass")
    factoryComponentStorageClassClass = StorageClassFactory.makeNewStorageClass("ComponentClass")
    componentDatasetType = DatasetType(DatasetType.nameWithComponent(datasetTypeName, "comp"),
                                       dimensions, factoryComponentStorageClassClass(),
                                       parentStorageClass=factoryStorageClassClass())
    datasetTypeOut = pickle.loads(pickle.dumps(componentDatasetType))
    self.assertEqual(datasetTypeOut, componentDatasetType)
    self.assertEqual(datasetTypeOut._parentStorageClassName,
                     componentDatasetType._parentStorageClassName)
def assertGetComponents(self, butler, datasetTypeName, dataId, components, reference):
    """Check that each named component can be retrieved through the butler
    and equals the corresponding attribute of ``reference``.
    """
    for comp in components:
        fullName = DatasetType.nameWithComponent(datasetTypeName, comp)
        fetched = butler.get(fullName, dataId)
        expected = getattr(reference, comp)
        self.assertEqual(fetched, expected)
def testConstructor(self):
    """Test construction preserves values.

    Note that construction doesn't check for valid storageClass.
    This can only be verified for a particular schema.
    """
    typeName = "test"
    sc = StorageClass("test_StructuredData")
    dims = self.universe.extract(("instrument", "visit"))

    datasetType = DatasetType(typeName, dims, sc)
    self.assertEqual(datasetType.name, typeName)
    self.assertEqual(datasetType.storageClass, sc)
    self.assertEqual(datasetType.dimensions, dims)

    # A component dataset type requires a parent storage class.
    with self.assertRaises(ValueError,
                           msg="Construct component without parent storage class"):
        DatasetType(DatasetType.nameWithComponent(typeName, "comp"), dims, sc)

    # Conversely, a non-component dataset type may not declare one.
    with self.assertRaises(ValueError,
                           msg="Construct non-component with parent storage class"):
        DatasetType(typeName, dims, sc, parentStorageClass="NotAllowed")
def runExposureCompositePutGetTest(self, storageClass, datasetTypeName):
    """Round-trip an afw ExposureF through the butler and read back
    individual components, then re-read with bbox/origin parameters.
    """
    example = os.path.join(TESTDIR, "data", "basic", "small.fits")
    exposure = lsst.afw.image.ExposureF(example)
    butler = Butler(self.tmpConfigFile)
    dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
    self.registerDatasetTypes(datasetTypeName, dimensions, storageClass,
                              butler.registry)
    dataId = {"visit": 42, "instrument": "DummyCam", "physical_filter": "d-r"}
    # Add needed Dimensions
    butler.registry.addDimensionEntry("instrument", {"instrument": "DummyCam"})
    butler.registry.addDimensionEntry("physical_filter",
                                      {"instrument": "DummyCam",
                                       "physical_filter": "d-r"})
    butler.registry.addDimensionEntry("visit",
                                      {"instrument": "DummyCam", "visit": 42,
                                       "physical_filter": "d-r"})
    butler.put(exposure, datasetTypeName, dataId)
    # Get the full thing
    butler.get(datasetTypeName, dataId)
    # TODO enable check for equality (fix for Exposure type)
    # self.assertEqual(full, exposure)
    # Get a component
    compsRead = {}
    for compName in ("wcs", "image", "mask", "coaddInputs", "psf"):
        compTypeName = DatasetType.nameWithComponent(datasetTypeName, compName)
        component = butler.get(compTypeName, dataId)
        # TODO enable check for component instance types
        # compRef = butler.registry.find(butler.run.collection,
        #                                f"calexp.{compName}", dataId)
        # self.assertIsInstance(component,
        #                       compRef.datasetType.storageClass.pytype)
        compsRead[compName] = component
    # Simple check of WCS
    bbox = lsst.afw.geom.Box2I(lsst.afw.geom.Point2I(0, 0),
                               lsst.afw.geom.Extent2I(9, 9))
    self.assertWcsAlmostEqualOverBBox(compsRead["wcs"], exposure.getWcs(), bbox)

    # With parameters: a subimage get should honor the requested bbox.
    inBBox = Box2I(minimum=Point2I(0, 0), maximum=Point2I(3, 3))
    parameters = dict(bbox=inBBox, origin=LOCAL)
    subset = butler.get(datasetTypeName, dataId, parameters=parameters)
    outBBox = subset.getBBox()
    self.assertEqual(inBBox, outBBox)
def putFits(self, exposure, datasetTypeName, visit):
    """Put different datasetTypes and return information."""
    dataId = {"visit": visit, "instrument": "DummyCam", "physical_filter": "d-r"}
    ref = self.butler.put(exposure, datasetTypeName, dataId)
    # File size of the stored dataset, via its URI on disk.
    uri = self.butler.getURI(ref)
    size = os.stat(uri.ospath).st_size
    # Read back just the metadata component.
    metaName = DatasetType.nameWithComponent(datasetTypeName, "metadata")
    meta = self.butler.get(metaName, dataId)
    return meta, size
def testDeepCopy(self):
    """Test that we can copy a dataset type."""
    sc = StorageClass("test_copy")
    name = "test"
    dims = self.universe.extract(("instrument", "visit"))

    # A plain dataset type should compare equal to its deep copy.
    original = DatasetType(name, dims, sc)
    self.assertEqual(copy.deepcopy(original), original)

    # And again with a composite (component dataset type with a parent).
    compSc = StorageClass("copy_component")
    compType = DatasetType(DatasetType.nameWithComponent(name, "comp"),
                           dims, compSc, parentStorageClass=sc)
    self.assertEqual(copy.deepcopy(compType), compType)
def runExposureCompositePutGetTest(self, datasetTypeName: str) -> DatasetRef:
    """Put a test Exposure, read it back whole and component-by-component,
    comparing each component against the assembler's extraction, then
    re-read a parameterized subimage. Returns the DatasetRef of the put.
    """
    example = os.path.join(TESTDIR, "data", "calexp.fits")
    exposure = lsst.afw.image.ExposureF(example)
    dataId = {"visit": 42, "instrument": "DummyCam", "physical_filter": "d-r"}
    ref = self.butler.put(exposure, datasetTypeName, dataId)
    # Get the full thing
    composite = self.butler.get(datasetTypeName, dataId)
    # There is no assert for Exposure so just look at maskedImage
    self.assertMaskedImagesEqual(composite.maskedImage, exposure.maskedImage)
    # Helper for extracting components
    assembler = ExposureAssembler(ref.datasetType.storageClass)
    # Check all possible components that can be read
    allComponents = set()
    allComponents.update(COMPONENTS, READ_COMPONENTS)
    # Get each component from butler independently
    for compName in allComponents:
        compTypeName = DatasetType.nameWithComponent(datasetTypeName, compName)
        component = self.butler.get(compTypeName, dataId)
        reference = assembler.getComponent(exposure, compName)
        self.assertIsInstance(component, type(reference),
                              f"Checking type of component {compName}")
        # Each component type needs its own comparison strategy.
        if compName in ("image", "variance"):
            self.assertImagesEqual(component, reference)
        elif compName == "mask":
            self.assertMasksEqual(component, reference)
        elif compName == "wcs":
            self.assertWcsAlmostEqualOverBBox(component, reference,
                                              exposure.getBBox())
        elif compName == "coaddInputs":
            self.assertEqual(len(component.visits), len(reference.visits),
                             f"cf visits {component.visits}")
            self.assertEqual(len(component.ccds), len(reference.ccds),
                             f"cf CCDs {component.ccds}")
        elif compName == "psf":
            # Equality for PSF does not work
            pass
        elif compName == "filter":
            self.assertEqual(component.getCanonicalName(),
                             reference.getCanonicalName())
        elif compName == "filterLabel":
            self.assertEqual(component, reference)
        elif compName == "visitInfo":
            self.assertEqual(component.getExposureId(),
                             reference.getExposureId(),
                             "VisitInfo comparison")
        elif compName == "metadata":
            # The component metadata has extra fields in it so cannot
            # compare directly.
            for k, v in reference.items():
                self.assertEqual(component[k], v)
        elif compName == "photoCalib":
            # This example has a
            # "spatially constant with mean: inf error: nan" entry
            # which does not compare directly.
            self.assertEqual(str(component), str(reference))
            self.assertIn("spatially constant with mean: 1.99409",
                          str(component), "Checking photoCalib")
        elif compName in ("bbox", "xy0", "dimensions", "validPolygon"):
            self.assertEqual(component, reference)
        elif compName == "apCorrMap":
            self.assertEqual(set(component.keys()), set(reference.keys()))
        elif compName == "transmissionCurve":
            self.assertEqual(component.getThroughputAtBounds(),
                             reference.getThroughputAtBounds())
        elif compName == "detector":
            # Compare by amplifier-name sets; full Detector equality is
            # not used here.
            c_amps = {a.getName() for a in component.getAmplifiers()}
            r_amps = {a.getName() for a in reference.getAmplifiers()}
            self.assertEqual(c_amps, r_amps)
        elif compName == 'summaryStats':
            self.assertEqual(component.psfSigma, reference.psfSigma)
        else:
            # Fail loudly if a new component appears without a comparison.
            raise RuntimeError(
                f"Unexpected component '{compName}' encountered in test")
    # Full Exposure with parameters
    inBBox = Box2I(minimum=Point2I(3, 3), maximum=Point2I(21, 16))
    parameters = dict(bbox=inBBox, origin=LOCAL)
    subset = self.butler.get(datasetTypeName, dataId, parameters=parameters)
    outBBox = subset.getBBox()
    self.assertEqual(inBBox, outBBox)
    self.assertImagesEqual(subset.getImage(),
                           exposure.subset(inBBox, origin=LOCAL).getImage())
    return ref
def testGetDatasetTypes(self):
    """Register several dataset types, check that the registry reports
    them (including per-component entries), and exercise configuration
    validation with ignores, subsets, and a deliberately bad type.
    """
    butler = Butler(self.tmpConfigFile)
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit", "physical_filter"])
    dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                        ("instrument", {"instrument": "DummyHSC"}),
                        ("instrument", {"instrument": "DummyCamComp"}),
                        ("physical_filter", {"instrument": "DummyCam",
                                             "physical_filter": "d-r"}),
                        ("visit", {"instrument": "DummyCam", "visit": 42,
                                   "physical_filter": "d-r"}))
    storageClass = self.storageClassFactory.getStorageClass("StructuredData")
    # Add needed Dimensions
    for name, value in dimensionEntries:
        butler.registry.addDimensionEntry(name, value)
    # When a DatasetType is added to the registry entries are created
    # for each component. Need entries for each component in the test
    # configuration otherwise validation won't work. The ones that
    # are deliberately broken will be ignored later.
    datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
    components = set()
    for datasetTypeName in datasetTypeNames:
        # Create and register a DatasetType
        self.addDatasetType(datasetTypeName, dimensions, storageClass,
                            butler.registry)
        for componentName in storageClass.components:
            components.add(
                DatasetType.nameWithComponent(datasetTypeName, componentName))
    # Registry must report exactly the parents plus all their components.
    fromRegistry = butler.registry.getAllDatasetTypes()
    self.assertEqual({d.name for d in fromRegistry},
                     datasetTypeNames | components)
    # Now that we have some dataset types registered, validate them
    butler.validateConfiguration(ignore=["test_metric_comp", "metric3",
                                         "calexp", "DummySC",
                                         "datasetType.component"])
    # Add a new datasetType that will fail template validation
    self.addDatasetType("test_metric_comp", dimensions, storageClass,
                        butler.registry)
    if self.validationCanFail:
        with self.assertRaises(ValidationError):
            butler.validateConfiguration()
    # Rerun validation but with a subset of dataset type names
    butler.validateConfiguration(datasetTypeNames=["metric4"])
    # Rerun validation but ignore the bad datasetType
    butler.validateConfiguration(ignore=["test_metric_comp", "metric3",
                                         "calexp", "DummySC",
                                         "datasetType.component"])
def runPutGetTest(self, storageClass, datasetTypeName):
    """Exercise the full put/get/remove lifecycle of a dataset: put by
    ref, by name, and by type; component reads; removal from collection
    vs. datastore vs. registry; parameterized gets; and error cases.
    Returns the butler so downstream tests can reuse it.
    """
    butler = Butler(self.tmpConfigFile, run="ingest")
    # There will not be a collection yet
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, set())
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
    datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                      storageClass, butler.registry)
    # Add needed Dimensions
    butler.registry.insertDimensionData("instrument",
                                        {"name": "DummyCamComp"})
    butler.registry.insertDimensionData("physical_filter",
                                        {"instrument": "DummyCamComp",
                                         "name": "d-r",
                                         "abstract_filter": "R"})
    butler.registry.insertDimensionData("visit",
                                        {"instrument": "DummyCamComp",
                                         "id": 423,
                                         "name": "fourtwentythree",
                                         "physical_filter": "d-r"})
    # Create and store a dataset
    metric = makeExampleMetrics()
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create a DatasetRef for put
    refIn = DatasetRef(datasetType, dataId, id=None)
    # Put with a preexisting id should fail
    with self.assertRaises(ValueError):
        butler.put(metric, DatasetRef(datasetType, dataId, id=100))
    # Put and remove the dataset once as a DatasetRef, once as a dataId,
    # and once with a DatasetType
    for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
        with self.subTest(args=args):
            ref = butler.put(metric, *args)
            self.assertIsInstance(ref, DatasetRef)
            # Test getDirect
            metricOut = butler.getDirect(ref)
            self.assertEqual(metric, metricOut)
            # Test get
            metricOut = butler.get(ref.datasetType.name, dataId)
            self.assertEqual(metric, metricOut)
            # Test get with a datasetRef
            metricOut = butler.get(ref)
            self.assertEqual(metric, metricOut)
            # Test getDeferred with dataId
            metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
            self.assertEqual(metric, metricOut)
            # Test getDeferred with a datasetRef
            metricOut = butler.getDeferred(ref).get()
            self.assertEqual(metric, metricOut)
            # Check we can get components
            if storageClass.isComposite():
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"),
                                         metric)
            # Remove from collection only; after that we shouldn't be able
            # to find it unless we use the dataset_id.
            butler.remove(*args, delete=False)
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # If we use the output ref with the dataset_id, we should
            # still be able to load it with getDirect().
            self.assertEqual(metric, butler.getDirect(ref))
            # Reinsert into collection, then delete from Datastore *and*
            # remove from collection.
            butler.registry.associate(butler.collection, [ref])
            butler.remove(*args)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # Now getDirect() should fail, too.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry still knows about it, if we use the dataset_id.
            self.assertEqual(butler.registry.getDataset(ref.id), ref)
            # Put again, then remove completely (this generates a new
            # dataset record in registry, with a new ID - the old one
            # still exists but it is not in any collection so we don't
            # care).
            ref = butler.put(metric, *args)
            butler.remove(*args, remember=False)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))
    # Put the dataset again, since the last thing we did was remove it.
    ref = butler.put(metric, refIn)
    # Get with parameters
    stop = 4
    sliced = butler.get(ref, parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # getDeferred with parameters
    sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # getDeferred with deferred parameters
    sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)

    if storageClass.isComposite():
        # Delete one component and check that the other components
        # can still be retrieved
        metricOut = butler.get(ref.datasetType.name, dataId)
        compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
        compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
        summary = butler.get(compNameS, dataId)
        self.assertEqual(summary, metric.summary)
        self.assertTrue(butler.datastore.exists(ref.components["summary"]))
        butler.remove(compNameS, dataId, remember=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(compNameS, dataId)
        self.assertFalse(butler.datastore.exists(ref.components["summary"]))
        self.assertTrue(butler.datastore.exists(ref.components["data"]))
        data = butler.get(compNameD, dataId)
        self.assertEqual(data, metric.data)

    # Combining a DatasetRef with a dataId should fail
    with self.assertRaises(ValueError):
        butler.get(ref, dataId)
    # Getting with an explicit ref should fail if the id doesn't match
    with self.assertRaises(ValueError):
        butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

    # Getting a dataset with unknown parameters should fail
    with self.assertRaises(KeyError):
        butler.get(ref, parameters={"unsupported": True})

    # Check we have a collection
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, {"ingest", })

    # Clean up to check that we can remove something that may have
    # already had a component removed
    butler.remove(ref.datasetType.name, dataId)

    # Add a dataset back in since some downstream tests require
    # something to be present
    ref = butler.put(metric, refIn)

    return butler

    # NOTE(review): everything below appears after ``return butler`` and is
    # unreachable here. It reads like the body of a *separate* test method
    # (deferred/collection-passing behavior) whose ``def`` line is missing
    # from this chunk of the file — confirm against the original source and
    # restore the method header rather than leaving this dead code.
    # Construct a butler with no run or collection, but make it writeable.
    butler = Butler(self.tmpConfigFile, writeable=True)
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
    datasetType = self.addDatasetType(
        "example", dimensions,
        self.storageClassFactory.getStorageClass("StructuredData"),
        butler.registry)
    # Add needed Dimensions
    butler.registry.insertDimensionData("instrument",
                                        {"name": "DummyCamComp"})
    butler.registry.insertDimensionData("physical_filter",
                                        {"instrument": "DummyCamComp",
                                         "name": "d-r",
                                         "abstract_filter": "R"})
    butler.registry.insertDimensionData("visit",
                                        {"instrument": "DummyCamComp",
                                         "id": 423,
                                         "name": "fourtwentythree",
                                         "physical_filter": "d-r"})
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create dataset.
    metric = makeExampleMetrics()
    # Register a new run and put dataset.
    run = "deferred"
    butler.registry.registerRun(run)
    ref = butler.put(metric, datasetType, dataId, run=run)
    # Putting with no run should fail with TypeError.
    with self.assertRaises(TypeError):
        butler.put(metric, datasetType, dataId)
    # Dataset should exist.
    self.assertTrue(butler.datasetExists(datasetType, dataId, collection=run))
    # We should be able to get the dataset back, but with and without
    # a deferred dataset handle.
    self.assertEqual(metric,
                     butler.get(datasetType, dataId, collection=run))
    self.assertEqual(metric,
                     butler.getDeferred(datasetType, dataId,
                                        collection=run).get())
    # Trying to find the dataset without any collection is a TypeError.
    with self.assertRaises(TypeError):
        butler.datasetExists(datasetType, dataId)
    with self.assertRaises(TypeError):
        butler.get(datasetType, dataId)
    with self.assertRaises(TypeError):
        butler.remove(datasetType, dataId)
    # Associate the dataset with a different collection.
    butler.registry.associate("tagged", [ref])
    # Deleting the dataset from the new collection should make it findable
    # in the original collection but without a Datastore entry.
    butler.remove(datasetType, dataId, collection="tagged")
    self.assertFalse(butler.datasetExists(datasetType, dataId, collection=run))