import numpy
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import QTable
from lsst.daf.butler import Butler, DatasetType


def put_values(repo, visit, detector, instrument, out_collection,
               ra=None, dec=None, size=None, filename=None):
    butler = Butler(repo, writeable=True, run=out_collection)
    # This doesn't strictly need to be done every time, but doesn't seem
    # to hurt if the dataset type already exists.
    position_dataset_type = DatasetType(
        'cutout_positions',
        dimensions=['visit', 'detector', 'instrument'],
        universe=butler.registry.dimensions,
        storageClass='AstropyQTable')
    butler.registry.registerDatasetType(position_dataset_type)
    if filename:
        poslist = numpy.genfromtxt(filename, dtype=None, delimiter=',')
    else:
        poslist = [(ra, dec, size), ]
    ident = []
    pos = []
    sizes = []  # renamed from ``size`` so the parameter is not shadowed
    for i, rec in enumerate(poslist):
        pt = SkyCoord(rec[0], rec[1], frame='icrs', unit=u.deg)
        pos.append(pt)
        ident.append(i*u.dimensionless_unscaled)
        sizes.append(float(rec[2])*u.dimensionless_unscaled)
    out_table = QTable([ident, pos, sizes], names=['id', 'position', 'size'])
    butler.put(out_table, 'cutout_positions',
               visit=visit, detector=detector, instrument=instrument)

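# A minimal usage sketch for put_values above. The repo path, data ID values,
# and output collection name are hypothetical, not from the source:
#
#     put_values("/repo/main", visit=12345, detector=42, instrument="LSSTCam",
#                out_collection="u/someone/cutout_positions",
#                ra=53.16, dec=-27.78, size=10.0)
#
# or, with a CSV file of "ra,dec,size" rows (as read by genfromtxt above):
#
#     put_values("/repo/main", 12345, 42, "LSSTCam",
#                "u/someone/cutout_positions", filename="positions.csv")
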
def runExposureCompositePutGetTest(self, storageClass, datasetTypeName):
    example = os.path.join(TESTDIR, "data", "basic", "small.fits")
    exposure = lsst.afw.image.ExposureF(example)
    butler = Butler(self.tmpConfigFile)
    dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
    self.registerDatasetTypes(datasetTypeName, dimensions, storageClass,
                              butler.registry)
    dataId = {"visit": 42, "instrument": "DummyCam", "physical_filter": "d-r"}
    # Add needed Dimensions
    butler.registry.addDimensionEntry("instrument",
                                      {"instrument": "DummyCam"})
    butler.registry.addDimensionEntry("physical_filter",
                                      {"instrument": "DummyCam",
                                       "physical_filter": "d-r"})
    butler.registry.addDimensionEntry("visit",
                                      {"instrument": "DummyCam", "visit": 42,
                                       "physical_filter": "d-r"})
    butler.put(exposure, datasetTypeName, dataId)
    # Get the full thing
    butler.get(datasetTypeName, dataId)
    # TODO enable check for equality (fix for Exposure type)
    # self.assertEqual(full, exposure)
    # Get a component
    compsRead = {}
    for compName in ("wcs", "image", "mask", "coaddInputs", "psf"):
        compTypeName = DatasetType.nameWithComponent(datasetTypeName, compName)
        component = butler.get(compTypeName, dataId)
        # TODO enable check for component instance types
        # compRef = butler.registry.find(butler.run.collection,
        #                                f"calexp.{compName}", dataId)
        # self.assertIsInstance(component,
        #                       compRef.datasetType.storageClass.pytype)
        compsRead[compName] = component
    # Simple check of WCS
    bbox = lsst.afw.geom.Box2I(lsst.afw.geom.Point2I(0, 0),
                               lsst.afw.geom.Extent2I(9, 9))
    self.assertWcsAlmostEqualOverBBox(compsRead["wcs"], exposure.getWcs(), bbox)
    # With parameters
    inBBox = Box2I(minimum=Point2I(0, 0), maximum=Point2I(3, 3))
    parameters = dict(bbox=inBBox, origin=LOCAL)
    subset = butler.get(datasetTypeName, dataId, parameters=parameters)
    outBBox = subset.getBBox()
    self.assertEqual(inBBox, outBBox)

class PexConfigFormatterTestCase(unittest.TestCase):
    """Tests for PexConfigFormatter, using local file datastore."""

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "config", dimensions=(), storageClass="Config",
            universe=self.butler.registry.dimensions)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        removeTestTempDir(self.root)

    def testPexConfig(self) -> None:
        """Test that we can put and get pex_config Configs."""
        c = SimpleConfig(i=10, c="hello")
        self.assertEqual(c.i, 10)
        ref = self.butler.put(c, "config")
        butler_c = self.butler.get(ref)
        self.assertEqual(c, butler_c)
        self.assertIsInstance(butler_c, SimpleConfig)

def testBasicPutGet(self):
    butler = Butler(self.configFile)
    # Create and register a DatasetType
    datasetTypeName = "test_metric"
    dataUnits = ("Camera", "Visit")
    storageClass = self.storageClassFactory.getStorageClass("StructuredData")
    self.registerDatasetTypes(datasetTypeName, dataUnits, storageClass,
                              butler.registry)
    # Create and store a dataset
    metric = makeExampleMetrics()
    dataId = {"camera": "DummyCam", "visit": 42}
    ref = butler.put(metric, datasetTypeName, dataId)
    self.assertIsInstance(ref, DatasetRef)
    # Test getDirect
    metricOut = butler.getDirect(ref)
    self.assertEqual(metric, metricOut)
    # Test get
    metricOut = butler.get(datasetTypeName, dataId)
    self.assertEqual(metric, metricOut)
    # Check we can get components
    self.assertGetComponents(butler, datasetTypeName, dataId,
                             ("summary", "data", "output"), metric)

def testHealSparseMapFormatter(self):
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("map", [], "HealSparseMap",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    ref = butler.put(self.hspMap, datasetType)
    uri = butler.getURI(ref)
    self.assertEqual(uri.getExtension(), '.hsp')
    # Retrieve the full map.
    hspMap = butler.get('map')
    self.assertTrue(np.all(hspMap._sparse_map == self.hspMap._sparse_map))
    # Retrieve the coverage map.
    coverage = butler.get('map.coverage')
    self.assertTrue(np.all(coverage.coverage_mask == self.hspMap.coverage_mask))
    # Retrieve a partial map.
    pixels = [0, 6]
    partialMap = butler.get('map', parameters={'pixels': pixels})
    self.assertTrue(
        np.all(np.where(partialMap.coverage_mask)[0] == np.array(pixels)))
    self.assertTrue(np.all(partialMap[0:10000] == self.hspMap[0:10000]))
    self.assertTrue(
        np.all(partialMap[100000:110000] == self.hspMap[100000:110000]))
    # Retrieve a degraded map.
    degradedMapRead = butler.get('map', parameters={'degrade_nside': 512})
    degradedMap = self.hspMap.degrade(512)
    self.assertTrue(
        np.all(degradedMapRead._sparse_map == degradedMap._sparse_map))

def testMatplotlibFormatter(self):
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # Does not have to be a random image
    pyplot.imshow([self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10)])
    ref = butler.put(pyplot.gcf(), datasetType)
    uri = butler.getURI(ref)
    # The test after this will not work if we don't have a local file.
    # Note: the message needs the f-prefix to interpolate ``uri``.
    self.assertEqual(uri.scheme, "file", f"Testing returned URI: {uri}")
    with tempfile.NamedTemporaryFile(suffix=".png") as file:
        pyplot.gcf().savefig(file.name)
        self.assertTrue(filecmp.cmp(uri.path, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    with self.assertRaises(ValueError):
        butler.get(ref)
    butler.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)

def testMatplotlibFormatter(self):
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # Does not have to be a random image
    pyplot.imshow([self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10)])
    ref = butler.put(pyplot.gcf(), datasetType)
    uri = butler.getURI(ref)
    # Following test needs a local file
    with uri.as_local() as local:
        with tempfile.NamedTemporaryFile(suffix=".png") as file:
            pyplot.gcf().savefig(file.name)
            self.assertTrue(filecmp.cmp(local.ospath, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    with self.assertRaises(ValueError):
        butler.get(ref)
    butler.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)

def testTransaction(self):
    butler = Butler(self.tmpConfigFile, run="ingest")
    datasetTypeName = "test_metric"
    dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
    dimensionEntries = (
        ("instrument", {"instrument": "DummyCam"}),
        ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                             "abstract_filter": "R"}),
        ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                   "physical_filter": "d-r"}),
    )
    storageClass = self.storageClassFactory.getStorageClass("StructuredData")
    metric = makeExampleMetrics()
    dataId = {"instrument": "DummyCam", "visit": 42}
    with self.assertRaises(TransactionTestError):
        with butler.transaction():
            # Create and register a DatasetType
            datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                              storageClass, butler.registry)
            # Add needed Dimensions
            for args in dimensionEntries:
                butler.registry.insertDimensionData(*args)
            # Store a dataset
            ref = butler.put(metric, datasetTypeName, dataId)
            self.assertIsInstance(ref, DatasetRef)
            # Test getDirect
            metricOut = butler.getDirect(ref)
            self.assertEqual(metric, metricOut)
            # Test get
            metricOut = butler.get(datasetTypeName, dataId)
            self.assertEqual(metric, metricOut)
            # Check we can get components
            self.assertGetComponents(butler, ref,
                                     ("summary", "data", "output"), metric)
            raise TransactionTestError(
                "This should roll back the entire transaction")
    with self.assertRaises(KeyError):
        butler.registry.getDatasetType(datasetTypeName)
    with self.assertRaises(LookupError):
        butler.registry.expandDataId(dataId)
    # Should raise KeyError for missing DatasetType
    with self.assertRaises(KeyError):
        butler.get(datasetTypeName, dataId)
    # Also check explicitly if Dataset entry is missing
    self.assertIsNone(
        butler.registry.find(butler.collection, datasetType, dataId))
    # Direct retrieval should not find the file in the Datastore
    with self.assertRaises(FileNotFoundError):
        butler.getDirect(ref)

def testAstropyTableFormatter(self):
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("table", [], "AstropyTable",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    ref = butler.put(self.table, datasetType)
    uri = butler.getURI(ref)
    self.assertEqual(uri.getExtension(), '.ecsv')
    table = butler.get('table')
    self.assertTrue(numpy.all(table == self.table))

def testMatplotlibFormatter(self):
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    pyplot.imshow(np.random.randn(3, 4))
    ref = butler.put(pyplot.gcf(), datasetType)
    parsed = urllib.parse.urlparse(butler.getUri(ref))
    with tempfile.NamedTemporaryFile(suffix=".png") as file:
        pyplot.gcf().savefig(file.name)
        self.assertTrue(filecmp.cmp(parsed.path, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    with self.assertRaises(ValueError):
        butler.get(ref)
    butler.remove(ref)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)

def checkInstrumentWithRegistry(self, cls, testRaw):
    Butler.makeRepo(self.root)
    butler = Butler(self.root, run="tests")
    instrument = cls()
    scFactory = StorageClassFactory()
    # Check instrument class and metadata translator agree on
    # instrument name, using readRawFitsHeader to read the metadata.
    filename = os.path.join(DATAROOT, testRaw)
    md = readRawFitsHeader(filename, translator_class=cls.translatorClass)
    obsInfo = ObservationInfo(md, translator_class=cls.translatorClass,
                              filename=filename)
    self.assertEqual(instrument.getName(), obsInfo.instrument)
    # Add Instrument, Detector, and PhysicalFilter entries to the
    # Butler Registry.
    instrument.register(butler.registry)
    # Define a DatasetType for the cameraGeom.Camera, which can be
    # accessed just by identifying its Instrument.
    # A real-world Camera DatasetType should be identified by a
    # validity range as well.
    cameraDatasetType = DatasetType(
        "camera", dimensions=["instrument"],
        storageClass=scFactory.getStorageClass("Camera"),
        universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(cameraDatasetType)
    # Define a DatasetType for cameraGeom.Detectors, which can be
    # accessed by identifying its Instrument and (Butler) Detector.
    # A real-world Detector DatasetType probably doesn't need to exist,
    # as it would just duplicate information in the Camera, and
    # reading a full Camera just to get a single Detector should be
    # plenty efficient.
    detectorDatasetType = DatasetType(
        "detector", dimensions=["instrument", "detector"],
        storageClass=scFactory.getStorageClass("Detector"),
        universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(detectorDatasetType)
    # Put and get the Camera.
    dataId = dict(instrument=instrument.instrument)
    butler.put(instrument.getCamera(), "camera", dataId=dataId)
    camera = butler.get("camera", dataId)
    # Full camera comparisons are *slow*; just compare names.
    self.assertEqual(instrument.getCamera().getName(), camera.getName())
    # Put and get a random subset of the Detectors.
    allDetectors = list(instrument.getCamera())
    numDetectors = min(3, len(allDetectors))
    someDetectors = [allDetectors[i] for i in
                     self.rng.choice(len(allDetectors),
                                     size=numDetectors, replace=False)]
    for cameraGeomDetector in someDetectors:
        # Right now we only support integer detector IDs in data IDs;
        # support for detector names and groups (i.e. rafts) is
        # definitely planned but not yet implemented.
        dataId = dict(instrument=instrument.instrument,
                      detector=cameraGeomDetector.getId())
        butler.put(cameraGeomDetector, "detector", dataId=dataId)
        cameraGeomDetector2 = butler.get("detector", dataId=dataId)
        # Full detector comparisons are *slow*; just compare names and
        # serials.
        self.assertEqual(cameraGeomDetector.getName(),
                         cameraGeomDetector2.getName())
        self.assertEqual(cameraGeomDetector.getSerial(),
                         cameraGeomDetector2.getSerial())

def runPutGetTest(self, storageClass, datasetTypeName):
    butler = Butler(self.tmpConfigFile, run="ingest")
    # There will not be a collection yet
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, set())
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
    datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                      storageClass, butler.registry)
    # Add needed Dimensions
    butler.registry.insertDimensionData("instrument",
                                        {"name": "DummyCamComp"})
    butler.registry.insertDimensionData("physical_filter",
                                        {"instrument": "DummyCamComp",
                                         "name": "d-r",
                                         "abstract_filter": "R"})
    butler.registry.insertDimensionData("visit",
                                        {"instrument": "DummyCamComp",
                                         "id": 423,
                                         "name": "fourtwentythree",
                                         "physical_filter": "d-r"})
    # Create and store a dataset
    metric = makeExampleMetrics()
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create a DatasetRef for put
    refIn = DatasetRef(datasetType, dataId, id=None)
    # Put with a preexisting id should fail
    with self.assertRaises(ValueError):
        butler.put(metric, DatasetRef(datasetType, dataId, id=100))
    # Put and remove the dataset once as a DatasetRef, once as a dataId,
    # and once with a DatasetType
    for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
        with self.subTest(args=args):
            ref = butler.put(metric, *args)
            self.assertIsInstance(ref, DatasetRef)
            # Test getDirect
            metricOut = butler.getDirect(ref)
            self.assertEqual(metric, metricOut)
            # Test get
            metricOut = butler.get(ref.datasetType.name, dataId)
            self.assertEqual(metric, metricOut)
            # Test get with a datasetRef
            metricOut = butler.get(ref)
            self.assertEqual(metric, metricOut)
            # Test getDeferred with dataId
            metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
            self.assertEqual(metric, metricOut)
            # Test getDeferred with a datasetRef
            metricOut = butler.getDeferred(ref).get()
            self.assertEqual(metric, metricOut)
            # Check we can get components
            if storageClass.isComposite():
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"),
                                         metric)
            # Remove from collection only; after that we shouldn't be able
            # to find it unless we use the dataset_id.
            butler.remove(*args, delete=False)
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # If we use the output ref with the dataset_id, we should
            # still be able to load it with getDirect().
            self.assertEqual(metric, butler.getDirect(ref))
            # Reinsert into collection, then delete from Datastore *and*
            # remove from collection.
            butler.registry.associate(butler.collection, [ref])
            butler.remove(*args)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # Now getDirect() should fail, too.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry still knows about it, if we use the dataset_id.
            self.assertEqual(butler.registry.getDataset(ref.id), ref)
            # Put again, then remove completely (this generates a new
            # dataset record in registry, with a new ID - the old one
            # still exists but it is not in any collection so we don't
            # care).
            ref = butler.put(metric, *args)
            butler.remove(*args, remember=False)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))
    # Put the dataset again, since the last thing we did was remove it.
    ref = butler.put(metric, refIn)
    # Get with parameters
    stop = 4
    sliced = butler.get(ref, parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # getDeferred with parameters
    sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # getDeferred with deferred parameters
    sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    if storageClass.isComposite():
        # Delete one component and check that the other components
        # can still be retrieved
        metricOut = butler.get(ref.datasetType.name, dataId)
        compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
        compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
        summary = butler.get(compNameS, dataId)
        self.assertEqual(summary, metric.summary)
        self.assertTrue(butler.datastore.exists(ref.components["summary"]))
        butler.remove(compNameS, dataId, remember=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(compNameS, dataId)
        self.assertFalse(butler.datastore.exists(ref.components["summary"]))
        self.assertTrue(butler.datastore.exists(ref.components["data"]))
        data = butler.get(compNameD, dataId)
        self.assertEqual(data, metric.data)
    # Combining a DatasetRef with a dataId should fail
    with self.assertRaises(ValueError):
        butler.get(ref, dataId)
    # Getting with an explicit ref should fail if the id doesn't match
    with self.assertRaises(ValueError):
        butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))
    # Getting a dataset with unknown parameters should fail
    with self.assertRaises(KeyError):
        butler.get(ref, parameters={"unsupported": True})
    # Check we have a collection
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, {"ingest", })
    # Clean up to check that we can remove something that may have
    # already had a component removed
    butler.remove(ref.datasetType.name, dataId)
    # Add a dataset back in since some downstream tests require
    # something to be present
    ref = butler.put(metric, refIn)
    return butler


# Construct a butler with no run or collection, but make it writeable.
butler = Butler(self.tmpConfigFile, writeable=True)
# Create and register a DatasetType
dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
datasetType = self.addDatasetType(
    "example", dimensions,
    self.storageClassFactory.getStorageClass("StructuredData"),
    butler.registry)
# Add needed Dimensions
butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
butler.registry.insertDimensionData("physical_filter",
                                    {"instrument": "DummyCamComp",
                                     "name": "d-r",
                                     "abstract_filter": "R"})
butler.registry.insertDimensionData("visit",
                                    {"instrument": "DummyCamComp",
                                     "id": 423,
                                     "name": "fourtwentythree",
                                     "physical_filter": "d-r"})
dataId = {"instrument": "DummyCamComp", "visit": 423}
# Create dataset.
metric = makeExampleMetrics()
# Register a new run and put dataset.
run = "deferred"
butler.registry.registerRun(run)
ref = butler.put(metric, datasetType, dataId, run=run)
# Putting with no run should fail with TypeError.
with self.assertRaises(TypeError):
    butler.put(metric, datasetType, dataId)
# Dataset should exist.
self.assertTrue(butler.datasetExists(datasetType, dataId, collection=run))
# We should be able to get the dataset back, both with and without
# a deferred dataset handle.
self.assertEqual(metric, butler.get(datasetType, dataId, collection=run))
self.assertEqual(
    metric, butler.getDeferred(datasetType, dataId, collection=run).get())
# Trying to find the dataset without any collection is a TypeError.
with self.assertRaises(TypeError):
    butler.datasetExists(datasetType, dataId)
with self.assertRaises(TypeError):
    butler.get(datasetType, dataId)
with self.assertRaises(TypeError):
    butler.remove(datasetType, dataId)
# Associate the dataset with a different collection.
butler.registry.associate("tagged", [ref])
# Deleting the dataset from the new collection should make it findable
# in the original collection but without a Datastore entry.
butler.remove(datasetType, dataId, collection="tagged")
self.assertFalse(butler.datasetExists(datasetType, dataId, collection=run))

class FormattersTests(DatasetTestHelper, lsst.utils.tests.TestCase):
    root = None
    storageClassFactory = None

    @classmethod
    def setUpClass(cls):
        """Create a new butler once only."""
        cls.storageClassFactory = StorageClassFactory()
        cls.root = tempfile.mkdtemp(dir=TESTDIR)
        data_ids = {
            "instrument": [INSTRUMENT_NAME],
            "detector": [0, 1, 2, 3, 4, 5],
            "exposure": [11, 22],
        }
        configURI = ButlerURI("resource://spherex/configs",
                              forceDirectory=True)
        butlerConfig = Config(configURI.join("butler.yaml"))
        # in-memory db is being phased out
        # butlerConfig["registry", "db"] = 'sqlite:///:memory:'
        cls.creatorButler = makeTestRepo(
            cls.root, data_ids, config=butlerConfig,
            dimensionConfig=configURI.join("dimensions.yaml"))
        for formatter in FORMATTERS:
            datasetTypeName, storageClassName = (formatter["dataset_type"],
                                                 formatter["storage_class"])
            storageClass = cls.storageClassFactory.getStorageClass(
                storageClassName)
            addDatasetType(cls.creatorButler, datasetTypeName, set(data_ids),
                           storageClass)

    @classmethod
    def tearDownClass(cls):
        if cls.root is not None:
            shutil.rmtree(cls.root, ignore_errors=True)

    def setUp(self):
        # make test collection
        # self.butler = makeTestCollection(self.creatorButler)
        self.collection = self._testMethodName
        self.butler = Butler(butler=self.creatorButler, run=self.collection)

    def test_putget(self):
        fitsPath = os.path.join(TESTDIR, "data", "small.fits")
        dataid = {"exposure": 11, "detector": 0,
                  "instrument": INSTRUMENT_NAME}
        for formatter in FORMATTERS:
            # in-memory object, representing fits
            inmemobj = formatter["reader"](fitsPath)
            # save in-memory object into butler dataset
            datasetTypeName = formatter["dataset_type"]
            self.butler.put(inmemobj, datasetTypeName, dataid)
            # get butler dataset
            retrievedobj = self.butler.get(datasetTypeName, dataid)
            self.assertIsInstance(retrievedobj, formatter["inmem_cls"])
            # Compare class names with assertEqual; the original used
            # assertTrue, which treated the second name as a message and
            # always passed.
            self.assertEqual(retrievedobj.__class__.__name__,
                             inmemobj.__class__.__name__)

    def test_ingest(self):
        fitsPath = os.path.join(TESTDIR, "data", "small.fits")
        formatter = FORMATTERS[0]
        datasetTypeName, formatterCls = (formatter["dataset_type"],
                                         formatter["formatter_cls"])
        datasetType = self.butler.registry.getDatasetType(datasetTypeName)
        datasets = []
        for exposure in range(3, 5):
            for detector in range(6):
                # use the same fits to test ingest
                if not os.path.exists(fitsPath):
                    log.warning(
                        f"No data found for detector {detector}, "
                        f"exposure {exposure} @ {fitsPath}.")
                    continue
                ref = DatasetRef(datasetType,
                                 dataId={"instrument": INSTRUMENT_NAME,
                                         "detector": detector,
                                         "exposure": exposure * 11})
                datasets.append(FileDataset(refs=ref, path=fitsPath,
                                            formatter=formatterCls))
        # register new collection
        # run = "rawIngestedRun"
        # self.butler.registry.registerCollection(run,
        #                                         type=CollectionType.RUN)
        # collection is registered as a part of setUp
        run = self.collection
        with self.butler.transaction():
            for exposure in range(3, 5):
                expid = exposure * 11
                self.butler.registry.insertDimensionData(
                    "exposure", {"instrument": INSTRUMENT_NAME,
                                 "id": expid,
                                 "name": f"{expid}",
                                 "group_name": "day1",
                                 "timespan": Timespan(begin=None, end=None)})
            # transfer can be 'auto', 'move', 'copy', 'hardlink',
            # 'relsymlink' or 'symlink'
            self.butler.ingest(*datasets, transfer="symlink", run=run)
        # verify that 12 files were ingested (2 exposures for each detector)
        refsSet = set(self.butler.registry.queryDatasets(
            datasetTypeName, collections=[run]))
        self.assertEqual(
            len(refsSet), 12,
            f"Collection {run} should have 12 elements after ingest")
        # verify that data id is present
        dataid = {"exposure": 44, "detector": 5,
                  "instrument": INSTRUMENT_NAME}
        refsList = list(self.butler.registry.queryDatasets(
            datasetTypeName, collections=[run], dataId=dataid))
        self.assertEqual(
            len(refsList), 1,
            f"Collection {run} should have 1 element with {dataid}")

class ParquetFormatterTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, using PosixDatastore."""

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "data", dimensions=(), storageClass="DataFrame",
            universe=self.butler.registry.dimensions)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def testSingleIndexDataFrame(self):
        columns1 = pd.Index(["a", "b", "c"])
        df1 = pd.DataFrame(np.random.randn(5, 3),
                           index=np.arange(5, dtype=int), columns=columns1)
        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(df1.columns.equals(columns2))
        # Read just some columns a few different ways.
        df3 = self.butler.get(self.datasetType, dataId={},
                              parameters={"columns": ["a", "c"]})
        self.assertTrue(df1.loc[:, ["a", "c"]].equals(df3))
        df4 = self.butler.get(self.datasetType, dataId={},
                              parameters={"columns": "a"})
        self.assertTrue(df1.loc[:, ["a"]].equals(df4))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={},
                            parameters={"columns": ["d"]})

    def testMultiIndexDataFrame(self):
        columns1 = pd.MultiIndex.from_tuples(
            [("g", "a"), ("g", "b"), ("g", "c"),
             ("r", "a"), ("r", "b"), ("r", "c")],
            names=["filter", "column"])
        df1 = pd.DataFrame(np.random.randn(5, 6),
                           index=np.arange(5, dtype=int), columns=columns1)
        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(df1.columns.equals(columns2))
        # Read just some columns a few different ways.
        df3 = self.butler.get(self.datasetType, dataId={},
                              parameters={"columns": {"filter": "g"}})
        self.assertTrue(df1.loc[:, ["g"]].equals(df3))
        df4 = self.butler.get(self.datasetType, dataId={},
                              parameters={"columns": {"filter": ["r"],
                                                      "column": "a"}})
        self.assertTrue(df1.loc[:, [("r", "a")]].equals(df4))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={},
                            parameters={"columns": ["d"]})

def testPutTemplates(self):
    storageClass = self.storageClassFactory.getStorageClass(
        "StructuredDataNoComponents")
    butler = Butler(self.tmpConfigFile)
    # Add needed Dimensions
    butler.registry.addDimensionEntry("instrument",
                                      {"instrument": "DummyCamComp"})
    butler.registry.addDimensionEntry("physical_filter",
                                      {"instrument": "DummyCamComp",
                                       "physical_filter": "d-r"})
    butler.registry.addDimensionEntry("visit",
                                      {"instrument": "DummyCamComp",
                                       "visit": 423,
                                       "physical_filter": "d-r"})
    butler.registry.addDimensionEntry("visit",
                                      {"instrument": "DummyCamComp",
                                       "visit": 425,
                                       "physical_filter": "d-r"})
    # Create and store a dataset
    metric = makeExampleMetrics()
    # Create two almost-identical DatasetTypes (both will use default
    # template)
    dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
    butler.registry.registerDatasetType(
        DatasetType("metric1", dimensions, storageClass))
    butler.registry.registerDatasetType(
        DatasetType("metric2", dimensions, storageClass))
    butler.registry.registerDatasetType(
        DatasetType("metric3", dimensions, storageClass))
    dataId1 = {"instrument": "DummyCamComp", "visit": 423}
    dataId2 = {"instrument": "DummyCamComp", "visit": 423,
               "physical_filter": "d-r"}
    dataId3 = {"instrument": "DummyCamComp", "visit": 425}
    # Put with exactly the data ID keys needed
    ref = butler.put(metric, "metric1", dataId1)
    self.assertTrue(os.path.exists(
        os.path.join(butler.datastore.root,
                     "ingest/metric1/DummyCamComp_423.pickle")))
    # Check the template based on dimensions
    butler.datastore.templates.validateTemplates([ref])
    # Put with extra data ID keys (physical_filter is an optional
    # dependency); should not change template (at least the way we're
    # defining them to behave now; the important thing is that they
    # must be consistent).
    ref = butler.put(metric, "metric2", dataId2)
    self.assertTrue(os.path.exists(
        os.path.join(butler.datastore.root,
                     "ingest/metric2/DummyCamComp_423.pickle")))
    # Check the template based on dimensions
    butler.datastore.templates.validateTemplates([ref])
    # Now use a file template that will not result in unique filenames
    ref = butler.put(metric, "metric3", dataId1)
    # Check the template based on dimensions. This one is a bad template
    with self.assertRaises(FileTemplateValidationError):
        butler.datastore.templates.validateTemplates([ref])
    with self.assertRaises(FileExistsError):
        butler.put(metric, "metric3", dataId3)

def runPutGetTest(self, storageClass, datasetTypeName):
    butler = Butler(self.tmpConfigFile)
    # There will not be a collection yet
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, set())
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
    datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                      storageClass, butler.registry)
    # Add needed Dimensions
    butler.registry.addDimensionEntry("instrument",
                                      {"instrument": "DummyCamComp"})
    butler.registry.addDimensionEntry("physical_filter",
                                      {"instrument": "DummyCamComp",
                                       "physical_filter": "d-r"})
    butler.registry.addDimensionEntry("visit",
                                      {"instrument": "DummyCamComp",
                                       "visit": 423,
                                       "physical_filter": "d-r"})
    # Create and store a dataset
    metric = makeExampleMetrics()
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create a DatasetRef for put
    refIn = DatasetRef(datasetType, dataId, id=None)
    # Put with a preexisting id should fail
    with self.assertRaises(ValueError):
        butler.put(metric, DatasetRef(datasetType, dataId, id=100))
    # Put and remove the dataset once as a DatasetRef, once as a dataId,
    # and once with a DatasetType
    for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
        with self.subTest(args=args):
            ref = butler.put(metric, *args)
            self.assertIsInstance(ref, DatasetRef)
            # Test getDirect
            metricOut = butler.getDirect(ref)
            self.assertEqual(metric, metricOut)
            # Test get
            metricOut = butler.get(ref.datasetType.name, dataId)
            self.assertEqual(metric, metricOut)
            # Test get with a datasetRef
            metricOut = butler.get(ref)
            self.assertEqual(metric, metricOut)
            # Check we can get components
            if storageClass.isComposite():
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"),
                                         metric)
            # Remove from collection only; after that we shouldn't be able
            # to find it unless we use the dataset_id.
            butler.remove(*args, delete=False)
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # If we use the output ref with the dataset_id, we should
            # still be able to load it with getDirect().
            self.assertEqual(metric, butler.getDirect(ref))
            # Reinsert into collection, then delete from Datastore *and*
            # remove from collection.
            butler.registry.associate(butler.collection, [ref])
            butler.remove(*args)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # Now getDirect() should fail, too.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry still knows about it, if we use the dataset_id.
            self.assertEqual(butler.registry.getDataset(ref.id), ref)
            # Put again, then remove completely (this generates a new
            # dataset record in registry, with a new ID - the old one
            # still exists but it is not in any collection so we don't
            # care).
            ref = butler.put(metric, *args)
            butler.remove(*args, remember=False)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))
    # Put the dataset again, since the last thing we did was remove it.
    ref = butler.put(metric, refIn)
    # Get with parameters
    stop = 4
    sliced = butler.get(ref, parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # Combining a DatasetRef with a dataId should fail
    with self.assertRaises(ValueError):
        butler.get(ref, dataId)
    # Getting with an explicit ref should fail if the id doesn't match
    with self.assertRaises(ValueError):
        butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))
    # Getting a dataset with unknown parameters should fail
    with self.assertRaises(KeyError):
        butler.get(ref, parameters={"unsupported": True})
    # Check we have a collection
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, {"ingest", })

def makeDiscreteSkyMap(repo, config_file, collections, instrument,
                       skymap_id='discrete', old_skymap_id=None):
    """Implements the command line interface `butler make-discrete-skymap`
    subcommand; should only be called by command line tools and unit test
    code that tests this function.

    Constructs a skymap from calibrated exposures in the butler repository.

    Parameters
    ----------
    repo : `str`
        URI to the location to read the repo.
    config_file : `str` or `None`
        Path to a config file that contains overrides to the skymap config.
    collections : `list` [`str`]
        An expression specifying the collections to be searched (in order)
        when reading datasets, and optionally dataset type restrictions on
        them. At least one collection must be specified. This is the
        collection with the calibrated exposures.
    instrument : `str`
        The name or fully-qualified class name of an instrument.
    skymap_id : `str`, optional
        The identifier of the skymap to save. Default is 'discrete'.
    old_skymap_id : `str`, optional
        The identifier of the skymap to append to. Must differ from
        ``skymap_id``. Ignored unless ``config.doAppend=True``.
    """
    butler = Butler(repo, collections=collections, writeable=True)
    instr = getInstrument(instrument, butler.registry)
    config = MakeDiscreteSkyMapConfig()
    instr.applyConfigOverrides(MakeDiscreteSkyMapTask._DefaultName, config)
    if config_file is not None:
        config.load(config_file)
    # The coaddName for a SkyMap is only relevant in Gen2, and we completely
    # ignore it here; once Gen2 is gone it can be removed.
    oldSkyMap = None
    if config.doAppend:
        if old_skymap_id is None:
            raise ValueError(
                "old_skymap_id must be provided if config.doAppend is True.")
        dataId = {'skymap': old_skymap_id}
        try:
            oldSkyMap = butler.get(BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
                                   collections=collections, dataId=dataId)
        except LookupError as e:
            msg = (f"Could not find seed skymap with dataId {dataId} "
                   f"in collections {collections} but doAppend is "
                   f"{config.doAppend}. Aborting...")
            raise LookupError(msg, *e.args[1:])
    datasets = butler.registry.queryDatasets('calexp', collections=collections)
    wcs_md_tuple_list = [(butler.getDirect('calexp.metadata', ref),
                          butler.getDirect('calexp.wcs', ref))
                         for ref in datasets]
    task = MakeDiscreteSkyMapTask(config=config)
    result = task.run(wcs_md_tuple_list, oldSkyMap)
    result.skyMap.register(skymap_id, butler)
    butler.put(result.skyMap, BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
               dataId={'skymap': skymap_id},
               run=BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)

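# A minimal invocation sketch for makeDiscreteSkyMap. The repo URI,
# collection name, and instrument name are hypothetical, not from the source:
#
#     makeDiscreteSkyMap("/repo/main", config_file=None,
#                        collections=["HSC/runs/mycalexps"], instrument="HSC")
#
# When the loaded config sets doAppend=True, old_skymap_id must name the
# existing skymap to extend, per the docstring above.
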
def populateButler(
        pipeline: Pipeline, butler: Butler,
        datasetTypes: Optional[Dict[Optional[str], List[str]]] = None) -> None:
    """Populate data butler with data needed for test.

    Initializes data butler with a bunch of items:
    - registers dataset types which are defined by pipeline
    - creates dimension data for (instrument, detector)
    - adds datasets based on ``datasetTypes`` dictionary; if the dictionary
      is missing then a single dataset with type "add_dataset0" is added

    All datasets added to butler have ``dataId={instrument=instrument,
    detector=0}`` where ``instrument`` is extracted from pipeline; "INSTR"
    is used if pipeline is missing instrument definition. Type of the
    dataset is guessed from dataset type name (assumes that pipeline is
    made of `AddTask` tasks).

    Parameters
    ----------
    pipeline : `~lsst.pipe.base.Pipeline`
        Pipeline instance.
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    datasetTypes : `dict` [`str`, `list`], optional
        Dictionary whose keys are collection names and values are lists of
        dataset type names. By default a single dataset of type
        "add_dataset0" is added to a ``butler.run`` collection.
    """
    # Add dataset types to registry
    taskDefs = list(pipeline.toExpandedPipeline())
    registerDatasetTypes(butler.registry, taskDefs)
    instrument = pipeline.getInstrument()
    if instrument is not None:
        instrument_class = doImportType(instrument)
        instrumentName = instrument_class.getName()
    else:
        instrumentName = "INSTR"
    # Add all needed dimensions to registry
    butler.registry.insertDimensionData("instrument",
                                        dict(name=instrumentName))
    butler.registry.insertDimensionData(
        "detector", dict(instrument=instrumentName, id=0, full_name="det0"))
    taskDefMap = dict((taskDef.label, taskDef) for taskDef in taskDefs)
    # Add inputs to butler
    if not datasetTypes:
        datasetTypes = {None: ["add_dataset0"]}
    for run, dsTypes in datasetTypes.items():
        if run is not None:
            butler.registry.registerRun(run)
        for dsType in dsTypes:
            if dsType == "packages":
                # Version is intentionally inconsistent.
                # Dict is convertible to Packages if Packages is installed.
                data: Any = {"python": "9.9.99"}
                butler.put(data, dsType, run=run)
            else:
                if dsType.endswith("_config"):
                    # find a config from matching task name or make a new one
                    taskLabel, _, _ = dsType.rpartition("_")
                    taskDef = taskDefMap.get(taskLabel)
                    if taskDef is not None:
                        data = taskDef.config
                    else:
                        data = AddTaskConfig()
                elif dsType.endswith("_metadata"):
                    data = _TASK_FULL_METADATA_TYPE()
                elif dsType.endswith("_log"):
                    data = ButlerLogRecords.from_records([])
                else:
                    data = numpy.array([0.0, 1.0, 2.0, 5.0])
                butler.put(data, dsType, run=run,
                           instrument=instrumentName, detector=0)

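# A usage sketch for populateButler. The run name is illustrative;
# "add_dataset0" and "packages" follow the naming the function itself
# handles above:
#
#     populateButler(pipeline, butler,
#                    datasetTypes={"test_run": ["add_dataset0", "packages"]})
#
# With datasetTypes=None the function falls back to putting a single
# "add_dataset0" dataset into the butler's default run.
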
def makeSimpleQGraph(nQuanta=5, pipeline=None, butler=None, root=None,
                     skipExisting=False, inMemory=True, userQuery=""):
    """Make simple QuantumGraph for tests.

    Makes simple one-task pipeline with AddTask, sets up in-memory registry
    and butler, fills them with minimal data, and generates QuantumGraph
    with all of that.

    Parameters
    ----------
    nQuanta : `int`
        Number of quanta in a graph.
    pipeline : `~lsst.pipe.base.Pipeline`
        If `None` then a one-task pipeline is made with `AddTask` and
        default `AddTaskConfig`.
    butler : `~lsst.daf.butler.Butler`, optional
        Data butler instance; this should be an instance returned from a
        previous call to this method.
    root : `str`
        Path or URI to the root location of the new repository. Only used
        if ``butler`` is None.
    skipExisting : `bool`, optional
        If `True`, a Quantum is not created if all its outputs already
        exist; default is `False`.
    inMemory : `bool`, optional
        If `True`, make an in-memory repository.
    userQuery : `str`, optional
        The user query to pass to ``makeGraph``; by default an empty string.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Butler instance.
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Quantum graph instance.
    """
    if pipeline is None:
        pipeline = makeSimplePipeline(nQuanta=nQuanta)

    if butler is None:
        if root is None:
            raise ValueError("Must provide `root` when `butler` is None")
        config = Config()
        if not inMemory:
            config["registry", "db"] = f"sqlite:///{root}/gen3.sqlite"
            config["datastore", "cls"] = \
                "lsst.daf.butler.datastores.posixDatastore.PosixDatastore"
        repo = butlerTests.makeTestRepo(root, {}, config=config)
        collection = "test"
        butler = Butler(butler=repo, run=collection)

    # Add dataset types to registry
    registerDatasetTypes(butler.registry, pipeline.toExpandedPipeline())

    instrument = pipeline.getInstrument()
    if instrument is not None:
        if isinstance(instrument, str):
            instrument = doImport(instrument)
        instrumentName = instrument.getName()
    else:
        instrumentName = "INSTR"

    # Add all needed dimensions to registry
    butler.registry.insertDimensionData("instrument",
                                        dict(name=instrumentName))
    butler.registry.insertDimensionData(
        "detector", dict(instrument=instrumentName, id=0, full_name="det0"))

    # Add inputs to butler
    data = numpy.array([0., 1., 2., 5.])
    butler.put(data, "add_dataset0", instrument=instrumentName, detector=0)

    # Make the graph
    builder = pipeBase.GraphBuilder(registry=butler.registry,
                                    skipExisting=skipExisting)
    qgraph = builder.makeGraph(pipeline, collections=[butler.run],
                               run=butler.run, userQuery=userQuery)

    return butler, qgraph

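# A usage sketch for makeSimpleQGraph; the temporary directory is
# illustrative:
#
#     import tempfile
#     with tempfile.TemporaryDirectory() as root:
#         butler, qgraph = makeSimpleQGraph(nQuanta=3, root=root)
#
# Per the docstring above, the returned butler can be passed back in via the
# ``butler`` argument to build further graphs against the same repository.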