def testImportExport(self):
    """Round-trip datasets through ``export`` and ``import_``."""
    # Run put/get tests just to create and populate a repo.
    metricClass = self.storageClassFactory.getStorageClass(
        "StructuredDataNoComponents")
    exportButler = self.runPutGetTest(metricClass, "test_metric")

    # The repo must actually contain at least one dataset to export.
    refs = list(
        exportButler.registry.queryDatasets(..., collections=...))
    self.assertGreater(len(refs), 0)

    # Export those datasets. TemporaryDirectory is used because there
    # doesn't seem to be a way to get the filename (as opposed to the
    # file object) from any of tempfile's temporary-file context
    # managers.
    with tempfile.TemporaryDirectory() as exportDir:
        # TODO: When PosixDatastore supports transfer-on-exist, add
        # tests for that.
        exportFile = os.path.join(exportDir, "exports.yaml")
        with exportButler.export(filename=exportFile) as export:
            export.saveDatasets(refs)
        self.assertTrue(os.path.exists(exportFile))

        with tempfile.TemporaryDirectory() as importDir:
            Butler.makeRepo(importDir, config=Config(self.configFile))
            importButler = Butler(importDir, run="ingest")
            importButler.import_(filename=exportFile,
                                 directory=exportButler.datastore.root,
                                 transfer="symlink")
            for ref in refs:
                with self.subTest(ref=ref):
                    # Test for existence by passing in the DatasetType
                    # and data ID separately, to avoid lookup by
                    # dataset_id.
                    self.assertTrue(
                        importButler.datasetExists(ref.datasetType,
                                                   ref.dataId))
def _import(
    yamlBuffer: io.StringIO,
    newButler: Butler,
    inserts: DataSetTypeMap,
    run: Optional[str],
    butlerModifier: Optional[Callable[[Butler], Butler]],
) -> Butler:
    """Import the exports from the existing butler into the newly
    created butler, then insert the datasets that are expected to be
    produced.
    """
    # Import the existing datasets using "split" mode. "split" is safe
    # because execution butler is assumed to be able to see all the
    # file locations that the main datastore can see. "split" supports
    # some absolute URIs in the datastore.
    newButler.import_(filename=yamlBuffer, format="yaml", reuseIds=True,
                      transfer="split")

    # If there is modifier callable, run it to make necessary updates
    # to the new butler.
    if butlerModifier is not None:
        newButler = butlerModifier(newButler)

    # Register datasets to be produced and insert them into the registry.
    for datasetType, dataIds in inserts.items():
        try:
            # Storage class definitions may be inconsistent, so the
            # differences must be checked if registration conflicts.
            newButler.registry.registerDatasetType(datasetType)
        except ConflictingDefinitionError:
            # We do not at this point know whether the dataset type is
            # an intermediate (and so must be able to support
            # conversion from the registry storage class to an input)
            # or solely an output dataset type. Test both
            # compatibilities.
            registered = newButler.registry.getDatasetType(datasetType.name)
            bothWays = (registered.is_compatible_with(datasetType)
                        and datasetType.is_compatible_with(registered))
            if not bothWays:
                # Not compatible so re-raise the original exception.
                raise
            # Ensure that we use the registry type when inserting.
            datasetType = registered
        newButler.registry.insertDatasets(datasetType, dataIds, run)

    return newButler
#!/usr/bin/env python
from lsst.daf.butler import Butler, CollectionType
import shutil
import os

# Delete any leftover repository files so the repo is rebuilt fresh.
for filename in ("butler.yaml", "gen3.sqlite3"):
    target = os.path.join("DATA", filename)
    if os.path.exists(target):
        os.remove(target)

Butler.makeRepo("DATA")
butler = Butler("DATA", writeable=True)
butler.import_(filename="DATA/ci_hsc.yaml", transfer=None)

# Chain the standard input collections behind a single default name.
butler.registry.registerCollection("HSC/defaults",
                                   type=CollectionType.CHAINED)
butler.registry.setCollectionChain(
    "HSC/defaults",
    [
        "HSC/raw/all",
        "HSC/calib",
        "HSC/masks",
        "refcats",
        "skymaps",
    ],
)
def testChained(self):
    """Test query-collections output for chained collections in TREE,
    default (table), and FLATTEN modes.
    """
    with self.runner.isolated_filesystem():
        # Create a butler and add some chained collections:
        butlerCfg = Butler.makeRepo("here")
        butler1 = Butler(butlerCfg, writeable=True)

        # Replace datastore functions with mocks:
        DatastoreMock.apply(butler1)

        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry",
                                              "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry",
                                              "datasets.yaml"))
        registry1 = butler1.registry
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1",
                                     CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])

        # Use the script function to test the query-collections TREE
        # option, because the astropy.table.Table.read method, which we
        # are using for verification elsewhere in this file, seems to
        # strip leading whitespace from columns. This makes it
        # impossible to test the nested TREE output of the
        # query-collections subcommand from the command line interface.
        # NOTE(review): the leading whitespace in the nested names below
        # is copied from the original — confirm the indent depth matches
        # the TREE renderer's actual output.
        table = queryCollections("here",
                                 glob=(),
                                 collection_type=CollectionType.all(),
                                 chains="TREE")
        expected = Table(array(
            (("imported_g", "RUN"),
             ("imported_r", "RUN"),
             ("run1", "RUN"),
             ("tag1", "TAGGED"),
             ("calibration1", "CALIBRATION"),
             ("chain1", "CHAINED"),
             (" tag1", "TAGGED"),
             (" run1", "RUN"),
             (" chain2", "CHAINED"),
             (" calibration1", "CALIBRATION"),
             (" run1", "RUN"),
             ("chain2", "CHAINED"),
             (" calibration1", "CALIBRATION"),
             (" run1", "RUN"))),
            names=("Name", "Type"))
        self.assertAstropyTablesEqual(table, expected)

        # Default (table) output via the command-line interface, which
        # includes the chain definition column.
        result = self.runner.invoke(cli, ["query-collections", "here"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        expected = Table(array(
            (("imported_g", "RUN", ""),
             ("imported_r", "RUN", ""),
             ("run1", "RUN", ""),
             ("tag1", "TAGGED", ""),
             ("calibration1", "CALIBRATION", ""),
             ("chain1", "CHAINED", "[tag1, run1, chain2]"),
             ("chain2", "CHAINED", "[calibration1, run1]"))),
            names=("Name", "Type", "Definition"))
        # Parse the CLI output once and compare (previously the output
        # was parsed twice and the first result discarded).
        self.assertAstropyTablesEqual(readTable(result.output), expected)

        # FLATTEN mode expands chains into their constituent
        # collections, repeating members reachable via several chains.
        result = self.runner.invoke(
            cli, ["query-collections", "here", "--chains", "FLATTEN"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        expected = Table(array(
            (("imported_g", "RUN"),
             ("imported_r", "RUN"),
             ("run1", "RUN"),
             ("tag1", "TAGGED"),
             ("calibration1", "CALIBRATION"),
             ("tag1", "TAGGED"),
             ("run1", "RUN"),
             ("calibration1", "CALIBRATION"),
             ("calibration1", "CALIBRATION"),
             ("run1", "RUN"))),
            names=("Name", "Type"))
        self.assertAstropyTablesEqual(readTable(result.output), expected)