def run(root: str, *, tracts: List[int], filters: List[str],
        create: bool = False, clobber: bool = False,
        continue_: bool = False, reruns: List[Rerun]):
    if create:
        if continue_:
            raise ValueError("Cannot continue if create is True.")
        if os.path.exists(root):
            if clobber:
                shutil.rmtree(root)
            else:
                raise ValueError("Repo exists and --clobber=False.")
        Butler.makeRepo(root)
    if reruns and set(filters) != set("grizy"):
        raise ValueError(
            "All filters must be included if reruns are converted.")
    butler = Butler(root, run="HSC/raw/all")
    task = makeTask(butler, continue_=continue_, reruns=reruns)
    task.run(root=GEN2_RAW_ROOT,
             reruns=reruns,
             calibs=([CalibRepo(path="CALIB", labels=("gen2", "defaults"))]
                     if not continue_ else []),
             visits=makeVisitList(tracts, filters))
    if not continue_:
        task.log.info("Ingesting y-band stray light data.")
        task.instrument.ingestStrayLightData(
            Butler(root, writeable=True),
            directory=os.path.join(GEN2_RAW_ROOT, "CALIB", "STRAY_LIGHT"),
            transfer=task.config.transfer,
            labels=("gen2", "defaults"))
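A hypothetical invocation of run(), showing the expected argument shapes; the repository path and tract numbers are placeholders, not values from the original script.

run("/repo/hsc-gen3",
    tracts=[9615, 9697],
    filters=["g", "r", "i", "z", "y"],
    create=True,
    clobber=False,
    reruns=[])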
def testImportExport(self):
    # Run put/get tests just to create and populate a repo.
    storageClass = self.storageClassFactory.getStorageClass(
        "StructuredDataNoComponents")
    exportButler = self.runPutGetTest(storageClass, "test_metric")
    # Test that the repo actually has at least one dataset.
    datasets = list(
        exportButler.registry.queryDatasets(..., collections=...))
    self.assertGreater(len(datasets), 0)
    # Export those datasets. We use TemporaryDirectory because there
    # doesn't seem to be a way to get the filename (as opposed to the file
    # object) from any of tempfile's temporary-file context managers.
    with tempfile.TemporaryDirectory() as exportDir:
        # TODO: When PosixDatastore supports transfer-on-export, add tests
        # for that.
        exportFile = os.path.join(exportDir, "exports.yaml")
        with exportButler.export(filename=exportFile) as export:
            export.saveDatasets(datasets)
        self.assertTrue(os.path.exists(exportFile))
        with tempfile.TemporaryDirectory() as importDir:
            Butler.makeRepo(importDir, config=Config(self.configFile))
            importButler = Butler(importDir, run="ingest")
            importButler.import_(filename=exportFile,
                                 directory=exportButler.datastore.root,
                                 transfer="symlink")
            for ref in datasets:
                with self.subTest(ref=ref):
                    # Test for existence by passing in the DatasetType and
                    # data ID separately, to avoid lookup by dataset_id.
                    self.assertTrue(
                        importButler.datasetExists(ref.datasetType,
                                                   ref.dataId))
def convert_new(self, delete: bool = False):
    """Create or empty a gen3 repo and convert a gen2 repo.

    Parameters
    ----------
    delete : `bool`
        `True` if you really want to delete the old gen3 registry and
        datastore.

    Returns
    -------
    task : `ConvertRepoTask`
        The completed task object.
    """
    if delete:
        self._delete_gen3_repo(self.registry_connection_uri,
                               self.registry_namespace)
    if os.path.exists(self.gen3_root):
        raise ValueError("Cannot create because repo dir exists.")
    Butler.makeRepo(self.gen3_root, config=self.butler_seed)
    task = self.run_conversion_task()
    if self.convert_skymap:
        try:
            self._put_skymap()
        except ConflictingDefinitionError:
            msg = "Cannot add skyMap; "
            msg += "maybe it already exists from gen2 conversion?"
            lsst.log.info(msg)
            raise
    return task
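A sketch of how convert_new might be driven, assuming an instance of the enclosing converter class has already been configured with gen3_root, butler_seed, and the registry connection settings; the converter variable is hypothetical.

task = converter.convert_new(delete=True)  # drop any stale gen3 registry first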
def make_test_butler(root, data_ids):
    """Create an empty repository with default configuration.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    data_ids : `dict` [`str`, `iterable` [`dict`]]
        A dictionary keyed by the dimensions used in the test. Each value
        is a dictionary of fields and values for that dimension. See
        :file:`daf/butler/config/dimensions.yaml` for required fields,
        listed as "keys" and "requires" under each dimension's entry.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository.
    """
    # TODO: takes 5 seconds to run; split up into class-level Butler
    # with test-level runs after DM-21246
    Butler.makeRepo(root)
    butler = Butler(root, run="test")
    for dimension, values in data_ids.items():
        butler.registry.insertDimensionData(dimension, *values)
    return butler
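A minimal usage sketch, assuming the default daf_butler dimension universe; the instrument record fields below mirror those listed in dimensions.yaml, but every value is illustrative.

butler = make_test_butler(
    "/tmp/test_repo",
    {"instrument": [{"name": "DummyCam", "visit_max": 256,
                     "exposure_max": 256, "detector_max": 4,
                     "class_name": "lsst.obs.base.Instrument"}]})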
def setUp(self):
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    self.root2 = tempfile.mkdtemp(dir=TESTDIR)
    self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
    Butler.makeRepo(self.root, config=Config(self.configFile),
                    outfile=self.tmpConfigFile)
def setUp(self):
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    self.root2 = tempfile.mkdtemp(dir=TESTDIR)
    self.tmpConfigFile = self.root2
    Butler.makeRepo(self.root, config=Config(self.configFile),
                    outfile=self.tmpConfigFile)
def setUp(self):
    self.root = makeTestTempDir(TESTDIR)
    Butler.makeRepo(self.root)
    ints = [1, 2, 3]
    names = ['one', 'two', 'three']
    transcendentals = [3.14, 2.718, 0.643]
    self.table = Table([ints, names, transcendentals],
                       names=['ints', 'names', 'transcendentals'])
def setUp(self): """Create a new butler root for each test.""" if self.useTempRoot: self.root = tempfile.mkdtemp(dir=TESTDIR) Butler.makeRepo(self.root, config=Config(self.configFile)) self.tmpConfigFile = os.path.join(self.root, "butler.yaml") else: self.root = None self.tmpConfigFile = self.configFile
def setUp(self):
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    self.root2 = tempfile.mkdtemp(dir=TESTDIR)
    self.tmpConfigFile = ButlerURI(
        os.path.join(self.root2, "something.yaml")).geturl()
    Butler.makeRepo(self.root, config=Config(self.configFile),
                    outfile=self.tmpConfigFile)
def setUp(self):
    self.root = makeTestTempDir(TESTDIR)
    Butler.makeRepo(self.root)
    self.hspMap = hsp.HealSparseMap.make_empty(nside_coverage=32,
                                               nside_sparse=4096,
                                               dtype=np.float32)
    self.hspMap[0:10000] = 1.0
    self.hspMap[100000:110000] = 2.0
    self.hspMap[500000:510000] = 3.0
def setUp(self): """Create a new butler root for each test.""" self.root = makeTestTempDir(TESTDIR) Butler.makeRepo(self.root) self.butler = Butler(self.root, run="test_run") # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. self.datasetType = DatasetType("data", dimensions=(), storageClass="DataFrame", universe=self.butler.registry.dimensions) self.butler.registry.registerDatasetType(self.datasetType)
def testPickle(self):
    """Test pickle support."""
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    Butler.makeRepo(self.root, config=Config(self.configFile))
    self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
    butler = Butler(self.tmpConfigFile, run="ingest")
    butlerOut = pickle.loads(pickle.dumps(butler))
    self.assertIsInstance(butlerOut, Butler)
    self.assertEqual(butlerOut.config, butler.config)
def setUp(self):
    # Use a temporary working directory
    self.root = tempfile.mkdtemp(dir=self.ingestDir)
    Butler.makeRepo(self.root)
    self.butler = Butler(self.root, run="raw")
    # Register the instrument and its static metadata
    self.instrument.register(self.butler.registry)
    # Make a default config for test methods to play with
    self.config = self.RawIngestTask.ConfigClass()
    self.config.instrument = \
        f"{self.instrument.__class__.__module__}.{self.instrument.__class__.__name__}"
def setUp(self):
    # Use a temporary working directory
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    Butler.makeRepo(self.root)
    self.butler = Butler(self.root, run="raw")
    # Register the instrument and its static metadata
    HyperSuprimeCam().register(self.butler.registry)
    # Make a default config for test methods to play with
    self.config = RawIngestTask.ConfigClass()
    self.config.onError = "break"
    self.file = os.path.join(testDataDirectory, "hsc", "raw",
                             "HSCA90402512.fits.gz")
    self.dataId = dict(instrument="HSC", exposure=904024, detector=50)
def testPut(self):
    with lsst.utils.tests.temporaryDirectory() as root:
        Butler3.makeRepo(root)
        butler3 = Butler3(root, run="three")
        butler3.registry.registerDatasetType(
            DatasetType("cat", ["htm7"], "SourceCatalog",
                        universe=butler3.registry.dimensions)
        )
        butlerShim = ShimButler(butler3)
        catIn = SourceCatalog(SourceCatalog.Table.makeMinimalSchema())
        catIn.addNew().set("id", 42)
        butlerShim.put(catIn, "cat", htm7=131072)
        catOut = butlerShim.get("cat", htm7=131072)
        self.assertEqual(list(catIn["id"]), list(catOut["id"]))
def setUp(self):
    self.root = tempfile.mkdtemp(dir=TESTDIR)

    # Make a new repository in one place
    self.dir1 = os.path.join(self.root, "dir1")
    Butler.makeRepo(self.dir1, config=Config(self.configFile))

    # Move the yaml file to a different place and add a "root"
    self.dir2 = os.path.join(self.root, "dir2")
    safeMakeDir(self.dir2)
    configFile1 = os.path.join(self.dir1, "butler.yaml")
    config = Config(configFile1)
    config["root"] = self.dir1
    configFile2 = os.path.join(self.dir2, "butler2.yaml")
    config.dumpToFile(configFile2)
    os.remove(configFile1)
    self.tmpConfigFile = configFile2
def testPut(self):
    with TemporaryDirectory(dir=TESTDIR) as root:
        Butler3.makeRepo(root)
        butler3 = Butler3(root, run="three")
        butler3.registry.registerDatasetType(
            DatasetType("cat", ["label"], "SourceCatalog",
                        universe=butler3.registry.dimensions)
        )
        butlerShim = ShimButler(butler3)
        catIn = SourceCatalog(SourceCatalog.Table.makeMinimalSchema())
        catIn.addNew().set("id", 42)
        butlerShim.put(catIn, "cat", label="four")
        catOut = butlerShim.get("cat", label="four")
        self.assertEqual(list(catIn["id"]), list(catOut["id"]))
        # Without this the temporary directory cannot be removed
        # if on NFS because these objects have open SQLite registries.
        del butler3
        del butlerShim
def make_butler(self) -> Butler:
    """Return a new Butler instance on each call."""
    config = Config()
    config["root"] = self.root
    config["registry", "db"] = f"sqlite:///{self.root}/gen3.sqlite3"
    butler = Butler(Butler.makeRepo(self.root, config=config),
                    writeable=True)
    DatastoreMock.apply(butler)
    return butler
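A minimal sketch of how a test might consume make_butler; the assumption here is that DatastoreMock.apply stubs out datastore I/O so the test exercises only the registry, and that self.root has been set by the fixture.

butler = self.make_butler()
assert butler.isWriteable()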
def testMakeRepo(self):
    """Test that we can write butler configuration to a new repository
    via the Butler.makeRepo interface and then instantiate a butler from
    the repo root.
    """
    # Do not run the test if we know this datastore configuration does
    # not support a file system root
    if self.fullConfigKey is None:
        return

    # Remove the file created in setUp
    os.unlink(self.tmpConfigFile)

    butlerConfig = Butler.makeRepo(self.root,
                                   config=Config(self.configFile))
    limited = Config(self.configFile)
    butler1 = Butler(butlerConfig)
    butlerConfig = Butler.makeRepo(self.root, standalone=True,
                                   createRegistry=False,
                                   config=Config(self.configFile))
    full = Config(self.tmpConfigFile)
    butler2 = Butler(butlerConfig)

    # Butlers should have the same configuration regardless of whether
    # defaults were expanded.
    self.assertEqual(butler1._config, butler2._config)

    # Config files loaded directly should not be the same.
    self.assertNotEqual(limited, full)

    # Make sure "limited" doesn't have a few keys we know it should be
    # inheriting from defaults.
    self.assertIn(self.fullConfigKey, full)
    self.assertNotIn(self.fullConfigKey, limited)

    # Collections don't appear until something is put in them
    collections1 = butler1.registry.getAllCollections()
    self.assertEqual(collections1, set())
    self.assertEqual(butler2.registry.getAllCollections(), collections1)

    # Check that a config with no associated file name will not
    # work properly with relocatable Butler repo
    butlerConfig.configFile = None
    with self.assertRaises(ValueError):
        Butler(butlerConfig)
def testGetCollections(self):
    run = "ingest/run"
    tag = "ingest"
    expected = {"collections": [run, tag]}
    runner = LogCliRunner()
    with runner.isolated_filesystem():
        butlerCfg = Butler.makeRepo("here")
        # the purpose of this call is to create some collections
        _ = Butler(butlerCfg, run=run, tags=[tag], collections=[tag])
        result = runner.invoke(cli, ["query-collections", "here"])
        self.assertEqual(expected, yaml.safe_load(result.output))
def testQueryDatasetTypes(self):
    self.maxDiff = None
    datasetName = "test"
    instrumentDimension = "instrument"
    visitDimension = "visit"
    storageClassName = "testDatasetType"
    expectedNotVerbose = AstropyTable((("test",),), names=("name",))
    runner = LogCliRunner()
    with runner.isolated_filesystem():
        butlerCfg = Butler.makeRepo("here")
        butler = Butler(butlerCfg, writeable=True)
        storageClass = StorageClass(storageClassName)
        butler.registry.storageClasses.registerStorageClass(storageClass)
        dimensions = butler.registry.dimensions.extract(
            (instrumentDimension, visitDimension))
        datasetType = DatasetType(datasetName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        # check not-verbose output:
        result = runner.invoke(cli, ["query-dataset-types", "here"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        self.assertAstropyTablesEqual(readTable(result.output),
                                      expectedNotVerbose)

        # check glob output:
        result = runner.invoke(cli, ["query-dataset-types", "here", "t*"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        self.assertAstropyTablesEqual(readTable(result.output),
                                      expectedNotVerbose)

        # check verbose output:
        result = runner.invoke(
            cli, ["query-dataset-types", "here", "--verbose"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        expected = AstropyTable(array((
            "test",
            "['band', 'instrument', 'physical_filter', 'visit_system', 'visit']",
            "testDatasetType")),
            names=("name", "dimensions", "storage class"))
        self.assertAstropyTablesEqual(readTable(result.output), expected)

        # Now remove and check that it was removed.
        # First try a non-existent one.
        result = runner.invoke(cli,
                               ["remove-dataset-type", "here", "unreal"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))

        # Now one we know has been registered.
        result = runner.invoke(
            cli, ["remove-dataset-type", "here", datasetName])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))

        # and check that it has gone
        result = runner.invoke(cli, ["query-dataset-types", "here"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        self.assertIn("No results", result.output)
def makeButlerRepo(root, config=None, standalone=False, override=False,
                   outfile=None):
    """Make a new Butler repository.

    Parameters
    ----------
    root : `str`
        Location to seed a butler repository.
    config : `str`, optional
        Configuration to seed the repository.
    standalone : `bool`, optional
        If `True` a fully expanded configuration will be written.
    override : `bool`, optional
        If `True` a root provided in the supplied config will not be
        overwritten.
    outfile : `str`, optional
        Path to output configuration. This can be left `None` if the
        configuration is to be written to ``root``.
    """
    forceConfigRoot = not override
    config = Config(config) if config is not None else None
    Butler.makeRepo(root, config=config, standalone=standalone,
                    forceConfigRoot=forceConfigRoot, outfile=outfile)
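A usage sketch for makeButlerRepo; both paths and the seed config name are placeholders.

makeButlerRepo("/data/gen3repo", config="seed.yaml", standalone=True,
               outfile="/data/configs/butler.yaml")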
def setUp(self):
    config = Config(self.configFile)
    uri = ButlerURI(config[".datastore.datastore.root"])
    self.bucketName = uri.netloc

    if self.useTempRoot:
        self.root = self.genRoot()
    rooturi = f"s3://{self.bucketName}/{self.root}"
    config.update({"datastore": {"datastore": {"root": rooturi}}})

    # set up some fake credentials if they do not exist
    self.usingDummyCredentials = setAwsEnvCredentials()

    # MOTO needs to know that we expect Bucket bucketname to exist
    # (this used to be the class attribute bucketName)
    s3 = boto3.resource("s3")
    s3.create_bucket(Bucket=self.bucketName)

    self.datastoreStr = f"datastore={self.root}"
    self.datastoreName = [f"S3Datastore@{rooturi}"]
    Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
    self.tmpConfigFile = os.path.join(rooturi, "butler.yaml")
def setUp(self):
    np.random.seed(12345)
    self.datasets = ['forced_src', 'meas', 'ref']
    self.bands = ['g', 'r']
    self.columns = ['coord_ra', 'coord_dec']
    self.nRecords = 5
    self.dataDict = {
        "coord_ra": [3.77654137, 3.77643059, 3.77621148,
                     3.77611944, 3.77610396],
        "coord_dec": [0.01127624, 0.01127787, 0.01127543,
                      0.01127543, 0.01127543]}

    # Set up butler
    self.root = tempfile.mkdtemp(dir=ROOT)
    Butler.makeRepo(self.root)
    self.butler = Butler(self.root, run="test_run")
    self.datasetType = DatasetType("data", dimensions=('htm7',),
                                   storageClass="DataFrame",
                                   universe=self.butler.registry.dimensions)
    self.butler.registry.registerDatasetType(self.datasetType)
def _setupNewButler(butler: Butler, outputLocation: ResourcePath,
                    dirExists: bool) -> Butler:
    # Set up the new butler object at the specified location
    if dirExists:
        # Remove the existing registry file; if the code got this far and
        # it exists, clobber must be true
        executionRegistry = outputLocation.join("gen3.sqlite3")
        if executionRegistry.exists():
            executionRegistry.remove()
    else:
        outputLocation.mkdir()

    # Copy the existing butler config, modifying the location of the
    # registry to the specified location.
    # Preserve the root path from the existing butler so things like
    # file data stores continue to look at the old location.
    config = Config(butler._config)
    config["root"] = outputLocation.geturl()
    config["allow_put_of_predefined_dataset"] = True
    config["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"

    # Remove any namespace that may be set in main registry.
    config.pop(("registry", "namespace"), None)

    # Record the current root of the datastore if it is specified relative
    # to the butler root.
    if config.get(("datastore", "root")) == BUTLER_ROOT_TAG \
            and butler._config.configDir is not None:
        config["datastore", "root"] = butler._config.configDir.geturl()
    config["datastore", "trust_get_request"] = True

    # Requires that we use the dimension configuration from the original
    # butler and not use the defaults.
    config = Butler.makeRepo(
        root=outputLocation,
        config=config,
        dimensionConfig=butler.registry.dimensions.dimensionConfig,
        overwrite=True,
        forceConfigRoot=False,
    )

    # Return a newly created butler
    return Butler(config, writeable=True)
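A hedged usage sketch for _setupNewButler, assuming ResourcePath comes from lsst.resources and a writeable source butler already exists; the output path is a placeholder.

from lsst.resources import ResourcePath

outputLocation = ResourcePath("/scratch/execution_butler/",
                              forceDirectory=True)
newButler = _setupNewButler(butler, outputLocation, dirExists=False)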
def _make_repo_with_instruments(repo_dir, instruments):
    """Create a repository and populate it with instrument registrations
    from an existing repository.

    Parameters
    ----------
    repo_dir : `str`
        The directory in which to create the new repository.
    instruments : iterable [`lsst.obs.base.Instrument`]
        The instruments to register in the new repository.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A writeable Butler to the new repo.
    """
    config = Butler.makeRepo(repo_dir)
    repo = Butler(config, writeable=True)
    for instrument in instruments:
        instrument.register(repo.registry)
    return repo
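For example, registering HSC in a new repository could look like the following, assuming HyperSuprimeCam is importable from lsst.obs.subaru as in recent stack versions; the repo path is a placeholder.

from lsst.obs.subaru import HyperSuprimeCam

repo = _make_repo_with_instruments("/tmp/new_repo", [HyperSuprimeCam()])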
def testQueryDatasetTypes(self):
    self.maxDiff = None
    datasetName = "test"
    instrumentDimension = "instrument"
    visitDimension = "visit"
    storageClassName = "testDatasetType"
    expectedNotVerbose = {"datasetTypes": [datasetName]}
    runner = LogCliRunner()
    with runner.isolated_filesystem():
        butlerCfg = Butler.makeRepo("here")
        butler = Butler(butlerCfg, writeable=True)
        storageClass = StorageClass(storageClassName)
        butler.registry.storageClasses.registerStorageClass(storageClass)
        dimensions = butler.registry.dimensions.extract(
            (instrumentDimension, visitDimension))
        datasetType = DatasetType(datasetName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        # check not-verbose output:
        result = runner.invoke(cli, ["query-dataset-types", "here"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        self.assertEqual(expectedNotVerbose, yaml.safe_load(result.output))

        # check glob output:
        result = runner.invoke(cli, ["query-dataset-types", "here", "t*"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        self.assertEqual(expectedNotVerbose, yaml.safe_load(result.output))

        # check verbose output:
        result = runner.invoke(
            cli, ["query-dataset-types", "here", "--verbose"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        response = yaml.safe_load(result.output)
        # The output dimension names contain all required dimensions,
        # more than the registered dimensions, so verify the expected
        # components individually.
        self.assertEqual(response["datasetTypes"][0]["name"], datasetName)
        self.assertEqual(response["datasetTypes"][0]["storageClass"],
                         storageClassName)
        self.assertIn(instrumentDimension,
                      response["datasetTypes"][0]["dimensions"])
        self.assertIn(visitDimension,
                      response["datasetTypes"][0]["dimensions"])
def testGetCollections(self):
    run = "ingest/run"
    tag = "tag"
    with self.runner.isolated_filesystem():
        butlerCfg = Butler.makeRepo("here")
        # the purpose of this call is to create some collections
        butler = Butler(butlerCfg, run=run, collections=[tag],
                        writeable=True)
        butler.registry.registerCollection(tag, CollectionType.TAGGED)

        # Verify collections that were created are found by
        # query-collections.
        result = self.runner.invoke(cli, ["query-collections", "here"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        expected = Table((("ingest/run", "tag"), ("RUN", "TAGGED")),
                         names=("Name", "Type"))
        self.assertAstropyTablesEqual(readTable(result.output), expected)

        # Verify that with a glob argument, only collections whose names
        # match the specified pattern are returned.
        result = self.runner.invoke(cli,
                                    ["query-collections", "here", "t*"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        expected = Table((("tag",), ("TAGGED",)), names=("Name", "Type"))
        self.assertAstropyTablesEqual(readTable(result.output), expected)

        # Verify that with a collection type argument, only collections
        # of that type are returned.
        result = self.runner.invoke(
            cli, ["query-collections", "here", "--collection-type", "RUN"])
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        expected = Table((("ingest/run",), ("RUN",)),
                         names=("Name", "Type"))
        self.assertAstropyTablesEqual(readTable(result.output), expected)
def setUp(self):
    self.root = makeTestTempDir(TESTDIR)
    Butler.makeRepo(self.root)
    # Create a random image for testing
    self.rng = Random(self.RANDOM_SEED)
parser.add_argument(
    "--outfile", "-f", default=None, type=str,
    help="Name of output file to receive repository configuration."
         " Default is to write butler.yaml into the specified root.")
parser.add_argument("--verbose", "-v", action="store_true",
                    help="Turn on debug reporting.")
parser.add_argument(
    "--override", "-o", action="store_true",
    help="Allow values in the supplied config to override any root settings.")
args = parser.parse_args()

if args.verbose:
    logging.basicConfig(level=logging.DEBUG)

forceConfigRoot = not args.override
config = Config(args.config) if args.config is not None else None
Butler.makeRepo(args.root, config=config, standalone=args.standalone,
                forceConfigRoot=forceConfigRoot, outfile=args.outfile)