def testOperators(self):
    """Verify copy semantics and equality comparison of Config objects."""
    original = Config({"a": {"b": 1}, "c": 2})
    duplicate = original.copy()
    self.assertEqual(original, duplicate)
    self.assertIsInstance(duplicate, Config)
    # Mutating the copy must leave the original untouched.
    duplicate[".a.b"] = 5
    self.assertNotEqual(original, duplicate)
def testSerializedString(self):
    """Test that we can create configs from strings.

    Exercises ``Config.fromString`` for both supported formats, the
    error paths for unknown/mismatched formats, and a full
    JSON -> YAML -> JSON dump round trip.
    """
    serialized = {
        "yaml": """
testing: hello
formatters:
  calexp: 3""",
        "json": '{"testing": "hello", "formatters": {"calexp": 3}}',
    }

    # Renamed locals: the originals shadowed the builtin ``format`` and
    # the stdlib ``yaml``/``json`` module names.
    for fmt, content in serialized.items():
        c = Config.fromString(content, format=fmt)
        self.assertEqual(c["formatters", "calexp"], 3)
        self.assertEqual(c["testing"], "hello")

    with self.assertRaises(ValueError):
        Config.fromString("", format="unknown")

    with self.assertRaises(ValueError):
        Config.fromString(serialized["yaml"], format="json")

    # This JSON can be parsed by YAML parser
    j = Config.fromString(serialized["json"])
    y = Config.fromString(serialized["yaml"])
    self.assertEqual(j["formatters", "calexp"], 3)
    self.assertEqual(j.toDict(), y.toDict())

    # Round trip JSON -> Config -> YAML -> Config -> JSON -> Config
    c1 = Config.fromString(serialized["json"], format="json")
    yaml_str = c1.dump(format="yaml")
    c2 = Config.fromString(yaml_str, format="yaml")
    json_str = c2.dump(format="json")
    c3 = Config.fromString(json_str, format="json")
    self.assertEqual(c3.toDict(), c1.toDict())
def testIncludeConfigs(self):
    """Test the special includeConfigs key for pulling in additional files."""

    def check_common(cfg):
        # Values that both include scenarios must agree on.
        self.assertEqual(cfg["comp", "item2"], "hello")
        self.assertEqual(cfg["comp", "item50"], 5000)
        self.assertEqual(cfg["comp", "item1"], "first")
        self.assertEqual(cfg["comp", "item10"], "tenth")
        self.assertEqual(cfg["comp", "item11"], "eleventh")
        self.assertEqual(cfg["unrelated"], 1)
        self.assertEqual(cfg["addon", "comp", "item11"], -1)

    c = Config(os.path.join(self.configDir, "configIncludes.yaml"))
    check_common(c)
    self.assertEqual(c["addon", "comp", "item1"], "posix")
    self.assertEqual(c["addon", "comp", "item50"], 500)

    # Now test with an environment variable in includeConfigs
    with modified_environment(SPECIAL_BUTLER_DIR=self.configDir3):
        c = Config(os.path.join(self.configDir, "configIncludesEnv.yaml"))
        check_common(c)
        self.assertEqual(c["addon", "comp", "item1"], "envvar")
        self.assertEqual(c["addon", "comp", "item50"], 501)

    # Pointing the variable at a directory without the file must fail.
    with modified_environment(SPECIAL_BUTLER_DIR=self.configDir2):
        with self.assertRaises(FileNotFoundError):
            Config(os.path.join(self.configDir, "configIncludesEnv.yaml"))
def setConfigRoot(cls, root, config, full, overwrite=True):
    """Set any filesystem-dependent config options for this Datastore to
    be appropriate for a new empty repository with the given root.

    Parameters
    ----------
    root : `str`
        URI to the root of the data repository.
    config : `Config`
        A `Config` to update. Only the subset understood by this
        component will be updated. Will not expand defaults.
    full : `Config`
        A complete config with all defaults expanded that can be
        converted to a `DatastoreConfig`. Read-only and will not be
        modified by this method. Repository-specific options that should
        not be obtained from defaults when Butler instances are
        constructed should be copied from ``full`` to ``config``.
    overwrite : `bool`, optional
        If `False`, do not modify a value in ``config`` if the value
        already exists. Default is always to overwrite with the provided
        ``root``.

    Notes
    -----
    If a keyword is explicitly defined in the supplied ``config`` it
    will not be overridden by this method if ``overwrite`` is `False`.
    This allows explicit values set in external configs to be retained.
    """
    # Delegate to the shared helper: install the new "root" and copy the
    # "cls" and ("records", "table") entries from the expanded defaults.
    Config.updateParameters(DatastoreConfig, config, full,
                            toUpdate={"root": root},
                            toCopy=("cls", ("records", "table")),
                            overwrite=overwrite)
def testDict(self):
    """Test toDict()"""
    cfg = Config({"a": {"b": 1}, "c": 2})
    self.assertIsInstance(cfg["a"], Config)
    as_dict = cfg.toDict()
    self.assertIsInstance(as_dict["a"], dict)
    self.assertEqual(as_dict["a"], cfg["a"])
    # The returned dict is detached: mutating it must not change the Config.
    as_dict["a"]["c"] = 2
    self.assertNotEqual(as_dict["a"], cfg["a"])
def testStringInclude(self):
    """Using include directives in strings"""
    # An !include with an absolute path should resolve and load.
    include_target = os.path.join(self.configDir, 'testconfig.yaml')
    c = Config.fromYaml(f"something: !include {include_target}")
    self.assertEqual(c["something", "comp", "item3"], 3)

    with self.assertRaises(FileNotFoundError) as cm:
        Config.fromYaml("something: !include /not/here.yaml")
    # Test that it really was trying to open the absolute path
    self.assertIn("'/not/here.yaml'", str(cm.exception))
def testBadConfig(self):
    """Unusable constructor arguments must raise the right exceptions."""
    # Structurally bad arguments: wrong type, or an existing file with a
    # bad extension.
    for arg in ([], __file__):
        with self.assertRaises(RuntimeError):
            Config(arg)
    # Missing files and directories: bad-extension file, missing
    # directory, and a good extension on a missing file.
    for arg in ("file.fits", "b/c/d/", "file.yaml"):
        with self.assertRaises(FileNotFoundError):
            Config(arg)
def setUpClass(cls):
    """Create a new butler once only."""
    cls.storageClassFactory = StorageClassFactory()
    cls.root = tempfile.mkdtemp(dir=TESTDIR)

    data_ids = {
        "instrument": [INSTRUMENT_NAME],
        "detector": [0, 1, 2, 3, 4, 5],
        "exposure": [11, 22],
    }

    configURI = ButlerURI("resource://spherex/configs", forceDirectory=True)
    butlerConfig = Config(configURI.join("butler.yaml"))
    # in-memory db is being phased out
    # butlerConfig["registry", "db"] = 'sqlite:///:memory:'
    cls.creatorButler = makeTestRepo(
        cls.root,
        data_ids,
        config=butlerConfig,
        dimensionConfig=configURI.join("dimensions.yaml"),
    )

    # Register one dataset type per configured formatter.
    for formatter in FORMATTERS:
        storageClass = cls.storageClassFactory.getStorageClass(
            formatter["storage_class"])
        addDatasetType(
            cls.creatorButler, formatter["dataset_type"], set(data_ids), storageClass)
def testConfigExistence(self):
    """The stored repository root must round-trip cleanly as a URI."""
    c = Config(self.tmpConfigFile)
    actual = ButlerURI(c["root"])
    expected = ButlerURI(self.root)
    self.assertEqual(actual.geturl(), expected.geturl())
    self.assertNotIn(":", actual.path, "Check for URI concatenated with normal path")
def testImportExport(self):
    """Round-trip datasets through export and import into a fresh repo."""
    # Run put/get tests just to create and populate a repo.
    storageClass = self.storageClassFactory.getStorageClass(
        "StructuredDataNoComponents")
    exportButler = self.runPutGetTest(storageClass, "test_metric")

    # The source repo must contain at least one dataset to export.
    datasets = list(exportButler.registry.queryDatasets(..., collections=...))
    self.assertGreater(len(datasets), 0)

    # Export those datasets. We used TemporaryDirectory because there
    # doesn't seem to be a way to get the filename (as opposed to the file
    # object) from any of tempfile's temporary-file context managers.
    with tempfile.TemporaryDirectory() as exportDir:
        # TODO: When PosixDatastore supports transfer-on-exist, add tests
        # for that.
        exportFile = os.path.join(exportDir, "exports.yaml")
        with exportButler.export(filename=exportFile) as export:
            export.saveDatasets(datasets)
        self.assertTrue(os.path.exists(exportFile))

        with tempfile.TemporaryDirectory() as importDir:
            Butler.makeRepo(importDir, config=Config(self.configFile))
            importButler = Butler(importDir, run="ingest")
            importButler.import_(
                filename=exportFile,
                directory=exportButler.datastore.root,
                transfer="symlink",
            )
            for ref in datasets:
                with self.subTest(ref=ref):
                    # Test for existence by passing in the DatasetType and
                    # data ID separately, to avoid lookup by dataset_id.
                    self.assertTrue(
                        importButler.datasetExists(ref.datasetType, ref.dataId))
def makeSimpleButler(root: str, run: str = "test", inMemory: bool = True) -> Butler:
    """Create new data butler instance.

    Parameters
    ----------
    root : `str`
        Path or URI to the root location of the new repository.
    run : `str`, optional
        Run collection name.
    inMemory : `bool`, optional
        If true make in-memory repository.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    """
    location = ResourcePath(root, forceDirectory=True)
    if not location.isLocal:
        raise ValueError(f"Only works with local root not {location}")

    config = Config()
    if not inMemory:
        # A persistent repository needs an on-disk registry database and a
        # file-backed datastore.
        config["registry", "db"] = f"sqlite:///{location.ospath}/gen3.sqlite"
        config["datastore", "cls"] = "lsst.daf.butler.datastores.fileDatastore.FileDatastore"

    repo = butlerTests.makeTestRepo(str(location), {}, config=config)
    return Butler(butler=repo, run=run)
def setUp(self):
    """Make a repo whose config is written into a second directory."""
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    self.root2 = tempfile.mkdtemp(dir=TESTDIR)
    # The config "file" here is a directory, exercising directory outfiles.
    self.tmpConfigFile = self.root2
    Butler.makeRepo(
        self.root, config=Config(self.configFile), outfile=self.tmpConfigFile
    )
def setUp(self):
    """Make a repo whose config file has a non-standard name elsewhere."""
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    self.root2 = tempfile.mkdtemp(dir=TESTDIR)
    self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
    Butler.makeRepo(
        self.root, config=Config(self.configFile), outfile=self.tmpConfigFile
    )
def setUp(self):
    """Make a repo, then relocate its config with an explicit "root"."""
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    # Make a new repository in one place
    self.dir1 = os.path.join(self.root, "dir1")
    Butler.makeRepo(self.dir1, config=Config(self.configFile))

    # Move the yaml file to a different place and add a "root"
    self.dir2 = os.path.join(self.root, "dir2")
    safeMakeDir(self.dir2)
    source = os.path.join(self.dir1, "butler.yaml")
    relocated = Config(source)
    relocated["root"] = self.dir1
    target = os.path.join(self.dir2, "butler2.yaml")
    relocated.dumpToFile(target)
    os.remove(source)
    self.tmpConfigFile = target
def testDump(self):
    """Test that we can write and read a configuration."""
    original = Config({"1": 2, "3": 4, "key3": 6, "dict": {"a": 1, "b": 2}})
    target = os.path.join(self.tmpdir, "test.yaml")
    original.dumpToUri(target)
    self.assertEqual(Config(target), original)

    # Overwriting is permitted when requested explicitly...
    original.dumpToUri(target, overwrite=True)
    # ...and refused otherwise.
    with self.assertRaises(FileExistsError):
        original.dumpToUri(target, overwrite=False)
def setUp(self):
    """Make a repo whose config location is expressed as a URL."""
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    self.root2 = tempfile.mkdtemp(dir=TESTDIR)
    config_path = os.path.join(self.root2, "something.yaml")
    self.tmpConfigFile = ButlerURI(config_path).geturl()
    Butler.makeRepo(
        self.root, config=Config(self.configFile), outfile=self.tmpConfigFile
    )
def setUp(self):
    """Create a new butler root for each test."""
    if not self.useTempRoot:
        # Reuse the static configuration directly.
        self.root = None
        self.tmpConfigFile = self.configFile
        return
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    Butler.makeRepo(self.root, config=Config(self.configFile))
    self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
def make_butler(self) -> Butler:
    """Return new Butler instance on each call."""
    cfg = Config()
    cfg["root"] = self.root
    cfg["registry", "db"] = f"sqlite:///{self.root}/gen3.sqlite3"
    repo_config = Butler.makeRepo(self.root, config=cfg)
    butler = Butler(repo_config, writeable=True)
    # Replace datastore operations with mocks for registry-only testing.
    DatastoreMock.apply(butler)
    return butler
def testStringYaml(self):
    """Test that we can create configs from strings"""
    yaml_text = """
testing: hello
formatters:
  calexp: 3"""
    c = Config.fromYaml(yaml_text)
    self.assertEqual(c["formatters", "calexp"], 3)
    self.assertEqual(c["testing"], "hello")
def testMerge(self):
    """merge() keeps existing values and only fills in missing keys."""
    base = Config({"a": 1, "c": 3})
    other = Config({"a": 4, "b": 2})
    base.merge(other)
    self.assertEqual(base, {"a": 1, "b": 2, "c": 3})
    # Check that the merge source was not changed
    self.assertEqual(other, {"a": 4, "b": 2})

    # Repeat with a simple dict
    base.merge({"b": 5, "d": 42})
    self.assertEqual(base, {"a": 1, "b": 2, "c": 3, "d": 42})

    # Non-mapping arguments are rejected.
    with self.assertRaises(TypeError):
        base.merge([1, 2, 3])
def testPickle(self):
    """Test pickle support.
    """
    self.root = tempfile.mkdtemp(dir=TESTDIR)
    Butler.makeRepo(self.root, config=Config(self.configFile))
    self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
    original = Butler(self.tmpConfigFile, run="ingest")
    # A pickle round trip must preserve both type and configuration.
    restored = pickle.loads(pickle.dumps(original))
    self.assertIsInstance(restored, Butler)
    self.assertEqual(restored.config, original.config)
def testMakeRepo(self):
    """Test that we can write butler configuration to a new repository via
    the Butler.makeRepo interface and then instantiate a butler from the
    repo root.
    """
    # Skip when this datastore configuration has no file-system root.
    if self.fullConfigKey is None:
        return

    # Remove the file created in setUp
    os.unlink(self.tmpConfigFile)

    repoConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
    limited = Config(self.configFile)
    butler1 = Butler(repoConfig)
    repoConfig = Butler.makeRepo(
        self.root, standalone=True, createRegistry=False,
        config=Config(self.configFile))
    full = Config(self.tmpConfigFile)
    butler2 = Butler(repoConfig)
    # Butlers should have the same configuration regardless of whether
    # defaults were expanded.
    self.assertEqual(butler1._config, butler2._config)
    # Config files loaded directly should not be the same.
    self.assertNotEqual(limited, full)
    # Make sure "limited" doesn't have a few keys we know it should be
    # inheriting from defaults.
    self.assertIn(self.fullConfigKey, full)
    self.assertNotIn(self.fullConfigKey, limited)

    # Collections don't appear until something is put in them
    collections1 = butler1.registry.getAllCollections()
    self.assertEqual(collections1, set())
    self.assertEqual(butler2.registry.getAllCollections(), collections1)

    # A config with no associated file name cannot support a
    # relocatable Butler repo.
    repoConfig.configFile = None
    with self.assertRaises(ValueError):
        Butler(repoConfig)
def testEscape(self):
    """Delimiter escaping and alternate delimiters in hierarchical keys."""
    c = Config({"a": {"foo.bar": 1}, "b😂c": {"bar_baz": 2}})
    # Each key spells the same path using a different delimiter choice,
    # escaping the delimiter with a backslash where it appears in a name.
    for key, expected in (
        (r".a.foo\.bar", 1),
        (":a:foo.bar", 1),
        (".b😂c.bar_baz", 2),
        (r"😂b\😂c😂bar_baz", 2),
        (r"\a\foo.bar", 1),
        ("\ra\rfoo.bar", 1),
    ):
        self.assertEqual(c[key], expected)
    # An escaped delimiter combined with a trailing delimiter is invalid.
    with self.assertRaises(ValueError):
        c[".a.foo\\.bar\r"]
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use."""
    # Registry access goes through the remote registry; the datastore
    # points at the server-side butler root.
    config_str = f"""
datastore:
    root: {BUTLER_ROOT}
registry:
    cls: lsst.daf.butler.registries.remote.RemoteRegistry
    db: <butlerRoot>
"""
    return Config.fromString(config_str, format="yaml")
def testResource(self):
    """Loading configuration from python package resource URIs."""
    c = Config("resource://lsst.daf.butler/configs/datastore.yaml")
    self.assertIn("datastore", c)

    # A resource URI can be pulled in with the !include tag.
    text = """
toplevel: true
resource: !include resource://lsst.daf.butler/configs/datastore.yaml
"""
    c = Config.fromYaml(text)
    self.assertIn(("resource", "datastore", "cls"), c)

    # ...and also via the includeConfigs key.
    text = """
toplevel: true
resource:
  includeConfigs: resource://lsst.daf.butler/configs/datastore.yaml
"""
    c = Config.fromYaml(text)
    self.assertIn(("resource", "datastore", "cls"), c)
def testUpdate(self):
    """update() merges hierarchically and indexes into sequences."""
    c = Config({"a": {"b": 1}})
    c.update({"a": {"c": 2}})
    self.assertEqual(c[".a.b"], 1)
    self.assertEqual(c[".a.c"], 2)
    c.update({"a": {"d": [3, 4]}})
    self.assertEqual(c[".a.d.0"], 3)
    c.update({"z": [5, 6, {"g": 2, "h": 3}]})
    self.assertEqual(c[".z.1"], 6)

    # Retrieving a sub-config detaches it from the parent.
    child = c[".z.2"]
    self.assertEqual(child["g"], 2)
    child.update({"h": 4, "j": 5})
    self.assertEqual(child["h"], 4)
    self.assertNotIn(".z.2.j", c)
    self.assertNotEqual(c[".z.2.h"], 4)

    # A sequence is not a valid update source.
    with self.assertRaises(RuntimeError):
        c.update([1, 2, 3])
def _setupNewButler(butler: Butler, outputLocation: ResourcePath, dirExists: bool) -> Butler:
    """Create the execution butler repo at ``outputLocation``, derived from
    the configuration of ``butler``.
    """
    # Prepare the output directory: an existing directory means clobbering
    # was approved upstream, so drop any stale registry database.
    if dirExists:
        staleRegistry = outputLocation.join("gen3.sqlite3")
        if staleRegistry.exists():
            staleRegistry.remove()
    else:
        outputLocation.mkdir()

    # Copy the existing butler config, modifying the location of the
    # registry to the specified location.
    # Preserve the root path from the existing butler so things like
    # file data stores continue to look at the old location.
    newConfig = Config(butler._config)
    newConfig["root"] = outputLocation.geturl()
    newConfig["allow_put_of_predefined_dataset"] = True
    newConfig["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"

    # Remove any namespace that may be set in main registry.
    newConfig.pop(("registry", "namespace"), None)

    # record the current root of the datastore if it is specified relative
    # to the butler root
    if newConfig.get(("datastore", "root")) == BUTLER_ROOT_TAG and butler._config.configDir is not None:
        newConfig["datastore", "root"] = butler._config.configDir.geturl()
    newConfig["datastore", "trust_get_request"] = True

    # Requires that we use the dimension configuration from the original
    # butler and not use the defaults.
    newConfig = Butler.makeRepo(
        root=outputLocation,
        config=newConfig,
        dimensionConfig=butler.registry.dimensions.dimensionConfig,
        overwrite=True,
        forceConfigRoot=False,
    )

    # Return a newly created butler
    return Butler(newConfig, writeable=True)
def testInclude(self):
    """Read a config that has an include directive"""
    c = Config(os.path.join(self.configDir, "testinclude.yaml"))
    # Values pulled in at various nesting depths, including inside lists.
    for key, expected in (
        (".comp1.item1", 58),
        (".comp2.comp.item1", 1),
        (".comp3.1.comp.item1", "posix"),
        (".comp4.0.comp.item1", "posix"),
        (".comp4.1.comp.item1", 1),
        (".comp5.comp6.comp.item1", "posix"),
    ):
        self.assertEqual(c[key], expected)

    # Test a specific name and then test that all
    # returned names are "in" the config.
    names = c.names()
    self.assertIn(c._D.join(("", "comp3", "1", "comp", "item1")), names)
    for n in names:
        self.assertIn(n, c)

    # Test that override delimiter works
    delimiter = "-"
    names = c.names(delimiter=delimiter)
    self.assertIn(delimiter.join(("", "comp3", "1", "comp", "item1")), names)
def obscore_export(
    repo: str,
    destination: str,
    config: str,
    format: str,
    where: Optional[str],
    collections: Iterable[str],
    dataset_type: Iterable[str],
) -> None:
    """Export Butler datasets as ObsCore Data Model in parquet format.

    Parameters
    ----------
    repo : `str`
        URI to the butler repository.
    destination : `str`
        Location of the output file.
    config : `str`
        Location of the configuration file.
    format : `str`
        Output format, 'csv' or 'parquet'
    where : `str`
        Optional user expression, if provided overrides one in ``config``.
    collections : `iterable` [ `str` ]
        Optional collection names, if provided overrides one in ``config``.
    dataset_type : `iterable` [ `str` ]
        Optional dataset type names; if provided, restricts the export to
        this subset of the types defined in ``config``.

    Raises
    ------
    ValueError
        Raised if ``dataset_type`` names a type absent from the
        configuration, or if ``format`` is not a supported output format.
    """
    butler = Butler(repo, writeable=False)

    config_data = Config(config)
    cfg = ExporterConfig.parse_obj(config_data)
    if where:
        cfg.where = where
    if collections:
        cfg.collections = list(collections)

    if dataset_type:
        dataset_type_set = set(dataset_type)
        # Check that configuration has all requested dataset types.
        if not dataset_type_set.issubset(cfg.dataset_types):
            extras = dataset_type_set - set(cfg.dataset_types)
            raise ValueError(f"Dataset types {extras} are not defined in configuration file.")
        # Remove dataset types that are not needed.
        cfg.dataset_types = {
            key: value for key, value in cfg.dataset_types.items() if key in dataset_type_set
        }

    exporter = ObscoreExporter(butler, cfg)
    if format == "parquet":
        exporter.to_parquet(destination)
    elif format == "csv":
        exporter.to_csv(destination)
    else:
        # Fixed: "{format:r}" is an invalid format spec and raised
        # "Unknown format code 'r'"; "!r" is the repr conversion intended.
        raise ValueError(f"Unexpected output format {format!r}")
def _makeArgs(registryConfig=None, **kwargs): """Return parsed command line arguments. By default butler_config is set to `Config` populated with some defaults, it can be overridden completely by keyword argument. Parameters ---------- cmd : `str`, optional Produce arguments for this pipetask command. registryConfig : `RegistryConfig`, optional Override for registry configuration. **kwargs Overrides for other arguments. """ # Use a mock to get the default value of arguments to 'run'. mock = unittest.mock.Mock() @click.command(cls=PipetaskCommand) @run_options() def fake_run(ctx, **kwargs): """Fake "pipetask run" command for gathering input arguments. The arguments & options should always match the arguments & options in the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`. """ mock(**kwargs) runner = click.testing.CliRunner() result = runner.invoke(fake_run) if result.exit_code != 0: raise RuntimeError( f"Failure getting default args from 'fake_run': {result}") mock.assert_called_once() args = mock.call_args[1] args["enableLsstDebug"] = args.pop("debug") if "pipeline_actions" not in args: args["pipeline_actions"] = [] args = SimpleNamespace(**args) # override butler_config with our defaults args.butler_config = Config() if registryConfig: args.butler_config["registry"] = registryConfig # The default datastore has a relocatable root, so we need to specify # some root here for it to use args.butler_config.configFile = "." # override arguments from keyword parameters for key, value in kwargs.items(): setattr(args, key, value) return args