def testMatplotlibFormatter(self):
    """Round-trip a matplotlib figure through the Butler.

    Stores the current pyplot figure, checks the stored file is
    byte-identical to a locally saved copy, then verifies the removal
    semantics (``get`` raises, ``pruneDatasets`` removes existence).
    """
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # Does not have to be a random image
    pyplot.imshow([self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   ])
    ref = butler.put(pyplot.gcf(), datasetType)
    uri = butler.getURI(ref)
    # The test after this will not work if we don't have local file.
    # Bug fix: the failure message was a plain string, so "{uri}" was
    # never interpolated; it must be an f-string.
    self.assertEqual(uri.scheme, "file", f"Testing returned URI: {uri}")
    with tempfile.NamedTemporaryFile(suffix=".png") as file:
        pyplot.gcf().savefig(file.name)
        self.assertTrue(filecmp.cmp(uri.path, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    # Reading the Plot back raises ValueError (presumably the storage
    # class has no read support -- confirm against the formatter).
    with self.assertRaises(ValueError):
        butler.get(ref)
    butler.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)
def testHealSparseMapFormatter(self):
    """Test writing and reading a HealSparseMap through the Butler,
    including the coverage component and parameterized (partial and
    degraded) reads.
    """
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("map", [], "HealSparseMap",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    ref = butler.put(self.hspMap, datasetType)
    uri = butler.getURI(ref)
    # The formatter is expected to write a .hsp file.
    self.assertEqual(uri.getExtension(), '.hsp')

    # Retrieve the full map.
    hspMap = butler.get('map')
    self.assertTrue(np.all(hspMap._sparse_map == self.hspMap._sparse_map))
    # Retrieve the coverage map
    coverage = butler.get('map.coverage')
    self.assertTrue(
        np.all(coverage.coverage_mask == self.hspMap.coverage_mask))
    # Retrieve a partial map; only the requested coverage pixels should
    # be marked covered, while overlapping values still match the full map.
    pixels = [0, 6]
    partialMap = butler.get('map', parameters={'pixels': pixels})
    self.assertTrue(
        np.all(np.where(partialMap.coverage_mask)[0] == np.array(pixels)))
    self.assertTrue(np.all(partialMap[0:10000] == self.hspMap[0:10000]))
    self.assertTrue(
        np.all(partialMap[100000:110000] == self.hspMap[100000:110000]))
    # Retrieve a degraded map and compare against degrading in memory.
    degradedMapRead = butler.get('map', parameters={'degrade_nside': 512})
    degradedMap = self.hspMap.degrade(512)
    self.assertTrue(
        np.all(degradedMapRead._sparse_map == degradedMap._sparse_map))
def testMatplotlibFormatter(self):
    """Round-trip a matplotlib figure through the Butler, comparing the
    stored file (via a local copy of the URI) with a locally saved copy,
    then verify removal semantics.
    """
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # Does not have to be a random image
    pyplot.imshow([self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   self.rng.sample(range(50), 10),
                   ])
    ref = butler.put(pyplot.gcf(), datasetType)
    uri = butler.getURI(ref)
    # Following test needs a local file
    with uri.as_local() as local:
        with tempfile.NamedTemporaryFile(suffix=".png") as file:
            pyplot.gcf().savefig(file.name)
            self.assertTrue(
                filecmp.cmp(
                    local.ospath,
                    file.name,
                    shallow=True
                )
            )
    self.assertTrue(butler.datasetExists(ref))
    # Reading the Plot back raises ValueError (presumably no read
    # support in the formatter -- confirm).
    with self.assertRaises(ValueError):
        butler.get(ref)
    butler.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)
def testTransaction(self):
    """Test that registry and datastore changes made inside a butler
    transaction are all rolled back when the transaction raises.
    """
    butler = Butler(self.tmpConfigFile, run="ingest")
    datasetTypeName = "test_metric"
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit"])
    # Dimension records required before a dataset with this data ID can
    # be stored; inserted inside the transaction so they roll back too.
    dimensionEntries = (("instrument", {
        "instrument": "DummyCam"
    }), ("physical_filter", {
        "instrument": "DummyCam",
        "name": "d-r",
        "abstract_filter": "R"
    }), ("visit", {
        "instrument": "DummyCam",
        "id": 42,
        "name": "fortytwo",
        "physical_filter": "d-r"
    }))
    storageClass = self.storageClassFactory.getStorageClass(
        "StructuredData")
    metric = makeExampleMetrics()
    dataId = {"instrument": "DummyCam", "visit": 42}
    with self.assertRaises(TransactionTestError):
        with butler.transaction():
            # Create and register a DatasetType
            datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                              storageClass,
                                              butler.registry)
            # Add needed Dimensions
            for args in dimensionEntries:
                butler.registry.insertDimensionData(*args)
            # Store a dataset
            ref = butler.put(metric, datasetTypeName, dataId)
            self.assertIsInstance(ref, DatasetRef)
            # Test getDirect
            metricOut = butler.getDirect(ref)
            self.assertEqual(metric, metricOut)
            # Test get
            metricOut = butler.get(datasetTypeName, dataId)
            self.assertEqual(metric, metricOut)
            # Check we can get components
            self.assertGetComponents(butler, ref,
                                     ("summary", "data", "output"),
                                     metric)
            raise TransactionTestError(
                "This should roll back the entire transaction")
    # Everything put or registered inside the transaction must be gone.
    with self.assertRaises(KeyError):
        butler.registry.getDatasetType(datasetTypeName)
    with self.assertRaises(LookupError):
        butler.registry.expandDataId(dataId)
    # Should raise KeyError for missing DatasetType
    with self.assertRaises(KeyError):
        butler.get(datasetTypeName, dataId)
    # Also check explicitly if Dataset entry is missing
    self.assertIsNone(
        butler.registry.find(butler.collection, datasetType, dataId))
    # Direct retrieval should not find the file in the Datastore
    with self.assertRaises(FileNotFoundError):
        butler.getDirect(ref)
def verifyIngest(self, files=None, cli=False, fullCheck=False):
    """
    Test that RawIngestTask ingested the expected files.

    Parameters
    ----------
    files : `list` [`str`], or None
        List of files to be ingested, or None to use ``self.file``
    cli : `bool`, optional
        Whether the ingest was performed via the command-line interface;
        not used by this method itself (accepted for caller symmetry --
        confirm against callers).
    fullCheck : `bool`, optional
        If `True`, read the full raw dataset and check component
        consistency. If `False` check that a component can be read
        but do not read the entire raw exposure.

    Notes
    -----
    Reading all the ingested test data can be expensive. The code paths
    for reading the second raw are the same as reading the first so
    we do not gain anything by doing full checks of everything.
    Only read full pixel data for first dataset from file.
    Don't even do that if we are requested not to by the caller.
    This only really affects files that contain multiple datasets.
    """
    butler = Butler(self.root, run=self.outputRun)
    datasets = list(
        butler.registry.queryDatasets("raw", collections=self.outputRun))
    self.assertEqual(len(datasets), len(self.dataIds))

    # Get the URI to the first dataset and check it is inside the
    # datastore
    datasetUri = butler.getURI(datasets[0])
    self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

    for dataId in self.dataIds:
        # Check that we can read metadata from a raw
        metadata = butler.get("raw.metadata", dataId)
        if not fullCheck:
            continue
        # Only perform the expensive full-pixel checks for the first
        # data ID (see Notes above).
        fullCheck = False
        exposure = butler.get("raw", dataId)
        self.assertEqual(metadata.toDict(),
                         exposure.getMetadata().toDict())

        # Since components follow a different code path we check that
        # WCS match and also we check that at least the shape
        # of the image is the same (rather than doing per-pixel equality)
        wcs = butler.get("raw.wcs", dataId)
        self.assertEqual(wcs, exposure.getWcs())

        rawImage = butler.get("raw.image", dataId)
        self.assertEqual(rawImage.getBBox(), exposure.getBBox())

        # check that the filter label got the correct band
        filterLabel = butler.get("raw.filterLabel", dataId)
        self.assertEqual(filterLabel, self.filterLabel)
    self.checkRepo(files=files)
def runExposureCompositePutGetTest(self, storageClass, datasetTypeName):
    """Round-trip an afw ExposureF through the butler and read back both
    the full composite and its individual components.

    Parameters
    ----------
    storageClass : storage class object
        Storage class used to register ``datasetTypeName``.
    datasetTypeName : `str`
        Name under which the exposure is registered and stored.
    """
    example = os.path.join(TESTDIR, "data", "basic", "small.fits")
    exposure = lsst.afw.image.ExposureF(example)
    butler = Butler(self.tmpConfigFile)
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit"])
    self.registerDatasetTypes(datasetTypeName, dimensions, storageClass,
                              butler.registry)
    dataId = {
        "visit": 42,
        "instrument": "DummyCam",
        "physical_filter": "d-r"
    }
    # Add needed Dimensions
    butler.registry.addDimensionEntry("instrument",
                                      {"instrument": "DummyCam"})
    butler.registry.addDimensionEntry("physical_filter", {
        "instrument": "DummyCam",
        "physical_filter": "d-r"
    })
    butler.registry.addDimensionEntry("visit", {
        "instrument": "DummyCam",
        "visit": 42,
        "physical_filter": "d-r"
    })
    butler.put(exposure, datasetTypeName, dataId)
    # Get the full thing
    butler.get(datasetTypeName, dataId)
    # TODO enable check for equality (fix for Exposure type)
    # self.assertEqual(full, exposure)
    # Get a component
    compsRead = {}
    for compName in ("wcs", "image", "mask", "coaddInputs", "psf"):
        compTypeName = DatasetType.nameWithComponent(
            datasetTypeName, compName)
        component = butler.get(compTypeName, dataId)
        # TODO enable check for component instance types
        # compRef = butler.registry.find(butler.run.collection,
        #                                f"calexp.{compName}", dataId)
        # self.assertIsInstance(component,
        #                       compRef.datasetType.storageClass.pytype)
        compsRead[compName] = component
    # Simple check of WCS
    bbox = lsst.afw.geom.Box2I(lsst.afw.geom.Point2I(0, 0),
                               lsst.afw.geom.Extent2I(9, 9))
    self.assertWcsAlmostEqualOverBBox(compsRead["wcs"], exposure.getWcs(),
                                      bbox)
    # With parameters: a sub-bbox read should return exactly that bbox.
    inBBox = Box2I(minimum=Point2I(0, 0), maximum=Point2I(3, 3))
    parameters = dict(bbox=inBBox, origin=LOCAL)
    subset = butler.get(datasetTypeName, dataId, parameters=parameters)
    outBBox = subset.getBBox()
    self.assertEqual(inBBox, outBBox)
class TestCalibrateOutputs(lsst.utils.tests.TestCase):
    """Test the output data products of calibrate task make sense

    This is a regression test and not intended for scientific validation
    """

    def setUp(self):
        # Read-only butler on the ci_imsim output repository; fetch one
        # calexp and its source catalog for the checks below.
        self.butler = Butler(os.path.join(getPackageDir("ci_imsim"),
                                          "DATA"),
                             writeable=False,
                             collections=["LSSTCam-imSim/runs/ci_imsim"])
        self.dataId = {"detector": 55, "visit": 206039, "band": "y"}
        self.calexp = self.butler.get("calexp", self.dataId)
        self.src = self.butler.get("src", self.dataId)

    def testLocalPhotoCalibColumns(self):
        """Check calexp's calibs are consistent with src's photocalib
        columns
        """
        # Check that means are in the same ballpark
        calexpCalib = self.calexp.getPhotoCalib().getCalibrationMean()
        calexpCalibErr = self.calexp.getPhotoCalib().getCalibrationErr()
        srcCalib = np.mean(self.src['base_LocalPhotoCalib'])
        srcCalibErr = np.mean(self.src['base_LocalPhotoCalibErr'])
        self.assertAlmostEqual(calexpCalib, srcCalib, places=3)
        self.assertAlmostEqual(calexpCalibErr, srcCalibErr, places=3)
        # and that calibs evaluated at local positions match a few rows
        randomRows = [0, 8, 20]
        for rowNum in randomRows:
            record = self.src[rowNum]
            localEval = self.calexp.getPhotoCalib().getLocalCalibration(
                record.getCentroid())
            self.assertAlmostEqual(localEval,
                                   record['base_LocalPhotoCalib'])

    def testLocalWcsColumns(self):
        """Check the calexp's wcs match local wcs columns in src
        """
        # Check a few rows:
        randomRows = [1, 9, 21]
        for rowNum in randomRows:
            record = self.src[rowNum]
            centroid = record.getCentroid()
            trueCdMatrix = np.radians(
                self.calexp.getWcs().getCdMatrix(centroid))

            self.assertAlmostEqual(record['base_LocalWcs_CDMatrix_1_1'],
                                   trueCdMatrix[0, 0])
            self.assertAlmostEqual(record['base_LocalWcs_CDMatrix_2_1'],
                                   trueCdMatrix[1, 0])
            self.assertAlmostEqual(record['base_LocalWcs_CDMatrix_1_2'],
                                   trueCdMatrix[0, 1])
            self.assertAlmostEqual(record['base_LocalWcs_CDMatrix_2_2'],
                                   trueCdMatrix[1, 1])
            # Pixel scale should equal sqrt(|det(CD matrix)|).
            self.assertAlmostEqual(
                self.calexp.getWcs().getPixelScale(centroid).asRadians(),
                np.sqrt(np.fabs(record['base_LocalWcs_CDMatrix_1_1']
                                * record['base_LocalWcs_CDMatrix_2_2']
                                - record['base_LocalWcs_CDMatrix_2_1']
                                * record['base_LocalWcs_CDMatrix_1_2'])))
def testBasicPutGet(self): butler = Butler(self.configFile) # Create and register a DatasetType datasetTypeName = "test_metric" dataUnits = ("Camera", "Visit") storageClass = self.storageClassFactory.getStorageClass( "StructuredData") self.registerDatasetTypes(datasetTypeName, dataUnits, storageClass, butler.registry) # Create and store a dataset metric = makeExampleMetrics() dataId = {"camera": "DummyCam", "visit": 42} ref = butler.put(metric, datasetTypeName, dataId) self.assertIsInstance(ref, DatasetRef) # Test getDirect metricOut = butler.getDirect(ref) self.assertEqual(metric, metricOut) # Test get metricOut = butler.get(datasetTypeName, dataId) self.assertEqual(metric, metricOut) # Check we can get components self.assertGetComponents(butler, datasetTypeName, dataId, ("summary", "data", "output"), metric)
class PexConfigFormatterTestCase(unittest.TestCase):
    """Tests for PexConfigFormatter, using local file datastore."""

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "config",
            dimensions=(),
            storageClass="Config",
            universe=self.butler.registry.dimensions)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        # Remove the per-test repository created in setUp.
        removeTestTempDir(self.root)

    def testPexConfig(self) -> None:
        """Test that we can put and get pex_config Configs"""
        c = SimpleConfig(i=10, c="hello")
        self.assertEqual(c.i, 10)
        ref = self.butler.put(c, "config")
        butler_c = self.butler.get(ref)
        # The round trip must preserve both value equality and the
        # concrete config type.
        self.assertEqual(c, butler_c)
        self.assertIsInstance(butler_c, SimpleConfig)
def getInitInputs(butler: Butler, config: PipelineTaskConfig) -> Dict[str, Any]:
    """Return the initInputs object that would have been passed to a
    `~lsst.pipe.base.PipelineTask` constructor.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to search for input datasets. Must have
        pre-configured collections.
    config : `lsst.pipe.base.PipelineTaskConfig`
        The config for the task to be constructed.

    Returns
    -------
    initInputs : `dict` [`str`]
        A dictionary of objects in the format of the ``initInputs``
        parameter to `lsst.pipe.base.PipelineTask`.
    """
    connections = config.connections.ConnectionsClass(config=config)

    def _fetch(connectionName):
        # Build the full dataset type up front so that any consistency
        # problems surface before the actual get().
        connection = getattr(connections, connectionName)
        # All initInputs have empty data IDs, hence the empty dimension set.
        fullType = DatasetType(connection.name,
                               butler.registry.dimensions.extract(set()),
                               connection.storageClass)
        return butler.get(fullType)

    return {name: _fetch(name) for name in connections.initInputs}
def testMatplotlibFormatter(self):
    """Round-trip a matplotlib figure through the Butler; compare the
    stored file against a locally saved copy, then check removal
    semantics.
    """
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    pyplot.imshow(np.random.randn(3, 4))
    ref = butler.put(pyplot.gcf(), datasetType)
    # Parse the dataset URI so the on-disk path can be compared.
    parsed = urllib.parse.urlparse(butler.getUri(ref))
    with tempfile.NamedTemporaryFile(suffix=".png") as file:
        pyplot.gcf().savefig(file.name)
        self.assertTrue(filecmp.cmp(parsed.path, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    # Reading the Plot back raises ValueError (presumably no read
    # support in the formatter -- confirm).
    with self.assertRaises(ValueError):
        butler.get(ref)
    butler.remove(ref)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)
def testAstropyTableFormatter(self):
    """Round-trip an astropy table through the Butler and verify the
    serialized extension and the table contents.
    """
    repoButler = Butler(self.root, run="testrun")
    tableType = DatasetType("table", [], "AstropyTable",
                            universe=repoButler.registry.dimensions)
    repoButler.registry.registerDatasetType(tableType)
    storedRef = repoButler.put(self.table, tableType)
    # The AstropyTable formatter is expected to serialize to ECSV.
    storedUri = repoButler.getURI(storedRef)
    self.assertEqual(storedUri.getExtension(), '.ecsv')
    # Reading back by dataset type name must reproduce every cell.
    roundTripped = repoButler.get('table')
    self.assertTrue(numpy.all(roundTripped == self.table))
def testCuratedCalibrations(self):
    """Test that defects, the camera, and the brighter-fatter kernel
    were added to the Gen3 registry.
    """
    originInfo = DatasetOriginInfoDef(["raw", "calib"], [])
    # Query for raws that have associated calibs of the types below;
    # result is an iterator over rows that correspond roughly to data IDs.
    rowsWithCalibs = list(
        self.butler.registry.selectMultipleDatasetTypes(
            originInfo,
            expression="",
            required=["raw", "camera", "bfKernel", "defects"],
            perDatasetTypeDimensions=["calibration_label"]))
    # Query for all rows, with no restriction on having associated calibs.
    rowsWithoutCalibs = list(
        self.butler.registry.selectMultipleDatasetTypes(
            originInfo,
            expression="",
            required=["raw"],
        ))
    # We should get the same raws in both cases because all of the raws
    # here should have associated calibs.
    self.assertGreater(len(rowsWithoutCalibs), 0)
    self.assertEqual(len(rowsWithCalibs), len(rowsWithoutCalibs))
    # Try getting those calibs to make sure the files themselves are
    # where the Butler thinks they are.
    butler = Butler(REPO_ROOT, run="calib")
    instrument = HyperSuprimeCam()
    for row in rowsWithCalibs:
        refsByName = {k.name: v for k, v in row.datasetRefs.items()}
        cameraFromButler = butler.get(refsByName["camera"])
        cameraFromInstrument = instrument.getCamera()
        # Compare the stored camera against the instrument-defined one.
        self.assertEqual(len(cameraFromButler), len(cameraFromInstrument))
        self.assertEqual(cameraFromButler.getName(),
                         cameraFromInstrument.getName())
        self.assertFloatsEqual(butler.get(refsByName["bfKernel"]),
                               instrument.getBrighterFatterKernel())
        defects = butler.get(refsByName["defects"])
        self.assertIsInstance(defects, lsst.meas.algorithms.Defects)
class TestSchemaMatch(lsst.utils.tests.TestCase, MockCheckMixin):
    """Check the schema of the parquet outputs match the DDL in
    sdm_schemas"""

    def setUp(self):
        # Read-only butler on the ci_hsc output repo, plus the table
        # definitions loaded from the sdm_schemas YAML DDL.
        self.butler = Butler(os.path.join(getPackageDir("ci_hsc_gen3"),
                                          "DATA"),
                             writeable=False,
                             collections=["HSC/runs/ci_hsc"])
        schemaFile = os.path.join(getPackageDir("sdm_schemas"), 'yml',
                                  'hsc.yaml')
        with open(schemaFile, "r") as f:
            self.schema = yaml.safe_load(f)['tables']

    def _validateSchema(self, dataset, dataId, tableName):
        """Check the schema of the parquet dataset match that in the DDL.

        Only the column names are checked currently.
        """
        # skip the test in mock execution
        self.skip_mock(dataset)
        sdmSchema = [
            table for table in self.schema if table['name'] == tableName
        ]
        # Exactly one DDL table must match the requested name.
        self.assertEqual(len(sdmSchema), 1)
        expectedColumnNames = set(column['name']
                                  for column in sdmSchema[0]['columns'])

        df = self.butler.get(dataset, dataId)
        # reset_index so index columns are included in the comparison.
        df.reset_index(inplace=True)
        outputColumnNames = set(df.columns.to_list())
        self.assertEqual(outputColumnNames, expectedColumnNames)

    def testObjectSchemaMatch(self):
        """Check objectTable_tract"""
        dataId = {"instrument": "HSC", "tract": 0}
        self._validateSchema("objectTable_tract", dataId, "Object")

    def testSourceSchemaMatch(self):
        """Check one sourceTable_visit"""
        dataId = {
            "instrument": "HSC",
            "detector": 100,
            "visit": 903334,
            "band": "r"
        }
        self._validateSchema("sourceTable_visit", dataId, "Source")
class JobReporter:
    """Collect verify metric values from a butler repository into
    `Job` objects keyed by tract and band.
    """

    def __init__(self, repository, collection, metrics_package, spec,
                 dataset_name):
        # Hard coding verify_metrics as the packager for now.
        # It would be easy to pass this in as an argument, if necessary.
        self.metrics = MetricSet.load_metrics_package(
            package_name_or_path='verify_metrics',
            subset=metrics_package)
        self.butler = Butler(repository)
        self.registry = self.butler.registry
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name

    def run(self):
        """Query the repository for each metric's values and group them.

        Returns
        -------
        jobs : `dict`
            Mapping from an f"{tract}_{band}" key to the `Job` holding
            all measurements for that tract/band combination.
        """
        jobs = {}
        for metric in self.metrics:
            data_ids = list(
                self.registry.queryDatasets(
                    (f'metricvalue_{metric.package}'
                     f'_{metric.metric}'),
                    collections=self.collection))
            for did in data_ids:
                m = self.butler.get(did, collections=self.collection)
                # make the name the same as what SQuaSH expects
                m.metric_name = metric
                # Grab the physical filter associated with the abstract
                # filter.  In general there may be more than one.  Take
                # the shortest assuming it is the most generic.
                pfilts = [el.name for el in
                          self.butler.registry.queryDimensionRecords(
                              'physical_filter',
                              dataId=did.dataId)]
                pfilt = min(pfilts, key=len)
                tract = did.dataId['tract']
                afilt = did.dataId['band']
                key = f"{tract}_{afilt}"
                if key not in jobs.keys():
                    job_metadata = {'instrument': did.dataId['instrument'],
                                    'filter': pfilt,
                                    'band': afilt,
                                    'tract': tract,
                                    'butler_generation': 'Gen3',
                                    'ci_dataset': self.dataset_name}
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata,
                                    metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs
def testPhotodiode(self): """Test ingest to a repo with the exposure information will not raise. """ # Ingest raw to provide exposure information. outputRun = "raw_ingest_" + self.id() runner = LogCliRunner() result = runner.invoke( butlerCli, [ "ingest-raws", self.root, self.file, "--output-run", outputRun, "--ingest-task", self.rawIngestTask, ], ) self.assertEqual( result.exit_code, 0, f"output: {result.output} exception: {result.exception}") # Ingest photodiode matching this exposure. runner = LogCliRunner() result = runner.invoke( butlerCli, [ "ingest-photodiode", self.root, self.instrumentClassName, self.pdPath, ], ) self.assertEqual( result.exit_code, 0, f"output: {result.output} exception: {result.exception}") # Confirm that we can retrieve the ingested photodiode, and # that it has the correct type. butler = Butler(self.root, run="LSSTCam/calib/photodiode") getResult = butler.get('photodiode', dataId=self.dataIds[0]) self.assertIsInstance(getResult, PhotodiodeCalib)
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this,
    then `unittest.TestCase` to get a working test suite.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`;
    the actual directory will be a tempdir under this one.
    """

    instrument = None
    """The instrument to be registered and tested."""

    dataIds = []
    """list of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    RawIngestTask = lsst.obs.base.RawIngestTask
    """The task to use in the Ingest test."""

    def setUp(self):
        # Use a temporary working directory
        self.root = tempfile.mkdtemp(dir=self.ingestDir)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="raw")

        # Register the instrument and its static metadata
        self.instrument.register(self.butler.registry)

        # Make a default config for test methods to play with
        self.config = self.RawIngestTask.ConfigClass()
        self.config.instrument = \
            f"{self.instrument.__class__.__module__}.{self.instrument.__class__.__name__}"

    def tearDown(self):
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runIngest(self, files=None):
        """
        Initialize and run RawIngestTask on a list of files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        if files is None:
            files = [self.file]
        task = self.RawIngestTask(config=self.config, butler=self.butler)
        task.log.setLevel(
            task.log.FATAL)  # silence logs, since we expect a lot of warnings
        task.run(files)

    def runIngestTest(self, files=None):
        """
        Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        self.runIngest(files)
        datasets = self.butler.registry.queryDatasets('raw',
                                                      collections=...)
        self.assertEqual(len(list(datasets)), len(self.dataIds))
        for dataId in self.dataIds:
            exposure = self.butler.get("raw", dataId)
            metadata = self.butler.get("raw.metadata", dataId)
            # only check the metadata, not the images, to speed up tests
            self.assertEqual(metadata.toDict(),
                             exposure.getMetadata().toDict())
        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it
        does nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    def testSymLink(self):
        self.config.transfer = "symlink"
        self.runIngestTest()

    def testCopy(self):
        self.config.transfer = "copy"
        self.runIngestTest()

    def testHardLink(self):
        self.config.transfer = "hardlink"
        try:
            self.runIngestTest()
        except PermissionError as err:
            # Hard links cannot cross filesystems; skip rather than fail.
            raise unittest.SkipTest(
                "Skipping hard-link test because input data"
                " is on a different filesystem.") from err

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        # symlink into repo root manually
        newPath = os.path.join(self.butler.datastore.root,
                               os.path.basename(self.file))
        os.symlink(os.path.abspath(self.file), newPath)
        self.config.transfer = None
        self.runIngestTest([newPath])

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self.config.transfer = "symlink"
        self.runIngest()
        with self.assertRaises(Exception):
            self.runIngest()
def runPutGetTest(self, storageClass, datasetTypeName):
    """Exercise put/get/remove semantics for a dataset type.

    Stores an example metric three ways (DatasetRef, dataset type name,
    DatasetType), verifies all retrieval paths, then walks through the
    partial- and full-removal states, finishing with parameterized gets
    and error cases.

    Parameters
    ----------
    storageClass : storage class object
        Storage class for the dataset type being registered.
    datasetTypeName : `str`
        Name under which the dataset type is registered.
    """
    butler = Butler(self.tmpConfigFile)
    # There will not be a collection yet
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, set())
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit"])
    datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                      storageClass, butler.registry)
    # Add needed Dimensions
    butler.registry.addDimensionEntry("instrument",
                                      {"instrument": "DummyCamComp"})
    butler.registry.addDimensionEntry("physical_filter", {
        "instrument": "DummyCamComp",
        "physical_filter": "d-r"
    })
    butler.registry.addDimensionEntry("visit", {
        "instrument": "DummyCamComp",
        "visit": 423,
        "physical_filter": "d-r"
    })
    # Create and store a dataset
    metric = makeExampleMetrics()
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create a DatasetRef for put
    refIn = DatasetRef(datasetType, dataId, id=None)
    # Put with a preexisting id should fail
    with self.assertRaises(ValueError):
        butler.put(metric, DatasetRef(datasetType, dataId, id=100))
    # Put and remove the dataset once as a DatasetRef, once as a dataId,
    # and once with a DatasetType
    for args in ((refIn, ), (datasetTypeName, dataId),
                 (datasetType, dataId)):
        with self.subTest(args=args):
            ref = butler.put(metric, *args)
            self.assertIsInstance(ref, DatasetRef)
            # Test getDirect
            metricOut = butler.getDirect(ref)
            self.assertEqual(metric, metricOut)
            # Test get
            metricOut = butler.get(ref.datasetType.name, dataId)
            self.assertEqual(metric, metricOut)
            # Test get with a datasetRef
            metricOut = butler.get(ref)
            self.assertEqual(metric, metricOut)
            # Check we can get components
            if storageClass.isComposite():
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"),
                                         metric)
            # Remove from collection only; after that we shouldn't be
            # able to find it unless we use the dataset_id.
            butler.remove(*args, delete=False)
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # If we use the output ref with the dataset_id, we should
            # still be able to load it with getDirect().
            self.assertEqual(metric, butler.getDirect(ref))

            # Reinsert into collection, then delete from Datastore *and*
            # remove from collection.
            butler.registry.associate(butler.collection, [ref])
            butler.remove(*args)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # Now getDirect() should fail, too.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry still knows about it, if we use the dataset_id.
            self.assertEqual(butler.registry.getDataset(ref.id), ref)

            # Put again, then remove completely (this generates a new
            # dataset record in registry, with a new ID - the old one
            # still exists but it is not in any collection so we don't
            # care).
            ref = butler.put(metric, *args)
            butler.remove(*args, remember=False)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id
            # anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

    # Put the dataset again, since the last thing we did was remove it.
    ref = butler.put(metric, refIn)
    # Get with parameters
    stop = 4
    sliced = butler.get(ref, parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # Combining a DatasetRef with a dataId should fail
    with self.assertRaises(ValueError):
        butler.get(ref, dataId)
    # Getting with an explicit ref should fail if the id doesn't match
    with self.assertRaises(ValueError):
        butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))
    # Getting a dataset with unknown parameters should fail
    with self.assertRaises(KeyError):
        butler.get(ref, parameters={"unsupported": True})
    # Check we have a collection
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, {
        "ingest",
    })
def makeDiscreteSkyMap(repo, config_file, collections, instrument,
                       skymap_id='discrete', old_skymap_id=None):
    """Implements the command line interface `butler make-discrete-skymap`
    subcommand, should only be called by command line tools and unit
    test code that tests this function.

    Constructs a skymap from calibrated exposure in the butler repository

    Parameters
    ----------
    repo : `str`
        URI to the location to read the repo.
    config_file : `str` or `None`
        Path to a config file that contains overrides to the skymap
        config.
    collections : `list` [`str`]
        An expression specifying the collections to be searched (in
        order) when reading datasets, and optionally dataset type
        restrictions on them.  At least one collection must be
        specified.  This is the collection with the calibrated
        exposures.
    instrument : `str`
        The name or fully-qualified class name of an instrument.
    skymap_id : `str`, optional
        The identifier of the skymap to save.  Default is 'discrete'.
    old_skymap_id : `str`, optional
        The identifier of the skymap to append to.  Must differ from
        ``skymap_id``.  Ignored unless ``config.doAppend=True``.
    """
    butler = Butler(repo, collections=collections, writeable=True)
    instr = getInstrument(instrument, butler.registry)
    config = MakeDiscreteSkyMapConfig()
    instr.applyConfigOverrides(MakeDiscreteSkyMapTask._DefaultName, config)
    if config_file is not None:
        config.load(config_file)
    # The coaddName for a SkyMap is only relevant in Gen2, and we
    # completely ignore it here; once Gen2 is gone it can be removed.
    oldSkyMap = None
    if config.doAppend:
        if old_skymap_id is None:
            raise ValueError(
                "old_skymap_id must be provided if config.doAppend is True.")
        dataId = {'skymap': old_skymap_id}
        try:
            oldSkyMap = butler.get(BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
                                   collections=collections,
                                   dataId=dataId)
        except LookupError as e:
            msg = (
                f"Could not find seed skymap with dataId {dataId} "
                f"in collections {collections} but doAppend is {config.doAppend}. Aborting..."
            )
            raise LookupError(msg, *e.args[1:])

    datasets = butler.registry.queryDatasets('calexp',
                                             collections=collections)
    # Pair each calexp's metadata with its WCS; these are the inputs the
    # skymap-building task consumes.
    wcs_md_tuple_list = [(butler.getDirect('calexp.metadata', ref),
                          butler.getDirect('calexp.wcs', ref))
                         for ref in datasets]
    task = MakeDiscreteSkyMapTask(config=config)
    result = task.run(wcs_md_tuple_list, oldSkyMap)
    # Register the new skymap and store it in its dedicated run.
    result.skyMap.register(skymap_id, butler)
    butler.put(result.skyMap,
               BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
               dataId={'skymap': skymap_id},
               run=BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
class HscIngestTestCase(lsst.utils.tests.TestCase):
    """Tests of RawIngestTask transfer modes and conflict/error handling.

    Each test ingests a single HSC raw (``HSCA90402512.fits.gz``) into a
    fresh temporary repository created in ``setUp`` and removed in
    ``tearDown``.
    """

    def setUp(self):
        # Use a temporary working directory
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="raw")
        # Register the instrument and its static metadata
        HyperSuprimeCam().register(self.butler.registry)
        # Make a default config for test methods to play with
        self.config = RawIngestTask.ConfigClass()
        self.config.onError = "break"
        self.file = os.path.join(testDataDirectory, "hsc", "raw",
                                 "HSCA90402512.fits.gz")
        self.dataId = dict(instrument="HSC", exposure=904024, detector=50)

    def tearDown(self):
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runIngest(self, files=None):
        """Run RawIngestTask on ``files`` (default: the single test raw)."""
        if files is None:
            files = [self.file]
        task = RawIngestTask(config=self.config, butler=self.butler)
        # Silence logs, since we expect a lot of warnings.
        task.log.setLevel(task.log.FATAL)
        task.run(files)

    def runIngestTest(self, files=None):
        """Ingest, then verify the raw and its components round-trip."""
        self.runIngest(files)
        exposure = self.butler.get("raw", self.dataId)
        metadata = self.butler.get("raw.metadata", self.dataId)
        image = self.butler.get("raw.image", self.dataId)
        self.assertImagesEqual(exposure.image, image)
        self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())

    def testSymLink(self):
        self.config.transfer = "symlink"
        self.runIngestTest()

    def testCopy(self):
        self.config.transfer = "copy"
        self.runIngestTest()

    def testHardLink(self):
        self.config.transfer = "hardlink"
        self.runIngestTest()

    def testInPlace(self):
        # hardlink into repo root manually, then ingest with no transfer
        newPath = os.path.join(self.butler.datastore.root,
                               os.path.basename(self.file))
        os.link(self.file, newPath)
        self.config.transfer = None
        self.runIngestTest([newPath])

    def testOnConflictFail(self):
        self.config.transfer = "symlink"
        self.config.conflict = "fail"
        self.runIngest()  # this one should succeed
        with self.assertRaises(Exception):
            self.runIngest()  # this one should fail

    def testOnConflictIgnore(self):
        self.config.transfer = "symlink"
        self.config.conflict = "ignore"
        self.runIngest()  # this one should succeed
        n1, = self.butler.registry.query("SELECT COUNT(*) FROM Dataset")
        self.runIngest()  # this one should silently fail
        n2, = self.butler.registry.query("SELECT COUNT(*) FROM Dataset")
        self.assertEqual(n1, n2)

    def testOnConflictStash(self):
        self.config.transfer = "symlink"
        self.config.conflict = "ignore"
        self.config.stash = "stash"
        self.runIngest()  # this one should write to 'raw'
        self.runIngest()  # this one should write to 'stash'
        dt = self.butler.registry.getDatasetType("raw.metadata")
        ref1 = self.butler.registry.find(self.butler.collection, dt, self.dataId)
        ref2 = self.butler.registry.find("stash", dt, self.dataId)
        self.assertNotEqual(ref1.id, ref2.id)
        self.assertEqual(
            self.butler.get(ref1).toDict(),
            self.butler.getDirect(ref2).toDict())

    def testOnErrorBreak(self):
        self.config.transfer = "symlink"
        self.config.onError = "break"
        # Failing to ingest this nonexistent file after ingesting the valid
        # one should leave the valid one in the registry, despite raising an
        # exception.
        with self.assertRaises(Exception):
            self.runIngest(files=[self.file, "nonexistent.fits"])
        dt = self.butler.registry.getDatasetType("raw.metadata")
        self.assertIsNotNone(
            self.butler.registry.find(self.butler.collection, dt, self.dataId))

    def testOnErrorContinue(self):
        self.config.transfer = "symlink"
        self.config.onError = "continue"
        # Failing to ingest nonexistent files before and after ingesting the
        # valid one should leave the valid one in the registry and not raise
        # an exception.
        self.runIngest(
            files=["nonexistent.fits", self.file, "still-not-here.fits"])
        dt = self.butler.registry.getDatasetType("raw.metadata")
        self.assertIsNotNone(
            self.butler.registry.find(self.butler.collection, dt, self.dataId))

    def testOnErrorRollback(self):
        self.config.transfer = "symlink"
        self.config.onError = "rollback"
        # Failing to ingest nonexistent files after ingesting the
        # valid one should leave the registry empty.
        # BUG FIX: this previously called ``runIngest(file=...)``; the bad
        # keyword raised TypeError inside assertRaises, so the rollback path
        # was never actually exercised.  The parameter is named ``files``.
        with self.assertRaises(Exception):
            self.runIngest(files=[self.file, "nonexistent.fits"])
        try:
            dt = self.butler.registry.getDatasetType("raw.metadata")
        except KeyError:
            # If we also rollback registering the DatasetType, that's fine,
            # but not required.
            pass
        else:
            self.assertIsNotNone(
                self.butler.registry.find(self.butler.collection, dt, self.dataId))
class TestCoaddOutputs(unittest.TestCase, MockCheckMixin):
    """Check that coadd outputs are as expected.

    Many tests here are ported from
    https://github.com/lsst/pipe_tasks/blob/
    fd7d5e23d3c71e5d440153bc4faae7de9d5918c5/tests/nopytest_test_coadds.py
    """

    def setUp(self):
        # Read-only butler over the pre-built ci_hsc_gen3 output repo; all
        # tests below only read datasets from the HSC/runs/ci_hsc collection.
        self.butler = Butler(os.path.join(getPackageDir("ci_hsc_gen3"), "DATA"),
                             instrument="HSC", skymap="discrete/ci_hsc",
                             writeable=False, collections=["HSC/runs/ci_hsc"])
        self.skip_mock()
        # Fixed tract/patch/bands known to exist in the ci_hsc test data.
        self._tract = 0
        self._patch = 69
        self._bands = ['r', 'i']

    def test_forced_id_names(self):
        """Test that forced photometry ID fields are named as expected
        (DM-8210).

        Specifically, coadd forced photometry should have only "id" and
        "parent" fields, while CCD forced photometry should have those,
        "objectId", and "parentObjectId".
        """
        coadd_schema = self.butler.get("deepCoadd_forced_src_schema").schema
        self.assertIn("id", coadd_schema)
        self.assertIn("parent", coadd_schema)
        self.assertNotIn("objectId", coadd_schema)
        self.assertNotIn("parentObjectId", coadd_schema)
        ccd_schema = self.butler.get("forced_src_schema").schema
        self.assertIn("id", ccd_schema)
        self.assertIn("parent", ccd_schema)
        self.assertIn("objectId", ccd_schema)
        self.assertIn("parentObjectId", ccd_schema)

    def test_alg_metadata_output(self):
        """Test that the algorithm metadata is persisted correctly
        from MeasureMergedCoaddSourcesTask.
        """
        for band in self._bands:
            cat = self.butler.get(
                "deepCoadd_meas", band=band, tract=self._tract, patch=self._patch
            )
            meta = cat.getMetadata()
            for circ_aperture_flux_radius in meta.getArray('BASE_CIRCULARAPERTUREFLUX_RADII'):
                self.assertIsInstance(circ_aperture_flux_radius, numbers.Number)
            # Each time the run method of a measurement task is executed,
            # algorithm metadata is appended to the algorithm metadata object.
            # Depending on how many times a measurement task is run,
            # a metadata entry may be a single value or multiple values.
            for n_offset in meta.getArray('NOISE_OFFSET'):
                self.assertIsInstance(n_offset, numbers.Number)
            for noise_src in meta.getArray('NOISE_SOURCE'):
                self.assertEqual(noise_src, 'measure')
            for noise_exp_id in meta.getArray('NOISE_EXPOSURE_ID'):
                self.assertIsInstance(noise_exp_id, numbers.Number)
            for noise_seed_mul in meta.getArray('NOISE_SEED_MULTIPLIER'):
                self.assertIsInstance(noise_seed_mul, numbers.Number)

    def test_schema_consistency(self):
        """Test that _schema catalogs are consistent with the data catalogs."""
        det_schema = self.butler.get("deepCoadd_det_schema").schema
        meas_schema = self.butler.get("deepCoadd_meas_schema").schema
        mergeDet_schema = self.butler.get("deepCoadd_mergeDet_schema").schema
        ref_schema = self.butler.get("deepCoadd_ref_schema").schema
        coadd_forced_schema = self.butler.get("deepCoadd_forced_src_schema").schema
        ccd_forced_schema = self.butler.get("forced_src_schema").schema
        for band in self._bands:
            det = self.butler.get("deepCoadd_det", band=band,
                                  tract=self._tract, patch=self._patch)
            self.assertEqual(det.schema, det_schema)
            mergeDet = self.butler.get("deepCoadd_mergeDet", band=band,
                                       tract=self._tract, patch=self._patch)
            self.assertEqual(mergeDet.schema, mergeDet_schema)
            meas = self.butler.get("deepCoadd_meas", band=band,
                                   tract=self._tract, patch=self._patch)
            self.assertEqual(meas.schema, meas_schema)
            ref = self.butler.get("deepCoadd_ref", band=band,
                                  tract=self._tract, patch=self._patch)
            self.assertEqual(ref.schema, ref_schema)
            coadd_forced_src = self.butler.get(
                "deepCoadd_forced_src", band=band, tract=self._tract, patch=self._patch
            )
            self.assertEqual(coadd_forced_src.schema, coadd_forced_schema)
            # CCD forced sources come from a fixed visit/detector in DATA_IDS.
            ccd_forced_src = self.butler.get(
                "forced_src", tract=self._tract,
                visit=DATA_IDS[0]["visit"], detector=DATA_IDS[0]["detector"]
            )
            self.assertEqual(ccd_forced_src.schema, ccd_forced_schema)

    def test_coadd_transmission_curves(self):
        """Test that coadded TransmissionCurves agree with the inputs."""
        wavelengths = np.linspace(4000, 7000, 10)
        n_object_test = 10
        # Fixed seed so the sampled objects are reproducible.
        ctx = np.random.RandomState(12345)
        for band in self._bands:
            n_tested = 0
            exp = self.butler.get("deepCoadd_calexp", band=band,
                                  tract=self._tract, patch=self._patch)
            cat = self.butler.get("objectTable", band=band,
                                  tract=self._tract, patch=self._patch)
            transmission_curve = exp.getInfo().getTransmissionCurve()
            coadd_inputs = exp.getInfo().getCoaddInputs().ccds
            wcs = exp.getWcs()
            to_check = ctx.choice(len(cat), size=n_object_test, replace=False)
            for index in to_check:
                coadd_coord = geom.SpherePoint(cat["coord_ra"].values[index]*geom.degrees,
                                               cat["coord_dec"].values[index]*geom.degrees)
                # Accumulate the weighted mean of the input detectors'
                # throughputs at this sky position.
                summed_throughput = np.zeros(wavelengths.shape, dtype=np.float64)
                weight_sum = 0.0
                for rec in coadd_inputs.subsetContaining(coadd_coord, includeValidPolygon=True):
                    det_pos = rec.getWcs().skyToPixel(coadd_coord)
                    det_trans = rec.getTransmissionCurve()
                    weight = rec.get("weight")
                    summed_throughput += det_trans.sampleAt(det_pos, wavelengths)*weight
                    weight_sum += weight
                if weight_sum == 0.0:
                    # No inputs cover this position; skip it.
                    continue
                summed_throughput /= weight_sum
                coadd_pos = wcs.skyToPixel(coadd_coord)
                coadd_throughput = transmission_curve.sampleAt(coadd_pos, wavelengths)
                np.testing.assert_array_almost_equal(coadd_throughput, summed_throughput)
                n_tested += 1
            self.assertGreater(n_tested, 5)

    def test_mask_planes_exist(self):
        """Test that the input mask planes have been added."""
        for data_id in DATA_IDS:
            mask = self.butler.get("calexp.mask", data_id)
            self.assertIn("CROSSTALK", mask.getMaskPlaneDict())
            self.assertIn("NOT_DEBLENDED", mask.getMaskPlaneDict())

    # Expected to fail until DM-5174 is fixed.
    @unittest.expectedFailure
    def test_masks_removed(self):
        """Test that certain mask planes have been removed from the coadds.

        This is expected to fail until DM-5174 is fixed.
        """
        for band in self._bands:
            mask = self.butler.get("deepCoadd_calexp.mask", band=band,
                                   tract=self._tract, patch=self._patch)
            self.assertNotIn("CROSSTALK", mask.getMaskPlaneDict())
            self.assertNotIn("NOT_DEBLENDED", mask.getMaskPlaneDict())

    def test_warp_inputs(self):
        """Test that the warps have the correct inputs."""
        skymap = self.butler.get("skyMap")
        tract_info = skymap[self._tract]
        for warp_type in ["directWarp", "psfMatchedWarp"]:
            datasets = set(self.butler.registry.queryDatasets(f"deepCoadd_{warp_type}"))
            # We only need to test one dataset
            dataset = list(datasets)[0]
            warp = self.butler.getDirect(dataset)
            self.assertEqual(warp.wcs, tract_info.wcs)
            coadd_inputs = warp.getInfo().getCoaddInputs()
            # A warp is built from exactly one visit.
            self.assertEqual(len(coadd_inputs.visits), 1)
            visit_record = coadd_inputs.visits[0]
            self.assertEqual(visit_record.getWcs(), warp.wcs)
            self.assertEqual(visit_record.getBBox(), warp.getBBox())
            self.assertGreater(len(coadd_inputs.ccds), 0)
            wcs_cat = self.butler.get(
                "jointcalSkyWcsCatalog", visit=visit_record.getId(), tract=self._tract
            )
            photocalib_cat = self.butler.get(
                "jointcalPhotoCalibCatalog", visit=visit_record.getId(), tract=self._tract
            )
            final_psf_cat = self.butler.get(
                "finalized_psf_ap_corr_catalog", visit=visit_record.getId()
            )
            # We only need to test one input ccd
            det_record = coadd_inputs.ccds[0]
            exp_bbox = self.butler.get(
                "calexp.bbox", visit=det_record["visit"], detector=det_record["ccd"]
            )
            # The warp inputs must carry the final (jointcal/finalized)
            # calibrations, not the initial single-frame ones.
            self.assertEqual(det_record.getWcs(), wcs_cat.find(det_record["ccd"]).getWcs())
            self.assertEqual(
                det_record.getPhotoCalib(),
                photocalib_cat.find(det_record["ccd"]).getPhotoCalib()
            )
            self.assertEqual(det_record.getBBox(), exp_bbox)
            self.assertIsNotNone(det_record.getTransmissionCurve())
            center = det_record.getBBox().getCenter()
            np.testing.assert_array_almost_equal(
                det_record.getPsf().computeKernelImage(center).array,
                final_psf_cat.find(det_record["ccd"]).getPsf().computeKernelImage(center).array
            )
            input_map = det_record.getApCorrMap()
            final_map = final_psf_cat.find(det_record["ccd"]).getApCorrMap()
            self.assertEqual(len(input_map), len(final_map))
            for key in input_map.keys():
                self.assertEqual(input_map[key], final_map[key])
            self.assertIsNotNone(coadd_inputs.visits.find(det_record["visit"]))

    def test_coadd_inputs(self):
        """Test that the coadds have the correct inputs."""
        skymap = self.butler.get("skyMap")
        tract_info = skymap[self._tract]
        for band in self._bands:
            wcs = self.butler.get("deepCoadd_calexp.wcs", band=band,
                                  tract=self._tract, patch=self._patch)
            self.assertEqual(wcs, tract_info.wcs)
            coadd_inputs = self.butler.get(
                "deepCoadd_calexp.coaddInputs", band=band,
                tract=self._tract, patch=self._patch
            )
            # We only need to test one input ccd
            det_record = coadd_inputs.ccds[0]
            wcs_cat = self.butler.get(
                "jointcalSkyWcsCatalog", visit=det_record["visit"], tract=self._tract
            )
            photocalib_cat = self.butler.get(
                "jointcalPhotoCalibCatalog", visit=det_record["visit"], tract=self._tract
            )
            final_psf_cat = self.butler.get(
                "finalized_psf_ap_corr_catalog", visit=det_record["visit"]
            )
            exp_bbox = self.butler.get(
                "calexp.bbox", visit=det_record["visit"], detector=det_record["ccd"]
            )
            # Same final-calibration checks as test_warp_inputs, but against
            # the coadd's own CoaddInputs table.
            self.assertEqual(det_record.getWcs(), wcs_cat.find(det_record["ccd"]).getWcs())
            self.assertEqual(
                det_record.getPhotoCalib(),
                photocalib_cat.find(det_record["ccd"]).getPhotoCalib()
            )
            self.assertEqual(det_record.getBBox(), exp_bbox)
            self.assertIsNotNone(det_record.getTransmissionCurve())
            center = det_record.getBBox().getCenter()
            np.testing.assert_array_almost_equal(
                det_record.getPsf().computeKernelImage(center).array,
                final_psf_cat.find(det_record["ccd"]).getPsf().computeKernelImage(center).array
            )
            input_map = det_record.getApCorrMap()
            final_map = final_psf_cat.find(det_record["ccd"]).getApCorrMap()
            self.assertEqual(len(input_map), len(final_map))
            for key in input_map.keys():
                self.assertEqual(input_map[key], final_map[key])
            self.assertIsNotNone(coadd_inputs.visits.find(det_record["visit"]))

    def test_psf_installation(self):
        """Test that the coadd psf is installed."""
        for band in self._bands:
            wcs = self.butler.get("deepCoadd_calexp.wcs", band=band,
                                  tract=self._tract, patch=self._patch)
            coadd_inputs = self.butler.get(
                "deepCoadd_calexp.coaddInputs", band=band,
                tract=self._tract, patch=self._patch
            )
            coadd_psf = self.butler.get(
                "deepCoadd_calexp.psf", band=band,
                tract=self._tract, patch=self._patch
            )
            # A CoaddPsf rebuilt from the inputs must match the installed one
            # component-by-component.
            new_psf = lsst.meas.algorithms.CoaddPsf(coadd_inputs.ccds, wcs)
            self.assertEqual(coadd_psf.getComponentCount(), len(coadd_inputs.ccds))
            self.assertEqual(new_psf.getComponentCount(), len(coadd_inputs.ccds))
            for n, record in enumerate(coadd_inputs.ccds):
                center = record.getBBox().getCenter()
                np.testing.assert_array_almost_equal(
                    coadd_psf.getPsf(n).computeKernelImage(center).array,
                    record.getPsf().computeKernelImage(center).array
                )
                np.testing.assert_array_almost_equal(
                    new_psf.getPsf(n).computeKernelImage(center).array,
                    record.getPsf().computeKernelImage(center).array
                )
                self.assertEqual(coadd_psf.getWcs(n), record.getWcs())
                self.assertEqual(new_psf.getWcs(n), record.getWcs())
                self.assertEqual(coadd_psf.getBBox(n), record.getBBox())
                self.assertEqual(new_psf.getBBox(n), record.getBBox())

    def test_coadd_psf(self):
        """Test that the stars on the coadd are well represented by
        the attached PSF.
        """
        n_object_test = 10
        n_good_test = 5
        # Fixed seed so the sampled stars are reproducible.
        ctx = np.random.RandomState(12345)
        for band in self._bands:
            exp = self.butler.get("deepCoadd_calexp", band=band,
                                  tract=self._tract, patch=self._patch)
            coadd_psf = exp.getPsf()
            cat = self.butler.get("objectTable", band=band,
                                  tract=self._tract, patch=self._patch)
            # Select bright-but-unsaturated point sources (S/N 50-200).
            star_cat = cat[(cat["i_extendedness"] < 0.5)
                           & (cat["detect_isPrimary"])
                           & (cat[f"{band}_psfFlux"] > 0.0)
                           & (cat[f"{band}_psfFlux"]/cat[f"{band}_psfFluxErr"] > 50.0)
                           & (cat[f"{band}_psfFlux"]/cat[f"{band}_psfFluxErr"] < 200.0)]
            to_check = ctx.choice(len(star_cat), size=n_object_test, replace=False)
            n_good = 0
            for index in to_check:
                position = geom.Point2D(star_cat["x"].values[index],
                                        star_cat["y"].values[index])
                psf_image = coadd_psf.computeImage(position)
                psf_image_bbox = psf_image.getBBox()
                star_image = lsst.afw.image.ImageF(
                    exp.maskedImage.image, psf_image_bbox
                ).convertD()
                # Normalize both to unit flux before comparing.
                star_image /= star_image.array.sum()
                psf_image /= psf_image.array.sum()
                residuals = lsst.afw.image.ImageD(star_image, True)
                residuals -= psf_image
                # This is just a quick check that the coadd psf model works
                # reasonably well for the stars. It is not meant as a detailed
                # test of the psf modeling capability.
                if np.max(np.abs(residuals.array)) < 0.01:
                    n_good += 1
            self.assertGreater(n_good, n_good_test)
def testIngest(self):
    """Exercise Butler.ingest for one-file-per-dataset and
    many-datasets-per-file cases, including removal of one dataset
    from a shared file.
    """
    butler = Butler(self.tmpConfigFile, run="ingest")
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit", "detector"])
    storageClass = self.storageClassFactory.getStorageClass(
        "StructuredDataDictYaml")
    datasetTypeName = "metric"
    datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                      storageClass, butler.registry)
    # Add needed Dimensions
    butler.registry.insertDimensionData("instrument",
                                        {"name": "DummyCamComp"})
    butler.registry.insertDimensionData("physical_filter", {
        "instrument": "DummyCamComp",
        "name": "d-r",
        "abstract_filter": "R"
    })
    for detector in (1, 2):
        butler.registry.insertDimensionData(
            "detector", {
                "instrument": "DummyCamComp",
                "id": detector,
                "full_name": f"detector{detector}"
            })
    butler.registry.insertDimensionData(
        "visit", {
            "instrument": "DummyCamComp",
            "id": 423,
            "name": "fourtwentythree",
            "physical_filter": "d-r"
        }, {
            "instrument": "DummyCamComp",
            "id": 424,
            "name": "fourtwentyfour",
            "physical_filter": "d-r"
        })
    formatter = doImport(
        "lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
    dataRoot = os.path.join(TESTDIR, "data", "basic")
    # First: one file per dataset, ingested with copy transfer.
    datasets = []
    for detector in (1, 2):
        detector_name = f"detector_{detector}"
        metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
        dataId = {
            "instrument": "DummyCamComp",
            "visit": 423,
            "detector": detector
        }
        # Create a DatasetRef for ingest
        refIn = DatasetRef(datasetType, dataId, id=None)
        datasets.append(
            FileDataset(path=metricFile, refs=[refIn], formatter=formatter))
    butler.ingest(*datasets, transfer="copy")
    dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
    dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}
    metrics1 = butler.get(datasetTypeName, dataId1)
    metrics2 = butler.get(datasetTypeName, dataId2)
    self.assertNotEqual(metrics1, metrics2)
    # Compare URIs: separate files, so the URIs must differ.
    uri1 = butler.getUri(datasetTypeName, dataId1)
    uri2 = butler.getUri(datasetTypeName, dataId2)
    self.assertNotEqual(uri1, uri2)
    # Now do a multi-dataset but single file ingest
    metricFile = os.path.join(dataRoot, "detectors.yaml")
    refs = []
    for detector in (1, 2):
        detector_name = f"detector_{detector}"
        dataId = {
            "instrument": "DummyCamComp",
            "visit": 424,
            "detector": detector
        }
        # Create a DatasetRef for ingest
        refs.append(DatasetRef(datasetType, dataId, id=None))
    datasets = []
    datasets.append(
        FileDataset(path=metricFile, refs=refs,
                    formatter=MultiDetectorFormatter))
    butler.ingest(*datasets, transfer="copy")
    dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
    dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}
    multi1 = butler.get(datasetTypeName, dataId1)
    multi2 = butler.get(datasetTypeName, dataId2)
    self.assertEqual(multi1, metrics1)
    self.assertEqual(multi2, metrics2)
    # Compare URIs: both datasets live in the same file, so the URIs match.
    uri1 = butler.getUri(datasetTypeName, dataId1)
    uri2 = butler.getUri(datasetTypeName, dataId2)
    self.assertEqual(uri1, uri2)
    # Test that removing one does not break the second
    butler.remove(datasetTypeName, dataId1)
    with self.assertRaises(LookupError):
        butler.datasetExists(datasetTypeName, dataId1)
    self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
    multi2b = butler.get(datasetTypeName, dataId2)
    self.assertEqual(multi2, multi2b)
def runPutGetTest(self, storageClass, datasetTypeName):
    """Round-trip a dataset through put/get/remove for several ways of
    identifying it, then verify component retrieval, parameters, and
    cleanup.  Returns the butler so callers can run further checks.
    """
    butler = Butler(self.tmpConfigFile, run="ingest")
    # There will not be a collection yet
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, set())
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit"])
    datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                      storageClass, butler.registry)
    # Add needed Dimensions
    butler.registry.insertDimensionData("instrument",
                                        {"name": "DummyCamComp"})
    butler.registry.insertDimensionData("physical_filter", {
        "instrument": "DummyCamComp",
        "name": "d-r",
        "abstract_filter": "R"
    })
    butler.registry.insertDimensionData(
        "visit", {
            "instrument": "DummyCamComp",
            "id": 423,
            "name": "fourtwentythree",
            "physical_filter": "d-r"
        })
    # Create and store a dataset
    metric = makeExampleMetrics()
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create a DatasetRef for put
    refIn = DatasetRef(datasetType, dataId, id=None)
    # Put with a preexisting id should fail
    with self.assertRaises(ValueError):
        butler.put(metric, DatasetRef(datasetType, dataId, id=100))
    # Put and remove the dataset once as a DatasetRef, once as a dataId,
    # and once with a DatasetType
    for args in ((refIn, ), (datasetTypeName, dataId), (datasetType,
                                                        dataId)):
        with self.subTest(args=args):
            ref = butler.put(metric, *args)
            self.assertIsInstance(ref, DatasetRef)
            # Test getDirect
            metricOut = butler.getDirect(ref)
            self.assertEqual(metric, metricOut)
            # Test get
            metricOut = butler.get(ref.datasetType.name, dataId)
            self.assertEqual(metric, metricOut)
            # Test get with a datasetRef
            metricOut = butler.get(ref)
            self.assertEqual(metric, metricOut)
            # Test getDeferred with dataId
            metricOut = butler.getDeferred(ref.datasetType.name,
                                           dataId).get()
            self.assertEqual(metric, metricOut)
            # Test getDeferred with a datasetRef
            metricOut = butler.getDeferred(ref).get()
            self.assertEqual(metric, metricOut)
            # Check we can get components
            if storageClass.isComposite():
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"),
                                         metric)
            # Remove from collection only; after that we shouldn't be able
            # to find it unless we use the dataset_id.
            butler.remove(*args, delete=False)
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # If we use the output ref with the dataset_id, we should
            # still be able to load it with getDirect().
            self.assertEqual(metric, butler.getDirect(ref))
            # Reinsert into collection, then delete from Datastore *and*
            # remove from collection.
            butler.registry.associate(butler.collection, [ref])
            butler.remove(*args)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # Now getDirect() should fail, too.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry still knows about it, if we use the dataset_id.
            self.assertEqual(butler.registry.getDataset(ref.id), ref)
            # Put again, then remove completely (this generates a new
            # dataset record in registry, with a new ID - the old one
            # still exists but it is not in any collection so we don't
            # care).
            ref = butler.put(metric, *args)
            butler.remove(*args, remember=False)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))
    # Put the dataset again, since the last thing we did was remove it.
    ref = butler.put(metric, refIn)
    # Get with parameters
    stop = 4
    sliced = butler.get(ref, parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # getDeferred with parameters
    sliced = butler.getDeferred(ref, parameters={
        "slice": slice(stop)
    }).get()
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)
    # getDeferred with deferred parameters
    sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
    self.assertNotEqual(metric, sliced)
    self.assertEqual(metric.summary, sliced.summary)
    self.assertEqual(metric.output, sliced.output)
    self.assertEqual(metric.data[:stop], sliced.data)

    if storageClass.isComposite():
        # Delete one component and check that the other components
        # can still be retrieved
        metricOut = butler.get(ref.datasetType.name, dataId)
        compNameS = DatasetType.nameWithComponent(datasetTypeName,
                                                  "summary")
        compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
        summary = butler.get(compNameS, dataId)
        self.assertEqual(summary, metric.summary)
        self.assertTrue(butler.datastore.exists(ref.components["summary"]))
        butler.remove(compNameS, dataId, remember=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(compNameS, dataId)
        self.assertFalse(butler.datastore.exists(
            ref.components["summary"]))
        self.assertTrue(butler.datastore.exists(ref.components["data"]))
        data = butler.get(compNameD, dataId)
        self.assertEqual(data, metric.data)

    # Combining a DatasetRef with a dataId should fail
    with self.assertRaises(ValueError):
        butler.get(ref, dataId)
    # Getting with an explicit ref should fail if the id doesn't match
    with self.assertRaises(ValueError):
        butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))
    # Getting a dataset with unknown parameters should fail
    with self.assertRaises(KeyError):
        butler.get(ref, parameters={"unsupported": True})
    # Check we have a collection
    collections = butler.registry.getAllCollections()
    self.assertEqual(collections, {
        "ingest",
    })
    # Clean up to check that we can remove something that may have
    # already had a component removed
    butler.remove(ref.datasetType.name, dataId)
    # Add a dataset back in since some downstream tests require
    # something to be present
    ref = butler.put(metric, refIn)
    return butler

    # NOTE(review): the statements below belong to the *next* test method,
    # whose ``def`` line falls outside this chunk of the file; they are
    # preserved verbatim and are unreachable here.  Restore the missing
    # method header when merging.
    # Construct a butler with no run or collection, but make it writeable.
    butler = Butler(self.tmpConfigFile, writeable=True)
    # Create and register a DatasetType
    dimensions = butler.registry.dimensions.extract(
        ["instrument", "visit"])
    datasetType = self.addDatasetType(
        "example", dimensions,
        self.storageClassFactory.getStorageClass("StructuredData"),
        butler.registry)
    # Add needed Dimensions
    butler.registry.insertDimensionData("instrument",
                                        {"name": "DummyCamComp"})
    butler.registry.insertDimensionData("physical_filter", {
        "instrument": "DummyCamComp",
        "name": "d-r",
        "abstract_filter": "R"
    })
    butler.registry.insertDimensionData(
        "visit", {
            "instrument": "DummyCamComp",
            "id": 423,
            "name": "fourtwentythree",
            "physical_filter": "d-r"
        })
    dataId = {"instrument": "DummyCamComp", "visit": 423}
    # Create dataset.
    metric = makeExampleMetrics()
    # Register a new run and put dataset.
    run = "deferred"
    butler.registry.registerRun(run)
    ref = butler.put(metric, datasetType, dataId, run=run)
    # Putting with no run should fail with TypeError.
    with self.assertRaises(TypeError):
        butler.put(metric, datasetType, dataId)
    # Dataset should exist.
    self.assertTrue(
        butler.datasetExists(datasetType, dataId, collection=run))
    # We should be able to get the dataset back, but with and without
    # a deferred dataset handle.
    self.assertEqual(metric,
                     butler.get(datasetType, dataId, collection=run))
    self.assertEqual(
        metric,
        butler.getDeferred(datasetType, dataId, collection=run).get())
    # Trying to find the dataset without any collection is a TypeError.
    with self.assertRaises(TypeError):
        butler.datasetExists(datasetType, dataId)
    with self.assertRaises(TypeError):
        butler.get(datasetType, dataId)
    with self.assertRaises(TypeError):
        butler.remove(datasetType, dataId)
    # Associate the dataset with a different collection.
    butler.registry.associate("tagged", [ref])
    # Deleting the dataset from the new collection should make it findable
    # in the original collection but without a Datastore entry.
    butler.remove(datasetType, dataId, collection="tagged")
    self.assertFalse(
        butler.datasetExists(datasetType, dataId, collection=run))
def checkInstrumentWithRegistry(self, cls, testRaw):
    """Exercise registration and camera-geometry round-tripping for ``cls``.

    Creates a fresh repository, verifies that the instrument class and its
    metadata translator agree on the instrument name, registers the
    instrument, and then puts/gets the Camera and a random subset of its
    Detectors through the butler.
    """
    Butler.makeRepo(self.root)
    butler = Butler(self.root, run="tests")
    instr = cls()
    factory = StorageClassFactory()

    # The instrument class and the metadata translator must agree on the
    # instrument name; readRawFitsHeader supplies the metadata to compare.
    filename = os.path.join(DATAROOT, testRaw)
    header = readRawFitsHeader(filename, translator_class=cls.translatorClass)
    obsInfo = ObservationInfo(header, translator_class=cls.translatorClass,
                              filename=filename)
    self.assertEqual(instr.getName(), obsInfo.instrument)

    # Add Instrument, Detector, and PhysicalFilter entries to the
    # Butler Registry.
    instr.register(butler.registry)

    # A cameraGeom.Camera dataset is identified by its Instrument alone.
    # (A real-world Camera DatasetType would also carry a validity range.)
    cameraType = DatasetType("camera", dimensions=["instrument"],
                             storageClass=factory.getStorageClass("Camera"),
                             universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(cameraType)

    # A cameraGeom.Detector dataset is identified by Instrument plus
    # (Butler) Detector.  A real-world Detector DatasetType probably isn't
    # needed — it duplicates the Camera, and reading a full Camera just to
    # get one Detector is plenty efficient.
    detectorType = DatasetType("detector",
                               dimensions=["instrument", "detector"],
                               storageClass=factory.getStorageClass("Detector"),
                               universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(detectorType)

    # Round-trip the Camera.  Full camera comparisons are *slow*; just
    # compare names.
    cameraDataId = dict(instrument=instr.instrument)
    butler.put(instr.getCamera(), "camera", dataId=cameraDataId)
    roundTripped = butler.get("camera", cameraDataId)
    self.assertEqual(instr.getCamera().getName(), roundTripped.getName())

    # Round-trip a random subset of the Detectors.
    detectors = list(instr.getCamera())
    sampleSize = min(3, len(detectors))
    for pick in self.rng.choice(len(detectors), size=sampleSize, replace=False):
        detector = detectors[pick]
        # Right now we only support integer detector IDs in data IDs;
        # support for detector names and groups (i.e. rafts) is
        # definitely planned but not yet implemented.
        detDataId = dict(instrument=instr.instrument,
                         detector=detector.getId())
        butler.put(detector, "detector", dataId=detDataId)
        fetched = butler.get("detector", dataId=detDataId)
        # Full detector comparisons are *slow*; just compare names and
        # serials.
        self.assertEqual(detector.getName(), fetched.getName())
        self.assertEqual(detector.getSerial(), fetched.getSerial())
class FormattersTests(DatasetTestHelper, lsst.utils.tests.TestCase):
    """Round-trip (put/get) and ingest tests for the formatters listed in
    the module-level ``FORMATTERS`` table, sharing one repo per class.
    """

    root = None                  # temp repo directory, created in setUpClass
    storageClassFactory = None   # shared StorageClassFactory instance

    @classmethod
    def setUpClass(cls):
        """Create a new butler once only."""
        cls.storageClassFactory = StorageClassFactory()
        cls.root = tempfile.mkdtemp(dir=TESTDIR)
        data_ids = {
            "instrument": [INSTRUMENT_NAME],
            "detector": [0, 1, 2, 3, 4, 5],
            "exposure": [11, 22],
        }
        configURI = ButlerURI("resource://spherex/configs",
                              forceDirectory=True)
        butlerConfig = Config(configURI.join("butler.yaml"))
        # in-memory db is being phased out
        # butlerConfig["registry", "db"] = 'sqlite:///:memory:'
        cls.creatorButler = makeTestRepo(
            cls.root, data_ids, config=butlerConfig,
            dimensionConfig=configURI.join("dimensions.yaml"))
        # Register one dataset type per formatter under test.
        for formatter in FORMATTERS:
            datasetTypeName, storageClassName = (formatter["dataset_type"],
                                                 formatter["storage_class"])
            storageClass = cls.storageClassFactory.getStorageClass(
                storageClassName)
            addDatasetType(cls.creatorButler, datasetTypeName, set(data_ids),
                           storageClass)

    @classmethod
    def tearDownClass(cls):
        if cls.root is not None:
            shutil.rmtree(cls.root, ignore_errors=True)

    def setUp(self):
        # make test collection
        # self.butler = makeTestCollection(self.creatorButler)
        self.collection = self._testMethodName
        self.butler = Butler(butler=self.creatorButler, run=self.collection)

    def test_putget(self):
        """Each formatter must round-trip its in-memory type through the
        butler unchanged in class.
        """
        fitsPath = os.path.join(TESTDIR, "data", "small.fits")
        dataid = {"exposure": 11, "detector": 0,
                  "instrument": INSTRUMENT_NAME}
        for formatter in FORMATTERS:
            # in-memory object, representing fits
            inmemobj = formatter["reader"](fitsPath)
            # save in-memory object into butler dataset
            datasetTypeName = formatter["dataset_type"]
            self.butler.put(inmemobj, datasetTypeName, dataid)
            # get butler dataset
            retrievedobj = self.butler.get(datasetTypeName, dataid)
            self.assertIsInstance(retrievedobj, formatter["inmem_cls"])
            # BUG FIX: this was assertTrue(a, b), which treats the second
            # argument as a failure *message* and therefore always passed.
            self.assertEqual(retrievedobj.__class__.__name__,
                             inmemobj.__class__.__name__)

    def test_ingest(self):
        """Ingest the same FITS file for 2 exposures x 6 detectors via
        symlink and verify the registry contents.
        """
        fitsPath = os.path.join(TESTDIR, "data", "small.fits")
        formatter = FORMATTERS[0]
        datasetTypeName, formatterCls = (formatter["dataset_type"],
                                         formatter["formatter_cls"])
        datasetType = self.butler.registry.getDatasetType(datasetTypeName)
        datasets = []
        for exposure in range(3, 5):
            for detector in range(6):
                # use the same fits to test ingest
                if not os.path.exists(fitsPath):
                    log.warning(
                        f"No data found for detector {detector}, exposure {exposure} @ {fitsPath}."
                    )
                    continue
                ref = DatasetRef(datasetType,
                                 dataId={
                                     "instrument": INSTRUMENT_NAME,
                                     "detector": detector,
                                     "exposure": exposure * 11
                                 })
                datasets.append(
                    FileDataset(refs=ref, path=fitsPath,
                                formatter=formatterCls))
        # register new collection
        # run = "rawIngestedRun"
        # self.butler.registry.registerCollection(run, type=CollectionType.RUN)
        # collection is registered as a part of setUp
        run = self.collection
        with self.butler.transaction():
            # The exposures being ingested (33, 44) are not in the seed
            # repo's dimension data, so insert their records first.
            for exposure in range(3, 5):
                expid = exposure * 11
                self.butler.registry.insertDimensionData(
                    "exposure", {
                        "instrument": INSTRUMENT_NAME,
                        "id": expid,
                        "name": f"{expid}",
                        "group_name": "day1",
                        "timespan": Timespan(begin=None, end=None)
                    })
            # transfer can be 'auto', 'move', 'copy', 'hardlink', 'relsymlink'
            # or 'symlink'
            self.butler.ingest(*datasets, transfer="symlink", run=run)
        # verify that 12 files were ingested (2 exposures for each detector)
        refsSet = set(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run]))
        self.assertEqual(
            len(refsSet), 12,
            f"Collection {run} should have 12 elements after ingest")
        # verify that data id is present
        dataid = {"exposure": 44, "detector": 5,
                  "instrument": INSTRUMENT_NAME}
        refsList = list(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run],
                                               dataId=dataid))
        self.assertEqual(
            len(refsList), 1,
            f"Collection {run} should have 1 element with {dataid}")
class ParquetFormatterTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, using PosixDatastore."""

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # A dimensionless dataset type means no dimension records or data
        # IDs need to be managed by the tests below.
        self.datasetType = DatasetType(
            "data",
            dimensions=(),
            storageClass="DataFrame",
            universe=self.butler.registry.dimensions)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        """Remove the per-test repository."""
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def testSingleIndexDataFrame(self):
        """Round-trip a DataFrame with a single-level column index."""
        original = pd.DataFrame(np.random.randn(5, 3),
                                index=np.arange(5, dtype=int),
                                columns=pd.Index(["a", "b", "c"]))
        self.butler.put(original, self.datasetType, dataId={})
        # A whole-table read must compare equal.
        roundTripped = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(original.equals(roundTripped))
        # The "columns" component exposes just the column descriptions.
        readColumns = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(original.columns.equals(readColumns))
        # Column subsets can be requested as a list or as a bare scalar.
        pair = self.butler.get(self.datasetType, dataId={},
                               parameters={"columns": ["a", "c"]})
        self.assertTrue(original.loc[:, ["a", "c"]].equals(pair))
        single = self.butler.get(self.datasetType, dataId={},
                                 parameters={"columns": "a"})
        self.assertTrue(original.loc[:, ["a"]].equals(single))
        # An unknown column name must raise ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={},
                            parameters={"columns": ["d"]})

    def testMultiIndexDataFrame(self):
        """Round-trip a DataFrame with a (filter, column) MultiIndex."""
        multiColumns = pd.MultiIndex.from_tuples(
            [("g", "a"), ("g", "b"), ("g", "c"),
             ("r", "a"), ("r", "b"), ("r", "c")],
            names=["filter", "column"],
        )
        original = pd.DataFrame(np.random.randn(5, 6),
                                index=np.arange(5, dtype=int),
                                columns=multiColumns)
        self.butler.put(original, self.datasetType, dataId={})
        # A whole-table read must compare equal.
        roundTripped = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(original.equals(roundTripped))
        # The "columns" component exposes just the column descriptions.
        readColumns = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(original.columns.equals(readColumns))
        # Column subsets can be selected per index level.
        gOnly = self.butler.get(self.datasetType, dataId={},
                                parameters={"columns": {"filter": "g"}})
        self.assertTrue(original.loc[:, ["g"]].equals(gOnly))
        raOnly = self.butler.get(self.datasetType, dataId={},
                                 parameters={"columns": {"filter": ["r"],
                                                         "column": "a"}})
        self.assertTrue(original.loc[:, [("r", "a")]].equals(raOnly))
        # An unknown column name must raise ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={},
                            parameters={"columns": ["d"]})
class TestFilterLabelFixups(lsst.utils.tests.TestCase, MockCheckMixin):
    """Tests for the logic in
    lsst.obs.base.formatters.fitsExposure.FitsExposureFormatter._fixFilterLabels
    that uses the data ID passed to a formatter to fix and/or check the
    FilterLabel read from an Exposure FITS file, allowing us to load images
    with new, standardized filters even if they were written prior to filter
    standardization (and without enough information to reconstruct the
    standardized filter name).

    This test lives here instead of obs_base because it relies on having
    Exposure FITS files written both before and after standardization in a
    Gen3 butler, something trivial to obtain here: the flats are old (from
    testdata_ci_hsc) - while calexps are new (written by Gen3 pipelines).
    And this package already has the dependency on a concrete obs package
    (obs_subaru in this case) necessary to set up a full butler repository,
    something that obs_base can by definition never have.
    """

    def setUp(self):
        # Read-only butler over the pre-built ci_hsc_gen3 data repository.
        self.butler = Butler(os.path.join(getPackageDir("ci_hsc_gen3"), "DATA"),
                             writeable=False,
                             collections=["HSC/calib/2013-06-17",
                                          "HSC/runs/ci_hsc"])
        # We need to provide a physical_filter value to fully identify a flat,
        # but this still leaves the band as an implied value that this data ID
        # doesn't know.
        self.flatMinimalDataId = DataCoordinate.standardize(
            instrument="HSC", detector=0, physical_filter="HSC-R",
            universe=self.butler.registry.dimensions,
        )
        # For a calexp, the minimal data ID just has exposure and detector,
        # so both band and physical_filter are implied and not known here.
        self.calexpMinimalDataId = DataCoordinate.standardize(
            instrument="HSC", detector=100, visit=903334,
            universe=self.butler.registry.dimensions,
        )
        # Parameters with bbox to test that logic still works on subimage gets.
        self.parameters = {"bbox": Box2I(Point2I(0, 0), Point2I(8, 7))}

    def testReadingOldFileWithIncompleteDataId(self):
        """If we try to read an old flat with an incomplete data ID, we
        should get a warning.

        It is unspecified what the FilterLabel will have in this case, so we
        don't check that.
        """
        # Full, subimage, and component gets should all warn.
        with self.assertWarns(Warning):
            self.butler.get("flat", self.flatMinimalDataId)
        with self.assertWarns(Warning):
            self.butler.get("flat", self.flatMinimalDataId,
                            parameters=self.parameters)
        with self.assertWarns(Warning):
            self.butler.get("flat.filter", self.flatMinimalDataId)

    def testFixingReadingOldFile(self):
        """If we read an old flat with a complete data ID, we fix the
        FilterLabel.
        """
        flatFullDataId = self.butler.registry.expandDataId(self.flatMinimalDataId)
        flat = self.butler.get("flat", flatFullDataId)
        self.assertEqual(flat.getFilter().bandLabel, flatFullDataId["band"])
        self.assertEqual(flat.getFilter().physicalLabel,
                         flatFullDataId["physical_filter"])
        # The .filter component read must agree with the full read.
        flatFilterLabel = self.butler.get("flat.filter", flatFullDataId)
        self.assertEqual(flatFilterLabel.bandLabel, flatFullDataId["band"])
        self.assertEqual(flatFilterLabel.physicalLabel,
                         flatFullDataId["physical_filter"])
        # Subimage reads go through the same fixup path.
        flatSub = self.butler.get("flat", flatFullDataId,
                                  parameters=self.parameters)
        self.assertEqual(flat.getFilter(), flatSub.getFilter())

    def testReadingNewFileWithIncompleteDataId(self):
        """If we try to read a new calexp with an incomplete data ID,
        the reader should recognize that it can't check the filters and
        just trust the file.
        """
        self.skip_mock()
        calexp = self.butler.get("calexp", self.calexpMinimalDataId)
        calexpFilterLabel = self.butler.get("calexp.filter",
                                            self.calexpMinimalDataId)
        self.assertTrue(calexp.getFilter().hasPhysicalLabel())
        self.assertTrue(calexp.getFilter().hasBandLabel())
        self.assertEqual(calexp.getFilter(), calexpFilterLabel)
        calexpSub = self.butler.get("calexp", self.calexpMinimalDataId,
                                    parameters=self.parameters)
        self.assertEqual(calexp.getFilter(), calexpSub.getFilter())

    def testReadingNewFileWithFullDataId(self):
        """If we try to read a new calexp with a full data ID, the reader
        should check the filters in the file for consistency with the data
        ID (and in this case, find them consistent).
        """
        self.skip_mock()
        calexpFullDataId = self.butler.registry.expandDataId(self.calexpMinimalDataId)
        calexp = self.butler.get("calexp", calexpFullDataId)
        self.assertEqual(calexp.getFilter().bandLabel, calexpFullDataId["band"])
        self.assertEqual(calexp.getFilter().physicalLabel,
                         calexpFullDataId["physical_filter"])
        calexpFilterLabel = self.butler.get("calexp.filter", calexpFullDataId)
        self.assertEqual(calexpFilterLabel.bandLabel, calexpFullDataId["band"])
        self.assertEqual(calexpFilterLabel.physicalLabel,
                         calexpFullDataId["physical_filter"])
        calexpSub = self.butler.get("calexp", calexpFullDataId,
                                    parameters=self.parameters)
        self.assertEqual(calexp.getFilter(), calexpSub.getFilter())

    def testReadingBadNewFileWithFullDataId(self):
        """If we try to read a new calexp with a full data ID, the reader
        should check the filters in the file for consistency with the data
        ID (and in this case, find them inconsistent, which should result
        in warnings and returning what's in the data ID).
        """
        self.skip_mock()
        # Deliberately-wrong band/physical_filter for this visit.
        calexpBadDataId = DataCoordinate.standardize(
            self.calexpMinimalDataId, band="g", physical_filter="HSC-G",
            visit_system=0,
        )
        self.assertTrue(calexpBadDataId.hasFull())
        # Some tests are only relevant when reading full calexps.
        # By definition a disassembled exposure will have a correct
        # filterlabel written out.
        # In this situation the test becomes moot since the filterLabel
        # formatter will not force a correct filter label into an
        # incorrect filter label based on DataId.
        _, components = self.butler.getURIs("calexp", calexpBadDataId)
        if components:
            raise unittest.SkipTest("Test not relevant because composite has been disassembled")
        with self.assertWarns(Warning):
            calexp = self.butler.get("calexp", calexpBadDataId)
        with self.assertWarns(Warning):
            calexpFilterLabel = self.butler.get("calexp.filter",
                                                calexpBadDataId)
        # The data ID wins over the (inconsistent) file contents.
        self.assertEqual(calexp.getFilter(), calexpFilterLabel)
        self.assertEqual(calexp.getFilter().bandLabel, calexpBadDataId["band"])
        self.assertEqual(calexp.getFilter().physicalLabel,
                         calexpBadDataId["physical_filter"])
        self.assertEqual(calexpFilterLabel.bandLabel, calexpBadDataId["band"])
        self.assertEqual(calexpFilterLabel.physicalLabel,
                         calexpBadDataId["physical_filter"])
        with self.assertWarns(Warning):
            calexpSub = self.butler.get("calexp", calexpBadDataId,
                                        parameters=self.parameters)
        self.assertEqual(calexp.getFilter(), calexpSub.getFilter())
class TestSchemaMatch(lsst.utils.tests.TestCase):
    """Check the schema of the parquet outputs match the DDL in sdm_schemas."""

    # All per-tract tables in the ci_imsim repo share this data ID; the
    # visit-level table (sourceTable_visit) supplies its own.
    _TRACT_DATA_ID = {
        "instrument": "LSSTCam-imSim",
        "tract": 0,
        "skymap": "discrete/ci_imsim/4k"
    }

    def setUp(self):
        # Read-only butler over the pre-built ci_imsim repository.
        self.butler = Butler(os.path.join(getPackageDir("ci_imsim"), "DATA"),
                             writeable=False,
                             collections=["LSSTCam-imSim/runs/ci_imsim"])
        schemaFile = os.path.join(getPackageDir("sdm_schemas"), 'yml',
                                  'imsim.yaml')
        with open(schemaFile, "r") as f:
            self.schema = yaml.safe_load(f)['tables']

    def _validateSchema(self, dataset, dataId, tableName):
        """Check column name and data type match between dataset and DDL.

        Parameters
        ----------
        dataset : `str`
            Butler dataset type name of the parquet table to read.
        dataId : `dict`
            Data ID identifying the dataset to read.
        tableName : `str`
            Name of the matching table in the sdm_schemas DDL.
        """
        info = f"dataset={dataset} tableName={tableName} dataId={dataId}"
        # Exactly one DDL table must carry the requested name.
        sdmSchema = [
            table for table in self.schema if table['name'] == tableName
        ]
        self.assertEqual(len(sdmSchema), 1)
        expectedColumns = {
            column['name']: column['datatype']
            for column in sdmSchema[0]['columns']
        }
        df = self.butler.get(dataset, dataId)
        # The index column(s) are part of the DDL too; surface them as
        # ordinary columns before comparing.
        df.reset_index(inplace=True)
        outputColumnNames = set(df.columns.to_list())
        self.assertEqual(outputColumnNames, set(expectedColumns.keys()),
                         f"{info} failed")
        # the data type mapping from felis datatype to pandas
        typeMapping = {
            "boolean": "bool",
            "int": "int32",
            "long": "int64",
            "float": "float32",
            "double": "float64",
            "char": "object"
        }
        for column in outputColumnNames:
            self.assertEqual(
                df.dtypes.get(column).name,
                typeMapping[expectedColumns[column]],
                f"{info} column={column} failed")

    def testObjectSchemaMatch(self):
        """Check objectTable_tract"""
        self._validateSchema("objectTable_tract", dict(self._TRACT_DATA_ID),
                             "object")

    def testSourceSchemaMatch(self):
        """Check one sourceTable_visit"""
        dataId = {
            "instrument": "LSSTCam-imSim",
            "detector": 100,
            "visit": 5884,
            "band": "y"
        }
        self._validateSchema("sourceTable_visit", dataId, "source")

    def testForcedSourceSchemaMatch(self):
        """Check forcedSourceTable_tract"""
        self._validateSchema("forcedSourceTable_tract",
                             dict(self._TRACT_DATA_ID), "forcedSource")

    def testDiaObjectSchemaMatch(self):
        """Check diaObjectTable_tract"""
        self._validateSchema("diaObjectTable_tract",
                             dict(self._TRACT_DATA_ID), "diaObject")

    def testDiaSourceSchemaMatch(self):
        """Check one diaSourceTable_tract"""
        self._validateSchema("diaSourceTable_tract",
                             dict(self._TRACT_DATA_ID), "diaSource")

    # BUG FIX: method was named testForcedSourceeOnDiaObjectSchemaMatch
    # (typo'd "Sourcee"); nothing references a test method by name, so the
    # rename is safe.
    def testForcedSourceOnDiaObjectSchemaMatch(self):
        """Check forcedSourceOnDiaObjectTable_tract"""
        self._validateSchema("forcedSourceOnDiaObjectTable_tract",
                             dict(self._TRACT_DATA_ID),
                             "forcedSourceOnDiaObject")

    def testMatchRefSchemaMatch(self):
        """Check match_ref_truth_summary_objectTable_tract"""
        self._validateSchema("match_ref_truth_summary_objectTable_tract",
                             dict(self._TRACT_DATA_ID), "matchesTruth")

    def testMatchObjectSchemaMatch(self):
        """Check match_target_truth_summary_objectTable_tract"""
        self._validateSchema("match_target_truth_summary_objectTable_tract",
                             dict(self._TRACT_DATA_ID), "matchesObject")
class Gen2ConvertTestCase(lsst.utils.tests.TestCase):
    """Validate the contents of the Gen3 repository produced by converting
    the ci_hsc Gen2 repository: collections, ID packing, filters, curated
    calibrations, and bright-object masks.
    """

    def setUp(self):
        # Read through the converted-run chain only.
        self.butler = Butler(GEN3_REPO_ROOT, collections="HSC/runs/ci_hsc")

    def tearDown(self):
        del self.butler

    def testCollections(self):
        """Test that the correct set of collections is created.
        """
        self.assertCountEqual(
            self.butler.registry.getCollectionChain("HSC/defaults"), [
                "refcats", "skymaps", "HSC/raw/all", "HSC/calib", "HSC/masks",
                "HSC/external"
            ])
        self.assertCountEqual(
            self.butler.registry.getCollectionChain("refcats"),
            ["refcats/gen2"],
        )
        self.assertEqual(self.butler.registry.getCollectionType("skymaps"),
                         CollectionType.RUN)
        self.assertEqual(
            self.butler.registry.getCollectionType("refcats/gen2"),
            CollectionType.RUN)
        self.assertEqual(self.butler.registry.getCollectionType("HSC/raw/all"),
                         CollectionType.RUN)
        # NOTE(review): exact duplicate of the "skymaps" assertion above --
        # possibly a different collection was intended here; confirm.
        self.assertEqual(self.butler.registry.getCollectionType("skymaps"),
                         CollectionType.RUN)
        self.assertEqual(
            list(
                self.butler.registry.queryCollections(
                    ..., collectionTypes={CollectionType.CALIBRATION})),
            ["HSC/calib"],
        )

    def testObservationPacking(self):
        """Test that packing Visit+Detector into an integer in Gen3
        generates the same results as in Gen2.
        """
        butler2 = Butler2(os.path.join(REPO_ROOT, "rerun", "ci_hsc"))
        for visit, detector in [(903334, 16), (903338, 25), (903986, 100)]:
            dataId2 = {"visit": visit, "ccd": detector}
            dataId3 = self.butler.registry.expandDataId(visit=visit,
                                                        detector=detector,
                                                        instrument="HSC")
            self.assertEqual(butler2.get("ccdExposureId", dataId2),
                             dataId3.pack("visit_detector"))

    def testSkyMapPacking(self):
        """Test that packing Tract+Patch into an integer in Gen3 works and
        is self-consistent.

        Note that this packing does *not* use the same algorithm as Gen2
        and hence generates different IDs, because the Gen2 algorithm is
        problematically tied to the *default* SkyMap for a particular
        camera, rather than the SkyMap actually used.
        """
        # SkyMap used by ci_hsc has only one tract, so the test coverage in
        # that area isn't great.  That's okay because that's tested in
        # SkyMap; what we care about here is that the converted repo has
        # the necessary metadata to construct and use these packers at all.
        for patch in [0, 43, 52]:
            dataId = self.butler.registry.expandDataId(
                skymap="discrete/ci_hsc", tract=0, patch=patch, band='r')
            packer1 = self.butler.registry.dimensions.makePacker(
                "tract_patch", dataId)
            packer2 = self.butler.registry.dimensions.makePacker(
                "tract_patch_band", dataId)
            # The band-aware packer must produce different IDs than the
            # band-agnostic one, and both must round-trip.
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)), dataId)
            # Only the band-aware packer is sensitive to the band value.
            self.assertEqual(packer1.pack(dataId, band='i'),
                             packer1.pack(dataId))
            self.assertNotEqual(packer2.pack(dataId, band='i'),
                                packer2.pack(dataId))

    def testRawFilters(self):
        """Test that raw data has the Filter component set.
        """
        # Note that the 'r' and 'i' values here look like Gen3 band
        # values, but they're something weird in between abstract and physical
        # filters; if we had HSC-R2 data, the corresponding value would be
        # 'r2', not just 'r'.  We need that to be compatible with Gen2 usage
        # of the afw.image.Filter system.
        rawR = self.butler.get("raw",
                               instrument="HSC",
                               exposure=903334,
                               detector=16)
        self.assertEqual(rawR.getFilter().getCanonicalName(), "r")
        rawI = self.butler.get("raw",
                               instrument="HSC",
                               exposure=903986,
                               detector=16)
        self.assertEqual(rawI.getFilter().getCanonicalName(), "i")

    def testCuratedCalibrations(self):
        """Test that defects, the camera, and the brighter-fatter kernel
        were added to the Gen3 registry.
        """
        rawDatasetType = self.butler.registry.getDatasetType("raw")
        cameraRef = None
        bfKernelRef = None
        rawRefs = list(
            self.butler.registry.queryDatasets(rawDatasetType,
                                               collections=["HSC/raw/all"
                                                            ]).expanded())
        self.assertEqual(len(rawRefs), 33)
        for rawRef in rawRefs:
            # Expand raw data ID to include implied dimensions (e.g.
            # physical_filter from exposure).
            for calibDatasetTypeName in ("camera", "bfKernel", "defects"):
                with self.subTest(dataset=calibDatasetTypeName):
                    calibDatasetType = self.butler.registry.getDatasetType(
                        calibDatasetTypeName)
                    calibRef = self.butler.registry.findDataset(
                        calibDatasetType,
                        collections=["HSC/calib"],
                        dataId=rawRef.dataId,
                        timespan=rawRef.dataId.timespan)
                    # We should have exactly one calib of each type
                    self.assertIsNotNone(calibRef)
                    # Try getting those calibs to make sure the files
                    # themselves are where the Butler thinks they are.  We
                    # defer that for camera and bfKernel, because there's only
                    # one of each of those.
                    if calibDatasetTypeName == "camera":
                        if cameraRef is None:
                            cameraRef = calibRef
                        else:
                            self.assertEqual(cameraRef, calibRef)
                    elif calibDatasetTypeName == "bfKernel":
                        if bfKernelRef is None:
                            bfKernelRef = calibRef
                        else:
                            self.assertEqual(bfKernelRef, calibRef)
                    else:
                        defects = self.butler.get(calibRef,
                                                  collections=calibRef.run)
                        self.assertIsInstance(defects, lsst.ip.isr.Defects)
        # Compare the single camera/bfKernel against what the Instrument
        # class itself provides.
        instrument = HyperSuprimeCam()
        cameraFromButler = self.butler.get(cameraRef,
                                           collections=cameraRef.run)
        cameraFromInstrument = instrument.getCamera()
        self.assertEqual(len(cameraFromButler), len(cameraFromInstrument))
        self.assertEqual(cameraFromButler.getName(),
                         cameraFromInstrument.getName())
        self.assertFloatsEqual(
            self.butler.get(bfKernelRef, collections=bfKernelRef.run),
            instrument.getBrighterFatterKernel())

    def testBrightObjectMasks(self):
        """Test that bright object masks are included in the Gen3 repo.
        """
        regions = self.butler.get("brightObjectMask",
                                  skymap='discrete/ci_hsc',
                                  tract=0,
                                  patch=69,
                                  band='r')
        self.assertIsInstance(regions, ObjectMaskCatalog)
        self.assertGreater(len(regions), 0)
# NOTE(review): this class has the same name as the Gen2ConvertTestCase
# defined earlier; if both live in one module the second definition shadows
# the first and its tests never run.  These look like two generations of the
# same test file (this one uses the older registry API) -- confirm intent.
class Gen2ConvertTestCase(lsst.utils.tests.TestCase):
    """Validate the contents of a Gen3 repository converted from the ci_hsc
    Gen2 repository, using the older (pre-DataCoordinate) registry API.
    """

    def setUp(self):
        self.butler = Butler(REPO_ROOT, run="shared/ci_hsc")

    def tearDown(self):
        del self.butler

    def testImpliedDimensions(self):
        """Test that implied dimensions are expanded properly when
        populating the Dataset table.
        """
        # All of the dataset types below have Visit or Exposure in their
        # dimensions, which means PhysicalFilter and AbstractFilter are
        # implied. dimensions for them.  Those should be non-null and
        # consistent.
        sql = """
            SELECT physical_filter, abstract_filter
            FROM dataset
            WHERE dataset_type_name IN (
                'raw', 'calexp', 'icExp', 'src', 'icSrc',
                'deepCoadd_directWarp', 'deepCoadd_psfMatchedWarp'
            )
            """
        count = 0
        for row in self.butler.registry.query(sql):
            if row["physical_filter"] == "HSC-R":
                self.assertEqual(row["abstract_filter"], "r")
            elif row["physical_filter"] == "HSC-I":
                self.assertEqual(row["abstract_filter"], "i")
            else:
                self.fail("physical_filter not in ('HSC-R', 'HSC-I')")
            count += 1
        # Guard against the query vacuously matching nothing.
        self.assertGreater(count, 0)

    def testObservationPacking(self):
        """Test that packing Visit+Detector into an integer in Gen3
        generates the same results as in Gen2.
        """
        butler2 = Butler2(os.path.join(REPO_ROOT, "rerun", "ci_hsc"))
        for visit, detector in [(903334, 16), (903338, 25), (903986, 100)]:
            dataId2 = {"visit": visit, "ccd": detector}
            dataId3 = self.butler.registry.expandDataId(visit=visit,
                                                        detector=detector,
                                                        instrument="HSC")
            self.assertEqual(
                butler2.get("ccdExposureId", dataId2),
                self.butler.registry.packDataId("visit_detector", dataId3))

    def testSkyMapPacking(self):
        """Test that packing Tract+Patch into an integer in Gen3 works and
        is self-consistent.

        Note that this packing does *not* use the same algorithm as Gen2
        and hence generates different IDs, because the Gen2 algorithm is
        problematically tied to the *default* SkyMap for a particular
        camera, rather than the SkyMap actually used.
        """
        # SkyMap used by ci_hsc has only one tract, so the test coverage in
        # that area isn't great.  That's okay because that's tested in
        # SkyMap; what we care about here is that the converted repo has
        # the necessary metadata to construct and use these packers at all.
        for patch in [0, 43, 52]:
            dataId = self.butler.registry.expandDataId(skymap="ci_hsc",
                                                       tract=0,
                                                       patch=patch,
                                                       abstract_filter='r')
            packer1 = self.butler.registry.makeDataIdPacker(
                "tract_patch", dataId)
            packer2 = self.butler.registry.makeDataIdPacker(
                "tract_patch_abstract_filter", dataId)
            # The filter-aware packer must differ from the filter-agnostic
            # one, and both must round-trip.
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataId(dataId, dimensions=packer1.dimensions.required))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)), dataId)
            # Only the filter-aware packer is sensitive to the filter value.
            self.assertEqual(packer1.pack(dataId, abstract_filter='i'),
                             packer1.pack(dataId))
            self.assertNotEqual(packer2.pack(dataId, abstract_filter='i'),
                                packer2.pack(dataId))

    def testRawFilters(self):
        """Test that raw data has the Filter component set.
        """
        # Note that the 'r' and 'i' values here look like Gen3 abstract_filter
        # values, but they're something weird in between abstract and physical
        # filters; if we had HSC-R2 data, the corresponding value would be
        # 'r2', not just 'r'.  We need that to be compatible with Gen2 usage
        # of the afw.image.Filter system.
        rawR = self.butler.get("raw",
                               instrument="HSC",
                               exposure=903334,
                               detector=16)
        self.assertEqual(rawR.getFilter().getName(), "r")
        rawI = self.butler.get("raw",
                               instrument="HSC",
                               exposure=903986,
                               detector=16)
        self.assertEqual(rawI.getFilter().getName(), "i")

    def testCuratedCalibrations(self):
        """Test that defects, the camera, and the brighter-fatter kernel
        were added to the Gen3 registry.
        """
        originInfo = DatasetOriginInfoDef(["raw", "calib"], [])
        # Query for raws that have associated calibs of the types below;
        # result is an iterator over rows that correspond roughly to data IDs.
        rowsWithCalibs = list(
            self.butler.registry.selectMultipleDatasetTypes(
                originInfo,
                expression="",
                required=["raw", "camera", "bfKernel", "defects"],
                perDatasetTypeDimensions=["calibration_label"]))
        # Query for all rows, with no restriction on having associated calibs.
        rowsWithoutCalibs = list(
            self.butler.registry.selectMultipleDatasetTypes(
                originInfo,
                expression="",
                required=["raw"],
            ))
        # We should get the same raws in both cases because all of the raws
        # here should have associated calibs.
        self.assertGreater(len(rowsWithoutCalibs), 0)
        self.assertEqual(len(rowsWithCalibs), len(rowsWithoutCalibs))
        # Try getting those calibs to make sure the files themselves are
        # where the Butler thinks they are.
        butler = Butler(REPO_ROOT, run="calib")
        instrument = HyperSuprimeCam()
        for row in rowsWithCalibs:
            refsByName = {k.name: v for k, v in row.datasetRefs.items()}
            cameraFromButler = butler.get(refsByName["camera"])
            cameraFromInstrument = instrument.getCamera()
            self.assertEqual(len(cameraFromButler), len(cameraFromInstrument))
            self.assertEqual(cameraFromButler.getName(),
                             cameraFromInstrument.getName())
            self.assertFloatsEqual(butler.get(refsByName["bfKernel"]),
                                   instrument.getBrighterFatterKernel())
            defects = butler.get(refsByName["defects"])
            self.assertIsInstance(defects, lsst.meas.algorithms.Defects)

    def testBrightObjectMasks(self):
        """Test that bright object masks are included in the Gen3 repo.
        """
        regions = self.butler.get("brightObjectMask",
                                  skymap='ci_hsc',
                                  tract=0,
                                  patch=69,
                                  abstract_filter='r')
        self.assertIsInstance(regions, ObjectMaskCatalog)
        self.assertGreater(len(regions), 0)