Example #1
    def testMatplotlibFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("test_plot", [],
                                  "Plot",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        # Does not have to be a random image
        pyplot.imshow([
            self.rng.sample(range(50), 10),
            self.rng.sample(range(50), 10),
            self.rng.sample(range(50), 10),
        ])
        ref = butler.put(pyplot.gcf(), datasetType)
        uri = butler.getURI(ref)
        # The next test will not work unless we have a local file
        self.assertEqual(uri.scheme, "file", f"Testing returned URI: {uri}")
        with tempfile.NamedTemporaryFile(suffix=".png") as file:
            pyplot.gcf().savefig(file.name)
            self.assertTrue(filecmp.cmp(uri.path, file.name, shallow=True))
        self.assertTrue(butler.datasetExists(ref))
        with self.assertRaises(ValueError):
            butler.get(ref)
        butler.pruneDatasets([ref], unstore=True, purge=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref)

    def testHealSparseMapFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("map", [],
                                  "HealSparseMap",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        ref = butler.put(self.hspMap, datasetType)
        uri = butler.getURI(ref)
        self.assertEqual(uri.getExtension(), '.hsp')

        # Retrieve the full map.
        hspMap = butler.get('map')
        self.assertTrue(np.all(hspMap._sparse_map == self.hspMap._sparse_map))

        # Retrieve the coverage map
        coverage = butler.get('map.coverage')
        self.assertTrue(
            np.all(coverage.coverage_mask == self.hspMap.coverage_mask))

        # Retrieve a partial map
        pixels = [0, 6]
        partialMap = butler.get('map', parameters={'pixels': pixels})

        self.assertTrue(
            np.all(np.where(partialMap.coverage_mask)[0] == np.array(pixels)))
        self.assertTrue(np.all(partialMap[0:10000] == self.hspMap[0:10000]))
        self.assertTrue(
            np.all(partialMap[100000:110000] == self.hspMap[100000:110000]))

        # Retrieve a degraded map
        degradedMapRead = butler.get('map', parameters={'degrade_nside': 512})
        degradedMap = self.hspMap.degrade(512)

        self.assertTrue(
            np.all(degradedMapRead._sparse_map == degradedMap._sparse_map))
Example #3
    def testMatplotlibFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("test_plot", [], "Plot",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        # Does not have to be a random image
        pyplot.imshow([self.rng.sample(range(50), 10),
                       self.rng.sample(range(50), 10),
                       self.rng.sample(range(50), 10),
                       ])
        ref = butler.put(pyplot.gcf(), datasetType)
        uri = butler.getURI(ref)

        # Following test needs a local file
        with uri.as_local() as local:
            with tempfile.NamedTemporaryFile(suffix=".png") as file:
                pyplot.gcf().savefig(file.name)
                self.assertTrue(
                    filecmp.cmp(
                        local.ospath,
                        file.name,
                        shallow=True
                    )
                )
        self.assertTrue(butler.datasetExists(ref))
        with self.assertRaises(ValueError):
            butler.get(ref)
        butler.pruneDatasets([ref], unstore=True, purge=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref)

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])
        dimensionEntries = (("instrument", {
            "instrument": "DummyCam"
        }), ("physical_filter", {
            "instrument": "DummyCam",
            "name": "d-r",
            "abstract_filter": "R"
        }), ("visit", {
            "instrument": "DummyCam",
            "id": 42,
            "name": "fortytwo",
            "physical_filter": "d-r"
        }))
        storageClass = self.storageClassFactory.getStorageClass(
            "StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                                  storageClass,
                                                  butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError(
                    "This should roll back the entire transaction")

        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(
            butler.registry.find(butler.collection, datasetType, dataId))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)
Example #5
    def verifyIngest(self, files=None, cli=False, fullCheck=False):
        """
        Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        fullCheck : `bool`, optional
            If `True`, read the full raw dataset and check component
            consistency. If `False` check that a component can be read
            but do not read the entire raw exposure.

        Notes
        -----
        Reading all the ingested test data can be expensive. The code paths
        for reading the second raw are the same as reading the first so
        we do not gain anything by doing full checks of everything.
        Only read full pixel data for first dataset from file.
        Don't even do that if we are requested not to by the caller.
        This only really affects files that contain multiple datasets.
        """
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(
            butler.registry.queryDatasets("raw", collections=self.outputRun))
        self.assertEqual(len(datasets), len(self.dataIds))

        # Get the URI to the first dataset and check it is inside the
        # datastore
        datasetUri = butler.getURI(datasets[0])
        self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

        for dataId in self.dataIds:
            # Check that we can read metadata from a raw
            metadata = butler.get("raw.metadata", dataId)
            if not fullCheck:
                continue
            fullCheck = False
            exposure = butler.get("raw", dataId)
            self.assertEqual(metadata.toDict(),
                             exposure.getMetadata().toDict())

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            wcs = butler.get("raw.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get("raw.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

            # check that the filter label got the correct band
            filterLabel = butler.get("raw.filterLabel", dataId)
            self.assertEqual(filterLabel, self.filterLabel)

        self.checkRepo(files=files)

    def runExposureCompositePutGetTest(self, storageClass, datasetTypeName):
        example = os.path.join(TESTDIR, "data", "basic", "small.fits")
        exposure = lsst.afw.image.ExposureF(example)
        butler = Butler(self.tmpConfigFile)
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])
        self.registerDatasetTypes(datasetTypeName, dimensions, storageClass,
                                  butler.registry)
        dataId = {
            "visit": 42,
            "instrument": "DummyCam",
            "physical_filter": "d-r"
        }
        # Add needed Dimensions
        butler.registry.addDimensionEntry("instrument",
                                          {"instrument": "DummyCam"})
        butler.registry.addDimensionEntry("physical_filter", {
            "instrument": "DummyCam",
            "physical_filter": "d-r"
        })
        butler.registry.addDimensionEntry("visit", {
            "instrument": "DummyCam",
            "visit": 42,
            "physical_filter": "d-r"
        })
        butler.put(exposure, datasetTypeName, dataId)
        # Get the full thing
        butler.get(datasetTypeName, dataId)
        # TODO enable check for equality (fix for Exposure type)
        # self.assertEqual(full, exposure)
        # Get a component
        compsRead = {}
        for compName in ("wcs", "image", "mask", "coaddInputs", "psf"):
            compTypeName = DatasetType.nameWithComponent(
                datasetTypeName, compName)
            component = butler.get(compTypeName, dataId)
            # TODO enable check for component instance types
            # compRef = butler.registry.find(butler.run.collection,
            #                                f"calexp.{compName}", dataId)
            # self.assertIsInstance(component,
            #                       compRef.datasetType.storageClass.pytype)
            compsRead[compName] = component
        # Simple check of WCS
        bbox = lsst.afw.geom.Box2I(lsst.afw.geom.Point2I(0, 0),
                                   lsst.afw.geom.Extent2I(9, 9))
        self.assertWcsAlmostEqualOverBBox(compsRead["wcs"], exposure.getWcs(),
                                          bbox)

        # With parameters
        inBBox = Box2I(minimum=Point2I(0, 0), maximum=Point2I(3, 3))
        parameters = dict(bbox=inBBox, origin=LOCAL)
        subset = butler.get(datasetTypeName, dataId, parameters=parameters)
        outBBox = subset.getBBox()
        self.assertEqual(inBBox, outBBox)
Example #7
class TestCalibrateOutputs(lsst.utils.tests.TestCase):
    """Test the output data products of calibrate task make sense

    This is a regression test and not intended for scientific validation
    """

    def setUp(self):
        self.butler = Butler(os.path.join(getPackageDir("ci_imsim"), "DATA"),
                             writeable=False, collections=["LSSTCam-imSim/runs/ci_imsim"])
        self.dataId = {"detector": 55, "visit": 206039, "band": "y"}
        self.calexp = self.butler.get("calexp", self.dataId)
        self.src = self.butler.get("src", self.dataId)

    def testLocalPhotoCalibColumns(self):
        """Check that the calexp's calibs are consistent with the src's
        photoCalib columns.
        """
        # Check that means are in the same ballpark
        calexpCalib = self.calexp.getPhotoCalib().getCalibrationMean()
        calexpCalibErr = self.calexp.getPhotoCalib().getCalibrationErr()
        srcCalib = np.mean(self.src['base_LocalPhotoCalib'])
        srcCalibErr = np.mean(self.src['base_LocalPhotoCalibErr'])

        self.assertAlmostEqual(calexpCalib, srcCalib, places=3)
        self.assertAlmostEqual(calexpCalibErr, srcCalibErr, places=3)

        # and that calibs evaluated at local positions match a few rows
        randomRows = [0, 8, 20]
        for rowNum in randomRows:
            record = self.src[rowNum]
            localEval = self.calexp.getPhotoCalib().getLocalCalibration(record.getCentroid())
            self.assertAlmostEqual(localEval, record['base_LocalPhotoCalib'])

    def testLocalWcsColumns(self):
        """Check that the calexp's WCS matches the local WCS columns in src.
        """
        # Check a few rows:
        randomRows = [1, 9, 21]
        for rowNum in randomRows:
            record = self.src[rowNum]
            centroid = record.getCentroid()
            trueCdMatrix = np.radians(self.calexp.getWcs().getCdMatrix(centroid))

            self.assertAlmostEqual(record['base_LocalWcs_CDMatrix_1_1'], trueCdMatrix[0, 0])
            self.assertAlmostEqual(record['base_LocalWcs_CDMatrix_2_1'], trueCdMatrix[1, 0])
            self.assertAlmostEqual(record['base_LocalWcs_CDMatrix_1_2'], trueCdMatrix[0, 1])
            self.assertAlmostEqual(record['base_LocalWcs_CDMatrix_2_2'], trueCdMatrix[1, 1])
            self.assertAlmostEqual(
                self.calexp.getWcs().getPixelScale(centroid).asRadians(),
                np.sqrt(np.fabs(record['base_LocalWcs_CDMatrix_1_1']*record['base_LocalWcs_CDMatrix_2_2']
                                - record['base_LocalWcs_CDMatrix_2_1']*record['base_LocalWcs_CDMatrix_1_2'])))
Example #8
    def testBasicPutGet(self):
        butler = Butler(self.configFile)
        # Create and register a DatasetType
        datasetTypeName = "test_metric"
        dataUnits = ("Camera", "Visit")
        storageClass = self.storageClassFactory.getStorageClass(
            "StructuredData")
        self.registerDatasetTypes(datasetTypeName, dataUnits, storageClass,
                                  butler.registry)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"camera": "DummyCam", "visit": 42}
        ref = butler.put(metric, datasetTypeName, dataId)
        self.assertIsInstance(ref, DatasetRef)
        # Test getDirect
        metricOut = butler.getDirect(ref)
        self.assertEqual(metric, metricOut)
        # Test get
        metricOut = butler.get(datasetTypeName, dataId)
        self.assertEqual(metric, metricOut)

        # Check we can get components
        self.assertGetComponents(butler, datasetTypeName, dataId,
                                 ("summary", "data", "output"), metric)
Example #9
class PexConfigFormatterTestCase(unittest.TestCase):
    """Tests for PexConfigFormatter, using local file datastore."""

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "config",
            dimensions=(),
            storageClass="Config",
            universe=self.butler.registry.dimensions)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        removeTestTempDir(self.root)

    def testPexConfig(self) -> None:
        """Test that we can put and get pex_config Configs"""
        c = SimpleConfig(i=10, c="hello")
        self.assertEqual(c.i, 10)
        ref = self.butler.put(c, "config")
        butler_c = self.butler.get(ref)
        self.assertEqual(c, butler_c)
        self.assertIsInstance(butler_c, SimpleConfig)
Example #10
def getInitInputs(butler: Butler,
                  config: PipelineTaskConfig) -> Dict[str, Any]:
    """Return the initInputs object that would have been passed to a
    `~lsst.pipe.base.PipelineTask` constructor.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to search for input datasets. Must have
        pre-configured collections.
    config : `lsst.pipe.base.PipelineTaskConfig`
        The config for the task to be constructed.

    Returns
    -------
    initInputs : `dict` [`str`]
        A dictionary of objects in the format of the ``initInputs`` parameter
        to `lsst.pipe.base.PipelineTask`.
    """
    connections = config.connections.ConnectionsClass(config=config)
    initInputs = {}
    for name in connections.initInputs:
        attribute = getattr(connections, name)
        # Get full dataset type to check for consistency problems
        dsType = DatasetType(attribute.name,
                             butler.registry.dimensions.extract(set()),
                             attribute.storageClass)
        # All initInputs have empty data IDs
        initInputs[name] = butler.get(dsType)

    return initInputs
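
For illustration, here is a hedged sketch of how ``getInitInputs`` might be
driven; the repository path, collection, and task config class below are
hypothetical placeholders, not values from the source.

# Hypothetical usage sketch for getInitInputs; the repo path, collection,
# and the choice of CalibrateConfig/CalibrateTask are placeholder examples.
from lsst.daf.butler import Butler
from lsst.pipe.tasks.calibrate import CalibrateConfig, CalibrateTask

butler = Butler("/path/to/repo", collections=["some/run"])
config = CalibrateConfig()
initInputs = getInitInputs(butler, config)
# The returned dict matches the ``initInputs`` constructor parameter.
task = CalibrateTask(initInputs=initInputs, config=config)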
Example #11
    def testMatplotlibFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("test_plot", [],
                                  "Plot",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        pyplot.imshow(np.random.randn(3, 4))
        ref = butler.put(pyplot.gcf(), datasetType)
        parsed = urllib.parse.urlparse(butler.getUri(ref))
        with tempfile.NamedTemporaryFile(suffix=".png") as file:
            pyplot.gcf().savefig(file.name)
            self.assertTrue(filecmp.cmp(parsed.path, file.name, shallow=True))
        self.assertTrue(butler.datasetExists(ref))
        with self.assertRaises(ValueError):
            butler.get(ref)
        butler.remove(ref)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref)

    def testAstropyTableFormatter(self):
        butler = Butler(self.root, run="testrun")
        datasetType = DatasetType("table", [],
                                  "AstropyTable",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        ref = butler.put(self.table, datasetType)
        uri = butler.getURI(ref)
        self.assertEqual(uri.getExtension(), '.ecsv')
        table = butler.get('table')
        self.assertTrue(numpy.all(table == self.table))
Example #13
    def testCuratedCalibrations(self):
        """Test that defects, the camera, and the brighter-fatter kernel were
        added to the Gen3 registry.
        """
        originInfo = DatasetOriginInfoDef(["raw", "calib"], [])
        # Query for raws that have associated calibs of the types below;
        # result is an iterator over rows that correspond roughly to data IDs.
        rowsWithCalibs = list(
            self.butler.registry.selectMultipleDatasetTypes(
                originInfo,
                expression="",
                required=["raw", "camera", "bfKernel", "defects"],
                perDatasetTypeDimensions=["calibration_label"]))
        # Query for all rows, with no restriction on having associated calibs.
        rowsWithoutCalibs = list(
            self.butler.registry.selectMultipleDatasetTypes(
                originInfo,
                expression="",
                required=["raw"],
            ))
        # We should get the same raws in both cases because all of the raws
        # here should have associated calibs.
        self.assertGreater(len(rowsWithoutCalibs), 0)
        self.assertEqual(len(rowsWithCalibs), len(rowsWithoutCalibs))
        # Try getting those calibs to make sure the files themselves are
        # where the Butler thinks they are.
        butler = Butler(REPO_ROOT, run="calib")
        instrument = HyperSuprimeCam()
        for row in rowsWithCalibs:
            refsByName = {k.name: v for k, v in row.datasetRefs.items()}
            cameraFromButler = butler.get(refsByName["camera"])
            cameraFromInstrument = instrument.getCamera()
            self.assertEqual(len(cameraFromButler), len(cameraFromInstrument))
            self.assertEqual(cameraFromButler.getName(),
                             cameraFromInstrument.getName())
            self.assertFloatsEqual(butler.get(refsByName["bfKernel"]),
                                   instrument.getBrighterFatterKernel())
            defects = butler.get(refsByName["defects"])
            self.assertIsInstance(defects, lsst.meas.algorithms.Defects)
Example #14
class TestSchemaMatch(lsst.utils.tests.TestCase, MockCheckMixin):
    """Check that the schemas of the parquet outputs match the DDL in
    sdm_schemas.
    """

    def setUp(self):
        self.butler = Butler(os.path.join(getPackageDir("ci_hsc_gen3"),
                                          "DATA"),
                             writeable=False,
                             collections=["HSC/runs/ci_hsc"])
        schemaFile = os.path.join(getPackageDir("sdm_schemas"), 'yml',
                                  'hsc.yaml')
        with open(schemaFile, "r") as f:
            self.schema = yaml.safe_load(f)['tables']

    def _validateSchema(self, dataset, dataId, tableName):
        """Check the schema of the parquet dataset match that in the DDL.
        Only the column names are checked currently.
        """
        # skip the test in mock execution
        self.skip_mock(dataset)

        sdmSchema = [
            table for table in self.schema if table['name'] == tableName
        ]
        self.assertEqual(len(sdmSchema), 1)
        expectedColumnNames = set(column['name']
                                  for column in sdmSchema[0]['columns'])

        df = self.butler.get(dataset, dataId)
        df.reset_index(inplace=True)
        outputColumnNames = set(df.columns.to_list())
        self.assertEqual(outputColumnNames, expectedColumnNames)

    def testObjectSchemaMatch(self):
        """Check objectTable_tract"""
        dataId = {"instrument": "HSC", "tract": 0}
        self._validateSchema("objectTable_tract", dataId, "Object")

    def testSourceSchemaMatch(self):
        """Check one sourceTable_visit"""
        dataId = {
            "instrument": "HSC",
            "detector": 100,
            "visit": 903334,
            "band": "r"
        }
        self._validateSchema("sourceTable_visit", dataId, "Source")


class JobReporter:
    def __init__(self, repository, collection, metrics_package, spec, dataset_name):
        # Hard coding verify_metrics as the packager for now.
        # It would be easy to pass this in as an argument, if necessary.
        self.metrics = MetricSet.load_metrics_package(package_name_or_path='verify_metrics',
                                                      subset=metrics_package)
        self.butler = Butler(repository)
        self.registry = self.butler.registry
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name

    def run(self):
        jobs = {}
        for metric in self.metrics:
            data_ids = list(self.registry.queryDatasets((f'metricvalue_{metric.package}'
                                                         f'_{metric.metric}'),
                            collections=self.collection))
            for did in data_ids:
                m = self.butler.get(did, collections=self.collection)
                # make the name the same as what SQuaSH expects
                m.metric_name = metric
                # Grab the physical filter associated with the abstract filter
                # In general there may be more than one.  Take the shortest assuming
                # it is the most generic.
                pfilts = [el.name for el in self.butler.registry.queryDimensionRecords('physical_filter',
                                                                                       dataId=did.dataId)]
                pfilt = min(pfilts, key=len)

                tract = did.dataId['tract']
                afilt = did.dataId['band']
                key = f"{tract}_{afilt}"
                if key not in jobs.keys():
                    job_metadata = {'instrument': did.dataId['instrument'],
                                    'filter': pfilt,
                                    'band': afilt,
                                    'tract': tract,
                                    'butler_generation': 'Gen3',
                                    'ci_dataset': self.dataset_name}
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs
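
A hedged sketch of driving ``JobReporter``; every argument value below is a
placeholder rather than something taken from the source.

# Hypothetical driver for JobReporter; all argument values are placeholders.
reporter = JobReporter(repository="/path/to/repo",
                       collection="validation/run",
                       metrics_package="validate_drp",
                       spec="design",
                       dataset_name="my_ci_dataset")
jobs = reporter.run()
# One Job per (tract, band) pair; persist each for later upload to SQuaSH.
for key, job in jobs.items():
    job.write(f"{key}.json")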
Example #16
    def testPhotodiode(self):
        """Test ingest to a repo with the exposure information will not raise.
        """
        # Ingest raw to provide exposure information.
        outputRun = "raw_ingest_" + self.id()
        runner = LogCliRunner()
        result = runner.invoke(
            butlerCli,
            [
                "ingest-raws",
                self.root,
                self.file,
                "--output-run",
                outputRun,
                "--ingest-task",
                self.rawIngestTask,
            ],
        )
        self.assertEqual(
            result.exit_code, 0,
            f"output: {result.output} exception: {result.exception}")

        # Ingest photodiode matching this exposure.
        runner = LogCliRunner()
        result = runner.invoke(
            butlerCli,
            [
                "ingest-photodiode",
                self.root,
                self.instrumentClassName,
                self.pdPath,
            ],
        )
        self.assertEqual(
            result.exit_code, 0,
            f"output: {result.output} exception: {result.exception}")

        # Confirm that we can retrieve the ingested photodiode, and
        # that it has the correct type.
        butler = Butler(self.root, run="LSSTCam/calib/photodiode")
        getResult = butler.get('photodiode', dataId=self.dataIds[0])
        self.assertIsInstance(getResult, PhotodiodeCalib)
Example #17
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of Gen3 ingest. Subclass from this class, then
    from `unittest.TestCase`, to get a working test suite.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    instrument = None
    """The instrument to be registered and tested."""

    dataIds = []
    """list of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    RawIngestTask = lsst.obs.base.RawIngestTask
    """The task to use in the Ingest test."""
    def setUp(self):
        # Use a temporary working directory
        self.root = tempfile.mkdtemp(dir=self.ingestDir)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="raw")

        # Register the instrument and its static metadata
        self.instrument.register(self.butler.registry)

        # Make a default config for test methods to play with
        self.config = self.RawIngestTask.ConfigClass()
        self.config.instrument = \
            f"{self.instrument.__class__.__module__}.{self.instrument.__class__.__name__}"

    def tearDown(self):
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runIngest(self, files=None):
        """
        Initialize and run RawIngestTask on a list of files.

        Parameters
        ----------
        files : `list` [`str`], optional
            List of files to be ingested, or `None` to use ``self.file``.
        """
        if files is None:
            files = [self.file]
        task = self.RawIngestTask(config=self.config, butler=self.butler)
        # Silence logs, since we expect a lot of warnings.
        task.log.setLevel(task.log.FATAL)
        task.run(files)

    def runIngestTest(self, files=None):
        """
        Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], optional
            List of files to be ingested, or `None` to use ``self.file``.
        """
        self.runIngest(files)
        datasets = self.butler.registry.queryDatasets('raw', collections=...)
        self.assertEqual(len(list(datasets)), len(self.dataIds))
        for dataId in self.dataIds:
            exposure = self.butler.get("raw", dataId)
            metadata = self.butler.get("raw.metadata", dataId)
            # only check the metadata, not the images, to speed up tests
            self.assertEqual(metadata.toDict(),
                             exposure.getMetadata().toDict())

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], optional
            List of files that were ingested, or `None` to use ``self.file``.
        """
        pass

    def testSymLink(self):
        self.config.transfer = "symlink"
        self.runIngestTest()

    def testCopy(self):
        self.config.transfer = "copy"
        self.runIngestTest()

    def testHardLink(self):
        self.config.transfer = "hardlink"
        try:
            self.runIngestTest()
        except PermissionError as err:
            raise unittest.SkipTest(
                "Skipping hard-link test because input data"
                " is on a different filesystem.") from err

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        # symlink into repo root manually
        newPath = os.path.join(self.butler.datastore.root,
                               os.path.basename(self.file))
        os.symlink(os.path.abspath(self.file), newPath)
        self.config.transfer = None
        self.runIngestTest([newPath])

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self.config.transfer = "symlink"
        self.runIngest()
        with self.assertRaises(Exception):
            self.runIngest()
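
As the class docstring notes, a concrete suite mixes this base class with
`unittest.TestCase`. A minimal, hypothetical subclass might look like the
following; the instrument class, file name, and data ID are placeholders.

class DummyCamIngestTestCase(IngestTestBase, unittest.TestCase):
    """Hypothetical ingest tests for a DummyCam obs package."""

    ingestDir = os.path.dirname(__file__)
    instrument = DummyCamInstrument()  # placeholder Instrument subclass
    file = os.path.join(ingestDir, "data", "raw", "dummy_0001.fits")
    dataIds = [dict(instrument="DummyCam", exposure=1, detector=0)]
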
    def runPutGetTest(self, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile)

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                          storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.addDimensionEntry("instrument",
                                          {"instrument": "DummyCamComp"})
        butler.registry.addDimensionEntry("physical_filter", {
            "instrument": "DummyCamComp",
            "physical_filter": "d-r"
        })
        butler.registry.addDimensionEntry("visit", {
            "instrument": "DummyCamComp",
            "visit": 423,
            "physical_filter": "d-r"
        })

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId),
                     (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"),
                                             metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {"ingest"})
Example #19
def makeDiscreteSkyMap(repo,
                       config_file,
                       collections,
                       instrument,
                       skymap_id='discrete',
                       old_skymap_id=None):
    """Implements the command line interface `butler make-discrete-skymap` subcommand,
    should only be called by command line tools and unit test code that tests
    this function.

    Constructs a skymap from calibrated exposure in the butler repository

    Parameters
    ----------
    repo : `str`
        URI to the location to read the repo.
    config_file : `str` or `None`
        Path to a config file that contains overrides to the skymap config.
    collections : `list` [`str`]
        An expression specifying the collections to be searched (in order) when
        reading datasets, and optionally dataset type restrictions on them.
        At least one collection must be specified.  This is the collection
        with the calibrated exposures.
    instrument : `str`
        The name or fully-qualified class name of an instrument.
    skymap_id : `str`, optional
        The identifier of the skymap to save.  Default is 'discrete'.
    old_skymap_id : `str`, optional
        The identifier of the skymap to append to.  Must differ from
        ``skymap_id``.  Ignored unless ``config.doAppend=True``.
    """
    butler = Butler(repo, collections=collections, writeable=True)
    instr = getInstrument(instrument, butler.registry)
    config = MakeDiscreteSkyMapConfig()
    instr.applyConfigOverrides(MakeDiscreteSkyMapTask._DefaultName, config)

    if config_file is not None:
        config.load(config_file)
    # The coaddName for a SkyMap is only relevant in Gen2, and we completely
    # ignore it here; once Gen2 is gone it can be removed.
    oldSkyMap = None
    if config.doAppend:
        if old_skymap_id is None:
            raise ValueError(
                "old_skymap_id must be provided if config.doAppend is True.")
        dataId = {'skymap': old_skymap_id}
        try:
            oldSkyMap = butler.get(BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
                                   collections=collections,
                                   dataId=dataId)
        except LookupError as e:
            msg = (
                f"Could not find seed skymap with dataId {dataId} "
                f"in collections {collections} but doAppend is {config.doAppend}.  Aborting..."
            )
            raise LookupError(msg, *e.args[1:])

    datasets = butler.registry.queryDatasets('calexp', collections=collections)
    wcs_md_tuple_list = [(butler.getDirect('calexp.metadata', ref),
                          butler.getDirect('calexp.wcs', ref))
                         for ref in datasets]
    task = MakeDiscreteSkyMapTask(config=config)
    result = task.run(wcs_md_tuple_list, oldSkyMap)
    result.skyMap.register(skymap_id, butler)
    butler.put(result.skyMap,
               BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
               dataId={'skymap': skymap_id},
               run=BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
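
A sketch of invoking this function directly from Python, mirroring what the
CLI subcommand would do; the repository path and collection name below are
placeholders.

# Hypothetical invocation; the path and collection are placeholders.
makeDiscreteSkyMap(
    repo="/path/to/repo",
    config_file=None,                  # no skymap config overrides
    collections=["HSC/runs/mycoadds"],
    instrument="lsst.obs.subaru.HyperSuprimeCam",
)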
Example #20
class HscIngestTestCase(lsst.utils.tests.TestCase):
    def setUp(self):
        # Use a temporary working directory
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="raw")
        # Register the instrument and its static metadata
        HyperSuprimeCam().register(self.butler.registry)
        # Make a default config for test methods to play with
        self.config = RawIngestTask.ConfigClass()
        self.config.onError = "break"
        self.file = os.path.join(testDataDirectory, "hsc", "raw",
                                 "HSCA90402512.fits.gz")
        self.dataId = dict(instrument="HSC", exposure=904024, detector=50)

    def tearDown(self):
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runIngest(self, files=None):
        if files is None:
            files = [self.file]
        task = RawIngestTask(config=self.config, butler=self.butler)
        # Silence logs, since we expect a lot of warnings.
        task.log.setLevel(task.log.FATAL)
        task.run(files)

    def runIngestTest(self, files=None):
        self.runIngest(files)
        exposure = self.butler.get("raw", self.dataId)
        metadata = self.butler.get("raw.metadata", self.dataId)
        image = self.butler.get("raw.image", self.dataId)
        self.assertImagesEqual(exposure.image, image)
        self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())

    def testSymLink(self):
        self.config.transfer = "symlink"
        self.runIngestTest()

    def testCopy(self):
        self.config.transfer = "copy"
        self.runIngestTest()

    def testHardLink(self):
        self.config.transfer = "hardlink"
        self.runIngestTest()

    def testInPlace(self):
        # hardlink into repo root manually
        newPath = os.path.join(self.butler.datastore.root,
                               os.path.basename(self.file))
        os.link(self.file, newPath)
        self.config.transfer = None
        self.runIngestTest([newPath])

    def testOnConflictFail(self):
        self.config.transfer = "symlink"
        self.config.conflict = "fail"
        self.runIngest()  # this one should succeed
        with self.assertRaises(Exception):
            self.runIngest()  # this one should fail

    def testOnConflictIgnore(self):
        self.config.transfer = "symlink"
        self.config.conflict = "ignore"
        self.runIngest()  # this one should succeed
        n1, = self.butler.registry.query("SELECT COUNT(*) FROM Dataset")
        self.runIngest()  # this one should silently fail
        n2, = self.butler.registry.query("SELECT COUNT(*) FROM Dataset")
        self.assertEqual(n1, n2)

    def testOnConflictStash(self):
        self.config.transfer = "symlink"
        self.config.conflict = "ignore"
        self.config.stash = "stash"
        self.runIngest()  # this one should write to 'raw'
        self.runIngest()  # this one should write to 'stash'
        dt = self.butler.registry.getDatasetType("raw.metadata")
        ref1 = self.butler.registry.find(self.butler.collection, dt,
                                         self.dataId)
        ref2 = self.butler.registry.find("stash", dt, self.dataId)
        self.assertNotEqual(ref1.id, ref2.id)
        self.assertEqual(
            self.butler.get(ref1).toDict(),
            self.butler.getDirect(ref2).toDict())

    def testOnErrorBreak(self):
        self.config.transfer = "symlink"
        self.config.onError = "break"
        # Failing to ingest this nonexistent file after ingesting the valid one should
        # leave the valid one in the registry, despite raising an exception.
        with self.assertRaises(Exception):
            self.runIngest(files=[self.file, "nonexistent.fits"])
        dt = self.butler.registry.getDatasetType("raw.metadata")
        self.assertIsNotNone(
            self.butler.registry.find(self.butler.collection, dt, self.dataId))

    def testOnErrorContinue(self):
        self.config.transfer = "symlink"
        self.config.onError = "continue"
        # Failing to ingest nonexistent files before and after ingesting the
        # valid one should leave the valid one in the registry and not raise
        # an exception.
        self.runIngest(
            files=["nonexistent.fits", self.file, "still-not-here.fits"])
        dt = self.butler.registry.getDatasetType("raw.metadata")
        self.assertIsNotNone(
            self.butler.registry.find(self.butler.collection, dt, self.dataId))

    def testOnErrorRollback(self):
        self.config.transfer = "symlink"
        self.config.onError = "rollback"
        # Failing to ingest nonexistent files after ingesting the
        # valid one should leave the registry empty.
        with self.assertRaises(Exception):
            self.runIngest(files=[self.file, "nonexistent.fits"])
        try:
            dt = self.butler.registry.getDatasetType("raw.metadata")
        except KeyError:
            # If we also roll back registering the DatasetType, that's fine,
            # but not required.
            pass
        else:
            self.assertIsNone(
                self.butler.registry.find(self.butler.collection, dt,
                                          self.dataId))
Example #21
class TestCoaddOutputs(unittest.TestCase, MockCheckMixin):
    """Check that coadd outputs are as expected.

    Many tests here are ported from
    https://github.com/lsst/pipe_tasks/blob/
    fd7d5e23d3c71e5d440153bc4faae7de9d5918c5/tests/nopytest_test_coadds.py
    """

    def setUp(self):
        self.butler = Butler(os.path.join(getPackageDir("ci_hsc_gen3"), "DATA"),
                             instrument="HSC", skymap="discrete/ci_hsc",
                             writeable=False, collections=["HSC/runs/ci_hsc"])
        self.skip_mock()
        self._tract = 0
        self._patch = 69
        self._bands = ['r', 'i']

    def test_forced_id_names(self):
        """Test that forced photometry ID fields are named as expected
        (DM-8210).

        Specifically, coadd forced photometry should have only "id" and
        "parent" fields, while CCD forced photometry should have those,
        "objectId", and "parentObjectId".
        """
        coadd_schema = self.butler.get("deepCoadd_forced_src_schema").schema
        self.assertIn("id", coadd_schema)
        self.assertIn("parent", coadd_schema)
        self.assertNotIn("objectId", coadd_schema)
        self.assertNotIn("parentObjectId", coadd_schema)
        ccd_schema = self.butler.get("forced_src_schema").schema
        self.assertIn("id", ccd_schema)
        self.assertIn("parent", ccd_schema)
        self.assertIn("objectId", ccd_schema)
        self.assertIn("parentObjectId", ccd_schema)

    def test_alg_metadata_output(self):
        """Test that the algorithm metadata is persisted correctly
        from MeasureMergedCoaddSourcesTask.
        """
        for band in self._bands:
            cat = self.butler.get(
                "deepCoadd_meas",
                band=band,
                tract=self._tract,
                patch=self._patch
            )
            meta = cat.getMetadata()
            for circ_aperture_flux_radius in meta.getArray('BASE_CIRCULARAPERTUREFLUX_RADII'):
                self.assertIsInstance(circ_aperture_flux_radius, numbers.Number)
            # Each time the run method of a measurement task is executed,
            # algorithm metadata is appended to the algorithm metadata object.
            # Depending on how many times a measurement task is run,
            # a metadata entry may be a single value or multiple values.
            for n_offset in meta.getArray('NOISE_OFFSET'):
                self.assertIsInstance(n_offset, numbers.Number)
            for noise_src in meta.getArray('NOISE_SOURCE'):
                self.assertEqual(noise_src, 'measure')
            for noise_exp_id in meta.getArray('NOISE_EXPOSURE_ID'):
                self.assertIsInstance(noise_exp_id, numbers.Number)
            for noise_seed_mul in meta.getArray('NOISE_SEED_MULTIPLIER'):
                self.assertIsInstance(noise_seed_mul, numbers.Number)

    def test_schema_consistency(self):
        """Test that _schema catalogs are consistent with the data catalogs."""
        det_schema = self.butler.get("deepCoadd_det_schema").schema
        meas_schema = self.butler.get("deepCoadd_meas_schema").schema
        mergeDet_schema = self.butler.get("deepCoadd_mergeDet_schema").schema
        ref_schema = self.butler.get("deepCoadd_ref_schema").schema
        coadd_forced_schema = self.butler.get("deepCoadd_forced_src_schema").schema
        ccd_forced_schema = self.butler.get("forced_src_schema").schema
        for band in self._bands:
            det = self.butler.get("deepCoadd_det", band=band, tract=self._tract, patch=self._patch)
            self.assertEqual(det.schema, det_schema)
            mergeDet = self.butler.get("deepCoadd_mergeDet", band=band, tract=self._tract, patch=self._patch)
            self.assertEqual(mergeDet.schema, mergeDet_schema)
            meas = self.butler.get("deepCoadd_meas", band=band, tract=self._tract, patch=self._patch)
            self.assertEqual(meas.schema, meas_schema)
            ref = self.butler.get("deepCoadd_ref", band=band, tract=self._tract, patch=self._patch)
            self.assertEqual(ref.schema, ref_schema)
            coadd_forced_src = self.butler.get(
                "deepCoadd_forced_src",
                band=band,
                tract=self._tract,
                patch=self._patch
            )
            self.assertEqual(coadd_forced_src.schema, coadd_forced_schema)
        ccd_forced_src = self.butler.get(
            "forced_src",
            tract=self._tract,
            visit=DATA_IDS[0]["visit"],
            detector=DATA_IDS[0]["detector"]
        )
        self.assertEqual(ccd_forced_src.schema, ccd_forced_schema)

    def test_coadd_transmission_curves(self):
        """Test that coadded TransmissionCurves agree with the inputs."""
        wavelengths = np.linspace(4000, 7000, 10)
        n_object_test = 10
        ctx = np.random.RandomState(12345)

        for band in self._bands:
            n_tested = 0
            exp = self.butler.get("deepCoadd_calexp", band=band, tract=self._tract, patch=self._patch)
            cat = self.butler.get("objectTable", band=band, tract=self._tract, patch=self._patch)
            transmission_curve = exp.getInfo().getTransmissionCurve()
            coadd_inputs = exp.getInfo().getCoaddInputs().ccds
            wcs = exp.getWcs()

            to_check = ctx.choice(len(cat), size=n_object_test, replace=False)
            for index in to_check:
                coadd_coord = geom.SpherePoint(cat["coord_ra"].values[index]*geom.degrees,
                                               cat["coord_dec"].values[index]*geom.degrees)
                summed_throughput = np.zeros(wavelengths.shape, dtype=np.float64)
                weight_sum = 0.0
                for rec in coadd_inputs.subsetContaining(coadd_coord, includeValidPolygon=True):
                    det_pos = rec.getWcs().skyToPixel(coadd_coord)
                    det_trans = rec.getTransmissionCurve()
                    weight = rec.get("weight")
                    summed_throughput += det_trans.sampleAt(det_pos, wavelengths)*weight
                    weight_sum += weight
                if weight_sum == 0.0:
                    continue
                summed_throughput /= weight_sum
                coadd_pos = wcs.skyToPixel(coadd_coord)
                coadd_throughput = transmission_curve.sampleAt(coadd_pos, wavelengths)
                np.testing.assert_array_almost_equal(coadd_throughput, summed_throughput)
                n_tested += 1
            self.assertGreater(n_tested, 5)

    def test_mask_planes_exist(self):
        """Test that the input mask planes have been added."""
        for data_id in DATA_IDS:
            mask = self.butler.get("calexp.mask", data_id)
            self.assertIn("CROSSTALK", mask.getMaskPlaneDict())
            self.assertIn("NOT_DEBLENDED", mask.getMaskPlaneDict())

    # Expected to fail until DM-5174 is fixed.
    @unittest.expectedFailure
    def test_masks_removed(self):
        """Test that certain mask planes have been removed from the coadds.

        This is expected to fail until DM-5174 is fixed.
        """
        for band in self._bands:
            mask = self.butler.get("deepCoadd_calexp.mask", band=band, tract=self._tract, patch=self._patch)
            self.assertNotIn("CROSSTALK", mask.getMaskPlaneDict())
            self.assertNotIn("NOT_DEBLENDED", mask.getMaskPlaneDict())

    def test_warp_inputs(self):
        """Test that the warps have the correct inputs."""
        skymap = self.butler.get("skyMap")
        tract_info = skymap[self._tract]
        for warp_type in ["directWarp", "psfMatchedWarp"]:
            datasets = set(self.butler.registry.queryDatasets(f"deepCoadd_{warp_type}"))
            # We only need to test one dataset
            dataset = list(datasets)[0]

            warp = self.butler.getDirect(dataset)
            self.assertEqual(warp.wcs, tract_info.wcs)
            coadd_inputs = warp.getInfo().getCoaddInputs()
            self.assertEqual(len(coadd_inputs.visits), 1)
            visit_record = coadd_inputs.visits[0]
            self.assertEqual(visit_record.getWcs(), warp.wcs)
            self.assertEqual(visit_record.getBBox(), warp.getBBox())
            self.assertGreater(len(coadd_inputs.ccds), 0)

            wcs_cat = self.butler.get(
                "jointcalSkyWcsCatalog",
                visit=visit_record.getId(),
                tract=self._tract
            )
            photocalib_cat = self.butler.get(
                "jointcalPhotoCalibCatalog",
                visit=visit_record.getId(),
                tract=self._tract
            )
            final_psf_cat = self.butler.get(
                "finalized_psf_ap_corr_catalog",
                visit=visit_record.getId()
            )

            # We only need to test one input ccd
            det_record = coadd_inputs.ccds[0]
            exp_bbox = self.butler.get(
                "calexp.bbox",
                visit=det_record["visit"],
                detector=det_record["ccd"]
            )
            self.assertEqual(det_record.getWcs(), wcs_cat.find(det_record["ccd"]).getWcs())
            self.assertEqual(
                det_record.getPhotoCalib(),
                photocalib_cat.find(det_record["ccd"]).getPhotoCalib()
            )
            self.assertEqual(det_record.getBBox(), exp_bbox)
            self.assertIsNotNone(det_record.getTransmissionCurve())
            center = det_record.getBBox().getCenter()
            np.testing.assert_array_almost_equal(
                det_record.getPsf().computeKernelImage(center).array,
                final_psf_cat.find(det_record["ccd"]).getPsf().computeKernelImage(center).array
            )
            input_map = det_record.getApCorrMap()
            final_map = final_psf_cat.find(det_record["ccd"]).getApCorrMap()
            self.assertEqual(len(input_map), len(final_map))
            for key in input_map.keys():
                self.assertEqual(input_map[key], final_map[key])
            self.assertIsNotNone(coadd_inputs.visits.find(det_record["visit"]))

    def test_coadd_inputs(self):
        """Test that the coadds have the correct inputs."""
        skymap = self.butler.get("skyMap")
        tract_info = skymap[self._tract]
        for band in self._bands:
            wcs = self.butler.get("deepCoadd_calexp.wcs", band=band, tract=self._tract, patch=self._patch)
            self.assertEqual(wcs, tract_info.wcs)
            coadd_inputs = self.butler.get(
                "deepCoadd_calexp.coaddInputs",
                band=band,
                tract=self._tract,
                patch=self._patch
            )
            # We only need to test one input ccd
            det_record = coadd_inputs.ccds[0]
            wcs_cat = self.butler.get(
                "jointcalSkyWcsCatalog",
                visit=det_record["visit"],
                tract=self._tract
            )
            photocalib_cat = self.butler.get(
                "jointcalPhotoCalibCatalog",
                visit=det_record["visit"],
                tract=self._tract
            )
            final_psf_cat = self.butler.get(
                "finalized_psf_ap_corr_catalog",
                visit=det_record["visit"]
            )
            exp_bbox = self.butler.get(
                "calexp.bbox",
                visit=det_record["visit"],
                detector=det_record["ccd"]
            )
            self.assertEqual(det_record.getWcs(), wcs_cat.find(det_record["ccd"]).getWcs())
            self.assertEqual(
                det_record.getPhotoCalib(),
                photocalib_cat.find(det_record["ccd"]).getPhotoCalib()
            )
            self.assertEqual(det_record.getBBox(), exp_bbox)
            self.assertIsNotNone(det_record.getTransmissionCurve())
            center = det_record.getBBox().getCenter()
            np.testing.assert_array_almost_equal(
                det_record.getPsf().computeKernelImage(center).array,
                final_psf_cat.find(det_record["ccd"]).getPsf().computeKernelImage(center).array
            )
            input_map = det_record.getApCorrMap()
            final_map = final_psf_cat.find(det_record["ccd"]).getApCorrMap()
            self.assertEqual(len(input_map), len(final_map))
            for key in input_map.keys():
                self.assertEqual(input_map[key], final_map[key])
            self.assertIsNotNone(coadd_inputs.visits.find(det_record["visit"]))
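
    # A minimal sketch (not part of the original tests): the per-ccd checks
    # duplicated in the two tests above could be factored into a helper
    # along these lines, with the catalogs passed in by the caller.
    def _check_input_ccd(self, det_record, wcs_cat, photocalib_cat,
                         final_psf_cat, exp_bbox):
        ccd = det_record["ccd"]
        self.assertEqual(det_record.getWcs(), wcs_cat.find(ccd).getWcs())
        self.assertEqual(det_record.getPhotoCalib(),
                         photocalib_cat.find(ccd).getPhotoCalib())
        self.assertEqual(det_record.getBBox(), exp_bbox)
        self.assertIsNotNone(det_record.getTransmissionCurve())
        center = det_record.getBBox().getCenter()
        np.testing.assert_array_almost_equal(
            det_record.getPsf().computeKernelImage(center).array,
            final_psf_cat.find(ccd).getPsf().computeKernelImage(center).array)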

    def test_psf_installation(self):
        """Test that the coadd psf is installed."""
        for band in self._bands:
            wcs = self.butler.get("deepCoadd_calexp.wcs", band=band, tract=self._tract, patch=self._patch)
            coadd_inputs = self.butler.get(
                "deepCoadd_calexp.coaddInputs",
                band=band,
                tract=self._tract,
                patch=self._patch
            )
            coadd_psf = self.butler.get(
                "deepCoadd_calexp.psf",
                band=band,
                tract=self._tract,
                patch=self._patch
            )
            new_psf = lsst.meas.algorithms.CoaddPsf(coadd_inputs.ccds, wcs)
            self.assertEqual(coadd_psf.getComponentCount(), len(coadd_inputs.ccds))
            self.assertEqual(new_psf.getComponentCount(), len(coadd_inputs.ccds))
            for n, record in enumerate(coadd_inputs.ccds):
                center = record.getBBox().getCenter()
                np.testing.assert_array_almost_equal(
                    coadd_psf.getPsf(n).computeKernelImage(center).array,
                    record.getPsf().computeKernelImage(center).array
                )
                np.testing.assert_array_almost_equal(
                    new_psf.getPsf(n).computeKernelImage(center).array,
                    record.getPsf().computeKernelImage(center).array
                )
                self.assertEqual(coadd_psf.getWcs(n), record.getWcs())
                self.assertEqual(new_psf.getWcs(n), record.getWcs())
                self.assertEqual(coadd_psf.getBBox(n), record.getBBox())
                self.assertEqual(new_psf.getBBox(n), record.getBBox())
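
    # A minimal sketch (not part of the original test): a CoaddPsf built this
    # way can be evaluated anywhere on the coadd, e.g.
    #
    #     kernel = new_psf.computeKernelImage(geom.Point2D(1000, 1000))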

    def test_coadd_psf(self):
        """Test that the stars on the coadd are well represented by
        the attached PSF.
        """
        n_object_test = 10
        n_good_test = 5
        ctx = np.random.RandomState(12345)

        for band in self._bands:
            exp = self.butler.get("deepCoadd_calexp", band=band, tract=self._tract, patch=self._patch)
            coadd_psf = exp.getPsf()
            cat = self.butler.get("objectTable", band=band, tract=self._tract, patch=self._patch)

            star_cat = cat[(cat["i_extendedness"] < 0.5)
                           & (cat["detect_isPrimary"])
                           & (cat[f"{band}_psfFlux"] > 0.0)
                           & (cat[f"{band}_psfFlux"]/cat[f"{band}_psfFluxErr"] > 50.0)
                           & (cat[f"{band}_psfFlux"]/cat[f"{band}_psfFluxErr"] < 200.0)]

            to_check = ctx.choice(len(star_cat), size=n_object_test, replace=False)
            n_good = 0
            for index in to_check:
                position = geom.Point2D(star_cat["x"].values[index], star_cat["y"].values[index])
                psf_image = coadd_psf.computeImage(position)
                psf_image_bbox = psf_image.getBBox()
                star_image = lsst.afw.image.ImageF(
                    exp.maskedImage.image,
                    psf_image_bbox
                ).convertD()
                star_image /= star_image.array.sum()
                psf_image /= psf_image.array.sum()
                residuals = lsst.afw.image.ImageD(star_image, True)
                residuals -= psf_image
                # This is just a quick check that the coadd psf model works
                # reasonably well for the stars. It is not meant as a detailed
                # test of the psf modeling capability.
                if np.max(np.abs(residuals.array)) < 0.01:
                    n_good += 1

            self.assertGreater(n_good, n_good_test)

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass(
            "StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                          storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument",
                                            {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {
            "instrument": "DummyCamComp",
            "name": "d-r",
            "abstract_filter": "R"
        })
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {
                    "instrument": "DummyCamComp",
                    "id": detector,
                    "full_name": f"detector{detector}"
                })

        butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r"
            }, {
                "instrument": "DummyCamComp",
                "id": 424,
                "name": "fourtwentyfour",
                "physical_filter": "d-r"
            })

        formatter = doImport(
            "lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {
                "instrument": "DummyCamComp",
                "visit": 423,
                "detector": detector
            }
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(
                FileDataset(path=metricFile, refs=[refIn],
                            formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {
                "instrument": "DummyCamComp",
                "visit": 424,
                "detector": detector
            }
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(
            FileDataset(path=metricFile,
                        refs=refs,
                        formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.remove(datasetTypeName, dataId1)
        with self.assertRaises(LookupError):
            butler.datasetExists(datasetTypeName, dataId1)
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def runPutGetTest(self, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                          storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument",
                                            {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {
            "instrument": "DummyCamComp",
            "name": "d-r",
            "abstract_filter": "R"
        })
        butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r"
            })

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId),
                     (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name,
                                               dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"),
                                             metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={
            "slice": slice(stop)
        }).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName,
                                                      "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            butler.remove(compNameS, dataId, remember=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(
                ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {
            "ingest",
        })

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.remove(ref.datasetType.name, dataId)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions,
            self.storageClassFactory.getStorageClass("StructuredData"),
            butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument",
                                            {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {
            "instrument": "DummyCamComp",
            "name": "d-r",
            "abstract_filter": "R"
        })
        butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r"
            })
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(
            butler.datasetExists(datasetType, dataId, collection=run))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType,
                                            dataId,
                                            collection=run))
        self.assertEqual(
            metric,
            butler.getDeferred(datasetType, dataId, collection=run).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.remove(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection but without a Datastore entry.
        butler.remove(datasetType, dataId, collection="tagged")
        self.assertFalse(
            butler.datasetExists(datasetType, dataId, collection=run))
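
        # A minimal sketch (not part of the original test): in later
        # daf_butler versions the collection can instead be baked into the
        # Butler at construction time, e.g.
        #
        #     butler = Butler(self.tmpConfigFile, collections=["deferred"])
        #     metric = butler.get("example", instrument="DummyCamComp",
        #                         visit=423)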
Example #24
    def checkInstrumentWithRegistry(self, cls, testRaw):

        Butler.makeRepo(self.root)
        butler = Butler(self.root, run="tests")
        instrument = cls()
        scFactory = StorageClassFactory()

        # Check instrument class and metadata translator agree on
        # instrument name, using readRawFitsHeader to read the metadata.
        filename = os.path.join(DATAROOT, testRaw)
        md = readRawFitsHeader(filename, translator_class=cls.translatorClass)
        obsInfo = ObservationInfo(md,
                                  translator_class=cls.translatorClass,
                                  filename=filename)
        self.assertEqual(instrument.getName(), obsInfo.instrument)

        # Add Instrument, Detector, and PhysicalFilter entries to the
        # Butler Registry.
        instrument.register(butler.registry)

        # Define a DatasetType for the cameraGeom.Camera, which can be
        # accessed just by identifying its Instrument.
        # A real-world Camera DatasetType should be identified by a
        # validity range as well.
        cameraDatasetType = DatasetType(
            "camera",
            dimensions=["instrument"],
            storageClass=scFactory.getStorageClass("Camera"),
            universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(cameraDatasetType)

        # Define a DatasetType for cameraGeom.Detectors, which can be
        # accessed by identifying its Instrument and (Butler) Detector.
        # A real-world Detector DatasetType probably doesn't need to exist,
        # as it would just duplicate information in the Camera, and
        # reading a full Camera just to get a single Detector should be
        # plenty efficient.
        detectorDatasetType = DatasetType(
            "detector",
            dimensions=["instrument", "detector"],
            storageClass=scFactory.getStorageClass("Detector"),
            universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(detectorDatasetType)

        # Put and get the Camera.
        dataId = dict(instrument=instrument.instrument)
        butler.put(instrument.getCamera(), "camera", dataId=dataId)
        camera = butler.get("camera", dataId)
        # Full camera comparisons are *slow*; just compare names.
        self.assertEqual(instrument.getCamera().getName(), camera.getName())
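        # A minimal sketch (not part of the original test): a real-world
        # Camera would instead be registered as a calibration and certified
        # with a validity range (names illustrative, assuming a daf_butler
        # version with calibration collections):
        #
        #     calibType = DatasetType(
        #         "camera", dimensions=["instrument"],
        #         storageClass=scFactory.getStorageClass("Camera"),
        #         universe=butler.registry.dimensions, isCalibration=True)
        #     butler.registry.certify("calibs", [ref], timespan)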

        # Put and get a random subset of the Detectors.
        allDetectors = list(instrument.getCamera())
        numDetectors = min(3, len(allDetectors))
        someDetectors = [
            allDetectors[i] for i in self.rng.choice(
                len(allDetectors), size=numDetectors, replace=False)
        ]
        for cameraGeomDetector in someDetectors:
            # Right now we only support integer detector IDs in data IDs;
            # support for detector names and groups (i.e. rafts) is
            # definitely planned but not yet implemented.
            dataId = dict(instrument=instrument.instrument,
                          detector=cameraGeomDetector.getId())
            butler.put(cameraGeomDetector, "detector", dataId=dataId)
            cameraGeomDetector2 = butler.get("detector", dataId=dataId)
            # Full detector comparisons are *slow*; just compare names and
            # serials.
            self.assertEqual(cameraGeomDetector.getName(),
                             cameraGeomDetector2.getName())
            self.assertEqual(cameraGeomDetector.getSerial(),
                             cameraGeomDetector2.getSerial())
Example #25
class FormattersTests(DatasetTestHelper, lsst.utils.tests.TestCase):
    root = None
    storageClassFactory = None

    @classmethod
    def setUpClass(cls):
        """Create a new butler once only."""

        cls.storageClassFactory = StorageClassFactory()

        cls.root = tempfile.mkdtemp(dir=TESTDIR)

        data_ids = {
            "instrument": [INSTRUMENT_NAME],
            "detector": [0, 1, 2, 3, 4, 5],
            "exposure": [11, 22],
        }

        configURI = ButlerURI("resource://spherex/configs",
                              forceDirectory=True)
        butlerConfig = Config(configURI.join("butler.yaml"))
        # in-memory db is being phased out
        # butlerConfig["registry", "db"] = 'sqlite:///:memory:'
        cls.creatorButler = makeTestRepo(
            cls.root,
            data_ids,
            config=butlerConfig,
            dimensionConfig=configURI.join("dimensions.yaml"))
        for formatter in FORMATTERS:
            datasetTypeName, storageClassName = (formatter["dataset_type"],
                                                 formatter["storage_class"])
            storageClass = cls.storageClassFactory.getStorageClass(
                storageClassName)
            addDatasetType(cls.creatorButler, datasetTypeName, set(data_ids),
                           storageClass)

    @classmethod
    def tearDownClass(cls):
        if cls.root is not None:
            shutil.rmtree(cls.root, ignore_errors=True)

    def setUp(self):
        # make test collection
        # self.butler = makeTestCollection(self.creatorButler)
        self.collection = self._testMethodName
        self.butler = Butler(butler=self.creatorButler, run=self.collection)

    def test_putget(self):
        fitsPath = os.path.join(TESTDIR, "data", "small.fits")
        dataid = {"exposure": 11, "detector": 0, "instrument": INSTRUMENT_NAME}
        for formatter in FORMATTERS:
            # in-memory object, representing fits
            inmemobj = formatter["reader"](fitsPath)

            # save in-memory object into butler dataset
            datasetTypeName = formatter["dataset_type"]
            self.butler.put(inmemobj, datasetTypeName, dataid)

            # get butler dataset
            retrievedobj = self.butler.get(datasetTypeName, dataid)
            self.assertIsInstance(retrievedobj, formatter["inmem_cls"])
            self.assertEqual(retrievedobj.__class__.__name__,
                             inmemobj.__class__.__name__)
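
    # A minimal sketch (not part of the original test): the stored file's
    # location can be inspected via getURI, using the same dataset type and
    # data ID as in test_putget above.
    def _uri_sketch(self, datasetTypeName, dataid):
        uri = self.butler.getURI(datasetTypeName, dataid)
        return uri.getExtension()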

    def test_ingest(self):

        fitsPath = os.path.join(TESTDIR, "data", "small.fits")

        formatter = FORMATTERS[0]
        datasetTypeName, formatterCls = (formatter["dataset_type"],
                                         formatter["formatter_cls"])

        datasetType = self.butler.registry.getDatasetType(datasetTypeName)
        datasets = []
        for exposure in range(3, 5):
            for detector in range(6):
                # use the same fits to test ingest
                if not os.path.exists(fitsPath):
                    log.warning(
                        f"No data found for detector {detector}, exposure {exposure} @ {fitsPath}."
                    )
                    continue
                ref = DatasetRef(datasetType,
                                 dataId={
                                     "instrument": INSTRUMENT_NAME,
                                     "detector": detector,
                                     "exposure": exposure * 11
                                 })
                datasets.append(
                    FileDataset(refs=ref,
                                path=fitsPath,
                                formatter=formatterCls))

        # register new collection
        # run = "rawIngestedRun"
        # self.butler.registry.registerCollection(run, type=CollectionType.RUN)

        # collection is registered as a part of setUp
        run = self.collection

        with self.butler.transaction():
            for exposure in range(3, 5):
                expid = exposure * 11
                self.butler.registry.insertDimensionData(
                    "exposure", {
                        "instrument": INSTRUMENT_NAME,
                        "id": expid,
                        "name": f"{expid}",
                        "group_name": "day1",
                        "timespan": Timespan(begin=None, end=None)
                    })
            # transfer can be 'auto', 'move', 'copy', 'hardlink', 'relsymlink'
            # or 'symlink'
            self.butler.ingest(*datasets, transfer="symlink", run=run)

        # verify that 12 files were ingested (2 exposures for each detector)
        refsSet = set(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run]))
        self.assertEqual(
            len(refsSet), 12,
            f"Collection {run} should have 12 elements after ingest")

        # verify that data id is present
        dataid = {"exposure": 44, "detector": 5, "instrument": INSTRUMENT_NAME}
        refsList = list(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run],
                                               dataId=dataid))
        self.assertEqual(
            len(refsList), 1,
            f"Collection {run} should have 1 element with {dataid}")
Example #26
class ParquetFormatterTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, using PosixDatastore.
    """
    def setUp(self):
        """Create a new butler root for each test."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "data",
            dimensions=(),
            storageClass="DataFrame",
            universe=self.butler.registry.dimensions)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def testSingleIndexDataFrame(self):
        columns1 = pd.Index(["a", "b", "c"])
        df1 = pd.DataFrame(np.random.randn(5, 3),
                           index=np.arange(5, dtype=int),
                           columns=columns1)
        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(df1.columns.equals(columns2))
        # Read just some columns a few different ways.
        df3 = self.butler.get(self.datasetType,
                              dataId={},
                              parameters={"columns": ["a", "c"]})
        self.assertTrue(df1.loc[:, ["a", "c"]].equals(df3))
        df4 = self.butler.get(self.datasetType,
                              dataId={},
                              parameters={"columns": "a"})
        self.assertTrue(df1.loc[:, ["a"]].equals(df4))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType,
                            dataId={},
                            parameters={"columns": ["d"]})

    def testMultiIndexDataFrame(self):
        columns1 = pd.MultiIndex.from_tuples(
            [
                ("g", "a"),
                ("g", "b"),
                ("g", "c"),
                ("r", "a"),
                ("r", "b"),
                ("r", "c"),
            ],
            names=["filter", "column"],
        )
        df1 = pd.DataFrame(np.random.randn(5, 6),
                           index=np.arange(5, dtype=int),
                           columns=columns1)
        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(df1.columns.equals(columns2))
        # Read just some columns a few different ways.
        df3 = self.butler.get(self.datasetType,
                              dataId={},
                              parameters={"columns": {
                                  "filter": "g"
                              }})
        self.assertTrue(df1.loc[:, ["g"]].equals(df3))
        df4 = self.butler.get(
            self.datasetType,
            dataId={},
            parameters={"columns": {
                "filter": ["r"],
                "column": "a"
            }})
        self.assertTrue(df1.loc[:, [("r", "a")]].equals(df4))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType,
                            dataId={},
                            parameters={"columns": ["d"]})
Example #27
class TestFilterLabelFixups(lsst.utils.tests.TestCase, MockCheckMixin):
    """Tests for the logic in
    lsst.obs.base.formatters.fitsExposure.FitsExposureFormatter._fixFilterLabels
    that uses the data ID passed to a formatter to fix and/or check the
    FilterLabel read from an Exposure FITS file, allowing us to load images
    with new, standardized filters even if they were written prior to filter
    standardization (and without enough information to reconstruct the
    standardized filter name).

    This test lives here instead of obs_base because it relies on having
    Exposure FITS files written both before and after standardization in a Gen3
    butler, something trivial to obtain here: the flats are old (from
    testdata_ci_hsc), while the calexps are new (written by Gen3 pipelines).
    And this package already has the dependency on a concrete obs package
    (obs_subaru in this case) necessary to set up a full butler repository,
    something that obs_base can by definition never have.
    """

    def setUp(self):
        self.butler = Butler(os.path.join(getPackageDir("ci_hsc_gen3"), "DATA"), writeable=False,
                             collections=["HSC/calib/2013-06-17", "HSC/runs/ci_hsc"])
        # We need to provide a physical_filter value to fully identify a flat,
        # but this still leaves the band as an implied value that this data ID
        # doesn't know.
        self.flatMinimalDataId = DataCoordinate.standardize(
            instrument="HSC", detector=0, physical_filter="HSC-R",
            universe=self.butler.registry.dimensions,
        )
        # For a calexp, the minimal data ID just has exposure and detector,
        # so both band and physical_filter are implied and not known here.
        self.calexpMinimalDataId = DataCoordinate.standardize(
            instrument="HSC", detector=100, visit=903334,
            universe=self.butler.registry.dimensions,
        )
        # Parameters with a bbox, to test that the logic still works on
        # subimage gets.
        self.parameters = {"bbox": Box2I(Point2I(0, 0), Point2I(8, 7))}

    def testReadingOldFileWithIncompleteDataId(self):
        """If we try to read an old flat with an incomplete data ID, we should
        get a warning.  It is unspecified what the FilterLabel will have in
        this case, so we don't check that.
        """
        with self.assertWarns(Warning):
            self.butler.get("flat", self.flatMinimalDataId)
        with self.assertWarns(Warning):
            self.butler.get("flat", self.flatMinimalDataId, parameters=self.parameters)
        with self.assertWarns(Warning):
            self.butler.get("flat.filter", self.flatMinimalDataId)

    def testFixingReadingOldFile(self):
        """If we read an old flat with a complete data ID, we fix the
        FilterLabel.
        """
        flatFullDataId = self.butler.registry.expandDataId(self.flatMinimalDataId)
        flat = self.butler.get("flat", flatFullDataId)
        self.assertEqual(flat.getFilter().bandLabel, flatFullDataId["band"])
        self.assertEqual(flat.getFilter().physicalLabel, flatFullDataId["physical_filter"])
        flatFilterLabel = self.butler.get("flat.filter", flatFullDataId)
        self.assertEqual(flatFilterLabel.bandLabel, flatFullDataId["band"])
        self.assertEqual(flatFilterLabel.physicalLabel, flatFullDataId["physical_filter"])
        flatSub = self.butler.get("flat", flatFullDataId, parameters=self.parameters)
        self.assertEqual(flat.getFilter(), flatSub.getFilter())

    def testReadingNewFileWithIncompleteDataId(self):
        """If we try to read a new calexp with an incomplete data ID, the
        reader should recognize that it can't check the filters and just trust
        the file.
        """
        self.skip_mock()
        calexp = self.butler.get("calexp", self.calexpMinimalDataId)
        calexpFilterLabel = self.butler.get("calexp.filter", self.calexpMinimalDataId)
        self.assertTrue(calexp.getFilter().hasPhysicalLabel())
        self.assertTrue(calexp.getFilter().hasBandLabel())
        self.assertEqual(calexp.getFilter(), calexpFilterLabel)
        calexpSub = self.butler.get("calexp", self.calexpMinimalDataId, parameters=self.parameters)
        self.assertEqual(calexp.getFilter(), calexpSub.getFilter())

    def testReadingNewFileWithFullDataId(self):
        """If we try to read a new calexp with a full data ID, the reader
        should check the filters in the file for consistency with the data ID
        (and in this case, find them consistent).
        """
        self.skip_mock()
        calexpFullDataId = self.butler.registry.expandDataId(self.calexpMinimalDataId)
        calexp = self.butler.get("calexp", calexpFullDataId)
        self.assertEqual(calexp.getFilter().bandLabel, calexpFullDataId["band"])
        self.assertEqual(calexp.getFilter().physicalLabel, calexpFullDataId["physical_filter"])
        calexpFilterLabel = self.butler.get("calexp.filter", calexpFullDataId)
        self.assertEqual(calexpFilterLabel.bandLabel, calexpFullDataId["band"])
        self.assertEqual(calexpFilterLabel.physicalLabel, calexpFullDataId["physical_filter"])
        calexpSub = self.butler.get("calexp", calexpFullDataId, parameters=self.parameters)
        self.assertEqual(calexp.getFilter(), calexpSub.getFilter())

    def testReadingBadNewFileWithFullDataId(self):
        """If we try to read a new calexp with a full data ID, the reader
        should check the filters in the file for consistency with the data ID
        (and in this case, find them inconsistent, which should result in
        warnings and returning what's in the data ID).
        """
        self.skip_mock()
        calexpBadDataId = DataCoordinate.standardize(
            self.calexpMinimalDataId,
            band="g",
            physical_filter="HSC-G",
            visit_system=0,
        )
        self.assertTrue(calexpBadDataId.hasFull())

        # Some tests are only relevant when reading full calexps: a
        # disassembled exposure will by definition have had a correct
        # FilterLabel written out, so the test is moot, because the
        # FilterLabel formatter will not overwrite a correct filter label
        # with an incorrect one derived from the data ID.
        _, components = self.butler.getURIs("calexp", calexpBadDataId)
        if components:
            raise unittest.SkipTest("Test not relevant because composite has been disassembled")

        with self.assertWarns(Warning):
            calexp = self.butler.get("calexp", calexpBadDataId)
        with self.assertWarns(Warning):
            calexpFilterLabel = self.butler.get("calexp.filter", calexpBadDataId)
        self.assertEqual(calexp.getFilter(), calexpFilterLabel)
        self.assertEqual(calexp.getFilter().bandLabel, calexpBadDataId["band"])
        self.assertEqual(calexp.getFilter().physicalLabel, calexpBadDataId["physical_filter"])
        self.assertEqual(calexpFilterLabel.bandLabel, calexpBadDataId["band"])
        self.assertEqual(calexpFilterLabel.physicalLabel, calexpBadDataId["physical_filter"])
        with self.assertWarns(Warning):
            calexpSub = self.butler.get("calexp", calexpBadDataId, parameters=self.parameters)
        self.assertEqual(calexp.getFilter(), calexpSub.getFilter())
Example #28
class TestSchemaMatch(lsst.utils.tests.TestCase):
    """Check the schema of the parquet outputs match the DDL in sdm_schemas"""
    def setUp(self):
        self.butler = Butler(os.path.join(getPackageDir("ci_imsim"), "DATA"),
                             writeable=False,
                             collections=["LSSTCam-imSim/runs/ci_imsim"])
        schemaFile = os.path.join(getPackageDir("sdm_schemas"), 'yml',
                                  'imsim.yaml')
        with open(schemaFile, "r") as f:
            self.schema = yaml.safe_load(f)['tables']

    def _validateSchema(self, dataset, dataId, tableName):
        """Check column name and data type match between dataset and DDL"""
        info = f"dataset={dataset} tableName={tableName} dataId={dataId}"

        sdmSchema = [
            table for table in self.schema if table['name'] == tableName
        ]
        self.assertEqual(len(sdmSchema), 1)
        expectedColumns = {
            column['name']: column['datatype']
            for column in sdmSchema[0]['columns']
        }

        df = self.butler.get(dataset, dataId)
        df.reset_index(inplace=True)
        outputColumnNames = set(df.columns.to_list())
        self.assertEqual(outputColumnNames, set(expectedColumns.keys()),
                         f"{info} failed")

        # the data type mapping from felis datatype to pandas
        typeMapping = {
            "boolean": "bool",
            "int": "int32",
            "long": "int64",
            "float": "float32",
            "double": "float64",
            "char": "object"
        }
        for column in outputColumnNames:
            self.assertEqual(
                df.dtypes.get(column).name,
                typeMapping[expectedColumns[column]],
                f"{info} column={column} failed")

    def testObjectSchemaMatch(self):
        """Check objectTable_tract"""
        dataId = {
            "instrument": "LSSTCam-imSim",
            "tract": 0,
            "skymap": "discrete/ci_imsim/4k"
        }
        self._validateSchema("objectTable_tract", dataId, "object")

    def testSourceSchemaMatch(self):
        """Check one sourceTable_visit"""
        dataId = {
            "instrument": "LSSTCam-imSim",
            "detector": 100,
            "visit": 5884,
            "band": "y"
        }
        self._validateSchema("sourceTable_visit", dataId, "source")

    def testForcedSourceSchemaMatch(self):
        """Check forcedSourceTable_tract"""
        dataId = {
            "instrument": "LSSTCam-imSim",
            "tract": 0,
            "skymap": "discrete/ci_imsim/4k"
        }
        self._validateSchema("forcedSourceTable_tract", dataId, "forcedSource")

    def testDiaObjectSchemaMatch(self):
        """Check diaObjectTable_tract"""
        dataId = {
            "instrument": "LSSTCam-imSim",
            "tract": 0,
            "skymap": "discrete/ci_imsim/4k"
        }
        self._validateSchema("diaObjectTable_tract", dataId, "diaObject")

    def testDiaSourceSchemaMatch(self):
        """Check one diaSourceTable_tract"""
        dataId = {
            "instrument": "LSSTCam-imSim",
            "tract": 0,
            "skymap": "discrete/ci_imsim/4k"
        }
        self._validateSchema("diaSourceTable_tract", dataId, "diaSource")

    def testForcedSourceOnDiaObjectSchemaMatch(self):
        """Check forcedSourceOnDiaObjectTable_tract"""
        dataId = {
            "instrument": "LSSTCam-imSim",
            "tract": 0,
            "skymap": "discrete/ci_imsim/4k"
        }
        self._validateSchema("forcedSourceOnDiaObjectTable_tract", dataId,
                             "forcedSourceOnDiaObject")

    def testMatchRefSchemaMatch(self):
        """Check match_ref_truth_summary_objectTable_tract"""
        dataId = {
            "instrument": "LSSTCam-imSim",
            "tract": 0,
            "skymap": "discrete/ci_imsim/4k"
        }
        self._validateSchema("match_ref_truth_summary_objectTable_tract",
                             dataId, "matchesTruth")

    def testMatchObjectSchemaMatch(self):
        """Check match_target_truth_summary_objectTable_tract"""
        dataId = {
            "instrument": "LSSTCam-imSim",
            "tract": 0,
            "skymap": "discrete/ci_imsim/4k"
        }
        self._validateSchema("match_target_truth_summary_objectTable_tract",
                             dataId, "matchesObject")
Example #29
class Gen2ConvertTestCase(lsst.utils.tests.TestCase):
    def setUp(self):
        self.butler = Butler(GEN3_REPO_ROOT, collections="HSC/runs/ci_hsc")

    def tearDown(self):
        del self.butler

    def testCollections(self):
        """Test that the correct set of collections is created.
        """
        self.assertCountEqual(
            self.butler.registry.getCollectionChain("HSC/defaults"), [
                "refcats", "skymaps", "HSC/raw/all", "HSC/calib", "HSC/masks",
                "HSC/external"
            ])
        self.assertCountEqual(
            self.butler.registry.getCollectionChain("refcats"),
            ["refcats/gen2"],
        )
        self.assertEqual(self.butler.registry.getCollectionType("skymaps"),
                         CollectionType.RUN)
        self.assertEqual(
            self.butler.registry.getCollectionType("refcats/gen2"),
            CollectionType.RUN)
        self.assertEqual(self.butler.registry.getCollectionType("HSC/raw/all"),
                         CollectionType.RUN)
        self.assertEqual(self.butler.registry.getCollectionType("skymaps"),
                         CollectionType.RUN)
        self.assertEqual(
            list(
                self.butler.registry.queryCollections(
                    ..., collectionTypes={CollectionType.CALIBRATION})),
            ["HSC/calib"],
        )
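
    # A minimal sketch (not part of the original test): queryCollections can
    # also search a chain recursively (flattenChains is assumed to exist in
    # this daf_butler version).
    def _runs_in_defaults_sketch(self):
        return list(self.butler.registry.queryCollections(
            "HSC/defaults", flattenChains=True,
            collectionTypes={CollectionType.RUN}))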

    def testObservationPacking(self):
        """Test that packing Visit+Detector into an integer in Gen3 generates
        the same results as in Gen2.
        """
        butler2 = Butler2(os.path.join(REPO_ROOT, "rerun", "ci_hsc"))
        for visit, detector in [(903334, 16), (903338, 25), (903986, 100)]:
            dataId2 = {"visit": visit, "ccd": detector}
            dataId3 = self.butler.registry.expandDataId(visit=visit,
                                                        detector=detector,
                                                        instrument="HSC")
            self.assertEqual(butler2.get("ccdExposureId", dataId2),
                             dataId3.pack("visit_detector"))

    def testSkyMapPacking(self):
        """Test that packing Tract+Patch into an integer in Gen3 works and is
        self-consistent.

        Note that this packing does *not* use the same algorithm as Gen2 and
        hence generates different IDs, because the Gen2 algorithm is
        problematically tied to the *default* SkyMap for a particular camera,
        rather than the SkyMap actually used.
        """
        # SkyMap used by ci_hsc has only one tract, so the test coverage in
        # that area isn't great.  That's okay because that's tested in SkyMap;
        # what we care about here is that the converted repo has the necessary
        # metadata to construct and use these packers at all.
        for patch in [0, 43, 52]:
            dataId = self.butler.registry.expandDataId(
                skymap="discrete/ci_hsc", tract=0, patch=patch, band='r')
            packer1 = self.butler.registry.dimensions.makePacker(
                "tract_patch", dataId)
            packer2 = self.butler.registry.dimensions.makePacker(
                "tract_patch_band", dataId)
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)), dataId)
            self.assertEqual(packer1.pack(dataId, band='i'),
                             packer1.pack(dataId))
            self.assertNotEqual(packer2.pack(dataId, band='i'),
                                packer2.pack(dataId))
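
    # A minimal sketch (not part of the original test): DimensionPacker.pack
    # can also report how many bits the packed ID needs, which is useful when
    # embedding it in a wider integer (returnMaxBits is assumed available).
    def _packer_bits_sketch(self, dataId):
        packer = self.butler.registry.dimensions.makePacker(
            "tract_patch", dataId)
        packed, maxBits = packer.pack(dataId, returnMaxBits=True)
        return packed, maxBits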

    def testRawFilters(self):
        """Test that raw data has the Filter component set.
        """
        # Note that the 'r' and 'i' values here look like Gen3 band
        # values, but they're something weird in between abstract and physical
        # filters; if we had HSC-R2 data, the corresponding value would be
        # 'r2', not just 'r'.  We need that to be compatible with Gen2 usage
        # of the afw.image.Filter system.
        rawR = self.butler.get("raw",
                               instrument="HSC",
                               exposure=903334,
                               detector=16)
        self.assertEqual(rawR.getFilter().getCanonicalName(), "r")
        rawI = self.butler.get("raw",
                               instrument="HSC",
                               exposure=903986,
                               detector=16)
        self.assertEqual(rawI.getFilter().getCanonicalName(), "i")

    def testCuratedCalibrations(self):
        """Test that defects, the camera, and the brighter-fatter kernel were
        added to the Gen3 registry.
        """
        rawDatasetType = self.butler.registry.getDatasetType("raw")
        cameraRef = None
        bfKernelRef = None
        rawRefs = list(
            self.butler.registry.queryDatasets(
                rawDatasetType, collections=["HSC/raw/all"]).expanded())
        self.assertEqual(len(rawRefs), 33)
        for rawRef in rawRefs:
            # The raw data IDs were expanded above to include the implied
            # dimensions (e.g. physical_filter from exposure) and timespans
            # needed for the calibration lookups below.
            for calibDatasetTypeName in ("camera", "bfKernel", "defects"):
                with self.subTest(dataset=calibDatasetTypeName):
                    calibDatasetType = self.butler.registry.getDatasetType(
                        calibDatasetTypeName)
                    calibRef = self.butler.registry.findDataset(
                        calibDatasetType,
                        collections=["HSC/calib"],
                        dataId=rawRef.dataId,
                        timespan=rawRef.dataId.timespan)
                    # We should have exactly one calib of each type
                    self.assertIsNotNone(calibRef)

                    # Try getting those calibs to make sure the files
                    # themselves are where the Butler thinks they are.  We
                    # defer that for camera and bfKernel, because there's only
                    # one of each of those.
                    if calibDatasetTypeName == "camera":
                        if cameraRef is None:
                            cameraRef = calibRef
                        else:
                            self.assertEqual(cameraRef, calibRef)
                    elif calibDatasetTypeName == "bfKernel":
                        if bfKernelRef is None:
                            bfKernelRef = calibRef
                        else:
                            self.assertEqual(bfKernelRef, calibRef)
                    else:
                        defects = self.butler.get(calibRef,
                                                  collections=calibRef.run)
                        self.assertIsInstance(defects, lsst.ip.isr.Defects)

        instrument = HyperSuprimeCam()
        cameraFromButler = self.butler.get(cameraRef,
                                           collections=cameraRef.run)
        cameraFromInstrument = instrument.getCamera()
        self.assertEqual(len(cameraFromButler), len(cameraFromInstrument))
        self.assertEqual(cameraFromButler.getName(),
                         cameraFromInstrument.getName())
        self.assertFloatsEqual(
            self.butler.get(bfKernelRef, collections=bfKernelRef.run),
            instrument.getBrighterFatterKernel())

    def testBrightObjectMasks(self):
        """Test that bright object masks are included in the Gen3 repo.
        """
        regions = self.butler.get("brightObjectMask",
                                  skymap='discrete/ci_hsc',
                                  tract=0,
                                  patch=69,
                                  band='r')
        self.assertIsInstance(regions, ObjectMaskCatalog)
        self.assertGreater(len(regions), 0)
Example #30
class Gen2ConvertTestCase(lsst.utils.tests.TestCase):
    def setUp(self):
        self.butler = Butler(REPO_ROOT, run="shared/ci_hsc")

    def tearDown(self):
        del self.butler

    def testImpliedDimensions(self):
        """Test that implied dimensions are expanded properly when populating
        the Dataset table.
        """
        # All of the dataset types below have Visit or Exposure in their
        # dimensions, which means PhysicalFilter and AbstractFilter are
        # implied. dimensions for them.  Those should be non-null and
        # consistent.
        sql = """
            SELECT physical_filter, abstract_filter
            FROM dataset
            WHERE dataset_type_name IN (
                'raw', 'calexp', 'icExp', 'src', 'icSrc',
                'deepCoadd_directWarp', 'deepCoadd_psfMatchedWarp'
            )
            """
        count = 0
        for row in self.butler.registry.query(sql):
            if row["physical_filter"] == "HSC-R":
                self.assertEqual(row["abstract_filter"], "r")
            elif row["physical_filter"] == "HSC-I":
                self.assertEqual(row["abstract_filter"], "i")
            else:
                self.fail("physical_filter not in ('HSC-R', 'HSC-I')")
            count += 1
        self.assertGreater(count, 0)
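
        # Note (not part of the original test): registry.query() with raw SQL
        # is an early Gen3 API; later daf_butler versions replace it with
        # structured queries such as registry.queryDataIds(...).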

    def testObservationPacking(self):
        """Test that packing Visit+Detector into an integer in Gen3 generates
        the same results as in Gen2.
        """
        butler2 = Butler2(os.path.join(REPO_ROOT, "rerun", "ci_hsc"))
        for visit, detector in [(903334, 16), (903338, 25), (903986, 100)]:
            dataId2 = {"visit": visit, "ccd": detector}
            dataId3 = self.butler.registry.expandDataId(visit=visit,
                                                        detector=detector,
                                                        instrument="HSC")
            self.assertEqual(
                butler2.get("ccdExposureId", dataId2),
                self.butler.registry.packDataId("visit_detector", dataId3))

    def testSkyMapPacking(self):
        """Test that packing Tract+Patch into an integer in Gen3 works and is
        self-consistent.

        Note that this packing does *not* use the same algorithm as Gen2 and
        hence generates different IDs, because the Gen2 algorithm is
        problematically tied to the *default* SkyMap for a particular camera,
        rather than the SkyMap actually used.
        """
        # SkyMap used by ci_hsc has only one tract, so the test coverage in
        # that area isn't great.  That's okay because that's tested in SkyMap;
        # what we care about here is that the converted repo has the necessary
        # metadata to construct and use these packers at all.
        for patch in [0, 43, 52]:
            dataId = self.butler.registry.expandDataId(skymap="ci_hsc",
                                                       tract=0,
                                                       patch=patch,
                                                       abstract_filter='r')
            packer1 = self.butler.registry.makeDataIdPacker(
                "tract_patch", dataId)
            packer2 = self.butler.registry.makeDataIdPacker(
                "tract_patch_abstract_filter", dataId)
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataId(dataId, dimensions=packer1.dimensions.required))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)), dataId)
            self.assertEqual(packer1.pack(dataId, abstract_filter='i'),
                             packer1.pack(dataId))
            self.assertNotEqual(packer2.pack(dataId, abstract_filter='i'),
                                packer2.pack(dataId))

    def testRawFilters(self):
        """Test that raw data has the Filter component set.
        """
        # Note that the 'r' and 'i' values here look like Gen3 abstract_filter
        # values, but they're something weird in between abstract and physical
        # filters; if we had HSC-R2 data, the corresponding value would be
        # 'r2', not just 'r'.  We need that to be compatible with Gen2 usage
        # of the afw.image.Filter system.
        rawR = self.butler.get("raw",
                               instrument="HSC",
                               exposure=903334,
                               detector=16)
        self.assertEqual(rawR.getFilter().getName(), "r")
        rawI = self.butler.get("raw",
                               instrument="HSC",
                               exposure=903986,
                               detector=16)
        self.assertEqual(rawI.getFilter().getName(), "i")

    def testCuratedCalibrations(self):
        """Test that defects, the camera, and the brighter-fatter kernel were
        added to the Gen3 registry.
        """
        originInfo = DatasetOriginInfoDef(["raw", "calib"], [])
        # Query for raws that have associated calibs of the types below;
        # result is an iterator over rows that correspond roughly to data IDs.
        rowsWithCalibs = list(
            self.butler.registry.selectMultipleDatasetTypes(
                originInfo,
                expression="",
                required=["raw", "camera", "bfKernel", "defects"],
                perDatasetTypeDimensions=["calibration_label"]))
        # Query for all rows, with no restriction on having associated calibs.
        rowsWithoutCalibs = list(
            self.butler.registry.selectMultipleDatasetTypes(
                originInfo,
                expression="",
                required=["raw"],
            ))
        # We should get the same raws in both cases because all of the raws
        # here should have associated calibs.
        self.assertGreater(len(rowsWithoutCalibs), 0)
        self.assertEqual(len(rowsWithCalibs), len(rowsWithoutCalibs))
        # Try getting those calibs to make sure the files themselves are
        # where the Butler thinks they are.
        butler = Butler(REPO_ROOT, run="calib")
        instrument = HyperSuprimeCam()
        for row in rowsWithCalibs:
            refsByName = {k.name: v for k, v in row.datasetRefs.items()}
            cameraFromButler = butler.get(refsByName["camera"])
            cameraFromInstrument = instrument.getCamera()
            self.assertEqual(len(cameraFromButler), len(cameraFromInstrument))
            self.assertEqual(cameraFromButler.getName(),
                             cameraFromInstrument.getName())
            self.assertFloatsEqual(butler.get(refsByName["bfKernel"]),
                                   instrument.getBrighterFatterKernel())
            defects = butler.get(refsByName["defects"])
            self.assertIsInstance(defects, lsst.meas.algorithms.Defects)

    def testBrightObjectMasks(self):
        """Test that bright object masks are included in the Gen3 repo.
        """
        regions = self.butler.get("brightObjectMask",
                                  skymap='ci_hsc',
                                  tract=0,
                                  patch=69,
                                  abstract_filter='r')
        self.assertIsInstance(regions, ObjectMaskCatalog)
        self.assertGreater(len(regions), 0)