Code example #1
def addUnboundedCalibrationLabel(registry, instrumentName):
    """Add a special 'unbounded' calibration_label dimension entry for the
    given camera that is valid for any exposure.

    If such an entry already exists, this function just returns a `DataId`
    for the existing entry.

    Parameters
    ----------
    registry : `Registry`
        Registry object in which to insert the dimension entry.
    instrumentName : `str`
        Name of the instrument this calibration label is associated with.

    Returns
    -------
    dataId : `DataId`
        New or existing data ID for the unbounded calibration.
    """
    d = dict(instrument=instrumentName, calibration_label="unbounded")
    try:
        return registry.expandDataId(dimension="calibration_label",
                                     metadata=["valid_first", "valid_last"], **d)
    except LookupError:
        pass
    unboundedDataId = DataId(universe=registry.dimensions, **d)
    unboundedDataId.entries["calibration_label"]["valid_first"] = datetime.min
    unboundedDataId.entries["calibration_label"]["valid_last"] = datetime.max
    registry.addDimensionEntry("calibration_label", unboundedDataId)
    return unboundedDataId
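The data ID returned here is what curated-calibration writers pass to `butler.put` for datasets whose validity never expires; code example #14 below does exactly this for the camera description. A minimal usage sketch, in which `butler`, `camera`, the instrument name, and the pre-registered `datasetType` are assumed from the surrounding examples:

# Minimal usage sketch.  `butler`, `camera`, "DummyCam", and the
# already-registered `datasetType` are illustrative; see code example #14
# for the full registration-and-put sequence this is abstracted from.
unboundedDataId = addUnboundedCalibrationLabel(butler.registry, "DummyCam")
butler.put(camera, datasetType, unboundedDataId)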
Code example #2
    def testSkyMapPacking(self):
        """Test that packing Tract+Patch into an integer in Gen3 works and is
        self-consistent.

        Note that this packing does *not* use the same algorithm as Gen2 and
        hence generates different IDs, because the Gen2 algorithm is
        problematically tied to the *default* SkyMap for a particular camera,
        rather than the SkyMap actually used.
        """
        # SkyMap used by ci_hsc has only one tract, so the test coverage in
        # that area isn't great.  That's okay because that's tested in SkyMap;
        # what we care about here is that the converted repo has the necessary
        # metadata to construct and use these packers at all.
        for patch in [0, 43, 52]:
            dataId = self.butler.registry.expandDataId(skymap="ci_hsc",
                                                       tract=0,
                                                       patch=patch,
                                                       abstract_filter='r')
            packer1 = self.butler.registry.makeDataIdPacker(
                "tract_patch", dataId)
            packer2 = self.butler.registry.makeDataIdPacker(
                "tract_patch_abstract_filter", dataId)
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataId(dataId, dimensions=packer1.dimensions.required))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)), dataId)
            self.assertEqual(packer1.pack(dataId, abstract_filter='i'),
                             packer1.pack(dataId))
            self.assertNotEqual(packer2.pack(dataId, abstract_filter='i'),
                                packer2.pack(dataId))
Code example #3
def updateVisitEntryFromObsInfo(dataId, obsInfo):
    """Construct a visit Dimension entry from
    `astro_metadata_translator.ObservationInfo`.

    Parameters
    ----------
    dataId : `dict` or `DataId`
        Dictionary of Dimension link fields for (at least) visit. If a true
        `DataId`, this object will be modified and returned.
    obsInfo : `astro_metadata_translator.ObservationInfo`
        A `~astro_metadata_translator.ObservationInfo` object corresponding to
        the exposure.

    Returns
    -------
    dataId : `DataId`
        A data ID with the entry for the visit dimension updated.
    """
    dataId = DataId(dataId)
    dataId.entries[dataId.dimensions()["visit"]].update(
        datetime_begin=obsInfo.datetime_begin.to_datetime(),
        datetime_end=obsInfo.datetime_end.to_datetime(),
        exposure_time=obsInfo.exposure_time.to_value("s"),
    )
    return dataId
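Code example #12 below calls this helper at the end of raw-file ingest. A minimal sketch of that use, assuming an `ObservationInfo` (`obsInfo`) and a `Registry` (`registry`) like the ones in code examples #10 and #12:

# Sketch only: obsInfo and registry are assumed as in code examples #10/#12.
dataId = DataId(dict(instrument=obsInfo.instrument, visit=obsInfo.visit_id),
                universe=registry.dimensions)
dataId = updateVisitEntryFromObsInfo(dataId, obsInfo)
# dataId.entries for the visit now carry datetime_begin/datetime_end and
# exposure_time, ready for registry.addDimensionEntry("visit", dataId).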
Code example #4
 def unpack(self, packedId):
     # Docstring inherited from DataIdPacker.unpack
     return DataId(
         {
             "instrument": self._instrumentName,
             "detector": packedId % self._detectorMax,
             self._observationLink: packedId // self._detectorMax
         },
         dimensions=self.dimensions.required)
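Reading this `unpack` backwards gives the packing rule it inverts: the observation ID (visit or exposure) occupies the high part of the integer and the detector the low part, with `self._detectorMax` as the radix. A sketch of that forward arithmetic, under the assumption that the packer's `pack` mirrors it exactly:

# Forward arithmetic implied by unpack() above (assumption: pack() mirrors
# it).  The maximum and the link values are illustrative.
detectorMax = 200                      # plays the role of self._detectorMax
observation, detector = 1234, 42
packedId = observation * detectorMax + detector
assert packedId // detectorMax == observation
assert packedId % detectorMax == detector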
Code example #5
 def unpack(self, packedId):
     # Docstring inherited from DataIdPacker.unpack
     d = {"skymap": self._skyMapName}
     if self._filterMax is not None:
         d["abstract_filter"] = self.getFilterNameFromInt(
             packedId // self._tractPatchMax)
         packedId %= self._tractPatchMax
     d["tract"] = packedId // self._patchMax
     d["patch"] = packedId % self._patchMax
     return DataId(d, dimensions=self.dimensions.required)
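The same reading applies here: patch sits in the low digits, tract above it, and, when a filter maximum is configured, the filter index above both, with `self._patchMax` and `self._tractPatchMax` as the radices. A sketch of that layout, again assuming `pack` mirrors the arithmetic shown:

# Packed-integer layout implied by unpack() above (assumption: pack()
# mirrors it).  All maxima and indices are illustrative.
patchMax = 100
tractPatchMax = 10000 * patchMax       # i.e. tractMax * patchMax
filterInt, tract, patch = 3, 8, 57
packedId = filterInt * tractPatchMax + tract * patchMax + patch
assert packedId % patchMax == patch
assert (packedId % tractPatchMax) // patchMax == tract
assert packedId // tractPatchMax == filterInt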
Code example #6
 def testDataIdPacker(self):
     registry = self.makeRegistry()
     if registry.limited:
         return
     registry.addDimensionEntry(
         "instrument", {
             "instrument": "DummyCam",
             "visit_max": 10,
             "exposure_max": 10,
             "detector_max": 2
         })
     registry.addDimensionEntry(
         "physical_filter", {
             "instrument": "DummyCam",
             "physical_filter": "R",
             "abstract_filter": "r"
         })
     registry.addDimensionEntry("visit", {
         "instrument": "DummyCam",
         "visit": 5,
         "physical_filter": "R"
     })
     registry.addDimensionEntry(
         "exposure", {
             "instrument": "DummyCam",
             "exposure": 4,
             "visit": 5,
             "physical_filter": "R"
         })
     dataId0 = registry.expandDataId(instrument="DummyCam")
     with self.assertRaises(LookupError):
         registry.packDataId("visit_detector", dataId0)
     with self.assertRaises(LookupError):
         registry.packDataId("exposure_detector", dataId0)
     dataId1 = DataId(dataId0, visit=5, detector=1)
     self.assertEqual(registry.packDataId("visit_detector", dataId1), 11)
     packer = registry.makeDataIdPacker("exposure_detector", dataId0)
     dataId2 = DataId(dataId0, exposure=4, detector=0)
     self.assertEqual(packer.pack(dataId0, exposure=4, detector=0), 8)
     self.assertEqual(packer.pack(dataId2), 8)
     self.assertEqual(registry.packDataId("exposure_detector", dataId2), 8)
     dataId2a = packer.unpack(8)
     self.assertEqual(dataId2, dataId2a)
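The expected values in this test follow from the radix arithmetic shown in code example #4: with `detector_max=2`, visit 5 with detector 1 packs to 5 * 2 + 1 = 11, and exposure 4 with detector 0 packs to 4 * 2 + 0 = 8. The same numbers, reproduced as plain arithmetic:

# The constants asserted above, reproduced with plain arithmetic
# (detector_max=2 comes from the "DummyCam" instrument entry in this test).
detector_max = 2
assert 5 * detector_max + 1 == 11      # visit=5, detector=1
assert 4 * detector_max + 0 == 8       # exposure=4, detector=0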
Code example #7
 def testPickle(self):
     """Test pickle support.
     """
     dataId = DataId(dict(instrument="DummyInstrument",
                          detector=1,
                          visit=2,
                          physical_filter="i"),
                     universe=self.universe)
     dataIdOut = pickle.loads(pickle.dumps(dataId))
     self.assertIsInstance(dataIdOut, DataId)
     self.assertEqual(dataId, dataIdOut)
Code example #8
 def testWithoutFilter(self):
     covered = DimensionSet(universe=self.universe,
                            elements=["tract", "patch"])
     dimensions = DataIdPackerDimensions(given=self.given,
                                         required=self.given.union(covered))
     dataId = DataId(skymap=self.parameters["skymap"],
                     tract=2,
                     patch=6,
                     universe=self.universe)
     packer = SkyMapDataIdPacker(dimensions, **self.parameters)
     packedId = packer.pack(dataId)
     self.assertLessEqual(packedId.bit_length(), packer.maxBits)
     self.assertEqual(packer.unpack(packedId), dataId)
Code example #9
    def testConstructor(self):
        """Test for making new instances.
        """

        dataId = DataId(dict(instrument="DummyInstrument",
                             detector=1,
                             visit=2,
                             physical_filter="i"),
                        universe=self.universe)
        self.assertEqual(len(dataId), 4)
        self.assertCountEqual(
            dataId.keys(),
            ("instrument", "detector", "visit", "physical_filter"))
Code example #10
    def ensureDimensions(self, file):
        """Extract metadata from a raw file and add Exposure and Visit
        Dimension entries.

        Any needed Instrument, Detector, and PhysicalFilter Dimension entries must
        exist in the Registry before `run` is called.

        Parameters
        ----------
        file : `str` or path-like object
            Absolute path to the file to be ingested.

        Returns
        -------
        headers : `list` of `~lsst.daf.base.PropertyList`
            Result of calling `readHeaders`.
        dataId : `DataId`
            Data ID dictionary, as returned by `extractDataId`.
        """
        headers = self.readHeaders(file)
        obsInfo = ObservationInfo(headers[0])

        # Extract a DataId that covers all of self.dimensions.
        fullDataId = self.extractDataId(file, headers, obsInfo=obsInfo)

        for dimension in self.dimensions:
            dimensionDataId = DataId(fullDataId, dimension=dimension)
            if dimensionDataId not in self.dimensionEntriesDone[dimension]:
                # Next look in the Registry
                dimensionEntryDict = self.butler.registry.findDimensionEntry(
                    dimension, dimensionDataId)
                if dimensionEntryDict is None:
                    if dimension.name in ("Visit", "Exposure"):
                        # Add the entry into the Registry.
                        self.butler.registry.addDimensionEntry(
                            dimension, dimensionDataId)
                    else:
                        raise LookupError(
                            f"Entry for {dimension.name} with ID {dimensionDataId} not found; must be "
                            f"present in Registry prior to ingest.")
                # Record that we've handled this entry.
                self.dimensionEntriesDone[dimension].add(dimensionDataId)

        return headers, fullDataId
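A sketch of the driver loop that would call this method once per raw file. The task instance, the file list, and what happens after `ensureDimensions` returns are illustrative here, not the actual obs_base ingest implementation:

# Hypothetical driver loop; `task` is an instance of the ingest task these
# methods belong to, and `filesToIngest` is an illustrative list of paths.
for filename in filesToIngest:
    headers, dataId = task.ensureDimensions(filename)
    # The Exposure/Visit dimension entries now exist in the Registry, and
    # `dataId` can be used to register and transfer the raw dataset.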
Code example #11
 def testSkyPixIndirection(self):
     """Test that SingleDatasetQueryBuilder can look up datasets with
     skypix dimensions from a data ID with visit+detector dimensions.
     """
     # visit+detector <-> skypix lookups for reference catalogs
     refcat = DatasetType(
         "refcat",
         self.registry.dimensions.extract(["skypix"]),
         "ImageU"
     )
     builder = SingleDatasetQueryBuilder.fromSingleCollection(self.registry, refcat, collection="refcats")
     newLinks = builder.relateDimensions(
         self.registry.dimensions.extract(["instrument", "visit", "detector"], implied=True)
     )
     self.assertEqual(newLinks, set(["instrument", "visit", "detector"]))
     self.assertIsNotNone(builder.findSelectableByName("visit_detector_skypix_join"))
     usedLinks = builder.whereDataId(DataId(instrument="HSC", visit=12, detector=34,
                                            physical_filter="HSC-R2", abstract_filter="r",
                                            universe=self.registry.dimensions))
     self.assertEqual(usedLinks, set(["instrument", "visit", "detector"]))
Code example #12
File: ingest.py  Project: craiglagegit/obs_base
    def extractDataId(self, file, headers, obsInfo):
        """Return the Data ID dictionary that should be used to label a file.

        Parameters
        ----------
        file : `str` or path-like object
            Absolute path to the file being ingested (prior to any transfers).
        headers : `list` of `~lsst.daf.base.PropertyList`
            All headers returned by `readHeaders()`.
        obsInfo : `astro_metadata_translator.ObservationInfo`
            Observational metadata extracted from the headers.

        Returns
        -------
        dataId : `DataId`
            A mapping whose key-value pairs uniquely identify raw datasets.
            Must have ``dataId.dimensions() <= self.dimensions``, with at least
            Instrument, Exposure, and Detector present.
        """
        toRemove = set()
        if obsInfo.visit_id is None:
            toRemove.add("Visit")
        if obsInfo.physical_filter is None:
            toRemove.add("PhysicalFilter")
        if toRemove:
            dimensions = self.dimensions.difference(toRemove)
        else:
            dimensions = self.dimensions
        dataId = DataId(
            dimensions=dimensions,
            instrument=obsInfo.instrument,
            exposure=obsInfo.exposure_id,
            visit=obsInfo.visit_id,
            detector=obsInfo.detector_num,
            physical_filter=obsInfo.physical_filter,
        )
        updateExposureEntryFromObsInfo(dataId, obsInfo)
        if obsInfo.visit_id is not None:
            updateVisitEntryFromObsInfo(dataId, obsInfo)
        return dataId
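For a raw file with no filter and no visit, a bias frame for example, `obsInfo.visit_id` and `obsInfo.physical_filter` are `None`, so the Visit and PhysicalFilter dimensions are removed before the `DataId` is built. A sketch of that case, with the calling context assumed from code example #10:

# Hypothetical bias-frame case; `task`, `file`, `headers`, and `obsInfo` are
# assumed to exist as in code example #10.
dataId = task.extractDataId(file, headers, obsInfo=obsInfo)
# With visit_id and physical_filter both None, the dimensions passed to
# DataId exclude Visit and PhysicalFilter, so the data ID describes only the
# instrument, exposure, and detector, and no visit entry is updated.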
Code example #13
 def testCalibrationLabelIndirection(self):
     """Test that SingleDatasetQueryBuilder can look up datasets with
     calibration_label dimensions from a data ID with exposure dimensions.
     """
     # exposure <-> calibration_label lookups for master calibrations
     flat = DatasetType(
         "flat",
         self.registry.dimensions.extract(
             ["instrument", "detector", "physical_filter", "calibration_label"]
         ),
         "ImageU"
     )
     builder = SingleDatasetQueryBuilder.fromSingleCollection(self.registry, flat, collection="calib")
     newLinks = builder.relateDimensions(
         self.registry.dimensions.extract(["instrument", "exposure", "detector"], implied=True)
     )
     self.assertEqual(newLinks, set(["exposure"]))
     self.assertIsNotNone(builder.findSelectableByName("exposure_calibration_label_join"))
     usedLinks = builder.whereDataId(DataId(instrument="HSC", exposure=12, detector=34,
                                            physical_filter="HSC-R2", abstract_filter="r",
                                            universe=self.registry.dimensions))
     self.assertEqual(usedLinks, set(["instrument", "exposure", "detector", "physical_filter"]))
Code example #14
    def writeCuratedCalibrations(self, butler):
        """Write human-curated calibration Datasets to the given Butler with
        the appropriate validity ranges.

        This is a temporary API that should go away once obs_ packages have
        a standardized approach to this problem.
        """

        # Write cameraGeom.Camera, with an infinite validity range.
        datasetType = DatasetType("camera",
                                  ("instrument", "calibration_label"),
                                  "TablePersistableCamera",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        unboundedDataId = addUnboundedCalibrationLabel(butler.registry,
                                                       self.getName())
        camera = self.getCamera()
        butler.put(camera, datasetType, unboundedDataId)

        # Write brighter-fatter kernel, with an infinite validity range.
        datasetType = DatasetType("bfKernel",
                                  ("instrument", "calibration_label"),
                                  "NumpyArray",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        # Load and then put instead of just moving the file in part to ensure
        # the version in-repo is written with Python 3 and does not need
        # `encoding='latin1'` to be read.
        bfKernel = self.getBrighterFatterKernel()
        butler.put(bfKernel, datasetType, unboundedDataId)

        # The following loops iterate over the values of the dictionaries returned by the transmission
        # functions and ignore the dates that are supplied, because the dates are single points rather
        # than ranges and therefore do not give a proper notion of validity. Unbounded calibration labels
        # are used instead when inserting into the database. In the future these should be updated to
        # account for the ranges that are actually considered valid.

        # Write optical transmissions
        opticsTransmissions = getOpticsTransmission()
        datasetType = DatasetType("transmission_optics",
                                  ("instrument", "calibration_label"),
                                  "TablePersistableTransmissionCurve",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        for entry in opticsTransmissions.values():
            if entry is None:
                continue
            butler.put(entry, datasetType, unboundedDataId)

        # Write transmission sensor
        sensorTransmissions = getSensorTransmission()
        datasetType = DatasetType(
            "transmission_sensor",
            ("instrument", "detector", "calibration_label"),
            "TablePersistableTransmissionCurve",
            universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        for entry in sensorTransmissions.values():
            if entry is None:
                continue
            for sensor, curve in entry.items():
                dataId = DataId(unboundedDataId, detector=sensor)
                butler.put(curve, datasetType, dataId)

        # Write filter transmissions
        filterTransmissions = getFilterTransmission()
        datasetType = DatasetType(
            "transmission_filter",
            ("instrument", "physical_filter", "calibration_label"),
            "TablePersistableTransmissionCurve",
            universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        for entry in filterTransmissions.values():
            if entry is None:
                continue
            for band, curve in entry.items():
                dataId = DataId(unboundedDataId, physical_filter=band)
                butler.put(curve, datasetType, dataId)

        # Write atmospheric transmissions; these have only the instrument dimension because other code
        # (e.g. ISR) looks them up by instrument alone.
        atmosphericTransmissions = getAtmosphereTransmission()
        datasetType = DatasetType("transmission_atmosphere", ("instrument", ),
                                  "TablePersistableTransmissionCurve",
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        for entry in atmosphericTransmissions.values():
            if entry is None:
                continue
            butler.put(entry, datasetType, {"instrument": self.getName()})

        # Write defects with validity ranges taken from obs_subaru_data/hsc/defects
        # (along with the defects themselves).
        datasetType = DatasetType(
            "defects", ("instrument", "detector", "calibration_label"),
            "DefectsList",
            universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        defectPath = os.path.join(getPackageDir("obs_subaru_data"), "hsc",
                                  "defects")
        camera = self.getCamera()
        defectsDict = read_all_defects(defectPath, camera)
        endOfTime = '20380119T031407'
        with butler.transaction():
            for det in defectsDict:
                detector = camera[det]
                times = sorted([k for k in defectsDict[det]])
                defects = [defectsDict[det][time] for time in times]
                times = times + [
                    parser.parse(endOfTime),
                ]
                for defect, beginTime, endTime in zip(defects, times[:-1],
                                                      times[1:]):
                    md = defect.getMetadata()
                    dataId = DataId(
                        universe=butler.registry.dimensions,
                        instrument=self.getName(),
                        calibration_label=
                        f"defect/{md['CALIBDATE']}/{md['DETECTOR']}")
                    dataId.entries["calibration_label"][
                        "valid_first"] = beginTime
                    dataId.entries["calibration_label"]["valid_last"] = endTime
                    butler.registry.addDimensionEntry("calibration_label",
                                                      dataId)
                    butler.put(defect,
                               datasetType,
                               dataId,
                               detector=detector.getId())
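A sketch of a call site for this method. The repository path and run name are assumptions, and `HyperSuprimeCam` stands in for whichever `Instrument` subclass defines `writeCuratedCalibrations` (HSC, given the `obs_subaru_data` defects above):

# Hypothetical call site; the repository path, run name, and the import of
# the HyperSuprimeCam class (not shown) are illustrative assumptions.
from lsst.daf.butler import Butler

butler = Butler("DATA/butler.yaml", run="calib")
instrument = HyperSuprimeCam()
instrument.writeCuratedCalibrations(butler)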
Code example #15
    def traverse(self):
        """Return topologically ordered Quanta and their dependencies.

        This method iterates over all Quanta in topological order, enumerating
        them during iteration. Each returned `QuantumIterData` object contains
        the Quantum instance, its ``quantumId``, and the ``quantumId`` of all
        of its prerequisites (the Quanta that produce inputs for this Quantum):
        - the ``quantumId`` values are generated by an iteration of a
          QuantumGraph, and are not intrinsic to the QuantumGraph;
        - during iteration, each ID will appear in ``quantumId`` before it ever
          appears in dependencies.

        Yields
        ------
        quantumData : `QuantumIterData`
        """

        def orderedTaskNodes(graph):
            """Return topologically ordered task nodes.

            Yields
            ------
            nodes : `QuantumGraphTaskNodes`
            """
            # Tasks in a graph are probably already topologically sorted, but
            # there is no guarantee of that. Re-construct a Pipeline and order
            # its tasks using the existing method.
            nodesMap = {id(item.taskDef): item for item in graph}
            pipeline = orderPipeline(Pipeline(item.taskDef for item in graph))
            for taskDef in pipeline:
                yield nodesMap[id(taskDef)]

        index = 0
        outputs = {}  # maps (DatasetType.name, DataId) to its producing quantum index
        for nodes in orderedTaskNodes(self):
            for quantum in nodes.quanta:

                # Find quantum dependencies (must be in `outputs` already)
                prereq = []
                for dataRef in chain.from_iterable(quantum.predictedInputs.values()):
                    # if data exists in butler then `id` is not None
                    if dataRef.id is None:
                        key = (dataRef.datasetType.name, DataId(dataRef.dataId))
                        try:
                            prereq.append(outputs[key])
                        except KeyError:
                            # The Quantum that makes our inputs is not in the
                            # graph; this can happen when running on a "split
                            # graph", which is usually just one quantum. Check
                            # the number of Quanta in the graph and ignore the
                            # error if there is only one.
                            # TODO: This code has to be removed or replaced
                            # with something more generic.
                            if not (len(self) == 1 and len(self[0].quanta) == 1):
                                raise

                # Update `outputs` with this quantum outputs
                for dataRef in chain.from_iterable(quantum.outputs.values()):
                    key = (dataRef.datasetType.name, DataId(dataRef.dataId))
                    outputs[key] = index

                yield QuantumIterData(index, quantum, nodes.taskDef, prereq)
                index += 1
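A sketch of a consumer of `traverse`. The `qgraph` instance is assumed, and the attribute names on `QuantumIterData` (``quantumId`` and the prerequisite IDs) are inferred from the docstring above rather than from a definition visible here:

# Illustrative consumer; `qgraph` is an assumed QuantumGraph instance, and
# the attribute names below are inferred from the docstring of traverse().
seen = set()
for iterData in qgraph.traverse():
    # Topological order: every prerequisite quantumId was yielded earlier.
    assert all(dep in seen for dep in iterData.dependencies)
    seen.add(iterData.quantumId)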
Code example #16
    def testInstrumentDimensions(self):
        """Test involving only instrument dimensions, no joins to skymap"""
        registry = self.registry

        # need a bunch of dimensions and datasets for test
        registry.addDimensionEntry("instrument", dict(instrument="DummyCam", visit_max=25, exposure_max=300,
                                                      detector_max=6))
        registry.addDimensionEntry("physical_filter", dict(instrument="DummyCam",
                                                           physical_filter="dummy_r",
                                                           abstract_filter="r"))
        registry.addDimensionEntry("physical_filter", dict(instrument="DummyCam",
                                                           physical_filter="dummy_i",
                                                           abstract_filter="i"))
        for detector in (1, 2, 3, 4, 5):
            registry.addDimensionEntry("detector", dict(instrument="DummyCam", detector=detector))
        registry.addDimensionEntry("visit", dict(instrument="DummyCam", visit=10, physical_filter="dummy_i"))
        registry.addDimensionEntry("visit", dict(instrument="DummyCam", visit=11, physical_filter="dummy_r"))
        registry.addDimensionEntry("visit", dict(instrument="DummyCam", visit=20, physical_filter="dummy_r"))
        registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=100, visit=10,
                                                    physical_filter="dummy_i"))
        registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=101, visit=10,
                                                    physical_filter="dummy_i"))
        registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=110, visit=11,
                                                    physical_filter="dummy_r"))
        registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=111, visit=11,
                                                    physical_filter="dummy_r"))
        registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=200, visit=20,
                                                    physical_filter="dummy_r"))
        registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=201, visit=20,
                                                    physical_filter="dummy_r"))

        # dataset types
        collection1 = "test"
        collection2 = "test2"
        run = registry.makeRun(collection=collection1)
        run2 = registry.makeRun(collection=collection2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref = registry.addDataset(rawType, dataId=dataId, run=run)
                # exposures 100 and 101 appear in both collections; 100 has a
                # different dataset_id in each collection, while for 101 only a
                # single dataset_id exists
                if exposure == 100:
                    registry.addDataset(rawType, dataId=dataId, run=run2)
                if exposure == 101:
                    registry.associate(run2.collection, [ref])
        # Add pre-existing datasets to second collection.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                registry.addDataset(rawType, dataId=dataId, run=run2)

        dimensions = registry.dimensions.empty.union(rawType.dimensions, calexpType.dimensions,
                                                     implied=True)

        # with empty expression
        builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
        builder.requireDataset(rawType, collections=[collection1])
        rows = list(builder.execute())
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit",
                                                  "physical_filter", "abstract_filter"))
            packer1 = registry.makeDataIdPacker("visit_detector", dataId)
            packer2 = registry.makeDataIdPacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataId(dataId, dimensions=packer1.dimensions.required))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataId(dataId, dimensions=packer2.dimensions.required))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
        builder.requireDataset(rawType, collections=[collection2])
        rows = list(builder.execute())
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit",
                                                  "physical_filter", "abstract_filter"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
        builder.requireDataset(rawType, collections=[collection1, collection2])
        rows = list(builder.execute())
        self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit",
                                                  "physical_filter", "abstract_filter"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
        builder.requireDataset(rawType, collections=[collection1])
        builder.whereParsedExpression("visit.visit = 10")
        rows = list(builder.execute())
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
        builder.requireDataset(rawType, collections=[collection1])
        builder.whereParsedExpression("visit = 10 and detector > 1")
        rows = list(builder.execute())
        self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
        builder.requireDataset(rawType, collections=[collection1])
        builder.whereParsedExpression("visit.visit > 1000")
        rows = list(builder.execute())
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not among the requested
        # dimensions, but it is part of the full expression, so it should
        # work too.
        builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
        builder.requireDataset(rawType, collections=[collection1])
        builder.whereParsedExpression("physical_filter = 'dummy_r'")
        rows = list(builder.execute())
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))