def updateVisitEntryFromObsInfo(dataId, obsInfo):
    """Construct a visit Dimension entry from
    `astro_metadata_translator.ObservationInfo`.

    Parameters
    ----------
    dataId : `dict` or `DataId`
        Dictionary of Dimension link fields for (at least) visit. If a true
        `DataId`, this object will be modified and returned.
    obsInfo : `astro_metadata_translator.ObservationInfo`
        A `~astro_metadata_translator.ObservationInfo` object corresponding to
        the exposure.

    Returns
    -------
    dataId : `DataId`
        A data ID with the entry for the visit dimension updated.
    """
    dataId = DataId(dataId)
    dataId.entries[dataId.dimensions()["visit"]].update(
        datetime_begin=obsInfo.datetime_begin.to_datetime(),
        datetime_end=obsInfo.datetime_end.to_datetime(),
        exposure_time=obsInfo.exposure_time.to_value("s"),
    )
    return dataId
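def _exampleVisitEntryFromHeaders(headers, universe):
    # Hedged usage sketch, not part of the original code: build a visit DataId
    # from a raw file's primary header and fill in its entry with
    # updateVisitEntryFromObsInfo. `headers` (a list of header PropertyLists)
    # and `universe` (a Registry's dimension universe) are assumed inputs.
    obsInfo = ObservationInfo(headers[0])
    dataId = DataId({"instrument": obsInfo.instrument, "visit": obsInfo.visit_id},
                    universe=universe)
    return updateVisitEntryFromObsInfo(dataId, obsInfo)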
def addUnboundedCalibrationLabel(registry, instrumentName):
    """Add a special 'unbounded' calibration_label dimension entry for the
    given camera that is valid for any exposure.

    If such an entry already exists, this function just returns a `DataId`
    for the existing entry.

    Parameters
    ----------
    registry : `Registry`
        Registry object in which to insert the dimension entry.
    instrumentName : `str`
        Name of the instrument this calibration label is associated with.

    Returns
    -------
    dataId : `DataId`
        New or existing data ID for the unbounded calibration.
    """
    d = dict(instrument=instrumentName, calibration_label="unbounded")
    try:
        return registry.expandDataId(dimension="calibration_label",
                                     metadata=["valid_first", "valid_last"], **d)
    except LookupError:
        pass
    unboundedDataId = DataId(universe=registry.dimensions, **d)
    unboundedDataId.entries["calibration_label"]["valid_first"] = datetime.min
    unboundedDataId.entries["calibration_label"]["valid_last"] = datetime.max
    registry.addDimensionEntry("calibration_label", unboundedDataId)
    return unboundedDataId
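def _exampleCuratedPut(butler, instrument, datasetType, obj):
    # Hedged usage sketch, not part of the original code: write a curated
    # dataset with an effectively infinite validity range by reusing the
    # unbounded calibration label above, mirroring the writeCuratedCalibrations
    # snippets later in this section. `instrument` is assumed to be an
    # Instrument-like object providing getName(), and `datasetType` is assumed
    # to be registered and to include the calibration_label dimension.
    unboundedDataId = addUnboundedCalibrationLabel(butler.registry, instrument.getName())
    butler.put(obj, datasetType, unboundedDataId)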
def testConstructor(self):
    """Test for making new instances.
    """
    dataId = DataId(dict(instrument="DummyInstrument", detector=1, visit=2, physical_filter="i"),
                    universe=self.universe)
    self.assertEqual(len(dataId), 4)
    self.assertCountEqual(dataId.keys(), ("instrument", "detector", "visit", "physical_filter"))
def testSkyMapPacking(self):
    """Test that packing Tract+Patch into an integer in Gen3 works and is
    self-consistent.

    Note that this packing does *not* use the same algorithm as Gen2 and hence
    generates different IDs, because the Gen2 algorithm is problematically
    tied to the *default* SkyMap for a particular camera, rather than the
    SkyMap actually used.
    """
    # SkyMap used by ci_hsc has only one tract, so the test coverage in
    # that area isn't great.  That's okay because that's tested in SkyMap;
    # what we care about here is that the converted repo has the necessary
    # metadata to construct and use these packers at all.
    for patch in [0, 43, 52]:
        dataId = self.butler.registry.expandDataId(skymap="ci_hsc", tract=0, patch=patch,
                                                   abstract_filter='r')
        packer1 = self.butler.registry.makeDataIdPacker("tract_patch", dataId)
        packer2 = self.butler.registry.makeDataIdPacker("tract_patch_abstract_filter", dataId)
        self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                         DataId(dataId, dimensions=packer1.dimensions.required))
        self.assertEqual(packer2.unpack(packer2.pack(dataId)), dataId)
        self.assertEqual(packer1.pack(dataId, abstract_filter='i'), packer1.pack(dataId))
        self.assertNotEqual(packer2.pack(dataId, abstract_filter='i'), packer2.pack(dataId))
def unpack(self, packedId):
    # Docstring inherited from DataIdPacker.unpack
    return DataId({"instrument": self._instrumentName,
                   "detector": packedId % self._detectorMax,
                   self._observationLink: packedId // self._detectorMax},
                  dimensions=self.dimensions.required)
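def _examplePackObservationDetector(observationId, detectorId, detectorMax):
    # Hedged sketch of the integer layout implied by the unpack above, shown as
    # an illustrative standalone helper rather than the packer's actual pack
    # method: the observation value (visit or exposure) occupies the high part
    # and the detector the low part, so `packedId % detectorMax` and
    # `packedId // detectorMax` recover both values.
    return observationId * detectorMax + detectorId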
def writeCuratedCalibrations(self, butler):
    """Write human-curated calibration Datasets to the given Butler with
    the appropriate validity ranges.

    This is a temporary API that should go away once obs_ packages have a
    standardized approach to this problem.
    """
    # Write cameraGeom.Camera, with an infinite validity range.
    datasetType = DatasetType("camera", ("Instrument", "CalibrationLabel"), "TablePersistableCamera")
    butler.registry.registerDatasetType(datasetType)
    unboundedDataId = addUnboundedCalibrationLabel(butler.registry, self.getName())
    camera = self.getCamera()
    butler.put(camera, datasetType, unboundedDataId)
    # Write brighter-fatter kernel, with an infinite validity range.
    datasetType = DatasetType("bfKernel", ("Instrument", "CalibrationLabel"), "NumpyArray")
    butler.registry.registerDatasetType(datasetType)
    # Load and then put instead of just moving the file in part to ensure
    # the version in-repo is written with Python 3 and does not need
    # `encoding='latin1'` to be read.
    bfKernel = self.getBrighterFatterKernel()
    butler.put(bfKernel, datasetType, unboundedDataId)
    # Write defects with validity ranges taken from obs_subaru/hsc/defects
    # (along with the defects themselves).
    datasetType = DatasetType("defects", ("Instrument", "Detector", "CalibrationLabel"), "Catalog")
    butler.registry.registerDatasetType(datasetType)
    defectPath = os.path.join(getPackageDir("obs_subaru"), "hsc", "defects")
    dbPath = os.path.join(defectPath, "defectRegistry.sqlite3")
    db = sqlite3.connect(dbPath)
    db.row_factory = sqlite3.Row
    sql = "SELECT path, ccd, validStart, validEnd FROM defect"
    with butler.transaction():
        for row in db.execute(sql):
            dataId = DataId(universe=butler.registry.dimensions,
                            instrument=self.getName(),
                            calibration_label=f"defect/{row['path']}/{row['ccd']}")
            dataId.entries["CalibrationLabel"]["valid_first"] = readDateTime(row["validStart"])
            dataId.entries["CalibrationLabel"]["valid_last"] = readDateTime(row["validEnd"])
            butler.registry.addDimensionEntry("CalibrationLabel", dataId)
            ref = butler.registry.addDataset(datasetType, dataId, run=butler.run, recursive=True,
                                             detector=row['ccd'])
            butler.datastore.ingest(os.path.join(defectPath, row["path"]), ref, transfer="copy")
def unpack(self, packedId):
    # Docstring inherited from DataIdPacker.unpack
    d = {"skymap": self._skyMapName}
    if self._filterMax is not None:
        d["abstract_filter"] = self.getFilterNameFromInt(packedId // self._tractPatchMax)
        packedId %= self._tractPatchMax
    d["tract"] = packedId // self._patchMax
    d["patch"] = packedId % self._patchMax
    return DataId(d, dimensions=self.dimensions.required)
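def _examplePackSkyMapDataId(dataId, patchMax, tractPatchMax, filterInt=None):
    # Hedged sketch of the integer layout implied by the unpack above, as an
    # illustrative helper only (not the packer's own pack implementation):
    # patch occupies the low digits, tract sits above it, and an optional
    # abstract_filter index (here assumed to be supplied as the integer
    # `filterInt`) sits above tract*patch when filters are packed.
    packed = dataId["tract"] * patchMax + dataId["patch"]
    if filterInt is not None:
        packed += filterInt * tractPatchMax
    return packed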
def testDataIdPacker(self):
    registry = self.makeRegistry()
    if registry.limited:
        return
    registry.addDimensionEntry(
        "instrument",
        {"instrument": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2})
    registry.addDimensionEntry(
        "physical_filter",
        {"instrument": "DummyCam", "physical_filter": "R", "abstract_filter": "r"})
    registry.addDimensionEntry(
        "visit",
        {"instrument": "DummyCam", "visit": 5, "physical_filter": "R"})
    registry.addDimensionEntry(
        "exposure",
        {"instrument": "DummyCam", "exposure": 4, "visit": 5, "physical_filter": "R"})
    dataId0 = registry.expandDataId(instrument="DummyCam")
    with self.assertRaises(LookupError):
        registry.packDataId("visit_detector", dataId0)
        registry.packDataId("exposure_detector", dataId0)
    dataId1 = DataId(dataId0, visit=5, detector=1)
    self.assertEqual(registry.packDataId("visit_detector", dataId1), 11)
    packer = registry.makeDataIdPacker("exposure_detector", dataId0)
    dataId2 = DataId(dataId0, exposure=4, detector=0)
    self.assertEqual(packer.pack(dataId0, exposure=4, detector=0), 8)
    self.assertEqual(packer.pack(dataId2), 8)
    self.assertEqual(registry.packDataId("exposure_detector", dataId2), 8)
    dataId2a = packer.unpack(8)
    self.assertEqual(dataId2, dataId2a)
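# Note on the packed values asserted above (a hedged reading of the test,
# assuming the observation-major layout shown by the unpack methods elsewhere
# in this section): with detector_max=2, visit=5 and detector=1 pack to
# 5*2 + 1 = 11, and exposure=4 with detector=0 packs to 4*2 + 0 = 8.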
def testPickle(self):
    """Test pickle support.
    """
    dataId = DataId(dict(instrument="DummyInstrument", detector=1, visit=2, physical_filter="i"),
                    universe=self.universe)
    dataIdOut = pickle.loads(pickle.dumps(dataId))
    self.assertIsInstance(dataIdOut, DataId)
    self.assertEqual(dataId, dataIdOut)
def testWithoutFilter(self):
    covered = DimensionSet(universe=self.universe, elements=["tract", "patch"])
    dimensions = DataIdPackerDimensions(given=self.given, required=self.given.union(covered))
    dataId = DataId(skymap=self.parameters["skymap"], tract=2, patch=6, universe=self.universe)
    packer = SkyMapDataIdPacker(dimensions, **self.parameters)
    packedId = packer.pack(dataId)
    self.assertLessEqual(packedId.bit_length(), packer.maxBits)
    self.assertEqual(packer.unpack(packedId), dataId)
def _refFromConnection(butler: Butler, connection: DimensionedConnection, dataId: DataId,
                       **kwargs: Any) -> DatasetRef:
    """Create a DatasetRef for a connection in a collection.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The collection to point to.
    connection : `lsst.pipe.base.connectionTypes.DimensionedConnection`
        The connection defining the dataset type to point to.
    dataId
        The data ID for the dataset to point to.
    **kwargs
        Additional keyword arguments used to augment or construct a
        `~lsst.daf.butler.DataCoordinate`.

    Returns
    -------
    ref : `lsst.daf.butler.DatasetRef`
        A reference to a dataset compatible with ``connection``, with ID
        ``dataId``, in the collection pointed to by ``butler``.
    """
    universe = butler.registry.dimensions
    # DatasetRef only tests if required dimension is missing, but not extras
    _checkDimensionsMatch(universe, set(connection.dimensions), dataId.keys())
    dataId = DataCoordinate.standardize(dataId, **kwargs, universe=universe)

    # skypix is a PipelineTask alias for "some spatial index", Butler doesn't
    # understand it. Code copied from TaskDatasetTypes.fromTaskDef
    if "skypix" in connection.dimensions:
        datasetType = butler.registry.getDatasetType(connection.name)
    else:
        datasetType = connection.makeDatasetType(universe)
    try:
        butler.registry.getDatasetType(datasetType.name)
    except KeyError:
        raise ValueError(f"Invalid dataset type {connection.name}.")
    try:
        ref = DatasetRef(datasetType=datasetType, dataId=dataId)
        return ref
    except KeyError as e:
        raise ValueError(
            f"Dataset type ({connection.name}) and ID {dataId.byName()} not compatible."
        ) from e
def ensureDimensions(self, file):
    """Extract metadata from a raw file and add Exposure and Visit
    Dimension entries.

    Any needed Instrument, Detector, and PhysicalFilter Dimension entries must
    exist in the Registry before `run` is called.

    Parameters
    ----------
    file : `str` or path-like object
        Absolute path to the file to be ingested.

    Returns
    -------
    headers : `list` of `~lsst.daf.base.PropertyList`
        Result of calling `readHeaders`.
    dataId : `DataId`
        Data ID dictionary, as returned by `extractDataId`.
    """
    headers = self.readHeaders(file)
    obsInfo = ObservationInfo(headers[0])

    # Extract a DataId that covers all of self.dimensions.
    fullDataId = self.extractDataId(file, headers, obsInfo=obsInfo)

    for dimension in self.dimensions:
        dimensionDataId = DataId(fullDataId, dimension=dimension)
        if dimensionDataId not in self.dimensionEntriesDone[dimension]:
            # Next look in the Registry
            dimensionEntryDict = self.butler.registry.findDimensionEntry(dimension, dimensionDataId)
            if dimensionEntryDict is None:
                if dimension.name in ("Visit", "Exposure"):
                    # Add the entry into the Registry.
                    self.butler.registry.addDimensionEntry(dimension, dimensionDataId)
                else:
                    raise LookupError(
                        f"Entry for {dimension.name} with ID {dimensionDataId} not found; must be "
                        f"present in Registry prior to ingest.")
            # Record that we've handled this entry.
            self.dimensionEntriesDone[dimension].add(dimensionDataId)

    return headers, fullDataId
def testSkyPixIndirection(self):
    """Test that SingleDatasetQueryBuilder can look up datasets with
    skypix dimensions from a data ID with visit+detector dimensions.
    """
    # visit+detector <-> skypix lookups for reference catalogs
    refcat = DatasetType("refcat", self.registry.dimensions.extract(["skypix"]), "ImageU")
    builder = SingleDatasetQueryBuilder.fromSingleCollection(self.registry, refcat,
                                                             collection="refcats")
    newLinks = builder.relateDimensions(
        self.registry.dimensions.extract(["instrument", "visit", "detector"], implied=True)
    )
    self.assertEqual(newLinks, set(["instrument", "visit", "detector"]))
    self.assertIsNotNone(builder.findSelectableByName("visit_detector_skypix_join"))
    usedLinks = builder.whereDataId(DataId(instrument="HSC", visit=12, detector=34,
                                           physical_filter="HSC-R2", abstract_filter="r",
                                           universe=self.registry.dimensions))
    self.assertEqual(usedLinks, set(["instrument", "visit", "detector"]))
def extractDataId(self, file, headers, obsInfo):
    """Return the Data ID dictionary that should be used to label a file.

    Parameters
    ----------
    file : `str` or path-like object
        Absolute path to the file being ingested (prior to any transfers).
    headers : `list` of `~lsst.daf.base.PropertyList`
        All headers returned by `readHeaders()`.
    obsInfo : `astro_metadata_translator.ObservationInfo`
        Observational metadata extracted from the headers.

    Returns
    -------
    dataId : `DataId`
        A mapping whose key-value pairs uniquely identify raw datasets.
        Must have ``dataId.dimensions() <= self.dimensions``, with at least
        Instrument, Exposure, and Detector present.
    """
    toRemove = set()
    if obsInfo.visit_id is None:
        toRemove.add("Visit")
    if obsInfo.physical_filter is None:
        toRemove.add("PhysicalFilter")
    if toRemove:
        dimensions = self.dimensions.difference(toRemove)
    else:
        dimensions = self.dimensions
    dataId = DataId(
        dimensions=dimensions,
        instrument=obsInfo.instrument,
        exposure=obsInfo.exposure_id,
        visit=obsInfo.visit_id,
        detector=obsInfo.detector_num,
        physical_filter=obsInfo.physical_filter,
    )
    updateExposureEntryFromObsInfo(dataId, obsInfo)
    if obsInfo.visit_id is not None:
        updateVisitEntryFromObsInfo(dataId, obsInfo)
    return dataId
def testCalibrationLabelIndirection(self):
    """Test that SingleDatasetQueryBuilder can look up datasets with
    calibration_label dimensions from a data ID with exposure dimensions.
    """
    # exposure <-> calibration_label lookups for master calibrations
    flat = DatasetType(
        "flat",
        self.registry.dimensions.extract(["instrument", "detector", "physical_filter",
                                          "calibration_label"]),
        "ImageU"
    )
    builder = SingleDatasetQueryBuilder.fromSingleCollection(self.registry, flat, collection="calib")
    newLinks = builder.relateDimensions(
        self.registry.dimensions.extract(["instrument", "exposure", "detector"], implied=True)
    )
    self.assertEqual(newLinks, set(["exposure"]))
    self.assertIsNotNone(builder.findSelectableByName("exposure_calibration_label_join"))
    usedLinks = builder.whereDataId(DataId(instrument="HSC", exposure=12, detector=34,
                                           physical_filter="HSC-R2", abstract_filter="r",
                                           universe=self.registry.dimensions))
    self.assertEqual(usedLinks, set(["instrument", "exposure", "detector", "physical_filter"]))
def writeCuratedCalibrations(self, butler):
    """Write human-curated calibration Datasets to the given Butler with
    the appropriate validity ranges.

    This is a temporary API that should go away once obs_ packages have a
    standardized approach to this problem.
    """
    # Write cameraGeom.Camera, with an infinite validity range.
    datasetType = DatasetType("camera", ("instrument", "calibration_label"),
                              "TablePersistableCamera", universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    unboundedDataId = addUnboundedCalibrationLabel(butler.registry, self.getName())
    camera = self.getCamera()
    butler.put(camera, datasetType, unboundedDataId)

    # Write brighter-fatter kernel, with an infinite validity range.
    datasetType = DatasetType("bfKernel", ("instrument", "calibration_label"), "NumpyArray",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # Load and then put instead of just moving the file in part to ensure
    # the version in-repo is written with Python 3 and does not need
    # `encoding='latin1'` to be read.
    bfKernel = self.getBrighterFatterKernel()
    butler.put(bfKernel, datasetType, unboundedDataId)

    # The following iterate over the values of the dictionaries returned by the transmission functions
    # and ignore the date that is supplied. This is due to the dates not being ranges but single dates,
    # which do not give the proper notion of validity. As such unbounded calibration labels are used
    # when inserting into the database. In the future these could and probably should be updated to
    # properly account for what ranges are considered valid.

    # Write optical transmissions
    opticsTransmissions = getOpticsTransmission()
    datasetType = DatasetType("transmission_optics", ("instrument", "calibration_label"),
                              "TablePersistableTransmissionCurve",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    for entry in opticsTransmissions.values():
        if entry is None:
            continue
        butler.put(entry, datasetType, unboundedDataId)

    # Write transmission sensor
    sensorTransmissions = getSensorTransmission()
    datasetType = DatasetType("transmission_sensor",
                              ("instrument", "detector", "calibration_label"),
                              "TablePersistableTransmissionCurve",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    for entry in sensorTransmissions.values():
        if entry is None:
            continue
        for sensor, curve in entry.items():
            dataId = DataId(unboundedDataId, detector=sensor)
            butler.put(curve, datasetType, dataId)

    # Write filter transmissions
    filterTransmissions = getFilterTransmission()
    datasetType = DatasetType("transmission_filter",
                              ("instrument", "physical_filter", "calibration_label"),
                              "TablePersistableTransmissionCurve",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    for entry in filterTransmissions.values():
        if entry is None:
            continue
        for band, curve in entry.items():
            dataId = DataId(unboundedDataId, physical_filter=band)
            butler.put(curve, datasetType, dataId)

    # Write atmospheric transmissions, this only as dimension of instrument as other areas will only
    # look up along this dimension (ISR)
    atmosphericTransmissions = getAtmosphereTransmission()
    datasetType = DatasetType("transmission_atmosphere", ("instrument",),
                              "TablePersistableTransmissionCurve",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    for entry in atmosphericTransmissions.values():
        if entry is None:
            continue
        butler.put(entry, datasetType, {"instrument": self.getName()})

    # Write defects with validity ranges taken from obs_subaru_data/hsc/defects
    # (along with the defects themselves).
    datasetType = DatasetType("defects", ("instrument", "detector", "calibration_label"),
                              "DefectsList", universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    defectPath = os.path.join(getPackageDir("obs_subaru_data"), "hsc", "defects")
    camera = self.getCamera()
    defectsDict = read_all_defects(defectPath, camera)
    endOfTime = '20380119T031407'
    with butler.transaction():
        for det in defectsDict:
            detector = camera[det]
            times = sorted([k for k in defectsDict[det]])
            defects = [defectsDict[det][time] for time in times]
            times = times + [parser.parse(endOfTime), ]
            for defect, beginTime, endTime in zip(defects, times[:-1], times[1:]):
                md = defect.getMetadata()
                dataId = DataId(universe=butler.registry.dimensions,
                                instrument=self.getName(),
                                calibration_label=f"defect/{md['CALIBDATE']}/{md['DETECTOR']}")
                dataId.entries["calibration_label"]["valid_first"] = beginTime
                dataId.entries["calibration_label"]["valid_last"] = endTime
                butler.registry.addDimensionEntry("calibration_label", dataId)
                butler.put(defect, datasetType, dataId, detector=detector.getId())
def makeQuantum(
    task: PipelineTask,
    butler: Butler,
    dataId: DataId,
    ioDataIds: Mapping[str, Union[DataId, Sequence[DataId]]],
) -> Quantum:
    """Create a Quantum for a particular data ID(s).

    Parameters
    ----------
    task : `lsst.pipe.base.PipelineTask`
        The task whose processing the quantum represents.
    butler : `lsst.daf.butler.Butler`
        The collection the quantum refers to.
    dataId : any data ID type
        The data ID of the quantum. Must have the same dimensions as
        ``task``'s connections class.
    ioDataIds : `collections.abc.Mapping` [`str`]
        A mapping keyed by input/output names. Values must be data IDs for
        single connections and sequences of data IDs for multiple connections.

    Returns
    -------
    quantum : `lsst.daf.butler.Quantum`
        A quantum for ``task``, when called with ``dataIds``.
    """
    connections = task.config.ConnectionsClass(config=task.config)

    try:
        _checkDimensionsMatch(butler.registry.dimensions, connections.dimensions, dataId.keys())
    except ValueError as e:
        raise ValueError("Error in quantum dimensions.") from e

    inputs = defaultdict(list)
    outputs = defaultdict(list)
    for name in itertools.chain(connections.inputs, connections.prerequisiteInputs):
        try:
            connection = connections.__getattribute__(name)
            _checkDataIdMultiplicity(name, ioDataIds[name], connection.multiple)
            ids = _normalizeDataIds(ioDataIds[name])
            for id in ids:
                ref = _refFromConnection(butler, connection, id)
                inputs[ref.datasetType].append(ref)
        except (ValueError, KeyError) as e:
            raise ValueError(f"Error in connection {name}.") from e
    for name in connections.outputs:
        try:
            connection = connections.__getattribute__(name)
            _checkDataIdMultiplicity(name, ioDataIds[name], connection.multiple)
            ids = _normalizeDataIds(ioDataIds[name])
            for id in ids:
                ref = _refFromConnection(butler, connection, id)
                outputs[ref.datasetType].append(ref)
        except (ValueError, KeyError) as e:
            raise ValueError(f"Error in connection {name}.") from e
    quantum = Quantum(
        taskClass=type(task),
        dataId=DataCoordinate.standardize(dataId, universe=butler.registry.dimensions),
        inputs=inputs,
        outputs=outputs,
    )
    return quantum
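def _exampleMakeQuantum(task, butler, dataId):
    # Hedged usage sketch, not part of the original code: build a Quantum for a
    # hypothetical task whose connections are named "calexp" (a single input)
    # and "measurement" (a single output), all sharing the quantum's data ID.
    # Real connection names and dimensions depend on the task's connections
    # class; this only illustrates the shape of the `ioDataIds` mapping.
    return makeQuantum(task, butler, dataId,
                       ioDataIds={"calexp": dataId, "measurement": dataId})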
def traverse(self):
    """Return topologically ordered Quanta and their dependencies.

    This method iterates over all Quanta in topological order, enumerating
    them during iteration. Each returned `QuantumIterData` object contains the
    Quantum instance, its ``quantumId``, and the ``quantumId`` of all its
    prerequisites (Quanta that produce inputs for this Quantum):

    - the ``quantumId`` values are generated by an iteration of a
      QuantumGraph, and are not intrinsic to the QuantumGraph
    - during iteration, each ID will appear in quantumId before it ever
      appears in dependencies.

    Yields
    ------
    quantumData : `QuantumIterData`
    """

    def orderedTaskNodes(graph):
        """Return topologically ordered task nodes.

        Yields
        ------
        nodes : `QuantumGraphTaskNodes`
        """
        # Tasks in a graph are probably topologically sorted already but there
        # is no guarantee for that. Just re-construct Pipeline and order tasks
        # in a pipeline using existing method.
        nodesMap = {id(item.taskDef): item for item in graph}
        pipeline = orderPipeline(Pipeline(item.taskDef for item in graph))
        for taskDef in pipeline:
            yield nodesMap[id(taskDef)]

    index = 0
    outputs = {}  # maps (DatasetType.name, DataId) to its producing quantum index
    for nodes in orderedTaskNodes(self):
        for quantum in nodes.quanta:

            # Find quantum dependencies (must be in `outputs` already)
            prereq = []
            for dataRef in chain.from_iterable(quantum.predictedInputs.values()):
                # if data exists in butler then `id` is not None
                if dataRef.id is None:
                    key = (dataRef.datasetType.name, DataId(dataRef.dataId))
                    try:
                        prereq.append(outputs[key])
                    except KeyError:
                        # The Quantum that makes our inputs is not in the
                        # graph, this could happen if we run on a "split
                        # graph" which is usually just one quantum. Check for
                        # number of Quanta in a graph and ignore error if it's
                        # just one.
                        # TODO: This code has to be removed or replaced with
                        # something more generic
                        if not (len(self) == 1 and len(self[0].quanta) == 1):
                            raise

            # Update `outputs` with this quantum outputs
            for dataRef in chain.from_iterable(quantum.outputs.values()):
                key = (dataRef.datasetType.name, DataId(dataRef.dataId))
                outputs[key] = index

            yield QuantumIterData(index, quantum, nodes.taskDef, prereq)
            index += 1
def testInstrumentDimensions(self):
    """Test involving only instrument dimensions, no joins to skymap"""
    registry = self.registry

    # need a bunch of dimensions and datasets for test
    registry.addDimensionEntry("instrument", dict(instrument="DummyCam", visit_max=25,
                                                  exposure_max=300, detector_max=6))
    registry.addDimensionEntry("physical_filter", dict(instrument="DummyCam",
                                                       physical_filter="dummy_r",
                                                       abstract_filter="r"))
    registry.addDimensionEntry("physical_filter", dict(instrument="DummyCam",
                                                       physical_filter="dummy_i",
                                                       abstract_filter="i"))
    for detector in (1, 2, 3, 4, 5):
        registry.addDimensionEntry("detector", dict(instrument="DummyCam", detector=detector))
    registry.addDimensionEntry("visit", dict(instrument="DummyCam", visit=10,
                                             physical_filter="dummy_i"))
    registry.addDimensionEntry("visit", dict(instrument="DummyCam", visit=11,
                                             physical_filter="dummy_r"))
    registry.addDimensionEntry("visit", dict(instrument="DummyCam", visit=20,
                                             physical_filter="dummy_r"))
    registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=100, visit=10,
                                                physical_filter="dummy_i"))
    registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=101, visit=10,
                                                physical_filter="dummy_i"))
    registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=110, visit=11,
                                                physical_filter="dummy_r"))
    registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=111, visit=11,
                                                physical_filter="dummy_r"))
    registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=200, visit=20,
                                                physical_filter="dummy_r"))
    registry.addDimensionEntry("exposure", dict(instrument="DummyCam", exposure=201, visit=20,
                                                physical_filter="dummy_r"))

    # dataset types
    collection1 = "test"
    collection2 = "test2"
    run = registry.makeRun(collection=collection1)
    run2 = registry.makeRun(collection=collection2)
    storageClass = StorageClass("testDataset")
    registry.storageClasses.registerStorageClass(storageClass)
    rawType = DatasetType(name="RAW",
                          dimensions=registry.dimensions.extract(("instrument", "exposure",
                                                                  "detector")),
                          storageClass=storageClass)
    registry.registerDatasetType(rawType)
    calexpType = DatasetType(name="CALEXP",
                             dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                     "detector")),
                             storageClass=storageClass)
    registry.registerDatasetType(calexpType)

    # add pre-existing datasets
    for exposure in (100, 101, 110, 111):
        for detector in (1, 2, 3):
            # note that only 3 of 5 detectors have datasets
            dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
            ref = registry.addDataset(rawType, dataId=dataId, run=run)
            # exposures 100 and 101 appear in both collections, 100 has
            # different dataset_id in different collections, for 101 only
            # single dataset_id exists
            if exposure == 100:
                registry.addDataset(rawType, dataId=dataId, run=run2)
            if exposure == 101:
                registry.associate(run2.collection, [ref])
    # Add pre-existing datasets to second collection.
    for exposure in (200, 201):
        for detector in (3, 4, 5):
            # note that only 3 of 5 detectors have datasets
            dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
            registry.addDataset(rawType, dataId=dataId, run=run2)

    dimensions = registry.dimensions.empty.union(rawType.dimensions, calexpType.dimensions,
                                                 implied=True)

    # with empty expression
    builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
    builder.requireDataset(rawType, collections=[collection1])
    rows = list(builder.execute())
    self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
    for dataId in rows:
        self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit",
                                              "physical_filter", "abstract_filter"))
        packer1 = registry.makeDataIdPacker("visit_detector", dataId)
        packer2 = registry.makeDataIdPacker("exposure_detector", dataId)
        self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                         DataId(dataId, dimensions=packer1.dimensions.required))
        self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                         DataId(dataId, dimensions=packer2.dimensions.required))
        self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
    self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
    self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
    self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    # second collection
    builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
    builder.requireDataset(rawType, collections=[collection2])
    rows = list(builder.execute())
    self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
    for dataId in rows:
        self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit",
                                              "physical_filter", "abstract_filter"))
    self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
    self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
    self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

    # with two input datasets
    builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
    builder.requireDataset(rawType, collections=[collection1, collection2])
    rows = list(builder.execute())
    self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
    for dataId in rows:
        self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit",
                                              "physical_filter", "abstract_filter"))
    self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                          (100, 101, 110, 111, 200, 201))
    self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
    self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

    # limit to single visit
    builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
    builder.requireDataset(rawType, collections=[collection1])
    builder.whereParsedExpression("visit.visit = 10")
    rows = list(builder.execute())
    self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
    self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
    self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
    self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    # more limiting expression, using link names instead of Table.column
    builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
    builder.requireDataset(rawType, collections=[collection1])
    builder.whereParsedExpression("visit = 10 and detector > 1")
    rows = list(builder.execute())
    self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
    self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
    self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
    self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

    # expression excludes everything
    builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
    builder.requireDataset(rawType, collections=[collection1])
    builder.whereParsedExpression("visit.visit > 1000")
    rows = list(builder.execute())
    self.assertEqual(len(rows), 0)

    # Selecting by physical_filter, this is not in the dimensions, but it
    # is a part of the full expression so it should work too.
    builder = DataIdQueryBuilder.fromDimensions(registry, dimensions)
    builder.requireDataset(rawType, collections=[collection1])
    builder.whereParsedExpression("physical_filter = 'dummy_r'")
    rows = list(builder.execute())
    self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
    self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
    self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
    self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
def writeCuratedCalibrations(self, butler):
    """Write human-curated calibration Datasets to the given Butler with
    the appropriate validity ranges.

    This is a temporary API that should go away once obs_ packages have a
    standardized approach to this problem.
    """
    # Write cameraGeom.Camera, with an infinite validity range.
    datasetType = DatasetType("camera", ("instrument", "calibration_label"),
                              "TablePersistableCamera")
    butler.registry.registerDatasetType(datasetType)
    unboundedDataId = addUnboundedCalibrationLabel(butler.registry, self.getName())
    camera = self.getCamera()
    butler.put(camera, datasetType, unboundedDataId)

    # Write brighter-fatter kernel, with an infinite validity range.
    datasetType = DatasetType("bfKernel", ("instrument", "calibration_label"), "NumpyArray")
    butler.registry.registerDatasetType(datasetType)
    # Load and then put instead of just moving the file in part to ensure
    # the version in-repo is written with Python 3 and does not need
    # `encoding='latin1'` to be read.
    bfKernel = self.getBrighterFatterKernel()
    butler.put(bfKernel, datasetType, unboundedDataId)

    # The following iterate over the values of the dictionaries returned by the transmission functions
    # and ignore the date that is supplied. This is due to the dates not being ranges but single dates,
    # which do not give the proper notion of validity. As such unbounded calibration labels are used
    # when inserting into the database. In the future these could and probably should be updated to
    # properly account for what ranges are considered valid.

    # Write optical transmissions
    opticsTransmissions = getOpticsTransmission()
    datasetType = DatasetType("transmission_optics", ("instrument", "calibration_label"),
                              "TablePersistableTransmissionCurve")
    butler.registry.registerDatasetType(datasetType)
    for entry in opticsTransmissions.values():
        if entry is None:
            continue
        butler.put(entry, datasetType, unboundedDataId)

    # Write transmission sensor
    sensorTransmissions = getSensorTransmission()
    datasetType = DatasetType("transmission_sensor",
                              ("instrument", "detector", "calibration_label"),
                              "TablePersistableTransmissionCurve")
    butler.registry.registerDatasetType(datasetType)
    for entry in sensorTransmissions.values():
        if entry is None:
            continue
        for sensor, curve in entry.items():
            dataId = DataId(unboundedDataId, detector=sensor)
            butler.put(curve, datasetType, dataId)

    # Write filter transmissions
    filterTransmissions = getFilterTransmission()
    datasetType = DatasetType("transmission_filter",
                              ("instrument", "physical_filter", "calibration_label"),
                              "TablePersistableTransmissionCurve")
    butler.registry.registerDatasetType(datasetType)
    for entry in filterTransmissions.values():
        if entry is None:
            continue
        for band, curve in entry.items():
            dataId = DataId(unboundedDataId, physical_filter=band)
            butler.put(curve, datasetType, dataId)

    # Write atmospheric transmissions, this only as dimension of instrument as other areas will only
    # look up along this dimension (ISR)
    atmosphericTransmissions = getAtmosphereTransmission()
    datasetType = DatasetType("transmission_atmosphere", ("instrument",),
                              "TablePersistableTransmissionCurve")
    butler.registry.registerDatasetType(datasetType)
    for entry in atmosphericTransmissions.values():
        if entry is None:
            continue
        butler.put(entry, datasetType, {"instrument": self.getName()})

    # Write defects with validity ranges taken from obs_subaru/hsc/defects
    # (along with the defects themselves).
    datasetType = DatasetType("defects", ("instrument", "detector", "calibration_label"),
                              "DefectsList")
    butler.registry.registerDatasetType(datasetType)
    defectPath = os.path.join(getPackageDir("obs_subaru"), "hsc", "defects")
    dbPath = os.path.join(defectPath, "defectRegistry.sqlite3")
    db = sqlite3.connect(dbPath)
    db.row_factory = sqlite3.Row
    sql = "SELECT path, ccd, validStart, validEnd FROM defect"
    with butler.transaction():
        for row in db.execute(sql):
            dataId = DataId(universe=butler.registry.dimensions,
                            instrument=self.getName(),
                            calibration_label=f"defect/{row['path']}/{row['ccd']}")
            dataId.entries["calibration_label"]["valid_first"] = readDateTime(row["validStart"])
            dataId.entries["calibration_label"]["valid_last"] = readDateTime(row["validEnd"])
            butler.registry.addDimensionEntry("calibration_label", dataId)
            ref = butler.registry.addDataset(datasetType, dataId, run=butler.run, recursive=True,
                                             detector=row['ccd'])
            butler.datastore.ingest(os.path.join(defectPath, row["path"]), ref, transfer="copy")