def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding, datasetTypes: TaskDatasetTypes):
    """Build per-task scaffolding state as subset views of ``parent``."""
    self.taskDef = taskDef
    self.dimensions = DimensionGraph(parent.dimensions.universe,
                                     names=taskDef.connections.dimensions)
    assert self.dimensions.issubset(parent.dimensions)
    # Each per-task _DatasetDict is a subset of one or two corresponding
    # dicts in the parent _PipelineScaffolding (intermediates appear on
    # both the producing and the consuming side).
    subsetSpecs = (
        ("initInputs", datasetTypes.initInputs, (parent.initInputs, parent.initIntermediates)),
        ("initOutputs", datasetTypes.initOutputs, (parent.initIntermediates, parent.initOutputs)),
        ("inputs", datasetTypes.inputs, (parent.inputs, parent.intermediates)),
        ("outputs", datasetTypes.outputs, (parent.intermediates, parent.outputs)),
        ("prerequisites", datasetTypes.prerequisites, (parent.prerequisites,)),
    )
    for attrName, subset, parentDicts in subsetSpecs:
        setattr(self, attrName, _DatasetDict.fromSubset(subset, *parentDicts))
    self.dataIds = set()
    self.quanta = {}
def randomDimensionSubset(self, n: int = 3, graph: Optional[DimensionGraph] = None) -> DimensionGraph:
    """Generate a random `DimensionGraph` that has a subset of the
    dimensions in a given one.

    Parameters
    ----------
    n : `int`
        Number of dimensions to select, before automatic expansion by
        `DimensionGraph`.
    graph : `DimensionGraph`, optional
        Dimensions to select from.  Defaults to
        ``self.allDataIds.graph``.

    Returns
    -------
    selected : `DimensionGraph`
        Up to ``n`` dimensions randomly selected from ``graph`` without
        replacement (possibly expanded to more by `DimensionGraph`).
    """
    if graph is None:
        graph = self.allDataIds.graph
    # Use min, not max: random.sample raises ValueError when asked for
    # more elements than the population holds, and we want at most ``n``
    # dimensions before expansion (max would always select *all* of them
    # whenever the graph has more than ``n``).
    return DimensionGraph(
        graph.universe,
        names=self.rng.sample(list(graph.dimensions.names),
                              min(n, len(graph.dimensions))))
def runDefineVisits(self, pool=None):
    """Run the ``defineVisits`` subtask over the exposures of the
    ingested raws, if that subtask is configured.
    """
    if self.task.defineVisits is None:
        return
    exposureGraph = DimensionGraph(self.task.universe, names=["exposure"])
    # Reduce each raw ref's data ID to its exposure-level subset,
    # de-duplicating via a set comprehension.
    exposureDataIds = {ref.dataId.subset(exposureGraph) for ref in self._rawRefs}
    self.task.log.info("Defining visits from exposures.")
    self.task.defineVisits.run(exposureDataIds, pool=pool)
def testCalibrationDimensions(self):
    """Check expansion of a (physical_filter, detector) dimension set."""
    g = DimensionGraph(self.universe, names=("physical_filter", "detector"))
    # Expansion pulls in "instrument" (required) and "band" (implied).
    self.assertCountEqual(g.required.names,
                          ("instrument", "detector", "physical_filter"))
    self.assertCountEqual(g.implied.names, ("band",))
    self.assertCountEqual(g.dimensions.names,
                          ("instrument", "detector", "physical_filter", "band"))
    self.assertCountEqual(g.elements.names, g.dimensions.names)
    self.assertCountEqual(g.governors.names, {"instrument"})
def testSubsetCalculation(self):
    """Check that the independent spatial and temporal families of a
    mixed graph are computed correctly.
    """
    names = ("visit", "detector", "tract", "patch", "htm7", "exposure")
    g = DimensionGraph(self.universe, names=names)
    self.assertCountEqual(g.spatial.names,
                          ("observation_regions", "skymap_regions", "htm"))
    self.assertCountEqual(g.temporal.names, ("observation_timespans",))
def testSkyMapDimensions(self):
    """Check expansion of a patch-only dimension set."""
    g = DimensionGraph(self.universe, names=("patch",))
    # A patch requires its tract and skymap; nothing is implied.
    self.assertCountEqual(g.required.names, ("skymap", "tract", "patch"))
    self.assertCountEqual(g.implied.names, ())
    self.assertCountEqual(g.dimensions.names, ("skymap", "tract", "patch"))
    self.assertCountEqual(g.elements.names, g.dimensions.names)
    self.assertCountEqual(g.governors.names, {"skymap"})
    # Spatial family checks, including the family's governor dimension.
    self.assertCountEqual(g.spatial.names, ("skymap_regions",))
    self.assertEqual(g.spatial.names, {"skymap_regions"})
    self.assertEqual(next(iter(g.spatial)).governor, self.universe["skymap"])
def testInstrumentDimensions(self):
    """Check expansion of an (exposure, detector, visit) dimension set."""
    g = DimensionGraph(self.universe, names=("exposure", "detector", "visit"))
    self.assertCountEqual(g.required.names,
                          ("instrument", "exposure", "detector", "visit"))
    self.assertCountEqual(g.implied.names,
                          ("physical_filter", "band", "visit_system"))
    self.assertCountEqual(g.dimensions.names,
                          ("instrument", "exposure", "detector", "visit",
                           "physical_filter", "band", "visit_system"))
    # Join elements that are not dimensions themselves.
    self.assertCountEqual(g.elements.names - g.dimensions.names,
                          ("visit_detector_region", "visit_definition"))
    self.assertCountEqual(g.governors.names, {"instrument"})
def testWithoutFilter(self):
    """Round-trip a tract+patch data ID through the packer."""
    tractPatchGraph = DimensionGraph(universe=self.universe, names=["tract", "patch"])
    fullDataId = DataCoordinate.standardize(skymap=self.fixed["skymap"],
                                            tract=2, patch=6,
                                            universe=self.universe)
    packer = SkyMapDimensionPacker(self.fixed, tractPatchGraph)
    packed = packer.pack(fullDataId)
    # The packed integer must fit in the advertised bit budget and
    # unpack back to the original data ID.
    self.assertLessEqual(packed.bit_length(), packer.maxBits)
    self.assertEqual(packer.unpack(packed), fullDataId)
def setUp(self):
    """Create a universe and a fixed skymap data ID for packer tests."""
    self.universe = DimensionUniverse()
    # Build the skymap record first so the coordinate construction
    # below stays readable.
    skymapRecord = self.universe["skymap"].RecordClass.fromDict({
        "name": "unimportant",
        "tract_max": 5,
        "patch_nx_max": 3,
        "patch_ny_max": 3,
    })
    self.fixed = ExpandedDataCoordinate(
        DimensionGraph(universe=self.universe, names=["skymap"]),
        values=("unimportant",),
        records={"skymap": skymapRecord},
    )
def makeDatasetRef(self, datasetTypeName, dataId=None, storageClassName="DefaultStorageClass",
                   run="run2", conform=True):
    """Make a simple DatasetRef"""
    if dataId is None:
        dataId = self.dataId
    # Pretend we have a parent if this looks like a composite
    _, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
    if componentName:
        parentStorageClass = DatasetType.PlaceholderParentStorageClass
    else:
        parentStorageClass = None
    datasetType = DatasetType(
        datasetTypeName,
        DimensionGraph(self.universe, names=dataId.keys()),
        StorageClass(storageClassName),
        parentStorageClass=parentStorageClass,
    )
    return DatasetRef(datasetType, dataId, id=1, run=run, conform=conform)
def setUp(self):
    """Create a universe and an expanded skymap data ID for packer tests."""
    self.universe = DimensionUniverse()
    # Build the record up front, then attach it to the standardized
    # coordinate via expanded().
    skymapRecord = self.universe["skymap"].RecordClass(
        name="unimportant",
        tract_max=5,
        patch_nx_max=3,
        patch_ny_max=3,
    )
    coordinate = DataCoordinate.fromFullValues(
        DimensionGraph(universe=self.universe, names=["skymap"]),
        values=("unimportant",),
    )
    self.fixed = coordinate.expanded(records={"skymap": skymapRecord})
def testObservationDimensions(self):
    """Check expansion and spatial/temporal families for an
    (exposure, detector, visit) dimension set.
    """
    g = DimensionGraph(self.universe, names=("exposure", "detector", "visit"))
    self.assertCountEqual(g.required.names,
                          ("instrument", "detector", "exposure", "visit"))
    self.assertCountEqual(g.implied.names,
                          ("physical_filter", "band", "visit_system"))
    self.assertCountEqual(g.dimensions.names,
                          ("instrument", "detector", "visit", "exposure",
                           "physical_filter", "band", "visit_system"))
    # Join elements that are not dimensions themselves.
    self.assertCountEqual(g.elements.names - g.dimensions.names,
                          ("visit_detector_region", "visit_definition"))
    self.assertCountEqual(g.governors.names, {"instrument"})
    # Spatial and temporal families, both governed by "instrument".
    self.assertCountEqual(g.spatial.names, ("observation_regions",))
    self.assertCountEqual(g.temporal.names, ("observation_timespans",))
    self.assertEqual(g.spatial.names, {"observation_regions"})
    self.assertEqual(g.temporal.names, {"observation_timespans"})
    self.assertEqual(next(iter(g.spatial)).governor, self.universe["instrument"])
    self.assertEqual(next(iter(g.temporal)).governor, self.universe["instrument"])
def makeDatasetRef(self, datasetTypeName, dataId=None, storageClassName="DefaultStorageClass",
                   conform=True):
    """Make a simple DatasetRef"""
    if dataId is None:
        dataId = self.dataId
    # Derive the dimensions from the data ID's keys.
    graph = DimensionGraph(self.universe, names=dataId.keys())
    storageClass = StorageClass(storageClassName)
    datasetType = DatasetType(datasetTypeName, graph, storageClass)
    return DatasetRef(datasetType, dataId, id=1, run="run2", conform=conform)
def testRegistryConfig(self):
    """Check formatter selection driven by the datastore registry config.

    Exercises, in order: lookup by dataset type + instrument, fallback to
    dimension-based lookup, and fallback to storage-class-based lookup.
    """
    configFile = os.path.join(TESTDIR, "config", "basic", "posixDatastore.yaml")
    config = Config(configFile)
    universe = DimensionUniverse()
    self.factory.registerFormatters(config["datastore", "formatters"], universe=universe)
    # Create a DatasetRef with and without instrument matching the
    # one in the config file.
    dimensions = universe.extract(
        ("visit", "physical_filter", "instrument"))
    sc = StorageClass("DummySC", dict, None)
    # Instrument matches the config entry -> instrument-specific formatter.
    refPviHsc = self.makeDatasetRef("pvi", dimensions, sc, {
        "instrument": "DummyHSC",
        "physical_filter": "v"
    }, conform=False)
    refPviHscFmt = self.factory.getFormatterClass(refPviHsc)
    self.assertIsFormatter(refPviHscFmt)
    self.assertIn("JsonFormatter", refPviHscFmt.name())
    # Instrument does not match -> generic dataset-type formatter.
    refPviNotHsc = self.makeDatasetRef("pvi", dimensions, sc, {
        "instrument": "DummyNotHSC",
        "physical_filter": "v"
    }, conform=False)
    refPviNotHscFmt = self.factory.getFormatterClass(refPviNotHsc)
    self.assertIsFormatter(refPviNotHscFmt)
    self.assertIn("PickleFormatter", refPviNotHscFmt.name())
    # Create a DatasetRef that should fall back to using Dimensions
    refPvixHsc = self.makeDatasetRef("pvix", dimensions, sc, {
        "instrument": "DummyHSC",
        "physical_filter": "v"
    }, conform=False)
    refPvixNotHscFmt = self.factory.getFormatterClass(refPvixHsc)
    self.assertIsFormatter(refPvixNotHscFmt)
    self.assertIn("PickleFormatter", refPvixNotHscFmt.name())
    # Create a DatasetRef that should fall back to using StorageClass
    dimensionsNoV = DimensionGraph(universe, names=("physical_filter", "instrument"))
    refPvixNotHscDims = self.makeDatasetRef("pvix", dimensionsNoV, sc, {
        "instrument": "DummyHSC",
        "physical_filter": "v"
    }, conform=False)
    refPvixNotHscDims_fmt = self.factory.getFormatterClass(
        refPvixNotHscDims)
    self.assertIsFormatter(refPvixNotHscDims_fmt)
    self.assertIn("YamlFormatter", refPvixNotHscDims_fmt.name())
def testRegistryConfig(self):
    """Check formatter selection driven by the datastore registry config.

    Exercises lookup by dataset type + instrument, fallback to
    dimension-based lookup, fallback to storage-class-based lookup, and
    handling of write parameters / write recipes.
    """
    configFile = os.path.join(TESTDIR, "config", "basic", "posixDatastore.yaml")
    config = Config(configFile)
    universe = DimensionUniverse()
    self.factory.registerFormatters(config["datastore", "formatters"], universe=universe)
    # Create a DatasetRef with and without instrument matching the
    # one in the config file.
    dimensions = universe.extract(("visit", "physical_filter", "instrument"))
    sc = StorageClass("DummySC", dict, None)
    # Instrument matches the config entry -> instrument-specific formatter.
    refPviHsc = self.makeDatasetRef("pvi", dimensions, sc,
                                    {"instrument": "DummyHSC", "physical_filter": "v"},
                                    conform=False)
    refPviHscFmt = self.factory.getFormatterClass(refPviHsc)
    self.assertIsFormatter(refPviHscFmt)
    self.assertIn("JsonFormatter", refPviHscFmt.name())
    # Instrument does not match -> generic dataset-type formatter.
    refPviNotHsc = self.makeDatasetRef("pvi", dimensions, sc,
                                       {"instrument": "DummyNotHSC", "physical_filter": "v"},
                                       conform=False)
    refPviNotHscFmt = self.factory.getFormatterClass(refPviNotHsc)
    self.assertIsFormatter(refPviNotHscFmt)
    self.assertIn("PickleFormatter", refPviNotHscFmt.name())
    # Create a DatasetRef that should fall back to using Dimensions
    refPvixHsc = self.makeDatasetRef("pvix", dimensions, sc,
                                     {"instrument": "DummyHSC", "physical_filter": "v"},
                                     conform=False)
    refPvixNotHscFmt = self.factory.getFormatterClass(refPvixHsc)
    self.assertIsFormatter(refPvixNotHscFmt)
    self.assertIn("PickleFormatter", refPvixNotHscFmt.name())
    # Create a DatasetRef that should fall back to using StorageClass
    dimensionsNoV = DimensionGraph(universe, names=("physical_filter", "instrument"))
    refPvixNotHscDims = self.makeDatasetRef("pvix", dimensionsNoV, sc,
                                            {"instrument": "DummyHSC", "physical_filter": "v"},
                                            conform=False)
    refPvixNotHscDims_fmt = self.factory.getFormatterClass(refPvixNotHscDims)
    self.assertIsFormatter(refPvixNotHscDims_fmt)
    self.assertIn("YamlFormatter", refPvixNotHscDims_fmt.name())
    # Check that parameters are stored
    refParam = self.makeDatasetRef("paramtest", dimensions, sc,
                                   {"instrument": "DummyNotHSC", "physical_filter": "v"},
                                   conform=False)
    lookup, refParam_fmt, kwargs = self.factory.getFormatterClassWithMatch(refParam)
    self.assertIn("writeParameters", kwargs)
    expected = {"max": 5, "min": 2, "comment": "Additional commentary", "recipe": "recipe1"}
    self.assertEqual(kwargs["writeParameters"], expected)
    self.assertIn("FormatterTest", refParam_fmt.name())
    f = self.factory.getFormatter(refParam, self.fileDescriptor)
    self.assertEqual(f.writeParameters, expected)
    # Caller-supplied write parameters override/extend the configured ones.
    f = self.factory.getFormatter(refParam, self.fileDescriptor,
                                  writeParameters={"min": 22, "extra": 50})
    self.assertEqual(f.writeParameters, {"max": 5, "min": 22,
                                         "comment": "Additional commentary",
                                         "extra": 50, "recipe": "recipe1"})
    self.assertIn("recipe1", f.writeRecipes)
    self.assertEqual(f.writeParameters["recipe"], "recipe1")
    with self.assertRaises(ValueError):
        # "new" is not allowed as a write parameter
        self.factory.getFormatter(refParam, self.fileDescriptor,
                                  writeParameters={"new": 1})
    with self.assertRaises(RuntimeError):
        # "mode" is a required recipe parameter
        self.factory.getFormatter(refParam, self.fileDescriptor,
                                  writeRecipes={"recipe3": {"notmode": 1}})
class _TaskScaffolding:
    """Helper class aggregating information about a `PipelineTask`, used when
    constructing a `QuantumGraph`.

    See `_PipelineScaffolding` for a top-down description of the full
    scaffolding data structure.

    Parameters
    ----------
    taskDef : `TaskDef`
        Data structure that identifies the task class and its config.
    parent : `_PipelineScaffolding`
        The parent data structure that will hold the instance being
        constructed.
    datasetTypes : `TaskDatasetTypes`
        Data structure that categorizes the dataset types used by this task.
    """
    def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding, datasetTypes: TaskDatasetTypes):
        universe = parent.dimensions.universe
        self.taskDef = taskDef
        self.dimensions = DimensionGraph(universe, names=taskDef.connections.dimensions)
        # A task's quantum dimensions must be expressible within the
        # pipeline's overall dimensions.
        assert self.dimensions.issubset(parent.dimensions)
        # Initialize _DatasetDicts as subsets of the one or two
        # corresponding dicts in the parent _PipelineScaffolding.
        self.initInputs = _DatasetDict.fromSubset(datasetTypes.initInputs, parent.initInputs,
                                                  parent.initIntermediates)
        self.initOutputs = _DatasetDict.fromSubset(datasetTypes.initOutputs, parent.initIntermediates,
                                                   parent.initOutputs)
        self.inputs = _DatasetDict.fromSubset(datasetTypes.inputs, parent.inputs,
                                              parent.intermediates)
        self.outputs = _DatasetDict.fromSubset(datasetTypes.outputs, parent.intermediates,
                                               parent.outputs)
        self.prerequisites = _DatasetDict.fromSubset(
            datasetTypes.prerequisites, parent.prerequisites)
        # Populated later: data IDs seen for this task, and the
        # per-data-ID quantum scaffolding objects.
        self.dataIds = set()
        self.quanta = {}

    def __repr__(self):
        # Default dataclass-injected __repr__ gets caught in an infinite loop
        # because of back-references.
        return f"_TaskScaffolding(taskDef={self.taskDef}, ...)"

    taskDef: TaskDef
    """Data structure that identifies the task class and its config
    (`TaskDef`).
    """

    dimensions: DimensionGraph
    """The dimensions of a single `Quantum` of this task (`DimensionGraph`).
    """

    initInputs: _DatasetDict
    """Dictionary containing information about datasets used to construct this
    task (`_DatasetDict`).
    """

    initOutputs: _DatasetDict
    """Dictionary containing information about datasets produced as a
    side-effect of constructing this task (`_DatasetDict`).
    """

    inputs: _DatasetDict
    """Dictionary containing information about datasets used as regular,
    graph-constraining inputs to this task (`_DatasetDict`).
    """

    outputs: _DatasetDict
    """Dictionary containing information about datasets produced by this task
    (`_DatasetDict`).
    """

    prerequisites: _DatasetDict
    """Dictionary containing information about input datasets that must be
    present in the repository before any Pipeline containing this task is run
    (`_DatasetDict`).
    """

    quanta: Dict[DataCoordinate, _QuantumScaffolding]
    """Dictionary mapping data ID to a scaffolding object for the Quantum of
    this task with that data ID.
    """

    def makeQuantumSet(self) -> Set[Quantum]:
        """Create a `set` of `Quantum` from the information in ``self``.

        Returns
        -------
        nodes : `set` of `Quantum`
            The `Quantum` elements corresponding to this task.
        """
        return set(q.makeQuantum() for q in self.quanta.values())