def testRunQuantum(self):
    """Exercise ``run_quantum`` end-to-end and verify the output dataset."""
    dataId = dict(instrument=self.CAMERA_ID,
                  visit=self.VISIT_ID,
                  detector=self.CHIP_ID)
    butler = self._makeButler()
    # The task's own config is not persistable because it refers to a
    # local class; the stored copy is never read back, so a freshly
    # constructed config is good enough here.
    butler.put(self.task.ConfigClass(), "apdb_marker", dataId)

    testQuantum = Quantum(taskClass=self.taskClass)
    testQuantum.addPredictedInput(
        ref_from_connection(butler, self.connections.dbInfo, dataId))
    outputId = {"instrument": self.CAMERA_ID}
    testQuantum.addOutput(
        ref_from_connection(butler, self.connections.measurement, outputId))

    run_quantum(self.task, butler, testQuantum)

    # Confirm the output data ID was passed through to DummyTask.run.
    measurement = butler.get(self.connections.measurement.name,
                             instrument=self.CAMERA_ID)
    expected = len(self.CAMERA_ID) * u.dimensionless_unscaled
    self.assertEqual(measurement.quantity, expected)
def _makeQuanta(self, config):
    """Return a list of 100 Quanta, one per simulated visit.

    Input/output dataset types are taken from the task configuration.
    """
    run = Run(collection=1, environment=None, pipeline=None)

    inputType = pipeBase.DatasetTypeDescriptor.fromConfig(config.input).datasetType
    outputType = pipeBase.DatasetTypeDescriptor.fromConfig(config.output).datasetType

    quanta = []
    for visitId in range(100):
        oneQuantum = Quantum(run=run, task=None)
        oneQuantum.addPredictedInput(self._makeDSRefVisit(inputType, visitId))
        oneQuantum.addOutput(self._makeDSRefVisit(outputType, visitId))
        quanta.append(oneQuantum)
    return quanta
def _makeQuanta(self, config):
    """Build 100 per-visit Quanta from the task's connections."""
    universe = DimensionUniverse()
    run = Run(collection=1, environment=None, pipeline=None)
    connections = config.connections.ConnectionsClass(config=config)
    inputType = connections.input.makeDatasetType(universe)
    outputType = connections.output.makeDatasetType(universe)

    def _buildQuantum(visitId):
        # Each quantum consumes and produces a single visit's dataset.
        oneQuantum = Quantum(run=run)
        oneQuantum.addPredictedInput(
            self._makeDSRefVisit(inputType, visitId, universe))
        oneQuantum.addOutput(
            self._makeDSRefVisit(outputType, visitId, universe))
        return oneQuantum

    return [_buildQuantum(visit) for visit in range(100)]
def _makeQuanta(self, config):
    """Create 100 Quanta, one per visit, for the given config."""
    universe = DimensionUniverse.fromConfig()
    run = Run(collection=1, environment=None, pipeline=None)

    inDescriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.input)
    outDescriptor = pipeBase.DatasetTypeDescriptor.fromConfig(config.output)
    inType = inDescriptor.makeDatasetType(universe)
    outType = outDescriptor.makeDatasetType(universe)

    quanta = []
    for visitId in range(100):
        oneQuantum = Quantum(run=run, task=None)
        oneQuantum.addPredictedInput(self._makeDSRefVisit(inType, visitId))
        oneQuantum.addOutput(self._makeDSRefVisit(outType, visitId))
        quanta.append(oneQuantum)
    return quanta
def testAddInputsOutputs(self):
    """Check bookkeeping of predicted inputs, actual inputs, and outputs."""
    quantum = Quantum(taskName="some.task.object", run=None)
    # A fresh quantum starts with no predicted inputs.
    self.assertEqual(quantum.predictedInputs, {})

    universe = DimensionUniverse()
    dsName = "test_ds"
    datasetType = DatasetType(
        dsName,
        universe.extract(("instrument", "visit")),
        StorageClass("testref_StructuredData"))

    def _visitRef(visit):
        # A ref for the fixed dummy instrument at the given visit.
        return DatasetRef(datasetType, dict(instrument="DummyCam", visit=visit))

    # First predicted input creates the per-type entry...
    quantum.addPredictedInput(_visitRef(42))
    self.assertIn(dsName, quantum.predictedInputs)
    self.assertEqual(len(quantum.predictedInputs[dsName]), 1)
    # ...and a second ref of the same type extends it.
    ref = _visitRef(43)
    quantum.addPredictedInput(ref)
    self.assertEqual(len(quantum.predictedInputs[dsName]), 2)

    # Marking a ref as used records it in actualInputs.
    self.assertEqual(quantum.actualInputs, {})
    quantum._markInputUsed(ref)
    self.assertIn(dsName, quantum.actualInputs)
    self.assertEqual(len(quantum.actualInputs[dsName]), 1)

    # Outputs are tracked the same way as predicted inputs.
    self.assertEqual(quantum.outputs, {})
    quantum.addOutput(_visitRef(42))
    self.assertIn(dsName, quantum.outputs)
    self.assertEqual(len(quantum.outputs[dsName]), 1)
    quantum.addOutput(_visitRef(43))
    self.assertEqual(len(quantum.outputs[dsName]), 2)
def _makeGraph(self, taskDatasets, inputs, outputs, initInputs, initOutputs,
               originInfo, userQuery):
    """Make QuantumGraph instance.

    Parameters
    ----------
    taskDatasets : sequence of `_TaskDatasetTypes`
        Tasks with their inputs and outputs.
    inputs : `set` of `DatasetType`
        Datasets which should already exist in input repository
    outputs : `set` of `DatasetType`
        Datasets which will be created by tasks
    initInputs : `set` of `DatasetType`
        Datasets which should exist in input repository, and will be used
        in task initialization
    initOutputs : `set` of `DatasetType`
        Datasets which will be created in task initialization
    originInfo : `DatasetOriginInfo`
        Object which provides names of the input/output collections.
    userQuery : `str`
        String which defines user-defined selection for registry, should be
        empty or `None` if there is no restrictions on data selection.

    Returns
    -------
    `QuantumGraph` instance.

    Raises
    ------
    `GraphBuilderError`
        If an initInput dataset cannot be found in any input collection.
    `OutputExistsError`
        If some (but not all) outputs of a quantum already exist and
        ``self.skipExisting`` does not allow skipping it.
    """
    # Translate the (possibly empty) user expression and run a single
    # registry query covering all input/output dataset types.
    parsedQuery = self._parseUserQuery(userQuery or "")
    expr = None if parsedQuery is None else str(parsedQuery)
    rows = self.registry.selectDimensions(originInfo, expr, inputs, outputs)

    # store result locally for multi-pass algorithm below
    # TODO: change it to single pass
    dimensionVerse = []
    for row in rows:
        _LOG.debug("row: %s", row)
        dimensionVerse.append(row)

    # Next step is to group by task quantum dimensions
    qgraph = QuantumGraph()
    qgraph._inputDatasetTypes = inputs
    qgraph._outputDatasetTypes = outputs
    # Resolve each initInput against the input collections in priority
    # order; the for/else raises when no collection has the dataset.
    for dsType in initInputs:
        for collection in originInfo.getInputCollections(dsType.name):
            result = self.registry.find(collection, dsType)
            if result is not None:
                qgraph.initInputs.append(result)
                break
        else:
            raise GraphBuilderError(f"Could not find initInput {dsType.name} in any input"
                                    " collection")
    # initOutputs get placeholder refs with empty data IDs.
    for dsType in initOutputs:
        qgraph.initOutputs.append(DatasetRef(dsType, {}))

    for taskDss in taskDatasets:
        taskQuantaInputs = {}    # key is the quantum dataId (as tuple)
        taskQuantaOutputs = {}   # key is the quantum dataId (as tuple)
        # Collect the link columns that define this task's quantum
        # granularity from its configured quantum dimensions.
        qlinks = []
        for dimensionName in taskDss.taskDef.config.quantum.dimensions:
            dimension = self.dimensions[dimensionName]
            qlinks += dimension.link
        _LOG.debug("task %s qdimensions: %s", taskDss.taskDef.label, qlinks)

        # some rows will be non-unique for subset of dimensions, create
        # temporary structure to remove duplicates
        for row in dimensionVerse:
            # qkey identifies one quantum: the row's values for the
            # task's quantum link columns.
            qkey = tuple((col, row.dataId[col]) for col in qlinks)
            _LOG.debug("qkey: %s", qkey)

            def _dataRefKey(dataRef):
                return tuple(sorted(dataRef.dataId.items()))

            # Deduplicate refs per dataset type by their dataId key.
            qinputs = taskQuantaInputs.setdefault(qkey, {})
            for dsType in taskDss.inputs:
                dataRefs = qinputs.setdefault(dsType, {})
                dataRef = row.datasetRefs[dsType]
                dataRefs[_dataRefKey(dataRef)] = dataRef
                _LOG.debug("add input dataRef: %s %s", dsType.name, dataRef)

            qoutputs = taskQuantaOutputs.setdefault(qkey, {})
            for dsType in taskDss.outputs:
                dataRefs = qoutputs.setdefault(dsType, {})
                dataRef = row.datasetRefs[dsType]
                dataRefs[_dataRefKey(dataRef)] = dataRef
                _LOG.debug("add output dataRef: %s %s", dsType.name, dataRef)

        # pre-flight does not fill dataset components, and graph users
        # may need to know that, re-retrieve all input datasets to have
        # their components properly filled.
        for qinputs in taskQuantaInputs.values():
            for dataRefs in qinputs.values():
                for key in dataRefs.keys():
                    if dataRefs[key].id is not None:
                        dataRefs[key] = self.registry.getDataset(dataRefs[key].id)

        # all nodes for this task
        quanta = []
        for qkey in taskQuantaInputs:
            # taskQuantaInputs and taskQuantaOutputs have the same keys
            _LOG.debug("make quantum for qkey: %s", qkey)
            quantum = Quantum(run=None, task=None)

            # add all outputs, but check first that outputs don't exist
            outputs = list(chain.from_iterable(dataRefs.values()
                                               for dataRefs in taskQuantaOutputs[qkey].values()))
            for ref in outputs:
                _LOG.debug("add output: %s", ref)
            # A ref with a non-None id already exists in the registry:
            # all existing -> optionally skip; partially existing -> error.
            if self.skipExisting and all(ref.id is not None for ref in outputs):
                _LOG.debug("all output dataRefs already exist, skip quantum")
                continue
            if any(ref.id is not None for ref in outputs):
                # some outputs exist, can't override them
                raise OutputExistsError(taskDss.taskDef.taskName, outputs)
            for ref in outputs:
                quantum.addOutput(ref)

            # add all inputs
            for dataRefs in taskQuantaInputs[qkey].values():
                for ref in dataRefs.values():
                    quantum.addPredictedInput(ref)
                    _LOG.debug("add input: %s", ref)
            quanta.append(quantum)

        qgraph.append(QuantumGraphNodes(taskDss.taskDef, quanta))

    return qgraph
def _makeGraph(self, taskDatasets, required, optional, prerequisite,
               initInputs, initOutputs, originInfo, userQuery,
               perDatasetTypeDimensions=()):
    """Make QuantumGraph instance.

    Parameters
    ----------
    taskDatasets : sequence of `_TaskDatasetTypes`
        Tasks with their inputs and outputs.
    required : `set` of `~lsst.daf.butler.DatasetType`
        Datasets that must exist in the repository in order to generate
        a QuantumGraph node that consumes them.
    optional : `set` of `~lsst.daf.butler.DatasetType`
        Datasets that will be produced by the graph, but may exist in
        the repository.  If ``self.skipExisting`` and all outputs of a
        particular node already exist, it will be skipped.  Otherwise
        pre-existing datasets of these types will cause
        `OutputExistsError` to be raised.
    prerequisite : `set` of `~lsst.daf.butler.DatasetType`
        Datasets that must exist in the repository, but whose absence
        should cause `PrerequisiteMissingError` to be raised if they
        are needed by any graph node that would otherwise be created.
    initInputs : `set` of `DatasetType`
        Datasets which should exist in input repository, and will be
        used in task initialization
    initOutputs : `set` of `DatasetType`
        Datasets which will be created in task initialization
    originInfo : `DatasetOriginInfo`
        Object which provides names of the input/output collections.
    userQuery : `str`
        String which defines user-defined selection for registry, should
        be empty or `None` if there is no restrictions on data selection.
    perDatasetTypeDimensions : iterable of `Dimension` or `str`
        Dimensions (or names thereof) that may have different values for
        different dataset types within the same quantum.

    Returns
    -------
    `QuantumGraph` instance.

    Raises
    ------
    `PrerequisiteMissingError`
        If a needed prerequisite dataset is missing.
    `GraphBuilderError`
        If an initInput dataset cannot be found in any input collection.
    `OutputExistsError`
        If some (but not all) outputs of a quantum already exist and
        ``self.skipExisting`` does not allow skipping it.
    """
    # Single registry query covering required/optional/prerequisite
    # dataset types; missing prerequisites surface as LookupError below.
    rows = self.registry.selectMultipleDatasetTypes(
        originInfo, userQuery,
        required=required, optional=optional, prerequisite=prerequisite,
        perDatasetTypeDimensions=perDatasetTypeDimensions)

    # store result locally for multi-pass algorithm below
    # TODO: change it to single pass
    dimensionVerse = []
    try:
        for row in rows:
            _LOG.debug("row: %s", row)
            dimensionVerse.append(row)
    except LookupError as err:
        raise PrerequisiteMissingError(str(err)) from err

    # Next step is to group by task quantum dimensions
    qgraph = QuantumGraph()
    qgraph._inputDatasetTypes = (required | prerequisite)
    qgraph._outputDatasetTypes = optional
    # Resolve each initInput against the input collections in priority
    # order; the for/else raises when no collection has the dataset.
    for dsType in initInputs:
        for collection in originInfo.getInputCollections(dsType.name):
            result = self.registry.find(collection, dsType)
            if result is not None:
                qgraph.initInputs.append(result)
                break
        else:
            raise GraphBuilderError(
                f"Could not find initInput {dsType.name} in any input"
                " collection")
    # initOutputs get placeholder refs with empty data IDs.
    for dsType in initOutputs:
        qgraph.initOutputs.append(DatasetRef(dsType, {}))

    for taskDss in taskDatasets:
        taskQuantaInputs = {}    # key is the quantum dataId (as tuple)
        taskQuantaOutputs = {}   # key is the quantum dataId (as tuple)
        # Collect the link columns that define this task's quantum
        # granularity from its configured quantum dimensions.
        qlinks = []
        for dimensionName in taskDss.taskDef.config.quantum.dimensions:
            dimension = self.dimensions[dimensionName]
            qlinks += dimension.links()
        _LOG.debug("task %s qdimensions: %s", taskDss.taskDef.label, qlinks)

        # some rows will be non-unique for subset of dimensions, create
        # temporary structure to remove duplicates
        for row in dimensionVerse:
            # qkey identifies one quantum: the row's values for the
            # task's quantum link columns.
            qkey = tuple((col, row.dataId[col]) for col in qlinks)
            _LOG.debug("qkey: %s", qkey)

            def _datasetRefKey(datasetRef):
                return tuple(sorted(datasetRef.dataId.items()))

            # Deduplicate refs per dataset type by their dataId key.
            qinputs = taskQuantaInputs.setdefault(qkey, {})
            for dsType in taskDss.inputs:
                datasetRefs = qinputs.setdefault(dsType, {})
                datasetRef = row.datasetRefs[dsType]
                datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                _LOG.debug("add input datasetRef: %s %s", dsType.name,
                           datasetRef)

            qoutputs = taskQuantaOutputs.setdefault(qkey, {})
            for dsType in taskDss.outputs:
                datasetRefs = qoutputs.setdefault(dsType, {})
                datasetRef = row.datasetRefs[dsType]
                datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                _LOG.debug("add output datasetRef: %s %s", dsType.name,
                           datasetRef)

        # all nodes for this task
        quanta = []
        for qkey in taskQuantaInputs:
            # taskQuantaInputs and taskQuantaOutputs have the same keys
            _LOG.debug("make quantum for qkey: %s", qkey)
            quantum = Quantum(run=None, task=None)

            # add all outputs, but check first that outputs don't exist
            outputs = list(
                chain.from_iterable(
                    datasetRefs.values()
                    for datasetRefs in taskQuantaOutputs[qkey].values()))
            for ref in outputs:
                _LOG.debug("add output: %s", ref)
            # A ref with a non-None id already exists in the registry:
            # all existing -> optionally skip; partially existing -> error.
            if self.skipExisting and all(ref.id is not None for ref in outputs):
                _LOG.debug(
                    "all output datasetRefs already exist, skip quantum")
                continue
            if any(ref.id is not None for ref in outputs):
                # some outputs exist, can't override them
                raise OutputExistsError(taskDss.taskDef.taskName, outputs)
            for ref in outputs:
                quantum.addOutput(ref)

            # add all inputs
            for datasetRefs in taskQuantaInputs[qkey].values():
                for ref in datasetRefs.values():
                    quantum.addPredictedInput(ref)
                    _LOG.debug("add input: %s", ref)
            quanta.append(quantum)

        qgraph.append(QuantumGraphTaskNodes(taskDss.taskDef, quanta))

    return qgraph
def _makeGraph(self, taskDatasets, required, optional, prerequisite,
               initInputs, initOutputs, originInfo, userQuery,
               perDatasetTypeDimensions=()):
    """Make QuantumGraph instance.

    Parameters
    ----------
    taskDatasets : sequence of `_TaskDatasetTypes`
        Tasks with their inputs and outputs.
    required : `set` of `~lsst.daf.butler.DatasetType`
        Datasets that must exist in the repository in order to generate
        a QuantumGraph node that consumes them.
    optional : `set` of `~lsst.daf.butler.DatasetType`
        Datasets that will be produced by the graph, but may exist in
        the repository.  If ``self.skipExisting`` and all outputs of a
        particular node already exist, it will be skipped.  Otherwise
        pre-existing datasets of these types will cause
        `OutputExistsError` to be raised.
    prerequisite : `set` of `~lsst.daf.butler.DatasetType`
        Datasets that must exist in the repository, but whose absence
        should cause `PrerequisiteMissingError` to be raised if they
        are needed by any graph node that would otherwise be created.
    initInputs : `set` of `DatasetType`
        Datasets which should exist in input repository, and will be
        used in task initialization
    initOutputs : `set` of `DatasetType`
        Datasets which will be created in task initialization
    originInfo : `DatasetOriginInfo`
        Object which provides names of the input/output collections.
    userQuery : `str`
        String which defines user-defined selection for registry, should
        be empty or `None` if there is no restrictions on data selection.
    perDatasetTypeDimensions : iterable of `Dimension` or `str`
        Dimensions (or names thereof) that may have different values for
        different dataset types within the same quantum.

    Returns
    -------
    `QuantumGraph` instance.

    Raises
    ------
    `PrerequisiteMissingError`
        If a needed prerequisite dataset is missing.
    `GraphBuilderError`
        If an initInput dataset cannot be found in any input collection.
    `OutputExistsError`
        If some (but not all) outputs of a quantum already exist and
        ``self.skipExisting`` does not allow skipping it.
    """
    # Single registry query covering required/optional/prerequisite
    # dataset types; missing prerequisites surface as LookupError below.
    rows = self.registry.selectMultipleDatasetTypes(
        originInfo, userQuery,
        required=required, optional=optional, prerequisite=prerequisite,
        perDatasetTypeDimensions=perDatasetTypeDimensions
    )

    # store result locally for multi-pass algorithm below
    # TODO: change it to single pass
    dimensionVerse = []
    try:
        for row in rows:
            _LOG.debug("row: %s", row)
            dimensionVerse.append(row)
    except LookupError as err:
        raise PrerequisiteMissingError(str(err)) from err

    # Next step is to group by task quantum dimensions
    qgraph = QuantumGraph()
    qgraph._inputDatasetTypes = (required | prerequisite)
    qgraph._outputDatasetTypes = optional
    # Resolve each initInput against the input collections in priority
    # order; the for/else raises when no collection has the dataset.
    for dsType in initInputs:
        for collection in originInfo.getInputCollections(dsType.name):
            result = self.registry.find(collection, dsType)
            if result is not None:
                qgraph.initInputs.append(result)
                break
        else:
            raise GraphBuilderError(f"Could not find initInput {dsType.name} in any input"
                                    " collection")
    # initOutputs get placeholder refs with empty data IDs.
    for dsType in initOutputs:
        qgraph.initOutputs.append(DatasetRef(dsType, {}))

    for taskDss in taskDatasets:
        taskQuantaInputs = {}    # key is the quantum dataId (as tuple)
        taskQuantaOutputs = {}   # key is the quantum dataId (as tuple)
        # Collect the link columns that define this task's quantum
        # granularity from its configured quantum dimensions.
        qlinks = []
        for dimensionName in taskDss.taskDef.config.quantum.dimensions:
            dimension = self.dimensions[dimensionName]
            qlinks += dimension.links()
        _LOG.debug("task %s qdimensions: %s", taskDss.taskDef.label, qlinks)

        # some rows will be non-unique for subset of dimensions, create
        # temporary structure to remove duplicates
        for row in dimensionVerse:
            # qkey identifies one quantum: the row's values for the
            # task's quantum link columns.
            qkey = tuple((col, row.dataId[col]) for col in qlinks)
            _LOG.debug("qkey: %s", qkey)

            def _datasetRefKey(datasetRef):
                return tuple(sorted(datasetRef.dataId.items()))

            # Deduplicate refs per dataset type by their dataId key.
            qinputs = taskQuantaInputs.setdefault(qkey, {})
            for dsType in taskDss.inputs:
                datasetRefs = qinputs.setdefault(dsType, {})
                datasetRef = row.datasetRefs[dsType]
                datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                _LOG.debug("add input datasetRef: %s %s", dsType.name,
                           datasetRef)

            qoutputs = taskQuantaOutputs.setdefault(qkey, {})
            for dsType in taskDss.outputs:
                datasetRefs = qoutputs.setdefault(dsType, {})
                datasetRef = row.datasetRefs[dsType]
                datasetRefs[_datasetRefKey(datasetRef)] = datasetRef
                _LOG.debug("add output datasetRef: %s %s", dsType.name,
                           datasetRef)

        # all nodes for this task
        quanta = []
        for qkey in taskQuantaInputs:
            # taskQuantaInputs and taskQuantaOutputs have the same keys
            _LOG.debug("make quantum for qkey: %s", qkey)
            quantum = Quantum(run=None, task=None)

            # add all outputs, but check first that outputs don't exist
            outputs = list(chain.from_iterable(datasetRefs.values()
                                               for datasetRefs in taskQuantaOutputs[qkey].values()))
            for ref in outputs:
                _LOG.debug("add output: %s", ref)
            # A ref with a non-None id already exists in the registry:
            # all existing -> optionally skip; partially existing -> error.
            if self.skipExisting and all(ref.id is not None for ref in outputs):
                _LOG.debug("all output datasetRefs already exist, skip quantum")
                continue
            if any(ref.id is not None for ref in outputs):
                # some outputs exist, can't override them
                raise OutputExistsError(taskDss.taskDef.taskName, outputs)
            for ref in outputs:
                quantum.addOutput(ref)

            # add all inputs
            for datasetRefs in taskQuantaInputs[qkey].values():
                for ref in datasetRefs.values():
                    quantum.addPredictedInput(ref)
                    _LOG.debug("add input: %s", ref)
            quanta.append(quantum)

        qgraph.append(QuantumGraphTaskNodes(taskDss.taskDef, quanta))

    return qgraph