class _TaskScaffolding:
    """Helper class aggregating information about a `PipelineTask`, used when
    constructing a `QuantumGraph`.

    See `_PipelineScaffolding` for a top-down description of the full
    scaffolding data structure.

    Parameters
    ----------
    taskDef : `TaskDef`
        Data structure that identifies the task class and its config.
    parent : `_PipelineScaffolding`
        The parent data structure that will hold the instance being
        constructed.
    datasetTypes : `TaskDatasetTypes`
        Data structure that categorizes the dataset types used by this task.

    Raises
    ------
    GraphBuilderError
        Raised if the task's dimensions are not a subset of the union of the
        pipeline's dataset dimensions.
    """
    def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding, datasetTypes: TaskDatasetTypes):
        universe = parent.dimensions.universe
        self.taskDef = taskDef
        self.dimensions = DimensionGraph(universe, names=taskDef.connections.dimensions)
        if not self.dimensions.issubset(parent.dimensions):
            raise GraphBuilderError(f"Task with label '{taskDef.label}' has dimensions "
                                    f"{self.dimensions} that are not a subset of "
                                    f"the pipeline dimensions {parent.dimensions}.")
        # Initialize _DatasetScaffoldingDicts as subsets of the one or two
        # corresponding dicts in the parent _PipelineScaffolding.
        self.initInputs = _DatasetScaffoldingDict.fromSubset(datasetTypes.initInputs,
                                                             parent.initInputs,
                                                             parent.initIntermediates)
        self.initOutputs = _DatasetScaffoldingDict.fromSubset(datasetTypes.initOutputs,
                                                              parent.initIntermediates,
                                                              parent.initOutputs)
        self.inputs = _DatasetScaffoldingDict.fromSubset(datasetTypes.inputs,
                                                         parent.inputs,
                                                         parent.intermediates)
        self.outputs = _DatasetScaffoldingDict.fromSubset(datasetTypes.outputs,
                                                          parent.intermediates,
                                                          parent.outputs)
        self.prerequisites = _DatasetScaffoldingDict.fromSubset(datasetTypes.prerequisites,
                                                                parent.prerequisites)
        # Add backreferences to the _DatasetScaffolding objects that point to
        # this Task.
        for dataset in itertools.chain(self.initInputs.values(), self.inputs.values(),
                                       self.prerequisites.values()):
            dataset.consumers[self.taskDef.label] = self
        for dataset in itertools.chain(self.initOutputs.values(), self.outputs.values()):
            assert dataset.producer is None
            dataset.producer = self
        self.dataIds = set()
        self.quanta = []

    taskDef: TaskDef
    """Data structure that identifies the task class and its config
    (`TaskDef`).
    """

    dimensions: DimensionGraph
    """The dimensions of a single `Quantum` of this task (`DimensionGraph`).
    """

    initInputs: _DatasetScaffoldingDict
    """Dictionary containing information about datasets used to construct this
    task (`_DatasetScaffoldingDict`).
    """

    initOutputs: _DatasetScaffoldingDict
    """Dictionary containing information about datasets produced as a
    side-effect of constructing this task (`_DatasetScaffoldingDict`).
    """

    inputs: _DatasetScaffoldingDict
    """Dictionary containing information about datasets used as regular,
    graph-constraining inputs to this task (`_DatasetScaffoldingDict`).
    """

    outputs: _DatasetScaffoldingDict
    """Dictionary containing information about datasets produced by this task
    (`_DatasetScaffoldingDict`).
    """

    prerequisites: _DatasetScaffoldingDict
    """Dictionary containing information about input datasets that must be
    present in the repository before any Pipeline containing this task is run
    (`_DatasetScaffoldingDict`).
    """

    dataIds: Set[ExpandedDataCoordinate]
    """Data IDs for all quanta for this task in the graph (`set` of
    `ExpandedDataCoordinate`).

    Populated after construction by `_PipelineScaffolding.fillDataIds`.
    """

    quanta: List[Quantum]
    """All quanta for this task in the graph (`list` of `Quantum`).

    Populated after construction by `_PipelineScaffolding.fillQuanta`.
    """

    def addQuantum(self, quantum: Quantum):
        config = self.taskDef.config
        connectionClass = config.connections.ConnectionsClass
        connectionInstance = connectionClass(config=config)
        # This will raise if one of the check conditions is not met, which is
        # the intended behavior.
        result = connectionInstance.adjustQuantum(quantum.predictedInputs)
        quantum._predictedInputs = NamedKeyDict(result)
        # If this function has reached this far, add the quantum.
        self.quanta.append(quantum)

    def makeQuantumGraphTaskNodes(self) -> QuantumGraphTaskNodes:
        """Create a `QuantumGraphTaskNodes` instance from the information in
        ``self``.

        Returns
        -------
        nodes : `QuantumGraphTaskNodes`
            The `QuantumGraph` elements corresponding to this task.
        """
        return QuantumGraphTaskNodes(
            taskDef=self.taskDef,
            quanta=self.quanta,
            initInputs=self.initInputs.unpackRefs(),
            initOutputs=self.initOutputs.unpackRefs(),
        )
class _TaskScaffolding:
    """Helper class aggregating information about a `PipelineTask`, used when
    constructing a `QuantumGraph`.

    See `_PipelineScaffolding` for a top-down description of the full
    scaffolding data structure.

    Parameters
    ----------
    taskDef : `TaskDef`
        Data structure that identifies the task class and its config.
    parent : `_PipelineScaffolding`
        The parent data structure that will hold the instance being
        constructed.
    datasetTypes : `TaskDatasetTypes`
        Data structure that categorizes the dataset types used by this task.
    """
    def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding, datasetTypes: TaskDatasetTypes):
        universe = parent.dimensions.universe
        self.taskDef = taskDef
        self.dimensions = DimensionGraph(universe, names=taskDef.connections.dimensions)
        assert self.dimensions.issubset(parent.dimensions)
        # Initialize _DatasetDicts as subsets of the one or two
        # corresponding dicts in the parent _PipelineScaffolding.
        self.initInputs = _DatasetDict.fromSubset(datasetTypes.initInputs, parent.initInputs,
                                                  parent.initIntermediates)
        self.initOutputs = _DatasetDict.fromSubset(datasetTypes.initOutputs, parent.initIntermediates,
                                                   parent.initOutputs)
        self.inputs = _DatasetDict.fromSubset(datasetTypes.inputs, parent.inputs, parent.intermediates)
        self.outputs = _DatasetDict.fromSubset(datasetTypes.outputs, parent.intermediates, parent.outputs)
        self.prerequisites = _DatasetDict.fromSubset(datasetTypes.prerequisites, parent.prerequisites)
        self.dataIds = set()
        self.quanta = {}

    def __repr__(self):
        # Default dataclass-injected __repr__ gets caught in an infinite loop
        # because of back-references.
        return f"_TaskScaffolding(taskDef={self.taskDef}, ...)"

    taskDef: TaskDef
    """Data structure that identifies the task class and its config
    (`TaskDef`).
    """

    dimensions: DimensionGraph
    """The dimensions of a single `Quantum` of this task (`DimensionGraph`).
    """

    initInputs: _DatasetDict
    """Dictionary containing information about datasets used to construct this
    task (`_DatasetDict`).
    """

    initOutputs: _DatasetDict
    """Dictionary containing information about datasets produced as a
    side-effect of constructing this task (`_DatasetDict`).
    """

    inputs: _DatasetDict
    """Dictionary containing information about datasets used as regular,
    graph-constraining inputs to this task (`_DatasetDict`).
    """

    outputs: _DatasetDict
    """Dictionary containing information about datasets produced by this task
    (`_DatasetDict`).
    """

    prerequisites: _DatasetDict
    """Dictionary containing information about input datasets that must be
    present in the repository before any Pipeline containing this task is run
    (`_DatasetDict`).
    """

    quanta: Dict[DataCoordinate, _QuantumScaffolding]
    """Dictionary mapping data ID to a scaffolding object for the Quantum of
    this task with that data ID.
    """

    def makeQuantumSet(self) -> Set[Quantum]:
        """Create a `set` of `Quantum` from the information in ``self``.

        Returns
        -------
        nodes : `set` of `Quantum`
            The `Quantum` elements corresponding to this task.
        """
        return set(q.makeQuantum() for q in self.quanta.values())
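# A minimal usage sketch (not part of the original module), assuming a
# pipeline-level caller that wants every `Quantum` in the graph.  In this
# newer variant of the class, each entry in ``quanta`` is a
# `_QuantumScaffolding` keyed by its data ID, and `makeQuantumSet` converts
# those entries into concrete `Quantum` instances.  The function name and its
# ``tasks`` argument are hypothetical.
def _exampleAllQuanta(tasks: List[_TaskScaffolding]) -> Set[Quantum]:
    allQuanta: Set[Quantum] = set()
    for task in tasks:
        # Union of the per-task quantum sets produced by makeQuantumSet.
        allQuanta |= task.makeQuantumSet()
    return allQuanta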