Ejemplo n.º 1
0
 def testRegions(self):
     """Test that data IDs for a few known dimensions have the expected
     regions.
     """
     # Pair each dimension subset with the spatial element whose record is
     # expected to provide the data ID's region.  This replaces four
     # copy-pasted loops that differed only in these two values.
     cases = [
         (["visit"], "visit"),
         (["visit", "detector"], "visit_detector_region"),
         (["tract"], "tract"),
         (["patch"], "patch"),
     ]
     for names, spatialElement in cases:
         graph = DimensionGraph(self.allDataIds.universe, names=names)
         for dataId in self.randomDataIds(n=4).subset(graph):
             self.assertIsNotNone(dataId.region)
             self.assertEqual(dataId.graph.spatial.names, {spatialElement})
             self.assertEqual(dataId.region,
                              dataId.records[spatialElement].region)
Ejemplo n.º 2
0
    def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding, datasetTypes: TaskDatasetTypes):
        # Record the task and resolve its dimensions within the parent's
        # dimension universe.
        universe = parent.dimensions.universe
        self.taskDef = taskDef
        self.dimensions = DimensionGraph(universe, names=taskDef.connections.dimensions)
        if not self.dimensions.issubset(parent.dimensions):
            raise GraphBuilderError(f"Task with label '{taskDef.label}' has dimensions "
                                    f"{self.dimensions} that are not a subset of "
                                    f"the pipeline dimensions {parent.dimensions}.")

        # Each per-task dataset dict is carved out of the corresponding
        # dict(s) held by the parent _PipelineScaffolding.
        self.initInputs = _DatasetScaffoldingDict.fromSubset(
            datasetTypes.initInputs, parent.initInputs, parent.initIntermediates)
        self.initOutputs = _DatasetScaffoldingDict.fromSubset(
            datasetTypes.initOutputs, parent.initIntermediates, parent.initOutputs)
        self.inputs = _DatasetScaffoldingDict.fromSubset(
            datasetTypes.inputs, parent.inputs, parent.intermediates)
        self.outputs = _DatasetScaffoldingDict.fromSubset(
            datasetTypes.outputs, parent.intermediates, parent.outputs)
        self.prerequisites = _DatasetScaffoldingDict.fromSubset(
            datasetTypes.prerequisites, parent.prerequisites)
        # Register this task as a consumer of everything it reads ...
        consumed = itertools.chain(self.initInputs.values(), self.inputs.values(),
                                   self.prerequisites.values())
        for scaffolding in consumed:
            scaffolding.consumers[self.taskDef.label] = self
        # ... and as the unique producer of everything it writes.
        for scaffolding in itertools.chain(self.initOutputs.values(), self.outputs.values()):
            assert scaffolding.producer is None
            scaffolding.producer = self
        # Filled in later by the graph-building machinery.
        self.dataIds = set()
        self.quanta = []
Ejemplo n.º 3
0
 def __init__(self, taskDef: TaskDef, parent: _PipelineScaffolding,
              datasetTypes: TaskDatasetTypes):
     # Resolve the task's dimensions in the parent's universe; the caller
     # guarantees they are a subset of the pipeline's dimensions.
     universe = parent.dimensions.universe
     self.taskDef = taskDef
     self.dimensions = DimensionGraph(universe,
                                      names=taskDef.connections.dimensions)
     assert self.dimensions.issubset(parent.dimensions)
     # Carve each per-task dataset dict out of the corresponding dict(s)
     # held by the parent _PipelineScaffolding.
     self.initInputs = _DatasetDict.fromSubset(
         datasetTypes.initInputs, parent.initInputs,
         parent.initIntermediates)
     self.initOutputs = _DatasetDict.fromSubset(
         datasetTypes.initOutputs, parent.initIntermediates,
         parent.initOutputs)
     self.inputs = _DatasetDict.fromSubset(
         datasetTypes.inputs, parent.inputs, parent.intermediates)
     self.outputs = _DatasetDict.fromSubset(
         datasetTypes.outputs, parent.intermediates, parent.outputs)
     self.prerequisites = _DatasetDict.fromSubset(
         datasetTypes.prerequisites, parent.prerequisites)
     # Data IDs and quanta are populated later by the graph builder.
     self.dataIds = set()
     self.quanta = {}
Ejemplo n.º 4
0
    def randomDimensionSubset(
            self,
            n: int = 3,
            graph: Optional[DimensionGraph] = None) -> DimensionGraph:
        """Generate a random `DimensionGraph` that has a subset of the
        dimensions in a given one.

        Parameters
        ----------
        n : `int`
            Number of dimensions to select, before automatic expansion by
            `DimensionGraph`.
        graph : `DimensionGraph`, optional
            Dimensions to select from.  Defaults to ``self.allDataIds.graph``.

        Returns
        -------
        selected : `DimensionGraph`
            ``n`` or more dimensions randomly selected from ``graph``
            without replacement.
        """
        if graph is None:
            graph = self.allDataIds.graph
        # Clamp the sample size to the population size: random.Random.sample
        # raises ValueError when asked for more elements than exist, and the
        # previous max() either crashed or always selected *every* dimension,
        # defeating the point of taking a random subset.
        return DimensionGraph(graph.universe,
                              names=self.rng.sample(
                                  list(graph.dimensions.names),
                                  min(n, len(graph.dimensions))))
Ejemplo n.º 5
0
 def testSkyMapDimensions(self):
     """Verify the graph produced by requesting only ``patch``."""
     subgraph = DimensionGraph(self.universe, names=("patch",))
     # patch implies nothing, so its expansion is entirely required.
     expected = ("skymap", "tract", "patch")
     self.assertCountEqual(subgraph.dimensions.names, expected)
     self.assertCountEqual(subgraph.required.names, expected)
     self.assertCountEqual(subgraph.implied.names, ())
     self.assertCountEqual(subgraph.elements.names, subgraph.dimensions.names)
     self.assertCountEqual(subgraph.spatial.names, ("patch",))
Ejemplo n.º 6
0
def loadDimensionData() -> DataCoordinateSequence:
    """Load dimension data from an export file included in the code repository.

    Returns
    -------
    dataIds : `DataCoordinateSequence`
        A sequence containing all data IDs in the export file.
    """
    # Create an in-memory SQLite database and Registry just to import the YAML
    # data and retrieve it as a set of DataCoordinate objects.
    config = RegistryConfig()
    config["db"] = "sqlite://"
    registry = Registry.fromConfig(config, create=True)
    # Keep the stream open until the backend has finished registering and
    # loading: the previous version closed the file before load() ran, which
    # breaks if the backend reads from the stream lazily.
    with open(DIMENSION_DATA_FILE, 'r') as stream:
        backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
    dimensions = DimensionGraph(registry.dimensions,
                                names=["visit", "detector", "tract", "patch"])
    return DataCoordinateSequence(
        dataIds=tuple(registry.queryDimensions(dimensions, expand=True)),
        graph=dimensions,
        hasFull=True,
        hasRecords=True,
    )
Ejemplo n.º 7
0
 def runDefineVisits(self, pool=None):
     """Define visits from the exposures that were ingested.

     Parameters
     ----------
     pool : optional
         Process pool forwarded to ``defineVisits.run``; `None` runs
         serially.
     """
     # Visit definition is optional; nothing to do if it is not configured.
     if self.task.defineVisits is None:
         return
     dimensions = DimensionGraph(self.task.universe, names=["exposure"])
     # Set comprehension instead of set(<generator>) (flake8-comprehensions
     # C401); also deduplicates the exposure-level data IDs.
     exposureDataIds = {ref.dataId.subset(dimensions) for ref in self._rawRefs}
     self.task.log.info("Defining visits from exposures.")
     self.task.defineVisits.run(exposureDataIds, pool=pool)
Ejemplo n.º 8
0
 def testCalibrationDimensions(self):
     """Verify the graph implied by ``physical_filter`` and ``detector``."""
     graph = DimensionGraph(self.universe, names=("physical_filter", "detector"))
     required = ("instrument", "detector", "physical_filter")
     implied = ("band",)
     self.assertCountEqual(graph.dimensions.names, required + implied)
     self.assertCountEqual(graph.required.names, required)
     self.assertCountEqual(graph.implied.names, implied)
     self.assertCountEqual(graph.elements.names, graph.dimensions.names)
     self.assertCountEqual(graph.governors.names, {"instrument"})
Ejemplo n.º 9
0
 def testTimespans(self):
     """Test that data IDs for a few known dimensions have the expected
     timespans.
     """
     visitOnly = DimensionGraph(self.allDataIds.universe, names=["visit"])
     for dataId in self.randomDataIds(n=4).subset(visitOnly):
         # A visit-level data ID is temporal, and its timespan should come
         # straight from the visit record.
         self.assertIsNotNone(dataId.timespan)
         self.assertEqual(dataId.graph.temporal.names, {"visit"})
         self.assertEqual(dataId.timespan, dataId.records["visit"].timespan)
Ejemplo n.º 10
0
 def testCalibrationDimensions(self):
     """Verify the graph implied by calibration-related dimensions."""
     graph = DimensionGraph(self.universe,
                            names=("calibration_label", "physical_filter", "detector"))
     required = ("instrument", "detector", "calibration_label", "physical_filter")
     implied = ("abstract_filter",)
     self.assertCountEqual(graph.dimensions.names, required + implied)
     self.assertCountEqual(graph.required.names, required)
     self.assertCountEqual(graph.implied.names, implied)
     self.assertCountEqual(graph.elements.names, graph.dimensions.names)
Ejemplo n.º 11
0
 def testObservationDimensions(self):
     """Verify the graph implied by observation-level dimensions."""
     graph = DimensionGraph(self.universe, names=("exposure", "detector", "visit"))
     required = ("instrument", "detector", "exposure", "visit")
     implied = ("physical_filter", "abstract_filter", "visit_system")
     self.assertCountEqual(graph.dimensions.names, required + implied)
     self.assertCountEqual(graph.required.names, required)
     self.assertCountEqual(graph.implied.names, implied)
     # Non-dimension elements brought in alongside these dimensions.
     self.assertCountEqual(graph.elements.names - graph.dimensions.names,
                           ("visit_detector_region", "visit_definition"))
     self.assertCountEqual(graph.spatial.names, ("visit_detector_region",))
     self.assertCountEqual(graph.temporal.names, ("exposure",))
Ejemplo n.º 12
0
 def testSkyMapDimensions(self):
     """Verify the graph produced by requesting only ``patch``."""
     graph = DimensionGraph(self.universe, names=("patch",))
     required = ("skymap", "tract", "patch")
     self.assertCountEqual(graph.dimensions.names, required)
     self.assertCountEqual(graph.required.names, required)
     self.assertCountEqual(graph.implied.names, ())
     self.assertCountEqual(graph.elements.names, graph.dimensions.names)
     # Spatial information is reported via the skymap_regions family here.
     self.assertCountEqual(graph.spatial.names, ("skymap_regions",))
     self.assertCountEqual(graph.governors.names, {"skymap"})
     self.assertEqual(graph.spatial.names, {"skymap_regions"})
     # That family is governed by the skymap dimension.
     self.assertEqual(next(iter(graph.spatial)).governor, self.universe["skymap"])
Ejemplo n.º 13
0
 def testInstrumentDimensions(self):
     """Verify the graph implied by instrument/observation dimensions."""
     graph = DimensionGraph(self.universe, names=("exposure", "detector", "visit"))
     required = ("instrument", "exposure", "detector", "visit")
     implied = ("physical_filter", "band", "visit_system")
     self.assertCountEqual(graph.dimensions.names, required + implied)
     self.assertCountEqual(graph.required.names, required)
     self.assertCountEqual(graph.implied.names, implied)
     # Non-dimension elements brought in alongside these dimensions.
     self.assertCountEqual(graph.elements.names - graph.dimensions.names,
                           ("visit_detector_region", "visit_definition"))
     self.assertCountEqual(graph.governors.names, {"instrument"})
Ejemplo n.º 14
0
 def testSubsetCalculation(self):
     """Test that independent spatial and temporal options are computed
     correctly.
     """
     names = ("visit", "detector", "tract", "patch", "htm7",
              "exposure", "calibration_label")
     graph = DimensionGraph(self.universe, names=names)
     self.assertCountEqual(graph.spatial.names,
                           ("visit_detector_region", "patch", "htm7"))
     self.assertCountEqual(graph.temporal.names,
                           ("exposure", "calibration_label"))
Ejemplo n.º 15
0
 def testSubsetCalculation(self):
     """Test that independent spatial and temporal options are computed
     correctly.
     """
     names = ("visit", "detector", "tract", "patch", "htm7", "exposure")
     graph = DimensionGraph(self.universe, names=names)
     # In this schema version the spatial/temporal properties are reported
     # as topological families rather than individual elements.
     self.assertCountEqual(graph.spatial.names,
                           ("observation_regions", "skymap_regions", "htm"))
     self.assertCountEqual(graph.temporal.names, ("observation_timespans",))
Ejemplo n.º 16
0
 def testWithoutFilter(self):
     """Round-trip a tract/patch data ID through the packer."""
     dimensions = DimensionGraph(universe=self.universe,
                                 names=["tract", "patch"])
     dataId = DataCoordinate.standardize(skymap=self.fixed["skymap"],
                                         tract=2, patch=6,
                                         universe=self.universe)
     packer = SkyMapDimensionPacker(self.fixed, dimensions)
     packed = packer.pack(dataId)
     # The packed integer must fit in the advertised bit budget and
     # unpack back to the original data ID.
     self.assertLessEqual(packed.bit_length(), packer.maxBits)
     self.assertEqual(packer.unpack(packed), dataId)
Ejemplo n.º 17
0
 def setUp(self):
     # Build a minimal expanded skymap data ID with just the record fields
     # the dimension packer reads.
     self.universe = DimensionUniverse()
     skymapGraph = DimensionGraph(universe=self.universe, names=["skymap"])
     skymapRecord = self.universe["skymap"].RecordClass.fromDict({
         "name": "unimportant",
         "tract_max": 5,
         "patch_nx_max": 3,
         "patch_ny_max": 3,
     })
     self.fixed = ExpandedDataCoordinate(skymapGraph,
                                         values=("unimportant", ),
                                         records={"skymap": skymapRecord})
Ejemplo n.º 18
0
    def makeDatasetRef(self, datasetTypeName, dataId=None, storageClassName="DefaultStorageClass",
                       run="run2", conform=True):
        """Make a simple DatasetRef"""
        if dataId is None:
            dataId = self.dataId

        # Composite-looking names (those with a component part) need a
        # placeholder parent storage class.
        compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
        if componentName:
            parentStorageClass = DatasetType.PlaceholderParentStorageClass
        else:
            parentStorageClass = None

        # Dimensions are inferred from the data ID's keys.
        graph = DimensionGraph(self.universe, names=dataId.keys())
        datasetType = DatasetType(datasetTypeName, graph,
                                  StorageClass(storageClassName),
                                  parentStorageClass=parentStorageClass)
        return DatasetRef(datasetType, dataId, id=1, run=run, conform=conform)
Ejemplo n.º 19
0
 def setUp(self):
     # Build a minimal expanded skymap data ID with just the record fields
     # the dimension packer reads.
     self.universe = DimensionUniverse()
     skymapRecord = self.universe["skymap"].RecordClass(
         name="unimportant",
         tract_max=5,
         patch_nx_max=3,
         patch_ny_max=3,
     )
     dataId = DataCoordinate.fromFullValues(
         DimensionGraph(universe=self.universe, names=["skymap"]),
         values=("unimportant", ),
     )
     self.fixed = dataId.expanded(records={"skymap": skymapRecord})
Ejemplo n.º 20
0
 def testObservationDimensions(self):
     """Verify the graph implied by observation-level dimensions."""
     graph = DimensionGraph(self.universe, names=("exposure", "detector", "visit"))
     required = ("instrument", "detector", "exposure", "visit")
     implied = ("physical_filter", "band", "visit_system")
     self.assertCountEqual(graph.dimensions.names, required + implied)
     self.assertCountEqual(graph.required.names, required)
     self.assertCountEqual(graph.implied.names, implied)
     # Non-dimension elements brought in alongside these dimensions.
     self.assertCountEqual(graph.elements.names - graph.dimensions.names,
                           ("visit_detector_region", "visit_definition"))
     self.assertCountEqual(graph.spatial.names, ("observation_regions",))
     self.assertCountEqual(graph.temporal.names, ("observation_timespans",))
     self.assertCountEqual(graph.governors.names, {"instrument"})
     self.assertEqual(graph.spatial.names, {"observation_regions"})
     self.assertEqual(graph.temporal.names, {"observation_timespans"})
     # Both topological families are governed by the instrument dimension.
     self.assertEqual(next(iter(graph.spatial)).governor, self.universe["instrument"])
     self.assertEqual(next(iter(graph.temporal)).governor, self.universe["instrument"])
Ejemplo n.º 21
0
 def checkGraphInvariants(self, graph):
     """Check structural invariants that any `DimensionGraph` must satisfy.

     Parameters
     ----------
     graph : `DimensionGraph`
         Graph whose invariants should be checked.
     """
     elements = list(graph.elements)
     for n, element in enumerate(elements):
         # Ordered comparisons on graphs behave like sets.
         self.assertLessEqual(element.graph, graph)
         # Ordered comparisons on elements correspond to the ordering within
         # a DimensionUniverse (topological, with deterministic
         # tiebreakers), so every earlier element compares less and every
         # later element compares greater.
         for other in elements[:n]:
             self.assertLess(other, element)
             self.assertLessEqual(other, element)
         for other in elements[n + 1:]:
             self.assertGreater(other, element)
             self.assertGreaterEqual(other, element)
         if isinstance(element, Dimension):
             self.assertEqual(element.graph.required, element.required)
     # A graph must round-trip through its required dimensions.
     self.assertEqual(DimensionGraph(self.universe, graph.required), graph)
     # Required dimensions are exactly those not implied by any element.
     self.assertCountEqual(graph.required, [
         dimension for dimension in graph.dimensions
         if not any(dimension in other.graph.implied
                    for other in graph.elements)
     ])
     self.assertCountEqual(graph.implied, graph.dimensions - graph.required)
     # The dimensions are exactly the Dimension-typed elements, and are
     # partitioned into required + implied.
     self.assertCountEqual(graph.dimensions, [
         element
         for element in graph.elements if isinstance(element, Dimension)
     ])
     self.assertCountEqual(graph.dimensions,
                           itertools.chain(graph.required, graph.implied))
     # Check primary key traversal order: each element should follow any it
     # requires, and any element that is implied by another in the graph
     # should follow at least one of those that imply it.
     seen = NamedValueSet()
     for element in graph.primaryKeyTraversalOrder:
         with self.subTest(required=graph.required,
                           implied=graph.implied,
                           element=element):
             seen.add(element)
             self.assertLessEqual(element.graph.required, seen)
             if element in graph.implied:
                 self.assertTrue(any(element in s.implied for s in seen))
     # The traversal must visit every element exactly once.
     self.assertCountEqual(seen, graph.elements)
     # Test encoding and decoding of DimensionGraphs to bytes.
     encoded = graph.encode()
     self.assertEqual(len(encoded), self.universe.getEncodeLength())
     self.assertEqual(
         DimensionGraph.decode(encoded, universe=self.universe), graph)
Ejemplo n.º 22
0
 def makeDatasetRef(self,
                    datasetTypeName,
                    dataId=None,
                    storageClassName="DefaultStorageClass",
                    conform=True):
     """Make a simple DatasetRef"""
     if dataId is None:
         dataId = self.dataId
     # Dimensions are inferred from the data ID's keys.
     graph = DimensionGraph(self.universe, names=dataId.keys())
     storageClass = StorageClass(storageClassName)
     datasetType = DatasetType(datasetTypeName, graph, storageClass)
     return DatasetRef(datasetType, dataId, id=1, run="run2",
                       conform=conform)
Ejemplo n.º 23
0
    def _expandExposureId(self, dataId: DataId) -> DataCoordinate:
        """Expand an exposure-level data ID with full dimension records.

        This is a private method so that ID expansion can run in a
        worker pool without resorting to local callables.

        Parameters
        ----------
        dataId : `dict` or `DataCoordinate`
            Exposure-level data ID to expand.

        Returns
        -------
        expanded : `DataCoordinate`
            A data ID that includes full metadata for all exposure
            dimensions.
        """
        exposureGraph = DimensionGraph(self.universe, names=["exposure"])
        return self.butler.registry.expandDataId(dataId, graph=exposureGraph)
Ejemplo n.º 24
0
    def testRegistryConfig(self):
        """Check formatter lookup driven by the datastore config file."""
        configFile = os.path.join(TESTDIR, "config", "basic",
                                  "posixDatastore.yaml")
        config = Config(configFile)
        universe = DimensionUniverse()
        self.factory.registerFormatters(config["datastore", "formatters"],
                                        universe=universe)

        dimensions = universe.extract(
            ("visit", "physical_filter", "instrument"))
        sc = StorageClass("DummySC", dict, None)

        def lookupFormatter(datasetTypeName, dims, instrument):
            # Build a non-conforming DatasetRef with the given instrument
            # and resolve its formatter class.
            ref = self.makeDatasetRef(datasetTypeName,
                                      dims,
                                      sc, {
                                          "instrument": instrument,
                                          "physical_filter": "v"
                                      },
                                      conform=False)
            fmt = self.factory.getFormatterClass(ref)
            self.assertIsFormatter(fmt)
            return fmt

        # Create DatasetRefs with and without instrument matching the one
        # in the config file.
        self.assertIn("JsonFormatter",
                      lookupFormatter("pvi", dimensions, "DummyHSC").name())
        self.assertIn("PickleFormatter",
                      lookupFormatter("pvi", dimensions, "DummyNotHSC").name())

        # Create a DatasetRef that should fall back to using Dimensions.
        self.assertIn("PickleFormatter",
                      lookupFormatter("pvix", dimensions, "DummyHSC").name())

        # Create a DatasetRef that should fall back to using StorageClass.
        dimensionsNoV = DimensionGraph(universe,
                                       names=("physical_filter", "instrument"))
        self.assertIn("YamlFormatter",
                      lookupFormatter("pvix", dimensionsNoV, "DummyHSC").name())
Ejemplo n.º 25
0
    def testRegistryConfig(self):
        """Exercise formatter lookup and write-parameter handling driven by
        the datastore configuration file.
        """
        configFile = os.path.join(TESTDIR, "config", "basic", "posixDatastore.yaml")
        config = Config(configFile)
        universe = DimensionUniverse()
        self.factory.registerFormatters(config["datastore", "formatters"], universe=universe)

        # Create a DatasetRef with and without instrument matching the
        # one in the config file.
        dimensions = universe.extract(("visit", "physical_filter", "instrument"))
        sc = StorageClass("DummySC", dict, None)
        refPviHsc = self.makeDatasetRef("pvi", dimensions, sc, {"instrument": "DummyHSC",
                                                                "physical_filter": "v"},
                                        conform=False)
        refPviHscFmt = self.factory.getFormatterClass(refPviHsc)
        self.assertIsFormatter(refPviHscFmt)
        self.assertIn("JsonFormatter", refPviHscFmt.name())

        # A non-matching instrument resolves to a different formatter.
        refPviNotHsc = self.makeDatasetRef("pvi", dimensions, sc, {"instrument": "DummyNotHSC",
                                                                   "physical_filter": "v"},
                                           conform=False)
        refPviNotHscFmt = self.factory.getFormatterClass(refPviNotHsc)
        self.assertIsFormatter(refPviNotHscFmt)
        self.assertIn("PickleFormatter", refPviNotHscFmt.name())

        # Create a DatasetRef that should fall back to using Dimensions
        refPvixHsc = self.makeDatasetRef("pvix", dimensions, sc, {"instrument": "DummyHSC",
                                                                  "physical_filter": "v"},
                                         conform=False)
        refPvixNotHscFmt = self.factory.getFormatterClass(refPvixHsc)
        self.assertIsFormatter(refPvixNotHscFmt)
        self.assertIn("PickleFormatter", refPvixNotHscFmt.name())

        # Create a DatasetRef that should fall back to using StorageClass
        dimensionsNoV = DimensionGraph(universe, names=("physical_filter", "instrument"))
        refPvixNotHscDims = self.makeDatasetRef("pvix", dimensionsNoV, sc, {"instrument": "DummyHSC",
                                                                            "physical_filter": "v"},
                                                conform=False)
        refPvixNotHscDims_fmt = self.factory.getFormatterClass(refPvixNotHscDims)
        self.assertIsFormatter(refPvixNotHscDims_fmt)
        self.assertIn("YamlFormatter", refPvixNotHscDims_fmt.name())

        # Check that parameters are stored
        refParam = self.makeDatasetRef("paramtest", dimensions, sc, {"instrument": "DummyNotHSC",
                                                                     "physical_filter": "v"},
                                       conform=False)
        lookup, refParam_fmt, kwargs = self.factory.getFormatterClassWithMatch(refParam)
        self.assertIn("writeParameters", kwargs)
        # NOTE(review): expected values presumably mirror the "paramtest"
        # entry in posixDatastore.yaml -- confirm there if they change.
        expected = {"max": 5, "min": 2, "comment": "Additional commentary", "recipe": "recipe1"}
        self.assertEqual(kwargs["writeParameters"], expected)
        self.assertIn("FormatterTest", refParam_fmt.name())

        # Instantiating the formatter exposes the configured parameters.
        f = self.factory.getFormatter(refParam, self.fileDescriptor)
        self.assertEqual(f.writeParameters, expected)

        # Caller-supplied writeParameters are merged over the configured ones.
        f = self.factory.getFormatter(refParam, self.fileDescriptor, writeParameters={"min": 22,
                                                                                      "extra": 50})
        self.assertEqual(f.writeParameters, {"max": 5, "min": 22, "comment": "Additional commentary",
                                             "extra": 50, "recipe": "recipe1"})

        self.assertIn("recipe1", f.writeRecipes)
        self.assertEqual(f.writeParameters["recipe"], "recipe1")

        with self.assertRaises(ValueError):
            # "new" is not allowed as a write parameter
            self.factory.getFormatter(refParam, self.fileDescriptor, writeParameters={"new": 1})

        with self.assertRaises(RuntimeError):
            # "mode" is a required recipe parameter
            self.factory.getFormatter(refParam, self.fileDescriptor, writeRecipes={"recipe3": {"notmode": 1}})