Example #1
 def iterDatasets(self) -> Iterator[FileDataset]:
     # Docstring inherited from RepoConverter.
     # Iterate over reference catalog files.
     for refCat, dimension in self._refCats:
         datasetType = DatasetType(refCat,
                                   dimensions=[dimension],
                                   universe=self.task.universe,
                                   storageClass="SimpleCatalog")
         if self.subset is None:
             regex = re.compile(r"(\d+)\.fits")
             for fileName in os.listdir(
                     os.path.join(self.root, "ref_cats", refCat)):
                 m = regex.match(fileName)
                 if m is not None:
                     htmId = int(m.group(1))
                     dataId = self.task.registry.expandDataId(
                         {dimension: htmId})
                     yield FileDataset(path=os.path.join(
                         self.root, "ref_cats", refCat, fileName),
                                       refs=DatasetRef(datasetType, dataId))
         else:
             for begin, end in self.subset.skypix[dimension]:
                 for htmId in range(begin, end):
                     dataId = self.task.registry.expandDataId(
                         {dimension: htmId})
                     yield FileDataset(path=os.path.join(
                         self.root, "ref_cats", refCat, f"{htmId}.fits"),
                                       refs=DatasetRef(datasetType, dataId))
     yield from super().iterDatasets()
Example #2
 def testConstructor(self):
     """Test that construction preserves and validates values.
     """
     # Construct an unresolved ref.
     ref = DatasetRef(self.datasetType, self.dataId)
     self.assertEqual(ref.datasetType, self.datasetType)
     self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                      msg=ref.dataId)
     self.assertIsInstance(ref.dataId, DataCoordinate)
     # Constructing an unresolved ref with run and/or components should
     # fail.
     run = "somerun"
     with self.assertRaises(ValueError):
         DatasetRef(self.datasetType, self.dataId, run=run)
     # Passing a data ID that is missing dimensions should fail.
     with self.assertRaises(KeyError):
         DatasetRef(self.datasetType, {"instrument": "DummyCam"})
     # Constructing a resolved ref should preserve run as well as everything
     # else.
     ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
     self.assertEqual(ref.datasetType, self.datasetType)
     self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                      msg=ref.dataId)
     self.assertIsInstance(ref.dataId, DataCoordinate)
     self.assertEqual(ref.id, 1)
     self.assertEqual(ref.run, run)
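The two construction modes exercised above are worth isolating: an unresolved ref takes only a dataset type and a data ID, while a resolved ref must be given an id and a run together. Below is a minimal sketch of the same pattern, assuming these classes import from lsst.daf.butler; the dataset type name and data ID values are illustrative.

    from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse, StorageClass

    universe = DimensionUniverse()
    datasetType = DatasetType("sketch_ds",
                              universe.extract(("instrument", "visit")),
                              StorageClass("testref_StructuredData"))
    dataId = dict(instrument="DummyCam", visit=42)

    # Unresolved ref: no id or run; passing run without id raises ValueError.
    unresolvedRef = DatasetRef(datasetType, dataId)

    # Resolved ref: id and run are supplied together.
    resolvedRef = DatasetRef(datasetType, dataId, id=1, run="somerun")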
Example #3
 def testResolving(self):
     ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
     unresolvedRef = ref.unresolved()
     self.assertIsNotNone(ref.id)
     self.assertIsNone(unresolvedRef.id)
     self.assertIsNone(unresolvedRef.run)
     self.assertNotEqual(ref, unresolvedRef)
     self.assertEqual(ref.unresolved(), unresolvedRef)
     self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
     self.assertEqual(ref.dataId, unresolvedRef.dataId)
     reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
     self.assertEqual(ref, reresolvedRef)
     self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
     self.assertIsNotNone(reresolvedRef.run)
Example #4
    def ingestExposureDatasets(self,
                               exposure: RawExposureData,
                               *,
                               run: Optional[str] = None) -> List[DatasetRef]:
        """Ingest all raw files in one exposure.

        Parameters
        ----------
        exposure : `RawExposureData`
            A structure containing information about the exposure to be
            ingested.  Must have `RawExposureData.records` populated and all
            data ID attributes expanded.
        run : `str`, optional
            Name of a RUN-type collection to write to, overriding
            ``self.butler.run``.

        Returns
        -------
        refs : `list` of `lsst.daf.butler.DatasetRef`
            Dataset references for ingested raws.
        """
        datasets = [
            FileDataset(path=os.path.abspath(file.filename),
                        refs=[
                            DatasetRef(self.datasetType, d.dataId)
                            for d in file.datasets
                        ],
                        formatter=file.FormatterClass)
            for file in exposure.files
        ]
        self.butler.ingest(*datasets, transfer=self.config.transfer, run=run)
        return [ref for dataset in datasets for ref in dataset.refs]
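The essential step in this method is wrapping each file's refs in a FileDataset and handing the result to Butler.ingest with a transfer mode and an optional run. A minimal sketch of that step on its own, assuming the lsst.daf.butler import path; the function name, path, dataset type, and data ID are placeholders:

    import os

    from lsst.daf.butler import Butler, DatasetRef, FileDataset

    def ingest_one_file(butler: Butler, datasetType, dataId, filename, run=None):
        """Sketch: ingest a single already-written file as one dataset."""
        dataset = FileDataset(path=os.path.abspath(filename),
                              refs=[DatasetRef(datasetType, dataId)])
        # "symlink" is one of the transfer modes mentioned in these examples
        # ("move", "copy", "hardlink", "symlink").
        butler.ingest(dataset, transfer="symlink", run=run)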
Example #5
 def testDetach(self):
     datasetTypeName = "test"
     storageClass = StorageClass("testref_StructuredData")
     dimensions = self.universe.extract(("instrument", "visit"))
     dataId = dict(instrument="DummyCam", visit=42)
     datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
     ref = DatasetRef(datasetType, dataId, id=1)
     detachedRef = ref.detach()
     self.assertIsNotNone(ref.id)
     self.assertIsNone(detachedRef.id)
     self.assertEqual(ref.datasetType, detachedRef.datasetType)
     self.assertEqual(ref.dataId, detachedRef.dataId)
     self.assertEqual(ref.predictedConsumers,
                      detachedRef.predictedConsumers)
     self.assertEqual(ref.actualConsumers, detachedRef.actualConsumers)
     self.assertEqual(ref.components, detachedRef.components)
Example #6
    def ingestExposureDatasets(self, exposure: RawExposureData, butler: Optional[Butler] = None
                               ) -> List[DatasetRef]:
        """Ingest all raw files in one exposure.

        Parameters
        ----------
        exposure : `RawExposureData`
            A structure containing information about the exposure to be
            ingested.  Must have `RawExposureData.records` populated and all
            data ID attributes expanded.
        butler : `lsst.daf.butler.Butler`, optional
            Butler to use for ingest.  If not provided, ``self.butler`` will
            be used.

        Returns
        -------
        refs : `list` of `lsst.daf.butler.DatasetRef`
            Dataset references for ingested raws.
        """
        if butler is None:
            butler = self.butler
        datasets = [FileDataset(path=os.path.abspath(file.filename),
                                refs=[DatasetRef(self.datasetType, d.dataId) for d in file.datasets],
                                formatter=file.FormatterClass)
                    for file in exposure.files]
        butler.ingest(*datasets, transfer=self.config.transfer)
        return [ref for dataset in datasets for ref in dataset.refs]
Example #7
 def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
     # Docstring inherited from DatasetRecordStorageManager.
     sql = sqlalchemy.sql.select(
         [
             self._static.dataset.columns.dataset_type_id,
             self._static.dataset.columns[self._collections.getRunForeignKeyName()],
         ]
     ).select_from(
         self._static.dataset
     ).where(
         self._static.dataset.columns.id == id
     )
     row = self._db.query(sql).fetchone()
     if row is None:
         return None
     recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
     if recordsForType is None:
         self.refresh(universe=universe)
         recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
         assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
     return DatasetRef(
         recordsForType.datasetType,
         dataId=recordsForType.getDataId(id=id),
         id=id,
         run=self._collections[row[self._collections.getRunForeignKeyName()]].name
     )
Example #8
 def prep(self):
     # Docstring inherited from RepoConverter.
     self.task.log.info(f"Looking for skymaps in root {self.root}.")
     for coaddName, datasetTypeName in SKYMAP_DATASET_TYPES.items():
         if not self.task.isDatasetTypeIncluded(datasetTypeName):
             continue
         try:
             exists = self.butler2.datasetExists(datasetTypeName)
         except AttributeError:
             # This mapper doesn't even define this dataset type.
             continue
         if not exists:
             continue
         instance = self.butler2.get(datasetTypeName)
         name = self.task.useSkyMap(instance, datasetTypeName)
         datasetType = DatasetType(datasetTypeName,
                                   dimensions=["skymap"],
                                   storageClass="SkyMap",
                                   universe=self.task.universe)
         dataId = DataCoordinate.standardize(skymap=name,
                                             universe=self.task.universe)
         struct = FoundSkyMap(name=name,
                              instance=instance,
                              coaddName=coaddName,
                              ref=DatasetRef(datasetType, dataId),
                              filename=self.butler2.getUri(datasetTypeName))
         self._foundSkyMapsByCoaddName[coaddName] = struct
         self.task.log.info("Found skymap %s in %s in %s.", name,
                            datasetTypeName, self.root)
     super().prep()
Example #9
 def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
     # Docstring inherited from DatasetRecordStorageManager.
     staticRow = {
         "dataset_type_id": self._dataset_type_id,
         self._runKeyColumn: run.key,
     }
     dataIds = list(dataIds)
     # Insert into the static dataset table, generating autoincrement
     # dataset_id values.
     with self._db.transaction():
         datasetIds = self._db.insert(self._static.dataset, *([staticRow]*len(dataIds)),
                                      returnIds=True)
         assert datasetIds is not None
         # Combine the generated dataset_id values and data ID fields to
         # form rows to be inserted into the dynamic table.
         protoDynamicRow = {
             "dataset_type_id": self._dataset_type_id,
             self._collections.getCollectionForeignKeyName(): run.key,
         }
         dynamicRows = [
             dict(protoDynamicRow, dataset_id=dataset_id, **dataId.byName())
             for dataId, dataset_id in zip(dataIds, datasetIds)
         ]
         # Insert those rows into the dynamic table.  This is where we'll
         # get any unique constraint violations.
         self._db.insert(self._dynamic, *dynamicRows)
     for dataId, datasetId in zip(dataIds, datasetIds):
         yield DatasetRef(
             datasetType=self.datasetType,
             dataId=dataId,
             id=datasetId,
             run=run.name,
         )
Example #10
 def makeDatasetRef(self, datasetTypeName, dataUnits, storageClass, dataId, id=None):
     """Make a DatasetType and wrap it in a DatasetRef for a test"""
     datasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
     if id is None:
         self.id += 1
         id = self.id
     return DatasetRef(datasetType, dataId, id=id)
Example #11
    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        dataId3, calibDate = self.translate(nextDataId2, partial=True)

        def get_detectors(filename):
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType][calibDate].append(
                FileDataset(refs=refs, path=path, formatter=self._formatter)
            )
Example #12
    def testSorting(self):
        """Can we sort a DatasetRef"""
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=1))
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10))
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=22))

        # Enable detailed diff report
        self.maxDiff = None

        # This will sort them on visit number
        sort = sorted([ref3, ref1, ref2])
        self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now include a run
        ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43), run="b", id=2)
        self.assertEqual(ref1.run, "b")
        ref4 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10), run="b", id=2)
        ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=4), run="a", id=1)
        ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=104), run="c", id=3)

        # This will sort them on run before visit
        sort = sorted([ref3, ref1, ref2, ref4])
        self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

        # Now with strings
        with self.assertRaises(TypeError):
            sort = sorted(["z", ref1, "c"])
Example #13
    def ingestStrayLightData(self, butler, directory, *, transfer=None):
        """Ingest externally-produced y-band stray light data files into
        a data repository.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler initialized with the collection to ingest into.
        directory : `str`
            Directory containing yBackground-*.fits files.
        transfer : `str`, optional
            If not `None`, must be one of 'move', 'copy', 'hardlink', or
            'symlink', indicating how to transfer the files.
        """
        calibrationLabel = "y-LED-encoder-on"
        # The LEDs were covered up around 2018-01-01; no correction is
        # needed after that date.
        datetime_end = datetime.datetime(2018, 1, 1)
        datasets = []
        # TODO: should we use a more generic name for the dataset type?
        # This is just the (rather HSC-specific) name used in Gen2, and while
        # the instances of this dataset are camera-specific, the datasetType
        # (which is used in the generic IsrTask) should not be.
        datasetType = DatasetType("yBackground",
                                  dimensions=("physical_filter", "detector",
                                              "calibration_label"),
                                  storageClass="StrayLightData",
                                  universe=butler.registry.dimensions)
        for detector in self.getCamera():
            path = os.path.join(directory,
                                f"ybackground-{detector.getId():03d}.fits")
            if not os.path.exists(path):
                log.warn(
                    f"No stray light data found for detector {detector.getId()} @ {path}."
                )
                continue
            ref = DatasetRef(datasetType,
                             dataId={
                                 "instrument": self.getName(),
                                 "detector": detector.getId(),
                                 "physical_filter": "HSC-Y",
                                 "calibration_label": calibrationLabel
                             })
            datasets.append(
                FileDataset(ref=ref,
                            path=path,
                            formatter=SubaruStrayLightDataFormatter))
        with butler.transaction():
            butler.registry.registerDatasetType(datasetType)
            butler.registry.insertDimensionData(
                "calibration_label", {
                    "instrument": self.getName(),
                    "name": calibrationLabel,
                    "datetime_begin": datetime.date.min,
                    "datetime_end": datetime_end
                })
            butler.ingest(*datasets, transfer=transfer)
Example #14
 def handle(self, path: str, nextDataId2,
            datasets: Mapping[DatasetType, List[FileDataset]], *, log: Log,
            predicate: Callable[[DataCoordinate], bool]):
     # Docstring inherited from ParsedPathElementHandler.
     dataId3 = self.translate(nextDataId2, partial=False, log=log)
     if predicate(dataId3):
         datasets[self._datasetType].append(
             FileDataset(refs=[DatasetRef(self._datasetType, dataId3)],
                         path=path))
Example #15
    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        other repository.

        For repositories that do not support round-trip of ID values this
        method returns unresolved DatasetRef, for round-trip-safe repos it
        returns unchanged ref.
        """
        return ref if self.datasetsIdType is uuid.UUID else ref.unresolved()
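The idea behind comparableRef follows from Example #3: unresolved() drops id and run, so refs describing the same dataset in two repositories that assigned different integer ids compare equal once both are unresolved. A small illustrative sketch, assuming the same lsst.daf.butler imports and using made-up names and ids:

    from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse, StorageClass

    universe = DimensionUniverse()
    datasetType = DatasetType("sketch_ds",
                              universe.extract(("instrument", "visit")),
                              StorageClass("testref_StructuredData"))
    dataId = dict(instrument="DummyCam", visit=42)

    # Hypothetical refs for the same dataset from two repositories that
    # assigned different integer ids.
    refA = DatasetRef(datasetType, dataId, id=1, run="somerun")
    refB = DatasetRef(datasetType, dataId, id=7, run="somerun")
    assert refA != refB
    assert refA.unresolved() == refB.unresolved()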
Example #16
 def _makeDSRefVisit(self, dstype, visitId, universe):
     return DatasetRef(datasetType=dstype,
                       dataId=DataCoordinate.standardize(
                           detector="X",
                           visit=visitId,
                           physical_filter='a',
                           abstract_filter='b',
                           instrument='TestInstrument',
                           universe=universe))
Example #17
 def makeDatasetRef(self, datasetTypeName, dataId=None):
     """Make a simple DatasetRef"""
     if dataId is None:
         dataId = self.dataId
     if datasetTypeName not in self.datasetTypes:
         self.datasetTypes[datasetTypeName] = DatasetType(
             datasetTypeName, list(dataId.keys()), StorageClass())
     datasetType = self.datasetTypes[datasetTypeName]
     return DatasetRef(datasetType, dataId)
Example #18
 def makeDatasetRef(self, datasetTypeName, dimensions, storageClass, dataId, *, id=None, run=None,
                    conform=True):
     """Make a DatasetType and wrap it in a DatasetRef for a test"""
     datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
     if id is None:
         self.id += 1
         id = self.id
     if run is None:
         run = "dummy"
     return DatasetRef(datasetType, dataId, id=id, run=run, conform=conform)
Example #19
    def testAddInputsOutputs(self):
        """Test of addPredictedInput() method.
        """
        quantum = Quantum(taskName="some.task.object", run=None)

        # start with empty
        self.assertEqual(quantum.predictedInputs, dict())
        universe = DimensionUniverse()
        instrument = "DummyCam"
        datasetTypeName = "test_ds"
        storageClass = StorageClass("testref_StructuredData")
        datasetType = DatasetType(datasetTypeName,
                                  universe.extract(("instrument", "visit")),
                                  storageClass)

        # add one ref
        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=42))
        quantum.addPredictedInput(ref)
        self.assertIn(datasetTypeName, quantum.predictedInputs)
        self.assertEqual(len(quantum.predictedInputs[datasetTypeName]), 1)
        # add second ref
        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=43))
        quantum.addPredictedInput(ref)
        self.assertEqual(len(quantum.predictedInputs[datasetTypeName]), 2)

        # mark last ref as actually used
        self.assertEqual(quantum.actualInputs, dict())
        quantum._markInputUsed(ref)
        self.assertIn(datasetTypeName, quantum.actualInputs)
        self.assertEqual(len(quantum.actualInputs[datasetTypeName]), 1)

        # add couple of outputs too
        self.assertEqual(quantum.outputs, dict())
        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=42))
        quantum.addOutput(ref)
        self.assertIn(datasetTypeName, quantum.outputs)
        self.assertEqual(len(quantum.outputs[datasetTypeName]), 1)

        ref = DatasetRef(datasetType, dict(instrument=instrument, visit=43))
        quantum.addOutput(ref)
        self.assertEqual(len(quantum.outputs[datasetTypeName]), 2)
Example #20
 def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
     # Docstring inherited from DatastoreRegistryBridge
     sql = sqlalchemy.sql.select([
         self._tables.dataset_location.columns.datastore_name
     ]).select_from(self._tables.dataset_location).where(
         self._tables.dataset_location.columns.dataset_id ==
         ref.getCheckedId())
     for row in self._db.query(sql).fetchall():
         yield row[self._tables.dataset_location.columns.datastore_name]
     for name, bridge in self._ephemeral.items():
         if ref in bridge:
             yield name
Example #21
 def handle(self, path: str, nextDataId2,
            datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
            predicate: Callable[[DataCoordinate], bool]):
     # Docstring inherited from ParsedPathElementHandler.
     dataId3, calibDate = self.translate(nextDataId2, partial=False)
     if predicate(dataId3):
         datasets[self._datasetType][calibDate].append(
             FileDataset(
                 refs=[DatasetRef(self._datasetType, dataId3)],
                 path=path, formatter=self._formatter
             )
         )
Example #22
 def _makeDSRefVisit(self, dstype, visitId, universe):
     return DatasetRef(
         datasetType=dstype,
         dataId=DataCoordinate.standardize(
             detector="X",
             visit=visitId,
             physical_filter="a",
             band="b",
             instrument="TestInstrument",
             universe=universe,
         ),
     )
Example #23
    def testConstructor(self):
        """Test of constructor.
        """
        # Quantum specific arguments
        taskName = "some.task.object"  # can't use a real PipelineTask due to inverted package dependency

        quantum = Quantum(taskName=taskName)
        self.assertEqual(quantum.taskName, taskName)
        self.assertEqual(quantum.initInputs, {})
        self.assertEqual(quantum.inputs, NamedKeyDict())
        self.assertEqual(quantum.outputs, {})
        self.assertIsNone(quantum.dataId)

        universe = DimensionUniverse()
        instrument = "DummyCam"
        datasetTypeName = "test_ds"
        storageClass = StorageClass("testref_StructuredData")
        datasetType = DatasetType(datasetTypeName,
                                  universe.extract(("instrument", "visit")),
                                  storageClass)
        predictedInputs = {
            datasetType: [
                DatasetRef(datasetType, dict(instrument=instrument, visit=42)),
                DatasetRef(datasetType, dict(instrument=instrument, visit=43))
            ]
        }
        outputs = {
            datasetType: [
                DatasetRef(datasetType, dict(instrument=instrument, visit=42)),
                DatasetRef(datasetType, dict(instrument=instrument, visit=43))
            ]
        }

        quantum = Quantum(taskName=taskName,
                          inputs=predictedInputs,
                          outputs=outputs)
        self.assertEqual(len(quantum.inputs[datasetType]), 2)
        self.assertEqual(len(quantum.outputs[datasetType]), 2)
Example #24
 def testConstructor(self):
     """Test that construction preserves and validates values.
     """
     # Construct an unresolved ref.
     ref = DatasetRef(self.datasetType, self.dataId)
     self.assertEqual(ref.datasetType, self.datasetType)
     self.assertEqual(ref.dataId, self.dataId, msg=ref.dataId)
     self.assertIsInstance(ref.dataId, DataCoordinate)
     self.assertIsNone(ref.components)
     # Constructing an unresolved ref with run and/or components should
     # fail.
     run = "somerun"
     with self.assertRaises(ValueError):
         DatasetRef(self.datasetType, self.dataId, run=run)
     components = {
         "a": DatasetRef(self.datasetType.makeComponentDatasetType("a"), self.dataId, id=2, run=run)
     }
     with self.assertRaises(ValueError):
         DatasetRef(self.datasetType, self.dataId, components=components)
     # Passing a data ID that is missing dimensions should fail.
     with self.assertRaises(KeyError):
         DatasetRef(self.datasetType, {"instrument": "DummyCam"})
     # Constructing a resolved ref should preserve run and components,
     # as well as everything else.
     ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run, components=components)
     self.assertEqual(ref.datasetType, self.datasetType)
     self.assertEqual(ref.dataId, self.dataId, msg=ref.dataId)
     self.assertIsInstance(ref.dataId, DataCoordinate)
     self.assertEqual(ref.id, 1)
     self.assertEqual(ref.run, run)
     self.assertEqual(ref.components, components)
     # Constructing a resolved ref with bad component storage classes
     # should fail.
     with self.assertRaises(ValueError):
         DatasetRef(self.datasetType, self.dataId, id=1, run=run, components={"b": components["a"]})
     # Constructing a resolved ref with unresolved components should fail.
     with self.assertRaises(ValueError):
         DatasetRef(self.datasetType, self.dataId, id=1, run=run,
                    components={"a": components["a"].unresolved()})
     # Constructing a resolved ref with bad component names should fail.
     with self.assertRaises(ValueError):
         DatasetRef(self.datasetType, self.dataId, id=1, run=run,
                    components={"c": components["a"]})
Example #25
 def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
     # Docstring inherited from DatasetRecordStorageManager.
     assert dataId.graph == self.datasetType.dimensions
     sql = self.select(collection=collection, dataId=dataId, id=SimpleQuery.Select,
                       run=SimpleQuery.Select).combine()
     row = self._db.query(sql).fetchone()
     if row is None:
         return None
     return DatasetRef(
         datasetType=self.datasetType,
         dataId=dataId,
         id=row["id"],
         run=self._collections[row[self._runKeyColumn]].name
     )
Example #26
    def makeDatasetRef(self, datasetTypeName, dataId=None, storageClassName="DefaultStorageClass",
                       run="run2", conform=True):
        """Make a simple DatasetRef"""
        if dataId is None:
            dataId = self.dataId

        # Pretend we have a parent if this looks like a composite
        compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
        parentStorageClass = DatasetType.PlaceholderParentStorageClass if componentName else None

        datasetType = DatasetType(datasetTypeName, DimensionGraph(self.universe, names=dataId.keys()),
                                  StorageClass(storageClassName),
                                  parentStorageClass=parentStorageClass)
        return DatasetRef(datasetType, dataId, id=1, run=run, conform=conform)
Example #27
 def makeDatasetRef(self,
                    datasetTypeName,
                    dimensions,
                    storageClass,
                    dataId,
                    id=None,
                    run=None):
     """Make a DatasetType and wrap it in a DatasetRef for a test"""
     datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
     if id is None:
         self.id += 1
         id = self.id
     if run is None:
         run = Run(id=1, collection="dummy")
     return DatasetRef(datasetType, dataId, id=id, run=run)
Example #28
 def testConstructor(self):
     """Test construction preserves values.
     """
     datasetTypeName = "test"
     storageClass = StorageClass("testref_StructuredData")
     dimensions = self.universe.extract(("instrument", "visit"))
     dataId = dict(instrument="DummyCam", visit=42)
     datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
     ref = DatasetRef(datasetType, dataId)
     self.assertEqual(ref.datasetType, datasetType)
     self.assertEqual(ref.dataId, dataId)
     self.assertIsNone(ref.producer)
     self.assertEqual(ref.predictedConsumers, dict())
     self.assertEqual(ref.actualConsumers, dict())
     self.assertEqual(ref.components, dict())
Example #29
    def _makeDatasetRef(self, datasetTypeName, dimensions, storageClass, dataId, *, id=None, run=None,
                        conform=True):
        # helper for makeDatasetRef

        # Pretend we have a parent if this looks like a composite
        compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
        parentStorageClass = StorageClass("component") if componentName else None

        datasetType = DatasetType(datasetTypeName, dimensions, storageClass,
                                  parentStorageClass=parentStorageClass)
        if id is None:
            self.id += 1
            id = self.id
        if run is None:
            run = "dummy"
        return DatasetRef(datasetType, dataId, id=id, run=run, conform=conform)
Example #30
 def insert(self, run: RunRecord,
            dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
     # Docstring inherited from DatasetRecordStorage.
     staticRow = {
         "dataset_type_id": self._dataset_type_id,
         self._runKeyColumn: run.key,
     }
     # Iterate over data IDs, transforming a possibly-single-pass iterable
     # into a list, and remembering any governor dimension values we see.
     governorValues = GovernorDimensionRestriction.makeEmpty(
         self.datasetType.dimensions.universe)
     dataIdList = []
     for dataId in dataIds:
         dataIdList.append(dataId)
         governorValues.update_extract(dataId)
     with self._db.transaction():
         # Insert into the static dataset table, generating autoincrement
         # dataset_id values.
         datasetIds = self._db.insert(self._static.dataset,
                                      *([staticRow] * len(dataIdList)),
                                      returnIds=True)
         assert datasetIds is not None
         # Update the summary tables for this collection in case this is the
         # first time this dataset type or these governor values will be
         # inserted there.
         self._summaries.update(run, self.datasetType,
                                self._dataset_type_id, governorValues)
         # Combine the generated dataset_id values and data ID fields to
         # form rows to be inserted into the tags table.
         protoTagsRow = {
             "dataset_type_id": self._dataset_type_id,
             self._collections.getCollectionForeignKeyName(): run.key,
         }
         tagsRows = [
             dict(protoTagsRow, dataset_id=dataset_id, **dataId.byName())
             for dataId, dataset_id in zip(dataIdList, datasetIds)
         ]
         # Insert those rows into the tags table.  This is where we'll
         # get any unique constraint violations.
         self._db.insert(self._tags, *tagsRows)
     for dataId, datasetId in zip(dataIdList, datasetIds):
         yield DatasetRef(
             datasetType=self.datasetType,
             dataId=dataId,
             id=datasetId,
             run=run.name,
         )