Example 1
 def testConstructor(self):
     """Test that construction preserves and validates values.
     """
     # Construct an unresolved ref.
     ref = DatasetRef(self.datasetType, self.dataId)
     self.assertEqual(ref.datasetType, self.datasetType)
     self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                      msg=ref.dataId)
     self.assertIsInstance(ref.dataId, DataCoordinate)
     # Constructing an unresolved ref with run and/or components should
     # fail.
     run = "somerun"
     with self.assertRaises(ValueError):
         DatasetRef(self.datasetType, self.dataId, run=run)
     # Passing a data ID that is missing dimensions should fail.
     with self.assertRaises(KeyError):
         DatasetRef(self.datasetType, {"instrument": "DummyCam"})
     # Constructing a resolved ref should preserve run as well as everything
     # else.
     ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
     self.assertEqual(ref.datasetType, self.datasetType)
     self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                      msg=ref.dataId)
     self.assertIsInstance(ref.dataId, DataCoordinate)
     self.assertEqual(ref.id, 1)
     self.assertEqual(ref.run, run)
Example 2
    def splitByStateFlags(self,
                          dataIds: Optional[DataCoordinateSequence] = None,
                          *,
                          expanded: bool = True,
                          complete: bool = True,
                          minimal: bool = True) -> SplitByStateFlags:
        """Given a sequence of data IDs, generate new equivalent sequences
        containing less information.

        Parameters
        ----------
        dataIds : `DataCoordinateSequence`, optional
            Data IDs to start from.  Defaults to ``self.allDataIds``.
            ``dataIds.hasRecords()`` and ``dataIds.hasFull()`` must both return
            `True`.
        expanded : `bool`, optional
            If `True` (default) include the original data IDs that contain all
            information in the result.
        complete : `bool`, optional
            If `True` (default) include data IDs for which ``hasFull()``
            returns `True` but ``hasRecords()`` does not.
        minimal : `bool`, optional
            If `True` (default) include data IDs that only contain values for
            required dimensions, for which ``hasFull()`` may not return `True`.

        Returns
        -------
        split : `SplitByStateFlags`
            A dataclass holding the indicated data IDs in attributes that
            correspond to the boolean keyword arguments.
        """
        if dataIds is None:
            dataIds = self.allDataIds
        assert dataIds.hasFull() and dataIds.hasRecords()
        result = SplitByStateFlags(expanded=dataIds)
        if complete:
            result.complete = DataCoordinateSequence(
                [DataCoordinate.standardize(e.full.byName(), graph=dataIds.graph)
                 for e in result.expanded],
                graph=dataIds.graph)
            self.assertTrue(result.complete.hasFull())
            self.assertFalse(result.complete.hasRecords())
        if minimal:
            result.minimal = DataCoordinateSequence(
                [DataCoordinate.standardize(e.byName(), graph=dataIds.graph)
                 for e in result.expanded],
                graph=dataIds.graph)
            self.assertEqual(result.minimal.hasFull(),
                             not dataIds.graph.implied)
            self.assertFalse(result.minimal.hasRecords())
        if not expanded:
            result.expanded = None
        return result
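
As a point of reference, here is a minimal sketch of how a test might consume the returned dataclass, assuming it runs as a method of the same test class; `split.chain()` is the accessor used in Example 30 below:

    # Hypothetical usage inside a test method of the same class.
    split = self.splitByStateFlags()
    # Each attribute mirrors the boolean keyword argument of the same name.
    self.assertTrue(all(d.hasRecords() for d in split.expanded))
    self.assertTrue(all(d.hasFull() for d in split.complete))
    # chain() iterates over whichever of the sequences were kept.
    for dataId in split.chain():
        self.assertIsInstance(dataId, DataCoordinate)
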
Example 3
 def visits(self):
     butler = Butler(self.root, collections=[self.outputRun])
     return {
         DataCoordinate.standardize(instrument="MegaPrime",
                                    visit=1038843,
                                    universe=butler.registry.dimensions): [
             DataCoordinate.standardize(instrument="MegaPrime",
                                        exposure=1038843,
                                        universe=butler.registry.dimensions)
         ]
     }
Example 4
 def translate(self,
               dataId2: dict,
               *,
               partial: bool = False,
               log: Log) -> Optional[DataCoordinate]:
     # Docstring inherited from PathElementHandler.
     rawDataId3 = self._translator(dataId2, partial=partial, log=log)
     if partial:
         return DataCoordinate.standardize(
             rawDataId3, universe=self._datasetType.dimensions.universe)
     else:
         return DataCoordinate.standardize(
             rawDataId3, graph=self._datasetType.dimensions)
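
The universe=/graph= split in this handler is the pattern that recurs throughout these examples. Below is a small, self-contained sketch of the difference, with illustrative dimension names; `DimensionUniverse()` is constructed the same way as in Examples 17 and 21, and `extract` is the same call used in Example 28:

    from lsst.daf.butler import DataCoordinate, DimensionUniverse

    universe = DimensionUniverse()

    # With universe=..., the dimensions are inferred from the keys that are
    # present, so a partial data ID is acceptable.
    partial = DataCoordinate.standardize({"instrument": "DummyCam"}, universe=universe)

    # With graph=..., the data ID must identify every required dimension of
    # that graph; a missing key raises KeyError (compare Example 1).
    graph = universe.extract(["instrument", "detector"])
    complete = DataCoordinate.standardize({"instrument": "DummyCam", "detector": 0},
                                          graph=graph)
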
Example 5
    def makeGraph(self, pipeline, collections, run, userQuery):
        """Create execution graph for a pipeline.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition, task names/classes and their configs.
        collections
            Expressions representing the collections to search for input
            datasets.  May be any of the types accepted by
            `lsst.daf.butler.CollectionSearch.fromExpression`.
        run : `str`, optional
            Name of the `~lsst.daf.butler.CollectionType.RUN` collection for
            output datasets, if it already exists.
        userQuery : `str`
            String that defines a user-provided selection for the registry;
            should be empty or `None` if there are no restrictions on data
            selection.

        Returns
        -------
        graph : `QuantumGraph`
            The constructed execution graph.

        Raises
        ------
        UserExpressionError
            Raised when user expression cannot be parsed.
        OutputExistsError
            Raised when output datasets already exist.
        Exception
            Other exception types may be raised by underlying registry
            classes.
        """
        scaffolding = _PipelineScaffolding(pipeline, registry=self.registry)

        instrument = pipeline.getInstrument()
        if isinstance(instrument, str):
            instrument = doImport(instrument)
        if instrument is not None:
            dataId = DataCoordinate.standardize(
                instrument=instrument.getName(),
                universe=self.registry.dimensions)
        else:
            dataId = DataCoordinate.makeEmpty(self.registry.dimensions)
        with scaffolding.connectDataIds(self.registry, collections, userQuery,
                                        dataId) as commonDataIds:
            scaffolding.resolveDatasetRefs(self.registry,
                                           collections,
                                           run,
                                           commonDataIds,
                                           skipExisting=self.skipExisting)
        return scaffolding.makeQuantumGraph()
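
A hedged sketch of a possible call site; only the makeGraph signature comes from the code above, while `builder`, the pipeline object, and the collection and run names are hypothetical:

    # Hypothetical driver code; `builder` is an instance of the class that
    # defines makeGraph, constructed elsewhere with a registry.
    qgraph = builder.makeGraph(
        pipeline,                                  # a Pipeline loaded elsewhere
        collections=["HSC/defaults"],              # input collection expression
        run="u/someone/test-run",                  # output RUN collection name
        userQuery="instrument = 'HSC' AND visit = 903334",
    )
    # qgraph is a QuantumGraph ready to be saved or executed.
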
Example 6
 def translate(self, dataId2: dict, *, partial: bool = False
               ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
     # Docstring inherited from PathElementHandler.
     rawDataId3, calibDate = self._translator(dataId2, partial=partial)
     if partial:
         return (
             DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe),
             calibDate,
         )
     else:
         return (
             DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions),
             calibDate
         )
Example 7
    def getDataId(self, id: DatasetId) -> DataCoordinate:
        """Return DataId for a dataset.

        Parameters
        ----------
        id : `DatasetId`
            Unique dataset identifier.

        Returns
        -------
        dataId : `DataCoordinate`
            DataId for the dataset.
        """
        # This query could return multiple rows (one for each tagged collection
        # the dataset is in, plus one for its run collection), and we don't
        # care which of those we get.
        sql = self._tags.select().where(
            sqlalchemy.sql.and_(
                self._tags.columns.dataset_id == id,
                self._tags.columns.dataset_type_id == self._dataset_type_id
            )
        ).limit(1)
        row = self._db.query(sql).fetchone()
        assert row is not None, "Should be guaranteed by caller and foreign key constraints."
        return DataCoordinate.standardize(
            {
                dimension.name: row[dimension.name]
                for dimension in self.datasetType.dimensions.required
            },
            graph=self.datasetType.dimensions)
Example 8
    def _calculate_dataset_info(self, header, filename):
        """Calculate a RawFileDatasetInfo from the supplied information.

        Parameters
        ----------
        header : `Mapping`
            Header from the dataset.
        filename : `str`
            Filename to use for error messages.

        Returns
        -------
        dataset : `RawFileDatasetInfo`
            The region, dataId, and observation information associated with
            this dataset.
        """
        obsInfo = ObservationInfo(header)
        dataId = DataCoordinate.standardize(instrument=obsInfo.instrument,
                                            exposure=obsInfo.exposure_id,
                                            detector=obsInfo.detector_num,
                                            universe=self.universe)
        if obsInfo.instrument != self.instrument.getName():
            raise ValueError(f"Incorrect instrument (expected {self.instrument.getName()}, "
                             f"got {obsInfo.instrument}) for file {filename}.")

        FormatterClass = self.instrument.getRawFormatter(dataId)
        region = self._calculate_region_from_dataset_metadata(obsInfo, header, FormatterClass)
        return RawFileDatasetInfo(obsInfo=obsInfo, region=region, dataId=dataId)
Example 9
    def testSkyMapPacking(self):
        """Test that packing Tract+Patch into an integer in Gen3 works and is
        self-consistent.

        Note that this packing does *not* use the same algorithm as Gen2 and
        hence generates different IDs, because the Gen2 algorithm is
        problematically tied to the *default* SkyMap for a particular camera,
        rather than the SkyMap actually used.
        """
        # SkyMap used by ci_hsc has only one tract, so the test coverage in
        # that area isn't great.  That's okay because that's tested in SkyMap;
        # what we care about here is that the converted repo has the necessary
        # metadata to construct and use these packers at all.
        for patch in [0, 43, 52]:
            dataId = self.butler.registry.expandDataId(
                skymap="discrete/ci_hsc", tract=0, patch=patch, band='r')
            packer1 = self.butler.registry.dimensions.makePacker(
                "tract_patch", dataId)
            packer2 = self.butler.registry.dimensions.makePacker(
                "tract_patch_band", dataId)
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)), dataId)
            self.assertEqual(packer1.pack(dataId, band='i'),
                             packer1.pack(dataId))
            self.assertNotEqual(packer2.pack(dataId, band='i'),
                                packer2.pack(dataId))
Example 10
 def prep(self):
     # Docstring inherited from RepoConverter.
     self.task.log.info(f"Looking for skymaps in root {self.root}.")
     for coaddName, datasetTypeName in SKYMAP_DATASET_TYPES.items():
         if not self.task.isDatasetTypeIncluded(datasetTypeName):
             continue
         try:
             exists = self.butler2.datasetExists(datasetTypeName)
         except AttributeError:
             # This mapper doesn't even define this dataset type.
             continue
         if not exists:
             continue
         instance = self.butler2.get(datasetTypeName)
         name = self.task.useSkyMap(instance, datasetTypeName)
         datasetType = DatasetType(datasetTypeName,
                                   dimensions=["skymap"],
                                   storageClass="SkyMap",
                                   universe=self.task.universe)
         dataId = DataCoordinate.standardize(skymap=name,
                                             universe=self.task.universe)
         struct = FoundSkyMap(name=name,
                              instance=instance,
                              coaddName=coaddName,
                              ref=DatasetRef(datasetType, dataId),
                              filename=self.butler2.getUri(datasetTypeName))
         self._foundSkyMapsByCoaddName[coaddName] = struct
         self.task.log.info("Found skymap %s in %s in %s.", name,
                            datasetTypeName, self.root)
     super().prep()
Example 11
    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        dataId3, calibDate = self.translate(nextDataId2, partial=True)

        def get_detectors(filename):
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType][calibDate].append(
                FileDataset(refs=refs, path=path, formatter=self._formatter)
            )
Example 12
 def translate(self, dataId2: dict, *, partial: bool = False
               ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
     assert partial is True, "We always require partial, to ignore 'ccdnum'"
     rawDataId3, calibDate = self._translator(dataId2, partial=partial)
     return (
         DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe),
         calibDate,
     )
Example 13
 def setUp(self):
     self.butler = Butler(os.path.join(getPackageDir("ci_hsc_gen3"), "DATA"), writeable=False,
                          collections=["HSC/calib/2013-06-17", "HSC/runs/ci_hsc"])
     # We need to provide a physical_filter value to fully identify a flat,
     # but this still leaves the band as an implied value that this data ID
     # doesn't know.
     self.flatMinimalDataId = DataCoordinate.standardize(
         instrument="HSC", detector=0, physical_filter="HSC-R",
         universe=self.butler.registry.dimensions,
     )
     # For a calexp, the minimal data ID just has exposure and detector,
     # so both band and physical_filter are implied and not known here.
     self.calexpMinimalDataId = DataCoordinate.standardize(
         instrument="HSC", detector=100, visit=903334,
         universe=self.butler.registry.dimensions,
     )
     # Parameters with bbox to test that logic still works on subimage gets.
     self.parameters = {"bbox": Box2I(Point2I(0, 0), Point2I(8, 7))}
Example 14
 def _makeDSRefVisit(self, dstype, visitId, universe):
     return DatasetRef(datasetType=dstype,
                       dataId=DataCoordinate.standardize(
                           detector="X",
                           visit=visitId,
                           physical_filter='a',
                           abstract_filter='b',
                           instrument='TestInstrument',
                           universe=universe))
Example 15
    def _calculate_dataset_info(self, header, filename):
        """Calculate a RawFileDatasetInfo from the supplied information.

        Parameters
        ----------
        header : `Mapping`
            Header from the dataset.
        filename : `str`
            Filename to use for error messages.

        Returns
        -------
        dataset : `RawFileDatasetInfo`
            The dataId, and observation information associated with this
            dataset.
        """
        # To avoid being slowed down for no reason, explicitly list here the
        # properties we need for the schema.  Use a dict whose values are
        # booleans, where True indicates that we are required to calculate
        # that property.
        ingest_subset = {
            "altaz_begin": False,
            "boresight_rotation_coord": False,
            "boresight_rotation_angle": False,
            "dark_time": False,
            "datetime_begin": True,
            "datetime_end": True,
            "detector_num": True,
            "exposure_group": False,
            "exposure_id": True,
            "exposure_time": True,
            "instrument": True,
            "tracking_radec": False,
            "object": False,
            "observation_counter": False,
            "observation_id": True,
            "observation_reason": False,
            "observation_type": True,
            "observing_day": False,
            "physical_filter": True,
            "science_program": False,
            "visit_id": False,
        }

        obsInfo = ObservationInfo(
            header,
            pedantic=False,
            filename=filename,
            required={k for k in ingest_subset if ingest_subset[k]},
            subset=set(ingest_subset))

        dataId = DataCoordinate.standardize(instrument=obsInfo.instrument,
                                            exposure=obsInfo.exposure_id,
                                            detector=obsInfo.detector_num,
                                            universe=self.universe)
        return RawFileDatasetInfo(obsInfo=obsInfo, dataId=dataId)
Example 16
 def unpack(self, packedId: int) -> DataCoordinate:
     # Docstring inherited from DataIdPacker.unpack
     d = {"skymap": self._skyMapName}
     if self._filterMax is not None:
         d["band"] = self.getFilterNameFromInt(packedId //
                                               self._tractPatchMax)
         packedId %= self._tractPatchMax
     d["tract"] = packedId // self._patchMax
     d["patch"] = packedId % self._patchMax
     return DataCoordinate.standardize(d, graph=self.dimensions)
Example 17
    def setUp(self):
        self.id = 0
        self.factory = FormatterFactory()
        self.universe = DimensionUniverse()
        self.dataId = DataCoordinate.makeEmpty(self.universe)

        # Dummy FileDescriptor for testing getFormatter
        self.fileDescriptor = FileDescriptor(
            Location("/a/b/c", "d"),
            StorageClass("DummyStorageClass", dict, None))
Example 18
 def unpack(self, packedId):
     # Docstring inherited from DimensionPacker.unpack
     observation, detector = divmod(packedId, self._detectorMax)
     return DataCoordinate.standardize(
         {
             "instrument": self._instrumentName,
             "detector": detector,
             self._observationName: observation,
         },
         graph=self.dimensions
     )
Example 19
 def testWithoutFilter(self):
     dimensions = DimensionGraph(universe=self.universe,
                                 names=["tract", "patch"])
     dataId = DataCoordinate.standardize(skymap=self.fixed["skymap"],
                                         tract=2,
                                         patch=6,
                                         universe=self.universe)
     packer = SkyMapDimensionPacker(self.fixed, dimensions)
     packedId = packer.pack(dataId)
     self.assertLessEqual(packedId.bit_length(), packer.maxBits)
     self.assertEqual(packer.unpack(packedId), dataId)
Example 20
 def _makeDSRefVisit(self, dstype, visitId, universe):
     return DatasetRef(
         datasetType=dstype,
         dataId=DataCoordinate.standardize(
             detector="X",
             visit=visitId,
             physical_filter="a",
             band="b",
             instrument="TestInstrument",
             universe=universe,
         ),
     )
Example 21
 def setUp(self):
     self.universe = DimensionUniverse()
     self.fixed = DataCoordinate.fromFullValues(
         DimensionGraph(universe=self.universe, names=["skymap"]),
         values=("unimportant", ),
     ).expanded(
         records={
             "skymap":
             self.universe["skymap"].RecordClass(
                 name="unimportant",
                 tract_max=5,
                 patch_nx_max=3,
                 patch_ny_max=3,
             )
         })
Example 22
    def _makeDatasetId(self, run: RunRecord, dataId: DataCoordinate,
                       idGenerationMode: DatasetIdGenEnum) -> uuid.UUID:
        """Generate dataset ID for a dataset.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the RUN collection for the dataset.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``.  `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: List[Tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", self.datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", self.datasetType.name),
                    ("run", run.name),
                ]
            else:
                raise ValueError(
                    f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
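
The determinism of the DATAID_TYPE* modes comes entirely from `uuid.uuid5`: the same namespace and the same serialized string always yield the same ID. A standard-library-only sketch of that property; the namespace UUID and the item string below are made up, not the class's actual NS_UUID:

    import uuid

    # Hypothetical namespace; the real class uses its own NS_UUID constant.
    NS_UUID = uuid.UUID("00000000-0000-0000-0000-000000000000")

    data = "dataset_type=flat,run=HSC/calib,detector=0,instrument=HSC"
    id1 = uuid.uuid5(NS_UUID, data)
    id2 = uuid.uuid5(NS_UUID, data)
    assert id1 == id2              # deterministic: same inputs, same UUID
    assert id1 != uuid.uuid4()     # UNIQUE mode draws a random UUID4 instead
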
Example 23
 def getDataId(self, id: int) -> DataCoordinate:
     # Docstring inherited from DatasetRecordStorageManager.
     # This query could return multiple rows (one for each tagged collection
     # the dataset is in, plus one for its run collection), and we don't
     # care which of those we get.
     sql = self._dynamic.select().where(
         sqlalchemy.sql.and_(
             self._dynamic.columns.dataset_id == id,
             self._dynamic.columns.dataset_type_id == self._dataset_type_id
         )
     ).limit(1)
     row = self._db.query(sql).fetchone()
     assert row is not None, "Should be guaranteed by caller and foreign key constraints."
     return DataCoordinate.standardize(
         {dimension.name: row[dimension.name] for dimension in self.datasetType.dimensions.required},
         graph=self.datasetType.dimensions
     )
Example 24
def _refFromConnection(butler: Butler, connection: DimensionedConnection,
                       dataId: DataId, **kwargs: Any) -> DatasetRef:
    """Create a DatasetRef for a connection in a collection.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The collection to point to.
    connection : `lsst.pipe.base.connectionTypes.DimensionedConnection`
        The connection defining the dataset type to point to.
    dataId
        The data ID for the dataset to point to.
    **kwargs
        Additional keyword arguments used to augment or construct
        a `~lsst.daf.butler.DataCoordinate`.

    Returns
    -------
    ref : `lsst.daf.butler.DatasetRef`
        A reference to a dataset compatible with ``connection``, with ID
        ``dataId``, in the collection pointed to by ``butler``.
    """
    universe = butler.registry.dimensions
    # DatasetRef only tests whether required dimensions are missing, not extras.
    _checkDimensionsMatch(universe, set(connection.dimensions), dataId.keys())
    dataId = DataCoordinate.standardize(dataId, **kwargs, universe=universe)

    # "skypix" is a PipelineTask alias for "some spatial index"; the Butler
    # doesn't understand it.  Code copied from TaskDatasetTypes.fromTaskDef.
    if "skypix" in connection.dimensions:
        datasetType = butler.registry.getDatasetType(connection.name)
    else:
        datasetType = connection.makeDatasetType(universe)

    try:
        butler.registry.getDatasetType(datasetType.name)
    except KeyError:
        raise ValueError(f"Invalid dataset type {connection.name}.")
    try:
        ref = DatasetRef(datasetType=datasetType, dataId=dataId)
        return ref
    except KeyError as e:
        raise ValueError(
            f"Dataset type ({connection.name}) and ID {dataId.byName()} not compatible."
        ) from e
Example 25
    def testReadingBadNewFileWithFullDataId(self):
        """If we try to read a new calexp with a full data ID, the reader
        should check the filters in the file for consistency with the data ID
        (and in this case, find them inconsistent, which should result in
        warnings and returning what's in the data ID).
        """
        self.skip_mock()
        calexpBadDataId = DataCoordinate.standardize(
            self.calexpMinimalDataId,
            band="g",
            physical_filter="HSC-G",
            visit_system=0,
        )
        self.assertTrue(calexpBadDataId.hasFull())

        # Some tests are only relevant when reading full calexps.
        # By definition a disassembled exposure will have a correct
        # filterlabel written out.
        # In this situation the test becomes moot since the filterLabel
        # formatter will not force a correct filter label into an
        # incorrect filter label based on DataId.
        _, components = self.butler.getURIs("calexp", calexpBadDataId)
        if components:
            raise unittest.SkipTest("Test not relevant because composite has been disassembled")

        with self.assertWarns(Warning):
            calexp = self.butler.get("calexp", calexpBadDataId)
        with self.assertWarns(Warning):
            calexpFilterLabel = self.butler.get("calexp.filter", calexpBadDataId)
        self.assertEqual(calexp.getFilter(), calexpFilterLabel)
        self.assertEqual(calexp.getFilter().bandLabel, calexpBadDataId["band"])
        self.assertEqual(calexp.getFilter().physicalLabel, calexpBadDataId["physical_filter"])
        self.assertEqual(calexpFilterLabel.bandLabel, calexpBadDataId["band"])
        self.assertEqual(calexpFilterLabel.physicalLabel, calexpBadDataId["physical_filter"])
        with self.assertWarns(Warning):
            calexpSub = self.butler.get("calexp", calexpBadDataId, parameters=self.parameters)
        self.assertEqual(calexp.getFilter(), calexpSub.getFilter())
Example 26
def ref_from_connection(butler, connection, data_id):
    """Create a DatasetRef for a connection in a collection.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The collection to point to.
    connection : `lsst.pipe.base.connectionTypes.DimensionedConnection`
        The connection defining the dataset type to point to.
    data_id : `Mapping` [`str`] or `lsst.daf.butler.DataCoordinate`
        The data ID for the dataset to point to.

    Returns
    -------
    ref : `lsst.daf.butler.DatasetRef`
        A reference to a dataset compatible with ``connection``, with ID
        ``data_id``, in the collection pointed to by ``butler``.
    """
    universe = butler.registry.dimensions
    data_id = DataCoordinate.standardize(data_id, universe=universe)
    return DatasetRef(
        datasetType=connection.makeDatasetType(universe),
        dataId=data_id,
    )
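
A sketch of a possible call site, assuming a PipelineTask instance `task`, a `butler`, and a connection named `outputCatalog` exist elsewhere (all three names are hypothetical; the ConnectionsClass construction mirrors Example 29):

    # Hypothetical call site.
    connections = task.config.ConnectionsClass(config=task.config)
    ref = ref_from_connection(
        butler,
        connections.outputCatalog,   # a DimensionedConnection of the task
        {"instrument": "HSC", "visit": 903334, "detector": 100},
    )
    # ref.datasetType comes from the connection; ref.dataId has been
    # standardized against the butler's dimension universe.
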
Example 27
    def pack_data_id(self, tract, patch, band=None):
        """Pack a skymap-based data ID into an integer.

        Parameters
        ----------
        tract : `int`
            Integer ID for the tract.
        patch : `tuple` (`int`) or `int`
            Either a 2-element (x, y) tuple (Gen2 patch ID) or a single integer
            (Gen3 patch ID, corresponding to the "sequential" patch index
            methods in this package).
        band : `str`, optional
            If provided, a filter name present in
            `SkyMapDimensionPacker.SUPPORTED_FILTERS` (which is aspirationally
            a list of all Gen3 'bands', but in practice may be missing some;
            see RFC-785).  If not provided, the packing algorithm that does
            not include the filter will be used.

        Returns
        -------
        packed : `int`
            Integer that corresponds to the data ID.
        max_bits : `int`
            Maximum number of bits that ``packed`` could have, assuming this
            skymap and presence or absence of ``band``.

        Notes
        -----
        This method uses a Gen3 `lsst.daf.butler.DimensionPacker` object under
        the hood to guarantee consistency with pure Gen3 code, but it does not
        require the caller to actually have a Gen3 butler available.  It does,
        however, require a filter value compatible with the Gen3 "band"
        dimension.

        This is a temporary interface intended to aid with the migration from
        Gen2 to Gen3 middleware.  It will be removed with the Gen2 middleware
        or when DM-31924 provides a longer-term replacement, whichever comes
        first.  Pure Gen3 code should use `lsst.daf.butler.DataCoordinate.pack`
        or other `lsst.daf.butler.DimensionPacker` interfaces.
        """
        from lsst.daf.butler import DataCoordinate, DimensionUniverse
        universe = DimensionUniverse()
        dummy_skymap_name = "unimportant"  # only matters to Gen3 registry
        tract_info = self[tract]
        patch_info = tract_info[patch]
        nx, ny = tract_info.getNumPatches()
        skymap_record = universe["skymap"].RecordClass(
            name=dummy_skymap_name,
            hash=self.getSha1(),
            tract_max=len(self),
            patch_nx_max=nx,  # assuming these are the same for all tracts for now
            patch_ny_max=ny,
        )
        skymap_data_id = DataCoordinate.standardize(
            skymap=dummy_skymap_name,
            universe=universe,
        ).expanded(records={"skymap": skymap_record}, )
        full_data_id = DataCoordinate.standardize(
            skymap=dummy_skymap_name,
            tract=tract_info.getId(),
            patch=tract_info.getSequentialPatchIndex(patch_info),
            universe=universe,
        )
        if band is None:
            packer = universe.makePacker("tract_patch", skymap_data_id)
        else:
            packer = universe.makePacker("tract_patch_band", skymap_data_id)
            full_data_id = DataCoordinate.standardize(full_data_id, band=band)
        return packer.pack(full_data_id, returnMaxBits=True)
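
A hedged usage sketch, assuming `skymap` is an instance of the class defining pack_data_id above; the tract, patch, and band values are illustrative:

    # Pack without a band: uses the "tract_patch" packer.
    packed, max_bits = skymap.pack_data_id(tract=0, patch=5)

    # Pack with a band: uses the "tract_patch_band" packer, so the result
    # generally differs from the band-less packing (compare Example 9).
    packed_band, max_bits_band = skymap.pack_data_id(tract=0, patch=5, band="r")

    assert packed.bit_length() <= max_bits
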
Example 28
def ingestSimulated(repo,
                    locations,
                    regex,
                    output_run,
                    transfer="auto",
                    ingest_type="rawexp"):
    """Ingests raw frames into the butler registry

    Parameters
    ----------
    repo : `str`
        URI to the repository.
    locations : `list` [`str`]
        Files to ingest and directories to search for files that match
        ``regex`` to ingest.
    regex : `str`
        Regex string used to find files in directories listed in locations.
    output_run : `str`
        The name of the run where datasets should be put.
    transfer : `str` or `None`
        The external data transfer type; by default "auto".
    ingest_type : `str`
        Ingest product data type.

    Raises
    ------
    Exception
        Raised if operations on configuration object fail.

    Notes
    -----
    This function inserts all datasets for an exposure within a transaction,
    guaranteeing that partial exposures are never ingested.  The exposure
    dimension record is inserted with `Registry.syncDimensionData` first
    (in its own transaction), which inserts only if a record with the same
    primary key does not already exist.  This allows different files within
    the same exposure to be ingested in different runs.
    """

    butler = Butler(repo, writeable=True)

    # make sure instrument and detector dimensions are populated
    with butler.registry.transaction():
        instrument_record = {
            "name": "simulator",
            "exposure_max": 600000,
            "detector_max": 6,
            "class_name": "spherex.instrument.SimulatorInstrument"
        }
        butler.registry.syncDimensionData("instrument", instrument_record)
        for idx in range(1, 7):
            detector_record = {
                "instrument": "simulator",
                "id": idx,
                "full_name": f"array{idx}"
            }
            butler.registry.syncDimensionData("detector", detector_record)

    dimension_universe = butler.registry.dimensions
    datasetType = DatasetType(ingest_type,
                              dimension_universe.extract(
                                  ("instrument", "detector", "exposure")),
                              "SPHERExImage",
                              universe=dimension_universe)
    # idempotent dataset type registration
    butler.registry.registerDatasetType(datasetType)

    # idempotent collection registration
    run = f"{ingest_type}r" if (output_run is None) else output_run
    butler.registry.registerCollection(run, type=CollectionType.RUN)

    n_failed = 0
    files = findFileResources(locations, regex)

    # example: sim_exposure_000000_array_1.fits or
    #   sim_exposure_000000_array_2_dark_current.fits
    pattern = re.compile(r"sim_exposure_(\d+)_array_(\d)[_,.]")

    # do we want to group observations?
    grp = datetime.date.today().strftime("%Y%m%d")

    datasets = []
    for file in files:
        # parse exposure and detector ids from file name
        m = pattern.search(file)
        if m is None:
            n_failed += 1
            logging.error(f"{file} does not match simulator file pattern")
            continue
        else:
            g = m.groups()
            if len(g) != 2:
                n_failed += 1
                logging.error(
                    f"Unable to get exposure and detector from file name: {file}"
                )
                continue
            else:
                [exposure_id, detector_id] = list(map(int, g))

        try:
            exposure_record = {
                "instrument": "simulator",
                "id": exposure_id,
                "name": f"{exposure_id:06d}",
                "group_name": f"{grp}",
                "timespan": Timespan(begin=None, end=None)
            }
            # idempotent insertion of individual dimension rows
            butler.registry.syncDimensionData("exposure", exposure_record)
        except Exception as e:
            n_failed += 1
            logging.error(
                f"Unable to insert exposure record for file {file}: {e}")
            continue

        dataId = DataCoordinate.standardize(
            instrument="simulator",
            detector=detector_id,
            exposure=exposure_id,
            universe=butler.registry.dimensions)
        ref = DatasetRef(datasetType, dataId=dataId)
        datasets.append(
            FileDataset(refs=ref, path=file, formatter=AstropyImageFormatter))

    with butler.transaction():
        butler.ingest(*datasets, transfer=transfer, run=run)
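
A hedged sketch of invoking the function; the repository path, search directory, and run name are made up, and the regex mirrors the simulator file pattern compiled above:

    # Hypothetical invocation.
    ingestSimulated(
        "/path/to/repo",                   # existing butler repository
        locations=["/data/simulated"],     # directories searched with `regex`
        regex=r"sim_exposure_\d+_array_\d.*\.fits$",
        output_run="simulated/raw/all",
        transfer="symlink",
        ingest_type="rawexp",
    )
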
Example 29
def makeQuantum(
    task: PipelineTask,
    butler: Butler,
    dataId: DataId,
    ioDataIds: Mapping[str, Union[DataId, Sequence[DataId]]],
) -> Quantum:
    """Create a Quantum for a particular data ID(s).

    Parameters
    ----------
    task : `lsst.pipe.base.PipelineTask`
        The task whose processing the quantum represents.
    butler : `lsst.daf.butler.Butler`
        The collection the quantum refers to.
    dataId : any data ID type
        The data ID of the quantum. Must have the same dimensions as
        ``task``'s connections class.
    ioDataIds : `collections.abc.Mapping` [`str`]
        A mapping keyed by input/output names. Values must be data IDs for
        single connections and sequences of data IDs for multiple connections.

    Returns
    -------
    quantum : `lsst.daf.butler.Quantum`
        A quantum for ``task``, when called with ``dataIds``.
    """
    connections = task.config.ConnectionsClass(config=task.config)

    try:
        _checkDimensionsMatch(butler.registry.dimensions,
                              connections.dimensions, dataId.keys())
    except ValueError as e:
        raise ValueError("Error in quantum dimensions.") from e

    inputs = defaultdict(list)
    outputs = defaultdict(list)
    for name in itertools.chain(connections.inputs,
                                connections.prerequisiteInputs):
        try:
            connection = connections.__getattribute__(name)
            _checkDataIdMultiplicity(name, ioDataIds[name],
                                     connection.multiple)
            ids = _normalizeDataIds(ioDataIds[name])
            for id in ids:
                ref = _refFromConnection(butler, connection, id)
                inputs[ref.datasetType].append(ref)
        except (ValueError, KeyError) as e:
            raise ValueError(f"Error in connection {name}.") from e
    for name in connections.outputs:
        try:
            connection = connections.__getattribute__(name)
            _checkDataIdMultiplicity(name, ioDataIds[name],
                                     connection.multiple)
            ids = _normalizeDataIds(ioDataIds[name])
            for id in ids:
                ref = _refFromConnection(butler, connection, id)
                outputs[ref.datasetType].append(ref)
        except (ValueError, KeyError) as e:
            raise ValueError(f"Error in connection {name}.") from e
    quantum = Quantum(
        taskClass=type(task),
        dataId=DataCoordinate.standardize(dataId,
                                          universe=butler.registry.dimensions),
        inputs=inputs,
        outputs=outputs,
    )
    return quantum
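
A hedged sketch of building the ioDataIds mapping and calling makeQuantum; the task, butler, data ID values, and connection names (`calexp`, `measCat`) are hypothetical and must match the task's connections class:

    # Hypothetical test fixture code.
    dataId = {"instrument": "HSC", "visit": 903334, "detector": 100}
    quantum = makeQuantum(
        task,
        butler,
        dataId,
        ioDataIds={
            # One data ID per single connection; a sequence of data IDs for
            # connections declared with multiple=True.
            "calexp": dataId,
            "measCat": dataId,
        },
    )
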
Example 30
 def testStandardize(self):
     """Test constructing a DataCoordinate from many different kinds of
     input via `DataCoordinate.standardize` and `DataCoordinate.subset`.
     """
     for n in range(5):
         dimensions = self.randomDimensionSubset()
         dataIds = self.randomDataIds(n=1).subset(dimensions)
         split = self.splitByStateFlags(dataIds)
         for m, dataId in enumerate(split.chain()):
             # Passing in any kind of DataCoordinate alone just returns
             # that object.
             self.assertIs(dataId, DataCoordinate.standardize(dataId))
             # Same if we also explicitly pass the dimensions we want.
             self.assertIs(dataId, DataCoordinate.standardize(dataId, graph=dataId.graph))
             # Same if we pass the dimensions and some irrelevant
             # kwargs.
             self.assertIs(dataId, DataCoordinate.standardize(dataId, graph=dataId.graph, htm7=12))
             # Test constructing a new data ID from this one with a
             # subset of the dimensions.
             # This is not possible for some combinations of
             # dimensions if hasFull is False (see
             # `DataCoordinate.subset` docs).
             newDimensions = self.randomDimensionSubset(n=1, graph=dataId.graph)
             if dataId.hasFull() or dataId.graph.required.issuperset(newDimensions.required):
                 newDataIds = [
                     dataId.subset(newDimensions),
                     DataCoordinate.standardize(dataId, graph=newDimensions),
                     DataCoordinate.standardize(dataId, graph=newDimensions, htm7=12),
                 ]
                 for newDataId in newDataIds:
                     with self.subTest(newDataId=newDataId, type=type(dataId)):
                         commonKeys = dataId.keys() & newDataId.keys()
                         self.assertTrue(commonKeys)
                         self.assertEqual(
                             [newDataId[k] for k in commonKeys],
                             [dataId[k] for k in commonKeys],
                         )
                         # This should never "downgrade" from
                         # Complete to Minimal or Expanded to Complete.
                         if dataId.hasRecords():
                             self.assertTrue(newDataId.hasRecords())
                         if dataId.hasFull():
                             self.assertTrue(newDataId.hasFull())
         # Start from a complete data ID, and pass its values in via several
         # different ways that should be equivalent.
         for dataId in split.complete:
             # Split the keys (dimension names) into two random subsets, so
             # we can pass some as kwargs below.
             keys1 = set(self.rng.sample(list(dataId.graph.dimensions.names),
                                         len(dataId.graph.dimensions)//2))
             keys2 = dataId.graph.dimensions.names - keys1
             newCompleteDataIds = [
                 DataCoordinate.standardize(dataId.full.byName(), universe=dataId.universe),
                 DataCoordinate.standardize(dataId.full.byName(), graph=dataId.graph),
                 DataCoordinate.standardize(DataCoordinate.makeEmpty(dataId.graph.universe),
                                            **dataId.full.byName()),
                 DataCoordinate.standardize(DataCoordinate.makeEmpty(dataId.graph.universe),
                                            graph=dataId.graph, **dataId.full.byName()),
                 DataCoordinate.standardize(**dataId.full.byName(), universe=dataId.universe),
                 DataCoordinate.standardize(graph=dataId.graph, **dataId.full.byName()),
                 DataCoordinate.standardize(
                     {k: dataId[k] for k in keys1},
                     universe=dataId.universe,
                     **{k: dataId[k] for k in keys2}
                 ),
                 DataCoordinate.standardize(
                     {k: dataId[k] for k in keys1},
                     graph=dataId.graph,
                     **{k: dataId[k] for k in keys2}
                 ),
             ]
             for newDataId in newCompleteDataIds:
                 with self.subTest(dataId=dataId, newDataId=newDataId, type=type(dataId)):
                     self.assertEqual(dataId, newDataId)
                     self.assertTrue(newDataId.hasFull())