def iterDatasets(self) -> Iterator[FileDataset]:
     # Docstring inherited from RepoConverter.
     # Iterate over reference catalog files.
     for refCat, dimension in self._refCats:
         datasetType = DatasetType(refCat,
                                   dimensions=[dimension],
                                   universe=self.task.universe,
                                   storageClass="SimpleCatalog")
         if self.subset is None:
             regex = re.compile(r"(\d+)\.fits")
             for fileName in os.listdir(
                     os.path.join(self.root, "ref_cats", refCat)):
                 m = regex.match(fileName)
                 if m is not None:
                     htmId = int(m.group(1))
                     dataId = self.task.registry.expandDataId(
                         {dimension: htmId})
                     yield FileDataset(path=os.path.join(
                         self.root, "ref_cats", refCat, fileName),
                                       refs=DatasetRef(datasetType, dataId))
         else:
             for begin, end in self.subset.skypix[dimension]:
                 for htmId in range(begin, end):
                     dataId = self.task.registry.expandDataId(
                         {dimension: htmId})
                     yield FileDataset(path=os.path.join(
                         self.root, "ref_cats", refCat, f"{htmId}.fits"),
                                       refs=DatasetRef(datasetType, dataId))
     yield from super().iterDatasets()
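For context, the `FileDataset` objects yielded by methods like this are normally handed to `Butler.ingest`, as several of the later examples do. A minimal consumption sketch, assuming `converter` is an instance of the class above and `butler` is a writeable `Butler` (both assumptions, not shown here):

# Hedged sketch: `converter` and `butler` are assumed to exist already.
datasets = list(converter.iterDatasets())
butler.ingest(*datasets, transfer="symlink")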
Example 2
 def rewrite(dataset: FileDataset) -> FileDataset:
     # Join the datastore root to the exported path.  This should yield
     # absolute paths that start with $CI_IMSIM_DIR.
     dataset.path = os.path.join(butler.datastore.root.ospath, dataset.path)
     # Remove symlinks in the path; this should result in absolute paths
     # that start with $TESTDATA_CI_IMSIM_DIR, because ci_imsim always
     # symlinks these datasets from there.
     dataset.path = os.path.realpath(dataset.path)
     return dataset
Example 3
    def testConstraints(self):
        """Test constraints model.  Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(
            ("visit", "physical_filter", "instrument"))
        dataId = {
            "visit": 52,
            "physical_filter": "V",
            "instrument": "DummyCamComp"
        }

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (("metric", sc1, True),
                                              ("metric2", sc1, False),
                                              ("metric33", sc1, True),
                                              ("metric2", sc2, True)):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName,
                              storageClass=sc.name,
                              file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName,
                                          dimensions,
                                          sc,
                                          dataId,
                                          conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]),
                                         transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]),
                                             transfer="link")
                        self.assertFalse(datastore.exists(ref))
Example 4
 def failOutsideRoot(obj, path, ref):
     """Can't ingest files outside of datastore root unless the
     transfer mode is 'auto'."""
     if mode == "auto":
         datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
         self.assertTrue(datastore.exists(ref))
     else:
         with self.assertRaises(RuntimeError):
             datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
         self.assertFalse(datastore.exists(ref))
Example 5
 def rewrite(dataset: FileDataset) -> FileDataset:
     # Join the datastore root to the exported path.  This should yield
     # absolute paths that start with $CI_HSC_GEN2_DIR.
     dataset.path = os.path.join(butler.datastore.root.ospath, dataset.path)
     # Remove symlinks in the path; this should result in absolute paths
     # that start with $TESTDATA_CI_HSC_DIR, because ci_hsc_gen2 always
     # symlinks these datasets from there.
     dataset.path = os.path.realpath(dataset.path)
     # Recompute the path relative to $TESTDATA_CI_HSC_DIR, so we can deal
     # with that moving around after the export file is created.
     dataset.path = os.path.relpath(dataset.path,
                                    getPackageDir("testdata_ci_hsc"))
     return dataset
Example 6
 def failInputDoesNotExist(obj, path, ref):
     """Can't ingest files if we're given a bad path."""
     with self.assertRaises(FileNotFoundError):
         datastore.ingest(FileDataset(
             path="this-file-does-not-exist.yaml", refs=ref),
                          transfer=mode)
     self.assertFalse(datastore.exists(ref))
Example 7
 def succeed(obj, path, ref):
     """Ingest a file by transferring it to the template
     location."""
     datastore.ingest(FileDataset(path=os.path.abspath(path),
                                  refs=ref),
                      transfer=mode)
     self.assertEqual(obj, datastore.get(ref))
Example 8
 def failOutsideRoot(obj, path, ref):
     """Can't ingest files outside of datastore root."""
     with self.assertRaises(RuntimeError):
         datastore.ingest(FileDataset(path=os.path.abspath(path),
                                      refs=ref),
                          transfer=None)
     self.assertFalse(datastore.exists(ref))
Example 9
 def succeed(obj, path, ref):
     """Ingest a file already in the datastore root."""
     # first move it into the root, and adjust the path accordingly
     path = shutil.copy(path, datastore.root)
     path = os.path.relpath(path, start=datastore.root)
     datastore.ingest(FileDataset(path=path, refs=ref), transfer=None)
     self.assertEqual(obj, datastore.get(ref))
Example 10
    def ingestExposureDatasets(self, exposure: RawExposureData, butler: Optional[Butler] = None
                               ) -> List[DatasetRef]:
        """Ingest all raw files in one exposure.

        Parameters
        ----------
        exposure : `RawExposureData`
            A structure containing information about the exposure to be
            ingested.  Must have `RawExposureData.records` populated and all
            data ID attributes expanded.
        butler : `lsst.daf.butler.Butler`, optional
            Butler to use for ingest.  If not provided, ``self.butler`` will
            be used.

        Returns
        -------
        refs : `list` of `lsst.daf.butler.DatasetRef`
            Dataset references for ingested raws.
        """
        if butler is None:
            butler = self.butler
        datasets = [FileDataset(path=os.path.abspath(file.filename),
                                refs=[DatasetRef(self.datasetType, d.dataId) for d in file.datasets],
                                formatter=file.FormatterClass)
                    for file in exposure.files]
        butler.ingest(*datasets, transfer=self.config.transfer)
        return [ref for dataset in datasets for ref in dataset.refs]
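A hedged sketch of how this method might be driven from calling code, assuming `task` is a configured instance of the ingest task and `exposures` is an iterable of prepared `RawExposureData` objects (both assumptions, not shown in this example):

# Hedged sketch: `task` and `exposures` are assumed to be prepared elsewhere,
# with exposure records populated and data IDs expanded as the docstring
# requires.
allRefs = []
for exposure in exposures:
    allRefs.extend(task.ingestExposureDatasets(exposure))
print(f"Ingested {len(allRefs)} raw datasets")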
Example 11
    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        dataId3, calibDate = self.translate(nextDataId2, partial=True)

        def get_detectors(filename):
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType][calibDate].append(
                FileDataset(refs=refs, path=path, formatter=self._formatter)
            )
Example 12
    def ingestExposureDatasets(self,
                               exposure: RawExposureData,
                               *,
                               run: Optional[str] = None) -> List[DatasetRef]:
        """Ingest all raw files in one exposure.

        Parameters
        ----------
        exposure : `RawExposureData`
            A structure containing information about the exposure to be
            ingested.  Must have `RawExposureData.records` populated and all
            data ID attributes expanded.
        run : `str`, optional
            Name of a RUN-type collection to write to, overriding
            ``self.butler.run``.

        Returns
        -------
        refs : `list` of `lsst.daf.butler.DatasetRef`
            Dataset references for ingested raws.
        """
        datasets = [
            FileDataset(path=os.path.abspath(file.filename),
                        refs=[
                            DatasetRef(self.datasetType, d.dataId)
                            for d in file.datasets
                        ],
                        formatter=file.FormatterClass)
            for file in exposure.files
        ]
        self.butler.ingest(*datasets, transfer=self.config.transfer, run=run)
        return [ref for dataset in datasets for ref in dataset.refs]
Example 13
 def iterDatasets(self) -> Iterator[FileDataset]:
     # Docstring inherited from RepoConverter.
     for struct in self._foundSkyMapsByCoaddName.values():
         if self.task.isDatasetTypeIncluded(struct.ref.datasetType.name):
             yield FileDataset(path=os.path.join(self.root,
                                                 struct.filename),
                               refs=struct.ref)
     yield from super().iterDatasets()
Example 14
 def failInputDoesNotExist(obj, path, ref):
     """Can't ingest files if we're given a bad path."""
     with self.assertRaises(FileNotFoundError):
         # Ensure the file does not look like it is in
         # datastore for auto mode
         datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                          transfer=mode)
     self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")
Example 15
 def failOutputExists(obj, path, ref):
     """Can't ingest files if transfer destination already
     exists."""
     with self.assertRaises(FileExistsError):
         datastore.ingest(FileDataset(
             path=os.path.abspath(path), refs=ref),
                          transfer=mode)
     self.assertFalse(datastore.exists(ref))
Example 16
 def handle(self, path: str, nextDataId2,
            datasets: Mapping[DatasetType, List[FileDataset]], *, log: Log,
            predicate: Callable[[DataCoordinate], bool]):
     # Docstring inherited from ParsedPathElementHandler.
     dataId3 = self.translate(nextDataId2, partial=False, log=log)
     if predicate(dataId3):
         datasets[self._datasetType].append(
             FileDataset(refs=[DatasetRef(self._datasetType, dataId3)],
                         path=path))
Example 17
    def ingestStrayLightData(self, butler, directory, *, transfer=None):
        """Ingest externally-produced y-band stray light data files into
        a data repository.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler initialized with the collection to ingest into.
        directory : `str`
            Directory containing yBackground-*.fits files.
        transfer : `str`, optional
            If not `None`, must be one of 'move', 'copy', 'hardlink', or
            'symlink', indicating how to transfer the files.
        """
        calibrationLabel = "y-LED-encoder-on"
        # LEDs covered up around 2018-01-01, no need for correction after that
        # date.
        datetime_end = datetime.datetime(2018, 1, 1)
        datasets = []
        # TODO: should we use a more generic name for the dataset type?
        # This is just the (rather HSC-specific) name used in Gen2, and while
        # the instances of this dataset are camera-specific, the datasetType
        # (which is used in the generic IsrTask) should not be.
        datasetType = DatasetType("yBackground",
                                  dimensions=("physical_filter", "detector",
                                              "calibration_label"),
                                  storageClass="StrayLightData",
                                  universe=butler.registry.dimensions)
        for detector in self.getCamera():
            path = os.path.join(directory,
                                f"ybackground-{detector.getId():03d}.fits")
            if not os.path.exists(path):
                log.warn(
                    f"No stray light data found for detector {detector.getId()} @ {path}."
                )
                continue
            ref = DatasetRef(datasetType,
                             dataId={
                                 "instrument": self.getName(),
                                 "detector": detector.getId(),
                                 "physical_filter": "HSC-Y",
                                 "calibration_label": calibrationLabel
                             })
            datasets.append(
                FileDataset(refs=ref,
                            path=path,
                            formatter=SubaruStrayLightDataFormatter))
        with butler.transaction():
            butler.registry.registerDatasetType(datasetType)
            butler.registry.insertDimensionData(
                "calibration_label", {
                    "instrument": self.getName(),
                    "name": calibrationLabel,
                    "datetime_begin": datetime.date.min,
                    "datetime_end": datetime_end
                })
            butler.ingest(*datasets, transfer=transfer)
Example 18
 def handle(self, path: str, nextDataId2,
            datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
            predicate: Callable[[DataCoordinate], bool]):
     # Docstring inherited from ParsedPathElementHandler.
     dataId3, calibDate = self.translate(nextDataId2, partial=False)
     if predicate(dataId3):
         datasets[self._datasetType][calibDate].append(
             FileDataset(
                 refs=[DatasetRef(self._datasetType, dataId3)],
                 path=path, formatter=self._formatter
             )
         )
Example 19
    def _mock_export(refs: Iterable[DatasetRef], *,
                     directory: Optional[str] = None,
                     transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """A mock of `Datastore.export` that satisfies the requirement that
        the refs passed in are included in the `FileDataset` objects
        returned.

        This can be used to construct a `Datastore` mock that can be used
        in repository export via::

            datastore = unittest.mock.Mock(spec=Datastore)
            datastore.export = DatastoreMock._mock_export

        """
        for ref in refs:
            yield FileDataset(refs=[ref],
                              path="mock/path",
                              formatter="lsst.daf.butler.formatters.json.JsonFormatter")
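Spelling out the docstring's recipe as a small sketch (here `DatastoreMock` is the class containing the method above, and `some_refs` is an assumed iterable of `DatasetRef`):

import unittest.mock

from lsst.daf.butler import Datastore

# Build a Datastore mock whose export() echoes the refs it is given back
# inside FileDataset objects, as described in the docstring above.
datastore = unittest.mock.Mock(spec=Datastore)
datastore.export = DatastoreMock._mock_export

# `some_refs` is assumed to be an iterable of DatasetRef.
exported = list(datastore.export(some_refs))
assert all(dataset.path == "mock/path" for dataset in exported)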
Example 20
 def export(self,
            refs: Iterable[DatasetRef],
            *,
            directory: Optional[str] = None,
            transfer: Optional[str] = None) -> Iterable[FileDataset]:
     # Docstring inherited from Datastore.export.
     for ref in refs:
         location, storedFileInfo = self._get_dataset_location_info(ref)
         if location is None:
             raise FileNotFoundError(f"Could not retrieve Dataset {ref}.")
         if transfer is None:
             # TODO: do we also need to return the readStorageClass somehow?
             yield FileDataset(refs=[ref],
                               path=location.pathInStore,
                               formatter=storedFileInfo.formatter)
         else:
             # TODO: add support for other transfer modes.  If we support
             # moving, this method should become transactional.
             raise NotImplementedError(
                 f"Transfer mode '{transfer}' not yet supported.")
Example 21
    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath),
                                                 refs=ref),
                                     transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(not uri.scheme or uri.scheme == "file",
                                    f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath),
                                    f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next loop iteration,
                    # since it will get the same file name in the store
                    datastore.remove(ref)
Example 22
 def failNotImplemented(obj, path, ref):
     with self.assertRaises(NotImplementedError):
         datastore.ingest(FileDataset(path=path, refs=ref),
                          transfer=None)
Example 23
    def ingestStrayLightData(self,
                             butler,
                             directory,
                             *,
                             transfer=None,
                             collection=None,
                             labels=()):
        """Ingest externally-produced y-band stray light data files into
        a data repository.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler to write with.  Any collections associated with it are
            ignored in favor of ``collection`` and/or ``labels``.
        directory : `str`
            Directory containing yBackground-*.fits files.
        transfer : `str`, optional
            If not `None`, must be one of 'move', 'copy', 'hardlink', or
            'symlink', indicating how to transfer the files.
        collection : `str`, optional
            Name to use for the calibration collection that associates all
            datasets with a validity range.  If this collection already exists,
            it must be a `~CollectionType.CALIBRATION` collection, and it must
            not have any datasets that would conflict with those inserted by
            this method.  If `None`, a collection name is worked out
            automatically from the instrument name and other metadata by
            calling ``makeCuratedCalibrationCollectionName``, but this
            default name may not work well for long-lived repositories unless
            ``labels`` is also provided (and changed every time curated
            calibrations are ingested).
        labels : `Sequence` [ `str` ], optional
            Extra strings to include in collection names, after concatenating
            them with the standard collection name delimiter.  If provided,
            these are inserted into the names of the `~CollectionType.RUN`
            collections that datasets are inserted directly into, as well as
            the `~CollectionType.CALIBRATION` collection if it is generated
            automatically (i.e. if ``collection is None``).  Usually this is
            just the name of the ticket on which the calibration collection is
            being created.
        """
        # Register the CALIBRATION collection that adds validity ranges.
        # This does nothing if it is already registered.
        if collection is None:
            collection = self.makeCalibrationCollectionName(*labels)
        butler.registry.registerCollection(collection,
                                           type=CollectionType.CALIBRATION)

        # Register the RUN collection that holds these datasets directly.  We
        # only need one because there is only one validity range and hence no
        # data ID conflicts even when there are no validity ranges.
        run = self.makeUnboundedCalibrationRunName(*labels)
        butler.registry.registerRun(run)

        # LEDs covered up around 2018-01-01, no need for correction after that
        # date.
        timespan = Timespan(begin=None,
                            end=astropy.time.Time("2018-01-01",
                                                  format="iso",
                                                  scale="tai"))
        datasets = []
        # TODO: should we use a more generic name for the dataset type?
        # This is just the (rather HSC-specific) name used in Gen2, and while
        # the instances of this dataset are camera-specific, the datasetType
        # (which is used in the generic IsrTask) should not be.
        datasetType = DatasetType("yBackground",
                                  dimensions=(
                                      "physical_filter",
                                      "detector",
                                  ),
                                  storageClass="StrayLightData",
                                  universe=butler.registry.dimensions,
                                  isCalibration=True)
        for detector in self.getCamera():
            path = os.path.join(directory,
                                f"ybackground-{detector.getId():03d}.fits")
            if not os.path.exists(path):
                log.warning(
                    f"No stray light data found for detector {detector.getId()} @ {path}."
                )
                continue
            ref = DatasetRef(datasetType,
                             dataId={
                                 "instrument": self.getName(),
                                 "detector": detector.getId(),
                                 "physical_filter": "HSC-Y"
                             })
            datasets.append(
                FileDataset(refs=ref,
                            path=path,
                            formatter=SubaruStrayLightDataFormatter))
        butler.registry.registerDatasetType(datasetType)
        with butler.transaction():
            butler.ingest(*datasets, transfer=transfer, run=run)
            refs = []
            for dataset in datasets:
                refs.extend(dataset.refs)
            butler.registry.certify(collection, refs, timespan)
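A sketch of how the ``collection`` and ``labels`` options described above might be exercised by a caller; the butler path, ticket label, and collection name are illustrative assumptions:

from lsst.daf.butler import Butler
from lsst.obs.subaru import HyperSuprimeCam  # assumed import path

butler = Butler("/path/to/repo", writeable=True)
instrument = HyperSuprimeCam()

# Default collection naming, with a ticket label folded into the RUN and
# CALIBRATION collection names.
instrument.ingestStrayLightData(butler, "/path/to/ybackground",
                                transfer="copy", labels=("DM-XXXXX",))

# Or pin the CALIBRATION collection name explicitly.
instrument.ingestStrayLightData(butler, "/path/to/ybackground",
                                transfer="copy",
                                collection="HSC/calib/straylight")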
Example 24
    def run(self,
            locations,
            run=None,
            file_filter=r".*Photodiode_Readings.*txt",
            track_file_attrs=None):
        """Ingest photodiode data into a Butler data repository.

        Parameters
        ----------
        locations : iterable over `lsst.resources.ResourcePath`
            Files to ingest and directories to search for files that
            match ``file_filter``.
        run : `str`, optional
            Name of the RUN-type collection to write to,
            overriding the default derived from the instrument
            name.
        file_filter : `str`, optional
            Regex used to select files found in the directories
            listed in ``locations``.
        track_file_attrs : `bool`, optional
            Control whether file attributes such as the size or
            checksum should be tracked by the datastore.  Whether
            this parameter is honored depends on the specific
            datastore implementation.

        Returns
        -------
        refs : `list` [`lsst.daf.butler.DatasetRef`]
            Dataset references for the ingested photodiode datasets.

        Raises
        ------
        RuntimeError
            Raised if the number of exposures found for a photodiode
            file is not one.
        """
        files = ResourcePath.findFileResources(locations, file_filter)

        registry = self.butler.registry
        registry.registerDatasetType(self.datasetType)

        # Find and register run that we will ingest to.
        if run is None:
            run = self.instrument.makeCollectionName("calib", "photodiode")
        registry.registerCollection(run, type=CollectionType.RUN)

        # Use datasetIds that match the raw exposure data.
        if self.butler.registry.supportsIdGenerationMode(
                DatasetIdGenEnum.DATAID_TYPE_RUN):
            mode = DatasetIdGenEnum.DATAID_TYPE_RUN
        else:
            mode = DatasetIdGenEnum.UNIQUE

        refs = []
        numExisting = 0
        numFailed = 0
        for inputFile in files:
            # Convert the file into the right class.
            with inputFile.as_local() as localFile:
                calib = PhotodiodeCalib.readTwoColumnPhotodiodeData(
                    localFile.ospath)

            dayObs = calib.getMetadata()['day_obs']
            seqNum = calib.getMetadata()['seq_num']

            # Find the associated exposure information.
            whereClause = "exposure.day_obs=dayObs and exposure.seq_num=seqNum"
            instrumentName = self.instrument.getName()
            exposureRecords = [
                rec for rec in registry.queryDimensionRecords(
                    "exposure",
                    instrument=instrumentName,
                    where=whereClause,
                    bind={
                        "dayObs": dayObs,
                        "seqNum": seqNum
                    })
            ]

            nRecords = len(exposureRecords)
            if nRecords == 1:
                exposureId = exposureRecords[0].id
                calib.updateMetadata(camera=self.camera, exposure=exposureId)
            elif nRecords == 0:
                numFailed += 1
                self.log.warning(
                    "Skipping instrument %s and dayObs/seqNum %d %d: no exposures found.",
                    instrumentName, dayObs, seqNum)
                continue
            else:
                numFailed += 1
                self.log.warning(
                    "Multiple exposure entries found for instrument %s and "
                    "dayObs/seqNum %d %d.", instrumentName, dayObs, seqNum)
                continue

            # Generate the dataId for this file.
            dataId = DataCoordinate.standardize(
                instrument=self.instrument.getName(),
                exposure=exposureId,
                universe=self.universe,
            )

            # If this already exists, we should skip it and continue.
            existing = {
                ref.dataId
                for ref in self.butler.registry.queryDatasets(
                    self.datasetType, collections=[run], dataId=dataId)
            }
            if existing:
                self.log.debug(
                    "Skipping instrument %s and dayObs/seqNum %d %d: already exists in run %s.",
                    instrumentName, dayObs, seqNum, run)
                numExisting += 1
                continue

            # Ingest must work from a file, but we can't use the
            # original, as we've added new metadata and reformatted
            # it.  Write it to a temp file that we can use to ingest.
            # If we can have the files written appropriately, this
            # will be a direct ingest of those files.
            with ResourcePath.temporary_uri(suffix=".fits") as tempFile:
                calib.writeFits(tempFile.ospath)

                ref = DatasetRef(self.datasetType, dataId)
                dataset = FileDataset(path=tempFile,
                                      refs=ref,
                                      formatter=FitsGenericFormatter)

                # No try, as if this fails, we should stop.
                self.butler.ingest(dataset,
                                   transfer=self.config.transfer,
                                   run=run,
                                   idGenerationMode=mode,
                                   record_validation_info=track_file_attrs)
                self.log.info("Photodiode %s:%d (%d/%d) ingested successfully",
                              instrumentName, exposureId, dayObs, seqNum)
                refs.extend(dataset.refs)

        if numExisting != 0:
            self.log.warning(
                "Skipped %d entries that already existed in run %s",
                numExisting, run)
        if numFailed != 0:
            raise RuntimeError(
                f"Failed to ingest {numFailed} entries due to missing exposure information."
            )
        return refs
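Assuming ``task`` is an already-constructed instance of this photodiode ingest task (its construction, configuration, and butler are outside this excerpt), a call might look like the following sketch; the paths and run name are illustrative:

from lsst.resources import ResourcePath

# Hedged sketch: `task` is assumed to already carry a butler, instrument,
# camera, and config.
locations = [ResourcePath("/path/to/photodiode/data/")]
refs = task.run(locations, run="LSSTCam/calib/photodiode")
print(f"Ingested {len(refs)} photodiode datasets")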
Example 25
    except lsst.daf.butler.registry.MissingCollectionError:
        pass  # Already removed; nothing to do


logging.info("Preparing destination repository %s...", DEST_DIR)
_remove_refcat_run(dest_repo, DEST_RUN)
dest_repo.registry.registerCollection(DEST_RUN, CollectionType.RUN)
for src_cat, dest_cat in REFCATS.items():
    src_type = src_repo.registry.getDatasetType(src_cat)
    dest_type = _rename_dataset_type(src_type, dest_cat)
    dest_repo.registry.registerDatasetType(dest_type)
dest_repo.registry.refresh()

logging.info("Searching for refcats in %s:%s...", args.src_dir,
             args.src_collection)
query = f"htm{HTM_LEVEL} in ({','.join(id_ranges)})"
datasets = []
for src_ref in src_repo.registry.queryDatasets(REFCATS.keys(),
                                               where=query,
                                               findFirst=True):
    src_type = src_ref.datasetType
    dest_type = _rename_dataset_type(src_type, REFCATS[src_type.name])
    dest_ref = DatasetRef(dest_type, src_ref.dataId)
    datasets.append(FileDataset(path=src_repo.getURI(src_ref), refs=dest_ref))

logging.info("Copying refcats...")
dest_repo.ingest(*datasets, transfer="copy")

logging.info("%d refcat shards copied to %s:%s", len(datasets), DEST_DIR,
             DEST_RUN)
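This script references a ``_rename_dataset_type`` helper that is not included in the excerpt; a plausible reconstruction (an assumption, not the original code) would simply copy the dataset type under a new name:

from lsst.daf.butler import DatasetType


def _rename_dataset_type(src_type: DatasetType, new_name: str) -> DatasetType:
    # Assumed behaviour: keep dimensions and storage class, change only
    # the name, so the copied refcat shards register cleanly.
    return DatasetType(new_name,
                       dimensions=src_type.dimensions,
                       storageClass=src_type.storageClass)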
Example 26
    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(
            ["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass(
            "StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions,
                                          storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument",
                                            {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {
            "instrument": "DummyCamComp",
            "name": "d-r",
            "abstract_filter": "R"
        })
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {
                    "instrument": "DummyCamComp",
                    "id": detector,
                    "full_name": f"detector{detector}"
                })

        butler.registry.insertDimensionData(
            "visit", {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r"
            }, {
                "instrument": "DummyCamComp",
                "id": 424,
                "name": "fourtwentyfour",
                "physical_filter": "d-r"
            })

        formatter = doImport(
            "lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {
                "instrument": "DummyCamComp",
                "visit": 423,
                "detector": detector
            }
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(
                FileDataset(path=metricFile, refs=[refIn],
                            formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {
                "instrument": "DummyCamComp",
                "visit": 424,
                "detector": detector
            }
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(
            FileDataset(path=metricFile,
                        refs=refs,
                        formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.remove(datasetTypeName, dataId1)
        with self.assertRaises(LookupError):
            butler.datasetExists(datasetTypeName, dataId1)
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
Example 27
def ingestSimulated(repo,
                    locations,
                    regex,
                    output_run,
                    transfer="auto",
                    ingest_type="rawexp"):
    """Ingests raw frames into the butler registry

    Parameters
    ----------
    repo : `str`
        URI to the repository.
    locations : `list` [`str`]
        Files to ingest and directories to search for files that match
        ``regex`` to ingest.
    regex : `str`
        Regex string used to find files in directories listed in locations.
    output_run : `str`
        Name of the RUN-type collection where datasets should be put.
    transfer : `str` or None
        The external data transfer mode, by default "auto".
    ingest_type : `str`
        Dataset type name for the ingested products.

    Raises
    ------
    Exception
        Raised if operations on configuration object fail.

    Notes
    -----
    This method inserts all datasets for an exposure within a transaction,
    guaranteeing that partial exposures are never ingested.  The exposure
    dimension record is inserted with `Registry.syncDimensionData` first
    (in its own transaction), which inserts only if a record with the same
    primary key does not already exist.  This allows different files within
    the same exposure to be ingested in different runs.
    """

    butler = Butler(repo, writeable=True)

    # make sure instrument and detector dimensions are populated
    with butler.registry.transaction():
        instrument_record = {
            "name": "simulator",
            "exposure_max": 600000,
            "detector_max": 6,
            "class_name": "spherex.instrument.SimulatorInstrument"
        }
        butler.registry.syncDimensionData("instrument", instrument_record)
        for idx in range(1, 7):
            detector_record = {
                "instrument": "simulator",
                "id": idx,
                "full_name": f"array{idx}"
            }
            butler.registry.syncDimensionData("detector", detector_record)

    dimension_universe = butler.registry.dimensions
    datasetType = DatasetType(ingest_type,
                              dimension_universe.extract(
                                  ("instrument", "detector", "exposure")),
                              "SPHERExImage",
                              universe=dimension_universe)
    # idempotent dataset type registration
    butler.registry.registerDatasetType(datasetType)

    # idempotent collection registration
    run = f"{ingest_type}r" if (output_run is None) else output_run
    butler.registry.registerCollection(run, type=CollectionType.RUN)

    n_failed = 0
    files = findFileResources(locations, regex)

    # example: sim_exposure_000000_array_1.fits or
    #   sim_exposure_000000_array_2_dark_current.fits
    pattern = re.compile(r"sim_exposure_(\d+)_array_(\d)[_,.]")

    # do we want to group observations?
    grp = datetime.date.today().strftime("%Y%m%d")

    datasets = []
    for file in files:
        # parse exposure and detector ids from file name
        m = pattern.search(file)
        if m is None:
            n_failed += 1
            logging.error(f"{file} does not match simulator file pattern")
            continue
        else:
            g = m.groups()
            if len(g) != 2:
                n_failed += 1
                logging.error(
                    f"Unable to get exposure and detector from file name: {file}"
                )
                continue
            else:
                [exposure_id, detector_id] = list(map(int, g))

        try:
            exposure_record = {
                "instrument": "simulator",
                "id": exposure_id,
                "name": f"{exposure_id:06d}",
                "group_name": f"{grp}",
                "timespan": Timespan(begin=None, end=None)
            }
            # idempotent insertion of individual dimension rows
            butler.registry.syncDimensionData("exposure", exposure_record)
        except Exception as e:
            n_failed += 1
            logging.error(
                f"Unable to insert exposure record for file {file}: {e}")
            continue

        dataId = DataCoordinate.standardize(
            instrument="simulator",
            detector=detector_id,
            exposure=exposure_id,
            universe=butler.registry.dimensions)
        ref = DatasetRef(datasetType, dataId=dataId)
        datasets.append(
            FileDataset(refs=ref, path=file, formatter=AstropyImageFormatter))

    with butler.transaction():
        butler.ingest(*datasets, transfer=transfer, run=run)
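A minimal invocation sketch for this function; the repository path, search directory, and run name are illustrative:

# Illustrative arguments; adjust to the actual repository and data layout.
ingestSimulated("/path/to/spherex_repo",
                locations=["/path/to/simulated/frames"],
                regex=r"sim_exposure_.*\.fits",
                output_run="simulator/raw/all",
                transfer="symlink")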
Example 28
    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (("metric", dataId1, sc1, (False, True, False), True),
                                                     ("metric2", dataId1, sc1, (False, False, False), False),
                                                     ("metric2", dataId2, sc1, (True, False, False), False),
                                                     ("metric33", dataId2, sc2, (True, True, False), True),
                                                     ("metric2", dataId1, sc2, (False, True, False), True)):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                          conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(childDatastore.exists(ref), expected,
                                         f"Testing presence of {ref} in datastore {childDatastore.name}")

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores currently means InMemory,
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of ingested {ref} in datastore"
                                             f" {childDatastore.name}")

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))
Example 29
 def failNotImplemented(obj, path, ref):
     with self.assertRaises(NotImplementedError):
         datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
Example 30
    def test_ingest(self):

        fitsPath = os.path.join(TESTDIR, "data", "small.fits")

        formatter = FORMATTERS[0]
        datasetTypeName, formatterCls = (formatter["dataset_type"],
                                         formatter["formatter_cls"])

        datasetType = self.butler.registry.getDatasetType(datasetTypeName)
        datasets = []
        for exposure in range(3, 5):
            for detector in range(6):
                # use the same fits to test ingest
                if not os.path.exists(fitsPath):
                    log.warning(
                        f"No data found for detector {detector}, exposure {exposure} @ {fitsPath}."
                    )
                    continue
                ref = DatasetRef(datasetType,
                                 dataId={
                                     "instrument": INSTRUMENT_NAME,
                                     "detector": detector,
                                     "exposure": exposure * 11
                                 })
                datasets.append(
                    FileDataset(refs=ref,
                                path=fitsPath,
                                formatter=formatterCls))

        # register new collection
        # run = "rawIngestedRun"
        # self.butler.registry.registerCollection(run, type=CollectionType.RUN)

        # collection is registered as a part of setUp
        run = self.collection

        with self.butler.transaction():
            for exposure in range(3, 5):
                expid = exposure * 11
                self.butler.registry.insertDimensionData(
                    "exposure", {
                        "instrument": INSTRUMENT_NAME,
                        "id": expid,
                        "name": f"{expid}",
                        "group_name": "day1",
                        "timespan": Timespan(begin=None, end=None)
                    })
            # transfer can be 'auto', 'move', 'copy', 'hardlink', 'relsymlink'
            # or 'symlink'
            self.butler.ingest(*datasets, transfer="symlink", run=run)

        # verify that 12 files were ingested (2 exposures for each detector)
        refsSet = set(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run]))
        self.assertEqual(
            len(refsSet), 12,
            f"Collection {run} should have 12 elements after ingest")

        # verify that data id is present
        dataid = {"exposure": 44, "detector": 5, "instrument": INSTRUMENT_NAME}
        refsList = list(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run],
                                               dataId=dataid))
        self.assertEqual(
            len(refsList), 1,
            f"Collection {run} should have 1 element with {dataid}")