Example #1
 def testTimescales(self):
     """Test time scale conversion occurs on comparison."""
     ts1 = Timespan(begin=astropy.time.Time('2013-06-17 13:34:45.775000', scale='tai', format='iso'),
                    end=astropy.time.Time('2013-06-17 13:35:17.947000', scale='tai', format='iso'))
     ts2 = Timespan(begin=astropy.time.Time('2013-06-17T13:34:10.775', scale='utc', format='isot'),
                    end=astropy.time.Time('2013-06-17T13:34:42.947', scale='utc', format='isot'))
     self.assertEqual(ts1, ts2, f"Compare {ts1} with {ts2}")
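The snippets on this page are excerpts from a larger test suite, so imports and the enclosing class are omitted. A minimal standalone sketch of the same cross-scale comparison, assuming `Timespan` is the one exported by `lsst.daf.butler`:

import astropy.time
from lsst.daf.butler import Timespan

# TAI led UTC by 35 s in June 2013, so these spans describe the same
# interval expressed in two different time scales.
tai = astropy.time.Time('2013-06-17 13:34:45.775', scale='tai', format='iso')
utc = astropy.time.Time('2013-06-17T13:34:10.775', scale='utc', format='isot')
assert Timespan(begin=tai, end=None) == Timespan(begin=utc, end=None)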
Example #2
    def testFuture(self):
        """Check that we do not get warnings from future dates."""

        # Astropy will give "dubious year" for UTC five years in the future
        # so hide these expected warnings from the test output
        with warnings.catch_warnings():
            warnings.simplefilter(
                "ignore", category=astropy.utils.exceptions.AstropyWarning)
            if erfa is not None:
                warnings.simplefilter("ignore", category=erfa.ErfaWarning)
            ts1 = Timespan(begin=astropy.time.Time(self.timestamps[0],
                                                   scale='utc',
                                                   format='iso'),
                           end=astropy.time.Time('2099-06-17 13:35:17.947000',
                                                 scale='utc',
                                                 format='iso'))
            ts2 = Timespan(begin=astropy.time.Time(self.timestamps[0],
                                                   scale='utc',
                                                   format='iso'),
                           end=astropy.time.Time('2099-06-17 13:35:17.947000',
                                                 scale='utc',
                                                 format='iso'))

        # unittest cannot assert that no warnings were raised, so run the
        # comparison, raise one deliberate warning, and check that it is the
        # only warning caught.
        with self.assertWarns(Warning) as cm:
            self.assertEqual(ts1, ts2)
            warnings.warn("deliberate")
        self.assertEqual(str(cm.warning), "deliberate")
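For comparison, a sketch of the same "no unexpected warnings" check written with explicit recording instead of `assertWarns` (same test context assumed):

        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            self.assertEqual(ts1, ts2)
            warnings.warn("deliberate")
        self.assertEqual([str(w.message) for w in caught], ["deliberate"])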
Example #3
 def setUp(self):
     start = astropy.time.Time('2020-01-01T00:00:00', format="isot", scale="tai")
     offset = astropy.time.TimeDelta(60, format="sec")
     self.timestamps = [start + offset*n for n in range(3)]
     self.timespans = [Timespan(begin=None, end=None)]
     self.timespans.extend(Timespan(begin=None, end=t) for t in self.timestamps)
     self.timespans.extend(Timespan(begin=t, end=None) for t in self.timestamps)
     self.timespans.extend(Timespan(begin=t, end=t) for t in self.timestamps)
     self.timespans.extend(Timespan(begin=a, end=b)
                           for a, b in itertools.combinations(self.timestamps, 2))
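With the three timestamps built here the fixture ends up holding 13 timespans: one fully unbounded, three bounded only at the end, three bounded only at the beginning, three instantaneous, plus C(3, 2) = 3 spans built from ordered timestamp pairs.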
Example #4
    def test_RangeTimespanType(self):
        start = astropy.time.Time('2020-01-01T00:00:00', format="isot", scale="tai")
        offset = astropy.time.TimeDelta(60, format="sec")
        timestamps = [start + offset*n for n in range(3)]
        timespans = [Timespan(begin=None, end=None)]
        timespans.extend(Timespan(begin=None, end=t) for t in timestamps)
        timespans.extend(Timespan(begin=t, end=None) for t in timestamps)
        timespans.extend(Timespan(begin=a, end=b) for a, b in itertools.combinations(timestamps, 2))
        db = self.makeEmptyDatabase(origin=1)
        with db.declareStaticTables(create=True) as context:
            tbl = context.addTable(
                "tbl",
                ddl.TableSpec(
                    fields=[
                        ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True),
                        ddl.FieldSpec(name="timespan", dtype=_RangeTimespanType),
                    ],
                )
            )
        rows = [{"id": n, "timespan": t} for n, t in enumerate(timespans)]
        db.insert(tbl, *rows)

        # Test basic round-trip through database.
        self.assertEqual(
            rows,
            [dict(row) for row in db.query(tbl.select().order_by(tbl.columns.id)).fetchall()]
        )

        # Test that Timespan's Python methods are consistent with our usage of
        # half-open ranges and PostgreSQL operators on ranges.
        def subquery(alias: str) -> sqlalchemy.sql.FromClause:
            return sqlalchemy.sql.select(
                [tbl.columns.id.label("id"), tbl.columns.timespan.label("timespan")]
            ).select_from(
                tbl
            ).alias(alias)
        sq1 = subquery("sq1")
        sq2 = subquery("sq2")
        query = sqlalchemy.sql.select([
            sq1.columns.id.label("n1"),
            sq2.columns.id.label("n2"),
            sq1.columns.timespan.overlaps(sq2.columns.timespan).label("overlaps"),
        ])

        dbResults = {
            (row[query.columns.n1], row[query.columns.n2]): row[query.columns.overlaps]
            for row in db.query(query)
        }
        pyResults = {
            (n1, n2): t1.overlaps(t2)
            for (n1, t1), (n2, t2) in itertools.product(enumerate(timespans), enumerate(timespans))
        }
        self.assertEqual(pyResults, dbResults)
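The comment above refers to half-open ranges: a timespan covers [begin, end). Under that convention the overlap test being cross-checked reduces to the classic interval predicate, sketched here on plain nanosecond integers (illustrative only, not the actual `Timespan.overlaps` implementation):

def half_open_overlaps(a_begin, a_end, b_begin, b_end):
    # [a_begin, a_end) and [b_begin, b_end) intersect iff each one
    # starts before the other ends.
    return a_begin < b_end and b_begin < a_end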
Example #5
    def testPrecision(self):
        """Test that we only use nanosecond precision for equality."""
        ts1 = self.timespans[-1]
        ts2 = Timespan(begin=ts1.begin +
                       astropy.time.TimeDelta(1e-10, format="sec"),
                       end=ts1.end)
        self.assertEqual(ts1, ts2)

        self.assertEqual(Timespan(begin=None, end=None),
                         Timespan(begin=None, end=None))
        self.assertEqual(Timespan(begin=None, end=ts1.end),
                         Timespan(begin=None, end=ts1.end))

        ts2 = Timespan(begin=ts1.begin +
                       astropy.time.TimeDelta(1e-8, format="sec"),
                       end=ts1.end)
        self.assertNotEqual(ts1, ts2)

        ts2 = Timespan(begin=None, end=ts1.end)
        self.assertNotEqual(ts1, ts2)

        t1 = Timespan(begin=astropy.time.Time(2456461.0,
                                              val2=0.06580758101851847,
                                              format="jd",
                                              scale="tai"),
                      end=astropy.time.Time(2456461.0,
                                            val2=0.06617994212962963,
                                            format="jd",
                                            scale="tai"))
        t2 = Timespan(begin=astropy.time.Time(2456461.0,
                                              val2=0.06580758101851858,
                                              format="jd",
                                              scale="tai"),
                      end=astropy.time.Time(2456461.0,
                                            val2=0.06617994212962963,
                                            format="jd",
                                            scale="tai"))
        self.assertEqual(t1, t2)

        # Ensure that == and != work properly
        self.assertTrue(t1 == t2, f"Equality of {t1} and {t2}")
        self.assertFalse(t1 != t2, f"Check != is false for {t1} and {t2}")
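The pattern of these assertions implies equality is evaluated at integer-nanosecond resolution: the 1e-10 s (0.1 ns) offset vanishes when the bounds are converted to nanoseconds, the 1e-8 s (10 ns) offset does not, and the two JD begin values differ by about 1e-16 days (roughly 0.01 ns), well below the threshold.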
Example #6
 def testInvalid(self):
     """Test that we reject timespans that should not exist.
     """
     with self.assertRaises(ValueError):
         Timespan(TimeConverter().max_time, None)
     with self.assertRaises(ValueError):
         Timespan(TimeConverter().max_time, TimeConverter().max_time)
     with self.assertRaises(ValueError):
         Timespan(None, TimeConverter().epoch)
     with self.assertRaises(ValueError):
         Timespan(TimeConverter().epoch, TimeConverter().epoch)
     t = TimeConverter().nsec_to_astropy(TimeConverter().max_nsec - 1)
     with self.assertRaises(ValueError):
         Timespan(t, t)
     with self.assertRaises(ValueError):
         Timespan.fromInstant(t)
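A plausible reading of the last two assertions: an instantaneous span is padded to a minimal non-empty range (see Examples #8 and #16), so a time at `max_nsec - 1` cannot be padded without exceeding the representable range, and construction is rejected.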
Example #7
 def testJson(self):
     ts1 = Timespan(begin=astropy.time.Time('2013-06-17 13:34:45.775000', scale='tai', format='iso'),
                    end=astropy.time.Time('2013-06-17 13:35:17.947000', scale='tai', format='iso'))
     json_str = ts1.to_json()
     ts_json = Timespan.from_json(json_str)
     self.assertEqual(ts_json, ts1)
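Since `to_json` returns a plain string, the round trip extends naturally to persistence; a small sketch in the same test context (the file name is illustrative):

     with open("timespan.json", "w") as f:
         f.write(ts1.to_json())
     with open("timespan.json") as f:
         self.assertEqual(Timespan.from_json(f.read()), ts1)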
Example #8
 def testFromInstant(self):
     """Test construction of instantaneous timespans.
     """
     self.assertEqual(Timespan.fromInstant(self.timestamps[0]),
                      Timespan(self.timestamps[0], self.timestamps[0]))
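In other words `Timespan.fromInstant(t)` and the default `Timespan(t, t)` agree: instantaneous spans are padded to a minimal non-empty range rather than treated as empty, in contrast to `padInstantaneous=False` in Example #16, which produces an empty span.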
Example #9
    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 3, "detector": 2},
                                  collections="calibs")
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 4, "detector": 3},
                                  collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias",
                                  instrument="Cam1", exposure=4, detector=3,
                                  collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure.obs_id": "three",
                                           "detector.full_name": "Ab"},
                                  collections="calibs")
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get("bias", {"exposure.obs_id": "four",
                                           "detector.full_name": "Ba"},
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get("bias", {"exposure": "four",
                                           "detector": "Ba"},
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get("bias",
                                  exposure="four", detector="Ba",
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get("bias", day_obs=20211114, seq_num=42,
                                  raft="B", name_in_raft="a",
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)
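Conceptually, the calibration lookup joins the exposure's timespan against the validity ranges recorded by `certify`. A simplified sketch of that resolution step (not the actual butler implementation):

def resolve_calibration(certified, exposure_timespan):
    # certified: iterable of (ref, validity_timespan) pairs for one
    # dataset type and detector.
    matches = [ref for ref, validity in certified
               if validity.overlaps(exposure_timespan)]
    if len(matches) != 1:
        raise LookupError("missing or ambiguous calibration")
    return matches[0]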
Example #10
    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types.
        """
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))

        with tempfile.NamedTemporaryFile(mode='w', suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [ref.unresolved() for ref in registry1.queryDatasets(..., collections="tag1")],
            [ref.unresolved() for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [(assoc.ref.unresolved(), assoc.timespan)
             for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")],
            [(assoc.ref.unresolved(), assoc.timespan)
             for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")],
        )
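The comparisons use `ref.unresolved()` so that only dataset types and data IDs are compared; the opaque dataset IDs assigned by each registry are expected to differ after a transfer.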
Example #11
    def test_ingest(self):

        fitsPath = os.path.join(TESTDIR, "data", "small.fits")

        formatter = FORMATTERS[0]
        datasetTypeName, formatterCls = (formatter["dataset_type"],
                                         formatter["formatter_cls"])

        datasetType = self.butler.registry.getDatasetType(datasetTypeName)
        datasets = []
        for exposure in range(3, 5):
            for detector in range(6):
                # use the same fits to test ingest
                if not os.path.exists(fitsPath):
                    log.warning(
                        f"No data found for detector {detector}, exposure {exposure} @ {fitsPath}."
                    )
                    continue
                ref = DatasetRef(datasetType,
                                 dataId={
                                     "instrument": INSTRUMENT_NAME,
                                     "detector": detector,
                                     "exposure": exposure * 11
                                 })
                datasets.append(
                    FileDataset(refs=ref,
                                path=fitsPath,
                                formatter=formatterCls))

        # register new collection
        # run = "rawIngestedRun"
        # self.butler.registry.registerCollection(run, type=CollectionType.RUN)

        # collection is registered as a part of setUp
        run = self.collection

        with self.butler.transaction():
            for exposure in range(3, 5):
                expid = exposure * 11
                self.butler.registry.insertDimensionData(
                    "exposure", {
                        "instrument": INSTRUMENT_NAME,
                        "id": expid,
                        "name": f"{expid}",
                        "group_name": "day1",
                        "timespan": Timespan(begin=None, end=None)
                    })
            # transfer can be 'auto', 'move', 'copy', 'hardlink', 'relsymlink'
            # or 'symlink'
            self.butler.ingest(*datasets, transfer="symlink", run=run)

        # verify that 12 files were ingested (2 exposures for each detector)
        refsSet = set(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run]))
        self.assertEqual(
            len(refsSet), 12,
            f"Collection {run} should have 12 elements after ingest")

        # verify that data id is present
        dataid = {"exposure": 44, "detector": 5, "instrument": INSTRUMENT_NAME}
        refsList = list(
            self.butler.registry.queryDatasets(datasetTypeName,
                                               collections=[run],
                                               dataId=dataid))
        self.assertEqual(
            len(refsList), 1,
            f"Collection {run} should have 1 element with {dataid}")
Example #12
    def _buildVisitRecords(self,
                           definition: VisitDefinitionData,
                           *,
                           collections: Any = None) -> _VisitRecords:
        """Build the DimensionRecords associated with a visit.

        Parameters
        ----------
        definition : `VisitDefinitionData`
            Struct with identifiers for the visit and records for its
            constituent exposures.
        collections : Any, optional
            Collections to be searched for raws and camera geometry, overriding
            ``self.butler.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.

        Returns
        -------
        records : `_VisitRecords`
            Struct containing DimensionRecords for the visit, including
            associated dimension elements.
        """
        # Compute all regions.
        visitRegion, visitDetectorRegions = self.computeVisitRegions.compute(
            definition, collections=collections)
        # Aggregate other exposure quantities.
        timespan = Timespan(
            begin=_reduceOrNone(min, (e.timespan.begin
                                      for e in definition.exposures)),
            end=_reduceOrNone(max,
                              (e.timespan.end for e in definition.exposures)),
        )
        exposure_time = _reduceOrNone(sum, (e.exposure_time
                                            for e in definition.exposures))
        physical_filter = _reduceOrNone(lambda a, b: a if a == b else None,
                                        (e.physical_filter
                                         for e in definition.exposures))
        target_name = _reduceOrNone(lambda a, b: a if a == b else None,
                                    (e.target_name
                                     for e in definition.exposures))
        science_program = _reduceOrNone(lambda a, b: a if a == b else None,
                                        (e.science_program
                                         for e in definition.exposures))

        # observing day for a visit is defined by the earliest observation
        # of the visit
        observing_day = _reduceOrNone(min, (e.day_obs
                                            for e in definition.exposures))
        observation_reason = _reduceOrNone(lambda a, b: a if a == b else None,
                                           (e.observation_reason
                                            for e in definition.exposures))
        if observation_reason is None:
            # Be explicit about there being multiple reasons
            observation_reason = "various"

        # Use the mean zenith angle as an approximation
        zenith_angle = _reduceOrNone(sum, (e.zenith_angle
                                           for e in definition.exposures))
        if zenith_angle is not None:
            zenith_angle /= len(definition.exposures)

        # Construct the actual DimensionRecords.
        return _VisitRecords(
            visit=self.universe["visit"].RecordClass(
                instrument=definition.instrument,
                id=definition.id,
                name=definition.name,
                physical_filter=physical_filter,
                target_name=target_name,
                science_program=science_program,
                observation_reason=observation_reason,
                day_obs=observing_day,
                zenith_angle=zenith_angle,
                visit_system=self.groupExposures.getVisitSystem()[0],
                exposure_time=exposure_time,
                timespan=timespan,
                region=visitRegion,
                # TODO: no seeing value in exposure dimension records, so we
                # can't set that here.  But there are many other columns that
                # both dimensions should probably have as well.
            ),
            visit_definition=[
                self.universe["visit_definition"].RecordClass(
                    instrument=definition.instrument,
                    visit=definition.id,
                    exposure=exposure.id,
                    visit_system=self.groupExposures.getVisitSystem()[0],
                ) for exposure in definition.exposures
            ],
            visit_detector_region=[
                self.universe["visit_detector_region"].RecordClass(
                    instrument=definition.instrument,
                    visit=definition.id,
                    detector=detectorId,
                    region=detectorRegion,
                )
                for detectorId, detectorRegion in visitDetectorRegions.items()
            ])
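`_reduceOrNone` is a private helper not shown on this page; judging from its uses above (with `min`, `max`, `sum`, and an equality-collapsing lambda), its semantics are presumably close to this sketch:

from functools import reduce

def _reduceOrNone(func, iterable):
    # Sketch: reduce with ``func``, but yield None for an empty iterable
    # or when any element is None (e.g. a missing exposure_time).
    values = list(iterable)
    if not values or any(v is None for v in values):
        return None
    return reduce(func, values)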
Example #13
def ingestSimulated(repo,
                    locations,
                    regex,
                    output_run,
                    transfer="auto",
                    ingest_type="rawexp"):
    """Ingest raw frames into the butler registry.

    Parameters
    ----------
    repo : `str`
        URI to the repository.
    locations : `list` [`str`]
        Files to ingest and directories to search for files that match
        ``regex`` to ingest.
    regex : `str`
        Regex string used to find files in directories listed in locations.
    output_run : `str`
        The name of the run collection where datasets should be put.
    transfer : `str` or None
        The external data transfer type, by default "auto".
    ingest_type : `str`
        The dataset type name to use for the ingested products.

    Raises
    ------
    Exception
        Raised if operations on the configuration object fail.

    Notes
    -----
    This method inserts all datasets for an exposure within a transaction,
    guaranteeing that partial exposures are never ingested.  The exposure
    dimension record is inserted with `Registry.syncDimensionData` first
    (in its own transaction), which inserts only if a record with the same
    primary key does not already exist.  This allows different files within
    the same exposure to be ingested in different runs.
    """

    butler = Butler(repo, writeable=True)

    # make sure instrument and detector dimensions are populated
    with butler.registry.transaction():
        instrument_record = {
            "name": "simulator",
            "exposure_max": 600000,
            "detector_max": 6,
            "class_name": "spherex.instrument.SimulatorInstrument"
        }
        butler.registry.syncDimensionData("instrument", instrument_record)
        for idx in range(1, 7):
            detector_record = {
                "instrument": "simulator",
                "id": idx,
                "full_name": f"array{idx}"
            }
            butler.registry.syncDimensionData("detector", detector_record)

    dimension_universe = butler.registry.dimensions
    datasetType = DatasetType(ingest_type,
                              dimension_universe.extract(
                                  ("instrument", "detector", "exposure")),
                              "SPHERExImage",
                              universe=dimension_universe)
    # idempotent dataset type registration
    butler.registry.registerDatasetType(datasetType)

    # idempotent collection registration
    run = f"{ingest_type}r" if (output_run is None) else output_run
    butler.registry.registerCollection(run, type=CollectionType.RUN)

    n_failed = 0
    files = findFileResources(locations, regex)

    # example: sim_exposure_000000_array_1.fits or
    #   sim_exposure_000000_array_2_dark_current.fits
    pattern = re.compile(r"sim_exposure_(\d+)_array_(\d)[_,.]")

    # do we want to group observations?
    grp = datetime.date.today().strftime("%Y%m%d")

    datasets = []
    for file in files:
        # parse exposure and detector ids from file name
        m = pattern.search(file)
        if m is None:
            n_failed += 1
            logging.error(f"{file} does not match simulator file pattern")
            continue
        else:
            g = m.groups()
            if len(g) != 2:
                n_failed += 1
                logging.error(
                    f"Unable to get exposure and detector from file name: {file}"
                )
                continue
            else:
                [exposure_id, detector_id] = list(map(int, g))

        try:
            exposure_record = {
                "instrument": "simulator",
                "id": exposure_id,
                "name": f"{exposure_id:06d}",
                "group_name": f"{grp}",
                "timespan": Timespan(begin=None, end=None)
            }
            # idempotent insertion of individual dimension rows
            butler.registry.syncDimensionData("exposure", exposure_record)
        except Exception as e:
            n_failed += 1
            logging.error(
                f"Unable to insert exposure record for file {file}: {e}")
            continue

        dataId = DataCoordinate.standardize(
            instrument="simulator",
            detector=detector_id,
            exposure=exposure_id,
            universe=butler.registry.dimensions)
        ref = DatasetRef(datasetType, dataId=dataId)
        datasets.append(
            FileDataset(refs=ref, path=file, formatter=AstropyImageFormatter))

    with butler.transaction():
        butler.ingest(*datasets, transfer=transfer, run=run)
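A hypothetical invocation, assuming a repo at ./repo and simulator FITS files under ./sims (paths and run name are illustrative):

ingestSimulated("./repo", ["./sims"],
                r"sim_exposure_\d+_array_\d.*\.fits$",
                output_run="sim/raw/run1", transfer="symlink")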
Example #14
    def ingestStrayLightData(self,
                             butler,
                             directory,
                             *,
                             transfer=None,
                             collection=None,
                             labels=()):
        """Ingest externally-produced y-band stray light data files into
        a data repository.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Butler to write with.  Any collections associated with it are
            ignored in favor of ``collection`` and/or ``labels``.
        directory : `str`
            Directory containing ybackground-*.fits files.
        transfer : `str`, optional
            If not `None`, must be one of 'move', 'copy', 'hardlink', or
            'symlink', indicating how to transfer the files.
        collection : `str`, optional
            Name to use for the calibration collection that associates all
            datasets with a validity range.  If this collection already exists,
            it must be a `~CollectionType.CALIBRATION` collection, and it must
            not have any datasets that would conflict with those inserted by
            this method.  If `None`, a collection name is worked out
            automatically from the instrument name and other metadata by
            calling ``makeCuratedCalibrationCollectionName``, but this
            default name may not work well for long-lived repositories unless
            ``labels`` is also provided (and changed every time curated
            calibrations are ingested).
        labels : `Sequence` [ `str` ], optional
            Extra strings to include in collection names, after concatenating
            them with the standard collection name delimiter.  If provided,
            these are inserted into the names of the `~CollectionType.RUN`
            collections that datasets are inserted directly into, as well as the
            `~CollectionType.CALIBRATION` collection if it is generated
            automatically (i.e. if ``collection is None``).  Usually this is
            just the name of the ticket on which the calibration collection is
            being created.
        """
        # Register the CALIBRATION collection that adds validity ranges.
        # This does nothing if it is already registered.
        if collection is None:
            collection = self.makeCalibrationCollectionName(*labels)
        butler.registry.registerCollection(collection,
                                           type=CollectionType.CALIBRATION)

        # Register the RUN collection that holds these datasets directly.  We
        # only need one because there is only one validity range, and hence no
        # data ID conflicts within the run.
        run = self.makeUnboundedCalibrationRunName(*labels)
        butler.registry.registerRun(run)

        # LEDs covered up around 2018-01-01, no need for correction after that
        # date.
        timespan = Timespan(begin=None,
                            end=astropy.time.Time("2018-01-01",
                                                  format="iso",
                                                  scale="tai"))
        datasets = []
        # TODO: should we use a more generic name for the dataset type?
        # This is just the (rather HSC-specific) name used in Gen2, and while
        # the instances of this dataset are camera-specific, the datasetType
        # (which is used in the generic IsrTask) should not be.
        datasetType = DatasetType("yBackground",
                                  dimensions=(
                                      "physical_filter",
                                      "detector",
                                  ),
                                  storageClass="StrayLightData",
                                  universe=butler.registry.dimensions,
                                  isCalibration=True)
        for detector in self.getCamera():
            path = os.path.join(directory,
                                f"ybackground-{detector.getId():03d}.fits")
            if not os.path.exists(path):
                log.warning(
                    f"No stray light data found for detector {detector.getId()} @ {path}."
                )
                continue
            ref = DatasetRef(datasetType,
                             dataId={
                                 "instrument": self.getName(),
                                 "detector": detector.getId(),
                                 "physical_filter": "HSC-Y"
                             })
            datasets.append(
                FileDataset(refs=ref,
                            path=path,
                            formatter=SubaruStrayLightDataFormatter))
        butler.registry.registerDatasetType(datasetType)
        with butler.transaction():
            butler.ingest(*datasets, transfer=transfer, run=run)
            refs = []
            for dataset in datasets:
                refs.extend(dataset.refs)
            butler.registry.certify(collection, refs, timespan)
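Note the two-collection pattern here: `ingest` writes the files into the `~CollectionType.RUN` collection, and `certify` then attaches a validity range to those refs in the `~CollectionType.CALIBRATION` collection; it is the calibration collection that temporal lookups (as in Example #9) search.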
Example #15
    def writeAdditionalCuratedCalibrations(self,
                                           butler,
                                           collection=None,
                                           labels=()):
        # Register the CALIBRATION collection that adds validity ranges.
        # This does nothing if it is already registered.
        if collection is None:
            collection = self.makeCalibrationCollectionName(*labels)
        butler.registry.registerCollection(collection,
                                           type=CollectionType.CALIBRATION)

        # Register the RUN collection that holds these datasets directly.  We
        # only need one because all of these datasets have the same (unbounded)
        # validity range right now.
        run = self.makeUnboundedCalibrationRunName(*labels)
        butler.registry.registerRun(run)
        baseDataId = butler.registry.expandDataId(instrument=self.getName())
        refs = []

        # Write brighter-fatter kernel, with an infinite validity range.
        datasetType = DatasetType("bfKernel", ("instrument", ),
                                  "NumpyArray",
                                  universe=butler.registry.dimensions,
                                  isCalibration=True)
        butler.registry.registerDatasetType(datasetType)

        # Load and then put instead of just moving the file in part to ensure
        # the version in-repo is written with Python 3 and does not need
        # `encoding='latin1'` to be read.
        bfKernel = self.getBrighterFatterKernel()
        refs.append(butler.put(bfKernel, datasetType, baseDataId, run=run))

        # The following iterate over the values of the dictionaries returned
        # by the transmission functions and ignore the date that is supplied.
        # This is due to the dates not being ranges but single dates,
        # which do not give the proper notion of validity. As such unbounded
        # calibration labels are used when inserting into the database.
        # In the future these could and probably should be updated to
        # properly account for what ranges are considered valid.

        # Write optical transmissions
        opticsTransmissions = getOpticsTransmission()
        datasetType = DatasetType("transmission_optics", ("instrument", ),
                                  "TransmissionCurve",
                                  universe=butler.registry.dimensions,
                                  isCalibration=True)
        butler.registry.registerDatasetType(datasetType)
        for entry in opticsTransmissions.values():
            if entry is None:
                continue
            refs.append(butler.put(entry, datasetType, baseDataId, run=run))

        # Write transmission sensor
        sensorTransmissions = getSensorTransmission()
        datasetType = DatasetType("transmission_sensor", (
            "instrument",
            "detector",
        ),
                                  "TransmissionCurve",
                                  universe=butler.registry.dimensions,
                                  isCalibration=True)
        butler.registry.registerDatasetType(datasetType)
        for entry in sensorTransmissions.values():
            if entry is None:
                continue
            for sensor, curve in entry.items():
                dataId = DataCoordinate.standardize(baseDataId,
                                                    detector=sensor)
                refs.append(butler.put(curve, datasetType, dataId, run=run))

        # Write filter transmissions
        filterTransmissions = getFilterTransmission()
        datasetType = DatasetType("transmission_filter", (
            "instrument",
            "physical_filter",
        ),
                                  "TransmissionCurve",
                                  universe=butler.registry.dimensions,
                                  isCalibration=True)
        butler.registry.registerDatasetType(datasetType)
        for entry in filterTransmissions.values():
            if entry is None:
                continue
            for band, curve in entry.items():
                dataId = DataCoordinate.standardize(baseDataId,
                                                    physical_filter=band)
                refs.append(butler.put(curve, datasetType, dataId, run=run))

        # Write atmospheric transmissions
        atmosphericTransmissions = getAtmosphereTransmission()
        datasetType = DatasetType("transmission_atmosphere", ("instrument", ),
                                  "TransmissionCurve",
                                  universe=butler.registry.dimensions,
                                  isCalibration=True)
        butler.registry.registerDatasetType(datasetType)
        for entry in atmosphericTransmissions.values():
            if entry is None:
                continue
            refs.append(
                butler.put(entry,
                           datasetType, {"instrument": self.getName()},
                           run=run))

        # Associate all datasets with the unbounded validity range.
        butler.registry.certify(collection, refs, Timespan(begin=None,
                                                           end=None))
Example #16
 def testEmpty(self):
     """Test various ways to construct an empty timespan, and that
     operations on empty timespans yield the expected behavior.
     """
     self.assertEqual(
         Timespan.makeEmpty(),
         Timespan(Timespan.EMPTY, Timespan.EMPTY),
     )
     self.assertEqual(
         Timespan.makeEmpty(),
         Timespan(self.timestamps[1], self.timestamps[0]),
     )
     self.assertEqual(
         Timespan.makeEmpty(),
         Timespan(Timespan.EMPTY, self.timestamps[0]),
     )
     self.assertEqual(
         Timespan.makeEmpty(),
         Timespan(self.timestamps[0], Timespan.EMPTY),
     )
     self.assertEqual(
         Timespan.makeEmpty(),
         Timespan(self.timestamps[0], self.timestamps[0], padInstantaneous=False)
     )
     empty = Timespan.makeEmpty()
     for t in self.timestamps:
         with self.subTest(t=str(t)):
             self.assertFalse(empty < t)
             self.assertFalse(empty > t)
             self.assertFalse(t < empty)
             self.assertFalse(t > empty)
             self.assertFalse(empty.contains(t))
     for t in self.timespans:
         with self.subTest(t=str(t)):
             self.assertTrue(t.contains(empty))
             self.assertFalse(t.overlaps(empty))
             self.assertFalse(empty.overlaps(t))
             self.assertEqual(empty.contains(t), t.isEmpty())
             self.assertFalse(empty < t)
             self.assertFalse(t < empty)
             self.assertFalse(empty > t)
             self.assertFalse(t > empty)
Example #17
    def _finish(self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]]):
        # Docstring inherited from RepoConverter.
        # Read Gen2 calibration repository and extract validity ranges for
        # all datasetType + calibDate combinations we ingested.
        calibFile = os.path.join(self.root, "calibRegistry.sqlite3")
        # If the registry file does not exist this indicates a problem.
        # We check explicitly because sqlite will try to create the
        # missing file if it can.
        if not os.path.exists(calibFile):
            raise RuntimeError("Attempting to convert calibrations but no registry database"
                               f" found in {self.root}")

        # Initially we collate timespans for each dataId + dataset type
        # combination. This allows us to check for small gaps or overlaps
        # inherent in the ambiguous usage of validity ranges in Gen2.
        timespansByDataId = defaultdict(list)

        db = sqlite3.connect(calibFile)
        db.row_factory = sqlite3.Row

        for datasetType, datasetsByCalibDate in datasets.items():
            if not datasetType.isCalibration():
                continue
            gen2keys = {}
            if "detector" in datasetType.dimensions.names:
                gen2keys[self.task.config.ccdKey] = int
            if "physical_filter" in datasetType.dimensions.names:
                gen2keys["filter"] = str
            translator = self.instrument.makeDataIdTranslatorFactory().makeMatching(
                datasetType.name,
                gen2keys,
                instrument=self.instrument.getName()
            )
            for calibDate, datasetsForCalibDate in datasetsByCalibDate.items():
                assert calibDate is not None, ("datasetType.isCalibration() is set by "
                                               "the presence of calibDate in the Gen2 template")
                # Build a mapping that lets us find DatasetRefs by data ID,
                # for this DatasetType and calibDate.  We know there is only
                # one ref for each data ID (given DatasetType and calibDate as
                # well).
                refsByDataId = {}
                for dataset in datasetsForCalibDate:
                    refsByDataId.update((ref.dataId, ref) for ref in dataset.refs)
                # Query the Gen2 calibration repo for the validity ranges for
                # this DatasetType and calibDate, and look up the appropriate
                # refs by data ID.
                for row in self._queryGen2CalibRegistry(db, datasetType, calibDate):
                    # For validity times we use TAI as some gen2 repos have
                    # validity dates very far in the past or future.
                    timespan = Timespan(
                        astropy.time.Time(row["validStart"], format="iso", scale="tai"),
                        astropy.time.Time(row["validEnd"], format="iso", scale="tai"),
                    )
                    # Make a Gen2 data ID from query results.
                    gen2id = {}
                    if "detector" in datasetType.dimensions.names:
                        gen2id[self.task.config.ccdKey] = row[self.task.config.ccdKey]
                    if "physical_filter" in datasetType.dimensions.names:
                        gen2id["filter"] = row["filter"]
                    # Translate that to Gen3.
                    gen3id, _ = translator(gen2id)
                    dataId = DataCoordinate.standardize(gen3id, graph=datasetType.dimensions)
                    ref = refsByDataId.get(dataId)
                    if ref is not None:
                        # Validity ranges must not overlap for the same dataID
                        # datasetType combination. Use that as a primary
                        # key and store the timespan and ref in a tuple
                        # as the value for later timespan validation.
                        timespansByDataId[(ref.dataId, ref.datasetType.name)].append((timespan, ref))
                    else:
                        # The Gen2 calib registry mentions this dataset, but it
                        # isn't included in what we've ingested.  This might
                        # sometimes be a problem, but it should usually
                        # represent someone just trying to convert a subset of
                        # the Gen2 repo, so I don't think it's appropriate to
                        # warn or even log at info, since in that case there
                        # may be a _lot_ of these messages.
                        self.task.log.debug(
                            "Gen2 calibration registry entry has no dataset: %s for calibDate=%s, %s.",
                            datasetType.name, calibDate, dataId
                        )

        # Analyze the timespans to check for overlap problems
        # Gaps of a day should be closed since we assume differing
        # conventions in gen2 repos.

        # We need to correct any validity range issues and store the
        # results in a dict-of-lists keyed by Timespan, since
        # Registry.certify operates on one Timespan and multiple refs at a
        # time.
        refsByTimespan = defaultdict(list)

        # A day with a bit of fuzz to indicate the largest gap we will close
        max_gap = astropy.time.TimeDelta(1.001, format="jd", scale="tai")

        # Since in many cases the validity ranges are relevant for multiple
        # dataset types and dataIds we don't want to over-report and so
        # cache the messages for later.
        info_messages = set()
        warn_messages = set()
        for timespans in timespansByDataId.values():
            # Sort all the timespans and check overlaps
            sorted_timespans = sorted(timespans, key=lambda x: x[0])
            timespan_prev, ref_prev = sorted_timespans.pop(0)
            for timespan, ref in sorted_timespans:
                # See if we have a suspicious gap
                delta = timespan.begin - timespan_prev.end
                abs_delta = abs(delta)
                if abs_delta > 0 and abs_delta < max_gap:
                    if delta > 0:
                        # Gap between timespans
                        msg = f"Calibration validity gap closed from {timespan_prev.end} to {timespan.begin}"
                        info_messages.add(msg)
                    else:
                        # Overlap of timespans
                        msg = f"Calibration validity overlap of {abs(delta).to(u.s)} removed for period " \
                            f"{timespan.begin} to {timespan_prev.end}"
                        warn_messages.add(msg)

                    self.task.log.debug("Correcting validity range for %s with end %s",
                                        ref_prev, timespan_prev.end)

                    # Assume this gap is down to convention in gen2.
                    # We have to adjust the previous timespan to fit
                    # since we always trust validStart.
                    timespan_prev = Timespan(begin=timespan_prev.begin,
                                             end=timespan.begin)
                # Store the previous timespan and ref since it has now
                # been verified
                refsByTimespan[timespan_prev].append(ref_prev)

                # And update the previous values for the next iteration
                timespan_prev = timespan
                ref_prev = ref

            # Store the final timespan/ref pair
            refsByTimespan[timespan_prev].append(ref_prev)

        # Issue any pending log messages we have recorded
        for msg in sorted(info_messages):
            self.task.log.info(msg)
        for msg in sorted(warn_messages):
            self.task.log.warn(msg)

        # Done reading from Gen2, time to certify into Gen3.
        for timespan, refs in refsByTimespan.items():
            self.task.registry.certify(self.collection, refs, timespan)