def test_merging_overwrite(self):
    """Check ``overwrite`` mode merging of two and then four headers."""
    result = merge_headers([self.h1, self.h2], mode="overwrite")

    # The merged header should be the same type as the first header
    self.assertIsInstance(result, type(self.h1))

    # Each expected key is paired with the header that supplies its value.
    two_way_sources = {
        "MJD-OBS": self.h2,
        "ORIGIN": self.h2,
        "KEY0": self.h2,
        "KEY1": self.h1,
        "KEY2": self.h2,
        "KEY3": self.h1,
        "KEY4": self.h1,
        "KEY5": self.h2,
    }
    self.assertEqual(
        result,
        {key: hdr[key] for key, hdr in two_way_sources.items()},
    )

    result = merge_headers(
        [self.h1, self.h2, self.h3, self.h4], mode="overwrite"
    )
    four_way_sources = {
        "MJD-OBS": self.h4,
        "ORIGIN": self.h3,
        "KEY0": self.h2,
        "KEY1": self.h4,
        "KEY2": self.h3,
        "KEY3": self.h3,
        "KEY4": self.h1,
        "KEY5": self.h3,
        "KEY6": self.h4,
    }
    self.assertEqual(
        result,
        {key: hdr[key] for key, hdr in four_way_sources.items()},
    )
def test_merging_hsc(self):
    """Merge two HSC test headers and check MJD-STR / UT-STR handling."""
    names = ("fitsheader-hsc-HSCA04090107.yaml", "fitsheader-hsc.yaml")
    hsc_headers = []
    for name in names:
        hsc_headers.append(read_test_file(name, dir=self.datadir))

    # Values expected when the first-listed file wins and when the
    # oldest file wins (after sorting).
    first_listed_mjd = 57305.34729859
    oldest_mjd = 56598.26106374757
    retained = ["MJD-STR", "UT-STR"]

    # The MJD-STR should come from the first file
    result = merge_headers(hsc_headers, mode="first", sort=False)
    self.assertAlmostEqual(result["MJD-STR"], first_listed_mjd)

    # If we sort then MJD-STR should come from the oldest file
    result = merge_headers(hsc_headers, mode="first", sort=True)
    self.assertAlmostEqual(result["MJD-STR"], oldest_mjd)

    # Drop headers that differ, MJD-STR should not appear
    result = merge_headers(hsc_headers, mode="drop", sort=True)
    self.assertNotIn("MJD-STR", result)

    # Drop but retain first MJD-STR without sorting
    result = merge_headers(
        hsc_headers, mode="drop", sort=False, first=retained
    )
    self.assertAlmostEqual(result["MJD-STR"], first_listed_mjd)
    self.assertEqual(result["UT-STR"], "08:20:06.598")

    # Drop but retain first MJD-STR
    result = merge_headers(
        hsc_headers, mode="drop", sort=True, first=retained
    )
    self.assertAlmostEqual(result["MJD-STR"], oldest_mjd)
    self.assertEqual(result["UT-STR"], "06:15:55.908")
def test_merging_drop(self):
    """Check ``drop`` mode, with and without sorting and retained keys."""
    all_headers = [self.h1, self.h2, self.h3, self.h4]

    # Keys expected to survive a plain drop-mode merge.
    survivors = {
        "KEY3": self.h1["KEY3"],
        "KEY4": self.h1["KEY4"],
        "KEY5": self.h2["KEY5"],
        "KEY6": self.h4["KEY6"],
    }
    result = merge_headers(all_headers, mode="drop")
    self.assertEqual(result, survivors)

    # Sorting the headers should make no difference to drop mode
    result = merge_headers(all_headers, mode="drop", sort=True)
    self.assertEqual(result, survivors)

    # Now retain some headers
    result = merge_headers(
        all_headers,
        mode="drop",
        sort=False,
        first=["ORIGIN"],
        last=["KEY2", "KEY1"],
    )
    with_retained = {
        "KEY2": self.h3["KEY2"],
        "ORIGIN": self.h1["ORIGIN"],
        "KEY1": self.h4["KEY1"],
        **survivors,
    }
    self.assertEqual(result, with_retained)

    # Now retain some headers with sorting
    result = merge_headers(
        all_headers,
        mode="drop",
        sort=True,
        first=["ORIGIN"],
        last=["KEY2", "KEY1"],
    )
    with_retained_sorted = {
        "KEY2": self.h3["KEY2"],
        "ORIGIN": self.h2["ORIGIN"],
        "KEY1": self.h4["KEY1"],
        **survivors,
    }
    self.assertEqual(result, with_retained_sorted)
def test_merging_append_sort(self):
    """Check ``append`` mode merging when ``sort=True`` is requested."""

    def gather(key, *headers):
        # Build the per-header value list for ``key``; a ``None``
        # placeholder is passed through unchanged.
        return [None if hdr is None else hdr[key] for hdr in headers]

    # Try with two headers first
    result = merge_headers([self.h1, self.h2], mode="append", sort=True)
    two_way = {
        "MJD-OBS": gather("MJD-OBS", self.h2, self.h1),
        "ORIGIN": self.h1["ORIGIN"],
        "KEY0": gather("KEY0", self.h2, self.h1),
        "KEY1": self.h1["KEY1"],
        "KEY2": gather("KEY2", self.h2, self.h1),
        "KEY3": self.h1["KEY3"],
        "KEY4": self.h1["KEY4"],
        "KEY5": self.h2["KEY5"],
    }
    self.assertEqual(result, two_way)

    result = merge_headers(
        [self.h1, self.h2, self.h3, self.h4], mode="append", sort=True
    )
    four_way = {
        "MJD-OBS": gather("MJD-OBS", self.h2, self.h1, self.h4, self.h3),
        "ORIGIN": gather("ORIGIN", self.h2, self.h1, None, self.h3),
        "KEY0": gather("KEY0", self.h2, self.h1, None, None),
        "KEY1": gather("KEY1", None, self.h1, self.h4, None),
        "KEY2": gather("KEY2", self.h2, self.h1, None, self.h3),
        "KEY3": self.h3["KEY3"],
        "KEY4": self.h1["KEY4"],
        "KEY5": self.h3["KEY5"],
        "KEY6": self.h4["KEY6"],
    }
    self.assertEqual(result, four_way)

    # Order should not matter
    result = merge_headers(
        [self.h4, self.h3, self.h2, self.h1], mode="append", sort=True
    )
    self.assertEqual(result, four_way)
def determine_translatable_headers(cls, filename, primary=None):
    """Yield the merged header to use for metadata translation.

    Phosim splits useful metadata between the primary header and the
    amplifier headers, so the headers of the first two HDUs of the file
    are merged (in ``overwrite`` mode) and yielded as a single header.

    Parameters
    ----------
    filename : `str`
        Path to a file in a format understood by this translator.
    primary : `dict`-like, optional
        The primary header obtained by the caller. Accepted for
        interface compatibility but ignored: the file is always
        re-opened and the header of HDU 0 read from it.

    Yields
    ------
    headers : iterator of `dict`-like
        The primary header merged with the secondary header.

    Notes
    -----
    This translator class is specifically tailored to raw PhoSim data
    and is not designed to work with general FITS files. The normal
    paradigm is for the caller to have read the first header and then
    called `determine_translator()` on the result to work out which
    translator class to then call to obtain the real headers to be used
    for translation.

    The file remains open while the consumer holds the yielded header,
    because the ``yield`` happens inside the ``with`` block.
    """
    with fits.open(filename) as hdu_list:
        primary_header = hdu_list[0].header
        secondary_header = hdu_list[1].header
        yield merge_headers(
            [primary_header, secondary_header], mode="overwrite"
        )
def test_merging_append(self):
    """Check ``append`` mode merging of two and then four headers."""

    def gather(key, *headers):
        # Build the per-header value list for ``key``; a ``None``
        # placeholder is passed through unchanged.
        return [None if hdr is None else hdr[key] for hdr in headers]

    # Try with two headers first
    result = merge_headers([self.h1, self.h2], mode="append")
    two_way = {
        "MJD-OBS": gather("MJD-OBS", self.h1, self.h2),
        "ORIGIN": self.h1["ORIGIN"],
        "KEY0": gather("KEY0", self.h1, self.h2),
        "KEY1": self.h1["KEY1"],
        "KEY2": gather("KEY2", self.h1, self.h2),
        "KEY3": self.h1["KEY3"],
        "KEY4": self.h1["KEY4"],
        "KEY5": self.h2["KEY5"],
    }
    self.assertEqual(result, two_way)

    result = merge_headers(
        [self.h1, self.h2, self.h3, self.h4], mode="append"
    )
    four_way = {
        "MJD-OBS": gather("MJD-OBS", self.h1, self.h2, self.h3, self.h4),
        "ORIGIN": gather("ORIGIN", self.h1, self.h2, self.h3, None),
        "KEY0": gather("KEY0", self.h1, self.h2, None, None),
        "KEY1": gather("KEY1", self.h1, None, None, self.h4),
        "KEY2": gather("KEY2", self.h1, self.h2, self.h3, None),
        "KEY3": self.h3["KEY3"],
        "KEY4": self.h1["KEY4"],
        "KEY5": self.h3["KEY5"],
        "KEY6": self.h4["KEY6"],
    }
    self.assertEqual(result, four_way)
def test_merging_overwrite_sort(self):
    """Check ``overwrite`` mode merging when ``sort=True`` is requested."""
    # Each expected key is paired with the header that supplies its value.
    two_way_sources = {
        "MJD-OBS": self.h1,
        "ORIGIN": self.h1,
        "KEY0": self.h1,
        "KEY1": self.h1,
        "KEY2": self.h1,
        "KEY3": self.h1,
        "KEY4": self.h1,
        "KEY5": self.h2,
    }
    result = merge_headers([self.h1, self.h2], mode="overwrite", sort=True)
    self.assertEqual(
        result,
        {key: hdr[key] for key, hdr in two_way_sources.items()},
    )

    result = merge_headers(
        [self.h1, self.h2, self.h3, self.h4], mode="overwrite", sort=True
    )
    four_way_sources = {
        "MJD-OBS": self.h3,
        "ORIGIN": self.h3,
        "KEY0": self.h1,
        "KEY1": self.h4,
        "KEY2": self.h3,
        "KEY3": self.h3,
        "KEY4": self.h1,
        "KEY5": self.h3,
        "KEY6": self.h4,
    }
    four_way = {key: hdr[key] for key, hdr in four_way_sources.items()}
    self.assertEqual(result, four_way)

    # Changing the order should not change the result
    result = merge_headers(
        [self.h4, self.h1, self.h3, self.h2], mode="overwrite", sort=True
    )
    self.assertEqual(result, four_way)
def test_merging_first(self):
    """Check ``first`` mode merging of four headers."""
    result = merge_headers(
        [self.h1, self.h2, self.h3, self.h4], mode="first"
    )

    # Each expected key is paired with the header that supplies its value.
    sources = {
        "MJD-OBS": self.h1,
        "ORIGIN": self.h1,
        "KEY0": self.h1,
        "KEY1": self.h1,
        "KEY2": self.h1,
        "KEY3": self.h1,
        "KEY4": self.h1,
        "KEY5": self.h2,
        "KEY6": self.h4,
    }
    self.assertEqual(
        result, {key: hdr[key] for key, hdr in sources.items()}
    )
def readRawFitsHeader(fileName, translator_class=None):
    """Read a FITS header from a raw file and fix it up as required.

    Parameters
    ----------
    fileName : `str`
        Name of the FITS file. Can include a trailing HDU specifier of
        the form ``[N]`` (although 0 is ignored).
    translator_class : `~astro_metadata_translator.MetadataTranslator`, optional
        Any translator class to use for fixing up the header.

    Returns
    -------
    md : `PropertyList`
        Metadata from file. Unless an explicit HDU greater than 0 is
        encoded in the file name, the header of HDU 0 is always merged
        (``overwrite`` mode) with the header returned by the default
        `lsst.afw.fits.readMetadata` call. When an explicit HDU greater
        than 0 is given, only that HDU is read and no merging occurs.
    """
    hduMatch = re.search(r"\[(\d+)\]$", fileName)

    # Treat 0 as a special case: for some instruments the primary header
    # is empty, so "[0]" behaves exactly like no specifier at all.
    explicitHdu = int(hduMatch.group(1)) if hduMatch else 0

    if explicitHdu > 0:
        md = lsst.afw.fits.readMetadata(fileName, hdu=explicitHdu)
    else:
        # For raw some of these files need the second header to be
        # read as well. Not all instruments want the double read
        # but for now it's easiest to always merge.
        primary = lsst.afw.fits.readMetadata(fileName, 0)
        secondary = lsst.afw.fits.readMetadata(fileName)
        md = merge_headers([primary, secondary], mode="overwrite")

    fix_header(md, translator_class=translator_class)
    return md
def readMetadata(self):
    """Read all header metadata directly into a PropertyList.

    Specialist version since some of our data does not set INHERIT=T,
    so the headers have to be merged manually in that case.

    Returns
    -------
    metadata : `~lsst.daf.base.PropertyList`
        Header metadata. If the primary header contains an ``INHERIT``
        key the parent class implementation is used; otherwise the
        primary header is merged (``overwrite`` mode) with the default
        `lsst.afw.fits.readMetadata` result and passed to `fix_header`.
    """
    path = self.fileDescriptor.location.path
    primary = lsst.afw.fits.readMetadata(path, 0)

    if "INHERIT" not in primary:
        # No inheritance flag: merge ourselves
        merged = merge_headers(
            [primary, lsst.afw.fits.readMetadata(path)], mode="overwrite"
        )
        fix_header(merged)
        return merged

    # Trust the inheritance flag
    return super().readMetadata()
def readMetadata(self):
    """Read all header metadata directly into a PropertyList.

    Specialist version since some of our data does not set INHERIT=T,
    so the headers have to be merged manually in that case.

    Returns
    -------
    metadata : `~lsst.daf.base.PropertyList`
        Header metadata. If the primary header contains an ``INHERIT``
        key the parent class implementation is used; otherwise the
        primary header is merged (``overwrite`` mode) with the header
        returned by ``self._determineHDU`` for this detector and fixed
        up with the ``VircamTranslator`` class.
    """
    path = self.fileDescriptor.location.path
    primary = lsst.afw.fits.readMetadata(path, 0)
    # Only the header is needed here; the HDU index is not used.
    _index, detectorHeader = self._determineHDU(self.dataId['detector'])

    if "INHERIT" not in primary:
        # No inheritance flag: merge ourselves
        merged = merge_headers([primary, detectorHeader], mode="overwrite")
        astro_metadata_translator.fix_header(
            merged, translator_class=VircamTranslator
        )
        return merged

    # Trust the inheritance flag
    return super().readMetadata()
def extractMetadata(self, filename: str) -> RawFileData:
    """Extract and process metadata from a single raw file.

    Parameters
    ----------
    filename : `str`
        Path to the file.

    Returns
    -------
    data : `RawFileData`
        A structure containing the metadata extracted from the file,
        as well as the original filename. All fields will be populated,
        but the `RawFileData.dataId` attribute will be a minimal
        (unexpanded) `DataCoordinate` instance.

    Notes
    -----
    Assumes that there is a single dataset associated with the given
    file. Instruments using a single file to store multiple datasets
    must implement their own version of this method.
    """
    # We do not know in general if an input file has set INHERIT=T, so
    # the primary and "first data" headers are merged manually here.
    primaryHeader = readMetadata(filename, 0)
    dataHeader = readMetadata(filename)
    header = merge_headers([primaryHeader, dataHeader], mode="overwrite")
    fix_header(header)

    datasetInfo = self._calculate_dataset_info(header, filename)

    # The data model currently assumes that whilst multiple datasets
    # can be associated with a single file, they must all share the
    # same formatter.
    formatterClass = self.instrument.getRawFormatter(datasetInfo.dataId)
    return RawFileData(
        datasets=[datasetInfo],
        filename=filename,
        FormatterClass=formatterClass,
    )
def test_one(self):
    """A ``drop`` mode merge of a single header should equal that header."""
    only_header = [self.h1]
    self.assertEqual(merge_headers(only_header, mode="drop"), self.h1)
def combineHeaders(self, expList, calib, calibType="CALIB", scales=None):
    """Combine input headers to determine the set of common headers,
    supplemented by calibration inputs.

    Parameters
    ----------
    expList : `list` of `lsst.afw.image.Exposure`
        Input list of exposures to combine. Entries that are `None`
        are skipped.
    calib : `lsst.afw.image.Exposure`
        Output calibration to construct headers for. Its metadata
        object is modified in place.
    calibType : `str`, optional
        OBSTYPE the output should claim.
    scales : `list` of `float`, optional
        Scale values applied to each input to record.

    Returns
    -------
    header : `lsst.daf.base.PropertyList`
        Constructed header (the same object as ``calib.getMetadata()``).
    """
    # Header
    header = calib.getMetadata()
    header.set("OBSTYPE", calibType)

    # Keywords we care about
    # NOTE(review): the DATE-END/MJD-END texts say "oldest" although the
    # values below are taken from the newest observation -- confirm the
    # intended wording before changing these runtime strings.
    comments = {
        "TIMESYS": "Time scale for all dates",
        "DATE-OBS": "Start date of earliest input observation",
        "MJD-OBS": "[d] Start MJD of earliest input observation",
        "DATE-END": "End date of oldest input observation",
        "MJD-END": "[d] End MJD of oldest input observation",
        "MJD-AVG": "[d] MJD midpoint of all input observations",
        "DATE-AVG": "Midpoint date of all input observations"
    }

    # Creation date
    now = time.localtime()
    calibDate = time.strftime("%Y-%m-%d", now)
    calibTime = time.strftime("%X %Z", now)
    header.set("CALIB_CREATE_DATE", calibDate)
    header.set("CALIB_CREATE_TIME", calibTime)

    # Merge input headers
    inputHeaders = [exp.getMetadata() for exp in expList
                    if exp is not None]
    merged = merge_headers(inputHeaders, mode='drop')

    # Comments are copied from the first *available* input header.
    # Indexing expList[0] directly would fail with AttributeError when
    # the first exposure is None, even though None entries are
    # tolerated everywhere else in this method.
    firstHeader = inputHeaders[0] if inputHeaders else None
    for k, v in merged.items():
        if k not in header:
            if firstHeader is not None and k in firstHeader:
                comment = firstHeader.getComment(k)
            else:
                comment = None
            header.set(k, v, comment=comment)

    # Construct list of visits
    visitInfoList = [exp.getInfo().getVisitInfo() for exp in expList
                     if exp is not None]
    for i, visit in enumerate(visitInfoList):
        if visit is None:
            continue
        header.set("CPP_INPUT_%d" % (i, ), visit.getExposureId())
        header.set("CPP_INPUT_DATE_%d" % (i, ), str(visit.getDate()))
        header.set("CPP_INPUT_EXPT_%d" % (i, ), visit.getExposureTime())
        if scales is not None:
            # NOTE(review): ``i`` indexes the None-filtered list, so it
            # only lines up with ``scales`` if no exposure was None --
            # confirm against callers.
            header.set("CPP_INPUT_SCALE_%d" % (i, ), scales[i])

    # Not yet working: DM-22302
    # Create an observation group so we can add some standard headers
    # independent of the form in the input files.
    # Use try block in case we are dealing with unexpected data headers
    try:
        group = ObservationGroup(visitInfoList, pedantic=False)
    except Exception:
        self.log.warn(
            "Exception making an obs group for headers. Continuing.")
        # Fall back to setting a DATE-OBS from the calibDate
        dateCards = {"DATE-OBS": "{}T00:00:00.00".format(calibDate)}
        comments[
            "DATE-OBS"] = "Date of start of day of calibration midpoint"
    else:
        oldest, newest = group.extremes()
        dateCards = dates_to_fits(oldest.datetime_begin,
                                  newest.datetime_end)

    for k, v in dateCards.items():
        header.set(k, v, comment=comments.get(k, None))

    return header
def read_file(file, hdrnum, print_trace, outstream=sys.stdout,
              errstream=sys.stderr, output_mode="verbose",
              write_heading=False):
    """Read the specified file and process it.

    Parameters
    ----------
    file : `str`
        The file from which the header is to be read.
    hdrnum : `int`
        The HDU number to read. The primary header is always read and
        merged with the header from this HDU. Ignored (treated as 0)
        for files ending in ``.yaml``.
    print_trace : `bool`
        If there is an error reading the file and this parameter is
        `True`, a full traceback of the exception will be reported. If
        `False` prints a one line summary of the error condition.
    outstream : `io.StringIO`, optional
        Output stream to use for standard messages. Defaults to
        `sys.stdout`. Exception reports are also written here.
    errstream : `io.StringIO`, optional
        Stream to send messages that would normally be sent to standard
        error. Defaults to `sys.stderr`.
    output_mode : `str`, optional
        Output mode to use. Must be one of "verbose", "none", "table",
        "yaml", or "fixed". "yaml" and "fixed" can be modified with a
        "native" suffix to indicate that the output should be a
        representation of the native object type representing the
        header (which can be PropertyList or an Astropy header).
        Without this modifier headers will be dumped in simple `dict`
        form. "auto" is not allowed by this point.
    write_heading : `bool`, optional
        If `True` and in table mode, write a table heading out before
        writing the content.

    Returns
    -------
    success : `bool`
        `True` if the file was handled successfully, `False` if the
        file could not be processed.

    Raises
    ------
    ValueError
        Raised if ``output_mode`` is not in ``OUTPUT_MODES`` or is
        "auto". Exceptions raised during the processing itself are
        caught, reported, and converted to a `False` return value.
    """
    if output_mode not in OUTPUT_MODES:
        raise ValueError(f"Output mode of '{output_mode}' is not understood.")
    if output_mode == "auto":
        raise ValueError("Output mode can not be 'auto' here.")

    # This gets in the way in tabular mode
    if output_mode != "table":
        print(f"Analyzing {file}...", file=errstream)

    try:
        if file.endswith(".yaml"):
            md = read_test_file(file)
            # YAML can't have HDUs
            hdrnum = 0
        else:
            md = read_metadata(file, 0)

        if md is None:
            print(f"Unable to open file {file}", file=errstream)
            return False

        if hdrnum != 0:
            extra = read_metadata(file, int(hdrnum))
            # Astropy does not allow append mode since it does not
            # convert lists to multiple cards. Overwrite for now
            if extra is None:
                print(f"HDU {hdrnum} was not found. Ignoring request.",
                      file=errstream)
            else:
                md = merge_headers([md, extra], mode="overwrite")

        suffix = "native"
        if output_mode.endswith(suffix):
            # Strip native and don't change type of md
            output_mode = output_mode[:-len(suffix)]
        else:
            # Rewrite md as simple dict for output
            md = dict(md.items())

        if output_mode in ("yaml", "fixed"):
            if output_mode == "fixed":
                fix_header(md, filename=file)

            # The header should be written out in the insertion order
            print(yaml.dump(md, sort_keys=False), file=outstream)
            return True

        obs_info = ObservationInfo(md, pedantic=True, filename=file)

        if output_mode == "table":
            cells = []
            for defn in TABLE_COLUMNS:
                value = getattr(obs_info, defn["attr"])
                spec = defn["format"]
                cells.append(f"{value:{spec}}")

            if write_heading:
                # Construct headings of the same width as the items
                # we have calculated. Doing this means we don't have to
                # work out for ourselves how many characters will be
                # used for non-strings (especially Quantity)
                headings = []
                separators = []
                for cell, defn in zip(cells, TABLE_COLUMNS):
                    width = len(cell)
                    label = defn["label"]
                    headings.append(f"{label:{width}.{width}}")
                    separators.append("-" * width)
                print(" ".join(headings), file=outstream)
                print(" ".join(separators), file=outstream)

            print(" ".join(cells), file=outstream)
        elif output_mode == "verbose":
            print(f"{obs_info}", file=outstream)
        elif output_mode != "none":
            # "none" deliberately produces no output; anything else
            # reaching this point is an internal inconsistency.
            raise RuntimeError(
                f"Output mode of '{output_mode}' not recognized but should be known."
            )
    except Exception as e:
        # Any failure is reported on outstream and turned into a
        # False return.
        if print_trace:
            traceback.print_exc(file=outstream)
        else:
            print(repr(e), file=outstream)
        return False

    return True
def test_fail(self):
    """Bad arguments to ``merge_headers`` should raise `ValueError`."""
    # An unrecognized merge mode is rejected
    self.assertRaises(
        ValueError, merge_headers, [self.h1, self.h2], mode="wrong"
    )

    # An empty list of headers is rejected
    self.assertRaises(ValueError, merge_headers, [])
def extractMetadata(self, filename: str) -> RawFileData:
    """Extract and process metadata from a single raw file.

    Parameters
    ----------
    filename : `str`
        Path to the file.

    Returns
    -------
    data : `RawFileData`
        A structure containing the metadata extracted from the file,
        as well as the original filename. All fields will be populated,
        but the `RawFileData.dataId` attribute will be a minimal
        (unexpanded) `DataCoordinate` instance. If the metadata could
        not be extracted, or the instrument is not known to the
        registry, ``datasets`` is empty, the formatter is the generic
        `Formatter` and the instrument is `None`.

    Notes
    -----
    Assumes that there is a single dataset associated with the given
    file. Instruments using a single file to store multiple datasets
    must implement their own version of this method.
    """

    def failed():
        # Record telling the caller that nothing could be read.
        return RawFileData(datasets=[], filename=filename,
                           FormatterClass=Formatter,
                           instrumentClass=None)

    # We do not want to stop ingest if we are given a bad file.
    # Instead return a RawFileData with no datasets and allow
    # the caller to report the failure.
    try:
        # Manually merge the primary and "first data" headers here
        # because we do not know in general if an input file has set
        # INHERIT=T.
        primaryHeader = readMetadata(filename, 0)
        header = merge_headers([primaryHeader, readMetadata(filename)],
                               mode="overwrite")
        datasets = [self._calculate_dataset_info(header, filename)]
    except Exception as e:
        self.log.debug("Problem extracting metadata from %s: %s",
                       filename, e)
        # Indicate to the caller that we failed to read
        return failed()

    self.log.debug("Extracted metadata from file %s", filename)

    # The data model currently assumes that whilst multiple datasets
    # can be associated with a single file, they must all share the
    # same formatter.
    try:
        instrument = Instrument.fromName(
            datasets[0].dataId["instrument"], self.butler.registry)
    except LookupError:
        self.log.warning(
            "Instrument %s for file %s not known to registry",
            datasets[0].dataId["instrument"], filename)
        return failed()

    formatterClass = instrument.getRawFormatter(datasets[0].dataId)
    return RawFileData(datasets=datasets, filename=filename,
                       FormatterClass=formatterClass,
                       instrumentClass=instrument)