def _get_provenance_inputs(adinputs): """ gets the input information for a future call to store provenance history. The AstroData inputs can change during the call to a primitive. We use this helper function to extract the 'before' state of things so that we can accurately record provenance history. After the primitive returns, we have the AstroData objects into which we'll want to record this information. Args ----- adinputs : list of incoming `AstroData` objects We expect to be called before the primitive executes, since we want to capture the state of the adinputs before they may be modified. Returns -------- `dict` by datalabel of dictionaries with the filename, md5, provenance and provenance_history data from the inputs """ retval = dict() for ad in adinputs: if ad.path: md5 = md5sum(ad.path) or "" else: md5 = "" if hasattr(ad, 'PROVENANCE'): provenance = ad.PROVENANCE.copy() else: provenance = [] if hasattr(ad, 'PROVENANCE_HISTORY'): provenance_history = ad.PROVENANCE_HISTORY.copy() else: provenance_history = [] retval[ad.data_label()] = { "filename": ad.filename, "md5": md5, "provenance": provenance, "provenance_history": provenance_history } return retval
def prepare(self, adinputs=None, **params):
    """
    Validate and standardize the datasets to ensure compatibility with
    the subsequent primitives.  The outputs, if written to disk, will be
    given the suffix "_prepared".

    Currently, there are no input parameters associated with this
    primitive.

    Parameters
    ----------
    adinputs : None or list
        Input files that will be prepared. If `None`, it runs on the
        list of AstroData objects in the main stream.
    suffix: str
        Suffix to be added to output files (Default: "_prepared").
    """
    log = self.log
    log.debug(gt.log_message("primitive", "prepare", "starting"))

    # Snapshot the inputs' names/paths now: the sub-primitives rename the
    # outputs, but provenance must reference the original files.
    orig_filenames = [ad.filename for ad in adinputs]
    orig_paths = [ad.path for ad in adinputs]
    timestamp_key = self.timestamp_keys["prepare"]
    out_suffix = params["suffix"]

    sub_primitives = ('validateData', 'standardizeStructure',
                      'standardizeHeaders', 'standardizeWCS')
    for name in sub_primitives:
        # No need to call standardizeWCS if all adinputs are
        # single-extension images (tags should be the same for all
        # adinputs).
        if ('WCS' in name and 'SPECT' not in adinputs[0].tags
                and all(len(ad) <= 1 for ad in adinputs)):
            continue
        adinputs = getattr(self, name)(
            adinputs, **self._inherit_params(params, name))

    for ad in adinputs:
        gt.mark_history(ad, self.myself(), timestamp_key)
        ad.update_filename(suffix=out_suffix, strip=True)

    for ad, fname, path in zip(adinputs, orig_filenames, orig_paths):
        if path:
            add_provenance(ad, fname, md5sum(path) or "", self.myself())

    return adinputs
def biasCorrect(self, adinputs=None, suffix=None, bias=None, do_bias=True):
    """
    The biasCorrect primitive will subtract the science extension of the
    input bias frames from the science extension of the input science
    frames. The variance and data quality extension will be updated, if
    they exist. If no bias is provided, getProcessedBias will be called
    to ensure a bias exists for every adinput.

    Parameters
    ----------
    adinputs : list of AstroData
        Science frames to be bias-subtracted.
    suffix: str
        suffix to be added to output files
    bias: str/list of str
        bias(es) to subtract
    do_bias: bool
        perform bias subtraction?

    Returns
    -------
    list of AstroData
        The bias-subtracted inputs (modified in place).

    Raises
    ------
    OSError
        If no processed bias is available for an input (outside 'qa' mode).
    """
    log = self.log
    log.debug(gt.log_message("primitive", self.myself(), "starting"))
    timestamp_key = self.timestamp_keys[self.myself()]

    if not do_bias:
        log.warning("Bias correction has been turned off.")
        return adinputs

    if bias is None:
        self.getProcessedBias(adinputs, refresh=False)
        bias_list = self._get_cal(adinputs, 'processed_bias')
    else:
        bias_list = bias

    # Provide a bias AD object for every science frame.
    # (Loop variable renamed from `bias` so it no longer shadows the
    # `bias` parameter.)
    for ad, bias_ad in zip(
            *gt.make_lists(adinputs, bias_list, force_ad=True)):
        if ad.phu.get(timestamp_key):
            log.warning("No changes will be made to {}, since it has "
                        "already been processed by biasCorrect".format(
                            ad.filename))
            continue

        if bias_ad is None:
            if 'qa' in self.mode:
                # Best-effort in QA mode: leave the frame uncorrected.
                log.warning("No changes will be made to {}, since no "
                            "bias was specified".format(ad.filename))
                continue
            else:
                raise OSError('No processed bias listed for {}'.format(
                    ad.filename))

        try:
            gt.check_inputs_match(ad, bias_ad, check_filter=False,
                                  check_units=True)
        except ValueError:
            bias_ad = gt.clip_auxiliary_data(ad, aux=bias_ad,
                                             aux_type='cal')
            # An Error will be raised if they don't match now
            gt.check_inputs_match(ad, bias_ad, check_filter=False,
                                  check_units=True)

        log.fullinfo('Subtracting this bias from {}:\n{}'.format(
            ad.filename, bias_ad.filename))
        ad.subtract(bias_ad)

        # Record bias used, timestamp, and update filename
        ad.phu.set('BIASIM', bias_ad.filename,
                   self.keyword_comments['BIASIM'])
        gt.mark_history(ad, primname=self.myself(), keyword=timestamp_key)
        ad.update_filename(suffix=suffix, strip=True)
        if bias_ad.path:
            add_provenance(ad, bias_ad.filename,
                           md5sum(bias_ad.path) or "", self.myself())
        # NOTE: removed dead assignment `timestamp = datetime.now()`
        # whose value was never used.
    return adinputs
def slitIllumCorrect(self, adinputs=None, slit_illum=None,
                     do_illum=True, suffix="_illumCorrected"):
    """
    This primitive will divide each SCI extension of the inputs by those
    of the corresponding slit illumination image. If the inputs contain
    VAR or DQ frames, those will also be updated accordingly due to the
    division on the data.

    Parameters
    ----------
    adinputs : list of AstroData
        Data to be corrected.
    slit_illum : str or AstroData
        Slit illumination path or AstroData object.
    do_illum: bool, optional
        Perform slit illumination correction? (Default: True)
    suffix : str, optional
        Suffix to be added to output files (Default: "_illumCorrected").

    Returns
    -------
    list of AstroData
        Corrected deep copies of the inputs.

    Raises
    ------
    NotImplementedError
        If `slit_illum` is None (automatic retrieval is not implemented).
    OSError
        If no slit illumination frame is available for an input in 'sq' mode.
    """
    log = self.log
    log.debug(gt.log_message("primitive", self.myself(), "starting"))
    timestamp_key = self.timestamp_keys[self.myself()]
    qecorr_key = self.timestamp_keys['QECorrect']

    if not do_illum:
        log.warning("Slit Illumination correction has been turned off.")
        return adinputs

    if slit_illum is None:
        # Automatic retrieval from a calibration database is not supported.
        raise NotImplementedError
    else:
        slit_illum_list = slit_illum

    # Provide a Slit Illum Ad object for every science frame
    ad_outputs = []
    for ad, slit_illum_ad in zip(
            *gt.make_lists(adinputs, slit_illum_list, force_ad=True)):

        if ad.phu.get(timestamp_key):
            # Bug fix: this message previously named "flatCorrect",
            # a copy-paste error from another primitive.
            log.warning("No changes will be made to {}, since it has "
                        "already been processed by slitIllumCorrect".format(
                            ad.filename))
            continue

        if slit_illum_ad is None:
            if self.mode in ['sq']:
                raise OSError(
                    "No processed slit illumination listed for {}".format(
                        ad.filename))
            else:
                log.warning("No changes will be made to {}, since no slit "
                            "illumination has been specified".format(
                                ad.filename))
                continue

        # Shapes are checked separately below, since the auxiliary frame
        # may legitimately need clipping to match the science frame.
        gt.check_inputs_match(ad, slit_illum_ad, check_shape=False)

        if not all(
                [e1.shape == e2.shape for (e1, e2) in zip(ad, slit_illum_ad)]):
            slit_illum_ad = gt.clip_auxiliary_data(adinput=[ad],
                                                   aux=[slit_illum_ad])[0]

        log.info("Dividing the input AstroData object {} by this \n"
                 "slit illumination file:  \n{}".format(
                     ad.filename, slit_illum_ad.filename))

        # Work on a copy so the input stream is left untouched.
        ad_out = deepcopy(ad)
        ad_out.divide(slit_illum_ad)

        # Update the header and filename, copying QECORR keyword from flat
        ad_out.phu.set("SLTILLIM", slit_illum_ad.filename,
                       self.keyword_comments["SLTILLIM"])

        try:
            qecorr_value = slit_illum_ad.phu[qecorr_key]
        except KeyError:
            pass
        else:
            log.fullinfo(
                "Copying {} keyword from slit illumination".format(
                    qecorr_key))
            ad_out.phu.set(qecorr_key, qecorr_value,
                           slit_illum_ad.phu.comments[qecorr_key])

        gt.mark_history(ad_out, primname=self.myself(),
                        keyword=timestamp_key)
        ad_out.update_filename(suffix=suffix, strip=True)
        if slit_illum_ad.path:
            add_provenance(ad_out, slit_illum_ad.filename,
                           md5sum(slit_illum_ad.path) or "", self.myself())

        ad_outputs.append(ad_out)

    return ad_outputs
def fringeCorrect(self, adinputs=None, **params):
    """
    Correct science frames for the effects of fringing, using a fringe
    frame. The fringe frame is obtained either from a specified parameter,
    or the "fringe" stream, or the calibration database. This is basically
    a bookkeeping wrapper for subtractFringe(), which does all the work.

    Parameters
    ----------
    suffix: str
        suffix to be added to output files
    fringe: list/str/AstroData/None
        fringe frame(s) to subtract
    do_fringe: bool/None
        apply fringe correction? (None => use pipeline default for data)
    scale: bool/None
        scale fringe frame? (None => False if fringe frame has same
        group_id() as data)
    scale_factor: float/sequence/None
        factor(s) to scale fringe
    """
    log = self.log
    log.debug(gt.log_message("primitive", self.myself(), "starting"))
    timestamp_key = self.timestamp_keys[self.myself()]
    fringe = params["fringe"]
    scale = params["scale"]
    do_cal = params["do_cal"]

    # Exit now if nothing needs a correction, to avoid an error when the
    # calibration search fails. If images with different exposure times
    # are used, some frames may not require a correction (but the calibration
    # search will succeed), so still need to check individual inputs later.
    needs_correction = [self._needs_fringe_correction(ad) for ad in adinputs]
    if any(needs_correction):
        if do_cal == 'skip':
            log.warning("Fringe correction has been turned off but is "
                        "recommended.")
            return adinputs
    else:
        if do_cal == 'procmode' or do_cal == 'skip':
            log.stdinfo("No input images require a fringe correction.")
            return adinputs
        else:  # do_cal == 'force':
            log.warning("Fringe correction has been forced on but may not "
                        "be required.")

    if fringe is None:
        # This logic is for QAP: a single frame in the 'fringe' stream
        # takes precedence over a calibration-database lookup.
        try:
            fringe_list = self.streams['fringe']
            assert len(fringe_list) == 1
            scale = False
            log.stdinfo("Using fringe frame in 'fringe' stream. "
                        "Setting scale=False")
            fringe_list = (fringe_list[0], "stream")
        except (KeyError, AssertionError):
            # No usable stream; fall back to the calibration database.
            fringe_list = self.caldb.get_processed_fringe(adinputs)
    else:
        fringe_list = (fringe, None)

    # Usual stuff to ensure that we have an iterable of the correct length
    # for the scale factors regardless of what the input is
    scale_factor = params["scale_factor"]
    try:
        factors = iter(scale_factor)
    except TypeError:
        # Scalar (or None): replicate it for every input.
        factors = iter([scale_factor] * len(adinputs))
    else:
        # In case a single-element list was passed
        if len(scale_factor) == 1:
            factors = iter(scale_factor * len(adinputs))

    # Get a fringe AD object for every science frame
    for ad, fringe, origin, correct in zip(*gt.make_lists(
            adinputs, *fringe_list, needs_correction, force_ad=(1,))):
        if ad.phu.get(timestamp_key):
            log.warning(f"{ad.filename}: already processed by "
                        "fringeCorrect. Continuing.")
            continue

        # Logic to deal with different exposure times where only
        # some inputs might require fringe correction
        # KL: for now, I'm not allowing the "force" to do anything when
        # the correction is not needed.
        if (do_cal == 'procmode' or do_cal == 'force') and not correct:
            log.stdinfo("{} does not require a fringe correction".
                        format(ad.filename))
            ad.update_filename(suffix=params["suffix"], strip=True)
            continue

        # At this point, we definitely want to do a fringe correction
        # so we'd better have a fringe frame!
        if fringe is None:
            if 'sq' not in self.mode and do_cal != 'force':
                log.warning("No changes will be made to {}, since no "
                            "fringe frame has been specified".
                            format(ad.filename))
                continue
            else:
                log.warning(f"{ad.filename}: no fringe was specified. "
                            "Continuing.")
                continue

        # Check the inputs have matching filters, binning, and shapes
        try:
            gt.check_inputs_match(ad, fringe)
        except ValueError:
            fringe = gt.clip_auxiliary_data(adinput=ad, aux=fringe,
                                            aux_type="cal")
            gt.check_inputs_match(ad, fringe)

        # Report which fringe frame is being used and where it came from.
        origin_str = f" (obtained from {origin})" if origin else ""
        log.stdinfo(f"{ad.filename}: using the fringe frame "
                    f"{fringe.filename}{origin_str}")

        matched_groups = (ad.group_id() == fringe.group_id())
        if scale or (scale is None and not matched_groups):
            factor = next(factors)
            if factor is None:
                # No explicit factor: derive one from the data.
                factor = self._calculate_fringe_scaling(ad, fringe)
            log.stdinfo("Scaling fringe frame by factor {:.3f} before "
                        "subtracting from {}".format(factor, ad.filename))
            # Since all elements of fringe_list might be references to the
            # same AD, need to make a copy before multiplying
            fringe_copy = deepcopy(fringe)
            fringe_copy.multiply(factor)
            ad.subtract(fringe_copy)
        else:
            if scale is None:
                log.stdinfo("Not scaling fringe frame with same group ID "
                            "as {}".format(ad.filename))
            ad.subtract(fringe)

        # Timestamp and update header and filename
        ad.phu.set("FRINGEIM", fringe.filename,
                   self.keyword_comments["FRINGEIM"])
        gt.mark_history(ad, primname=self.myself(), keyword=timestamp_key)
        ad.update_filename(suffix=params["suffix"], strip=True)
        if fringe.path:
            add_provenance(ad, fringe.filename,
                           md5sum(fringe.path) or "", self.myself())
    return adinputs
def biasCorrect(self, adinputs=None, suffix=None, bias=None, do_cal=None):
    """
    The biasCorrect primitive will subtract the science extension of the
    input bias frames from the science extension of the input science
    frames. The variance and data quality extension will be updated, if
    they exist. If no bias is provided, the calibration database(s) will
    be queried.

    Parameters
    ----------
    adinputs : list of AstroData
        Science frames to be bias-subtracted.
    suffix: str
        suffix to be added to output files
    bias: str/list of str
        bias(es) to subtract
    do_cal: str
        perform bias subtraction? ('skip' turns the correction off)

    Returns
    -------
    list of AstroData
        The bias-subtracted inputs (modified in place).
    """
    log = self.log
    log.debug(gt.log_message("primitive", self.myself(), "starting"))
    timestamp_key = self.timestamp_keys[self.myself()]

    if do_cal == 'skip':
        log.warning("Bias correction has been turned off.")
        return adinputs

    if bias is None:
        bias_list = self.caldb.get_processed_bias(adinputs)
    else:
        bias_list = (bias, None)

    # Provide a bias AD object for every science frame, and an origin.
    # (Loop variable renamed from `bias` so it no longer shadows the
    # `bias` parameter.)
    for ad, bias_ad, origin in zip(*gt.make_lists(adinputs, *bias_list,
                                                  force_ad=(1,))):
        if ad.phu.get(timestamp_key):
            log.warning(f"{ad.filename}: already processed by "
                        "biasCorrect. Continuing.")
            continue

        if bias_ad is None:
            # NOTE(review): both branches warn and skip; the 'sq'/'force'
            # case only changes the message — confirm whether it should
            # raise instead.
            if 'sq' not in self.mode and do_cal != 'force':
                log.warning("No changes will be made to {}, since no "
                            "bias was specified".format(ad.filename))
                continue
            else:
                log.warning(f"{ad.filename}: no bias was specified. "
                            "Continuing.")
                continue

        try:
            gt.check_inputs_match(ad, bias_ad, check_filter=False,
                                  check_units=True)
        except ValueError:
            bias_ad = gt.clip_auxiliary_data(ad, aux=bias_ad,
                                             aux_type='cal')
            # An Error will be raised if they don't match now
            gt.check_inputs_match(ad, bias_ad, check_filter=False,
                                  check_units=True)

        origin_str = f" (obtained from {origin})" if origin else ""
        log.stdinfo(f"{ad.filename}: subtracting the bias "
                    f"{bias_ad.filename}{origin_str}")
        ad.subtract(bias_ad)

        # Record bias used, timestamp, and update filename
        ad.phu.set('BIASIM', bias_ad.filename,
                   self.keyword_comments['BIASIM'])
        gt.mark_history(ad, primname=self.myself(), keyword=timestamp_key)
        ad.update_filename(suffix=suffix, strip=True)
        if bias_ad.path:
            add_provenance(ad, bias_ad.filename,
                           md5sum(bias_ad.path) or "", self.myself())
        # NOTE: removed dead assignment `timestamp = datetime.now()`
        # whose value was never used.
    return adinputs