Beispiel #1
0
def test_add_duplicate_provenance(ad):
    """Adding an identical provenance record twice must keep a single row."""
    stamp = datetime.now().strftime(PROVENANCE_DATE_FORMAT)
    # (filename, md5, primitive) shared by both insertions
    record = ("filename", "md5", "provenance_added_by")

    for _ in range(2):
        add_provenance(ad, *record, timestamp=stamp)

    # The second insertion duplicates the first, so it should be skipped
    assert len(ad.PROVENANCE) == 1
Beispiel #2
0
def test_clone_provenance(ad, ad2):
    """Cloning a provenance table reproduces its row on the target object."""
    stamp = datetime.now().strftime(PROVENANCE_DATE_FORMAT)
    # (filename, md5, primitive) of the single record placed on the source
    record = ("filename", "md5", "provenance_added_by")

    add_provenance(ad, *record, timestamp=stamp)
    clone_provenance(ad.PROVENANCE, ad2)

    # The target must now hold exactly the record added to the source
    assert len(ad2.PROVENANCE) == 1
    assert tuple(ad2.PROVENANCE[0]) == (stamp, *record)
    def prepare(self, adinputs=None, **params):
        """
        Validate and standardize the datasets so that they are compatible
        with the subsequent primitives.

        The primitives validateData, standardizeStructure, standardizeHeaders
        and standardizeWCS are called in that order, each one receiving the
        list returned by the previous one. standardizeWCS is skipped when the
        first input lacks the 'SPECT' tag and no input has more than one
        extension.

        Parameters
        ----------
        adinputs : None or list
            Input files that will be prepared. The body iterates over this
            value directly, so `None` must be replaced by the caller or a
            wrapper not visible here (presumably with the main stream).
        suffix: str
            Suffix to be added to output files (documented default:
            "_prepared").

        Returns
        -------
        list
            The datasets returned by the last primitive that was run, with
            history marked, filenames updated and, for inputs that had a
            path, a provenance entry for the original file.
        """
        log = self.log
        log.debug(gt.log_message("primitive", "prepare", "starting"))

        # Remember the original name/location of every input before the
        # primitives below get a chance to rename anything
        origins = [(ad.filename, ad.path) for ad in adinputs]

        timestamp_key = self.timestamp_keys["prepare"]
        sfx = params["suffix"]

        steps = ('validateData', 'standardizeStructure',
                 'standardizeHeaders', 'standardizeWCS')
        for step in steps:
            if 'WCS' in step:
                # No need to call standardizeWCS if all adinputs are
                # single-extension images (tags should be the same for all
                # adinputs)
                wcs_needed = ('SPECT' in adinputs[0].tags
                              or any(len(ad) > 1 for ad in adinputs))
                if not wcs_needed:
                    continue
            step_params = self._inherit_params(params, step)
            adinputs = getattr(self, step)(adinputs, **step_params)

        for ad in adinputs:
            gt.mark_history(ad, self.myself(), timestamp_key)
            ad.update_filename(suffix=sfx, strip=True)

        # Kept as a separate pass: zip() stops at the shorter sequence, so
        # this loop may cover fewer datasets than the one above
        for ad, (orig_name, orig_path) in zip(adinputs, origins):
            if orig_path:
                add_provenance(ad, orig_name, md5sum(orig_path) or "",
                               self.myself())
        return adinputs
Beispiel #4
0
def test_add_get_provenance(ad):
    """Exercise add_provenance: no-op on md5=None, update, and append."""
    stamp = datetime.now().strftime(PROVENANCE_DATE_FORMAT)
    name = "filename"
    prim = "provenance_added_by"
    first = (name, "md5", prim)
    second = (name, 'md6', 'other primitive')

    # A missing md5 means no table gets created at all
    add_provenance(ad, name, None, prim)
    assert not hasattr(ad, 'PROVENANCE')

    # First real record, with an explicit timestamp
    add_provenance(ad, *first, timestamp=stamp)
    assert len(ad.PROVENANCE) == 1
    assert tuple(ad.PROVENANCE[0]) == (stamp, *first)

    # Same record again without a timestamp: the row count stays at one and
    # only the non-timestamp columns can be compared
    add_provenance(ad, *first)
    assert len(ad.PROVENANCE) == 1
    assert tuple(ad.PROVENANCE[0])[1:] == first

    # A different md5/primitive pair is appended as a second row
    add_provenance(ad, *second)
    assert len(ad.PROVENANCE) == 2
    assert tuple(ad.PROVENANCE[0])[1:] == first
    assert tuple(ad.PROVENANCE[1])[1:] == second
Beispiel #5
0
    def biasCorrect(self, adinputs=None, suffix=None, bias=None, do_bias=True):
        """
        The biasCorrect primitive will subtract the science extension of the
        input bias frames from the science extension of the input science
        frames. The variance and data quality extension will be updated, if
        they exist. If no bias is provided, getProcessedBias will be called
        to ensure a bias exists for every adinput.

        Parameters
        ----------
        adinputs: list
            science frames to be bias-corrected (modified in place)
        suffix: str
            suffix to be added to output files
        bias: str/list of str
            bias(es) to subtract
        do_bias: bool
            perform bias subtraction?

        Returns
        -------
        list
            The same `adinputs` list. Inputs that were already processed, or
            that have no bias in a 'qa' mode, are left unchanged.

        Raises
        ------
        OSError
            If an input has no bias and 'qa' is not in `self.mode`.
        """
        log = self.log
        log.debug(gt.log_message("primitive", self.myself(), "starting"))
        timestamp_key = self.timestamp_keys[self.myself()]

        if not do_bias:
            log.warning("Bias correction has been turned off.")
            return adinputs

        if bias is None:
            self.getProcessedBias(adinputs, refresh=False)
            bias_list = self._get_cal(adinputs, 'processed_bias')
        else:
            bias_list = bias

        # Provide a bias AD object for every science frame
        for ad, bias_ad in zip(
                *gt.make_lists(adinputs, bias_list, force_ad=True)):
            if ad.phu.get(timestamp_key):
                log.warning("No changes will be made to {}, since it has "
                            "already been processed by biasCorrect".format(
                                ad.filename))
                continue

            if bias_ad is None:
                if 'qa' in self.mode:
                    log.warning("No changes will be made to {}, since no "
                                "bias was specified".format(ad.filename))
                    continue
                raise OSError('No processed bias listed for {}'.format(
                    ad.filename))

            try:
                gt.check_inputs_match(ad,
                                      bias_ad,
                                      check_filter=False,
                                      check_units=True)
            except ValueError:
                # Retry after clipping the bias to the science frame
                bias_ad = gt.clip_auxiliary_data(ad, aux=bias_ad,
                                                 aux_type='cal')
                # An Error will be raised if they don't match now
                gt.check_inputs_match(ad,
                                      bias_ad,
                                      check_filter=False,
                                      check_units=True)

            log.fullinfo('Subtracting this bias from {}:\n{}'.format(
                ad.filename, bias_ad.filename))
            ad.subtract(bias_ad)

            # Record bias used, timestamp, and update filename
            ad.phu.set('BIASIM', bias_ad.filename,
                       self.keyword_comments['BIASIM'])
            gt.mark_history(ad, primname=self.myself(), keyword=timestamp_key)
            ad.update_filename(suffix=suffix, strip=True)
            # Provenance is only recorded for a bias that has a path on disk
            if bias_ad.path:
                add_provenance(ad, bias_ad.filename,
                               md5sum(bias_ad.path) or "", self.myself())

        return adinputs
    def slitIllumCorrect(self,
                         adinputs=None,
                         slit_illum=None,
                         do_illum=True,
                         suffix="_illumCorrected"):
        """
        This primitive will divide each SCI extension of the inputs by those
        of the corresponding slit illumination image. If the inputs contain
        VAR or DQ frames, those will also be updated accordingly due to the
        division on the data.

        Parameters
        ----------
        adinputs : list of AstroData
            Data to be corrected.
        slit_illum : str or AstroData
            Slit illumination path or AstroData object.
        do_illum: bool, optional
            Perform slit illumination correction? (Default: True)
        suffix: str, optional
            Suffix to be added to output files (Default: "_illumCorrected").

        Returns
        -------
        list of AstroData
            Corrected deep copies of the inputs. If `do_illum` is false the
            original `adinputs` list is returned untouched. Inputs that are
            skipped (already processed, or lacking a slit illumination
            outside 'sq' mode) are NOT included in the returned list.

        Raises
        ------
        NotImplementedError
            If `slit_illum` is None (no automatic retrieval is implemented).
        OSError
            If an input has no slit illumination and `self.mode` is 'sq'.
        """
        log = self.log
        log.debug(gt.log_message("primitive", self.myself(), "starting"))
        timestamp_key = self.timestamp_keys[self.myself()]
        qecorr_key = self.timestamp_keys['QECorrect']

        if not do_illum:
            log.warning("Slit Illumination correction has been turned off.")
            return adinputs

        if slit_illum is None:
            raise NotImplementedError
        slit_illum_list = slit_illum

        # Provide a Slit Illum Ad object for every science frame
        ad_outputs = []
        for ad, slit_illum_ad in zip(
                *gt.make_lists(adinputs, slit_illum_list, force_ad=True)):

            if ad.phu.get(timestamp_key):
                log.warning("No changes will be made to {}, since it has "
                            "already been processed by "
                            "slitIllumCorrect".format(ad.filename))
                continue

            if slit_illum_ad is None:
                if self.mode in ['sq']:
                    raise OSError(
                        "No processed slit illumination listed for {}".format(
                            ad.filename))
                log.warning("No changes will be made to {}, since no slit "
                            "illumination has been specified".format(
                                ad.filename))
                continue

            # Shapes are compared separately below so that a mismatch can be
            # handled by clipping instead of raising here
            gt.check_inputs_match(ad, slit_illum_ad, check_shape=False)

            if not all(e1.shape == e2.shape
                       for e1, e2 in zip(ad, slit_illum_ad)):
                slit_illum_ad = gt.clip_auxiliary_data(adinput=[ad],
                                                       aux=[slit_illum_ad])[0]

            log.info("Dividing the input AstroData object {} by this \n"
                     "slit illumination file:  \n{}".format(
                         ad.filename, slit_illum_ad.filename))

            # Work on a copy so the caller's input object is left unmodified
            ad_out = deepcopy(ad)
            ad_out.divide(slit_illum_ad)

            # Update the header and filename, copying the QECorrect timestamp
            # keyword from the slit illumination when it is present
            ad_out.phu.set("SLTILLIM", slit_illum_ad.filename,
                           self.keyword_comments["SLTILLIM"])

            try:
                qecorr_value = slit_illum_ad.phu[qecorr_key]
            except KeyError:
                # Slit illumination carries no such keyword: nothing to copy
                pass
            else:
                log.fullinfo(
                    "Copying {} keyword from slit illumination".format(
                        qecorr_key))
                ad_out.phu.set(qecorr_key, qecorr_value,
                               slit_illum_ad.phu.comments[qecorr_key])

            gt.mark_history(ad_out,
                            primname=self.myself(),
                            keyword=timestamp_key)
            ad_out.update_filename(suffix=suffix, strip=True)

            # Provenance is only recorded for a calibration with a path
            if slit_illum_ad.path:
                add_provenance(ad_out, slit_illum_ad.filename,
                               md5sum(slit_illum_ad.path) or "", self.myself())

            ad_outputs.append(ad_out)

        return ad_outputs
Beispiel #7
0
    def fringeCorrect(self, adinputs=None, **params):
        """
        Correct science frames for the effects of fringing, using a fringe
        frame. The fringe frame is obtained either from a specified parameter,
        or the "fringe" stream, or the calibration database.

        Parameters
        ----------
        adinputs: list
            science frames to be corrected (modified in place)
        suffix: str
            suffix to be added to output files
        fringe: list/str/AstroData/None
            fringe frame(s) to subtract
        do_cal: str
            'skip' => return the inputs unchanged; 'procmode' => correct only
            the inputs that need it; 'force' => proceed even if no input
            needs a correction (inputs that individually do not need it are
            still left uncorrected)
        scale: bool/None
            scale fringe frame? (None => False if fringe frame has same
            group_id() as data)
        scale_factor: float/sequence/None
            factor(s) to scale fringe (None => compute the factor)

        Returns
        -------
        list
            The same `adinputs` list.
        """
        log = self.log
        log.debug(gt.log_message("primitive", self.myself(), "starting"))
        timestamp_key = self.timestamp_keys[self.myself()]
        fringe = params["fringe"]
        scale = params["scale"]
        do_cal = params["do_cal"]

        # Exit now if nothing needs a correction, to avoid an error when the
        # calibration search fails. If images with different exposure times
        # are used, some frames may not require a correction (but the calibration
        # search will succeed), so still need to check individual inputs later.
        needs_correction = [self._needs_fringe_correction(ad) for ad in adinputs]
        if any(needs_correction):
            if do_cal == 'skip':
                log.warning("Fringe correction has been turned off but is "
                            "recommended.")
                return adinputs
        else:
            if do_cal in ('procmode', 'skip'):
                log.stdinfo("No input images require a fringe correction.")
                return adinputs
            else:  # do_cal == 'force':
                log.warning("Fringe correction has been forced on but may not "
                            "be required.")

        if fringe is None:
            # This logic is for QAP. An explicit length check is used rather
            # than an assert, which would be stripped when running under -O.
            try:
                stream_fringes = self.streams['fringe']
            except KeyError:
                stream_fringes = ()
            if len(stream_fringes) == 1:
                scale = False
                log.stdinfo("Using fringe frame in 'fringe' stream. "
                            "Setting scale=False")
                fringe_list = (stream_fringes[0], "stream")
            else:
                fringe_list = self.caldb.get_processed_fringe(adinputs)
        else:
            fringe_list = (fringe, None)

        # Usual stuff to ensure that we have an iterable of the correct length
        # for the scale factors regardless of what the input is
        scale_factor = params["scale_factor"]
        try:
            factors = iter(scale_factor)
        except TypeError:
            factors = iter([scale_factor] * len(adinputs))
        else:
            # In case a single-element list was passed
            if len(scale_factor) == 1:
                factors = iter(scale_factor * len(adinputs))

        # Get a fringe AD object for every science frame
        for ad, fringe_ad, origin, correct in zip(*gt.make_lists(
                adinputs, *fringe_list, needs_correction, force_ad=(1,))):
            if ad.phu.get(timestamp_key):
                log.warning(f"{ad.filename}: already processed by "
                            "fringeCorrect. Continuing.")
                continue

            # Logic to deal with different exposure times where only
            # some inputs might require fringe correction
            # KL: for now, I'm not allowing the "force" to do anything when
            #     the correction is not needed.
            if do_cal in ('procmode', 'force') and not correct:
                log.stdinfo("{} does not require a fringe correction".
                            format(ad.filename))
                ad.update_filename(suffix=params["suffix"], strip=True)
                continue

            # At this point, we definitely want to do a fringe correction
            # so we'd better have a fringe frame!
            if fringe_ad is None:
                # Both branches skip the frame; only the message differs
                if 'sq' not in self.mode and do_cal != 'force':
                    log.warning("No changes will be made to {}, since no "
                                "fringe frame has been specified".
                                format(ad.filename))
                    continue
                else:
                    log.warning(f"{ad.filename}: no fringe was specified. "
                                "Continuing.")
                    continue

            # Check the inputs have matching filters, binning, and shapes
            try:
                gt.check_inputs_match(ad, fringe_ad)
            except ValueError:
                # Retry after clipping; a second mismatch propagates
                fringe_ad = gt.clip_auxiliary_data(adinput=ad, aux=fringe_ad,
                                                   aux_type="cal")
                gt.check_inputs_match(ad, fringe_ad)

            origin_str = f" (obtained from {origin})" if origin else ""
            log.stdinfo(f"{ad.filename}: using the fringe frame "
                         f"{fringe_ad.filename}{origin_str}")
            matched_groups = (ad.group_id() == fringe_ad.group_id())
            if scale or (scale is None and not matched_groups):
                factor = next(factors)
                if factor is None:
                    factor = self._calculate_fringe_scaling(ad, fringe_ad)
                log.stdinfo("Scaling fringe frame by factor {:.3f} before "
                            "subtracting from {}".format(factor, ad.filename))
                # Since all elements of fringe_list might be references to the
                # same AD, need to make a copy before multiplying
                fringe_copy = deepcopy(fringe_ad)
                fringe_copy.multiply(factor)
                ad.subtract(fringe_copy)
            else:
                if scale is None:
                    log.stdinfo("Not scaling fringe frame with same group ID "
                                "as {}".format(ad.filename))
                ad.subtract(fringe_ad)

            # Timestamp and update header and filename
            ad.phu.set("FRINGEIM", fringe_ad.filename,
                       self.keyword_comments["FRINGEIM"])
            gt.mark_history(ad, primname=self.myself(), keyword=timestamp_key)
            ad.update_filename(suffix=params["suffix"], strip=True)
            # Provenance is only recorded for a fringe frame with a path
            if fringe_ad.path:
                add_provenance(ad, fringe_ad.filename,
                               md5sum(fringe_ad.path) or "", self.myself())
        return adinputs
Beispiel #8
0
    def biasCorrect(self, adinputs=None, suffix=None, bias=None, do_cal=None):
        """
        The biasCorrect primitive will subtract the science extension of the
        input bias frames from the science extension of the input science
        frames. The variance and data quality extension will be updated, if
        they exist. If no bias is provided, the calibration database(s) will
        be queried.

        Parameters
        ----------
        adinputs: list
            science frames to be bias-corrected (modified in place)
        suffix: str
            suffix to be added to output files
        bias: str/list of str
            bias(es) to subtract
        do_cal: str
            perform bias subtraction? 'skip' returns the inputs unchanged;
            any other value proceeds. 'force' only changes which warning is
            logged when an input has no bias.

        Returns
        -------
        list
            The same `adinputs` list. Inputs that were already processed or
            that have no bias are left unchanged.
        """
        log = self.log
        log.debug(gt.log_message("primitive", self.myself(), "starting"))
        timestamp_key = self.timestamp_keys[self.myself()]

        if do_cal == 'skip':
            log.warning("Bias correction has been turned off.")
            return adinputs

        if bias is None:
            bias_list = self.caldb.get_processed_bias(adinputs)
        else:
            # User-supplied bias has no origin to report
            bias_list = (bias, None)

        # Provide a bias AD object for every science frame, and an origin
        for ad, bias_ad, origin in zip(*gt.make_lists(adinputs, *bias_list,
                                       force_ad=(1,))):
            if ad.phu.get(timestamp_key):
                log.warning(f"{ad.filename}: already processed by "
                            "biasCorrect. Continuing.")
                continue

            if bias_ad is None:
                # Both branches skip the frame; only the message differs
                if 'sq' not in self.mode and do_cal != 'force':
                    log.warning("No changes will be made to {}, since no "
                                "bias was specified".format(ad.filename))
                    continue
                else:
                    log.warning(f"{ad.filename}: no bias was specified. "
                                "Continuing.")
                    continue

            try:
                gt.check_inputs_match(ad, bias_ad, check_filter=False,
                                      check_units=True)
            except ValueError:
                # Retry after clipping the bias to the science frame
                bias_ad = gt.clip_auxiliary_data(ad, aux=bias_ad,
                                                 aux_type='cal')
                # An Error will be raised if they don't match now
                gt.check_inputs_match(ad, bias_ad, check_filter=False,
                                      check_units=True)

            origin_str = f" (obtained from {origin})" if origin else ""
            log.stdinfo(f"{ad.filename}: subtracting the bias "
                         f"{bias_ad.filename}{origin_str}")
            ad.subtract(bias_ad)

            # Record bias used, timestamp, and update filename
            ad.phu.set('BIASIM', bias_ad.filename,
                       self.keyword_comments['BIASIM'])
            gt.mark_history(ad, primname=self.myself(), keyword=timestamp_key)
            ad.update_filename(suffix=suffix, strip=True)
            # Provenance is only recorded for a bias that has a path on disk
            if bias_ad.path:
                add_provenance(ad, bias_ad.filename,
                               md5sum(bias_ad.path) or "", self.myself())

        return adinputs