def get_multiple_reference_paths(input_file, reference_file_types):
    """Return the CRDS best-reference paths for `input_file`.

    This layers JWST pipeline conventions on top of crds.getreferences():
    the input is first converted into a flat dictionary of JWST data model
    dotted parameters, and that dictionary is what defines the CRDS best
    references.

    Parameters
    ----------
    input_file : str, DataModel, or nested dict
        Strings and DataModels are opened with ``datamodels.open()`` and
        flattened with ``to_flat_dict(include_arrays=False)``.  Anything
        else is handed to ``_flatten_dict()``.
    reference_file_types : list of str
        Reference types to look up.  An empty (falsy) value returns ``{}``
        without contacting CRDS.

    Returns
    -------
    dict
        { filetype : filepath or "N/A", ... }

    Raises
    ------
    crds.CrdsBadRulesError, crds.CrdsBadReferenceError
        Re-raised as a fresh instance carrying only ``str(exc)``.
    """
    from .. import datamodels

    gc.collect()

    # Passing [] on to CRDS would be interpreted as *all types*.
    if not reference_file_types:
        return {}

    needs_open = isinstance(
        input_file, (six.string_types, datamodels.DataModel))
    if not needs_open:
        # XXX not sure what this does... seems unneeded.
        data_dict = _flatten_dict(input_file)
    else:
        with datamodels.open(input_file) as dm:
            data_dict = dm.to_flat_dict(include_arrays=False)

    gc.collect()

    def _lookup():
        # Single spelling of the CRDS call shared by the locked and
        # unlocked paths below.
        return crds.getreferences(
            data_dict, reftypes=reference_file_types, observatory="jwst")

    try:
        if crds_cache_locking is None:
            bestrefs = _lookup()
        else:
            with crds_cache_locking.get_cache_lock():
                bestrefs = _lookup()
    except crds.CrdsBadRulesError as exc:
        raise crds.CrdsBadRulesError(str(exc))
    except crds.CrdsBadReferenceError as exc:
        raise crds.CrdsBadReferenceError(str(exc))

    # Any value mentioning "N/A" (case-insensitive) collapses to plain "N/A".
    refpaths = {}
    for filetype, filepath in bestrefs.items():
        refpaths[filetype] = "N/A" if "N/A" in filepath.upper() else filepath
    return refpaths
def get_data_level(reftype, calibration_version, context=None, extras=None,
                   observatory="jwst", ignore_cache=False):
    """Return the minimum data level associated with `reftype`.

    Based on `context` and `calibration_version` (nominally CAL_VER), this
    nominally means fetching and interpreting the SYSTEM DATALVL reference
    file, so using this function requires operating in an environment with
    an up-to-date and accessible CRDS cache.

    Parameters
    ----------
    reftype : str
        Reference type whose data level is wanted; used as the key into the
        mapping returned by ``get_type_to_level()``.
    calibration_version : str
        Calibration software version, stored in the lookup header under
        several aliases (CAL_VER, VERSION,
        META.CALIBRATION_SOFTWARE_VERSION).
    context : str, optional
        CRDS context forwarded to ``crds.getreferences()``.
    extras : dict, optional
        Additional header entries.  They are merged last, so they override
        the defaults built here.
    observatory : str, optional
        Observatory forwarded to ``crds.getreferences()``.
    ignore_cache : bool, optional
        Forwarded to ``crds.getreferences()``.

    Returns
    -------
    The entry for `reftype` in the mapping produced by
    ``get_type_to_level()`` for the selected DATALVL reference.
    """
    header = {
        "META.INSTRUMENT.NAME": "SYSTEM",
        "INSTRUME": "SYSTEM",
        "INSTRUMENT": "SYSTEM",
        "META.CALIBRATION_SOFTWARE_VERSION": calibration_version,
        "CAL_VER": calibration_version,
        "VERSION": calibration_version,
    }
    header.update(extras or {})

    # Honour the caller's ignore_cache; it was previously accepted by the
    # signature but a literal False was always passed to CRDS.
    bestrefs = crds.getreferences(
        header,
        context=context,
        reftypes=["datalvl"],
        observatory=observatory,
        ignore_cache=ignore_cache,
    )
    reference_path = bestrefs["datalvl"]
    type_to_level = get_type_to_level(reference_path)
    return type_to_level[reftype]
def get_reffiles(parameter_dict, reffile_types, download=True):
    """Determine CRDS's best reference files to use for a particular
    observation, and download them if they are not already present in
    the ``CRDS_PATH``. The determination is made based on the
    information in ``parameter_dict``.

    Parameters
    ----------
    parameter_dict : dict
        Dictionary of basic metadata from the file to be processed by
        the returned reference files (e.g. ``INSTRUME``, ``DETECTOR``,
        etc)

    reffile_types : list
        List of reference file types to look up and download. These
        must be contained in CRDS's list of reference file types.

    download : bool
        If ``True`` (default), the best reference files are obtained
        through ``crds.getreferences``. If ``False``,
        ``crds.getrecommendations`` is used instead and the local paths
        are assembled by hand from ``CRDS_PATH``, so nothing is
        downloaded. The use of ``False`` is primarily intended to
        support testing on Travis.

    Returns
    -------
    reffile_mapping : dict
        Mapping of reference file type to local CRDS file location
        (or ``"NOT FOUND"`` in the ``download=False`` case).

    Raises
    ------
    ValueError
        If CRDS raises ``CrdsLookupError`` for the given parameters.
    """
    # IMPORTANT: Import of crds package must be done AFTER the environment
    # variables are set in the functions above
    import crds
    from crds import CrdsLookupError

    failure_template = "ERROR: CRDSLookupError when trying to find reference files for parameters: {}"

    if download:
        try:
            return crds.getreferences(parameter_dict, reftypes=reffile_types)
        except CrdsLookupError:
            raise ValueError(failure_template.format(parameter_dict))

    # If the files will not be downloaded, still return the same local
    # paths that are returned when the files are downloaded.  Note that
    # this follows the directory structure currently assumed by CRDS.
    crds_path = os.environ.get('CRDS_PATH')
    try:
        reffile_mapping = crds.getrecommendations(parameter_dict, reftypes=reffile_types)
    except CrdsLookupError:
        raise ValueError(failure_template.format(parameter_dict))

    for reftype, basename in reffile_mapping.items():
        # The NOT FOUND check has to come first: splitting such a value to
        # extract the instrument would raise an exception.
        if "NOT FOUND" in basename:
            local_path = "NOT FOUND"
        else:
            instrument = basename.split('_')[1]
            local_path = os.path.join(crds_path, 'references/jwst', instrument, basename)
        reffile_mapping[reftype] = local_path

    return reffile_mapping
def test_crds_failed_getreferences_reftype():
    """Call crds.getreferences() asking for the invalid reference type "foo"."""
    params = {
        '_extra_fits.PRIMARY.IRAF-TLM': '2013-12-12T15:56:30',
        'meta.date': '2014-07-22T15:53:19.893683',
        'meta.filename': 'crds.fits',
        'meta.instrument.detector': 'NRCA1',
        'meta.instrument.filter': 'F140M',
        'meta.instrument.name': 'NIRCAM',
        'meta.instrument.pupil': 'CLEAR',
        'meta.observation.date': '2012-04-22',
        'meta.origin': 'NOAO-IRAF FITS Image Kernel July 2003',
        'meta.subarray.name': 'FULL',
        'meta.subarray.xsize': 2048,
        'meta.subarray.xstart': 1,
        'meta.subarray.ysize': 2048,
        'meta.subarray.ystart': 1,
        'meta.telescope': 'JWST',
    }
    # The parameters themselves are well-formed; only the reftype is bogus.
    bogus_reftypes = ["foo"]
    crds.getreferences(params, reftypes=bogus_reftypes)
def test_crds_failed_getreferences_parameter():
    """Call crds.getreferences() with a deliberately corrupted detector value."""
    params = {
        '_extra_fits.PRIMARY.IRAF-TLM': '2013-12-12T15:56:30',
        'meta.date': '2014-07-22T15:53:19.893683',
        'meta.filename': 'crds.fits',
        # whack this parameter
        'meta.instrument.detector': 'yyyyNRCA1yyyy',
        'meta.instrument.filter': 'F140M',
        'meta.instrument.name': 'NIRCAM',
        'meta.instrument.pupil': 'CLEAR',
        'meta.observation.date': '2012-04-22',
        'meta.origin': 'NOAO-IRAF FITS Image Kernel July 2003',
        'meta.subarray.name': 'FULL',
        'meta.subarray.xsize': 2048,
        'meta.subarray.xstart': 1,
        'meta.subarray.ysize': 2048,
        'meta.subarray.ystart': 1,
        'meta.telescope': 'JWST',
    }
    wanted_reftypes = ["flat"]
    crds.getreferences(params, reftypes=wanted_reftypes)
def test_crds_failed_getreferences_reftype():
    """crds.getreferences() must raise CrdsError for the invalid reftype "foo"."""
    params = {
        '_extra_fits.PRIMARY.IRAF-TLM': '2013-12-12T15:56:30',
        'meta.date': '2014-07-22T15:53:19.893683',
        'meta.filename': 'crds.fits',
        'meta.instrument.detector': 'NRCA1',
        'meta.instrument.filter': 'F140M',
        'meta.instrument.name': 'NIRCAM',
        'meta.instrument.pupil': 'CLEAR',
        'meta.observation.date': '2012-04-22',
        'meta.origin': 'NOAO-IRAF FITS Image Kernel July 2003',
        'meta.subarray.name': 'FULL',
        'meta.subarray.xsize': 2048,
        'meta.subarray.xstart': 1,
        'meta.subarray.ysize': 2048,
        'meta.subarray.ystart': 1,
        'meta.telescope': 'JWST',
    }
    # The parameters themselves are well-formed; only the reftype is bogus.
    bogus_reftypes = ["foo"]
    with pytest.raises(crds.CrdsError):
        crds.getreferences(params, reftypes=bogus_reftypes)
def test_crds_failed_getreferences_parameter():
    """crds.getreferences() must raise CrdsError for a corrupted detector value."""
    params = {
        '_extra_fits.PRIMARY.IRAF-TLM': '2013-12-12T15:56:30',
        'meta.date': '2014-07-22T15:53:19.893683',
        'meta.filename': 'crds.fits',
        # whack this parameter
        'meta.instrument.detector': 'yyyyNRCA1yyyy',
        'meta.instrument.filter': 'F140M',
        'meta.instrument.name': 'NIRCAM',
        'meta.instrument.pupil': 'CLEAR',
        'meta.observation.date': '2012-04-22',
        'meta.origin': 'NOAO-IRAF FITS Image Kernel July 2003',
        'meta.subarray.name': 'FULL',
        'meta.subarray.xsize': 2048,
        'meta.subarray.xstart': 1,
        'meta.subarray.ysize': 2048,
        'meta.subarray.ystart': 1,
        'meta.telescope': 'JWST',
    }
    wanted_reftypes = ["flat"]
    with pytest.raises(crds.CrdsError):
        crds.getreferences(params, reftypes=wanted_reftypes)
def test_crds_failed_getreferences_bad_context():
    """crds.getreferences() must raise CrdsError for context "jwst_9942.pmap"."""
    import crds

    params = {
        '_extra_fits.PRIMARY.IRAF-TLM': '2013-12-12T15:56:30',
        'meta.date': '2014-07-22T15:53:19.893683',
        'meta.filename': 'crds.fits',
        'meta.instrument.detector': 'NRCA1',
        'meta.instrument.filter': 'F140M',
        'meta.instrument.name': 'NIRCAM',
        'meta.instrument.pupil': 'CLEAR',
        'meta.observation.date': '2012-04-22',
        'meta.origin': 'NOAO-IRAF FITS Image Kernel July 2003',
        'meta.subarray.name': 'FULL',
        'meta.subarray.xsize': 2048,
        'meta.subarray.xstart': 1,
        'meta.subarray.ysize': 2048,
        'meta.subarray.ystart': 1,
        'meta.telescope': 'JWST',
    }
    # Parameters and reftype are ordinary; the context is the "bad" input.
    bad_context = "jwst_9942.pmap"
    with pytest.raises(crds.CrdsError):
        crds.getreferences(params, reftypes=["flat"], context=bad_context)
def _get_refpaths(data_dict, reference_file_types, observatory): """Tailor the CRDS core library getreferences() call to the JWST CAL code by adding locking and truncating expected exceptions. Also simplify 'NOT FOUND n/a' to 'N/A'. Re-interpret empty reference_file_types as "no types" instead of core library default of "all types." """ if not reference_file_types: # [] interpreted as *all types*. return {} with crds_cache_locking.get_cache_lock(): bestrefs = crds.getreferences( data_dict, reftypes=reference_file_types, observatory=observatory) refpaths = {filetype: filepath if "N/A" not in filepath.upper() else "N/A" for (filetype, filepath) in bestrefs.items()} return refpaths
def retrieve_ancil(fitsname):
    '''Use the crds package to find/download the needed ancillary files.

    This code requires that the CRDS_PATH and CRDS_SERVER_URL environment
    variables be set in your .bashrc file (or equivalent, e.g.
    .bash_profile or .zshrc)

    Parameters
    ----------
    fitsname : str
        The filename of the file currently being analyzed.  Its primary
        header supplies every lookup parameter.

    Returns
    -------
    phot_filename : str
        The full path to the photom calibration file.
    gain_filename : str
        The full path to the gain calibration file.

    Notes
    -----
    History:

    - 2022-03-04 Taylor J Bell
        Initial code version.
    - 2022-03-28 Taylor J Bell
        Removed jwst dependency, using crds package now instead.
    '''
    with fits.open(fitsname) as hdulist:
        primary = hdulist[0].header

        # model.get_crds_parameters() would provide these directly, but
        # datamodels is a jwst sub-package.  Instead, populate by hand just
        # the entries required to find gain and photom reference files.
        parameters = {
            "meta.ref_file.crds.context_used": primary["CRDS_CTX"],
            "meta.ref_file.crds.sw_version": primary["CRDS_VER"],
            "meta.instrument.name": primary["INSTRUME"],
            "meta.instrument.detector": primary["DETECTOR"],
            "meta.observation.date": primary["DATE-OBS"],
            "meta.observation.time": primary["TIME-OBS"],
            "meta.exposure.type": primary["EXP_TYPE"],
        }
        observatory = primary['TELESCOP'].lower()
        refiles = crds.getreferences(
            parameters, ["gain", "photom"], observatory=observatory)

    gain_filename = refiles["gain"]
    phot_filename = refiles["photom"]
    return phot_filename, gain_filename
def get_multiple_reference_paths(input_file, reference_file_types):
    """Aligns JWST pipeline requirements with CRDS library top level
    interfaces.

    On top of crds.getreferences(), this converts the input into a flat
    dictionary of JWST data model dotted parameters: strings and
    DataModels are opened with ``datamodels.open()`` and flattened with
    ``to_flat_dict(include_arrays=False)``; any other input is passed to
    ``_flatten_dict()``.

    An empty `reference_file_types` returns ``{}`` without calling CRDS.
    CrdsBadRulesError and CrdsBadReferenceError are re-raised as fresh
    instances carrying only ``str(exc)``.

    Returns { filetype : filepath or "N/A", ... }
    """
    from .. import datamodels

    gc.collect()

    # Passing [] on to CRDS would be interpreted as *all types*.
    if not reference_file_types:
        return {}

    # `unicode` only exists (and is only evaluated) on Python 2.
    string_types = (str, unicode) if six.PY2 else (str,)
    model_types = string_types + (datamodels.DataModel,)

    if not isinstance(input_file, model_types):
        data_dict = _flatten_dict(input_file)
    else:
        with datamodels.open(input_file) as dm:
            data_dict = dm.to_flat_dict(include_arrays=False)

    gc.collect()

    try:
        bestrefs = crds.getreferences(
            data_dict, reftypes=reference_file_types, observatory="jwst")
    except crds.CrdsBadRulesError as exc:
        raise crds.CrdsBadRulesError(str(exc))
    except crds.CrdsBadReferenceError as exc:
        raise crds.CrdsBadReferenceError(str(exc))

    # Any value mentioning "N/A" (case-insensitive) collapses to plain "N/A".
    refpaths = {}
    for filetype, filepath in bestrefs.items():
        refpaths[filetype] = "N/A" if "N/A" in filepath.upper() else filepath
    return refpaths
def get_multiple_reference_paths(input_file, reference_file_types):
    """Aligns JWST pipeline requirements with CRDS library top level
    interfaces.

    On top of crds.getreferences(), this converts the input into a flat
    dictionary of JWST data model dotted parameters.

    Parameters
    ----------
    input_file : str, DataModel, or nested dict
        Strings and DataModels are opened with ``datamodels.open()`` and
        flattened with ``to_flat_dict(include_arrays=False)``.  Any other
        input is passed to ``_flatten_dict()``.
    reference_file_types : list of str
        Reference types to look up.  An empty (falsy) value returns ``{}``
        without calling CRDS.

    Returns
    -------
    dict
        { filetype : filepath or "N/A", ... }

    Raises
    ------
    crds.CrdsBadRulesError, crds.CrdsBadReferenceError
        Re-raised as a fresh instance carrying only ``str(exc)``.
    """
    from jwst import datamodels

    gc.collect()

    # Passing [] on to CRDS would be interpreted as *all types*.
    if not reference_file_types:
        return {}

    # Use the `datamodels` module imported above.  The block previously
    # referred to an unimported name `models` here and in the open() call.
    if six.PY2:
        model_types = (str, unicode, datamodels.DataModel)
    else:
        model_types = (str, datamodels.DataModel)

    if isinstance(input_file, model_types):
        with datamodels.open(input_file) as dm:
            data_dict = dm.to_flat_dict(include_arrays=False)
    else:
        data_dict = _flatten_dict(input_file)

    gc.collect()

    try:
        bestrefs = crds.getreferences(
            data_dict, reftypes=reference_file_types, observatory="jwst")
    except crds.CrdsBadRulesError as exc:
        raise crds.CrdsBadRulesError(str(exc))
    except crds.CrdsBadReferenceError as exc:
        raise crds.CrdsBadReferenceError(str(exc))

    # Any value mentioning "N/A" (case-insensitive) collapses to plain "N/A".
    refpaths = {
        filetype: filepath if "N/A" not in filepath.upper() else "N/A"
        for (filetype, filepath) in bestrefs.items()
    }
    return refpaths
def _get_refpaths(data_dict, reference_file_types): """Tailor the CRDS core library getreferences() call to the JWST CAL code by adding locking and truncating expected exceptions. Also simplify 'NOT FOUND n/a' to 'N/A'. Re-interpret empty reference_file_types as "no types" instead of core library default of "all types." """ if not reference_file_types: # [] interpreted as *all types*. return {} try: # catch exceptions to truncate expected tracebacks with crds_cache_locking.get_cache_lock(): bestrefs = crds.getreferences(data_dict, reftypes=reference_file_types, observatory="jwst") except crds.CrdsBadRulesError as exc: raise crds.CrdsBadRulesError(str(exc)) except crds.CrdsBadReferenceError as exc: raise crds.CrdsBadReferenceError(str(exc)) except crds.CrdsLookupError as exc: raise crds.CrdsLookupError(str(exc)) refpaths = {filetype: filepath if "N/A" not in filepath.upper() else "N/A" for (filetype, filepath) in bestrefs.items()} return refpaths
def _get_refpaths(data_dict, reference_file_types): """Tailor the CRDS core library getreferences() call to the JWST CAL code by adding locking and truncating expected exceptions. Also simplify 'NOT FOUND n/a' to 'N/A'. Re-interpret empty reference_file_types as "no types" instead of core library default of "all types." """ if not reference_file_types: # [] interpreted as *all types*. return {} try: # catch exceptions to truncate expected tracebacks with crds_cache_locking.get_cache_lock(): bestrefs = crds.getreferences(data_dict, reftypes=reference_file_types, observatory="jwst") except crds.CrdsBadRulesError as exc: raise crds.CrdsBadRulesError(str(exc)) except crds.CrdsBadReferenceError as exc: raise crds.CrdsBadReferenceError(str(exc)) except crds.CrdsLookupError as exc: raise crds.CrdsLookupError(str(exc)) refpaths = { filetype: filepath if "N/A" not in filepath.upper() else "N/A" for (filetype, filepath) in bestrefs.items() } return refpaths
def process(self, file_list):
    """The main method for processing darks.  See module docstrings
    for further details.

    For each file this runs the selected calwebb_detector1 steps, builds
    a readnoise image, compares it with the current pipeline readnoise
    reference file, saves a difference png and inserts a row of statistics
    into the readnoise database table.

    Files for which the pipeline run fails are logged and skipped.  After
    a file is processed successfully, both the input file and the
    pipeline output file are deleted from disk.

    Parameters
    ----------
    file_list : list
        List of filenames (including full paths) to the dark current
        files.
    """
    for filename in file_list:
        logging.info('\tWorking on file: {}'.format(filename))

        # Get relevant header information for this file
        self.get_metadata(filename)

        # Run the file through the necessary pipeline steps
        pipeline_steps = self.determine_pipeline_steps()
        logging.info('\tRunning pipeline on {}'.format(filename))
        try:
            processed_file = pipeline_tools.run_calwebb_detector1_steps(
                filename, pipeline_steps)
            logging.info(
                '\tPipeline complete. Output: {}'.format(processed_file))
            set_permissions(processed_file)
        except Exception:
            # Deliberately best-effort: skip files the pipeline cannot
            # handle.  `Exception` (not a bare except) so KeyboardInterrupt
            # and SystemExit still stop the run.
            logging.info(
                '\tPipeline processing failed for {}'.format(filename))
            continue

        # Find amplifier boundaries so per-amp statistics can be calculated
        _, amp_bounds = instrument_properties.amplifier_info(
            processed_file, omit_reference_pixels=True)
        logging.info('\tAmplifier boundaries: {}'.format(amp_bounds))

        # Get the ramp data; remove first 5 groups and last group for
        # MIRI to avoid reset/rscd effects
        cal_data = fits.getdata(processed_file, 'SCI', uint=False)
        if self.instrument == 'MIRI':
            cal_data = cal_data[:, 5:-1, :, :]

        # Make the readnoise image
        readnoise_outfile = os.path.join(
            self.data_dir,
            os.path.basename(
                processed_file.replace('.fits', '_readnoise.fits')))
        readnoise = self.make_readnoise_image(cal_data)
        fits.writeto(readnoise_outfile, readnoise, overwrite=True)
        logging.info(
            '\tReadnoise image saved to {}'.format(readnoise_outfile))

        # Calculate the full image readnoise stats
        clipped = sigma_clip(readnoise, sigma=3.0, maxiters=5)
        full_image_mean = np.nanmean(clipped)
        full_image_stddev = np.nanstd(clipped)
        full_image_n, full_image_bin_centers = self.make_histogram(
            readnoise)
        logging.info('\tReadnoise image stats: {:.5f} +/- {:.5f}'.format(
            full_image_mean, full_image_stddev))

        # Calculate readnoise stats in each amp separately
        amp_stats = self.get_amp_stats(readnoise, amp_bounds)
        logging.info(
            '\tReadnoise image stats by amp: {}'.format(amp_stats))

        # Get the current JWST Readnoise Reference File data
        parameters = self.make_crds_parameter_dict()
        reffile_mapping = crds.getreferences(parameters,
                                             reftypes=['readnoise'])
        readnoise_file = reffile_mapping['readnoise']
        if 'NOT FOUND' in readnoise_file:
            logging.warning(
                '\tNo pipeline readnoise reffile match for this file - assuming all zeros.'
            )
            # Already shaped like the measured image, so it must NOT be
            # passed through the subarray cutout below.
            pipeline_readnoise = np.zeros(readnoise.shape)
        else:
            logging.info('\tPipeline readnoise reffile is {}'.format(
                readnoise_file))
            pipeline_readnoise = fits.getdata(readnoise_file)

            # Sometimes, the pipeline readnoise reffile needs to be cutout
            # to match the subarray.
            row_start = self.substrt2 - 1
            col_start = self.substrt1 - 1
            pipeline_readnoise = pipeline_readnoise[
                row_start:row_start + self.subsize2,
                col_start:col_start + self.subsize1]

        # Find the difference between the current readnoise image and the
        # pipeline readnoise reffile, and record image stats.
        readnoise_diff = readnoise - pipeline_readnoise
        clipped = sigma_clip(readnoise_diff, sigma=3.0, maxiters=5)
        diff_image_mean = np.nanmean(clipped)
        diff_image_stddev = np.nanstd(clipped)
        diff_image_n, diff_image_bin_centers = self.make_histogram(
            readnoise_diff)
        logging.info(
            '\tReadnoise difference image stats: {:.5f} +/- {:.5f}'.format(
                diff_image_mean, diff_image_stddev))

        # Save a png of the readnoise difference image for visual
        # inspection
        logging.info('\tCreating png of readnoise difference image')
        readnoise_diff_png = self.image_to_png(
            readnoise_diff,
            outname=os.path.basename(readnoise_outfile).replace(
                '.fits', '_diff'))

        # Construct new entry for this file for the readnoise database
        # table.  Can't insert values with numpy.float32 datatypes into
        # database so need to change the datatypes of these values.
        readnoise_db_entry = {
            'uncal_filename': filename,
            'aperture': self.aperture,
            'detector': self.detector,
            'subarray': self.subarray,
            'read_pattern': self.read_pattern,
            'nints': self.nints,
            'ngroups': self.ngroups,
            'expstart': self.expstart,
            'readnoise_filename': readnoise_outfile,
            'full_image_mean': float(full_image_mean),
            'full_image_stddev': float(full_image_stddev),
            'full_image_n': full_image_n.astype(float),
            'full_image_bin_centers': full_image_bin_centers.astype(float),
            'readnoise_diff_image': readnoise_diff_png,
            'diff_image_mean': float(diff_image_mean),
            'diff_image_stddev': float(diff_image_stddev),
            'diff_image_n': diff_image_n.astype(float),
            'diff_image_bin_centers': diff_image_bin_centers.astype(float),
            'entry_date': datetime.datetime.now(),
        }
        for key, value in amp_stats.items():
            if isinstance(value, (int, float)):
                readnoise_db_entry[key] = float(value)
            else:
                readnoise_db_entry[key] = value.astype(float)

        # Add this new entry to the readnoise database table
        self.stats_table.__table__.insert().execute(readnoise_db_entry)
        logging.info('\tNew entry added to readnoise database table')

        # Remove the raw and calibrated files to save disk space
        os.remove(filename)
        os.remove(processed_file)