def sync_datasets(self):
    """Sync mappings and references for datasets with respect to `self.contexts`.

    For every context in `self.contexts`, determine the best references for each
    dataset named by --dataset-files or --dataset-ids, collect them, and return
    the de-duplicated list of reference filenames to sync.

    Returns
    -------
    list of str
        Unique reference filenames recommended for the given datasets, with
        "NOT FOUND ..." placeholders filtered out.
    """
    # --contexts is mandatory: without it there is no rmap set to match against.
    if not self.contexts:
        log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.""")
        sys.exit(-1)
    active_references = []
    for context in self.contexts:
        if self.args.dataset_ids:
            # "@filename" means the ids are listed one-per-line in a file;
            # expand it in place so later iterations see the real id list.
            if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                with open(self.args.dataset_ids[0][1:]) as pfile:
                    self.args.dataset_ids = pfile.read().splitlines()
            # Fetch matching parameters for all ids in one server call;
            # error_on_exception logs-and-continues rather than aborting the sync.
            with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
        for dataset in self.args.dataset_files or self.args.dataset_ids:
            log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
            with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                if self.args.dataset_files:
                    # Local file: read and condition the header directly.
                    headers = { dataset : data_file.get_conditioned_header(dataset, observatory=self.observatory) }
                else:
                    # Dataset id: select the matching entries from the headers
                    # fetched above (substring match on the uppercased id).
                    headers = { dataset_id : header for (dataset_id, header) in id_headers.items() if dataset.upper() in dataset_id }
                for assc_dataset, header in headers.items():
                    with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset), "under context", repr(context)):
                        bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory, ignore_cache=self.args.ignore_cache)
                        log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                        active_references.extend(bestrefs.values())
    # Drop "NOT FOUND ..." placeholders; only real filenames can be synced.
    active_references = [ ref for ref in active_references if not ref.startswith("NOT FOUND") ]
    log.verbose("Syncing references:", repr(active_references))
    # De-duplicate across contexts/datasets.
    return list(set(active_references))
def get_reffiles(parameter_dict, reffile_types, download=True):
    """Determine CRDS's best reference files to use for a particular
    observation, and download them if they are not already present in the
    ``CRDS_PATH``. The determination is made based on the information in the
    ``parameter_dictionary``.

    Parameters
    ----------
    parameter_dict : dict
        Dictionary of basic metadata from the file to be processed by the
        returned reference files (e.g. ``INSTRUME``, ``DETECTOR``, etc)

    reffile_types : list
        List of reference file types to look up and download. These must
        be contained in CRDS's list of reference file types.

    download : bool
        If ``True`` (default), the identified best reference files will be
        downloaded. If ``False``, the dictionary of best reference files will
        still be returned, but the files will not be downloaded. The use of
        ``False`` is primarily intended to support testing on Travis.

    Returns
    -------
    reffile_mapping : dict
        Mapping of downloaded CRDS file locations

    Raises
    ------
    ValueError
        If CRDS cannot find reference files matching ``parameter_dict``, or
        if ``download`` is ``False`` and the ``CRDS_PATH`` environment
        variable is not set.
    """
    # IMPORTANT: Import of crds package must be done AFTER the environment
    # variables are set in the functions above
    import crds
    from crds import CrdsLookupError

    if download:
        try:
            reffile_mapping = crds.getreferences(parameter_dict, reftypes=reffile_types)
        except CrdsLookupError as err:
            # Chain the original lookup error for easier debugging.
            raise ValueError("ERROR: CRDSLookupError when trying to find reference files for parameters: {}".format(parameter_dict)) from err
    else:
        # If the files will not be downloaded, still return the same local
        # paths that are returned when the files are downloaded. Note that
        # this follows the directory structure currently assumed by CRDS.
        crds_path = os.environ.get('CRDS_PATH')
        # Guard: without CRDS_PATH, os.path.join(None, ...) below would raise
        # an opaque TypeError. Fail early with a clear message instead.
        if crds_path is None:
            raise ValueError("ERROR: CRDS_PATH environment variable must be set when download=False.")
        try:
            reffile_mapping = crds.getrecommendations(parameter_dict, reftypes=reffile_types)
        except CrdsLookupError as err:
            raise ValueError("ERROR: CRDSLookupError when trying to find reference files for parameters: {}".format(parameter_dict)) from err

        for key, value in reffile_mapping.items():
            # Check for NOT FOUND must be done here because the following
            # line will raise an exception if NOT FOUND is present
            if "NOT FOUND" in value:
                reffile_mapping[key] = "NOT FOUND"
            else:
                # CRDS reference filenames encode the instrument as the second
                # underscore-delimited field, e.g. "jwst_nircam_dark_0001.fits".
                instrument = value.split('_')[1]
                reffile_mapping[key] = os.path.join(crds_path, 'references/jwst', instrument, value)

    return reffile_mapping
def reffile_test(path_to_input_file, pipeline_step, logfile=None, input_file=None):
    """
    This is a new version of reffile_test which uses crds.matches instead of
    working with the reference file metadata directly. That way, if the rmap
    was updated manually on CRDS (to avoid redelivering files for a minor
    keyword change), this will test the actual match criteria.

    Parameters
    ----------
    path_to_input_file : str
        Path to the FITS file whose reference-file selection is tested.
    pipeline_step : str
        Pipeline step name (or "R_..." header keyword) to test.
    logfile : str or None
        Optional log file; when given, the log stream is closed at the end.
    input_file : object or None
        Pre-loaded datamodel for the input file; loaded on demand if None.

    Returns
    -------
    None on error, "" when no reference file applies, otherwise a tuple of
    (newline-joined failure messages, list of log messages).
    """
    log_msgs = []
    logstream, errstream = get_streams(logfile=logfile)

    # Convert pipeline step to a header keyword if necessary.
    # CRDS selection keywords are "R_" + first 6 letters of the step name.
    if pipeline_step.upper().startswith("R_"):
        step_key = pipeline_step.upper()
    else:
        if len(pipeline_step) >= 6:
            step_key = "R_" + pipeline_step.upper()[:6]
        else:
            step_key = "R_" + pipeline_step.upper()

    # Identify the context
    context = fits.getval(path_to_input_file, "CRDS_CTX")

    # Identify the reference file
    try:
        reffile_name = fits.getval(path_to_input_file, step_key)
    except KeyError:
        print("Invalid pipeline step", file=errstream)
        log_msgs.append("Invalid pipeline step")
        return None
    reffile_name = reffile_name.replace('crds://', '')

    # Is there a reference file for this step? If not, PASS
    if reffile_name == "N/A":
        print("No reference file for step {}.".format(pipeline_step), file=errstream)
        log_msgs.append("No reference file for step {}.".format(pipeline_step))
        return ""

    # Grab metadata from the input and reference files
    if input_file is None:
        input_file = load_input_file(path_to_input_file, logstream=logstream)

    print("Grabbing CRDS match criteria...", file=logstream)
    log_msgs.append("Grabbing CRDS match criteria...")
    try:
        match_criteria = ref_matches(context, reffile_name)[0]
    except ValueError as err:
        # BUGFIX: this branch used to drop into pdb (import pdb; pdb.set_trace()),
        # hanging any non-interactive run. Report the error and bail out instead,
        # consistent with the "Invalid pipeline step" early return above.
        msg = "Unable to determine match criteria for {}: {}".format(reffile_name, err)
        print(msg, file=errstream)
        log_msgs.append(msg)
        return None

    tests = {}  # store all the tests in a single dictionary

    # add instrument name in the expected keyword
    match_criteria['META.INSTRUMENT.NAME'] = 'NIRSPEC'

    # make sure that the subarray keyword is correct for the size of the data
    subarray = fits.getval(path_to_input_file, 'SUBARRAY', 0)
    match_criteria['META.SUBARRAY.NAME'] = subarray

    # Test whether the recommended reference file was actually selected
    recommended_reffile = getrecommendations(match_criteria,
                                             reftypes=[pipeline_step],
                                             context=context,
                                             fast=True)

    if isinstance(recommended_reffile, str):
        # remove path, only want to test filename
        recommended_reffile = os.path.basename(recommended_reffile)
        tests['RECOMMENDATION'] = recommended_reffile == reffile_name
    else:
        msg1 = '* WARNING: Unable to find recommendation for the reference file:'
        # BUGFIX: msg2 was built with a comma (creating a tuple) instead of
        # string concatenation, so a tuple was logged and printed.
        msg2 = ' Match criteria determined by pipeline to find reference file: ' + repr(match_criteria)
        msg3 = ' Recommendation dictionary = ' + repr(recommended_reffile)
        log_msgs.append(msg1)
        log_msgs.append(msg2)
        log_msgs.append(msg3)
        print(msg1)
        print(msg2)
        print(msg3)

    # Remove irrelevant match criteria
    del match_criteria['observatory']
    del match_criteria['instrument']
    del match_criteria['filekind']

    # Useafter dates require special handling
    if "META.OBSERVATION.DATE" not in match_criteria:
        tests['USEAFTER'] = True
    else:
        input_date = input_file.meta.observation.date
        input_time = input_file.meta.observation.time
        input_obstime = Time(input_date + "T" + input_time)
        ref_date = match_criteria.pop("META.OBSERVATION.DATE")
        ref_time = match_criteria.pop("META.OBSERVATION.TIME")
        ref_useafter = Time(ref_date + "T" + ref_time)
        tests["USEAFTER"] = input_obstime >= ref_useafter
        # Note that this does NOT check whether there is a more recent
        # (but still valid) reference file that could have been selected

    # Loop over the rest of the matching criteria
    for criterion, value in match_criteria.items():
        tests[criterion] = check_meta(input_file, criterion, value)

    # A test value of None means "not applicable" and does not count as a failure.
    final = all([x or x is None for x in tests.values()])
    failures = []
    failmsg = "{}: reffile value {}, input value {}"

    # Finally, print out the results of the tests
    print("REFERENCE FILE SELECTION TEST", file=logstream)
    print(" Input file: {}".format(path_to_input_file), file=logstream)
    print(" Pipeline step: {}".format(pipeline_step), file=logstream)
    print(" Header keyword: {}".format(step_key), file=logstream)
    print(" Reference file selected: {}".format(reffile_name), file=logstream)
    print(" **Metadata tests performed:**", file=logstream)
    log_msgs.append("REFERENCE FILE SELECTION TEST")
    log_msgs.append(" Input file: {}".format(path_to_input_file))
    log_msgs.append(" Pipeline step: {}".format(pipeline_step))
    log_msgs.append(" Header keyword: {}".format(step_key))
    log_msgs.append(" Reference file selected: {}".format(reffile_name))
    log_msgs.append(" **Metadata tests performed:**")

    rescode = {None: "N/A", True: "PASS", False: "FAIL"}
    for meta in sorted(tests):
        result = tests[meta]
        print(" {}: {}".format(meta, rescode[result]), file=logstream)
        if rescode[result] == "FAIL":
            # input_obstime / ref_useafter exist whenever USEAFTER can FAIL,
            # since USEAFTER is hard-coded True on the other branch.
            if meta == "USEAFTER":
                ival = input_obstime
                rval = ref_useafter
            else:
                ival = input_file[meta.lower()]
                rval = match_criteria[meta]
            failures.append(failmsg.format(meta, rval, ival))
            print(" Input file value: {}".format(ival), file=logstream)
            print(" Reference file value: {}".format(rval), file=logstream)
            log_msgs.append(" Input file value: {}".format(ival))
            log_msgs.append(" Reference file value: {}".format(rval))

    print(" Final result: {}".format(rescode[final]), file=logstream)
    log_msgs.append(" Final result: {}".format(rescode[final]))

    # Close the output stream if necessary
    if logfile is not None:
        logstream.close()

    return "\n".join(failures), log_msgs