def test_sync_contexts(self):
    """Exercise crds.sync --contexts for mappings and references, including
    the --check-files / --repair-files / --check-sha1sum repair modes.
    """
    self.run_script("crds.sync --contexts hst_cos.imap")
    for mapping in crds.get_cached_mapping("hst_cos.imap").mapping_names():
        self.assert_crds_exists(mapping)
    self.run_script("crds.sync --contexts hst_cos_deadtab.rmap --fetch-references")
    for reference in crds.get_cached_mapping("hst_cos_deadtab.rmap").reference_names():
        self.assert_crds_exists(reference)
        # Deliberately corrupt each cached reference so --check-files fails.
        with open(config.locate_file(reference, "hst"), "w+") as corrupted:
            corrupted.write("foo")
    check = "crds.sync --contexts hst_cos_deadtab.rmap --fetch-references --check-files"
    self.run_script(check, 2)                                      # detects 2 corrupt files
    self.run_script(check + " --repair-files", 2)                  # repairs, still reports 2
    self.run_script(check + " --repair-files")                     # clean after repair
    self.run_script(check + " --repair-files --check-sha1sum")     # clean with checksums too
def dump_dataset_headers(self):
    """Print out the matching parameters for the --datasets specified on
    the command line, one line per (dataset, context) pair.
    """
    multi_context_headers = defaultdict(list)
    for context in self.contexts:
        # Fetch headers either by explicit dataset ids or by instrument.
        if self.args.datasets:
            headers = api.get_dataset_headers_by_id(
                context, self.args.datasets)
        elif self.args.instrument:
            headers = api.get_dataset_headers_by_instrument(
                context, self.args.instrument)
        else:
            # Fix: previously `headers` was unbound here, raising NameError
            # when neither --datasets nor --instrument was specified.
            continue
        for dataset_id, header in headers.items():
            multi_context_headers[dataset_id].append((context, header))
    for dataset_id, context_headers in multi_context_headers.items():
        for (context, header) in context_headers:
            if self.args.condition_values:
                header = utils.condition_header(header)
            if self.args.minimize_headers:
                header = crds.get_cached_mapping(context).minimize_header(
                    header)
            # Omit the context column when only one context was requested.
            if len(self.contexts) == 1:
                print(dataset_id, ":", log.format_parameter_list(header))
            else:
                print(dataset_id, ":", context, ":", log.format_parameter_list(header))
def list_dataset_headers(self):
    """List dataset header info for self.args.dataset_headers with respect to
    self.args.context.
    """
    for context in self.contexts:
        # Fix: error message previously read "with repect to" (typo).
        with log.error_on_exception("Failed fetching dataset parameters with respect to",
                                    repr(context), "for", repr(self.args.dataset_headers)):
            pars = api.get_dataset_headers_by_id(context, self.args.dataset_headers)
            pmap = crds.get_cached_mapping(context)
            for requested_id in self.args.dataset_headers:
                for returned_id in sorted(pars.keys()):
                    # Requested ids may be partial;  match case-insensitively.
                    if requested_id.upper() in returned_id.upper():
                        header = pars[returned_id]
                        if isinstance(header, python23.string_types):
                            # A string payload is a failure reason, not a header.
                            log.error("No header for", repr(returned_id), ":", repr(header))  # header is reason
                            continue
                        if self.args.id_expansions_only:
                            print(returned_id, context if len(self.contexts) > 1 else "")
                        else:
                            if self.args.minimize_headers:
                                header2 = pmap.minimize_header(header)
                            else:
                                header2 = dict(header)
                            header2.pop("REFTYPE", None)
                            header2["dataset_id"] = returned_id
                            log.info("Dataset pars for", repr(returned_id),
                                     "with respect to", repr(context) + ":\n",
                                     log.PP(header2))
                        if self.args.first_id_expansion_only:
                            break
def check_exptypes(self):
    """Based on EXP_TYPEs defined by CAL schema and the specified instrument
    contexts, print out log info on missing or unexpected coverage.
    """
    for imap_name in self.contexts:
        i_loaded = crds.get_cached_mapping(imap_name)
        # All exposure types the locator declares for this instrument.
        s_exp_types = self.locator.get_exptypes(i_loaded.instrument)
        for exp_type in s_exp_types:
            # Reference types expected to cover this exposure type.
            reftypes = self.locator.get_reftypes(exp_type)
            for filekind in i_loaded.selections:
                ufilekind = (i_loaded.instrument.upper(), filekind.upper())
                rmap_name = i_loaded.selections[filekind]
                if rmap_name == 'N/A':
                    # No rmap at all:  only notable if coverage was expected.
                    if filekind in reftypes:
                        log.verbose("Reftype rmap", repr(ufilekind), "is defined as N/A for", repr(exp_type))
                else:
                    r_loaded = i_loaded.get_rmap(filekind)
                    # Exposure types the rmap actually matches on, if any.
                    r_exp_types = r_loaded.get_parkey_map().get("META.EXPOSURE.TYPE", None)
                    if r_exp_types is None:  # ???  rmap does not select on EXP_TYPE at all
                        log.verbose("Reftype", repr(ufilekind), "does not match using EXP_TYPE.")
                    elif exp_type in r_exp_types:
                        if filekind in reftypes:
                            log.verbose("Reftype", repr(ufilekind), "explicitly mentions", repr(exp_type))
                        else:
                            # Rmap covers an exp_type the schema says it shouldn't.
                            log.warning("Reftype", repr(ufilekind), "has unexpected coverage for", repr(exp_type))
                    elif "ANY" in r_exp_types or "N/A" in r_exp_types:
                        log.verbose("Reftype", repr(ufilekind), "is satisfied by ANY or N/A for", repr(exp_type))
                    elif filekind in reftypes:
                        # Schema expects coverage but the rmap lacks it.
                        log.info("Reftype", repr(ufilekind), "is missing coverage for", repr(exp_type))
                    else:
                        log.verbose("Reftype", repr(ufilekind), "has no expected coverage for", repr(exp_type))
def get_updated_files(context1, context2):
    """Return the sorted list of file names which are in `context2` (or any
    intermediate context) but not in `context1`.   context2 > context1.
    """
    extension1 = os.path.splitext(context1)[1]
    extension2 = os.path.splitext(context2)[1]
    assert extension1 == extension2, "Only compare mappings of same type/extension."
    old_map = crds.get_cached_mapping(context1)
    old_files = set(old_map.mapping_names() + old_map.reference_names())
    all_mappings = rmap.list_mappings("*" + extension1, old_map.observatory)
    updated = set()
    context1, context2 = os.path.basename(context1), os.path.basename(context2)
    for new in all_mappings:
        new = os.path.basename(new)
        # Mapping names sort chronologically, so lexical comparison selects
        # the (context1, context2] range of intermediate contexts.
        if context1 < new <= context2:
            new_map = crds.get_cached_mapping(new)
            updated |= set(new_map.mapping_names() + new_map.reference_names())
    # Idiom fix: sorted() accepts any iterable, no intermediate list() needed.
    return sorted(updated - old_files)
def gen_specs(context):
    """Generate spec files corresponding to all types in `context`."""
    log.info("Generating specs from", repr(context))
    pmap = crds.get_cached_mapping(context)
    rmap_names = [name for name in pmap.mapping_names() if name.endswith(".rmap")]
    for rmap_name in rmap_names:
        loaded = crds.get_cached_mapping(rmap_name)
        spec_name = loaded.instrument + "_" + loaded.filekind + ".spec"
        spec_path = os.path.join(HERE, "specs", spec_name)
        if os.path.exists(spec_path):
            # Never overwrite an existing spec file.
            continue
        spec = dict(loaded.header)
        upper_kind = loaded.filekind.upper()
        spec["filetype"] = FILEKIND_TO_FILETYPE.get(upper_kind, upper_kind)
        spec["file_ext"] = os.path.splitext(loaded.reference_names()[0])[-1]
        spec["text_descr"] = TEXT_DESCR[loaded.filekind]
        spec["suffix"] = loaded.filekind
        log.write("Generating spec", repr(spec_path))
        reftypes.write_spec(spec_path, spec)
def all_filekinds():
    """Return the sorted list of distinct filekinds found across every jwst
    .pmap context, logging each filekind the first time it is seen.
    """
    seen = set()
    for context in rmap.list_mappings("*.pmap", "jwst"):
        pmap = crds.get_cached_mapping(context)
        for imap in pmap.selections.values():
            for rmapping in imap.selections.values():
                if rmapping.filekind in seen:
                    continue
                log.info("Adding", repr(rmapping.filekind), "from", repr(context))
                seen.add(rmapping.filekind)
    return sorted(seen)
def crds_download(self):
    """Populates the <synphot_dir>/crds file cache with all synphot files
    implied by self.args.context.

    This creates an ordinary CRDS cache within <synphot_dir> which both
    sources the synphot files needed now and can optimize future downloads
    if desired.
    """
    context = self.resolve_context(self.args.context)
    self.dump_mappings([context])
    pmap = crds.get_cached_mapping(context)
    imap = pmap.get_imap("synphot")
    # Retain the loaded mappings as attributes for later phases.
    self.pmap, self.imap = pmap, imap
    self.dump_files(context, imap.reference_names())
def test_purge_mappings(self):
    """Sync deadtab references, then verify --purge-mappings/--purge-references
    removes everything not belonging to the newly synced imphttab context.
    """
    deadtab_refs = ["s7g1700gl_dead.fits", "s7g1700ql_dead.fits"]
    self.run_script("crds.sync --contexts hst_cos_deadtab.rmap --fetch-references")
    self.run_script("crds.sync --organize=flat")
    rmapping = crds.get_cached_mapping("hst_cos_deadtab.rmap")
    self.assertEqual(rmapping.reference_names(), deadtab_refs)
    self.assertEqual(rmap.list_references("*", "hst"), deadtab_refs)
    for ref in deadtab_refs:
        self.assert_crds_exists(ref)
    self.run_script("crds.sync --contexts hst_acs_imphttab.rmap --fetch-references "
                    "--purge-mappings --purge-references")
    # Only the imphttab files should survive the purge.
    self.assertEqual(rmap.list_references("*", "hst"),
                     ['w3m1716tj_imp.fits', 'w3m17170j_imp.fits', 'w3m17171j_imp.fits'])
    self.assertEqual(rmap.list_mappings("*", "hst"), ['hst_acs_imphttab.rmap'])
def main():
    """Round-trip hst.pmap through pickle and log equality diagnostics for
    the pipeline context and a representative rmap.
    """
    original = crds.get_cached_mapping("hst.pmap")
    restored = pickle.loads(pickle.dumps(original))
    original._trace_compare(restored)
    log.divider("p == q --> " + repr(original == restored))
    log.divider("__getstate__ --> " + repr(original.__getstate__() == restored.__getstate__()))
    rmap_before = original.get_imap("acs").get_rmap("biasfile")
    rmap_after = restored.get_imap("acs").get_rmap("biasfile")
    log.divider("rmap __getstate__ --> " + repr(rmap_before.__getstate__() == rmap_after.__getstate__()))
def get_context_references(self):
    """Return the set of references which are pointed to by the references
    in `contexts`.
    """
    collected = set()
    for context in self.contexts:
        try:
            loaded = crds.get_cached_mapping(context)
            collected |= set(loaded.reference_names())
            log.verbose("Determined references from cached mapping", repr(context))
        except Exception:
            # Only ask the server if loading the context locally fails.
            collected |= set(api.get_reference_names(context))
    return sorted(collected)
def get_context_mappings(self):
    """Return the set of mappings which are pointed to by the mappings
    in `self.contexts`.

    Side effect: resets self.contexts to the sorted subset that actually
    loaded/listed successfully, and dumps (downloads) the collected mapping
    files into the local cache.
    """
    files = set()
    useable_contexts = []
    if not self.contexts:
        return []
    log.verbose("Getting all mappings for specified contexts.", verbosity=55)
    if self.args.all:
        # --all: take every mapping known to the server, not just those
        # reachable from self.contexts.
        files = self._list_mappings("*.*map")
        pmaps = self._list_mappings("*.pmap")
        useable_contexts = []
        if pmaps and files:
            with log.warn_on_exception("Failed dumping mappings for", repr(self.contexts)):
                # Dump relative to the latest pmap.
                self.dump_files(pmaps[-1], files)
        for context in self.contexts:
            with log.warn_on_exception("Failed loading context", repr(context)):
                # Load only to verify the context is usable;  the loaded
                # mapping itself is not otherwise consumed here.
                pmap = crds.get_cached_mapping(context)
                useable_contexts.append(context)
    else:
        for context in self.contexts:
            with log.warn_on_exception("Failed listing mappings for", repr(context)):
                try:
                    # Prefer the local cache;  fall back to the server.
                    pmap = crds.get_cached_mapping(context)
                    files |= set(pmap.mapping_names())
                except Exception:
                    files |= set(api.get_mapping_names(context))
                useable_contexts.append(context)
        useable_contexts = sorted(useable_contexts)
        if useable_contexts and files:
            with log.warn_on_exception("Failed dumping mappings for", repr(self.contexts)):
                # Dump relative to the latest usable context.
                self.dump_files(useable_contexts[-1], files)
    self.contexts = useable_contexts  # XXXX reset self.contexts
    files = sorted(files)
    log.verbose("Got mappings from specified (usable) contexts: ", files, verbosity=55)
    return files
def pickle_unpickle(context, data):
    """Pickle `context`, restore it, and compare best references computed
    for `data` before and after the round trip.  Returns a dict of
    comparison results.
    """
    original = crds.get_cached_mapping(context)
    original.get_required_parkeys()
    header = data_file.get_header(data)
    prefs = original.get_best_references(header)
    restored = pickle.loads(pickle.dumps(original))
    rrefs = restored.get_best_references(header)
    diffs = original.difference(restored, include_header_diffs=True,
                                recurse_added_deleted=True)
    assert prefs == rrefs
    assert str(original) == str(restored)
    return {
        "refs": prefs == rrefs,
        "strs": str(original) == str(restored),
        "equal": original == restored,
        "diffs": diffs,
    }
def insert_references(self):
    """Insert files specified by --references into the appropriate rmaps
    identified by --source-context.
    """
    self._setup_source_context()
    # Group the incoming reference files by (instrument, filekind).
    categorized = self.categorize_files(self.args.references)
    pmap = crds.get_cached_mapping(self.source_context)
    self.args.rmaps = []
    for (instrument, filekind) in categorized:
        try:
            # Prefer the rmap already present in the source context.
            self.args.rmaps.append(pmap.get_imap(instrument).get_rmap(filekind).filename)
        except crexc.CrdsError:
            log.info("Existing rmap for", (instrument, filekind), "not found. Trying empty spec.")
            spec_file = os.path.join(
                os.path.dirname(self.obs_pkg.__file__), "specs", instrument + "_" + filekind + ".rmap")
            # NOTE(review): `rmapping` is assigned but unused — presumably
            # loading validates the spec file parses;  confirm intent.
            rmapping = rmap.asmapping(spec_file)
            log.info("Loaded spec file from", repr(spec_file))
            self.args.rmaps.append(spec_file)
    self.rmap_apply(insert_rmap_references, categorized=categorized)
def dump_header(self, context, returned_id, header):
    """Print out dataset `header` for `id` and `context` in either .json or
    multi-line formats.
    """
    pmap = crds.get_cached_mapping(context)
    if self.args.minimize_headers:
        header2 = pmap.minimize_header(header)
    else:
        header2 = dict(header)
    # Annotate a copy of the header for the multi-line display form.
    header2.pop("REFTYPE", None)
    header2["dataset_id"] = returned_id
    header2["CRDS_CTX"] = context
    if self.args.json:
        # NOTE(review): JSON mode emits the raw `header`, not the annotated
        # `header2` built above — looks intentional (machine output stays
        # unmodified) but worth confirming.
        json_header = {returned_id: header}
        print(json.dumps(json_header))
    else:
        print("Dataset pars for", repr(returned_id), "with respect to", repr(context) + ":\n", log.PP(header2))
def find_match_tuples(self, context, reffile):
    """Return the list of match representations for `reference` in `context`."""
    loaded = crds.get_cached_mapping(context)
    matches = loaded.file_matches(reffile)
    result = []
    for path in matches:
        prefix = self.format_prefix(path[0])
        if self.is_filtered(path):
            # Skip paths excluded by the active filters.
            continue
        match_tuple = tuple(self.format_match_tup(tup)
                            for section in path[1:] for tup in section)
        if self.args.tuple_format:
            if prefix:
                match_tuple = prefix + match_tuple
        else:
            match_tuple = prefix + " " + " ".join(match_tuple)
        result.append(match_tuple)
    return result
def dump_header(self, context, returned_id, header):
    """Print out dataset `header` for `id` and `context` in either .json or
    multi-line formats.
    """
    pmap = crds.get_cached_mapping(context)
    # Annotated copy used by the multi-line display form.
    header2 = pmap.minimize_header(header) if self.args.minimize_headers else dict(header)
    header2.pop("REFTYPE", None)
    header2["dataset_id"] = returned_id
    header2["CRDS_CTX"] = context
    if self.args.json:
        # JSON mode emits the raw header, not the annotated copy.
        print(json.dumps({returned_id: header}))
    else:
        print("Dataset pars for", repr(returned_id), "with respect to",
              repr(context) + ":\n", log.PP(header2))
def list_required_parkeys(self):
    """Print out the parkeys required for matching using the specified contexts."""
    for name in self.contexts:
        mapping = crds.get_cached_mapping(name)
        if isinstance(mapping, rmap.PipelineContext):
            log.divider(name="Parkeys required for " + repr(mapping.basename), func=log.write)
            _print_dict("", mapping.get_required_parkeys())
        elif isinstance(mapping, rmap.InstrumentContext):
            # Fix: inner loop formerly shadowed the outer `name` loop variable.
            for filekind in sorted(mapping.selections):
                try:
                    rmapping = mapping.get_rmap(filekind)
                except (crds.exceptions.IrrelevantReferenceTypeError,
                        crds.exceptions.OmitReferenceTypeError):
                    print(filekind + ":", repr("N/A"))
                else:
                    print(filekind + ":", rmapping.get_required_parkeys())
        else:
            print(name + ":", mapping.get_required_parkeys())
def check_exptypes(self):
    """Based on EXP_TYPEs defined by CAL schema and the specified instrument
    contexts, print out log info on missing or unexpected coverage.
    """
    for imap_name in self.contexts:
        i_loaded = crds.get_cached_mapping(imap_name)
        # All exposure types the locator declares for this instrument.
        s_exp_types = self.locator.get_exptypes(i_loaded.instrument)
        for exp_type in s_exp_types:
            # Reference types expected to cover this exposure type.
            reftypes = self.locator.get_reftypes(exp_type)
            for filekind in i_loaded.selections:
                ufilekind = (i_loaded.instrument.upper(), filekind.upper())
                rmap_name = i_loaded.selections[filekind]
                if rmap_name == 'N/A':
                    # No rmap at all:  only notable if coverage was expected.
                    if filekind in reftypes:
                        log.verbose("Reftype rmap", repr(ufilekind), "is defined as N/A for", repr(exp_type))
                else:
                    r_loaded = i_loaded.get_rmap(filekind)
                    # Exposure types the rmap actually matches on, if any.
                    r_exp_types = r_loaded.get_parkey_map().get(
                        "META.EXPOSURE.TYPE", None)
                    if r_exp_types is None:  # ???  rmap does not select on EXP_TYPE at all
                        log.verbose("Reftype", repr(ufilekind), "does not match using EXP_TYPE.")
                    elif exp_type in r_exp_types:
                        if filekind in reftypes:
                            log.verbose("Reftype", repr(ufilekind), "explicitly mentions", repr(exp_type))
                        else:
                            # Rmap covers an exp_type the schema says it shouldn't.
                            log.warning("Reftype", repr(ufilekind), "has unexpected coverage for", repr(exp_type))
                    elif "ANY" in r_exp_types or "N/A" in r_exp_types:
                        log.verbose("Reftype", repr(ufilekind), "is satisfied by ANY or N/A for", repr(exp_type))
                    elif filekind in reftypes:
                        # Schema expects coverage but the rmap lacks it.
                        log.info("Reftype", repr(ufilekind), "is missing coverage for", repr(exp_type))
                    else:
                        log.verbose("Reftype", repr(ufilekind), "has no expected coverage for", repr(exp_type))
def list_required_parkeys(self):
    """Print out the parkeys required for matching using the specified contexts."""
    for name in self.contexts:
        mapping = crds.get_cached_mapping(name)
        if isinstance(mapping, rmap.PipelineContext):
            log.divider(name="Parkeys required for " + repr(mapping.basename), func=log.write)
            _print_dict("", mapping.get_required_parkeys())
        elif isinstance(mapping, rmap.InstrumentContext):
            # Fix: inner loop formerly shadowed the outer `name` loop variable.
            for filekind in sorted(mapping.selections):
                try:
                    rmapping = mapping.get_rmap(filekind)
                except (crds.exceptions.IrrelevantReferenceTypeError,
                        crds.exceptions.OmitReferenceTypeError):
                    print(filekind + ":", repr("N/A"))
                else:
                    print(filekind + ":", rmapping.get_required_parkeys())
        else:
            print(name + ":", mapping.get_required_parkeys())
def dump_dataset_headers(self):
    """Print out the matching parameters for the --datasets specified on
    the command line, one line per (dataset, context) pair.
    """
    multi_context_headers = defaultdict(list)
    for context in self.contexts:
        # Fetch headers either by explicit dataset ids or by instrument.
        if self.args.datasets:
            headers = api.get_dataset_headers_by_id(context, self.args.datasets)
        elif self.args.instrument:
            headers = api.get_dataset_headers_by_instrument(context, self.args.instrument)
        else:
            # Fix: previously `headers` was unbound here, raising NameError
            # when neither --datasets nor --instrument was specified.
            continue
        for dataset_id, header in headers.items():
            multi_context_headers[dataset_id].append((context, header))
    for dataset_id, context_headers in multi_context_headers.items():
        for (context, header) in context_headers:
            if self.args.condition_values:
                header = utils.condition_header(header)
            if self.args.minimize_headers:
                header = crds.get_cached_mapping(context).minimize_header(header)
            # Omit the context column when only one context was requested.
            if len(self.contexts) == 1:
                print(dataset_id, ":", log.format_parameter_list(header))
            else:
                print(dataset_id, ":", context, ":", log.format_parameter_list(header))
def check_naming_consistency(checked_instrument=None, exhaustive_mapping_check=False):
    """Dev function to compare the properties returned by name decomposition
    to the properties determined by file contents and make sure they're the same.
    Also checks rmap membership.

    >> from crds.tests import test_config
    >> old_config = test_config.setup()
    >> check_naming_consistency("acs")
    >> check_naming_consistency("cos")
    >> check_naming_consistency("nicmos")
    >> check_naming_consistency("stis")
    >> check_naming_consistency("wfc3")
    >> check_naming_consistency("wfpc2")
    >> test_config.cleanup(old_config)
    """
    from crds import certify
    for ref in rmap.list_references("*", observatory="hst", full_path=True):
        with log.error_on_exception("Failed processing:", repr(ref)):
            # Properties decomposed from the file NAME / CDBS path.
            _path, _observ, instrument, filekind, _serial, _ext = ref_properties_from_cdbs_path(ref)
            if checked_instrument is not None and instrument != checked_instrument:
                continue
            if data_file.is_geis_data(ref):
                # GEIS data files are checked via their conjugate header file.
                if os.path.exists(data_file.get_conjugate(ref)):
                    continue
                else:
                    log.warning("No GEIS header for", repr(ref))
            log.verbose("Processing:", instrument, filekind, ref)
            # Properties derived from the file CONTENTS.
            _path2, _observ2, instrument2, filekind2, _serial2, _ext2 = ref_properties_from_header(ref)
            if instrument != instrument2:
                log.error("Inconsistent instruments", repr(instrument), "vs.", repr(instrument2),
                          "for", repr(ref))
            if filekind != filekind2:
                log.error("Inconsistent filekinds", repr(filekind), "vs.", repr(filekind2),
                          "for", repr(ref))
            # Walk contexts newest-first looking for the governing rmap.
            for pmap_name in reversed(sorted(rmap.list_mappings("*.pmap", observatory="hst"))):
                # NOTE(review): `pmap` is loaded but unused — presumably to
                # prime the mapping cache for find_governing_rmap;  confirm.
                pmap = crds.get_cached_mapping(pmap_name)
                r = certify.find_governing_rmap(pmap_name, ref)
                if not r:
                    continue
                if r.instrument != instrument:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with name derived instrument", repr(instrument),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with name derived filekind", repr(filekind),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.instrument != instrument2:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with content derived instrument", repr(instrument2),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind2:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with content derived filekind", repr(filekind2),
                              "for", repr(ref), "in", repr(pmap_name))
                if not exhaustive_mapping_check:
                    break
            else:
                # for/else:  no context ever yielded a governing rmap.
                log.error("Orphan reference", repr(ref), "not found under any context.")
def test_crds_selectors_vs_datamodel(jail_environ, instrument):
    """For each reftype of `instrument` in the current jwst context, verify
    every rmap parkey exists in the schema of the corresponding datamodel.
    """
    os.environ["CRDS_SERVER_URL"] = 'https://jwst-crds-pub.stsci.edu'
    log.info(f"CRDS_PATH: {os.environ['CRDS_PATH']}")
    import crds
    from crds.client.api import cache_references
    from crds.core.exceptions import IrrelevantReferenceTypeError
    context = crds.get_context_name('jwst')
    pmap = crds.get_cached_mapping(context)
    imap = pmap.get_imap(instrument)
    log.info(f"Beginning tests for {instrument}")
    # get the reftypes
    reftypes = imap.get_filekinds()
    # remove pars- files (step-parameter reftypes have no datamodel);
    # iterate reversed so in-place remove() doesn't skip entries
    _ = [ reftypes.remove(name) for name in reftypes[::-1] if name.startswith('pars-') ]
    # iterate over reftypes for this instrument
    for reftype in reftypes:
        try:
            r = imap.get_rmap(reftype)
            parkeys = [ p for p in list(flatten(list(r.parkey))) if p not in ignored_parkeys ]
            log.debug(f"Parkeys for {reftype}: {parkeys}")
            for f in r.reference_names():
                # Ensure filetype is kind to be loaded into datamodel
                if 'fits' in f or 'asdf' in f:
                    # Find datamodel appropriate for this reference file
                    # If reftype has multiple datamodels possible, do some guesswork
                    if reftype in ref_to_multiples_dict.keys():
                        model_map = ref_to_multiples_dict[reftype]
                        with warnings.catch_warnings():
                            warnings.simplefilter('ignore', NoTypeWarning)
                            # Download the reference and peek at its metadata
                            # to pick the right model class.
                            refs = cache_references(context, {reftype: f})
                            with dm.open(refs[reftype]) as model:
                                try:
                                    ref_exptype = model.meta.exposure.type
                                except AttributeError:
                                    ref_exptype = None
                                ref_instrument = model.meta.instrument.name
                        if ref_exptype in model_map.keys():
                            ref_model = model_map[ref_exptype]
                        elif ref_instrument in model_map.keys():
                            ref_model = model_map[ref_instrument]
                        else:
                            ref_model = model_map['other']
                    # Simple one to one translation of reftype to datamodel
                    else:
                        ref_model = ref_to_datamodel_dict[reftype]
                    log.debug(
                        f"Loading {reftype} reference for {instrument} as {ref_model}"
                    )
                    if ref_model is None:
                        log.warning(
                            f"No datamodel found for {reftype}: skipping...")
                        break
                    # No need to actually load the reference file into the datamodel!
                    with ref_model() as m:
                        for key in parkeys:
                            # Every parkey must resolve somewhere in the schema.
                            assert len(m.search_schema(key.lower())) > 0
                    # One representative file per reftype is enough.
                    break
        except IrrelevantReferenceTypeError as e:
            log.debug(e)
            pass
def list_required_parkeys(self):
    """Print out the parkeys required for matching using the specified contexts."""
    for context_name in self.contexts:
        loaded = crds.get_cached_mapping(context_name)
        title = "Parkeys required for " + repr(loaded.basename)
        log.divider(name=title, func=log.write)
        _print_dict("", loaded.get_required_parkeys())
def test_sync_explicit_files(self):
    """Sync a single explicitly named mapping with full verification flags
    and confirm it subsequently loads from the cache.
    """
    mapping = "hst_cos_deadtab.rmap"
    self.assert_crds_not_exists(mapping)
    self.run_script("crds.sync --files hst_cos_deadtab.rmap --check-files --repair-files --check-sha1sum")
    crds.get_cached_mapping(mapping)
def check_naming_consistency(checked_instrument=None, exhaustive_mapping_check=False):
    """Dev function to compare the properties returned by name decomposition
    to the properties determined by file contents and make sure they're the same.
    Also checks rmap membership.

    >> from crds.tests import test_config
    >> old_config = test_config.setup()
    >> check_naming_consistency("acs")
    >> check_naming_consistency("cos")
    >> check_naming_consistency("nicmos")
    >> check_naming_consistency("stis")
    >> check_naming_consistency("wfc3")
    >> check_naming_consistency("wfpc2")
    >> test_config.cleanup(old_config)
    """
    from crds import certify
    for ref in rmap.list_references("*", observatory="hst", full_path=True):
        with log.error_on_exception("Failed processing:", repr(ref)):
            # Properties decomposed from the file NAME / CDBS path.
            _path, _observ, instrument, filekind, _serial, _ext = ref_properties_from_cdbs_path(
                ref)
            if checked_instrument is not None and instrument != checked_instrument:
                continue
            if data_file.is_geis_data(ref):
                # GEIS data files are checked via their conjugate header file.
                if os.path.exists(data_file.get_conjugate(ref)):
                    continue
                else:
                    log.warning("No GEIS header for", repr(ref))
            log.verbose("Processing:", instrument, filekind, ref)
            # Properties derived from the file CONTENTS.
            _path2, _observ2, instrument2, filekind2, _serial2, _ext2 = ref_properties_from_header(
                ref)
            if instrument != instrument2:
                log.error("Inconsistent instruments", repr(instrument), "vs.", repr(instrument2),
                          "for", repr(ref))
            if filekind != filekind2:
                log.error("Inconsistent filekinds", repr(filekind), "vs.", repr(filekind2),
                          "for", repr(ref))
            # Walk contexts newest-first looking for the governing rmap.
            for pmap_name in reversed(
                    sorted(rmap.list_mappings("*.pmap", observatory="hst"))):
                # NOTE(review): `pmap` is loaded but unused — presumably to
                # prime the mapping cache for find_governing_rmap;  confirm.
                pmap = crds.get_cached_mapping(pmap_name)
                r = certify.find_governing_rmap(pmap_name, ref)
                if not r:
                    continue
                if r.instrument != instrument:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with name derived instrument", repr(instrument),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with name derived filekind", repr(filekind),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.instrument != instrument2:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with content derived instrument", repr(instrument2),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind2:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with content derived filekind", repr(filekind2),
                              "for", repr(ref), "in", repr(pmap_name))
                if not exhaustive_mapping_check:
                    break
            else:
                # for/else:  no context ever yielded a governing rmap.
                log.error("Orphan reference", repr(ref), "not found under any context.")