def sync_datasets(self):
    """Sync mappings and references for datasets with respect to `self.contexts`."""
    if not self.contexts:
        log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.")
        sys.exit(-1)
    active_references = []
    for context in self.contexts:
        if self.args.dataset_ids:
            if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                with open(self.args.dataset_ids[0][1:]) as pfile:
                    self.args.dataset_ids = pfile.read().splitlines()
            with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
        for dataset in self.args.dataset_files or self.args.dataset_ids:
            log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
            with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                if self.args.dataset_files:
                    headers = { dataset : data_file.get_conditioned_header(dataset, observatory=self.observatory) }
                else:
                    headers = { dataset_id : header for (dataset_id, header) in id_headers.items()
                                if dataset.upper() in dataset_id }
                for assc_dataset, header in headers.items():
                    with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset),
                                                "under context", repr(context)):
                        bestrefs = crds.getrecommendations(
                            header, context=context, observatory=self.observatory,
                            ignore_cache=self.args.ignore_cache)
                        log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                        active_references.extend(bestrefs.values())
    active_references = [ref for ref in active_references if not ref.startswith("NOT FOUND")]
    log.verbose("Syncing references:", repr(active_references))
    return list(set(active_references))

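# Every function in this collection wraps fallible steps in log.error_on_exception so
# that a single failure is logged without aborting the surrounding loop or command.
# Below is a minimal sketch of such a context manager, assuming it merely logs the
# message parts plus the exception and then suppresses it (the real crds.core.log
# implementation may differ, e.g. it also tracks error counts):
import contextlib

@contextlib.contextmanager
def error_on_exception(*message_parts):
    """Log `message_parts` and any exception raised in the block, then suppress it."""
    try:
        yield
    except Exception as exc:
        print("ERROR:", " ".join(str(part) for part in message_parts), ":", exc)
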
def rmap_apply(self, func, *args, **keys):
    """Apply `func()` to *args and **keys, adding the pmap, imap, and rmap values
    associated with the elaboration of args.source_context, args.instruments, args.types.
    """
    keywords = dict(keys)
    self._setup_source_context()
    if self.args.rmaps:
        for rmap_name in self.args.rmaps:
            with log.error_on_exception("Failed processing rmap", srepr(rmap_name)):
                log.info("="*20, "Refactoring rmap", srepr(rmap_name), "="*20)
                rmapping = rmap.load_mapping(rmap_name)
                new_filename = self._process_rmap(func, rmapping=rmapping, **keywords)
                self._diff_and_certify(rmapping=rmapping, new_filename=new_filename,
                                       source_context=self.source_context, **keywords)
    else:
        pmapping = rmap.load_mapping(self.source_context)
        instruments = pmapping.selections.keys() if "all" in self.args.instruments else self.args.instruments
        for instr in instruments:
            with log.augment_exception("Failed loading imap for", repr(instr), "from",
                                       repr(self.source_context)):
                imapping = pmapping.get_imap(instr)
                types = imapping.selections.keys() if "all" in self.args.types else self.args.types
                for filekind in types:
                    with log.error_on_exception("Failed processing rmap for", repr(filekind)): #, "from",
                            # repr(imapping.basename), "of", repr(self.source_context)):
                        try:
                            rmapping = imapping.get_rmap(filekind).copy()
                        except crds.exceptions.IrrelevantReferenceTypeError as exc:
                            log.info("Skipping type", srepr(filekind), "as N/A")
                            continue
                        log.info("="*20, "Refactoring rmap", srepr(rmapping.basename), "="*20)
                        new_filename = self._process_rmap(func, rmapping=rmapping, **keywords)
                        self._diff_and_certify(rmapping=rmapping, source_context=self.source_context,
                                               new_filename=new_filename, **keywords)

def __init__(self, observatory, unified_defs):
    self.observatory = observatory
    self.unified_defs = unified_defs
    sorted_udef_items = sorted(unified_defs.items())

    with log.error_on_exception("Can't determine instruments from specs."):
        self.instruments = [instr.lower() for instr in sorted(self.unified_defs.keys())]

    with log.error_on_exception("Can't determine types from specs."):
        self.filekinds = sorted(
            set(reftype.lower()
                for instr, reftypes in sorted_udef_items
                for reftype in reftypes))

    with log.error_on_exception("Can't determine extensions from specs."):
        self.extensions = sorted(
            set(params.get("file_ext", ".fits")
                for instr, reftypes in sorted_udef_items
                for reftype, params in reftypes.items())) + [".pmap", ".imap", ".rmap"]

    with log.error_on_exception("Can't determine type text descriptions from specs."):
        self.text_descr = {
            reftype.lower() : params["text_descr"]
            for instr, reftypes in sorted_udef_items
            for reftype, params in reftypes.items()
        }

    with log.error_on_exception("Failed determining filekind_to_suffix"):
        self._filekind_to_suffix = {
            instr : {
                filekind.lower() : self.unified_defs[instr][filekind]["suffix"].lower()
                for filekind in self.unified_defs[instr]
            }
            for instr in self.unified_defs
        }

    with log.error_on_exception("Failed determining suffix_to_filekind"):
        self._suffix_to_filekind = _invert_instr_dict(self._filekind_to_suffix)

    with log.error_on_exception("Failed determining filetype_to_suffix"):
        self._filetype_to_suffix = {
            instr : {
                self.unified_defs[instr][filekind]["filetype"].lower() :
                    self.unified_defs[instr][filekind]["suffix"].lower()
                for filekind in self.unified_defs[instr]
            }
            for instr in self.unified_defs
        }

    with log.error_on_exception("Failed determining suffix_to_filetype"):
        self.suffix_to_filetype = _invert_instr_dict(self._filetype_to_suffix)

    with log.error_on_exception("Failed determining unique_rowkeys"):
        self.row_keys = {
            instr : {
                filekind.lower() : self.unified_defs[instr][filekind]["unique_rowkeys"]
                for filekind in self.unified_defs[instr]
            }
            for instr in self.unified_defs
        }

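# The helper _invert_instr_dict() used above is not shown here.  A minimal sketch of
# what such an inversion might look like, assuming it simply swaps the keys and values
# of each per-instrument sub-dictionary (illustrative only, not the actual CRDS helper):
def _invert_instr_dict(mapping):
    """Invert { instrument : { key : value, ... }, ... } into
    { instrument : { value : key, ... }, ... }.
    """
    return {
        instr: {value: key for key, value in subdict.items()}
        for instr, subdict in mapping.items()
    }
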
def load_raw_specs(spec_path):
    """Return a dictionary of TypeSpecs loaded from directory `spec_path` of form:

    { instrument : { filetype : header_dict, ...}, ... }
    """
    with log.error_on_exception("Failed loading type specs from:", repr(spec_path)):
        specs = collections.defaultdict(dict)
        for spec in glob.glob(os.path.join(spec_path, "*.spec")) + glob.glob(os.path.join(spec_path, "*.rmap")):
            instr, reftype = os.path.splitext(os.path.basename(spec))[0].split("_")
            with log.error_on_exception("Failed loading", repr(spec)):
                specs[instr][reftype] = dict(TypeSpec.from_file(spec))
        return specs
    return {}

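# A minimal usage sketch of the nested structure returned above, assuming a
# hypothetical spec directory of "<instrument>_<reftype>.spec" files (the path and
# the "suffix" key below are illustrative):
specs = load_raw_specs("/path/to/specs")
for instrument, reftypes in specs.items():
    for reftype, header in reftypes.items():
        print(instrument, reftype, header.get("suffix"))
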
def organize_references(self, new_mode):
    """Find all references in the CRDS cache and relink them to the paths which are
    implied by `new_mode`.  This is used to reorganize existing file caches into new
    layouts, e.g. flat --> by instrument.
    """
    old_refpaths = rmap.list_references("*", observatory=self.observatory, full_path=True)
    old_mode = config.get_crds_ref_subdir_mode(self.observatory)
    log.info("Reorganizing", len(old_refpaths), "references from", repr(old_mode), "to", repr(new_mode))
    config.set_crds_ref_subdir_mode(new_mode, observatory=self.observatory)
    new_mode = config.get_crds_ref_subdir_mode(self.observatory)  # did it really change.
    for refpath in old_refpaths:
        with log.error_on_exception("Failed relocating:", repr(refpath)):
            desired_loc = config.locate_file(os.path.basename(refpath), observatory=self.observatory)
            if desired_loc != refpath:
                if os.path.exists(desired_loc):
                    if not self.args.organize_delete_junk:
                        log.warning("Link or directory already exists at", repr(desired_loc),
                                    "Skipping", repr(refpath))
                        continue
                    utils.remove(desired_loc, observatory=self.observatory)
                if config.writable_cache_or_info("Skipping file relocation from", repr(refpath),
                                                 "to", repr(desired_loc)):
                    log.info("Relocating", repr(refpath), "to", repr(desired_loc))
                    shutil.move(refpath, desired_loc)
            else:
                if old_mode != new_mode:
                    log.verbose_warning("Keeping existing cached file", repr(desired_loc),
                                        "already in target mode", repr(new_mode))
                else:
                    log.verbose_warning("No change in subdirectory mode", repr(old_mode),
                                        "skipping reorganization of", repr(refpath))
    if new_mode == "flat" and old_mode == "instrument":
        log.info("Reorganizing from 'instrument' to 'flat' cache, removing instrument directories.")
        for instrument in self.locator.INSTRUMENTS:
            self.remove_dir(instrument)

def organize_references(self, new_mode):
    """Find all references in the CRDS cache and relink them to the paths which are
    implied by `new_mode`.  This is used to reorganize existing file caches into new
    layouts, e.g. flat --> by instrument.
    """
    old_refpaths = rmap.list_references("*", observatory=self.observatory, full_path=True)
    old_mode = config.get_crds_ref_subdir_mode(self.observatory)
    log.info("Reorganizing", len(old_refpaths), "references from", repr(old_mode), "to", repr(new_mode))
    config.set_crds_ref_subdir_mode(new_mode, observatory=self.observatory)
    new_mode = config.get_crds_ref_subdir_mode(self.observatory)  # did it really change.
    for refpath in old_refpaths:
        with log.error_on_exception("Failed relocating:", repr(refpath)):
            desired_loc = rmap.locate_file(os.path.basename(refpath), observatory=self.observatory)
            if desired_loc != refpath:
                if os.path.exists(desired_loc):
                    if not self.args.organize_delete_junk:
                        log.warning("Link or directory already exists at", repr(desired_loc),
                                    "Skipping", repr(refpath))
                        continue
                    utils.remove(desired_loc, observatory=self.observatory)
                if config.writable_cache_or_info("Skipping file relocation from", repr(refpath),
                                                 "to", repr(desired_loc)):
                    log.info("Relocating", repr(refpath), "to", repr(desired_loc))
                    shutil.move(refpath, desired_loc)
            else:
                if old_mode != new_mode:
                    log.verbose_warning("Keeping existing cached file", repr(desired_loc),
                                        "already in target mode", repr(new_mode))
                else:
                    log.verbose_warning("No change in subdirectory mode", repr(old_mode),
                                        "skipping reorganization of", repr(refpath))
    if new_mode == "flat" and old_mode == "instrument":
        log.info("Reorganizing from 'instrument' to 'flat' cache, removing instrument directories.")
        for instrument in self.locator.INSTRUMENTS:
            self.remove_dir(instrument)

def cat_files(self):
    """Print out the files listed after --cat or implied by a combination of
    explicitly specified contexts and --mappings or --references.

    --files is not allowed.
    """
    # --cat files...  specifying *no* files still triggers --cat logic
    # --contexts context-specifiers [including --all --last --range...]
    # context specifiers can be symbolic and will be resolved.
    # --cat @file is allowed
    catted_files = self.get_words(self.args.cat) + self.implied_files
    try:
        self._file_info = api.get_file_info_map(
            self.observatory, files=[os.path.basename(filename) for filename in catted_files])
    except Exception:
        log.verbose_warning("Failed retrieving CRDS server catalog information.  May need to set CRDS_SERVER_URL.")
    # This could be expanded to include the closure of mappings or references
    for name in catted_files:
        with log.error_on_exception("Failed dumping:", repr(name)):
            path = self.locate_file(name)
            self._cat_file(path)

def list_dataset_headers(self):
    """List dataset header info for self.args.dataset_headers with respect to self.args.contexts"""
    # Support @-files for ids specified on command line
    ids = self.get_words(self.args.dataset_headers)
    products_seen, exposures_seen = set(), set()
    expanded_ids = []
    for context in self.contexts:
        with log.error_on_exception("Failed fetching dataset parameters with respect to", repr(context),
                                    "for", repr(self.args.dataset_headers)):
            for returned_id, header in api.get_dataset_headers_unlimited(context, ids):
                product, exposure = returned_id.split(":")
                if isinstance(header, str):
                    log.error("No header for", repr(returned_id), ":", repr(header))  # header is reason
                    continue
                if self.args.first_id_expansion_only and product in products_seen:
                    continue
                products_seen.add(product)
                exposures_seen.add(exposure)
                if self.args.id_expansions_only:
                    expanded_ids += [returned_id + (" " + context if len(self.contexts) > 1 else "")]
                else:
                    self.dump_header(context, returned_id, header)
    if self.args.id_expansions_only:
        for expanded in sorted(expanded_ids):
            print(expanded)

def remote_context(self):
    """Print the name of the context in use at pipeline `self.args.remote_context`
    as recorded on the server after being pushed by the crds.sync tool in the pipeline.
    """
    self.require_server_connection()
    with log.error_on_exception("Failed resolving remote context"):
        return api.get_remote_context(self.observatory, self.args.remote_context)

def pickle_contexts(self, contexts):
    """Save pickled versions of `contexts` in the CRDS cache.

    By default this will by-pass existing pickles if they successfully load.
    """
    for context in contexts:
        with log.error_on_exception("Failed pickling", repr(context)):
            crds.get_pickled_mapping.uncached(context, use_pickles=True, save_pickles=True)  # reviewed

def push_context(self):
    """Push the final context recorded in the local cache to the CRDS server so it can
    be displayed as the operational state of a pipeline.
    """
    info = heavy_client.load_server_info(self.observatory)
    with log.error_on_exception("Failed pushing cached operational context name to CRDS server"):
        api.push_remote_context(self.observatory, "operational", self.args.push_context, info.operational_context)
        log.info("Pushed cached operational context name", repr(info.operational_context), "to CRDS server")

def list_file_properties(self):
    """Print out the (instrument, filekind, filename) information for each of the
    files implied by --files, and any contexts plus --mappings and/or --references.
    """
    for filename in self.args.file_properties + self.implied_files:
        with log.error_on_exception("Failed dumping file properties for", repr(filename)):
            filepath = self.locate_file(filename)
            instrument, filekind = self.get_file_properties(filepath)
            print(instrument, filekind, os.path.basename(filepath))

def load_all_mappings(observatory, pattern="*map"):
    """Return a dictionary mapping the names of all CRDS Mappings matching `pattern`
    onto the loaded Mapping object.
    """
    all_mappings = rmap.list_mappings(pattern, observatory)
    loaded = {}
    for name in all_mappings:
        with log.error_on_exception("Failed loading", repr(name)):
            loaded[name] = rmap.get_cached_mapping(name)
    return loaded

def file_available(filename):
    """Return True IFF `filename` is believed to be available, nominally based on
    HTTP HEAD to the archive.
    """
    with log.error_on_exception("Failed verify_archive_file() for", repr(filename)):
        script = CheckArchiveScript()
        script.init_files([filename])
        available = script.verify_archive_file(filename)
        return available

def main(self):
    for file_ in self.files:
        with log.error_on_exception("Checksum operation FAILED"):
            if self.args.remove:
                remove_checksum(file_)
            elif self.args.verify:
                verify_checksum(file_)
            else:
                add_checksum(file_)
    return log.errors()

def list_dataset_ids(self):
    """Print out the dataset ids associated with the instruments specified as command line params."""
    for instrument in self.args.dataset_ids:
        with log.error_on_exception("Failed reading dataset ids for", repr(instrument)):
            for context in self.contexts:
                ids = api.get_dataset_ids(context, instrument)
                for dataset_id in ids:
                    if len(self.contexts) > 1:
                        print(context, dataset_id)
                    else:
                        print(dataset_id)

def add_checksum(file_):
    """Add checksums to file_."""
    log.info("Adding checksum for", repr(file_))
    if config.is_reference(file_):
        with log.error_on_exception("Failed updating checksum for", repr(file_)):
            data_file.add_checksum(file_)
    elif rmap.is_mapping(file_):
        update_mapping_checksum(file_)
    else:
        raise exceptions.CrdsError(
            "File", repr(file_), "does not appear to be a CRDS reference or mapping file.")

def get_datamodels():
    try:
        from jwst import datamodels  # this is fatal.
    except ImportError:
        log.error("CRDS requires installation of the 'jwst' package to operate on JWST files.")
        raise
    global MODEL
    if MODEL is None:
        with log.error_on_exception("Failed constructing basic JWST DataModel"):
            MODEL = datamodels.DataModel()
    return datamodels

def remove_files(self, files, kind):
    """Remove the list of `files` basenames which are converted to fully
    specified CRDS paths using the locator module associated with context.
    """
    if not files:
        log.verbose("No " + kind + "s to remove.")
    files2 = set(files)
    for filename in files:
        if re.match(r"\w+\.r[0-9]h", filename):
            files2.add(filename[:-1] + "d")   # also purge the conjugate GEIS data file
    for filename in files2:   # iterate over the expanded set so GEIS conjugates are removed too
        with log.error_on_exception("Failed purging", kind, repr(filename)):
            where = rmap.locate_file(filename, self.observatory)
            utils.remove(where, observatory=self.observatory)

def remove_files(self, files, kind):
    """Remove the list of `files` basenames which are converted to fully
    specified CRDS paths using the locator module associated with context.
    """
    if not files:
        log.verbose("No " + kind + "s to remove.")
    files2 = set(files)
    for filename in files:
        if re.match(r"\w+\.r[0-9]h", filename):
            files2.add(filename[:-1] + "d")   # also purge the conjugate GEIS data file
    for filename in files2:   # iterate over the expanded set so GEIS conjugates are removed too
        with log.error_on_exception("Failed purging", kind, repr(filename)):
            where = config.locate_file(filename, self.observatory)
            utils.remove(where, observatory=self.observatory)

def main(self):
    if self.args.best_effort:
        config.PASS_INVALID_VALUES.set(True)       # JWST SSB cal code data model
        config.ALLOW_BAD_USEAFTER.set(True)        # Don't fail for bad USEAFTER values
        config.ALLOW_SCHEMA_VIOLATIONS.set(True)   # Don't fail for data model bad value errors
        config.ALLOW_BAD_PARKEY_VALUES.set(True)   # Don't fail for values which don't pass DM + .tpn checking

    if self.args.rmaps:   # clean up dead lines from file lists
        self.args.rmaps = [self.resolve_context(mapping) for mapping in self.args.rmaps if mapping.strip()]

    if self.args.references:
        self.args.references = [self.locate_file(reference) for reference in self.args.references]

    with log.error_on_exception("Refactoring operation FAILED"):
        if self.args.command == "insert_reference":
            if self.args.old_rmap:
                old_rmap, new_rmap = self.resolve_context(self.args.old_rmap), self.resolve_context(self.args.new_rmap)
                rmap_insert_references(old_rmap, new_rmap, self.args.references)
            else:
                self.insert_references()   # figure it all out relative to --source-context
        elif self.args.command == "delete_reference":
            old_rmap, new_rmap = self.resolve_context(self.args.old_rmap), self.resolve_context(self.args.new_rmap)
            rmap_delete_references(old_rmap, new_rmap, self.args.references)
        elif self.args.command == "del_header":
            self.del_header_key()
        elif self.args.command == "set_header":
            self.set_header_key()
        elif self.args.command == "del_parameter":
            self.del_parameter()
        elif self.args.command == "set_parkey":
            self.set_parkey()
        elif self.args.command == "replace_text":
            self.replace_text()
        elif self.args.command == "set_substitution":
            self.set_substitution()
        elif self.args.command == "cat":
            self.cat()
        elif self.args.command == "add_useafter":
            self.add_useafter()
        elif self.args.command == "diff_rmaps":
            self.diff_rmaps()
        elif self.args.command == "certify_rmaps":
            self.certify_rmaps()
        else:
            raise ValueError("Unknown refactoring command: " + repr(self.args.command))

    log.standard_status()
    return log.errors()

def __iter__(self):
    """Return the sources from self with EXPTIME >= self.datasets_since."""
    for source in sorted(self.sources):
        with log.error_on_exception("Failed loading source", repr(source),
                                    "from", repr(self.__class__.__name__)):
            instrument = utils.header_to_instrument(self.header(source))
            exptime = matches.get_exptime(self.header(source))
            since = self.datasets_since(instrument)
            # since == None when no command line argument given.
            if since is None or exptime >= since:
                yield source
            else:
                log.verbose("Dropping source", repr(source),
                            "with EXPTIME =", repr(exptime),
                            "< --datasets-since =", repr(since))

def get_datamodels():
    """Defer datamodels loads until we definitely have a roman usecase.

    Enables light observatory package imports which don't require all
    dependencies when supporting other observatories.
    """
    try:
        from romancal import datamodels
    except ImportError:
        log.error("CRDS requires installation of the 'romancal' package to operate on Roman files.")
        raise
    global MODEL
    if MODEL is None:
        with log.error_on_exception("Failed constructing basic RomanDataModel"):
            MODEL = datamodels.RomanDataModel()
    return datamodels

def main(self):
    with log.error_on_exception("Refactoring operation FAILED"):
        if self.args.command == "insert":
            rmap_insert_references(self.old_rmap, self.new_rmap, self.ref_paths)
        elif self.args.command == "delete":
            rmap_delete_references(self.old_rmap, self.new_rmap, self.ref_paths)
        elif self.args.command == "set_header":
            field, setting = self.args.references[0], " ".join(self.args.references[1:])
            set_header_value(self.old_rmap, self.new_rmap, field, setting)
        elif self.args.command == "del_header":
            field = self.args.references[0]
            del_header_value(self.old_rmap, self.new_rmap, field)
        else:
            raise ValueError("Unknown refactoring command: " + repr(self.args.command))
    log.standard_status()
    return log.errors()

def get_history(self, handle):
    """Given an ASDF file object `handle`, return the history collected into a
    single string.
    """
    history = "UNDEFINED"   # or BAD FORMAT
    with log.error_on_exception("Failed reading ASDF history, see ASDF docs on adding history"):
        histall = []
        hist = handle.tree["history"]
        try:
            entries = handle.get_history_entries()
        except Exception:
            log.verbose_warning("Using inlined CRDS ASDF history entry reading interface.")
            entries = hist["entries"] if "entries" in hist else hist
        for entry in entries:
            time = timestamp.format_date(entry["time"]).split(".")[0]
            description = entry["description"]
            histall.append(time + " :: " + description)
        if histall:
            history = "\n".join(histall)
    return history

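# For context, a minimal sketch of how history entries like the ones read above might
# be written and read back with the asdf package; the file name is hypothetical and
# the API usage is assumed from the asdf documentation, not part of the CRDS code here:
import asdf

af = asdf.AsdfFile({"data": [1, 2, 3]})
af.add_history_entry("Created example file to demonstrate history entries.")
af.write_to("example_history.asdf")   # hypothetical output filename

with asdf.open("example_history.asdf") as handle:
    for entry in handle.get_history_entries():
        print(entry["time"], "::", entry["description"])
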
def cross_link_cdbs_paths(self, syn_name_map):
    """Hard link files from the downloaded CRDS cache structure to the classic
    CDBS-style directory locations specified by `syn_name_map`.

    On OS-X and Linux this creates files which are referenced from two different
    paths on the file system.  Deleting one path or the other leaves behind a
    normal file referenced from only one location.

    syn_name_map    dict    { syn_basename : pysyn_filepath, ... }

    returns None
    """
    for reference in syn_name_map:
        with log.error_on_exception("Failed linking", repr(reference), "to CDBS directory."):
            crds_filepath = os.path.abspath(self.imap.locate_file(reference))
            cdbs_filepath = syn_name_map[reference]
            utils.ensure_dir_exists(cdbs_filepath)
            if not os.path.exists(cdbs_filepath):
                os.link(crds_filepath, cdbs_filepath)
                log.verbose("Linked", cdbs_filepath, "-->", crds_filepath)
            else:
                log.verbose("Skipped existing", cdbs_filepath)

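# A small self-contained illustration of the hard-link behavior described in the
# docstring above; the file names are made up and unrelated to real CDBS paths:
import os

with open("original.fits", "w") as handle:
    handle.write("reference data")
os.link("original.fits", "cdbs_copy.fits")   # two paths, one underlying file
os.remove("original.fits")                   # the data remains reachable...
with open("cdbs_copy.fits") as handle:
    print(handle.read())                     # ...through the surviving link
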
def is_reprocessing_required(dataset, dataset_parameters, old_context, new_context, update):
    """This is the top level interface to crds.bestrefs running in "Affected Datasets" mode.

    It determines if reprocessing `dataset` with parameters `dataset_parameters` should be
    performed as a consequence of switching from `old_reference` to `new_reference`.
    old_reference is assigned to dataset by old_context, and new_reference is assigned to
    dataset by new_context.

    Parameters
    ----------
    dataset:
        id of dataset being reprocessed, <assoc>:<member> or <unassoc>:<unassoc> format

    dataset_parameters:
        { parameter : value, ...} for all matching parameters and row selection parameters

        XXX row selection parameters not used in file selection may not be present until
        XXX explicitly added to the CRDS interface to the DADSOPS parameter database...
        XXX and possibly even to DADSOPS itself. Normally the row selections have only been
        XXX done with direct access to dataset .fits files.

    old_context: loaded pmap or name of old context, possibly for metadata or None

    new_context: loaded pmap or name of new context, possibly for metadata

    update: Update object

    Returns
    -------
    True IFF reprocessing should be done as a consequence of the table change.
    """
    log.verbose('is_reprocessing_required: Called with:\n',
                dataset, '\n',
                dataset_parameters, '\n',
                old_context, '\n',
                new_context, '\n',
                update,
                verbosity=100)

    # no old_context means "single context" mode, always reprocess.
    if old_context is None:
        return True

    # NOTE: non-tables are treated in DeepLook as filekinds which aren't (or maybe someday are)
    # handled, hence reprocessed for now.

    # Reprocess for non-file special values.  Other code will decide what to do with the updates,
    # the point here is that table comparison isn't possible so filtering shouldn't be done.
    old_ref = update.old_reference.lower()
    new_ref = update.new_reference.lower()
    incomparable = ('n/a', 'undefined', 'not found')
    if old_ref.startswith(incomparable) or new_ref.startswith(incomparable):
        return True

    # mostly debug wrappers here, allows simple string parameters to work and resolves cache paths.
    old_context = rmap.asmapping(old_context, cached=True)
    new_context = rmap.asmapping(new_context, cached=True)
    old_reference = old_context.locate_file(old_ref)
    new_reference = new_context.locate_file(new_ref)

    # Log that deep examination is occurring.
    log.verbose('Deep Reference examination between {} and {} initiated.'.format(old_reference, new_reference),
                verbosity=25)

    with log.error_on_exception("Failed fetching comparison reference tables:", repr([old_ref, new_ref])):
        api.dump_files(new_context.name, [old_ref, new_ref])

    # See if deep checking into the reference is possible.
    try:
        deep_look = DeepLook.from_filekind(update.instrument, update.filekind)

        dataset_id = dataset.split(':')[0]

        # **DEBUG**
        # ** Since we are not getting full headers, if this is a test
        # ** dataset, replace the headers.
        #log.verbose_warning('Forcing use of LBYX01010, regardless...', verbosity=25)
        #dataset_id = 'LBYX01010'   #***DEBUG: force headers regardless of actual data

        if dataset_id in deep_look.stub_input:
            log.verbose_warning('Substituting header for dataset "{}"'.format(dataset))
            dataset_parameters = deep_look.stub_input[dataset_id]['headers']
            log.verbose_warning('headers = ', dataset_parameters, verbosity=25)

        log.verbose(deep_look.preamble, 'Dataset headers = {}'.format(dataset_parameters), verbosity=75)
        log.verbose(deep_look.preamble, 'Comparing references {} and {}.'.format(old_reference, new_reference),
                    verbosity=75)
        deep_look.are_different(dataset_parameters, old_reference, new_reference)

        log.verbose(deep_look.preamble,
                    'Reprocessing is {}required.'.format('' if deep_look.is_different else 'not '),
                    verbosity=25)
        log.verbose(deep_look.preamble, deep_look.message, verbosity=25)
        return deep_look.is_different

    except DeepLookError as error:
        # Could not determine difference, therefore presume so.
        log.verbose_warning('Deep examination error: {}'.format(error.message), verbosity=25)
        log.verbose_warning('Deep examination failed, presuming reprocessing.', verbosity=25)
        return True

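# A minimal usage sketch of the function above.  The context names, dataset id, and
# parameter values are illustrative, the namedtuple is a simplified stand-in for the
# real Update object (only the attributes referenced above), and actually running this
# requires a configured CRDS cache/server:
from collections import namedtuple

Update = namedtuple("Update", "instrument filekind old_reference new_reference")
update = Update("cos", "wcptab", "old_wcp.fits", "new_wcp.fits")

needs_reprocessing = is_reprocessing_required(
    dataset="LBYX01010:LBYX01010",
    dataset_parameters={"OPT_ELEM": "G140L", "CENWAVE": "1280"},
    old_context="hst_0001.pmap",   # hypothetical context names
    new_context="hst_0002.pmap",
    update=update)
print("Reprocess?", needs_reprocessing)
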
def check_naming_consistency(checked_instrument=None, exhaustive_mapping_check=False):
    """Dev function to compare the properties returned by name decomposition
    to the properties determined by file contents and make sure they're the same.
    Also checks rmap membership.

    >> from crds.tests import test_config
    >> old_config = test_config.setup()
    >> check_naming_consistency("acs")
    >> check_naming_consistency("cos")
    >> check_naming_consistency("nicmos")
    >> check_naming_consistency("stis")
    >> check_naming_consistency("wfc3")
    >> check_naming_consistency("wfpc2")
    >> test_config.cleanup(old_config)
    """
    from crds import certify
    for ref in rmap.list_references("*", observatory="hst", full_path=True):
        with log.error_on_exception("Failed processing:", repr(ref)):

            _path, _observ, instrument, filekind, _serial, _ext = ref_properties_from_cdbs_path(ref)

            if checked_instrument is not None and instrument != checked_instrument:
                continue

            if data_file.is_geis_data(ref):
                if os.path.exists(data_file.get_conjugate(ref)):
                    continue
                else:
                    log.warning("No GEIS header for", repr(ref))

            log.verbose("Processing:", instrument, filekind, ref)

            _path2, _observ2, instrument2, filekind2, _serial2, _ext2 = ref_properties_from_header(ref)
            if instrument != instrument2:
                log.error("Inconsistent instruments", repr(instrument), "vs.", repr(instrument2),
                          "for", repr(ref))
            if filekind != filekind2:
                log.error("Inconsistent filekinds", repr(filekind), "vs.", repr(filekind2),
                          "for", repr(ref))

            for pmap_name in reversed(sorted(rmap.list_mappings("*.pmap", observatory="hst"))):

                r = certify.certify.find_governing_rmap(pmap_name, ref)

                if not r:
                    continue

                if r.instrument != instrument:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with name derived instrument", repr(instrument),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with name derived filekind", repr(filekind),
                              "for", repr(ref), "in", repr(pmap_name))

                if r.instrument != instrument2:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with content derived instrument", repr(instrument2),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind2:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with content derived filekind", repr(filekind2),
                              "for", repr(ref), "in", repr(pmap_name))

                if not exhaustive_mapping_check:
                    break

            else:
                log.error("Orphan reference", repr(ref), "not found under any context.")

def check_naming_consistency(checked_instrument=None, exhaustive_mapping_check=False):
    """Dev function to compare the properties returned by name decomposition
    to the properties determined by file contents and make sure they're the same.
    Also checks rmap membership.

    >> from crds.tests import test_config
    >> old_config = test_config.setup()
    >> check_naming_consistency("acs")
    >> check_naming_consistency("cos")
    >> check_naming_consistency("nicmos")
    >> check_naming_consistency("stis")
    >> check_naming_consistency("wfc3")
    >> check_naming_consistency("wfpc2")
    >> test_config.cleanup(old_config)
    """
    from crds import certify
    for ref in rmap.list_references("*", observatory="hst", full_path=True):
        with log.error_on_exception("Failed processing:", repr(ref)):

            _path, _observ, instrument, filekind, _serial, _ext = ref_properties_from_cdbs_path(ref)

            if checked_instrument is not None and instrument != checked_instrument:
                continue

            if data_file.is_geis_data(ref):
                if os.path.exists(data_file.get_conjugate(ref)):
                    continue
                else:
                    log.warning("No GEIS header for", repr(ref))

            log.verbose("Processing:", instrument, filekind, ref)

            _path2, _observ2, instrument2, filekind2, _serial2, _ext2 = ref_properties_from_header(ref)
            if instrument != instrument2:
                log.error("Inconsistent instruments", repr(instrument), "vs.", repr(instrument2),
                          "for", repr(ref))
            if filekind != filekind2:
                log.error("Inconsistent filekinds", repr(filekind), "vs.", repr(filekind2),
                          "for", repr(ref))

            for pmap_name in reversed(sorted(rmap.list_mappings("*.pmap", observatory="hst"))):

                r = certify.find_governing_rmap(pmap_name, ref)

                if not r:
                    continue

                if r.instrument != instrument:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with name derived instrument", repr(instrument),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with name derived filekind", repr(filekind),
                              "for", repr(ref), "in", repr(pmap_name))

                if r.instrument != instrument2:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with content derived instrument", repr(instrument2),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind2:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with content derived filekind", repr(filekind2),
                              "for", repr(ref), "in", repr(pmap_name))

                if not exhaustive_mapping_check:
                    break

            else:
                log.error("Orphan reference", repr(ref), "not found under any context.")

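# The trailing else: on the pmap loop above is Python's for/else construct: the else
# block runs only when the loop completes without hitting break.  A tiny self-contained
# illustration of the same pattern:
def find_first_even(numbers):
    for number in numbers:
        if number % 2 == 0:
            print("Found even number:", number)
            break
    else:
        print("No even number found.")

find_first_even([1, 3, 5])   # prints "No even number found."
find_first_even([1, 4, 5])   # prints "Found even number: 4"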