def organize_references(self, new_mode):
    """Find all references in the CRDS cache and relink them to the paths
    which are implied by `new_mode`.  This is used to reorganize existing file
    caches into new layouts, e.g. flat --> by instrument.

    Parameters
    ----------
    new_mode : str
        Target cache subdirectory mode, e.g. "flat" or "instrument".
    """
    # Snapshot every cached reference path under the *current* layout first.
    old_refpaths = rmap.list_references("*", observatory=self.observatory, full_path=True)
    old_mode = config.get_crds_ref_subdir_mode(self.observatory)
    log.info("Reorganizing", len(old_refpaths), "references from", repr(old_mode), "to", repr(new_mode))
    # Persist the new mode, then re-read it so `new_mode` reflects what
    # config actually accepted.
    config.set_crds_ref_subdir_mode(new_mode, observatory=self.observatory)
    new_mode = config.get_crds_ref_subdir_mode(self.observatory)  # did it really change.
    for refpath in old_refpaths:
        # Any single failed relocation is logged and skipped, not fatal.
        with log.error_on_exception("Failed relocating:", repr(refpath)):
            # locate_file() now resolves under the *new* mode, so this is
            # the destination path for the file's basename.
            desired_loc = config.locate_file(os.path.basename(refpath), observatory=self.observatory)
            if desired_loc != refpath:
                if os.path.exists(desired_loc):
                    # Something already occupies the destination; only
                    # clobber it when --organize-delete-junk was given.
                    if not self.args.organize_delete_junk:
                        log.warning("Link or directory already exists at", repr(desired_loc), "Skipping", repr(refpath))
                        continue
                    utils.remove(desired_loc, observatory=self.observatory)
                # Only move files when the cache is writable; otherwise
                # writable_cache_or_info() logs the skip message.
                if config.writable_cache_or_info("Skipping file relocation from", repr(refpath), "to", repr(desired_loc)):
                    log.info("Relocating", repr(refpath), "to", repr(desired_loc))
                    shutil.move(refpath, desired_loc)
            else:
                # Source and destination coincide: nothing to move.
                if old_mode != new_mode:
                    log.verbose_warning("Keeping existing cached file", repr(desired_loc), "already in target mode", repr(new_mode))
                else:
                    log.verbose_warning("No change in subdirectory mode", repr(old_mode), "skipping reorganization of", repr(refpath))
    # After flattening an instrument-organized cache, the now-empty
    # per-instrument directories are removed.
    if new_mode == "flat" and old_mode == "instrument":
        log.info("Reorganizing from 'instrument' to 'flat' cache, removing instrument directories.")
        for instrument in self.locator.INSTRUMENTS:
            self.remove_dir(instrument)
def list_mappings(self):
    """Consult the server and print the names of all CRDS mappings associated
    with the given contexts.
    """
    names = self.get_context_mappings()
    if self.args.full_path:
        # Expand each basename to its location in the local CRDS cache.
        names = [config.locate_file(name, self.observatory) for name in names]
    else:
        names = list(names)
    _print_list(names)
def _check_value(self, filename, value):
    """Verify that the file named `value` defined somewhere in certified file
    `filename` actually exists in CRDS.  This is useful for e.g. checking that
    a SYNPHOT TMC or TMT filename column value actually exists in CRDS.

    Raises ValueError if `value` is absent from the local CRDS cache.
    """
    log.verbose("Checking file", repr(value), "for existence in CRDS cache.")
    obs = utils.file_to_observatory(self.condition(filename))
    cache_path = config.locate_file(value, obs)
    if os.path.exists(cache_path):
        return
    raise ValueError("Required CRDS file " + repr(value) + " does not exist in CRDS cache.")
def locate_file(self, filename, cache=None):
    """Return the full path for `filename` implementing default CRDS file
    cache location behavior, and verifying that the resulting path is safe.

    If `cache` is defined, override CRDS_PATH with it and strip any directory
    component included in `filename`.
    """
    if cache is not None:
        os.environ["CRDS_PATH"] = cache
        filename = os.path.basename(filename)
    located = config.locate_file(filename, self.observatory)
    # check_path() returns an abspath, which is undesirable for listings,
    # so it is used here for validation only and its result is discarded.
    config.check_path(located)
    return located
def test_sync_contexts(self):
    """End-to-end sync test: fetch mappings and references for test contexts,
    corrupt the cached references, then verify the --check-files and
    --repair-files behaviors (expected exit status passed as 2nd arg).
    """
    self.run_script("crds.sync --contexts hst_cos.imap")
    for name in crds.get_cached_mapping("hst_cos.imap").mapping_names():
        self.assert_crds_exists(name)

    fetch_cmd = "crds.sync --contexts hst_cos_deadtab.rmap --fetch-references"
    self.run_script(fetch_cmd)
    for name in crds.get_cached_mapping("hst_cos_deadtab.rmap").reference_names():
        self.assert_crds_exists(name)
        # Corrupt each cached reference so the check/repair passes below
        # have something to detect and fix.
        with open(config.locate_file(name, "hst"), "w+") as handle:
            handle.write("foo")

    self.run_script(fetch_cmd + " --check-files", 2)
    self.run_script(fetch_cmd + " --check-files --repair-files", 2)
    self.run_script(fetch_cmd + " --check-files --repair-files")
    self.run_script(fetch_cmd + " --check-files --repair-files --check-sha1sum")
def get_exptypes(instrument=None):
    """Using an arbitrary reference from an instrument that matches using
    EXP_TYPE, return the sorted list of all EXP_TYPE values defined in the
    JWST schema, optionally restricted to those for `instrument`.
    XXX kludged
    """
    schema_path = config.locate_file("jwst_nirspec_area_0005.fits", "jwst")
    exp_types = set()
    for info in get_schema_tpninfos(schema_path):
        if info.name in ("EXP_TYPE", "META.EXPOSURE.TYPE"):
            exp_types.update(info.values)
    if instrument is not None:
        prefix = INSTR_PREFIX[instrument.lower()]
        exp_types = {value for value in exp_types if value.startswith(prefix)}
    return sorted(exp_types)
def remove_files(self, files, kind):
    """Remove the list of `files` basenames which are converted to fully
    specified CRDS paths using the locator module associated with context.

    Parameters
    ----------
    files : list of str
        Basenames of CRDS files to purge from the cache.
    kind : str
        Human readable file category used in log messages, e.g. "mapping".
    """
    if not files:
        log.verbose("No " + kind + "s to remove.")
    # GEIS references come in header/data pairs: for every *.r?h header,
    # also schedule the corresponding *.r?d data file for removal.
    files2 = set(files)
    for filename in files:
        if re.match(r"\w+\.r[0-9]h", filename):
            files2.add(filename[:-1] + "d")
    # BUG FIX: iterate over the augmented set `files2`, not `files`;
    # previously the GEIS *.r?d companions were computed but never removed.
    # sorted() gives deterministic removal order for logging.
    for filename in sorted(files2):
        with log.error_on_exception("Failed purging", kind, repr(filename)):
            where = config.locate_file(filename, self.observatory)
            utils.remove(where, observatory=self.observatory)
def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
    """Check one `file` against the provided CRDS database `info` dictionary.

    `info` is read for keys: "size", "sha1sum", "state", "rejected",
    "blacklisted".  Length/checksum mismatches trigger error_and_repair();
    rejected/blacklisted files are optionally purged based on command line
    switches.  The remaining parameters only feed the progress message.
    """
    path = config.locate_file(file, observatory=self.observatory)
    base = os.path.basename(file)
    n_bytes = int(info["size"])

    # Only output verification info for slow sha1sum checks by default
    log.verbose(
        api.file_progress(
            "Verifying", base, path, n_bytes, bytes_so_far, total_bytes, nth_file, total_files),
        verbosity=10 if self.args.check_sha1sum else 60)

    if not os.path.exists(path):
        # Missing files already known to be bad are not re-reported.
        if base not in self.bad_files:
            log.error("File", repr(base), "doesn't exist at", repr(path))
        return

    # Checks which force repairs should do if/else to avoid repeat repair
    size = os.stat(path).st_size
    if int(info["size"]) != size:
        self.error_and_repair(path, "File", repr(base), "length mismatch LOCAL size=" + srepr(size),
                              "CRDS size=" + srepr(info["size"]))
    elif self.args.check_sha1sum or config.is_mapping(base):
        # Checksums are only computed when explicitly requested or for
        # (small) mapping files, since hashing large references is slow.
        log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=60)
        sha1sum = utils.checksum(path)
        if info["sha1sum"] == "none":
            log.warning("CRDS doesn't know the checksum for", repr(base))
        elif info["sha1sum"] != sha1sum:
            self.error_and_repair(path, "File", repr(base), "checksum mismatch CRDS=" + repr(info["sha1sum"]),
                                  "LOCAL=" + repr(sha1sum))

    if info["state"] not in ["archived", "operational"]:
        log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))

    # NOTE(review): "rejected"/"blacklisted" arrive as string flags, hence
    # the comparisons against "false" rather than boolean tests.
    if info["rejected"] != "false":
        log.verbose_warning("File", repr(base), "has been explicitly rejected.", verbosity=60)
        if self.args.purge_rejected:
            self.remove_files([path], "file")
        return

    if info["blacklisted"] != "false":
        log.verbose_warning("File", repr(base), "has been blacklisted or is dependent on a blacklisted file.",
                            verbosity=60)
        if self.args.purge_blacklisted:
            self.remove_files([path], "file")
        return
    return
def get_comptab_info(self, synname):
    """Dump the FILENAME column of the component table implied by `synname`
    (e.g. "tmc" --> something_tmc.fits) and use pysynphot to interpret the
    embedded iraf$-style path env var into a normal filepath.  This is used
    to locate files within appropriate sub-directories of <synphot_dir>/comp.

    Return the mapping from a component file basename as defined in the CRDS
    rmap to the absolute path in a CDBS-style synphot file repo being created.

    Returns { component_basename : abs_pysyn_path, ...}
    """
    # Silence known-noisy pysynphot warnings before the deferred import below.
    for msg in SYNPHOT_IGNORE:
        warnings.filterwarnings("ignore",msg)
    from pysynphot import locations
    filekind = synname + "tab"
    # NOTE: this local shadows the `rmap` module for the rest of the function.
    rmap = self.imap.get_rmap(filekind)
    references = rmap.reference_names()
    assert len(references) == 1, \
        "More than one '%s' reference name mentioned in '%s'." % \
        (synname, rmap.name)
    tab_name = references[0]
    # rmap object locate() not module function.
    # NOTE(review): the call below is config.locate_file() with no explicit
    # observatory, so it relies on default observatory resolution -- confirm
    # this is intended given the comment above.
    tab_path = config.locate_file(tab_name)
    # CRDS abstract table object nominally from HDU 1
    table = tables.tables(tab_path)[0]
    fileinfo = {}
    for syn_name in table.columns["FILENAME"]:
        # Entries appear to have the form "irafvar$basename[param]" based on
        # the splits below -- confirm against actual table contents.
        iraf_path, basename = syn_name.split("$")
        name = basename.split("[")[0]  # remove parameterization
        dollar_syn_name = syn_name.split("[")[0]
        # Use pysynphot to interpret iraf_path
        cdbs_filepath = os.path.abspath(
            locations.irafconvert(dollar_syn_name))
        fileinfo[name] = cdbs_filepath
    return fileinfo
def get_refactoring_header(rmapping, refname, required_keywords):
    """Create a composite header which is derived from the file contents
    overridden by any values as they appear in the rmap.
    """
    rmapping = rmap.asmapping(rmapping)
    # A fallback source of information is the reference file headers
    date_time_keys = ("META.OBSERVATION.DATE", "META.OBSERVATION.TIME", "DATE-OBS", "TIME-OBS")
    ref_path = config.locate_file(refname, rmapping.observatory)
    header = rmapping.get_refactor_header(
        ref_path, extra_keys=date_time_keys + required_keywords)
    # The primary source of information is the original rmap and the matching
    # values defined there
    match_headers = matches.find_match_paths_as_dict(rmapping.filename, refname)
    assert len(match_headers) == 1, "Can't refactor file with more than one match: " + srepr(refname)
    # Combine the two, using the rmap values to override anything duplicated
    # in the reffile header
    header.update(match_headers[0])
    return header
def locate(self, name):
    """Return the standard CRDS cache location for file `name`."""
    cache_path = config.locate_file(name, observatory=self.observatory)
    return cache_path
def assert_crds_not_exists(self, filename, observatory="hst"):
    """Assert that `filename` is not present in the local CRDS cache for
    `observatory`.
    """
    cached_path = config.locate_file(filename, observatory)
    self.assertFalse(os.path.exists(cached_path))