def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
    """Verify one cached `file` against its CRDS database `info` dictionary.

    `info` is read for the keys "size", "sha1sum", "state", "rejected" and
    "blacklisted".  The remaining parameters are only forwarded to the
    progress message.

    A missing file is logged as an error and nothing else is checked.  A
    length mismatch, or a checksum mismatch when `self.args.check_sha1sum`
    is set, is handed to `self.error_and_repair()`.  Rejected or blacklisted
    files produce a warning and are removed when the corresponding
    `self.args.purge_*` switch is set.
    """
    path = rmap.locate_file(file, observatory=self.observatory)
    base = os.path.basename(file)
    expected_size = int(info["size"])
    progress = api.file_progress(
        "Verifying", base, path, expected_size, bytes_so_far, total_bytes, nth_file, total_files)
    log.verbose(progress, verbosity=10)

    if not os.path.exists(path):
        log.error("File", repr(base), "doesn't exist at", repr(path))
        return

    size = os.stat(path).st_size
    if expected_size != size:
        self.error_and_repair(
            path, "File", repr(base),
            "length mismatch LOCAL size=" + srepr(size),
            "CRDS size=" + srepr(info["size"]))
    elif self.args.check_sha1sum:
        # The checksum is only attempted when the length already agrees.
        log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=100)
        sha1sum = utils.checksum(path)
        expected_sha1sum = info["sha1sum"]
        if expected_sha1sum == "none":
            log.warning("CRDS doesn't know the checksum for", repr(base))
        elif expected_sha1sum != sha1sum:
            self.error_and_repair(
                path, "File", repr(base),
                "checksum mismatch CRDS=" + repr(expected_sha1sum),
                "LOCAL=" + repr(sha1sum))

    state = info["state"]
    if state not in ("archived", "operational"):
        log.warning("File", repr(base), "has an unusual CRDS file state", repr(state))

    # A rejected file is never additionally reported as blacklisted.
    if info["rejected"] != "false":
        log.warning("File", repr(base), "has been explicitly rejected.")
        if self.args.purge_rejected:
            self.remove_files([path], "files")
    elif info["blacklisted"] != "false":
        log.warning("File", repr(base), "has been blacklisted or is dependent on a blacklisted file.")
        if self.args.purge_blacklisted:
            self.remove_files([path], "files")
def list_mappings(self):
    """Print the names of all CRDS mappings associated with the given contexts.

    Names come from `self.get_context_mappings()`.  When
    `self.args.full_path` is set each name is printed as the path returned
    by `rmap.locate_file()`, otherwise the name is printed unchanged.
    """
    listing = []
    for name in self.get_context_mappings():
        if self.args.full_path:
            listing.append(rmap.locate_file(name, self.observatory))
        else:
            listing.append(name)
    _print_list(listing)
def organize_references(self, new_mode):
    """Move every reference in the CRDS cache to the path implied by `new_mode`.

    This is used to reorganize existing file caches into new layouts,
    e.g. flat --> by instrument.
    """
    old_refpaths = rmap.list_references("*", observatory=self.observatory, full_path=True)
    old_mode = config.get_crds_ref_subdir_mode(self.observatory)
    log.info("Reorganizing", len(old_refpaths), "references from", repr(old_mode), "to", repr(new_mode))

    config.set_crds_ref_subdir_mode(new_mode, observatory=self.observatory)
    # Read the mode back instead of trusting the request: did it really change?
    new_mode = config.get_crds_ref_subdir_mode(self.observatory)
    mode_changed = old_mode != new_mode

    for current_path in old_refpaths:
        with log.error_on_exception("Failed relocating:", repr(current_path)):
            target_path = rmap.locate_file(os.path.basename(current_path), observatory=self.observatory)

            # Already where the configured mode wants it: only report why.
            if target_path == current_path:
                if mode_changed:
                    log.warning("Keeping existing cached file", repr(target_path),
                                "already in target mode", repr(new_mode))
                else:
                    log.warning("No change in subdirectory mode", repr(old_mode),
                                "skipping reorganization of", repr(current_path))
                continue

            # Something occupies the destination: skip unless told to delete it.
            if os.path.exists(target_path):
                if not self.args.organize_delete_junk:
                    log.warning("Link or directory already exists at", repr(target_path),
                                "Skipping", repr(current_path))
                    continue
                utils.remove(target_path, observatory=self.observatory)

            if config.writable_cache_or_info("Skipping file relocation from", repr(current_path),
                                             "to", repr(target_path)):
                log.info("Relocating", repr(current_path), "to", repr(target_path))
                shutil.move(current_path, target_path)

    if old_mode == "instrument" and new_mode == "flat":
        log.info("Reorganizing from 'instrument' to 'flat' cache, removing instrument directories.")
        for instrument in self.locator.INSTRUMENTS:
            self.remove_dir(instrument)
def organize_references(self, new_mode):
    """Relink all references found in the CRDS cache to the locations implied
    by `new_mode`.

    This is used to reorganize existing file caches into new layouts,
    e.g. flat --> by instrument.
    """
    cached_paths = rmap.list_references("*", observatory=self.observatory, full_path=True)
    old_mode = config.get_crds_ref_subdir_mode(self.observatory)
    log.info("Reorganizing", len(cached_paths), "references from", repr(old_mode), "to", repr(new_mode))

    config.set_crds_ref_subdir_mode(new_mode, observatory=self.observatory)
    # Re-query the mode after setting it: did it really change?
    new_mode = config.get_crds_ref_subdir_mode(self.observatory)

    for source in cached_paths:
        with log.error_on_exception("Failed relocating:", repr(source)):
            destination = rmap.locate_file(os.path.basename(source), observatory=self.observatory)

            if destination == source:
                # Nothing to move; the two messages are emitted only as verbose warnings.
                if old_mode == new_mode:
                    log.verbose_warning("No change in subdirectory mode", repr(old_mode),
                                        "skipping reorganization of", repr(source))
                else:
                    log.verbose_warning("Keeping existing cached file", repr(destination),
                                        "already in target mode", repr(new_mode))
                continue

            if os.path.exists(destination):
                if not self.args.organize_delete_junk:
                    log.warning("Link or directory already exists at", repr(destination),
                                "Skipping", repr(source))
                    continue
                # Clear the occupied destination before moving the file in.
                utils.remove(destination, observatory=self.observatory)

            can_write = config.writable_cache_or_info(
                "Skipping file relocation from", repr(source), "to", repr(destination))
            if can_write:
                log.info("Relocating", repr(source), "to", repr(destination))
                shutil.move(source, destination)

    if old_mode == "instrument" and new_mode == "flat":
        log.info("Reorganizing from 'instrument' to 'flat' cache, removing instrument directories.")
        for instrument in self.locator.INSTRUMENTS:
            self.remove_dir(instrument)
def main(self):
    """Difference the two tables named on the command line and print the result.

    `self.args.fields`, `self.args.ignore_fields` and `self.args.mode_fields`
    are optional comma separated strings; an option left as None becomes an
    empty list.
    """
    args = self.args

    # Resolve both table names to file paths.
    path_a = rmap.locate_file(args.tableA, self.observatory)
    path_b = rmap.locate_file(args.tableB, self.observatory)

    # Expand the comma separated field options into lists.
    fields = [] if args.fields is None else args.fields.split(',')
    ignore_fields = [] if args.ignore_fields is None else args.ignore_fields.split(',')
    mode_fields = [] if args.mode_fields is None else args.mode_fields.split(',')

    difference = RowDiff(path_a, path_b, fields=fields, ignore_fields=ignore_fields,
                         mode_fields=mode_fields)
    print(difference)
def remove_files(self, files, kind):
    """Remove the list of `files` basenames which are converted to fully
    specified CRDS paths using the locator module associated with context.

    Parameters:
        files: sequence of file names to purge.  Each one is resolved with
            `rmap.locate_file()` before removal.
        kind: singular label for the files (e.g. "file"), used only in log
            messages.

    Any name of the form `<word>.r<digit>h` also schedules the companion name
    with the trailing "h" replaced by "d" (presumably the data half of a
    header/data pair -- confirm against the cache layout).  The original code
    computed these companions but then iterated the unexpanded input, so they
    were never removed.

    Each removal runs inside `log.error_on_exception`, one file at a time.
    """
    if not files:
        log.verbose("No " + kind + "s to remove.")
        return

    # Build the purge list in input order, dropping duplicates so that no
    # path is handed to `utils.remove()` twice.
    purge_list = []
    scheduled = set()
    for filename in files:
        candidates = [filename]
        # Anchored at the end: the companion is derived by swapping the last
        # character, which is only meaningful when that character is the "h".
        if re.match(r"\w+\.r[0-9]h$", filename):
            candidates.append(filename[:-1] + "d")
        for candidate in candidates:
            if candidate not in scheduled:
                scheduled.add(candidate)
                purge_list.append(candidate)

    for filename in purge_list:
        with log.error_on_exception("Failed purging", kind, repr(filename)):
            where = rmap.locate_file(filename, self.observatory)
            utils.remove(where, observatory=self.observatory)
def get_refactoring_header(rmapping, refname, required_keywords):
    """Create a composite header which is derived from the file contents
    overridden by any values as they appear in the rmap.

    Parameters:
        rmapping: passed through `rmap.asmapping()` before use.
        refname: name of the reference file being refactored.
        required_keywords: tuple of additional header keywords to fetch; it
            is concatenated onto a tuple of date/time keywords, so it must
            itself be a tuple.

    Returns:
        The reference file header updated with the single set of match
        values found for `refname` in the rmap.

    Raises:
        AssertionError: if `refname` does not have exactly one match in the
            rmap.  This is raised explicitly rather than with `assert` so the
            check survives `python -O`.
    """
    rmapping = rmap.asmapping(rmapping)

    # A fallback source of information is the reference file headers
    header = rmapping.get_refactor_header(
        rmap.locate_file(refname, rmapping.observatory),
        extra_keys=("META.OBSERVATION.DATE", "META.OBSERVATION.TIME", "DATE-OBS", "TIME-OBS") + required_keywords)

    # The primary source of information is the original rmap and the matching values defined there
    headers2 = matches.find_match_paths_as_dict(rmapping.filename, refname)

    # Exactly one match is required.  The two failure modes get distinct
    # messages; the original reported "more than one match" even for zero.
    if not headers2:
        raise AssertionError("Can't refactor file with no match in rmap: " + srepr(refname))
    if len(headers2) != 1:
        raise AssertionError("Can't refactor file with more than one match: " + srepr(refname))

    # Combine the two, using the rmap values to override anything duplicated in the reffile header
    header.update(headers2[0])
    return header
def reference_name_to_ld_tpn_text(filename):
    """Return the text of the _ld.tpn which corresponds to reference `filename`.

    The file is located for the "hst" observatory and the resulting path is
    mapped to a key whose elements are passed to `get_tpn_text()`.
    """
    located = rmap.locate_file(filename, "hst")
    ld_tpn_key = TYPES.reference_name_to_ld_tpn_key(located)
    return get_tpn_text(*ld_tpn_key)