def list_dataset_headers(self):
    """List dataset header info for self.args.dataset_headers with respect to
    self.args.contexts.
    """
    # Support @-files for ids specified on command line
    ids = self.get_words(self.args.dataset_headers)
    products_seen, exposures_seen = set(), set()
    expanded_ids = []
    for context in self.contexts:
        with log.error_on_exception("Failed fetching dataset parameters with respect to", repr(context),
                                    "for", repr(self.args.dataset_headers)):
            for returned_id, header in api.get_dataset_headers_unlimited(context, ids):
                product, exposure = returned_id.split(":")
                if isinstance(header, str):
                    log.error("No header for", repr(returned_id), ":", repr(header))  # header is reason
                    continue
                if self.args.first_id_expansion_only and product in products_seen:
                    continue
                products_seen.add(product)
                exposures_seen.add(exposure)
                if self.args.id_expansions_only:
                    expanded_ids += [returned_id + (" " + context if len(self.contexts) > 1 else "")]
                else:
                    self.dump_header(context, returned_id, header)
    if self.args.id_expansions_only:
        for expanded in sorted(expanded_ids):
            print(expanded)

def verify_context_change(self, old_context):
    """Verify that the starting and post-sync contexts are different, or issue an error."""
    new_context = heavy_client.load_server_info(self.observatory).operational_context
    if old_context == new_context:
        log.error("Expected operational context switch but starting and post-sync contexts are both",
                  repr(old_context))
    else:
        log.info("Operational context updated from", repr(old_context), "to", repr(new_context))

def rmap_check_modifications(old_rmap, new_rmap, old_ref, new_ref, expected=("add",)): """Check the differences between `old_rmap` and `new_rmap` and make sure they're limited to the types listed in `expected`. expected should be "add" or "replace". Returns as_expected, True IFF all rmap modifications match `expected`. """ diffs = diff.mapping_diffs(old_rmap, new_rmap) as_expected = True for difference in diffs: actual = diff.diff_action(difference) if actual in expected: pass # white-list so it will fail when expected is bogus. else: log.error("Expected one of", srepr(expected), "but got", srepr(actual), "from change", srepr(difference)) as_expected = False with open(old_rmap) as pfile: old_count = len([line for line in pfile.readlines() if os.path.basename(old_ref) in line]) with open(new_rmap) as pfile: new_count = len([line for line in pfile.readlines() if os.path.basename(new_ref) in line]) if "replace" in expected and old_count != new_count: log.error("Replacement COUNT DIFFERENCE replacing", srepr(old_ref), "with", srepr(new_ref), "in", srepr(old_rmap), old_count, "vs.", new_count) as_expected = False return as_expected
def verify_files(self, files):
    """Check `files` against the CRDS server database to ensure integrity and check reject status."""
    basenames = [os.path.basename(file) for file in files]
    try:
        log.verbose("Downloading verification info for", len(basenames), "files.", verbosity=10)
        infos = api.get_file_info_map(
            observatory=self.observatory, files=basenames,
            fields=["size", "rejected", "blacklisted", "state", "sha1sum"])
    except Exception as exc:
        log.error("Failed getting file info. CACHE VERIFICATION FAILED. Exception:", repr(str(exc)))
        return
    bytes_so_far = 0
    total_bytes = api.get_total_bytes(infos)
    for nth_file, file in enumerate(files):
        bfile = os.path.basename(file)
        if infos[bfile] == "NOT FOUND":
            log.error("CRDS has no record of file", repr(bfile))
        else:
            self.verify_file(file, infos[bfile], bytes_so_far, total_bytes, nth_file, len(files))
            bytes_so_far += int(infos[bfile]["size"])

def _get_cache_filelist_and_report_errors(bestrefs):
    """Compute the list of files to download based on the `bestrefs` dictionary,
    skimming off and reporting errors, and raising an exception on the last error seen.

    Return the list of files to download,  collapsing complex return types like
    tuples and dictionaries into a list of simple filenames.
    """
    wanted = []
    last_error = None
    for filetype, refname in bestrefs.items():
        if isinstance(refname, tuple):
            wanted.extend(list(refname))
        elif isinstance(refname, dict):
            wanted.extend(refname.values())
        elif isinstance(refname, str):
            if "NOT FOUND" in refname:
                if "n/a" in refname.lower():
                    log.verbose("Reference type", srepr(filetype),
                                "NOT FOUND. Skipping reference caching/download.", verbosity=70)
                else:
                    last_error = CrdsLookupError(
                        "Error determining best reference for", srepr(filetype),
                        " = ", str(refname)[len("NOT FOUND"):])
                    log.error(str(last_error))
            else:
                log.verbose("Reference type", srepr(filetype), "defined as", srepr(refname))
                wanted.append(refname)
        else:
            last_error = CrdsLookupError("Unhandled bestrefs return value type for", srepr(filetype))
            log.error(str(last_error))
    if last_error is not None:
        raise last_error
    return wanted

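# Illustrative sketch (not part of the original module): the kinds of values a
# bestrefs dictionary can carry and how they collapse to a flat download list.
# The reference type names and file names below are hypothetical.
#
#   bestrefs = {
#       "biasfile": "x1u1459il_bia.fits",                  # plain filename
#       "mdriztab": ("a_mdz.fits", "b_mdz.fits"),          # tuple of filenames
#       "photom": {"detector1": "c_pht.fits"},             # dict of filenames
#       "darkfile": "NOT FOUND n/a",                       # skipped quietly, no error
#   }
#   _get_cache_filelist_and_report_errors(bestrefs)
#   # --> ["x1u1459il_bia.fits", "a_mdz.fits", "b_mdz.fits", "c_pht.fits"]
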
def _check_error(self, response, xpath_spec, error_prefix):
    """Extract the `xpath_spec` text from `response`,  if present issue a log ERROR with
    `error_prefix` and the response `xpath_spec` text then raise an exception.  This may
    result in multiple ERROR messages.

    Issue a log ERROR for each form error,  then raise an exception if any errors are found.

    returns None
    """
    errors = 0
    if response.ok:
        error_msg_parse = html.fromstring(response.text).xpath(xpath_spec)
        for parse in error_msg_parse:
            error_message = parse.text.strip().replace("\n", "")
            if error_message:
                if error_message.startswith("ERROR: "):
                    error_message = error_message[len("ERROR: "):]   # strip the "ERROR: " prefix
                errors += 1
                log.error(error_prefix, error_message)
    else:
        log.error("CRDS server responded with HTTP error status", response.status_code)
        errors += 1
    if errors:
        raise CrdsWebError("A web transaction with the CRDS server had errors.")

def download_files(self, downloads, localpaths):
    """Serial file-by-file download."""
    download_metadata = get_download_metadata()
    self.info_map = {}
    for filename in downloads:
        self.info_map[filename] = download_metadata.get(filename, "NOT FOUND unknown to server")
    if config.writable_cache_or_verbose("Readonly cache, skipping download of (first 5):",
                                        repr(downloads[:5]), verbosity=70):
        bytes_so_far = 0
        total_files = len(downloads)
        total_bytes = get_total_bytes(self.info_map)
        for nth_file, name in enumerate(downloads):
            try:
                if "NOT FOUND" in self.info_map[name]:
                    raise CrdsDownloadError("file is not known to CRDS server.")
                bytes, path = self.catalog_file_size(name), localpaths[name]
                log.info(file_progress("Fetching", name, path, bytes, bytes_so_far,
                                       total_bytes, nth_file, total_files))
                self.download(name, path)
                bytes_so_far += os.stat(path).st_size
            except Exception as exc:
                if self.raise_exceptions:
                    raise
                else:
                    log.error("Failure downloading file", repr(name), ":", str(exc))
        return bytes_so_far
    return 0

def get_total_bytes(info_map):
    """Return the total byte count of file info map `info_map`."""
    try:
        return sum([int(info_map[name]["size"]) for name in info_map
                    if "NOT FOUND" not in info_map[name]])
    except Exception as exc:
        log.error("Error computing total byte count: ", str(exc))
        return -1

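# Illustrative sketch (not part of the original module) of the info_map shape
# get_total_bytes() expects;  the file names and sizes below are hypothetical:
#
#   info_map = {
#       "hst_acs_biasfile_0250.rmap": {"size": "4096", "sha1sum": "..."},
#       "unknown_file.fits": "NOT FOUND unknown to server",   # excluded from the sum
#   }
#   get_total_bytes(info_map)   # --> 4096
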
def sync_datasets(self):
    """Sync mappings and references for datasets with respect to `self.contexts`."""
    if not self.contexts:
        log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.")
        sys.exit(-1)
    active_references = []
    for context in self.contexts:
        if self.args.dataset_ids:
            if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                with open(self.args.dataset_ids[0][1:]) as pfile:
                    self.args.dataset_ids = pfile.read().splitlines()
            with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
        for dataset in self.args.dataset_files or self.args.dataset_ids:
            log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
            with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                if self.args.dataset_files:
                    headers = {dataset: data_file.get_conditioned_header(dataset, observatory=self.observatory)}
                else:
                    headers = {dataset_id: header for (dataset_id, header) in id_headers.items()
                               if dataset.upper() in dataset_id}
                for assc_dataset, header in headers.items():
                    with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset),
                                                "under context", repr(context)):
                        bestrefs = crds.getrecommendations(
                            header, context=context, observatory=self.observatory,
                            ignore_cache=self.args.ignore_cache)
                        log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                        active_references.extend(bestrefs.values())
    active_references = [ref for ref in active_references if not ref.startswith("NOT FOUND")]
    log.verbose("Syncing references:", repr(active_references))
    return list(set(active_references))

def error_and_repair(self, file, *args, **keys):
    """Issue an error message and repair `file` if requested by command line args."""
    log.error(*args, **keys)
    if self.args.repair_files:
        if config.writable_cache_or_info("Skipping remove and re-download of", repr(file)):
            log.info("Repairing file", repr(file))
            utils.remove(file, observatory=self.observatory)
            self.dump_files(self.default_context, [file])

def handle_fail(self, message):
    """Generic "fail" handler: report the remote process fatal error / failure and
    issue an error() message, then stop monitoring / exit.
    """
    log.error(self.format_remote("Processing failed:", message.data))
    self.result = message.data["result"]
    return _STATUS_FAILED

def verify_archive_file(self, filename):
    """Verify the likely presence of `filename` on the archive web server.  Issue an ERROR if absent."""
    url = self.archive_url(filename)
    response = requests.head(url)
    if response.status_code in [200,]:
        log.verbose("File", repr(filename), "is available from", repr(url))
        return self.check_length(filename, response)
    else:
        log.error("File", repr(filename), "failed HTTP HEAD with code =", response.status_code,
                  "from", repr(url))
        self.missing_files.append(filename)
        return False

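# Standalone sketch (not part of the original module) of the HEAD-based
# availability check used above;  the URL is hypothetical and only the
# third-party `requests` package is assumed:
#
#   import requests
#   response = requests.head("https://archive.example.org/crds/references/hst/x1u1459il_bia.fits")
#   available = response.status_code == 200 and "content-length" in response.headers
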
def run(self, progress_callback=None):
    """
    Create stsynphot bandpass objects from the observation modes in the obsmodes file.

    Emits appropriate log messages and returns True if validations succeed,
    False if there's an error.
    """
    failed = 0
    with fits.open(self.obsmodes_file) as hdul:
        total_modes = len(hdul[-1].data)
        log.info("Creating bandpass objects from {} observation modes".format(total_modes))
        if self.processes > 1:
            with Pool(processes=self.processes) as pool:
                for start_index in range(0, total_modes, self.batch_size):
                    end_index = start_index + self.batch_size
                    results = pool.starmap(
                        _test_stsynphot_mode,
                        [(self.synphot_root, m) for m in hdul[-1].data["OBSMODE"][start_index:end_index]])
                    for i, (result, errors, warns) in enumerate(results):
                        if not result:
                            failed += 1
                        for warning in warns:
                            log.warning(warning)
                        for error in errors:
                            log.error(error)
                        if progress_callback:
                            progress_callback(start_index + i + 1, total_modes)
        else:
            for i, obsmode in enumerate(hdul[-1].data["OBSMODE"]):
                result, errors, warns = _test_stsynphot_mode(self.synphot_root, obsmode)
                if not result:
                    failed += 1
                for warning in warns:
                    log.warning(warning)
                for error in errors:
                    log.error(error)
                if progress_callback:
                    progress_callback(i + 1, total_modes)
    if failed > 0:
        log.info("{} / {} observation modes failed".format(failed, total_modes))
    else:
        log.info("Congratulations, all observation modes succeeded!")
    return failed == 0

def ignore_errors(self, i, affected):
    """Check each context switch for errors during the bestrefs run.  Fatal-error,
    ignore, or log an ERROR depending on command line switches;  return True IFF
    the errors should be ignored.
    """
    ignore = False
    if affected.bestrefs_status != 0:
        message = log.format("CRDS server-side errors for", i, affected.computation_dir)
        if self.args.ignore_errant_history:
            ignore = True
        if self.args.fail_on_errant_history:
            self.fatal_error(message)
        else:
            log.error(message)
    return ignore

def _check_direction(self, hdul):
    """
    Confirm that all rows in the graph point to higher INNODE values
    than themselves, which prevents cycles.
    """
    graph = hdul[-1].data
    result = True
    if not (graph["INNODE"] < graph["OUTNODE"]).all():
        log.error("TMG contains rows with INNODE >= OUTNODE.")
        result = False
    return result

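# Standalone illustration (not part of the original module) of the
# INNODE < OUTNODE acyclicity test applied above;  the 3-row graph is
# hypothetical and only numpy is assumed:
#
#   import numpy as np
#   graph = np.rec.fromrecords([(1, 2), (2, 3), (3, 3)], names=["INNODE", "OUTNODE"])
#   (graph["INNODE"] < graph["OUTNODE"]).all()   # --> False, the last row points to itself
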
def check_length(self, filename, response):
    """Check the content-length reported by HEAD against the CRDS database's file size."""
    archive_size = int(response.headers["content-length"])
    crds_size = int(self.file_info[filename]["size"])
    if archive_size != crds_size:
        log.error("File", repr(filename), "available but length bad.  crds size:", crds_size,
                  "archive size:", archive_size)
        self.bad_length_files.append(filename)
        return False
    else:
        log.verbose("File", repr(filename), "lengths agree:", crds_size)
        return True

def main(self):
    """Process command line parameters into a context and list of reference files.
    Print out the match tuples within the context which contain the reference files.
    """
    if self.matched_files:
        self.dump_reference_matches()
    elif self.args.datasets or self.args.instrument:
        self.dump_dataset_headers()
    else:
        self.print_help()
        log.error("Specify --files to dump reference match cases or --datasets to dump dataset matching parameters.")
    return log.errors()

def get_datamodels():
    """Defer the `jwst.datamodels` import until it is actually needed;  error out
    if the 'jwst' package is not installed.
    """
    try:
        from jwst import datamodels   # this is fatal.
    except ImportError:
        log.error("CRDS requires installation of the 'jwst' package to operate on JWST files.")
        raise
    global MODEL
    if MODEL is None:
        with log.error_on_exception("Failed constructing basic JWST DataModel"):
            MODEL = datamodels.DataModel()
    return datamodels

def handle_done(self, message):
    """Generic "done" handler: issue an info() message and stop monitoring / exit."""
    status = message.data["status"]
    result = message.data.get("result", None)
    if status == 0:
        log.info(self.format_remote("COMPLETED:", result))
    elif status == 1:
        log.fatal_error(self.format_remote("FAILED:", result))
    elif status == 2:
        log.error(self.format_remote("CANCELLED:", result))
    else:
        log.info(self.format_remote("DONE:", result))
    self.result = result
    return result

def get_level_pipeline(self, level, exp_type):
    """Interpret the level_pipeline_exptypes data structure relative to
    processing `level` and `exp_type` to determine a pipeline .cfg file.

    Return [ pipeline .cfg ]  or  []
    """
    pipeline_exptypes = self.loaded_cfg.level_pipeline_exptypes[level]
    for mapping in pipeline_exptypes:
        for pipeline, exptypes in mapping.items():
            for exptype_pattern in exptypes:
                if glob_match(exptype_pattern, exp_type):
                    return [pipeline]
    log.error("Unhandled EXP_TYPE", srepr(exp_type), "for", srepr(level))
    return []

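# Hypothetical sketch (not part of the original module) of the
# level_pipeline_exptypes structure this method walks:  per level, a list of
# one-entry mappings from pipeline .cfg name to EXP_TYPE glob patterns.  The
# names and patterns below are invented for illustration only:
#
#   level_pipeline_exptypes = {
#       "level2a": [
#           {"calwebb_dark.cfg": ["*_DARK"]},
#           {"calwebb_detector1.cfg": ["NRC_*", "MIR_IMAGE"]},
#       ],
#   }
#   # self.get_level_pipeline("level2a", "NRC_IMAGE") would return ["calwebb_detector1.cfg"]
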
def handle_done(self, message):
    """Generic "done" handler: issue an info() message and stop monitoring / exit."""
    status = message.data["status"]
    result = message.data.get("result", None)
    if status == 0:
        log.info(self.format_remote("COMPLETED:", result))
    elif status == 1:
        log.error(self.format_remote("FAILED:", result))
    elif status == 2:
        log.error(self.format_remote("CANCELLED:", result))
    else:
        log.info(self.format_remote("DONE:", result))
    self.result = result
    return result

def scan_for_nonsubmitted_ingests(self, ingest_info):
    """Check for junk in the submitter's ingest directory,  left over files not
    in the current submission,  and fail if any are found.
    """
    submitted_basenames = [os.path.basename(filepath) for filepath in self.files]
    msg = None
    for ingested in ingest_info.keys():
        if ingested not in submitted_basenames:
            msg = log.format("Non-submitted file", log.srepr(ingested),
                             "is already in the CRDS server's ingest directory.  "
                             "Delete it (--wipe-existing-files or web page Upload Files panel) or submit it.")
            log.error(msg)
    if msg is not None:
        raise exceptions.CrdsExtraneousFileError(
            "Unexpected files already delivered to CRDS server.  See ERROR messages.")

def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
    """Check one `file` against the provided CRDS database `info` dictionary."""
    path = config.locate_file(file, observatory=self.observatory)
    base = os.path.basename(file)
    n_bytes = int(info["size"])

    # Only output verification info for slow sha1sum checks by default
    log.verbose(
        api.file_progress("Verifying", base, path, n_bytes, bytes_so_far, total_bytes,
                          nth_file, total_files),
        verbosity=10 if self.args.check_sha1sum else 60)

    if not os.path.exists(path):
        if base not in self.bad_files:
            log.error("File", repr(base), "doesn't exist at", repr(path))
        return

    # Checks which force repairs should do if/else to avoid repeat repair
    size = os.stat(path).st_size
    if int(info["size"]) != size:
        self.error_and_repair(path, "File", repr(base), "length mismatch LOCAL size=" + srepr(size),
                              "CRDS size=" + srepr(info["size"]))
    elif self.args.check_sha1sum or config.is_mapping(base):
        log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=60)
        sha1sum = utils.checksum(path)
        if info["sha1sum"] == "none":
            log.warning("CRDS doesn't know the checksum for", repr(base))
        elif info["sha1sum"] != sha1sum:
            self.error_and_repair(path, "File", repr(base), "checksum mismatch CRDS=" + repr(info["sha1sum"]),
                                  "LOCAL=" + repr(sha1sum))

    if info["state"] not in ["archived", "operational"]:
        log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))

    if info["rejected"] != "false":
        log.verbose_warning("File", repr(base), "has been explicitly rejected.", verbosity=60)
        if self.args.purge_rejected:
            self.remove_files([path], "file")
        return

    if info["blacklisted"] != "false":
        log.verbose_warning("File", repr(base), "has been blacklisted or is dependent on a blacklisted file.",
                            verbosity=60)
        if self.args.purge_blacklisted:
            self.remove_files([path], "file")
        return
    return

def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
    """Check one `file` against the provided CRDS database `info` dictionary."""
    path = rmap.locate_file(file, observatory=self.observatory)
    base = os.path.basename(file)
    n_bytes = int(info["size"])

    # Only output verification info for slow sha1sum checks by default
    log.verbose(
        api.file_progress("Verifying", base, path, n_bytes, bytes_so_far, total_bytes,
                          nth_file, total_files),
        verbosity=10 if self.args.check_sha1sum else 60)

    if not os.path.exists(path):
        log.error("File", repr(base), "doesn't exist at", repr(path))
        return

    # Checks which force repairs should do if/else to avoid repeat repair
    size = os.stat(path).st_size
    if int(info["size"]) != size:
        self.error_and_repair(path, "File", repr(base), "length mismatch LOCAL size=" + srepr(size),
                              "CRDS size=" + srepr(info["size"]))
    elif self.args.check_sha1sum or config.is_mapping(base):
        log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=60)
        sha1sum = utils.checksum(path)
        if info["sha1sum"] == "none":
            log.warning("CRDS doesn't know the checksum for", repr(base))
        elif info["sha1sum"] != sha1sum:
            self.error_and_repair(path, "File", repr(base), "checksum mismatch CRDS=" + repr(info["sha1sum"]),
                                  "LOCAL=" + repr(sha1sum))

    if info["state"] not in ["archived", "operational"]:
        log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))

    if info["rejected"] != "false":
        log.verbose_warning("File", repr(base), "has been explicitly rejected.", verbosity=60)
        if self.args.purge_rejected:
            self.remove_files([path], "files")
        return

    if info["blacklisted"] != "false":
        log.verbose_warning("File", repr(base), "has been blacklisted or is dependent on a blacklisted file.",
                            verbosity=60)
        if self.args.purge_blacklisted:
            self.remove_files([path], "files")
        return
    return

def get_datamodels():
    """Defer datamodels loads until we definitely have a roman usecase.
    Enables light observatory package imports which don't require all
    dependencies when supporting other observatories.
    """
    try:
        from romancal import datamodels
    except ImportError:
        log.error("CRDS requires installation of the 'romancal' package to operate on Roman files.")
        raise
    global MODEL
    if MODEL is None:
        with log.error_on_exception("Failed constructing basic RomanDataModel"):
            MODEL = datamodels.RomanDataModel()
    return datamodels

def check_filenames(self, description, lookup, compname_to_path):
    """
    Check that lookup filenames are correct.
    """
    result = True
    log.info("Confirming correctly formed {} filenames".format(description))
    for row in lookup:
        lookup_filename = utils.get_lookup_filename(
            row["COMPNAME"], os.path.basename(compname_to_path[row["COMPNAME"]]))
        if lookup_filename != row["FILENAME"]:
            log.error("Malformed {} filename, expected '{}', found '{}'".format(
                description, lookup_filename, row["FILENAME"]))
            result = False
    return result

def _check_parametrization(self, hdul):
    """
    If the component table is parametrized, confirm that it has at least
    2 parametrized columns.
    """
    component = hdul[0].header["COMPNAME"]
    column_prefix = utils.get_parametrization_keyword(component)
    if column_prefix is not None:
        column_count = len([n for n in hdul[-1].data.names if n.lower().startswith(column_prefix)])
        if column_count < 2:
            template = "Table is parametrized by {}, but includes only {} columns with that prefix."
            log.error(template.format(column_prefix, column_count))
            return False
    return True

def _check_connectivity(self, hdul):
    """
    Confirm that all rows in the graph can be reached by following
    paths starting at innode = 1.
    """
    # This is inefficient, since the subpaths are often shared, but graphs
    # don't get certified regularly, and it seems worthwhile to sacrifice
    # a little speed to keep the code simple.
    def _get_visited_indexes(graph, innode=1, seen_nodes=None):
        if seen_nodes is None:
            seen_nodes = set()

        if innode in seen_nodes:
            # Cycles will result in an error in _check_direction, so
            # we don't need to log an error here.
            return set()

        seen_nodes = seen_nodes.union({innode})

        selected = graph["INNODE"] == innode
        visited = set(np.where(selected)[0])

        for outnode in np.unique(graph[selected]["OUTNODE"]):
            visited.update(_get_visited_indexes(graph, outnode, seen_nodes=seen_nodes))

        return visited

    graph = hdul[-1].data
    visited_indexes = _get_visited_indexes(graph)

    result = True
    if len(visited_indexes) < len(graph):
        missing_indexes = set(range(len(graph))) - visited_indexes
        log.error("TMG contains disconnected rows at indexes:",
                  ", ".join(str(i) for i in missing_indexes))
        result = False

    return result

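# Standalone illustration (not part of the original module) of the reachability
# walk above on a hypothetical 4-row graph;  only numpy is assumed:
#
#   import numpy as np
#   graph = np.rec.fromrecords(
#       [(1, 2), (1, 3), (2, 3), (7, 8)], names=["INNODE", "OUTNODE"])
#   # Rows 0-2 are reachable by following OUTNODE links from innode=1;
#   # row 3 (7 -> 8) is disconnected and would be reported as an error.
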
def file_transfers(self):
    """Top level control for the primary function of downloading files specified as:

    --files ...             (explicit list of CRDS mappings or references)
    --contexts ...          (many varieties of mapping specifier including --all, --range, etc.)
    --fetch-sqlite-db ...   (Server catalog download as sqlite3 database file.)

    Returns list of downloaded/cached files for later verification if requested.
    """
    if self.args.files:
        self.sync_explicit_files()
        verify_file_list = self.files
    elif self.args.fetch_sqlite_db:
        self.fetch_sqlite_db()
        verify_file_list = []
    elif self.contexts:
        verify_file_list = self.interpret_contexts()
    else:
        log.error("Define --all, --contexts, --last, --range, --files, or --fetch-sqlite-db to sync.")
        sys.exit(-1)
    return verify_file_list

def print_new_files(self):
    """Print the references or mappings which are in the second (new) context
    and not the first (old) context.
    """
    if not config.is_mapping(self.old_file) or not config.is_mapping(self.new_file):
        log.error("--print-new-files really only works for mapping differences.")
        return -1
    old = crds.get_pickled_mapping(self.old_file)   # reviewed
    new = crds.get_pickled_mapping(self.new_file)   # reviewed
    old_mappings = set(old.mapping_names())
    new_mappings = set(new.mapping_names())
    old_references = set(old.reference_names())
    new_references = set(new.reference_names())
    status = 0
    for name in sorted(new_mappings - old_mappings):
        print(name)
        status = 1
    for name in sorted(new_references - old_references):
        print(name)
        status = 1
    return status

def print_new_files(self):
    """Print the references or mappings which are in the second (new) context
    and not the first (old) context.
    """
    if not rmap.is_mapping(self.old_file) or not rmap.is_mapping(self.new_file):
        log.error("--print-new-files really only works for mapping differences.")
        return -1
    old = crds.get_pickled_mapping(self.old_file)   # reviewed
    new = crds.get_pickled_mapping(self.new_file)   # reviewed
    old_mappings = set(old.mapping_names())
    new_mappings = set(new.mapping_names())
    old_references = set(old.reference_names())
    new_references = set(new.reference_names())
    status = 0
    for name in sorted(new_mappings - old_mappings):
        print(name)
        status = 1
    for name in sorted(new_references - old_references):
        print(name)
        status = 1
    return status

def _check_error(self, response, xpath_spec, error_prefix):
    """Extract the `xpath_spec` text from `response`,  if present issue a log ERROR with
    `error_prefix` and the response `xpath_spec` text then raise an exception.  This may
    result in multiple ERROR messages.

    Issue a log ERROR for each form error,  then raise an exception if any errors are found.

    returns None
    """
    error_msg_parse = html.fromstring(response.text).xpath(xpath_spec)
    errors = 0
    for parse in error_msg_parse:
        error_message = parse.text.strip().replace("\n", "")
        if error_message:
            if error_message.startswith("ERROR: "):
                error_message = error_message[len("ERROR: "):]   # strip the "ERROR: " prefix
            errors += 1
            log.error(error_prefix, error_message)
    if errors:
        raise CrdsWebError("A web transaction with the CRDS server had errors.")

def _check_filenames(self, hdul):
    """
    Confirm that values in the FILENAME column are prefixed with a valid path
    variable, and are suffixed with the correct parametrization variable.
    """
    result = True
    for i, row in enumerate(hdul[-1].data):
        expected_path_prefix = utils.get_path_prefix(row["COMPNAME"])
        if row["FILENAME"].split("$")[0] + "$" != expected_path_prefix:
            log.error("Malformed FILENAME value at index", i, "(missing or invalid path prefix)")
            result = False
        param_keyword = utils.get_parametrization_keyword(row["COMPNAME"])
        if param_keyword is None and row["FILENAME"].endswith("]"):
            log.error("Malformed FILENAME value at index", i, "(should not be parametrized)")
            result = False
        elif param_keyword is not None and not row["FILENAME"].lower().endswith("[{}]".format(param_keyword)):
            log.error("Malformed FILENAME value at index", i, "(should be parametrized)")
            result = False
    return result

def check_compname_agreement(
    self,
    description_a,
    compnames_a,
    known_missing_a,
    description_b,
    compnames_b,
    known_missing_b,
):
    """
    Check and report any differences between two sets of component names.
    """
    result = True

    log.info("Checking for components present in {} but missing from {}".format(
        description_a, description_b))
    missing_from_b = (compnames_a - compnames_b) - known_missing_b
    if len(missing_from_b) > 0:
        missing_compnames = ", ".join(missing_from_b)
        message = "Components present in {} but missing from {}: {}".format(
            description_a, description_b, missing_compnames)
        log.error(message)
        result = False

    log.info("Checking for components present in {} but missing from {}".format(
        description_b, description_a))
    missing_from_a = (compnames_b - compnames_a) - known_missing_a
    if len(missing_from_a) > 0:
        missing_compnames = ", ".join(missing_from_a)
        message = "Components present in {} but missing from {}: {}".format(
            description_b, description_a, missing_compnames)
        log.error(message)
        result = False

    return result

def rmap_check_modifications(old_rmap, new_rmap, old_ref, new_ref, expected=("add", )): """Check the differences between `old_rmap` and `new_rmap` and make sure they're limited to the types listed in `expected`. expected should be "add" or "replace". Returns as_expected, True IFF all rmap modifications match `expected`. """ diffs = diff.mapping_diffs(old_rmap, new_rmap) as_expected = True for difference in diffs: actual = diff.diff_action(difference) if actual in expected: pass # white-list so it will fail when expected is bogus. else: log.error("Expected one of", srepr(expected), "but got", srepr(actual), "from change", srepr(difference)) as_expected = False with open(old_rmap) as pfile: old_count = len([ line for line in pfile.readlines() if os.path.basename(old_ref) in line ]) with open(new_rmap) as pfile: new_count = len([ line for line in pfile.readlines() if os.path.basename(new_ref) in line ]) if "replace" in expected and old_count != new_count: log.error("Replacement COUNT DIFFERENCE replacing", srepr(old_ref), "with", srepr(new_ref), "in", srepr(old_rmap), old_count, "vs.", new_count) as_expected = False return as_expected
def download_files(self, downloads, localpaths):
    """Serial file-by-file download."""
    self.info_map = get_file_info_map(
        self.observatory, downloads,
        ["size", "rejected", "blacklisted", "state", "sha1sum", "instrument"])
    if config.writable_cache_or_verbose("Readonly cache, skipping download of (first 5):",
                                        repr(downloads[:5]), verbosity=70):
        bytes_so_far = 0
        total_files = len(downloads)
        total_bytes = get_total_bytes(self.info_map)
        for nth_file, name in enumerate(downloads):
            try:
                if "NOT FOUND" in self.info_map[name]:
                    raise CrdsDownloadError("file is not known to CRDS server.")
                bytes, path = self.catalog_file_size(name), localpaths[name]
                log.info(file_progress("Fetching", name, path, bytes, bytes_so_far,
                                       total_bytes, nth_file, total_files))
                self.download(name, path)
                bytes_so_far += os.stat(path).st_size
            except Exception as exc:
                if self.raise_exceptions:
                    raise
                else:
                    log.error("Failure downloading file", repr(name), ":", str(exc))
        return bytes_so_far
    return 0

def handle_error(self, message):
    """Generic "error" handler: issue an error message from the remote process
    and continue monitoring.
    """
    log.error(self.format_remote(message.data))
    return False

def handle_fail(self, message):
    """Generic "fail" handler: report the remote process fatal error / failure and
    issue an error() message, then stop monitoring / exit.
    """
    log.error(self.format_remote("Processing failed:", message.data))
    return message.data["result"]

def check_naming_consistency(checked_instrument=None, exhaustive_mapping_check=False):
    """Dev function to compare the properties returned by name decomposition
    to the properties determined by file contents and make sure they're the same.
    Also checks rmap membership.

    >> from crds.tests import test_config
    >> old_config = test_config.setup()
    >> check_naming_consistency("acs")
    >> check_naming_consistency("cos")
    >> check_naming_consistency("nicmos")
    >> check_naming_consistency("stis")
    >> check_naming_consistency("wfc3")
    >> check_naming_consistency("wfpc2")
    >> test_config.cleanup(old_config)
    """
    from crds import certify

    for ref in rmap.list_references("*", observatory="hst", full_path=True):
        with log.error_on_exception("Failed processing:", repr(ref)):

            _path, _observ, instrument, filekind, _serial, _ext = ref_properties_from_cdbs_path(ref)

            if checked_instrument is not None and instrument != checked_instrument:
                continue

            if data_file.is_geis_data(ref):
                if os.path.exists(data_file.get_conjugate(ref)):
                    continue
                else:
                    log.warning("No GEIS header for", repr(ref))

            log.verbose("Processing:", instrument, filekind, ref)

            _path2, _observ2, instrument2, filekind2, _serial2, _ext2 = ref_properties_from_header(ref)
            if instrument != instrument2:
                log.error("Inconsistent instruments", repr(instrument), "vs.", repr(instrument2),
                          "for", repr(ref))
            if filekind != filekind2:
                log.error("Inconsistent filekinds", repr(filekind), "vs.", repr(filekind2),
                          "for", repr(ref))

            for pmap_name in reversed(sorted(rmap.list_mappings("*.pmap", observatory="hst"))):

                r = certify.find_governing_rmap(pmap_name, ref)
                if not r:
                    continue

                if r.instrument != instrument:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with name derived instrument", repr(instrument),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with name derived filekind", repr(filekind),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.instrument != instrument2:
                    log.error("Rmap instrument", repr(r.instrument),
                              "inconsistent with content derived instrument", repr(instrument2),
                              "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind2:
                    log.error("Rmap filekind", repr(r.filekind),
                              "inconsistent with content derived filekind", repr(filekind2),
                              "for", repr(ref), "in", repr(pmap_name))

                if not exhaustive_mapping_check:
                    break

            else:
                log.error("Orphan reference", repr(ref), "not found under any context.")