Example #1
 def list_dataset_headers(self):
     """List dataset header info for self.args.dataset_headers with respect to self.args.contexts"""
     # Support @-files for ids specified on command line
     ids = self.get_words(self.args.dataset_headers)
     products_seen, exposures_seen = set(), set()
     expanded_ids = []
     for context in self.contexts:
         with log.error_on_exception("Failed fetching dataset parameters with respect to", repr(context),
                                     "for", repr(self.args.dataset_headers)):
             for returned_id, header in api.get_dataset_headers_unlimited(context, ids):
                 product, exposure = returned_id.split(":")
                 if isinstance(header, str):
                     log.error("No header for", repr(returned_id), ":", repr(header)) # header is reason
                     continue
                 if self.args.first_id_expansion_only and product in products_seen:
                     continue
                 products_seen.add(product)
                 exposures_seen.add(exposure)
                 if self.args.id_expansions_only:
                     expanded_ids += [ returned_id + (" " + context if len(self.contexts) > 1 else "")]
                 else:
                     self.dump_header(context, returned_id, header)
     if self.args.id_expansions_only:
         for expanded in sorted(expanded_ids):
             print(expanded)
Example #2
 def verify_context_change(self, old_context):
     """Verify that the starting and post-sync contexts are different,  or issue an error."""
     new_context = heavy_client.load_server_info(self.observatory).operational_context
     if old_context == new_context:
         log.error("Expected operational context switch but starting and post-sync contexts are both", repr(old_context))
     else:
         log.info("Operational context updated from", repr(old_context), "to",  repr(new_context))
Example #3
 def list_dataset_headers(self):
     """List dataset header info for self.args.dataset_headers with respect to self.args.contexts"""
     # Support @-files for ids specified on command line
     ids = self.get_words(self.args.dataset_headers)
     products_seen, exposures_seen = set(), set()
     expanded_ids = []
     for context in self.contexts:
         with log.error_on_exception(
                 "Failed fetching dataset parameters with repect to",
                 repr(context), "for", repr(self.args.dataset_headers)):
             for returned_id, header in api.get_dataset_headers_unlimited(
                     context, ids):
                 product, exposure = returned_id.split(":")
                 if isinstance(header, str):
                     log.error("No header for", repr(returned_id), ":",
                               repr(header))  # header is reason
                     continue
                 if self.args.first_id_expansion_only and product in products_seen:
                     continue
                 products_seen.add(product)
                 exposures_seen.add(exposure)
                 if self.args.id_expansions_only:
                     expanded_ids += [
                         returned_id +
                         (" " + context if len(self.contexts) > 1 else "")
                     ]
                 else:
                     self.dump_header(context, returned_id, header)
     if self.args.id_expansions_only:
         for expanded in sorted(expanded_ids):
             print(expanded)
Example #4
def rmap_check_modifications(old_rmap, new_rmap, old_ref, new_ref, expected=("add",)):
    """Check the differences between `old_rmap` and `new_rmap` and make sure they're
    limited to the types listed in `expected`.
    
    expected should be "add" or "replace".
    
    Returns as_expected,  True IFF all rmap modifications match `expected`.
    """
    diffs = diff.mapping_diffs(old_rmap, new_rmap)
    as_expected = True
    for difference in diffs:
        actual = diff.diff_action(difference)
        if actual in expected:
            pass   # white-list so it will fail when expected is bogus.
        else:
            log.error("Expected one of", srepr(expected), "but got", srepr(actual),
                      "from change", srepr(difference))
            as_expected = False
    with open(old_rmap) as pfile:
        old_count = len([line for line in pfile.readlines() if os.path.basename(old_ref) in line])
    with open(new_rmap) as pfile:
        new_count = len([line for line in pfile.readlines() if os.path.basename(new_ref) in line])
    if "replace" in expected and old_count != new_count:
        log.error("Replacement COUNT DIFFERENCE replacing", srepr(old_ref), "with", 
                  srepr(new_ref), "in", srepr(old_rmap),
                  old_count, "vs.", new_count)
        as_expected = False
    return as_expected
Example #5
 def verify_files(self, files):
     """Check `files` against the CRDS server database to ensure integrity and check reject status."""
     basenames = [os.path.basename(file) for file in files]
     try:
         log.verbose("Downloading verification info for",
                     len(basenames),
                     "files.",
                     verbosity=10)
         infos = api.get_file_info_map(
             observatory=self.observatory,
             files=basenames,
             fields=["size", "rejected", "blacklisted", "state", "sha1sum"])
     except Exception as exc:
         log.error(
             "Failed getting file info.  CACHE VERIFICATION FAILED.  Exception: ",
             repr(str(exc)))
         return
     bytes_so_far = 0
     total_bytes = api.get_total_bytes(infos)
     for nth_file, file in enumerate(files):
         bfile = os.path.basename(file)
         if infos[bfile] == "NOT FOUND":
             log.error("CRDS has no record of file", repr(bfile))
         else:
             self.verify_file(file, infos[bfile], bytes_so_far, total_bytes,
                              nth_file, len(files))
             bytes_so_far += int(infos[bfile]["size"])
Example #6
def _get_cache_filelist_and_report_errors(bestrefs):
    """Compute the list of files to download based on the `bestrefs` dictionary,
    skimming off and reporting errors, and raising an exception on the last error seen.

    Return the list of files to download,  collapsing complex return types like tuples
    and dictionaries into a list of simple filenames.
    """
    wanted = []
    last_error = None
    for filetype, refname in bestrefs.items():
        if isinstance(refname, tuple):
            wanted.extend(list(refname))
        elif isinstance(refname, dict):
            wanted.extend(refname.values())
        elif isinstance(refname, str):
            if "NOT FOUND" in refname:
                if "n/a" in refname.lower():
                    log.verbose("Reference type", srepr(filetype),
                                "NOT FOUND.  Skipping reference caching/download.", verbosity=70)
                else:
                    last_error = CrdsLookupError(
                        "Error determining best reference for",
                        srepr(filetype), " = ", str(refname)[len("NOT FOUND"):])
                    log.error(str(last_error))
            else:
                log.verbose("Reference type", srepr(filetype), "defined as", srepr(refname))
                wanted.append(refname)
        else:
            last_error = CrdsLookupError(
                "Unhandled bestrefs return value type for", srepr(filetype))
            log.error(str(last_error))
    if last_error is not None:
        raise last_error
    return wanted
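For reference, a minimal sketch of the input shapes this helper collapses; the filenames and types below are hypothetical:

bestrefs = {
    "flat": "n_flat.fits",                 # plain filename: appended to wanted
    "dark": ("d1.fits", "d2.fits"),        # tuple: extended into wanted
    "area": {"WFC1": "a1.fits"},           # dict: values extended into wanted
    "bias": "NOT FOUND n/a",               # n/a: skipped with a verbose message
}
# _get_cache_filelist_and_report_errors(bestrefs) would return:
#   ["n_flat.fits", "d1.fits", "d2.fits", "a1.fits"]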
Example #7
    def _check_error(self, response, xpath_spec, error_prefix):
        """Extract the `xpath_spec` text from `response`,  if present issue a
        log ERROR with  `error_prefix` and the response `xpath_spec` text 
        then raise an exception.  This may result in multiple ERROR messages.
        
        Issue a log ERROR for each form error,  then raise an exception 
        if any errors found.

        returns None
        """
        errors = 0
        if response.ok:
            error_msg_parse = html.fromstring(response.text).xpath(xpath_spec)
            for parse in error_msg_parse:
                error_message = parse.text.strip().replace("\n", "")
                if error_message:
                    if error_message.startswith("ERROR: "):
                        error_message = error_message[len("ERROR: "):]
                    errors += 1
                    log.error(error_prefix, error_message)
        else:
            log.error("CRDS server responded with HTTP error status",
                      response.status_code)
            errors += 1

        if errors:
            raise CrdsWebError(
                "A web transaction with the CRDS server had errors.")
Example #8
 def download_files(self, downloads, localpaths):
     """Serial file-by-file download."""
     download_metadata = get_download_metadata()
     self.info_map = {}
     for filename in downloads:
         self.info_map[filename] = download_metadata.get(
             filename, "NOT FOUND unknown to server")
     if config.writable_cache_or_verbose(
             "Readonly cache, skipping download of (first 5):",
             repr(downloads[:5]),
             verbosity=70):
         bytes_so_far = 0
         total_files = len(downloads)
         total_bytes = get_total_bytes(self.info_map)
         for nth_file, name in enumerate(downloads):
             try:
                 if "NOT FOUND" in self.info_map[name]:
                     raise CrdsDownloadError(
                         "file is not known to CRDS server.")
                 bytes, path = self.catalog_file_size(
                     name), localpaths[name]
                 log.info(
                     file_progress("Fetching", name, path, bytes,
                                   bytes_so_far, total_bytes, nth_file,
                                   total_files))
                 self.download(name, path)
                 bytes_so_far += os.stat(path).st_size
             except Exception as exc:
                 if self.raise_exceptions:
                     raise
                 else:
                     log.error("Failure downloading file", repr(name), ":",
                               str(exc))
         return bytes_so_far
     return 0
Example #9
def get_total_bytes(info_map):
    """Return the total byte count of file info map `info_map`."""
    try:
        return sum([int(info_map[name]["size"]) for name in info_map if "NOT FOUND" not in info_map[name]])
    except Exception as exc:
        log.error("Error computing total byte count: ", str(exc))
        return -1
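A minimal usage sketch, assuming the info-map shape built by the downloader above (the filenames are hypothetical):

info_map = {
    "u1.fits": {"size": "2048"},
    "u2.fits": {"size": "1024"},
    "u3.fits": "NOT FOUND unknown to server",   # filtered out of the sum
}
assert get_total_bytes(info_map) == 3072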
Example #10
 def sync_datasets(self):
     """Sync mappings and references for datasets with respect to `self.contexts`."""
     if not self.contexts:
         log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.""")
         sys.exit(-1)
     active_references = []
     for context in self.contexts:
         if self.args.dataset_ids:
             if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                 with open(self.args.dataset_ids[0][1:]) as pfile:
                     self.args.dataset_ids = pfile.read().splitlines()
             with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                 id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
         for dataset in self.args.dataset_files or self.args.dataset_ids:
             log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
             with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                 if self.args.dataset_files:
                     headers = { dataset : data_file.get_conditioned_header(dataset, observatory=self.observatory) }
                 else:
                     headers = { dataset_id : header for (dataset_id, header) in id_headers.items() if
                                 dataset.upper() in dataset_id }
                 for assc_dataset, header in headers.items():
                     with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset), 
                                                 "under context", repr(context)):   
                         bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory, 
                                                            ignore_cache=self.args.ignore_cache)
                         log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                         active_references.extend(bestrefs.values())
     active_references = [ ref for ref in active_references if not ref.startswith("NOT FOUND") ]
     log.verbose("Syncing references:", repr(active_references))
     return list(set(active_references))
Example #11
 def error_and_repair(self, file, *args, **keys):
     """Issue an error message and repair `file` if requested by command line args."""
     log.error(*args, **keys)
     if self.args.repair_files:
         if config.writable_cache_or_info("Skipping remove and re-download of", repr(file)):
             log.info("Repairing file", repr(file))
             utils.remove(file, observatory=self.observatory)
             self.dump_files(self.default_context, [file]) 
Example #12
    def handle_fail(self, message):
        """Generic "fail" handler reports on remote process fatal error / failure
        and issues an error() message, then stops monitoring / exits.
        """
        log.error(self.format_remote("Processing failed:", message.data))

        self.result = message.data["result"]

        return _STATUS_FAILED
Example #13
 def verify_archive_file(self, filename):
     """Verify the likely presence of `filename` on the archive web server.  Issue an ERROR if absent."""
     url = self.archive_url(filename)
     response = requests.head(url)
     if response.status_code in [200,]:
         log.verbose("File", repr(filename), "is available from", repr(url))
         return self.check_length(filename, response)
     else:
         log.error("File", repr(filename), "failed HTTP HEAD with code =", response.status_code, "from", repr(url))
         self.missing_files.append(filename)
         return False
Example #14
    def run(self, progress_callback=None):
        """
        Create stsynphot bandpass objects from the observation modes in the obsmodes file.
        Emits appropriate log messages and returns True if validations succeed,
        False if there's an error.
        """

        failed = 0
        with fits.open(self.obsmodes_file) as hdul:
            total_modes = len(hdul[-1].data)
            log.info(
                "Creating bandpass objects from {} observation modes".format(
                    total_modes))

            if self.processes > 1:
                with Pool(processes=self.processes) as pool:
                    for start_index in range(0, total_modes, self.batch_size):
                        end_index = start_index + self.batch_size
                        results = pool.starmap(
                            _test_stsynphot_mode,
                            [(self.synphot_root, m) for m in
                             hdul[-1].data["OBSMODE"][start_index:end_index]])
                        for i, (result, errors, warns) in enumerate(results):
                            if not result:
                                failed += 1
                            for warning in warns:
                                log.warning(warning)
                            for error in errors:
                                log.error(error)

                            if progress_callback:
                                progress_callback(start_index + i + 1,
                                                  total_modes)
            else:
                for i, obsmode in enumerate(hdul[-1].data["OBSMODE"]):
                    result, errors, warns = _test_stsynphot_mode(
                        self.synphot_root, obsmode)
                    if not result:
                        failed += 1
                    for warning in warns:
                        log.warning(warning)
                    for error in errors:
                        log.error(error)

                    if progress_callback:
                        progress_callback(i + 1, total_modes)

        if failed > 0:
            log.info("{} / {} observation modes failed".format(
                failed, total_modes))
        else:
            log.info("Congratulations, all observation modes succeeded!")

        return failed == 0
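The batch/Pool pattern above, reduced to a self-contained sketch; the worker and inputs are stand-ins for _test_stsynphot_mode and the OBSMODE column:

from multiprocessing import Pool

def check_mode(root, mode):             # stand-in worker
    return (mode != "bad"), [], []      # (ok, errors, warnings)

if __name__ == "__main__":
    modes = ["a", "b", "bad", "c"]
    batch_size, failed = 2, 0
    with Pool(processes=2) as pool:
        for start in range(0, len(modes), batch_size):
            results = pool.starmap(
                check_mode,
                [("root", m) for m in modes[start:start + batch_size]])
            failed += sum(not ok for ok, _errors, _warns in results)
    print(failed, "/", len(modes), "modes failed")   # 1 / 4 modes failed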
Example #15
 def ignore_errors(self, i, affected):
     """Check each context switch for errors during bestrefs run. Fail or return False on errors."""
     ignore = False
     if affected.bestrefs_status != 0:
         message = log.format("CRDS server-side errors for", i, affected.computation_dir)
         if self.args.ignore_errant_history:
             ignore = True
         if self.args.fail_on_errant_history:
             self.fatal_error(message)
         else:
             log.error(message)
     return ignore          
Example #16
    def _check_direction(self, hdul):
        """
        Confirm that all rows in the graph point to higher INNODE values
        than themselves, which prevents cycles.
        """
        graph = hdul[-1].data
        result = True
        if not (graph["INNODE"] < graph["OUTNODE"]).all():
            log.error("TMG contains rows with INNODE >= OUTNODE.")
            result = False

        return result
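The vectorized comparison works on any structured array with those columns; a toy sketch:

import numpy as np

graph = np.array([(1, 2), (2, 3), (3, 2)],
                 dtype=[("INNODE", "i4"), ("OUTNODE", "i4")])
(graph["INNODE"] < graph["OUTNODE"]).all()   # False: row (3, 2) points backward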
Example #17
 def check_length(self, filename, response):
     """Check the content-length reported by HEAD against the CRDS database's file size."""
     archive_size = int(response.headers["content-length"])
     crds_size = int(self.file_info[filename]["size"])
     if archive_size != crds_size:
         log.error("File", repr(filename), "available but length bad.  crds size:", crds_size,
                   "archive size:", archive_size)
         self.bad_length_files.append(filename)
         return False
     else:
         log.verbose("File", repr(filename), "lengths agree:", crds_size)
         return True
Example #18
 def main(self):
     """Process command line parameters in to a context and list of
     reference files.   Print out the match tuples within the context
     which contain the reference files.
     """
     if self.matched_files:
         self.dump_reference_matches()
     elif self.args.datasets or self.args.instrument:
         self.dump_dataset_headers()
     else:
         self.print_help()
         log.error("Specify --files to dump reference match cases or --datasets to dump dataset matching parameters.")
     return log.errors()
Example #19
def get_datamodels():
    """Defer datamodels loads until we definitely have a jwst usecase.
    Enables light observatory package imports which don't require all
    dependencies when supporting other observatories.
    """
    try:
        from jwst import datamodels  # fatal if the jwst package is absent.
    except ImportError:
        log.error(
            "CRDS requires installation of the 'jwst' package to operate on JWST files.")
        raise
    global MODEL
    if MODEL is None:
        with log.error_on_exception(
                "Failed constructing basic JWST DataModel"):
            MODEL = datamodels.DataModel()
    return datamodels
Example #20
 def handle_done(self, message):
     """Generic "done" handler issue info() message and stops monitoring / exits."""
     status = message.data["status"]
     result = message.data.get("result", None)
     if status == 0:
         log.info(self.format_remote("COMPLETED:", result))
     elif status == 1:
         log.fatal_error(self.format_remote("FAILED:", result))
     elif status == 2:
         log.error(self.format_remote("CANCELLED:", result))
     else:
         log.info(self.format_remote("DONE:", result))
     self.result = result
     return result
Example #21
    def get_level_pipeline(self, level, exp_type):
        """Interpret the level_pipeline_exptypes data structure relative to
        processing `level` and `exp_type` to determine a pipeline .cfg file.

        Return [ pipeline .cfg ]  or  []
        """
        pipeline_exptypes = self.loaded_cfg.level_pipeline_exptypes[level]
        for mapping in pipeline_exptypes:
            for pipeline, exptypes in mapping.items():
                for exptype_pattern in exptypes:
                    if glob_match(exptype_pattern, exp_type):
                        return [pipeline]
        log.error("Unhandled EXP_TYPE", srepr(exp_type), "for", srepr(level))
        return []
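Assuming glob_match has fnmatch-like shell pattern semantics, the lookup reduces to this sketch; the .cfg names and patterns are hypothetical:

from fnmatch import fnmatchcase

level_pipeline_exptypes = [
    {"calwebb_dark.cfg": ["*_DARK"]},
    {"calwebb_detector1.cfg": ["*"]},
]

def level_pipeline(exp_type):
    for mapping in level_pipeline_exptypes:
        for pipeline, exptypes in mapping.items():
            if any(fnmatchcase(exp_type, pattern) for pattern in exptypes):
                return [pipeline]
    return []

level_pipeline("MIR_DARK")    # ['calwebb_dark.cfg']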
Example #22
 def handle_done(self, message):
     """Generic "done" handler issue info() message and stops monitoring / exits."""
     status = message.data["status"]
     result = message.data.get("result", None)
     if status == 0:
         log.info(self.format_remote("COMPLETED:", result))
     elif status == 1:
         log.error(self.format_remote("FAILED:", result))
     elif status == 2:
         log.error(self.format_remote("CANCELLED:", result))
     else:
         log.info(self.format_remote("DONE:", result))
     self.result = result
     return result
Example #23
 def scan_for_nonsubmitted_ingests(self, ingest_info):
     """Check for junk in the submitter's ingest directory,  left over files not
     in the current submission and fail if found.
     """
     submitted_basenames = [ os.path.basename(filepath) for filepath in self.files ]
     msg = None
     for ingested in ingest_info.keys():
         if ingested not in submitted_basenames:
             msg = log.format("Non-submitted file", log.srepr(ingested),
                              "is already in the CRDS server's ingest directory.  Delete it (--wipe-existing-files or web page Upload Files panel) or submit it.")
             log.error(msg)
     if msg is not None:
         raise exceptions.CrdsExtraneousFileError(
             "Unexpected files already delivered to CRDS server. See ERROR messages.")
Example #24
 def get_level_pipeline(self, level, exp_type):
     """Interpret the level_pipeline_exptypes data structure relative to
     processing `level` and `exp_type` to determine a pipeline .cfg file.
 
     Return [ pipeline .cfg ]  or  []
     """
     pipeline_exptypes = self.loaded_cfg.level_pipeline_exptypes[level]
     for mapping in pipeline_exptypes:
         for pipeline, exptypes in mapping.items():
             for exptype_pattern in exptypes:
                 if glob_match(exptype_pattern, exp_type):
                     return [pipeline]
     log.error("Unhandled EXP_TYPE", srepr(exp_type), "for", srepr(level))
     return []
Example #25
    def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
        """Check one `file` against the provided CRDS database `info` dictionary."""
        path = config.locate_file(file, observatory=self.observatory)
        base = os.path.basename(file)
        n_bytes = int(info["size"])

        # Only output verification info for slow sha1sum checks by default
        log.verbose(
            api.file_progress(
                "Verifying", base, path, n_bytes, bytes_so_far, total_bytes, nth_file, total_files),
            verbosity=10 if self.args.check_sha1sum else 60)

        if not os.path.exists(path):
            if base not in self.bad_files:
                log.error("File", repr(base), "doesn't exist at", repr(path))
            return

        # Checks that can trigger repairs use if/else to avoid repeated repairs
        size = os.stat(path).st_size
        if int(info["size"]) != size:
            self.error_and_repair(path, "File", repr(base), "length mismatch LOCAL size=" + srepr(size),
                                  "CRDS size=" + srepr(info["size"]))
        elif self.args.check_sha1sum or config.is_mapping(base):
            log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=60)
            sha1sum = utils.checksum(path)
            if info["sha1sum"] == "none":
                log.warning("CRDS doesn't know the checksum for", repr(base))
            elif info["sha1sum"] != sha1sum:
                self.error_and_repair(path, "File", repr(base), "checksum mismatch CRDS=" + repr(info["sha1sum"]),
                                      "LOCAL=" + repr(sha1sum))

        if info["state"] not in ["archived", "operational"]:
            log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))

        if info["rejected"] != "false":
            log.verbose_warning("File", repr(base), "has been explicitly rejected.", verbosity=60)
            if self.args.purge_rejected:
                self.remove_files([path], "file")
            return

        if info["blacklisted"] != "false":
            log.verbose_warning("File", repr(base), "has been blacklisted or is dependent on a blacklisted file.",
                                verbosity=60)
            if self.args.purge_blacklisted:
                self.remove_files([path], "file")
            return
        return
Example #26
    def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
        """Check one `file` against the provided CRDS database `info` dictionary."""
        path = rmap.locate_file(file, observatory=self.observatory)
        base = os.path.basename(file)
        n_bytes = int(info["size"])
        
        # Only output verification info for slow sha1sum checks by default
        log.verbose(
            api.file_progress(
                "Verifying", base, path, n_bytes, bytes_so_far, total_bytes, nth_file, total_files),
            verbosity=10 if self.args.check_sha1sum else 60)
        
        if not os.path.exists(path):
            log.error("File", repr(base), "doesn't exist at", repr(path))
            return

        # Checks that can trigger repairs use if/else to avoid repeated repairs
        size = os.stat(path).st_size
        if int(info["size"]) != size:
            self.error_and_repair(path, "File", repr(base), "length mismatch LOCAL size=" + srepr(size), 
                                  "CRDS size=" + srepr(info["size"]))
        elif self.args.check_sha1sum or config.is_mapping(base):
            log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=60)
            sha1sum = utils.checksum(path)
            if info["sha1sum"] == "none":
                log.warning("CRDS doesn't know the checksum for", repr(base))
            elif info["sha1sum"] != sha1sum:
                self.error_and_repair(path, "File", repr(base), "checksum mismatch CRDS=" + repr(info["sha1sum"]), 
                                      "LOCAL=" + repr(sha1sum))

        if info["state"] not in ["archived", "operational"]:
            log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))

        if info["rejected"] != "false":
            log.verbose_warning("File", repr(base), "has been explicitly rejected.", verbosity=60)
            if self.args.purge_rejected:
                self.remove_files([path], "files")
            return

        if info["blacklisted"] != "false":
            log.verbose_warning("File", repr(base), "has been blacklisted or is dependent on a blacklisted file.",
                                verbosity=60)
            if self.args.purge_blacklisted:
                self.remove_files([path], "files")
            return
        return
Example #27
def get_datamodels():
    """Defer datamodels loads until we definitely have a roman usecase.
    Enables light observatory package imports which don't require all
    dependencies when supporting other observatories.
    """
    try:
        from romancal import datamodels
    except ImportError:
        log.error(
            "CRDS requires installation of the 'romancal' package to operate on Roman files.")
        raise
    global MODEL
    if MODEL is None:
        with log.error_on_exception(
                "Failed constructing basic RomanDataModel"):
            MODEL = datamodels.RomanDataModel()
    return datamodels
Example #28
 def verify_files(self, files):
     """Check `files` against the CRDS server database to ensure integrity and check reject status."""
     basenames = [os.path.basename(file) for file in files]
     try:
         log.verbose("Downloading verification info for", len(basenames), "files.", verbosity=10)
         infos = api.get_file_info_map(observatory=self.observatory, files=basenames, 
                                      fields=["size","rejected","blacklisted","state","sha1sum"])
     except Exception as exc:
         log.error("Failed getting file info.  CACHE VERIFICATION FAILED.  Exception: ", repr(str(exc)))
         return
     bytes_so_far = 0
     total_bytes = api.get_total_bytes(infos)
     for nth_file, file in enumerate(files):
         bfile = os.path.basename(file)
         if infos[bfile] == "NOT FOUND":
             log.error("CRDS has no record of file", repr(bfile))
         else:
             self.verify_file(file, infos[bfile], bytes_so_far, total_bytes, nth_file, len(files))
             bytes_so_far += int(infos[bfile]["size"])
Example #29
    def check_filenames(self, description, lookup, compname_to_path):
        """
        Check that lookup filenames are correct.
        """
        result = True

        log.info(
            "Confirming correctly formed {} filenames".format(description))
        for row in lookup:
            lookup_filename = utils.get_lookup_filename(
                row["COMPNAME"],
                os.path.basename(compname_to_path[row["COMPNAME"]]))
            if lookup_filename != row["FILENAME"]:
                log.error(
                    "Malformed {} filename, expected '{}', found '{}'".format(
                        description, lookup_filename, row["FILENAME"]))
                result = False

        return result
Example #30
    def _check_parametrization(self, hdul):
        """
        If the component table is parametrized, confirm that it has at least 2
        parametrized columns.
        """
        component = hdul[0].header["COMPNAME"]

        column_prefix = utils.get_parametrization_keyword(component)
        if column_prefix is not None:
            column_count = len([
                n for n in hdul[-1].data.names
                if n.lower().startswith(column_prefix)
            ])
            if column_count < 2:
                template = "Table is parametrized by {}, but includes only {} columns with that prefix."
                log.error(template.format(column_prefix, column_count))
                return False

        return True
Example #31
    def _check_connectivity(self, hdul):
        """
        Confirm that all rows in the graph can be reached by following paths
        starting at innode = 1.
        """

        # This is inefficient, since the subpaths are often shared,
        # but graphs don't get certified regularly, and it seems worthwhile
        # to sacrifice a little speed to keep the code simple.
        def _get_visited_indexes(graph, innode=1, seen_nodes=None):
            if seen_nodes is None:
                seen_nodes = set()

            if innode in seen_nodes:
                # Cycles will result in an error in _check_direction, so
                # we don't need to log an error here.
                return set()

            seen_nodes = seen_nodes.union({innode})

            selected = graph["INNODE"] == innode

            visited = set(np.where(selected)[0])

            for outnode in np.unique(graph[selected]["OUTNODE"]):
                visited.update(
                    _get_visited_indexes(graph, outnode,
                                         seen_nodes=seen_nodes))

            return visited

        graph = hdul[-1].data
        visited_indexes = _get_visited_indexes(graph)

        result = True
        if len(visited_indexes) < len(graph):
            missing_indexes = set(range(len(graph))) - visited_indexes
            log.error("TMG contains disconnected rows at indexes:",
                      ", ".join(str(i) for i in missing_indexes))
            result = False

        return result
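A standalone rendition of the nested traversal on a toy graph; the disconnected row index is what the check reports:

import numpy as np

def visited_indexes(graph, innode=1, seen_nodes=None):
    # Same recursion as _get_visited_indexes above.
    seen_nodes = set() if seen_nodes is None else seen_nodes
    if innode in seen_nodes:
        return set()
    seen_nodes = seen_nodes | {innode}
    selected = graph["INNODE"] == innode
    visited = set(np.where(selected)[0])
    for outnode in np.unique(graph[selected]["OUTNODE"]):
        visited |= visited_indexes(graph, outnode, seen_nodes)
    return visited

graph = np.array([(1, 2), (2, 3), (7, 8)],
                 dtype=[("INNODE", "i4"), ("OUTNODE", "i4")])
visited_indexes(graph)   # {0, 1}: row 2 (7 -> 8) is unreachable from innode 1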
Example #32
 def file_transfers(self):
     """Top level control for the primary function of downloading files specified as:
     
     --files ...      (explicit list of CRDS mappings or references)
     --contexts ...   (many varieties of mapping specifier including --all, --range, etc.)
     --fetch-sqlite-db ...  (Server catalog download as sqlite3 database file.)
     
     Returns list of downloaded/cached files for later verification if requested.
     """
     if self.args.files:
         self.sync_explicit_files()
         verify_file_list = self.files
     elif self.args.fetch_sqlite_db:
         self.fetch_sqlite_db()
         verify_file_list = []
     elif self.contexts:
         verify_file_list = self.interpret_contexts()
     else:
         log.error("Define --all, --contexts, --last, --range, --files, or --fetch-sqlite-db to sync.")
         sys.exit(-1)
     return verify_file_list
Example #33
 def print_new_files(self):
     """Print the references or mappings which are in the second (new) context and not
     the first (old) context.
     """
     if not config.is_mapping(self.old_file) or not config.is_mapping(self.new_file):
         log.error("--print-new-files really only works for mapping differences.")
         return -1
     old = crds.get_pickled_mapping(self.old_file)   # reviewed
     new = crds.get_pickled_mapping(self.new_file)   # reviewed
     old_mappings = set(old.mapping_names())
     new_mappings = set(new.mapping_names())
     old_references = set(old.reference_names())
     new_references = set(new.reference_names())
     status = 0
     for name in sorted(new_mappings - old_mappings):
         print(name)
         status = 1
     for name in sorted(new_references - old_references):
         print(name)
         status = 1
     return status
Example #34
 def print_new_files(self):
     """Print the references or mappings which are in the second (new) context and not
     the first (old) context.
     """
     if not rmap.is_mapping(self.old_file) or not rmap.is_mapping(self.new_file):
         log.error("--print-new-files really only works for mapping differences.")
         return -1
     old = crds.get_pickled_mapping(self.old_file)   # reviewed
     new = crds.get_pickled_mapping(self.new_file)   # reviewed
     old_mappings = set(old.mapping_names())
     new_mappings = set(new.mapping_names())
     old_references = set(old.reference_names())
     new_references = set(new.reference_names())
     status = 0
     for name in sorted(new_mappings - old_mappings):
         print(name)
         status = 1
     for name in sorted(new_references - old_references):
         print(name)
         status = 1
     return status
Example #35
    def _check_error(self, response, xpath_spec, error_prefix):
        """Extract the `xpath_spec` text from `response`,  if present issue a
        log ERROR with  `error_prefix` and the response `xpath_spec` text 
        then raise an exception.  This may result in multiple ERROR messages.
        
        Issue a log ERROR for each form error,  then raise an exception 
        if any errors found.

        returns None
        """
        error_msg_parse = html.fromstring(response.text).xpath(xpath_spec)
        errors = 0
        for parse in error_msg_parse:
            error_message = parse.text.strip().replace("\n","")
            if error_message:
                if error_message.startswith("ERROR: "):
                    error_message = error_message[len("ERROR: "):]
                errors += 1
                log.error(error_prefix, error_message)
        if errors:
            raise CrdsWebError("A web transaction with the CRDS server had errors.")
Example #36
    def file_transfers(self):
        """Top level control for the primary function of downloading files specified as:

        --files ...      (explicit list of CRDS mappings or references)
        --contexts ...   (many varieties of mapping specifier including --all, --range, etc.)
        --fetch-sqlite-db ...  (Server catalog download as sqlite3 database file.)

        Returns list of downloaded/cached files for later verification if requested.
        """
        if self.args.files:
            self.sync_explicit_files()
            verify_file_list = self.files
        elif self.args.fetch_sqlite_db:
            self.fetch_sqlite_db()
            verify_file_list = []
        elif self.contexts:
            verify_file_list = self.interpret_contexts()
        else:
            log.error("Define --all, --contexts, --last, --range, --files, or --fetch-sqlite-db to sync.")
            sys.exit(-1)
        return verify_file_list
Example #37
    def _check_filenames(self, hdul):
        """
        Confirm that values in the FILENAME column are prefixed
        with a valid path variable, and are suffixed with the correct
        parametrization variable.
        """
        result = True
        for i, row in enumerate(hdul[-1].data):
            expected_path_prefix = utils.get_path_prefix(row["COMPNAME"])
            if row["FILENAME"].split("$")[0] + "$" != expected_path_prefix:
                log.error("Malformed FILENAME value at index", i,
                          "(missing or invalid path prefix)")
                result = False

            param_keyword = utils.get_parametrization_keyword(row["COMPNAME"])
            if param_keyword is None and row["FILENAME"].endswith("]"):
                log.error("Malformed FILENAME value at index", i,
                          "(should not be parametrized)")
                result = False
            elif param_keyword is not None and not row["FILENAME"].lower().endswith(
                    "[{}]".format(param_keyword)):
                log.error("Malformed FILENAME value at index", i,
                          "(should be parametrized)")
                result = False

        return result
Example #38
    def check_compname_agreement(
        self,
        description_a,
        compnames_a,
        known_missing_a,
        description_b,
        compnames_b,
        known_missing_b,
    ):
        """
        Check and report any differences between two sets of component names.
        """
        result = True

        log.info("Checking for components present in {} but missing from {}".format(
            description_a, description_b
        ))
        missing_from_b = (compnames_a - compnames_b) - known_missing_b
        if len(missing_from_b) > 0:
            missing_compnames = ", ".join(missing_from_b)
            message = "Components present in {} but missing from {}: {}".format(
                description_a, description_b, missing_compnames
            )
            log.error(message)
            result = False

        log.info("Checking for components present in {} but missing from {}".format(
            description_b, description_a
        ))
        missing_from_a = (compnames_b - compnames_a) - known_missing_a
        if len(missing_from_a) > 0:
            missing_compnames = ", ".join(missing_from_a)
            message = "Components present in {} but missing from {}: {}".format(
                description_b, description_a, missing_compnames
            )
            log.error(message)
            result = False

        return result
Example #39
def rmap_check_modifications(old_rmap,
                             new_rmap,
                             old_ref,
                             new_ref,
                             expected=("add", )):
    """Check the differences between `old_rmap` and `new_rmap` and make sure they're
    limited to the types listed in `expected`.
    
    expected should be "add" or "replace".
    
    Returns as_expected,  True IFF all rmap modifications match `expected`.
    """
    diffs = diff.mapping_diffs(old_rmap, new_rmap)
    as_expected = True
    for difference in diffs:
        actual = diff.diff_action(difference)
        if actual in expected:
            pass  # white-list so it will fail when expected is bogus.
        else:
            log.error("Expected one of", srepr(expected), "but got",
                      srepr(actual), "from change", srepr(difference))
            as_expected = False
    with open(old_rmap) as pfile:
        old_count = len([
            line for line in pfile.readlines()
            if os.path.basename(old_ref) in line
        ])
    with open(new_rmap) as pfile:
        new_count = len([
            line for line in pfile.readlines()
            if os.path.basename(new_ref) in line
        ])
    if "replace" in expected and old_count != new_count:
        log.error("Replacement COUNT DIFFERENCE replacing", srepr(old_ref),
                  "with", srepr(new_ref), "in", srepr(old_rmap), old_count,
                  "vs.", new_count)
        as_expected = False
    return as_expected
Example #40
 def download_files(self, downloads, localpaths):
     """Serial file-by-file download."""
     self.info_map = get_file_info_map(
         self.observatory, downloads, ["size", "rejected", "blacklisted", "state", "sha1sum", "instrument"])
     if config.writable_cache_or_verbose("Readonly cache, skipping download of (first 5):", repr(downloads[:5]), verbosity=70):
         bytes_so_far = 0
         total_files = len(downloads)
         total_bytes = get_total_bytes(self.info_map)
         for nth_file, name in enumerate(downloads):
             try:
                 if "NOT FOUND" in self.info_map[name]:
                     raise CrdsDownloadError("file is not known to CRDS server.")
                 bytes, path = self.catalog_file_size(name), localpaths[name]
                 log.info(file_progress("Fetching", name, path, bytes, bytes_so_far, total_bytes, nth_file, total_files))
                 self.download(name, path)
                 bytes_so_far += os.stat(path).st_size
             except Exception as exc:
                 if self.raise_exceptions:
                     raise
                 else:
                     log.error("Failure downloading file", repr(name), ":", str(exc))
         return bytes_so_far
     return 0
Example #41
 def handle_error(self, message):
     """Generic "error" handler issues an error message from remote process and
     continues monitoring.
     """
     log.error(self.format_remote(message.data))
     return False
Example #42
 def handle_fail(self, message):
     """Generic "fail" handler reports on remote process fatal error / failure
     and issues an error() message, then stops monitoring / exits.
     """
     log.error(self.format_remote("Processing failed:",  message.data))
     return message.data["result"]
Example #43
def check_naming_consistency(checked_instrument=None, exhaustive_mapping_check=False):
    """Dev function to compare the properties returned by name decomposition
    to the properties determined by file contents and make sure they're the same.
    Also checks rmap membership.

    >> from crds.tests import test_config
    >> old_config = test_config.setup()
    >> check_naming_consistency("acs")
    >> check_naming_consistency("cos")
    >> check_naming_consistency("nicmos")
    >> check_naming_consistency("stis")
    >> check_naming_consistency("wfc3")
    >> check_naming_consistency("wfpc2")
    >> test_config.cleanup(old_config)
    """
    from crds import certify

    for ref in rmap.list_references("*", observatory="hst", full_path=True):
        with log.error_on_exception("Failed processing:", repr(ref)):

            _path, _observ, instrument, filekind, _serial, _ext = ref_properties_from_cdbs_path(ref)

            if checked_instrument is not None and instrument != checked_instrument:
                continue

            if data_file.is_geis_data(ref):
                if os.path.exists(data_file.get_conjugate(ref)):
                    continue
                else:
                    log.warning("No GEIS header for", repr(ref))

            log.verbose("Processing:", instrument, filekind, ref)
            
            _path2, _observ2, instrument2, filekind2, _serial2, _ext2 = ref_properties_from_header(ref)
            if instrument != instrument2:
                log.error("Inconsistent instruments", repr(instrument), "vs.", repr(instrument2), 
                          "for", repr(ref))
            if filekind != filekind2:
                log.error("Inconsistent filekinds", repr(filekind), "vs.", repr(filekind2), 
                          "for", repr(ref))

            for pmap_name in reversed(sorted(rmap.list_mappings("*.pmap", observatory="hst"))):

                r = certify.find_governing_rmap(pmap_name, ref)

                if not r:
                    continue

                if r.instrument != instrument:
                    log.error("Rmap instrument", repr(r.instrument), 
                              "inconsistent with name derived instrument", repr(instrument), "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind:
                    log.error("Rmap filekind", repr(r.filekind), 
                              "inconsistent with name derived filekind", repr(filekind), "for", repr(ref), "in", repr(pmap_name))
                if r.instrument != instrument2:
                    log.error("Rmap instrument", repr(r.instrument), 
                              "inconsistent with content derived instrument", repr(instrument2), "for", repr(ref), "in", repr(pmap_name))
                if r.filekind != filekind2:
                    log.error("Rmap filekind", repr(r.filekind), 
                              "inconsistent with content derived filekind", repr(filekind2), "for", repr(ref), "in", repr(pmap_name))
                
                if not exhaustive_mapping_check:
                    break

            else:
                log.error("Orphan reference", repr(ref), "not found under any context.")