Example #1
def _flat_to_tpns(flat=None, schema_name=None):
    """Convert flat representation of DM schema to list of all TpnInfo objects."""
    if flat is None:
        flat = _schema_to_flat(_load_schema(schema_name))
    tpns = []
    for key, value in flat.items():
        if key.endswith(".TYPE"):
            basekey = str(key[:-len(".TYPE")])
            legal_values = [
                str(val) for val in flat.get(basekey + ".ENUM", [])
            ]
            if legal_values:
                legal_values += ["ANY", "N/A"]
            legal_values = sorted(set(legal_values))
            if isinstance(value, list):
                value = tuple(value)
            datatype = SCHEMA_TYPE_TO_TPN.get(value, None)
            if datatype is not None:
                tpn = TpnInfo(name=basekey.upper(),
                              keytype="H",
                              datatype=datatype[0],
                              presence=datatype[1],
                              values=legal_values)
                log.verbose("Adding tpn constraint from DM schema:",
                            repr(tpn),
                            verbosity=65)
                tpns.append(tpn)
            else:
                log.warning("No TPN form for", repr(key), repr(value))
    return sorted(tpns)
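
For context, `_flat_to_tpns` above only relies on `SCHEMA_TYPE_TO_TPN` mapping a schema type value (or tuple of values) to a `(datatype, presence)` pair used to build each `TpnInfo`. A minimal sketch of such a mapping follows; the specific entries are assumptions for illustration, not the actual CRDS table.

# Illustrative only: keys are schema .TYPE values, values are (TPN datatype, presence).
SCHEMA_TYPE_TO_TPN = {
    "string": ("C", "O"),            # character-valued, optional -- codes assumed
    "number": ("R", "O"),            # real-valued, optional -- codes assumed
    ("string", "null"): ("C", "O"),  # list-valued types are converted to tuples before lookup
}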
Example #2
def get_free_header(filepath,
                    needed_keys=(),
                    original_name=None,
                    observatory=None):
    """Return the complete unconditioned header dictionary of a reference file.

    Does not hijack warnings.

    Original name is used to determine the file type for web upload temporary files,
    which have no distinguishing extension.  Original name is the browser-side name for the file.
    """
    if original_name is None:
        original_name = os.path.basename(filepath)
    filetype = get_filetype(original_name, filepath)
    try:
        header_func = {
            "asdf": get_asdf_header,
            "json": get_json_header,
            "yaml": get_yaml_header,
            "geis": get_geis_header,
        }[filetype]
        header = header_func(filepath, needed_keys)
    except KeyError:
        if observatory is None:
            observatory = get_observatory(filepath, original_name)
        if observatory == "jwst":
            header = get_data_model_header(filepath, needed_keys)
        else:
            header = get_fits_header_union(filepath, needed_keys)
    log.verbose("Header of", repr(filepath), "=", log.PP(header), verbosity=90)
    return header
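
A usage sketch, assuming the crds package is importable; the module location and the file path are assumptions for illustration.

from crds import data_file  # assumed location of get_free_header

# Hypothetical local reference file; the header is returned unconditioned.
header = data_file.get_free_header("/path/to/reference.fits",
                                   needed_keys=("INSTRUME", "DETECTOR"))
print(header.get("INSTRUME"))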
Example #3
 def get_row_keys(self, instrument, filekind):
     """Return the row_keys which define unique table rows corresponding to mapping.
     
     These are used for "mode" checks to issue warnings when unique rows are deleted
     in a certify comparison check against the preceding version of a table.
     
     row_keys are now also utilized to perform "affected datasets" table row
     lookups, which essentially requires emulating that aspect of the calibration
     software.  Consequently, row_keys now require a higher level of fidelity than
     when they were originally defined for mode checks, since the consequence of
     inadequate row keys is a failed "affects check" rather than merely an
     extraneous warning.  In their capacity as affected datasets
     parameters,  row_keys must be supported by the interface which connects the
     CRDS server to the appropriate system dataset parameter database,  DADSOPS
     for HST.   That interface must be updated when row_keys.dat is changed.
     
     The certify mode checks have a shaky foundation since the concept of mode
     doesn't apply to all tables and sometimes "data" parameters are required to
     render rows unique.   The checks only issue warnings however so they can be
     ignored by file submitters.
     
     For HST calibration references, the mapping is an rmap.
     """
     try:
         return self.row_keys[instrument][filekind]
     except KeyError:
         log.verbose("No unique row keys defined for", repr((instrument, filekind)))
         return []
Example #4
 def _reference_name_to_validator_key(self, filename, field, header, observatory, instrument, filekind):
     """Given a reference filename `fitsname`,  return a dictionary key
     suitable for caching the reference type's Validator.
     
     This revised version supports computing "subtype" .tpn files based
     on the parameters of the reference.   Most references have unconditional
     associations based on (instrument, filekind).   A select few have
     conditional lookups which select between several .tpn's for the same
     instrument and filetype.
     
     Returns (.tpn filename,)
     """
     try:
         tpnfile = self.unified_defs[instrument][filekind][field]
         if isinstance(tpnfile, python23.string_types):
             key = (tpnfile, filename)  # tpn filename
         else: # it's a list of conditional tpns
             for (condition, tpn) in tpnfile:
                 if eval(condition, header):
                     key = (tpn, filename)  # tpn filename
                     break
             else:
                 assert False
     except (AssertionError, KeyError):
         raise ValueError("No TPN match for reference='{}' instrument='{}' reftype='{}'".format(
                 os.path.basename(filename), instrument, filekind))
     log.verbose("Validator key for", field, "for", repr(filename), instrument, filekind, "=", key, verbosity=60)
     return key
Example #5
File: api.py Project: nden/crds
 def remove_file(self, localpath):
     """Removes file at `localpath`."""
     log.verbose("Removing file", repr(localpath))
     try:
         os.remove(localpath)
     except Exception:
         log.verbose("Exception during file removal of", repr(localpath))
Example #6
File: api.py Project: nden/crds
def _get_cache_filelist_and_report_errors(bestrefs):
    """Compute the list of files to download based on the `bestrefs` dictionary,
    skimming off and reporting errors, and raising an exception on the last error seen.

    Return the list of files to download,  collapsing complex return types like tuples
    and dictionaries into a list of simple filenames.
    """
    wanted = []
    last_error = None
    for filetype, refname in bestrefs.items():
        if isinstance(refname, tuple):
            wanted.extend(list(refname))
        elif isinstance(refname, dict):
            wanted.extend(refname.values())
        elif isinstance(refname, python23.string_types):
            if "NOT FOUND" in refname:
                if "n/a" in refname.lower():
                    log.verbose("Reference type", repr(filetype),
                                "NOT FOUND.  Skipping reference caching/download.")
                else:
                    last_error = CrdsLookupError("Error determining best reference for",
                                                 repr(str(filetype)), " = ", str(refname)[len("NOT FOUND"):])
                    log.error(str(last_error))
            else:
                wanted.append(refname)
        else:
            last_error = CrdsLookupError("Unhandled bestrefs return value type for " + repr(str(filetype)))
            log.error(str(last_error))
    if last_error is not None:
        raise last_error
    return wanted
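
The shape of the `bestrefs` input and the flattened result can be illustrated with a small, purely hypothetical dictionary (reference types and filenames invented):

bestrefs = {
    "darkfile": "x_drk.fits",                    # plain string is kept as-is
    "biasfile": ("x_bia1.fits", "x_bia2.fits"),  # tuples are flattened into the list
    "flatfile": "NOT FOUND n/a",                 # n/a results are skipped with a verbose message
}
# Under these assumptions _get_cache_filelist_and_report_errors(bestrefs) returns
# ["x_drk.fits", "x_bia1.fits", "x_bia2.fits"] and raises no exception.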
Example #7
File: api.py Project: nden/crds
def get_best_references(pipeline_context, header, reftypes=None):
    """Get best references for dict-like `header` relative to 
    `pipeline_context`.
    
    pipeline_context  CRDS context for lookup,   e.g.   'hst_0001.pmap'
    header            dict-like mapping { lookup_parameter : value }
    reftypes         If None,  return all reference types;  otherwise return 
                     best refs for the specified list of reftypes. 

    Returns          { reftype : reference_basename ... }
    
    Raises           CrdsLookupError,  typically for problems with header values
    """
    header = { str(key):str(value) for (key,value) in header.items() }
    try:
        bestrefs = S.get_best_references(pipeline_context, dict(header), reftypes)
    except Exception as exc:
        raise CrdsLookupError(str(exc))
    # Due to limitations of jsonrpc,  exception handling is kludged in here.
    for filetype, refname in bestrefs.items():
        if "NOT FOUND" in refname:
            if refname == "NOT FOUND n/a":
                log.verbose("Reference type", srepr(filetype), "not applicable.", verbosity=80)
            else:
                raise CrdsLookupError("Error determining best reference for " + 
                                      srepr(filetype) + " = " + 
                                      str(refname)[len("NOT FOUND"):])
    return bestrefs
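
A usage sketch for the client call above; the context name comes from the docstring, while the header parameters, reftype, and result are illustrative.

from crds import api  # get_best_references is defined in api.py

header = {"INSTRUME": "ACS", "DETECTOR": "WFC",
          "DATE-OBS": "2014-01-01", "TIME-OBS": "00:00:00"}  # illustrative subset of matching parameters
refs = api.get_best_references("hst_0001.pmap", header, reftypes=["darkfile"])
print(refs)  # e.g. {"darkfile": "some_drk.fits"} -- filename invented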
Example #8
File: sync.py Project: nden/crds
 def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
     """Check one `file` against the provided CRDS database `info` dictionary."""
     path = rmap.locate_file(file, observatory=self.observatory)
     base = os.path.basename(file)
     n_bytes = int(info["size"])
     log.verbose(api.file_progress("Verifying", base, path, n_bytes, bytes_so_far, total_bytes, nth_file, total_files),
                 verbosity=10)
     if not os.path.exists(path):
         log.error("File", repr(base), "doesn't exist at", repr(path))
         return
     size = os.stat(path).st_size
     if int(info["size"]) != size:
         self.error_and_repair(path, "File", repr(base), "length mismatch LOCAL size=" + srepr(size), 
                               "CRDS size=" + srepr(info["size"]))
     elif self.args.check_sha1sum:
         log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=100)
         sha1sum = utils.checksum(path)
         if info["sha1sum"] == "none":
             log.warning("CRDS doesn't know the checksum for", repr(base))
         elif info["sha1sum"] != sha1sum:
             self.error_and_repair(path, "File", repr(base), "checksum mismatch CRDS=" + repr(info["sha1sum"]), 
                                   "LOCAL=" + repr(sha1sum))
     if info["state"] not in ["archived", "operational"]:
         log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))
     if info["rejected"] != "false":
         log.warning("File", repr(base), "has been explicitly rejected.")
         if self.args.purge_rejected:
             self.remove_files([path], "files")
         return
     if info["blacklisted"] != "false":
         log.warning("File", repr(base), "has been blacklisted or is dependent on a blacklisted file.")
         if self.args.purge_blacklisted:
             self.remove_files([path], "files")
         return
     return
Example #9
 def sync_datasets(self):
     """Sync mappings and references for datasets with respect to `self.contexts`."""
     if not self.contexts:
         log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.""")
         sys.exit(-1)
     active_references = []
     for context in self.contexts:
         if self.args.dataset_ids:
             if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                 with open(self.args.dataset_ids[0][1:]) as pfile:
                     self.args.dataset_ids = pfile.read().splitlines()
             with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                 id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
         for dataset in self.args.dataset_files or self.args.dataset_ids:
             log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
             with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                 if self.args.dataset_files:
                     headers = { dataset : data_file.get_conditioned_header(dataset, observatory=self.observatory) }
                 else:
                     headers = { dataset_id : header for (dataset_id, header) in id_headers.items() if
                                 dataset.upper() in dataset_id }
                 for assc_dataset, header in headers.items():
                     with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset), 
                                                 "under context", repr(context)):   
                         bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory, 
                                                            ignore_cache=self.args.ignore_cache)
                         log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                         active_references.extend(bestrefs.values())
     active_references = [ ref for ref in active_references if not ref.startswith("NOT FOUND") ]
     log.verbose("Syncing references:", repr(active_references))
     return list(set(active_references))
Example #10
def get_free_header(filepath, needed_keys=(), original_name=None, observatory=None):
    """Return the complete unconditioned header dictionary of a reference file.

    Does not hijack warnings.

    Original name is used to determine the file type for web upload temporary files,
    which have no distinguishing extension.  Original name is the browser-side name for the file.
    """
    if original_name is None:
        original_name = os.path.basename(filepath)
    filetype = get_filetype(original_name, filepath)
    try:
        header_func = {
            "asdf" : get_asdf_header,
            "json" : get_json_header,
            "yaml" : get_yaml_header,
            "geis" : get_geis_header,
        }[filetype]
        header = header_func(filepath, needed_keys)
    except KeyError:
        if observatory is None:
            observatory = get_observatory(filepath, original_name)
        if observatory == "jwst":
            header = get_data_model_header(filepath, needed_keys)
        else:
            header = get_fits_header_union(filepath, needed_keys)
    log.verbose("Header of", repr(filepath), "=", log.PP(header), verbosity=90)
    return header
Example #11
    def _call(self, *args, **kwargs):
        """Core of RPC dispatch without error interpretation, logging, or return value decoding."""
        params = kwargs if len(kwargs) else args
        # if Any.kind(params) == Object and self.__version != '2.0':
        #   raise Exception('Unsupport arg type for JSON-RPC 1.0 '
        #                  '(the default version for this client, '
        #                  'pass version="2.0" to use keyword arguments)')
        jsonrpc_params = {"jsonrpc": self.__version,
                          "method": self.__service_name,
                          'params': params,
                          'id': message_id()
                         }
        
        parameters = json.dumps(jsonrpc_params)
        
        url = self._get_url(jsonrpc_params)
        
        if "serverless" in url or "server-less" in url:
            raise exceptions.ServiceError("Configured for server-less mode.  Skipping JSON RPC " + repr(self.__service_name))

        if log.get_verbose() <= 50:
            log.verbose("CRDS JSON RPC", self.__service_name, params if len(str(params)) <= 60 else "(...)", "-->")
        else:
            log.verbose("CRDS JSON RPC to", url, "parameters", params, "-->")
        
        response = apply_with_retries(self._call_service, parameters, url)

        try:
            rval = json.loads(response)
        except Exception:
            log.warning("Invalid CRDS jsonrpc response:\n", response)
            raise
        
        return rval
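
For reference, the wire request built above is just `jsonrpc_params` serialized with `json.dumps`; a hypothetical instance of that dictionary (service name, parameters, and id are all invented):

jsonrpc_params = {
    "jsonrpc": "1.0",                                        # self.__version
    "method": "get_best_references",                         # self.__service_name -- illustrative
    "params": ["hst_0001.pmap", {"INSTRUME": "ACS"}, None],  # positional args -- illustrative
    "id": "a1b2c3d4",                                        # message_id() -- made-up value
}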
Example #12
File: locate.py Project: nden/crds
def locate_dir(instrument, mode=None):
    """Locate the instrument specific directory for a reference file."""
    if mode is None:
        mode = config.get_crds_ref_subdir_mode(observatory="hst")
    else:
        config.check_crds_ref_subdir_mode(mode)
    crds_refpath = config.get_crds_refpath("hst")
    prefix = get_env_prefix(instrument)
    if mode == "legacy":  # Locate cached files at the appropriate CDBS-style  iref$ locations
        try:
            rootdir = os.environ[prefix]
        except KeyError:
            try:
                rootdir = os.environ[prefix[:-1]]
            except KeyError:
                raise KeyError("Reference location not defined for " +
                               repr(instrument) + ".  Did you configure " +
                               repr(prefix) + "?")
    elif mode == "instrument":  # use simple names inside CRDS cache.
        rootdir = os.path.join(crds_refpath, instrument)
        refdir = os.path.join(crds_refpath, prefix[:-1])
        if not os.path.exists(refdir):
            if config.writable_cache_or_verbose(
                    "Skipping making instrument directory link for",
                    repr(instrument)):
                log.verbose("Creating legacy cache link", repr(refdir), "-->",
                            repr(rootdir))
                utils.ensure_dir_exists(rootdir + "/locate_dir.fits")
                os.symlink(rootdir, refdir)
    elif mode == "flat":  # use original flat cache structure,  all instruments in same directory.
        rootdir = crds_refpath
    else:
        raise ValueError("Unhandled reference file location mode " +
                         repr(mode))
    return rootdir
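
A usage sketch, assuming a configured HST CRDS cache; the module path follows the "File: locate.py" heading above and the printed paths are illustrative.

from crds.hst import locate  # module path is an assumption based on the File heading

print(locate.locate_dir("acs", mode="instrument"))  # e.g. <crds_refpath>/acs
print(locate.locate_dir("acs", mode="flat"))        # e.g. <crds_refpath>
# mode="legacy" additionally requires a CDBS-style environment variable (e.g. jref) to be defined.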
Example #13
def get_crds_ref_subdir_mode(observatory):
    """Return the mode value defining how reference files are located."""
    global CRDS_REF_SUBDIR_MODE
    if CRDS_REF_SUBDIR_MODE in ["None", None]:
        mode_path = os.path.join(get_crds_cfgpath(observatory),
                                 CRDS_SUBDIR_TAG_FILE)
        try:
            with open(mode_path) as pfile:
                mode = pfile.read().strip()
            # log.verbose("Determined cache format from", repr(mode_path), "as", repr(mode))
        except IOError:
            if len(glob.glob(os.path.join(get_crds_refpath(observatory),
                                          "*"))) > 20:
                mode = "flat"
                log.verbose(
                    "No cache config tag found, looks like a 'flat' cache based on existing references."
                )
            else:
                mode = "instrument"
                log.verbose(
                    "No cache config tag found, defaulting to 'instrument' based cache."
                )
            with log.verbose_on_exception(
                    "Failed saving default subdir mode to", repr(mode)):
                set_crds_ref_subdir_mode(mode, observatory)
        check_crds_ref_subdir_mode(mode)
        CRDS_REF_SUBDIR_MODE = mode
    else:
        mode = CRDS_REF_SUBDIR_MODE
    return mode
Example #14
 def get_row_keys(self, instrument, filekind):
     """Return the row_keys which define unique table rows corresponding to mapping.
     
     These are used for "mode" checks to issue warnings when unique rows are deleted
     in a certify comparison check against the preceding version of a table.
     
     row_keys are now also utilized to perform "affected datasets" table row
     lookups, which essentially requires emulating that aspect of the calibration
     software.  Consequently, row_keys now require a higher level of fidelity than
     when they were originally defined for mode checks, since the consequence of
     inadequate row keys is a failed "affects check" rather than merely an
     extraneous warning.  In their capacity as affected datasets
     parameters,  row_keys must be supported by the interface which connects the
     CRDS server to the appropriate system dataset parameter database,  DADSOPS
     for HST.   That interface must be updated when row_keys.dat is changed.
     
     The certify mode checks have a shaky foundation since the concept of mode
     doesn't apply to all tables and sometimes "data" parameters are required to
     render rows unique.   The checks only issue warnings however so they can be
     ignored by file submitters.
     
     For HST calibration references, the mapping is an rmap.
     """
     try:
         return self.row_keys[instrument][filekind]
     except KeyError:
         log.verbose("No unique row keys defined for", repr((instrument, filekind)))
         return []
Example #15
 def __init__(self, argv=None, parser_pars=None, reset_log=True, print_status=False):
     self.stats = utils.TimingStats()
     self._already_reported_stats = False
     if isinstance(argv, python23.string_types):
         argv = argv.split()
     elif argv is None:
         argv = sys.argv
     self._argv = argv
     if parser_pars is None:
         parser_pars = {}
     for key in ["description", "epilog", "usage", "formatter_class"]: 
         self._add_key(key, parser_pars)
     self.parser = argparse.ArgumentParser(prog=argv[0], **parser_pars)
     self.add_args()
     self.add_standard_args()
     self.args = self.parser.parse_args(argv[1:])
     if self.args.readonly_cache:
         config.set_cache_readonly(True)
     log.set_verbose(log.get_verbose() or self.args.verbosity or self.args.verbose)
     # log.verbose("Script parameters:", os.path.basename(argv[0]), *argv[1:])
     log.set_log_time(config.get_log_time() or self.args.log_time)
     log.verbose("Command:", [os.path.basename(argv[0])] + argv[1:], verbosity=30)
     self.print_status = print_status
     self.reset_log = reset_log
     if self.reset_log:
         log.reset()  # reset the infos, warnings, and errors counters as if a new command line run.
     self._exit_status = None
     self.show_context_resolution = True
Example #16
File: cmdline.py Project: nden/crds
 def __init__(self, argv=None, parser_pars=None, reset_log=True, print_status=False):
     self.stats = utils.TimingStats()
     self._already_reported_stats = False
     if isinstance(argv, python23.string_types):
         argv = argv.split()
     elif argv is None:
         argv = sys.argv
     self._argv = argv
     if parser_pars is None:
         parser_pars = {}
     for key in ["description", "epilog", "usage", "formatter_class"]: 
         self._add_key(key, parser_pars)
     self.parser = argparse.ArgumentParser(prog=argv[0], **parser_pars)
     self.add_args()
     self.add_standard_args()
     self.args = self.parser.parse_args(argv[1:])
     if self.args.readonly_cache:
         config.set_cache_readonly(True)
     log.set_verbose(log.get_verbose() or self.args.verbosity or self.args.verbose)
     # log.verbose("Script parameters:", os.path.basename(argv[0]), *argv[1:])
     log.set_log_time(config.get_log_time() or self.args.log_time)
     log.verbose("Command:", [os.path.basename(argv[0])] + argv[1:], verbosity=30)
     self.print_status = print_status
     self.reset_log = reset_log
     if self.reset_log:
         log.reset()  # reset the infos, warnings, and errors counters as if a new command line run.
     self._exit_status = None
     self.show_context_resolution = True
Example #17
 def _write_last_processed(self, hist):
     """Write down the history tuple of the last context processed."""
     log.verbose("Saving last processed:", repr(hist))
     log.verbose("Storing last processed state at", repr(self.last_processed_path))
     utils.ensure_dir_exists(self.last_processed_path)
     with open(self.last_processed_path, "w+") as last:
         last.write(str(hist))
Example #18
 def _reference_name_to_validator_key(self, filename, field, header, observatory, instrument, filekind):
     """Given a reference filename `fitsname`,  return a dictionary key
     suitable for caching the reference type's Validator.
     
     This revised version supports computing "subtype" .tpn files based
     on the parameters of the reference.   Most references have unconditional
     associations based on (instrument, filekind).   A select few have
     conditional lookups which select between several .tpn's for the same
     instrument and filetype.
     
     Returns (.tpn filename,)
     """
     try:
         tpnfile = self.unified_defs[instrument][filekind][field]
         if isinstance(tpnfile, python23.string_types):
             key = (tpnfile, filename)  # tpn filename
         else: # it's a list of conditional tpns
             for (condition, tpn) in tpnfile:
                 if eval(condition, header):
                     key = (tpn, filename)  # tpn filename
                     break
             else:
                 assert False
     except (AssertionError, KeyError):
         raise ValueError("No TPN match for reference='{}' instrument='{}' reftype='{}'".format(
                 os.path.basename(filename), instrument, filekind))
     log.verbose("Validator key for", field, "for", repr(filename), instrument, filekind, "=", key, verbosity=60)
     return key
Example #19
def locate_dir(instrument, mode=None):
    """Locate the instrument specific directory for a reference file."""
    if mode is None:
        mode = config.get_crds_ref_subdir_mode(observatory="tobs")
    else:
        config.check_crds_ref_subdir_mode(mode)
    crds_refpath = config.get_crds_refpath("tobs")
    prefix = get_env_prefix(instrument)
    if mode == "legacy":   # Locate cached files at the appropriate CDBS-style  iref$ locations
        try:
            rootdir = os.environ[prefix]
        except KeyError:
            try:
                rootdir = os.environ[prefix[:-1]]
            except KeyError:
                raise KeyError("Reference location not defined for " + repr(instrument) + 
                               ".  Did you configure " + repr(prefix) + "?")
    elif mode == "instrument":   # use simple names inside CRDS cache.
        rootdir = os.path.join(crds_refpath, instrument)
        refdir = os.path.join(crds_refpath, prefix[:-1])
        if not os.path.exists(refdir):
            if config.writable_cache_or_verbose("Skipping making instrument directory link for", repr(instrument)):
                log.verbose("Creating legacy cache link", repr(refdir), "-->", repr(rootdir))
                utils.ensure_dir_exists(rootdir + "/locate_dir.fits")
                os.symlink(rootdir, refdir)
    elif mode == "flat":    # use original flat cache structure,  all instruments in same directory.
        rootdir = crds_refpath
    else:
        raise ValueError("Unhandled reference file location mode " + repr(mode))
    return rootdir
Example #20
 def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
     """Check one `file` against the provided CRDS database `info` dictionary."""
     path = rmap.locate_file(file, observatory=self.observatory)
     base = os.path.basename(file)
     n_bytes = int(info["size"])
     log.verbose(api.file_progress("Verifying", base, path, n_bytes, bytes_so_far, total_bytes, nth_file, total_files),
                 verbosity=10)
     if not os.path.exists(path):
         log.error("File", repr(base), "doesn't exist at", repr(path))
         return
     size = os.stat(path).st_size
     if int(info["size"]) != size:
         self.error_and_repair(path, "File", repr(base), "length mismatch LOCAL size=" + srepr(size), 
                               "CRDS size=" + srepr(info["size"]))
     elif self.args.check_sha1sum:
         log.verbose("Computing checksum for", repr(base), "of size", repr(size), verbosity=100)
         sha1sum = utils.checksum(path)
         if info["sha1sum"] == "none":
             log.warning("CRDS doesn't know the checksum for", repr(base))
         elif info["sha1sum"] != sha1sum:
             self.error_and_repair(path, "File", repr(base), "checksum mismatch CRDS=" + repr(info["sha1sum"]), 
                                   "LOCAL=" + repr(sha1sum))
     if info["state"] not in ["archived", "operational"]:
         log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))
     if info["rejected"] != "false":
         log.warning("File", repr(base), "has been explicitly rejected.")
         if self.args.purge_rejected:
             self.remove_files([path], "files")
         return
     if info["blacklisted"] != "false":
         log.warning("File", repr(base), "has been blacklisted or is dependent on a blacklisted file.")
         if self.args.purge_blacklisted:
             self.remove_files([path], "files")
         return
     return
Example #21
File: sync.py Project: nden/crds
 def sync_datasets(self):
     """Sync mappings and references for datasets with respect to `self.contexts`."""
     if not self.contexts:
         log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.""")
         sys.exit(-1)
     active_references = []
     for context in self.contexts:
         if self.args.dataset_ids:
             if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                 with open(self.args.dataset_ids[0][1:]) as pfile:
                     self.args.dataset_ids = pfile.read().splitlines()
             with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                 id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
         for dataset in self.args.dataset_files or self.args.dataset_ids:
             log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
             with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                 if self.args.dataset_files:
                     headers = { dataset : data_file.get_conditioned_header(dataset, observatory=self.observatory) }
                 else:
                     headers = { dataset_id : header for (dataset_id, header) in id_headers.items() if
                                 dataset.upper() in dataset_id }
                 for assc_dataset, header in headers.items():
                     with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset), 
                                                 "under context", repr(context)):   
                         bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory, 
                                                            ignore_cache=self.args.ignore_cache)
                         log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                         active_references.extend(bestrefs.values())
     active_references = [ ref for ref in active_references if not ref.startswith("NOT FOUND") ]
     log.verbose("Syncing references:", repr(active_references))
     return list(set(active_references))
Example #22
 def get_affected(self):
     """Examine the diffs between `old_pmap` and `new_pmap` and return sorted lists of affected instruments and types.
     
     Returns { affected_instrument : { affected_type, ... } }
     """
     instrs = defaultdict(set)
     diffs = self.mapping_diffs()
     diffs = remove_boring(diffs)
     for diff in diffs:
         for step in diff:
             # Walking down the diff steps 1-by-1 eventually hits an rmap comparison which
             # will define both instrument and type.  pmaps and imaps leave at least one blank.
             if len(step) == 2 and rmap.is_mapping(step[0]):
                 instrument, filekind = utils.get_file_properties(self.observatory, step[0])
             # This is inefficient since diff doesn't vary by step,  but set logic cleans up the redundancy
             # New rmaps imply reprocessing the entire type.
             elif isinstance(diff[-1],str) and diff[-1].startswith(("added","deleted")) and \
                     diff[-1].endswith(".rmap'"):
                 rmap_name = diff[-1].split()[-1].replace("'","")
                 rmapping = rmap.fetch_mapping(rmap_name, ignore_checksum=True)
                 instrument, filekind = rmapping.instrument, rmapping.filekind
             if instrument.strip() and filekind.strip():
                 if filekind not in instrs[instrument]:
                     log.verbose("Affected", (instrument, filekind), "based on diff", diff, verbosity=20)
                     instrs[instrument].add(filekind)
     return { key:list(val) for (key, val) in instrs.items() }
Example #23
 def get_affected(self):
     """Examine the diffs between `old_pmap` and `new_pmap` and return sorted lists of affected instruments and types.
     
     Returns { affected_instrument : { affected_type, ... } }
     """
     instrs = defaultdict(set)
     diffs = self.mapping_diffs()
     diffs = remove_boring(diffs)
     for diff in diffs:
         for step in diff:
             # Walking down the diff steps 1-by-1 eventually hits an rmap comparison which
             # will define both instrument and type.  pmaps and imaps leave at least one blank.
             if len(step) == 2 and rmap.is_mapping(step[0]):
                 instrument, filekind = utils.get_file_properties(
                     self.observatory, step[0])
             # This is inefficient since diff doesn't vary by step,  but set logic cleans up the redundancy
             # New rmaps imply reprocessing the entire type.
             elif isinstance(diff[-1],str) and diff[-1].startswith(("added","deleted")) and \
                     diff[-1].endswith(".rmap'"):
                 rmap_name = diff[-1].split()[-1].replace("'", "")
                 rmapping = rmap.fetch_mapping(rmap_name,
                                               ignore_checksum=True)
                 instrument, filekind = rmapping.instrument, rmapping.filekind
             if instrument.strip() and filekind.strip():
                 if filekind not in instrs[instrument]:
                     log.verbose("Affected", (instrument, filekind),
                                 "based on diff",
                                 diff,
                                 verbosity=20)
                     instrs[instrument].add(filekind)
     return {key: list(val) for (key, val) in instrs.items()}
Example #24
 def _write_last_processed(self, hist):
     """Write down the history tuple of the last context processed."""
     log.verbose("Saving last processed:", repr(hist))
     log.verbose("Storing last processed state at",
                 repr(self.last_processed_path))
     utils.ensure_dir_exists(self.last_processed_path)
     with open(self.last_processed_path, "w+") as last:
         last.write(str(hist))
Example #25
File: locate.py Project: nden/crds
def load_all_type_constraints():
    """Make sure that all HST .tpn files are loadable."""
    from crds import certify
    tpns = glob.glob(os.path.join(HERE, "tpns", "*.tpn"))
    for tpn_path in tpns:
        tpn_name = tpn_path.split("/")[-1]  # simply lost all patience with basename and path.split
        log.verbose("Loading", repr(tpn_name))
        certify.validators_by_typekey((tpn_name,), "tobs")
Example #26
def del_rmap_header(rmapping, new_filename, header_key):
    """Set the value of `key` in `filename` to `new_value` and rewrite the rmap.
    This is potentially lossy since rewriting the rmap may/will lose comments and 
    formatting quirks.
    """
    log.verbose("Deleting header value in", srepr(rmapping.basename), "for", srepr(header_key))
    del rmapping.header[header_key]
    rmapping.write(new_filename)
Example #27
def load_all_type_constraints():
    """Make sure that all HST .tpn files are loadable."""
    from crds import certify
    tpns = glob.glob(os.path.join(HERE, "tpns", "*.tpn"))
    for tpn_path in tpns:
        tpn_name = tpn_path.split("/")[-1]  # simply lost all patience with basename and path.split
        log.verbose("Loading", repr(tpn_name))
        certify.validators_by_typekey((tpn_name,), "tobs")
Example #28
def del_rmap_header(rmapping, new_filename, header_key):
    """Set the value of `key` in `filename` to `new_value` and rewrite the rmap.
    This is potentially lossy since rewriting the rmap may/will lose comments and 
    formatting quirks.
    """
    log.verbose("Deleting header value in", srepr(rmapping.basename), "for", srepr(header_key))
    del rmapping.header[header_key]
    rmapping.write(new_filename)
Example #29
def fallback_header_wfpc2_flatfile_v1(rmap, header):
    """Compute a fallback header for WFPC2 BIASFILE."""
    filter1 = header["FILTER1"]
    filter2 = header["FILTER2"]
    log.verbose("Computing fallback header wfpc2 ", rmap.filekind, 
                "swapping filter1 was" , filter1, "filter2 was", filter2)
    header["FILTER1"] = filter2
    header["FILTER2"] = filter1
    return header
Example #30
File: utils.py Project: nden/crds
 def _readonly(self, *args, **keys):
     """Compute (cache_key, func(*args, **keys)).   Do not add to cache."""
     key = self.cache_key(*args, **keys)
     if key in self.cache:
         log.verbose("Cached call", self.uncached.__name__, repr(key), verbosity=80)
         return key, self.cache[key]
     else:
         log.verbose("Uncached call", self.uncached.__name__, repr(key), verbosity=80)
         return key, self.uncached(*args, **keys)
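
`_readonly` appears to belong to a memoizing wrapper that stores the wrapped function as `self.uncached` and results in `self.cache`. A minimal sketch of that pattern, assuming that structure; this is not the actual crds.utils implementation.

class CachedFunction:
    """Minimal memoizer sketch illustrating the _readonly() contract above."""
    def __init__(self, func):
        self.uncached = func
        self.cache = {}

    def cache_key(self, *args, **keys):
        return args + tuple(sorted(keys.items()))

    def _readonly(self, *args, **keys):
        """Compute (cache_key, result) without adding anything to the cache."""
        key = self.cache_key(*args, **keys)
        if key in self.cache:
            return key, self.cache[key]
        return key, self.uncached(*args, **keys)

    def __call__(self, *args, **keys):
        key, value = self._readonly(*args, **keys)
        self.cache[key] = value  # unlike _readonly, the normal call records the result
        return value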
Example #31
File: sql.py Project: nden/crds
 def run_query(self, query):
     """Run the string `query` on the downloaded CRDS sqlite database."""
     connection = sqlite3.connect(self.sqlite_db_path)
     cursor = connection.cursor()
     log.verbose("querying:", repr(query))
     for row in cursor.execute(query):
         print(self.format_row(row))
     connection.commit()
     connection.close()
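
The same sqlite3 idiom shown standalone against an in-memory database, so it runs without the downloaded CRDS database; the table and row are invented for illustration.

import sqlite3

connection = sqlite3.connect(":memory:")
cursor = connection.cursor()
cursor.execute("create table files (name text, state text)")
cursor.execute("insert into files values ('x_drk.fits', 'operational')")
for row in cursor.execute("select * from files"):
    print(row)                 # prints ('x_drk.fits', 'operational')
connection.commit()
connection.close()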
Example #32
 def test_2_delete_fails(self):
     log.verbose("-"*60)
     r = rmap.ReferenceMapping.from_string(self.rmap_str, "./test.rmap", ignore_checksum=True)
     try:
         result = r.delete("shazaam.fits")
     except crds.CrdsError:
         pass
     else:
         assert False, "Expected delete to fail."    
Example #33
def fallback_header_wfpc2_flatfile_v1(rmap, header):
    """Compute a fallback header for WFPC2 BIASFILE."""
    filter1 = header["FILTER1"]
    filter2 = header["FILTER2"]
    log.verbose("Computing fallback header wfpc2 ", rmap.filekind,
                "swapping filter1 was", filter1, "filter2 was", filter2)
    header["FILTER1"] = filter2
    header["FILTER2"] = filter1
    return header
Example #34
File: sql.py Project: nden/crds
 def get_tables(self):
     """Return the list of database table names."""
     connection = sqlite3.connect(self.sqlite_db_path)
     cursor = connection.cursor()
     query = 'select name from sqlite_master where type=\'table\''
     log.verbose("querying:", repr(query))
     tables = [row[0] for row in cursor.execute(query)]
     connection.close()
     return tables
Example #35
 def run_query(self, query):
     """Run the string `query` on the downloaded CRDS sqlite database."""
     connection = sqlite3.connect(self.sqlite_db_path)
     cursor = connection.cursor()
     log.verbose("querying:", repr(query))
     for row in cursor.execute(query):
         print(self.format_row(row))
     connection.commit()
     connection.close()
Example #36
 def get_tables(self):
     """Return the list of database table names."""
     connection = sqlite3.connect(self.sqlite_db_path)
     cursor = connection.cursor()
     query = 'select name from sqlite_master where type=\'table\''
     log.verbose("querying:", repr(query))
     tables = [row[0] for row in cursor.execute(query)]
     connection.close()
     return tables
Example #37
    def from_filekind(cls, instrument, filekind):
        """Create the appropriate object for the type of reference file"""

        name = (instrument + '_' + filekind).lower()
        log.verbose('Instantiating rules for reference type {}.'.format(name), verbosity=25)
        if name in cls.rules:
            return cls.rules[name]()
        else:
            raise DeepLookError('No rules for instrument {} and reference file kind {}'.format(instrument, filekind))
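
`from_filekind` resolves "<instrument>_<filekind>" against a `cls.rules` registry. A minimal sketch of how such a registry might be wired up; the containing class name, the rules subclass, and the registration step are assumptions for illustration.

class DeepLookError(Exception):
    """Raised when no rules exist for an (instrument, filekind) pair."""

class DeepLook:
    rules = {}  # maps "<instrument>_<filekind>" -> rules class

    @classmethod
    def from_filekind(cls, instrument, filekind):
        name = (instrument + "_" + filekind).lower()
        if name in cls.rules:
            return cls.rules[name]()
        raise DeepLookError("No rules for instrument {} and reference file kind {}".format(
            instrument, filekind))

class AcsDarkfile(DeepLook):
    """Hypothetical rules class for ACS darkfile lookups."""

DeepLook.rules["acs_darkfile"] = AcsDarkfile  # registration mechanism is assumed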
Example #38
File: wfc3.py Project: nden/crds
def precondition_header_wfc3_biasfile_v1(rmap, header_in):
    """Mutate the incoming dataset header based upon hard coded rules
    and the header's contents.
    """
    header = dict(header_in)
    if header["SUBARRAY"] == "T" and "SUB" not in header["APERTURE"]:
        header["APERTURE"] = "N/A"
        log.verbose("Mutated APERTURE to ", repr(header["APERTURE"]),
                    "based on SUBARRAY='T' and 'SUB' not in APERTURE.")
    return header
Example #39
 def append_tpn_level(results, instrument, filekind):
     """Append the validator key for associated with one level of the `instrument`
     and `filekind` to `results`.
     """
     try:
         validator_key = self._reference_name_to_validator_key(filename, field, header, observatory, instrument, filekind)
         log.verbose("Adding validator key", repr(validator_key))
         results.append(validator_key)
     except Exception as exc:
         log.verbose_warning("Can't find TPN key for", (filename, instrument, filekind), ":", str(exc), verbosity=75)
Example #40
 def append_tpn_level(results, instrument, filekind):
     """Append the validator key for associated with one level of the `instrument`
     and `filekind` to `results`.
     """
     try:
         validator_key = self._reference_name_to_validator_key(filename, field, header, observatory, instrument, filekind)
         log.verbose("Adding validator key", repr(validator_key))
         results.append(validator_key)
     except Exception as exc:
         log.verbose_warning("Can't find TPN key for", (filename, instrument, filekind), ":", str(exc), verbosity=75)
Example #41
def precondition_header_wfc3_biasfile_v1(rmap, header_in):
    """Mutate the incoming dataset header based upon hard coded rules
    and the header's contents.
    """
    header = dict(header_in)
    if header["SUBARRAY"] == "T" and "SUB" not in header["APERTURE"]:
        header["APERTURE"] = "N/A"
        log.verbose("Mutated APERTURE to ", repr(header["APERTURE"]), 
                    "based on SUBARRAY='T' and 'SUB' not in APERTURE.")
    return header
Example #42
 def verify_archive_file(self, filename):
     """Verify the likely presence of `filename` on the archive web server.  Issue an ERROR if absent."""
     url = self.archive_url(filename)
     response = requests.head(url)
     if response.status_code in [200,]:
         log.verbose("File", repr(filename), "is available from", repr(url))
         self.check_length(filename, response)
     else:
         log.error("File", repr(filename), "failed HTTP HEAD with code =", response.status_code, "from", repr(url))
         self.missing_files.append(filename)
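
A standalone sketch of the same HEAD-based availability check using the requests library directly; the URL is hypothetical.

import requests

url = "https://archive.example.org/references/some_ref.fits"  # hypothetical archive URL
response = requests.head(url)
if response.status_code == 200:
    print("available, content-length:", response.headers.get("content-length"))
else:
    print("HEAD failed with status", response.status_code)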
Example #43
 def __call__(self, *args, **kwargs):
     jsonrpc = self._call(*args, **kwargs)
     if jsonrpc["error"]:
         decoded = str(python23.unescape(jsonrpc["error"]["message"]))
         raise self.classify_exception(decoded)
     else:
         result = crds_decode(jsonrpc["result"])
         result = fix_strings(result)
         log.verbose("RPC OK", log.PP(result) if log.get_verbose() >= 70 else "")
         return result
Example #44
 def check_length(self, filename, response):
     """Check the content-length reported by HEAD against the CRDS database's file size."""
     archive_size = python23.long(response.headers["content-length"])
     crds_size = python23.long(self.file_info[filename]["size"])
     if archive_size != crds_size:
         log.error("File", repr(filename), "available but length bad.  crds size:", crds_size,
                   "archive size:", archive_size)
         self.bad_length_files.append(filename)
     else:
         log.verbose("File", repr(filename), "lengths agree:", crds_size)
Example #45
File: api.py Project: nden/crds
def get_dataset_headers_by_instrument(context, instrument, datasets_since=None):
    """Return { dataset_id : { header } } for `instrument`."""
    max_ids_per_rpc = get_server_info().get("max_headers_per_rpc", 5000)
    ids = get_dataset_ids(context, instrument, datasets_since)
    headers = {}
    for i in range(0, len(ids), max_ids_per_rpc):
        id_slice = ids[i : i + max_ids_per_rpc]
        log.verbose("Dumping datasets for", repr(instrument), "ids", i , "of", len(ids), verbosity=20)
        header_slice = get_dataset_headers_by_id(context, id_slice)
        headers.update(header_slice)
    return headers
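
A usage sketch for the chunked dump above; the context and instrument are illustrative and the call requires a reachable CRDS server.

from crds import api  # get_dataset_headers_by_instrument is defined in api.py

headers = api.get_dataset_headers_by_instrument("hst_0001.pmap", "acs")
print(len(headers), "datasets dumped")  # returns { dataset_id : { header } }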
Example #46
 def from_file(cls, filename):
     """For historical HST types,  build type info from a spec file derived from CDBS specs like
     reference_file_defs.xml or cdbscatalog.dat.  For new CRDS-only types,  use a prototype rmap
     with an enhanced header to define the type.   Prototypes should be submissible but should not
     contain references.
     """
     log.verbose("Loading type spec", repr(filename), verbosity=75)
     if filename.endswith(".spec"):
         return cls(utils.evalfile(filename))
     else:
         return cls(rmap.load_mapping(filename).header)
Example #47
 def __call__(self, *args, **kwargs):
     jsonrpc = self._call(*args, **kwargs)
     if jsonrpc["error"]:
         decoded = str(python23.unescape(jsonrpc["error"]["message"]))
         raise self.classify_exception(decoded)
     else:
         result = crds_decode(jsonrpc["result"])
         result = fix_strings(result)
         log.verbose("RPC OK",
                     log.PP(result) if log.get_verbose() >= 70 else "")
         return result
Example #48
 def check_length(self, filename, response):
     """Check the content-length reported by HEAD against the CRDS database's file size."""
     archive_size = python23.long(response.headers["content-length"])
     crds_size = python23.long(self.file_info[filename]["size"])
     if archive_size != crds_size:
         log.error("File", repr(filename),
                   "available but length bad.  crds size:", crds_size,
                   "archive size:", archive_size)
         self.bad_length_files.append(filename)
     else:
         log.verbose("File", repr(filename), "lengths agree:", crds_size)
Example #49
 def get_columns(self, table):
     """Return the list of database column names for `table`."""
     connection = sqlite3.connect(self.sqlite_db_path)
     connection.row_factory = sqlite3.Row
     cursor = connection.cursor()
     query = "select * from {};".format(table)
     log.verbose("querying:", repr(query))
     row = cursor.execute(query).fetchone()
     columns = row.keys()
     connection.close()
     return columns
Example #50
 def dump_match_tuples(self, context):
     """Print out the match tuples for `references` under `context`.
     """
     ctx = context if len(self.contexts) > 1 else ""
     for ref in self.matched_files:
         matches = self.find_match_tuples(context, ref)
         if matches:
             for match in matches:
                 log.write(ctx, ref, ":", match)
         else:
             log.verbose(ctx, ref, ":", "none")
Example #51
 def from_file(cls, filename):
     """For historical HST types,  build type info from a spec file derived from CDBS specs like
     reference_file_defs.xml or cdbscatalog.dat.  For new CRDS-only types,  use a prototype rmap
     with an enhanced header to define the type.   Prototypes should be submissible but should not
     contain references.
     """
     log.verbose("Loading type spec", repr(filename), verbosity=75)
     if filename.endswith(".spec"):
         return cls(utils.evalfile(filename))
     else:
         return cls(rmap.load_mapping(filename).header)
Example #52
File: sql.py Project: nden/crds
 def get_columns(self, table):
     """Return the list of database column names for `table`."""
     connection = sqlite3.connect(self.sqlite_db_path)
     connection.row_factory = sqlite3.Row
     cursor = connection.cursor()
     query = "select * from {};".format(table)
     log.verbose("querying:", repr(query))
     row = cursor.execute(query).fetchone()
     columns = row.keys()
     connection.close()
     return columns
Example #53
 def dump_match_tuples(self, context):
     """Print out the match tuples for `references` under `context`.
     """
     ctx = context if len(self.contexts) > 1 else ""  
     for ref in self.matched_files:
         matches = self.find_match_tuples(context, ref)
         if matches:
             for match in matches:
                 log.write(ctx, ref, ":", match)
         else:
             log.verbose(ctx, ref, ":", "none")
Example #54
File: tests.py Project: nden/crds
 def test_2_delete_fails(self):
     log.verbose("-" * 60)
     r = rmap.ReferenceMapping.from_string(self.rmap_str,
                                           "./test.rmap",
                                           ignore_checksum=True)
     try:
         result = r.delete("shazaam.fits")
     except crds.CrdsError:
         pass
     else:
         assert False, "Expected delete to fail."