def _get_config_refpath(context, cal_ver):
    """Given CRDS `context` and calibration s/w version `cal_ver`, identify the
    applicable SYSTEM CRDSCFG reference file, cache it, and return the file path.

    Falls back to a built-in reference file shipped with the package when the
    CRDS lookup fails for any reason.
    """
    # Walk the ordered built-in table, stopping at the last entry whose
    # threshold version does not exceed `cal_ver`.
    idx = 0
    while idx < len(REFPATHS) - 1 and not _versions_lt(cal_ver, REFPATHS[idx + 1][0]):
        idx += 1
    refpath = os.path.join(HERE, REFPATHS[idx][1])
    # Attempt the authoritative server-side lookup;  exceptions are expected
    # and simply leave the built-in fallback `refpath` in place.
    try:
        header = {
            "META.INSTRUMENT.NAME": "SYSTEM",
            "META.CALIBRATION_SOFTWARE_VERSION": cal_ver,
        }
        rmapping = crds.get_symbolic_mapping(context).get_imap("system").get_rmap("crdscfg")
        ref = rmapping.get_best_ref(header)
        refpath = rmapping.locate_file(ref)
        api.dump_references(context, [ref])
    except Exception:
        log.verbose_warning(
            "Failed locating SYSTEM CRDSCFG reference",
            "under context", repr(context),
            "and cal_ver", repr(cal_ver) + ". Using built-in references.")
    log.verbose("Using", srepr(os.path.basename(refpath)),
                "to determine applicable default reftypes for", srepr(cal_ver))
    return refpath
def get_best_references(pipeline_context, header, reftypes=None):
    """Get best references for dict-like `header` relative to `pipeline_context`.

    pipeline_context  CRDS context for lookup, e.g. 'hst_0001.pmap'
    header            dict-like mapping { lookup_parameter : value }
    reftypes          If None, return all reference types; otherwise return
                      best refs for the specified list of reftypes.

    Returns { reftype : reference_basename ... }

    Raises CrdsLookupError, typically for problems with header values
    """
    header = {str(key): str(value) for (key, value) in header.items()}
    try:
        bestrefs = S.get_best_references(pipeline_context, dict(header), reftypes)
    except Exception as exc:
        raise CrdsLookupError(str(exc)) from exc
    # Due to limitations of jsonrpc, exception handling is kludged in here.
    for filetype, refname in bestrefs.items():
        if "NOT FOUND" not in refname:
            continue
        if refname.upper() == "NOT FOUND N/A":
            log.verbose("Reference type", srepr(filetype),
                        "not applicable.", verbosity=80)
        else:
            exc_str = str(refname)[len("NOT FOUND"):]
            raise CrdsLookupError(
                "Error determining best reference for",
                srepr(filetype), "=", repr(exc_str))
    return bestrefs
def get_pipeline_types(self, pipeline, exp_type):
    """Based on a pipeline .cfg filename and an EXP_TYPE, look up
    the Steps corresponding to the .cfg and extrapolate those to the
    reftypes used by those Steps.

    If there are exceptions to the reftypes assigned for a particular Step
    that depend on EXP_TYPE, return the revised types for that Step instead.

    Return [reftypes, ...]
    """
    steps = self.pipeline_cfgs_to_steps[pipeline]
    reftypes = []
    for step in steps:
        if step not in self.loaded_cfg.steps_to_reftypes_exceptions:
            # Common case: step has a fixed reftype list independent of EXP_TYPE.
            reftypes.extend(self.steps_to_reftypes[step])
        else:
            # Exceptional step: scan each exception case for an EXP_TYPE glob
            # pattern matching `exp_type` and use that case's reftypes.
            for case in self.loaded_cfg.steps_to_reftypes_exceptions[step]:
                item = list(case.values())[0]
                more_reftypes = item["reftypes"][:]
                exptypes = item["exp_types"][:]
                found = False
                for exptype_pattern in exptypes:
                    if glob_match(exptype_pattern, exp_type):
                        log.verbose("Adding exceptional types", more_reftypes,
                                    "for step", srepr(step), "case", srepr(exptype_pattern),
                                    "based on exp_type", srepr(exp_type))
                        found = True
                        reftypes.extend(more_reftypes)
                        break
                if found:
                    break
            else:
                # for/else: no case matched this exp_type -- configuration error.
                raise exceptions.CrdsPipelineTypeDeterminationError("Unhandled EXP_TYPE for exceptional Step", srepr(step))
    return reftypes
def _get_cache_filelist_and_report_errors(bestrefs):
    """Compute the list of files to download based on the `bestrefs` dictionary,
    skimming off and reporting errors, and raising an exception on the last
    error seen.

    Return the list of files to download, collapsing complex return types like
    tuples and dictionaries into a list of simple filenames.
    """
    wanted = []
    last_error = None
    for filetype, refname in bestrefs.items():
        if isinstance(refname, tuple):
            wanted.extend(list(refname))
        elif isinstance(refname, dict):
            wanted.extend(refname.values())
        elif isinstance(refname, str):
            if "NOT FOUND" not in refname:
                # Normal case: a plain reference basename.
                log.verbose("Reference type", srepr(filetype),
                            "defined as", srepr(refname))
                wanted.append(refname)
            elif "n/a" in refname.lower():
                # N/A is expected for inapplicable types; not an error.
                log.verbose("Reference type", srepr(filetype),
                            "NOT FOUND. Skipping reference caching/download.",
                            verbosity=70)
            else:
                last_error = CrdsLookupError(
                    "Error determining best reference for",
                    srepr(filetype), " = ",
                    str(refname)[len("NOT FOUND"):])
                log.error(str(last_error))
        else:
            last_error = CrdsLookupError(
                "Unhandled bestrefs return value type for", srepr(filetype))
            log.error(str(last_error))
    if last_error is not None:
        raise last_error
    return wanted
def rmap_apply(self, func, *args, **keys):
    """Apply `func()` to *args and **keys,  adding the pmap, imap, and rmap values
    associated with the elaboration of args.source_context, args.instruments, args.types.
    """
    keywords = dict(keys)
    self._setup_source_context()
    if self.args.rmaps:
        # Explicit rmap list given on the command line: process each directly.
        for rmap_name in self.args.rmaps:
            with log.error_on_exception("Failed processing rmap", srepr(rmap_name)):
                log.info("="*20, "Refactoring rmap", srepr(rmap_name), "="*20)
                rmapping = rmap.load_mapping(rmap_name)
                new_filename = self._process_rmap(func, rmapping=rmapping, **keywords)
                self._diff_and_certify(rmapping=rmapping, new_filename=new_filename,
                                       source_context=self.source_context, **keywords)
    else:
        # No explicit rmaps: elaborate instruments x types from the source context.
        pmapping = rmap.load_mapping(self.source_context)
        instruments = pmapping.selections.keys() if "all" in self.args.instruments else self.args.instruments
        for instr in instruments:
            with log.augment_exception("Failed loading imap for", repr(instr), "from",
                                       repr(self.source_context)):
                imapping = pmapping.get_imap(instr)
                types = imapping.selections.keys() if "all" in self.args.types else self.args.types
                for filekind in types:
                    # Errors on a single type are logged, not fatal to the sweep.
                    with log.error_on_exception("Failed processing rmap for", repr(filekind)):
                        #, "from", repr(imapping.basename), "of", repr(self.source_context)):
                        try:
                            rmapping = imapping.get_rmap(filekind).copy()
                        except crds.exceptions.IrrelevantReferenceTypeError as exc:
                            # Type declared N/A for this instrument: skip it.
                            log.info("Skipping type", srepr(filekind), "as N/A")
                            continue
                        log.info("="*20, "Refactoring rmap", srepr(rmapping.basename), "="*20)
                        new_filename = self._process_rmap(func, rmapping=rmapping, **keywords)
                        self._diff_and_certify(rmapping=rmapping, source_context=self.source_context,
                                               new_filename=new_filename, **keywords)
def get_best_references(pipeline_context, header, reftypes=None):
    """Get best references for dict-like `header` relative to `pipeline_context`.

    pipeline_context  CRDS context for lookup, e.g. 'hst_0001.pmap'
    header            dict-like mapping { lookup_parameter : value }
    reftypes          If None, return all reference types; otherwise return
                      best refs for the specified list of reftypes.

    Returns { reftype : reference_basename ... }

    Raises CrdsLookupError, typically for problems with header values
    """
    # Server API requires string keys and values.
    header = { str(key):str(value) for (key,value) in header.items() }
    try:
        bestrefs = S.get_best_references(pipeline_context, dict(header), reftypes)
    except Exception as exc:
        raise CrdsLookupError(str(exc)) from exc
    # Due to limitations of jsonrpc, exception handling is kludged in here.
    for filetype, refname in bestrefs.items():
        if "NOT FOUND" in refname:
            if refname.upper() == "NOT FOUND N/A":
                # N/A result: the type doesn't apply;  not an error.
                log.verbose("Reference type", srepr(filetype),
                            "not applicable.", verbosity=80)
            else:
                # Strip the "NOT FOUND" prefix to recover the server's error text.
                exc_str = str(refname)[len("NOT FOUND"):]
                raise CrdsLookupError(
                    "Error determining best reference for",
                    srepr(filetype), "=", repr(exc_str))
    return bestrefs
def _get_server_info():
    """Fetch the server info dict.

    If CRDS_CONFIG_URI is set then download that URL and load json from the
    contents.  Otherwise, call the CRDS server JSONRPC get_server_info() API.

    Returns server info dict
    """
    config_uri = config.get_uri("server_config")
    try:
        if config_uri == "none":
            # No URI override:  ask the server directly via JSONRPC.
            config_uri = f"JSON RPC service at '{get_crds_server()}'"
            info = S.get_server_info()
            log.verbose("Connected to server at", srepr(get_crds_server()))
            info["status"] = "server"
            info["connected"] = True
        else:
            # Override defined:  fetch the config content and parse it.
            log.verbose(f"Loading config from URI '{config_uri}'.")
            content = utils.get_uri_content(config_uri)
            info = ast.literal_eval(content)
            info["status"] = "uri"
            info["connected"] = False
    except Exception as exc:
        raise CrdsNetworkError(
            f"Failed downloading cache config from: {config_uri}:",
            srepr(exc)) from exc
    return info
def get_data_http(self, filename):
    """Yield the data returned from `filename` of `pipeline_context` in manageable chunks.

    Generator:  yields successive byte chunks of size config.CRDS_DATA_CHUNK_SIZE.
    Raises CrdsDownloadError on any failure;  always closes the connection.
    """
    url = self.get_url(filename)
    try:
        infile = request.urlopen(url)
        file_size = utils.human_format_number(self.catalog_file_size(filename)).strip()
        stats = utils.TimingStats()
        data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
        while data:
            stats.increment("bytes", len(data))
            status = stats.status("bytes")
            # Drop the trailing unit word so only the count is logged.
            bytes_so_far = " ".join(status[0].split()[:-1])
            log.verbose("Transferred HTTP", repr(url), bytes_so_far, "/", file_size,
                        "bytes at", status[1], verbosity=20)
            yield data
            data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
    except Exception as exc:
        raise CrdsDownloadError("Failed downloading", srepr(filename),
                                "from url", srepr(url), ":", str(exc)) from exc
    finally:
        try:
            infile.close()
        except UnboundLocalError:   # maybe the open failed, `infile` never bound.
            pass
def _get_cache_filelist_and_report_errors(bestrefs):
    """Compute the list of files to download based on the `bestrefs` dictionary,
    skimming off and reporting errors, and raising an exception on the last
    error seen.

    Return the list of files to download, collapsing complex return types like
    tuples and dictionaries into a list of simple filenames.
    """
    files = []
    pending_error = None
    for filetype, refname in bestrefs.items():
        if isinstance(refname, tuple):
            files.extend(list(refname))
        elif isinstance(refname, dict):
            files.extend(refname.values())
        elif isinstance(refname, str):
            if "NOT FOUND" in refname:
                if "n/a" in refname.lower():
                    # Inapplicable type -- expected, just skip.
                    log.verbose("Reference type", srepr(filetype),
                                "NOT FOUND. Skipping reference caching/download.",
                                verbosity=70)
                else:
                    pending_error = CrdsLookupError(
                        "Error determining best reference for",
                        srepr(filetype), " = ",
                        str(refname)[len("NOT FOUND"):])
                    log.error(str(pending_error))
            else:
                log.verbose("Reference type", srepr(filetype),
                            "defined as", srepr(refname))
                files.append(refname)
        else:
            pending_error = CrdsLookupError(
                "Unhandled bestrefs return value type for", srepr(filetype))
            log.error(str(pending_error))
    if pending_error is not None:
        raise pending_error
    return files
def get_pipeline_types(self, pipeline, exp_type):
    """Based on a pipeline .cfg filename and an EXP_TYPE, look up
    the Steps corresponding to the .cfg and extrapolate those to the
    reftypes used by those Steps.

    If there are exceptions to the reftypes assigned for a particular Step
    that depend on EXP_TYPE, return the revised types for that Step instead.

    Return [reftypes, ...]
    """
    steps = self.pipeline_cfgs_to_steps[pipeline]
    reftypes = []
    for step in steps:
        if step not in self.loaded_cfg.steps_to_reftypes_exceptions:
            # Ordinary step: fixed reftype list regardless of EXP_TYPE.
            reftypes.extend(self.steps_to_reftypes[step])
        else:
            # Step with EXP_TYPE-dependent exceptions: find the matching case.
            for case in self.loaded_cfg.steps_to_reftypes_exceptions[step]:
                item = list(case.values())[0]
                more_reftypes = item["reftypes"][:]
                exptypes = item["exp_types"][:]
                found = False
                for exptype_pattern in exptypes:
                    if glob_match(exptype_pattern, exp_type):
                        log.verbose("Adding exceptional types", more_reftypes,
                                    "for step", srepr(step), "case", srepr(exptype_pattern),
                                    "based on exp_type", srepr(exp_type))
                        found = True
                        reftypes.extend(more_reftypes)
                        break
                if found:
                    break
            else:
                # for/else fires when no exception case matched exp_type.
                raise exceptions.CrdsPipelineTypeDeterminationError("Unhandled EXP_TYPE for exceptional Step", srepr(step))
    return reftypes
def header_to_pipelines(header, context=None):
    """Given a dataset `header`,  extract the EXP_TYPE or META.EXPOSURE.TYPE keyword
    from and use it to look up the pipelines required to process it.

    Return a list of pipeline .cfg names.
    """
    with log.augment_exception("Failed determining exp_type, cal_ver from header", log.PP(header)):
        exp_type, cal_ver = _header_to_exptype_calver(header)
    config_manager = _get_config_manager(context, cal_ver)
    pipelines = _get_pipelines(exp_type, cal_ver, context)  # uncorrected
    if config_manager.pipeline_exceptions:  # correction based on extra non-EXP_TYPE params
        pipelines2 = []
        for cfg in pipelines:
            # Each exception entry may substitute a replacement .cfg for `cfg`
            # depending on the value of a non-EXP_TYPE header parameter.
            for param, exceptions in config_manager.pipeline_exceptions.items():
                exceptions = dict(exceptions)
                dont_replace = exceptions.pop("dont_replace")
                default_missing = exceptions.pop("default_missing")
                paramval = header.get(param.upper(), default_missing)
                if paramval not in dont_replace:
                    cfg = exceptions.get(cfg, cfg)
            pipelines2.append(cfg)
        pipelines = pipelines2
    log.verbose("Applicable pipelines for", srepr(exp_type), "are", srepr(pipelines))
    return pipelines
def _get_config_refpath(context, cal_ver):
    """Given CRDS `context` and calibration s/w version `cal_ver`,  identify the applicable
    SYSTEM CRDSCFG reference file, cache it, and return the file path.
    """
    # Normalize unspecified context / cal_ver to their defaults.
    context = _get_missing_context(context)
    cal_ver = _get_missing_calver(cal_ver)
    # Scan the ordered REFPATHS table for the last entry whose threshold
    # version does not exceed cal_ver;  that names the built-in fallback file.
    i = 0
    while (i < len(REFPATHS) - 1
           and not _versions_lt(cal_ver, REFPATHS[i + 1][0])):
        i += 1
    refpath = os.path.join(HERE, REFPATHS[i][1])
    try:  # Use a normal try/except because exceptions are expected.
        header = {
            "META.INSTRUMENT.NAME": "SYSTEM",
            "META.CALIBRATION_SOFTWARE_VERSION": cal_ver
        }
        pmap = crds.get_symbolic_mapping(context)
        imap = pmap.get_imap("system")
        rmapping = imap.get_rmap("crdscfg")
        ref = rmapping.get_best_ref(header)
        refpath = rmapping.locate_file(ref)
        api.dump_references(context, [ref])
    except Exception:
        # Lookup failed: the built-in refpath chosen above remains in effect.
        log.verbose_warning("Failed locating SYSTEM CRDSCFG reference",
                            "under context", repr(context), "and cal_ver",
                            repr(cal_ver) + ". Using built-in references.")
    log.verbose("Using", srepr(os.path.basename(refpath)),
                "to determine applicable default reftypes for", srepr(cal_ver))
    return refpath
def set_rmap_substitution(rmapping, new_filename, parameter_name, old_text, new_text, *args, **keys):
    """Record a substitution `old_text` -> `new_text` for `parameter_name` in
    the rmap header and write the updated rmap to `new_filename`.
    """
    log.info("Adding substitution for", srepr(parameter_name),
             "from", srepr(old_text), "to", srepr(new_text),
             "in", srepr(rmapping.basename))
    updated = rmapping.copy()
    if "substitutions" not in updated.header:
        updated.header["substitutions"] = {}
    updated.header["substitutions"][parameter_name] = {old_text: new_text}
    updated.write(new_filename)
def del_rmap_header(rmapping, new_filename, header_key):
    """Delete `header_key` from the header of `rmapping` and rewrite the rmap
    to `new_filename`.

    This is potentially lossy since rewriting the rmap may/will lose comments
    and formatting quirks.

    Raises KeyError if `header_key` is not present in the rmap header.
    """
    # NOTE: the original docstring was copy-pasted from set_rmap_header and
    # described *setting* a value; this function deletes one.
    log.verbose("Deleting header value in", srepr(rmapping.basename),
                "for", srepr(header_key))
    del rmapping.header[header_key]
    rmapping.write(new_filename)
def cat_rmap(rmapping, new_filename, header_key, *args, **keys):
    """Cat/print rmapping's source text or the value of `header_key` in the rmap header."""
    if header_key is None:
        # No key requested: dump the entire rmap source text.
        log.info("-" * 80)
        log.info("Rmap", srepr(rmapping.basename), "is:")
        log.info("-" * 80)
        log.write(str(rmapping))
    else:
        # Print just the single requested header value.
        log.info("In", srepr(rmapping.basename), "parameter",
                 srepr(header_key), "=", srepr(rmapping.header[header_key]))
def del_rmap_header(rmapping, new_filename, header_key):
    """Delete `header_key` from the header of `rmapping` and rewrite the rmap
    to `new_filename`.

    This is potentially lossy since rewriting the rmap may/will lose comments
    and formatting quirks.

    Raises KeyError if `header_key` is not present in the rmap header.
    """
    # NOTE: the original docstring was copy-pasted from set_rmap_header and
    # described *setting* a value; this function deletes one.
    log.verbose("Deleting header value in", srepr(rmapping.basename),
                "for", srepr(header_key))
    del rmapping.header[header_key]
    rmapping.write(new_filename)
def set_rmap_substitution(rmapping, new_filename, parameter_name, old_text, new_text, *args, **keys):
    """Add a header substitution mapping `old_text` to `new_text` for
    `parameter_name`, writing the result to `new_filename`.
    """
    log.info("Adding substitution for", srepr(parameter_name),
             "from", srepr(old_text), "to", srepr(new_text),
             "in", srepr(rmapping.basename))
    revised = rmapping.copy()
    if "substitutions" not in revised.header:
        revised.header["substitutions"] = {}
    revised.header["substitutions"][parameter_name] = {old_text: new_text}
    revised.write(new_filename)
def cat_rmap(rmapping, new_filename, header_key, *args, **keys):
    """Cat/print rmapping's source text or the value of `header_key` in the rmap header."""
    if header_key is None:
        separator = "-" * 80
        log.info(separator)
        log.info("Rmap", srepr(rmapping.basename), "is:")
        log.info(separator)
        log.write(str(rmapping))
    else:
        log.info("In", srepr(rmapping.basename), "parameter",
                 srepr(header_key), "=", srepr(rmapping.header[header_key]))
def exptype_to_reftypes(self, exp_type):
    """For a given EXP_TYPE string, return a list of reftypes needed to process that
    EXP_TYPE through the data levels appropriate for that EXP_TYPE.

    Return [reftypes, ... ]
    """
    applicable = self._crdscfg.exptypes_to_reftypes[exp_type]
    log.verbose("Applicable reftypes for", srepr(exp_type),
                "determined by", srepr(os.path.basename(self._refpath)),
                "are", srepr(applicable))
    return applicable
def exptype_to_pipelines(self, exp_type):
    """For a given EXP_TYPE string, return a list of pipeline .cfg's needed to
    process that EXP_TYPE through the appropriate data levels.

    Return [.cfg's, ... ]
    """
    applicable = self._crdscfg.exptypes_to_pipelines[exp_type]
    log.verbose("Applicable pipelines for", srepr(exp_type),
                "determined by", srepr(os.path.basename(self._refpath)),
                "are", srepr(applicable))
    return applicable
def exptype_to_reftypes(self, exp_type):
    """For a given EXP_TYPE string, return a list of reftypes needed to process that
    EXP_TYPE through the data levels appropriate for that EXP_TYPE.

    Return [reftypes, ... ]
    """
    result = self._crdscfg.exptypes_to_reftypes[exp_type]
    log.verbose("Applicable reftypes for", srepr(exp_type),
                "determined by", srepr(os.path.basename(self._refpath)),
                "are", srepr(result))
    return result
def reftype_to_pipelines(reftype, cal_ver=None, context=None):
    """Given `reftype` and `cal_ver` and `context`, locate the appropriate
    SYSTEM CRDSCFG reference file and determine the sequence of pipeline
    .cfgs associated with that reference type.

    NOTE: the original docstring and log label said "EXP_TYPE" although the
    parameter is a reference type (copy-paste from the exp_type variant);
    both are corrected here.
    """
    context = _get_missing_context(context)
    cal_ver = _get_missing_calver(cal_ver)
    with log.augment_exception("Failed determining required pipeline .cfgs for",
                               "REFTYPE", srepr(reftype),
                               "CAL_VER", srepr(cal_ver)):
        config_manager = _get_config_manager(context, cal_ver)
        return config_manager.reftype_to_pipelines(reftype)
def del_rmap_parameter(rmapping, new_filename, parameter, *args, **keys):
    """Delete `parameter_name` from the parkey item of the `types` of the
    specified `instruments` in `context`.
    """
    log.info("Deleting parameter", repr(parameter), "from", repr(rmapping.basename))
    parkey = rmapping.parkey
    i, j = get_parameter_index(parkey, parameter)
    # Rebuild the tuple-of-tuples parkey with element j removed from group i.
    trimmed_group = parkey[i][:j] + parkey[i][j + 1:]
    del_parkey = parkey[:i] + (trimmed_group,) + parkey[i + 1:]
    log.verbose("Replacing", srepr(parkey), "with", srepr(del_parkey),
                "in", srepr(rmapping.basename))
    rmapping.header["parkey"] = del_parkey
    rmapping.selector.delete_match_param(parameter)
    rmapping.write(new_filename)
def set_rmap_header(rmapping, new_filename, header_key, header_value, *args, **keys):
    """Set the value of `key` in `filename` to `new_value` and rewrite the rmap.
    This is potentially lossy since rewriting the rmap may/will lose comments
    and formatting quirks.
    """
    log.verbose("Setting header value in", srepr(rmapping.basename), "for",
                srepr(header_key), "=", srepr(header_value))
    # SECURITY NOTE(review): eval() executes arbitrary expressions -- acceptable
    # only because header_value originates from a trusted operator command line.
    # Falls back to storing the raw string when evaluation fails.
    try:
        rmapping.header[header_key] = eval(header_value)
    except Exception:
        rmapping.header[header_key] = header_value
    rmapping.write(new_filename)
def get_reftypes(exp_type, cal_ver=None, context=None):
    """Given `exp_type` and `cal_ver` and `context`, locate the appropriate
    SYSTEM CRDSCFG reference file and determine the reference types required to
    process every pipeline Step nominally associated with that exp_type.
    """
    context = _get_missing_context(context)
    cal_ver = _get_missing_calver(cal_ver)
    # Failures inside this block are logged as warnings;  the empty fallback
    # below is returned in that case.
    with log.warn_on_exception("Failed determining required reftypes from",
                               "EXP_TYPE", srepr(exp_type),
                               "CAL_VER", srepr(cal_ver)):
        manager = _get_config_manager(context, cal_ver)
        return manager.exptype_to_reftypes(exp_type)
    return []
def rmap_apply(self, func, *args, **keys):
    """Apply `func()` to *args and **keys,  adding the pmap, imap, and rmap values
    associated with the elaboration of args.source_context, args.instruments, args.types.
    """
    keywords = dict(keys)
    self._setup_source_context()
    if self.args.rmaps:
        # Process explicitly named rmaps from the command line.
        for rmap_name in self.args.rmaps:
            with log.error_on_exception("Failed processing rmap", srepr(rmap_name)):
                log.info("=" * 20, "Refactoring rmap", srepr(rmap_name), "=" * 20)
                rmapping = rmap.load_mapping(rmap_name)
                new_filename = self._process_rmap(func, rmapping=rmapping, **keywords)
                self._diff_and_certify(rmapping=rmapping, new_filename=new_filename,
                                       source_context=self.source_context, **keywords)
    else:
        # Sweep instruments x types elaborated from the source context.
        pmapping = rmap.load_mapping(self.source_context)
        instruments = pmapping.selections.keys(
        ) if "all" in self.args.instruments else self.args.instruments
        for instr in instruments:
            with log.augment_exception("Failed loading imap for", repr(instr),
                                       "from", repr(self.source_context)):
                imapping = pmapping.get_imap(instr)
                types = imapping.selections.keys(
                ) if "all" in self.args.types else self.args.types
                for filekind in types:
                    # Per-type failures are logged and do not abort the sweep.
                    with log.error_on_exception("Failed processing rmap for", repr(filekind)):
                        #, "from", repr(imapping.basename), "of", repr(self.source_context)):
                        try:
                            rmapping = imapping.get_rmap(filekind).copy()
                        except crds.exceptions.IrrelevantReferenceTypeError as exc:
                            # Type is declared N/A for this instrument.
                            log.info("Skipping type", srepr(filekind), "as N/A")
                            continue
                        log.info("=" * 20, "Refactoring rmap", srepr(rmapping.basename), "=" * 20)
                        new_filename = self._process_rmap(func, rmapping=rmapping, **keywords)
                        self._diff_and_certify(
                            rmapping=rmapping, source_context=self.source_context,
                            new_filename=new_filename, **keywords)
def set_rmap_header(rmapping, new_filename, header_key, header_value, *args, **keys):
    """Set the value of `key` in `filename` to `new_value` and rewrite the rmap.
    This is potentially lossy since rewriting the rmap may/will lose comments
    and formatting quirks.
    """
    log.verbose("Setting header value in", srepr(rmapping.basename), "for",
                srepr(header_key), "=", srepr(header_value))
    # SECURITY NOTE(review): eval() runs arbitrary code;  tolerated because the
    # value comes from a trusted operator command line.  If evaluation fails
    # the raw string is stored instead.
    try:
        rmapping.header[header_key] = eval(header_value)
    except Exception:
        rmapping.header[header_key] = header_value
    rmapping.write(new_filename)
def _poll_status(self):
    """Use network API to pull status messages from server.

    Returns the (possibly empty) list of new messages and advances
    self._last_id to the highest message id seen.
    """
    try:
        messages = api.jpoll_pull_messages(self.args.key, since_id=str(self._last_id))
        if messages:
            # builtin max() suffices for a small list of Python ints;
            # np.max added an unnecessary numpy round-trip here.
            self._last_id = max(int(msg.id) for msg in messages)
        return messages
    except exceptions.StatusChannelNotFoundError:
        log.verbose("Channel", srepr(self.args.key),
                    "not found. Waiting for processing to start.")
        return []
    except exceptions.ServiceError as exc:
        log.verbose("Unhandled RPC exception for", srepr(self.args.key), "is", str(exc))
        raise
def get_level_pipeline(self, level, exp_type):
    """Interpret the level_pipeline_exptypes data structure relative to
    processing `level` and `exp_type` to determine a pipeline .cfg file.

    Return [ pipeline .cfg ]  or  []
    """
    for mapping in self.loaded_cfg.level_pipeline_exptypes[level]:
        for pipeline, exptypes in mapping.items():
            if any(glob_match(pattern, exp_type) for pattern in exptypes):
                return [pipeline]
    log.error("Unhandled EXP_TYPE", srepr(exp_type), "for", srepr(level))
    return []
def _poll_status(self):
    """Use network API to pull status messages from server.

    Returns the (possibly empty) list of new messages and advances
    self._last_id past the highest message id seen.
    """
    try:
        messages = api.jpoll_pull_messages(self.args.key, since_id=str(self._last_id))
        if messages:
            # builtin max() is sufficient for a small list of Python ints;
            # np.max was an unnecessary numpy dependency on this path.
            self._last_id = max(int(msg.id) for msg in messages)
        return messages
    except exceptions.StatusChannelNotFoundError:
        log.verbose("Channel", srepr(self.args.key),
                    "not found. Waiting for processing to start.")
        return []
    except exceptions.ServiceError as exc:
        log.verbose("Unhandled RPC exception for", srepr(self.args.key), "is", str(exc))
        raise
def get_level_pipeline(self, level, exp_type):
    """Interpret the level_pipeline_exptypes data structure relative to
    processing `level` and `exp_type` to determine a pipeline .cfg file.

    Return [ pipeline .cfg ]  or  []
    """
    candidates = self.loaded_cfg.level_pipeline_exptypes[level]
    for entry in candidates:
        for cfg_name, patterns in entry.items():
            for pattern in patterns:
                if glob_match(pattern, exp_type):
                    return [cfg_name]
    log.error("Unhandled EXP_TYPE", srepr(exp_type), "for", srepr(level))
    return []
def del_rmap_parameter(rmapping, new_filename, parameter, *args, **keys):
    """Delete `parameter_name` from the parkey item of the `types` of the
    specified `instruments` in `context`.
    """
    log.info("Deleting parameter", repr(parameter), "from", repr(rmapping.basename))
    parkey = rmapping.parkey
    i, j = get_parameter_index(parkey, parameter)
    # Drop element j from group i while preserving the surrounding groups.
    shrunk = parkey[i][:j] + parkey[i][j + 1:]
    del_parkey = parkey[:i] + (shrunk,) + parkey[i + 1:]
    log.verbose("Replacing", srepr(parkey), "with", srepr(del_parkey),
                "in", srepr(rmapping.basename))
    rmapping.header["parkey"] = del_parkey
    rmapping.selector.delete_match_param(parameter)
    rmapping.write(new_filename)
def load_tpn_lines(fname, replacements=()):
    """Load the lines of a CDBS .tpn file,  ignoring #-comments, blank lines,
    and joining lines ending in \\.  If a line begins with "include",  the
    second word should be a base filename that refers to a file in the same
    directory as `fname`.  The lines of the include file are recursively
    included.
    """
    log.verbose("Loading .tpn lines from", log.srepr(fname),
                "with replacements", log.srepr(replacements), verbosity=80)
    lines = []
    append = False   # True when the previous line ended with a continuation "\"
    dirname = os.path.dirname(fname)
    with open(fname) as pfile:
        for line in pfile:
            line = line.strip()
            if line.startswith("#") or not line:
                continue
            if line.startswith("include"):
                # include tpn_file  -- recursively splice in the included file.
                fname2 = os.path.join(dirname, line.split()[1])
                lines += load_tpn_lines(fname2, replacements)
                continue
            elif line.startswith("replace"):
                # replace orig_str new_str  -- accumulate a regex substitution.
                orig, replaced = replacement = tuple(line.split()[1:])
                if replacement not in replacements:
                    for replacement2 in replacements:
                        orig2, replaced2 = replacement2
                        if orig == orig2 and replaced != replaced2:
                            # Same source pattern mapped to two different targets.
                            raise exceptions.InconsistentTpnReplaceError(
                                "In", repr(fname),
                                "Tpn replacement directive", repr(replacement),
                                "conflicts with directive", repr(replacement2))
                    else:
                        # for/else: no conflict found -- record the new directive.
                        replacements = replacements + (replacement,)
                else:
                    log.verbose("Duplicate replacement", replacement, verbosity=80)
                continue
            # Apply every accumulated replacement to this content line.
            for (orig, new) in replacements:
                line = re.sub(orig, new, line)
            if append:
                # Join onto the previous line, dropping its trailing "\".
                lines[-1] = lines[-1][:-1].strip() + line
            else:
                lines.append(line)
            append = line.endswith("\\")
    return lines
def get_flex_uri(filename, observatory=None):
    """If environment variables define the base URI for `filename`, append
    filename and return the combined URI.

    If no environment override has been specified, obtain the base URI from
    the server_info config, append filename, and return the combined URI.

    If `filename` is a config file and no environment override is defined,
    return "none".
    """
    if observatory is None:
        observatory = get_default_observatory()
    uri = config.get_uri(filename)
    if uri == "none":
        info = get_server_info()
        # Select the server_info URL key matching the kind of file.
        if config.is_config(filename):
            url_key = "config_url"
        elif config.is_pickle(filename):
            url_key = "pickle_url"
        elif config.is_mapping(filename):
            url_key = "mapping_url"
        elif config.is_reference(filename):
            url_key = "reference_url"
        else:
            raise CrdsError("Can't identify file type for:", srepr(filename))
        uri = _unpack_info(info, url_key, observatory)
        if uri == "none":
            return uri
    if not uri.endswith("/"):
        uri += "/"
    return uri + filename
def set_rmap_parkey(rmapping, new_filename, parkey, *args, **keys):
    """Set the parkey of `rmapping` to `parkey` and write out to `new_filename`.
    """
    log.info("Setting parkey, removing all references from", srepr(rmapping.basename))
    # SECURITY NOTE(review): eval() of the parkey string is acceptable only
    # because it comes from a trusted operator command line.
    pktuple = eval(parkey)
    required_keywords = tuple(utils.flatten(pktuple))
    refnames = rmapping.reference_names()
    # Capture each reference's matching header for the *new* parkey before
    # the references are removed from the rmap.
    references_headers = { refname : get_refactoring_header(rmapping.filename, refname, required_keywords)
                           for refname in refnames }
    # Empty the rmap, install the new parkey, then re-insert every reference
    # so each lands under the new matching hierarchy.  Order matters here.
    rmapping = rmap_delete_references(rmapping.filename, new_filename, refnames)
    log.info("Setting parkey", srepr(parkey), "in", srepr(rmapping.basename))
    rmapping.header["parkey"] = pktuple
    rmapping.write(new_filename)
    rmapping = rmap.load_mapping(new_filename)
    rmapping = rmap_insert_references_by_matches(new_filename, new_filename, references_headers)
    return rmapping
def scan_for_nonsubmitted_ingests(self, ingest_info):
    """Check for junk in the submitter's ingest directory,  left over files
    not in the current submission and fail if found.
    """
    # Set membership makes the per-file check O(1) instead of O(n).
    submitted_basenames = {os.path.basename(filepath) for filepath in self.files}
    for ingested in ingest_info.keys():
        if ingested not in submitted_basenames:
            log.fatal_error(
                "Non-submitted file", log.srepr(ingested),
                "is already in the CRDS server's ingest directory. Delete it (--wipe-existing-files?) or submit it.")
def load_tpn_lines(fname, replacements=()):
    """Load the lines of a CDBS .tpn file,  ignoring #-comments, blank lines,
    and joining lines ending in \\.  If a line begins with "include",  the
    second word should be a base filename that refers to a file in the same
    directory as `fname`.  The lines of the include file are recursively
    included.
    """
    log.verbose("Loading .tpn lines from", log.srepr(fname),
                "with replacements", log.srepr(replacements), verbosity=80)
    lines = []
    append = False   # set when the preceding line ended with "\" continuation
    dirname = os.path.dirname(fname)
    with open(fname) as pfile:
        for line in pfile:
            line = line.strip()
            if line.startswith("#") or not line:
                continue
            if line.startswith("include"):
                # include tpn_file  -- splice in the referenced file recursively.
                fname2 = os.path.join(dirname, line.split()[1])
                lines += load_tpn_lines(fname2, replacements)
                continue
            elif line.startswith("replace"):
                # replace orig_str new_str  -- register a regex substitution.
                orig, replaced = replacement = tuple(line.split()[1:])
                if replacement not in replacements:
                    for replacement2 in replacements:
                        orig2, replaced2 = replacement2
                        if orig == orig2 and replaced != replaced2:
                            # Conflicting targets for the same source pattern.
                            raise exceptions.InconsistentTpnReplaceError(
                                "In", repr(fname),
                                "Tpn replacement directive", repr(replacement),
                                "conflicts with directive", repr(replacement2))
                    else:
                        # for/else: loop finished without conflict -- accept it.
                        replacements = replacements + (replacement,)
                else:
                    log.verbose("Duplicate replacement", replacement, verbosity=80)
                continue
            # Apply all accumulated substitutions to this content line.
            for (orig, new) in replacements:
                line = re.sub(orig, new, line)
            if append:
                # Continuation: merge with prior line, dropping its trailing "\".
                lines[-1] = lines[-1][:-1].strip() + line
            else:
                lines.append(line)
            append = line.endswith("\\")
    return lines
def _submission(self, relative_url):
    """Do a generic submission re-post to the specified relative_url."""
    assert self.args.description is not None, "You must supply a --description for this function."
    self.ingest_files()
    log.info("Posting web request for", srepr(relative_url))
    completion_args = self.connection.repost_start(
        relative_url, **self.get_submission_args())
    # give POST time to complete send, not response
    time.sleep(10)
    return completion_args
def download(self, name, localpath): """Download a single file.""" # This code is complicated by the desire to blow away failed downloads. For the specific # case of KeyboardInterrupt, the file needs to be blown away, but the interrupt should not # be re-characterized so it is still un-trapped elsewhere under normal idioms which try *not* # to trap KeyboardInterrupt. assert not config.get_cache_readonly(), "Readonly cache, cannot download files " + repr(name) try: utils.ensure_dir_exists(localpath) return proxy.apply_with_retries(self.download_core, name, localpath) except Exception as exc: self.remove_file(localpath) raise CrdsDownloadError( "Error fetching data for", srepr(name), "at CRDS server", srepr(get_crds_server()), "with mode", srepr(config.get_download_mode()), ":", str(exc)) from exc except: # mainly for control-c, catch it and throw it. self.remove_file(localpath) raise
def download(self, name, localpath): """Download a single file.""" # This code is complicated by the desire to blow away failed downloads. For the specific # case of KeyboardInterrupt, the file needs to be blown away, but the interrupt should not # be re-characterized so it is still un-trapped elsewhere under normal idioms which try *not* # to trap KeyboardInterrupt. assert not config.get_cache_readonly(), "Readonly cache, cannot download files " + repr(name) try: utils.ensure_dir_exists(localpath) return proxy.apply_with_retries(self.download_core, name, localpath) except Exception as exc: self.remove_file(localpath) raise CrdsDownloadError( "Error fetching data for", srepr(name), "at CRDS server", srepr(get_crds_server()), "with mode", srepr(config.get_download_mode()), ":", str(exc)) from exc except: # mainly for control-c, catch it and throw it. self.remove_file(localpath) raise
def verify_file(self, filename, localpath):
    """Verify downloaded `filename` at `localpath` against the server's
    recorded length and sha1sum, raising CrdsDownloadError on mismatch.
    """
    remote_info = self.info_map[filename]
    original_length = int(remote_info["size"])
    local_length = os.stat(localpath).st_size
    if config.get_length_flag() and original_length != local_length:
        raise CrdsDownloadError(
            "downloaded file size", local_length,
            "does not match server size", original_length)
    if not config.get_checksum_flag():
        log.verbose("Skipping sha1sum with CRDS_DOWNLOAD_CHECKSUMS=False")
        return
    original_sha1sum = remote_info["sha1sum"]
    if original_sha1sum in ["", "none"]:
        log.verbose("Skipping sha1sum check since server doesn't know it.")
        return
    local_sha1sum = utils.checksum(localpath)
    if original_sha1sum != local_sha1sum:
        raise CrdsDownloadError(
            "downloaded file", srepr(filename),
            "sha1sum", srepr(local_sha1sum),
            "does not match server sha1sum", srepr(original_sha1sum))
def _get_pipelines(exp_type, cal_ver=None, context=None):
    """Return the sequence of pipeline .cfgs required to process `exp_type`.

    Locates the appropriate SYSTEM CRDSCFG reference based on `context` and
    `cal_ver` and asks its config manager for the pipeline list.

    NOTE: This is an uncorrected result;  config_manager.pipeline_exceptions
    is used to alter this based on other header parameters.
    """
    with log.augment_exception(
            "Failed determining required pipeline .cfgs for",
            "EXP_TYPE", srepr(exp_type)):
        manager = _get_config_manager(context, cal_ver)
        return manager.exptype_to_pipelines(exp_type)
def verify_file(self, filename, localpath):
    """Check downloaded `filename` at `localpath`:  size must match the
    server's (when length checking is enabled) and sha1sum must match
    (when checksum checking is enabled and the server knows the sum).
    """
    info = self.info_map[filename]
    expected_length = int(info["size"])
    actual_length = os.stat(localpath).st_size
    if config.get_length_flag() and expected_length != actual_length:
        raise CrdsDownloadError(
            "downloaded file size", actual_length,
            "does not match server size", expected_length)
    if not config.get_checksum_flag():
        log.verbose("Skipping sha1sum with CRDS_DOWNLOAD_CHECKSUMS=False")
    elif info["sha1sum"] in ["", "none"]:
        log.verbose("Skipping sha1sum check since server doesn't know it.")
    else:
        expected_sha1sum = info["sha1sum"]
        actual_sha1sum = utils.checksum(localpath)
        if expected_sha1sum != actual_sha1sum:
            raise CrdsDownloadError(
                "downloaded file", srepr(filename),
                "sha1sum", srepr(actual_sha1sum),
                "does not match server sha1sum", srepr(expected_sha1sum))
def rmap_check_modifications(old_rmap, new_rmap, old_ref, new_ref, expected=("add",)):
    """Check the differences between `old_rmap` and `new_rmap` and make sure they're
    limited to the change kinds listed in `expected` ("add" or "replace").

    For "replace", additionally verify that `new_ref` appears in `new_rmap`
    exactly as many times as `old_ref` appeared in `old_rmap`.

    Returns as_expected,  True IFF all rmap modifications match `expected`.
    """
    def _mention_count(rmap_path, refpath):
        # Count rmap lines mentioning the reference file's basename.
        basename = os.path.basename(refpath)
        with open(rmap_path) as handle:
            return sum(1 for line in handle if basename in line)

    as_expected = True
    for difference in diff.mapping_diffs(old_rmap, new_rmap):
        actual = diff.diff_action(difference)
        # White-list check so a bogus `expected` value fails loudly.
        if actual not in expected:
            log.error("Expected one of", srepr(expected), "but got", srepr(actual),
                      "from change", srepr(difference))
            as_expected = False
    old_count = _mention_count(old_rmap, old_ref)
    new_count = _mention_count(new_rmap, new_ref)
    if "replace" in expected and old_count != new_count:
        log.error("Replacement COUNT DIFFERENCE replacing", srepr(old_ref),
                  "with", srepr(new_ref), "in", srepr(old_rmap),
                  old_count, "vs.", new_count)
        as_expected = False
    return as_expected
def scan_for_nonsubmitted_ingests(self, ingest_info):
    """Check for junk in the submitter's ingest directory:  any server-side
    file not part of the current submission is a fatal error.
    """
    submitted_basenames = {os.path.basename(filepath) for filepath in self.files}
    for ingested in ingest_info:
        if ingested not in submitted_basenames:
            log.fatal_error(
                "Non-submitted file", log.srepr(ingested),
                "is already in the CRDS server's ingest directory. Delete it (--wipe-existing-files?) or submit it."
            )
def rmap_delete_references(old_rmap, new_rmap, deleted_references):
    """Delete every file in `deleted_references` from the rmap at `old_rmap`
    and write the result to `new_rmap`.

    Returns the new ReferenceMapping named `new_rmap`.
    """
    old = rmap.load_mapping(old_rmap, ignore_checksum=True)
    new = old
    for reference in deleted_references:
        baseref = os.path.basename(reference)
        log.info("Deleting", srepr(baseref), "from", srepr(new.name))
        with log.augment_exception("In reference", srepr(baseref)):
            new = new.delete(reference)
    new.header["derived_from"] = old.basename
    log.verbose("Writing", srepr(new_rmap))
    new.write(new_rmap)
    # Sanity check:  no deleted basename may survive in the formatted rmap.
    formatted = new.format()
    for reference in deleted_references:
        basename = os.path.basename(reference)
        assert basename not in formatted, \
            "Rules update failure. Deleted " + srepr(basename) + " still appears in new rmap."
    return new
def set_rmap_parkey(rmapping, new_filename, parkey, *args, **keys):
    """Set the parkey of `rmapping` to `parkey` and write out to `new_filename`.

    `parkey` is the string form of a (possibly nested) tuple of parameter
    keyword names, e.g. "(('META.INSTRUMENT.DETECTOR',), ('META.OBSERVATION.DATE',))".

    The rmap is rewritten by deleting all references, changing the parkey
    header, then re-inserting the references using headers captured under the
    old parkey.  Returns the final reloaded ReferenceMapping.
    """
    import ast
    log.info("Setting parkey, removing all references from", srepr(rmapping.basename))
    # Parse the parkey tuple safely;  ast.literal_eval only accepts Python
    # literals, closing the arbitrary-code-execution hole that eval() had.
    pktuple = ast.literal_eval(parkey)
    required_keywords = tuple(utils.flatten(pktuple))
    refnames = rmapping.reference_names()
    # Capture per-reference matching headers before the references are dropped.
    references_headers = {
        refname: get_refactoring_header(rmapping.filename, refname, required_keywords)
        for refname in refnames
    }
    rmapping = rmap_delete_references(rmapping.filename, new_filename, refnames)
    log.info("Setting parkey", srepr(parkey), "in", srepr(rmapping.basename))
    rmapping.header["parkey"] = pktuple
    rmapping.write(new_filename)
    rmapping = rmap.load_mapping(new_filename)
    rmapping = rmap_insert_references_by_matches(new_filename, new_filename, references_headers)
    return rmapping
def _setup_source_context(self):
    """Default the --source-context if necessary, then translate any symbolic
    name to a literal .pmap name, e.g. jwst-edit --> jwst_0109.pmap.  Then
    optionally sync the rules files to the local cache.
    """
    if self.args.source_context is None:
        source = self.observatory + "-edit"
        self.source_context = source
        log.info("Defaulting --source-context to", srepr(self.source_context))
    else:
        self.source_context = self.args.source_context
    self.source_context = self.resolve_context(self.source_context)
    if self.args.sync_files:
        sync_command = "crds.sync --contexts {}".format(self.source_context)
        errs = sync.SyncScript(sync_command)()
        assert not errs, "Errors occurred while syncing all rules to CRDS cache."
def rmap_delete_references(old_rmap, new_rmap, deleted_references):
    """Remove all files in `deleted_references` from `old_rmap`, writing the
    modified rules to `new_rmap` and returning the new ReferenceMapping.
    """
    original = rmap.load_mapping(old_rmap, ignore_checksum=True)
    revised = original
    for reference in deleted_references:
        baseref = os.path.basename(reference)
        log.info("Deleting", srepr(baseref), "from", srepr(revised.name))
        with log.augment_exception("In reference", srepr(baseref)):
            revised = revised.delete(reference)
    revised.header["derived_from"] = original.basename
    log.verbose("Writing", srepr(new_rmap))
    revised.write(new_rmap)
    formatted = revised.format()
    # Verify every deletion actually took effect in the output text.
    for reference in deleted_references:
        reference = os.path.basename(reference)
        assert reference not in formatted, \
            "Rules update failure. Deleted " + srepr(reference) + " still appears in new rmap."
    return revised
def scan_for_nonsubmitted_ingests(self, ingest_info):
    """Check for junk in the submitter's ingest directory:  log an ERROR for
    every server-side file not in the current submission, then raise
    CrdsExtraneousFileError if any were found.
    """
    submitted_basenames = [os.path.basename(filepath) for filepath in self.files]
    extraneous = [name for name in ingest_info if name not in submitted_basenames]
    for name in extraneous:
        log.error(log.format(
            "Non-submitted file", log.srepr(name),
            "is already in the CRDS server's ingest directory. Delete it (--wipe-existing-files or web page Upload Files panel) or submit it."))
    if extraneous:
        raise exceptions.CrdsExtraneousFileError(
            "Unexpected files already delivered to CRDS server. See ERROR messages.")
def rmap_insert_references(old_rmap, new_rmap, inserted_references):
    """Insert or replace all files in `inserted_references` into the rmap at
    `old_rmap` and write the result to `new_rmap`.

    Returns the new ReferenceMapping named `new_rmap`.
    """
    old = rmap.fetch_mapping(old_rmap, ignore_checksum=True)
    new = old
    new.header["derived_from"] = old.basename
    for reference in inserted_references:
        baseref = os.path.basename(reference)
        with log.augment_exception("In reference", srepr(baseref)):
            log.info("Inserting", srepr(baseref), "into", srepr(new.name))
            new = new.insert_reference(reference)
    log.verbose("Writing", srepr(new_rmap))
    new.write(new_rmap)
    # Verify every insertion shows up in the formatted rules text.
    formatted = new.format()
    for reference in inserted_references:
        basename = os.path.basename(reference)
        assert basename in formatted, \
            "Rules update failure. " + srepr(basename) + " does not appear in new rmap." \
            " May be identical match with other submitted references."
    return new
def rmap_insert_references(old_rmap, new_rmap, inserted_references):
    """Add (or replace) each file of `inserted_references` in `old_rmap`,
    writing the updated rules to `new_rmap` and returning the new
    ReferenceMapping.
    """
    original = rmap.fetch_mapping(old_rmap, ignore_checksum=True)
    updated = original
    updated.header["derived_from"] = original.basename
    for reference in inserted_references:
        baseref = os.path.basename(reference)
        with log.augment_exception("In reference", srepr(baseref)):
            log.info("Inserting", srepr(baseref), "into", srepr(updated.name))
            updated = updated.insert_reference(reference)
    log.verbose("Writing", srepr(new_rmap))
    updated.write(new_rmap)
    formatted = updated.format()
    for reference in inserted_references:
        reference = os.path.basename(reference)
        assert reference in formatted, \
            "Rules update failure. " + srepr(reference) + " does not appear in new rmap." \
            " May be identical match with other submitted references."
    return updated
def get_server_info():
    """Return a dictionary of critical parameters about the server such as:

    operational_context  - the context in use in the operational pipeline

    edit_context         - the context which was last edited, not
                           necessarily archived or operational yet.

    crds*                - the CRDS package versions on the server.

    This is intended as a single flexible network call which can be used to
    initialize a higher level getreferences() call,  providing information on
    what context, software, and network mode should be used for processing.
    """
    try:
        info = S.get_server_info()
    except ServiceError as exc:
        raise CrdsNetworkError(
            "network connection failed:", srepr(get_crds_server()),
            ":", srepr(exc)) from exc
    info["server"] = get_crds_server()
    # Flatten the {"unchecked": url} wrappers returned by the service.
    for key in ("reference_url", "mapping_url"):
        info[key] = info.pop(key)["unchecked"]
    return info
def get_refactoring_header(rmapping, refname, required_keywords):
    """Create a composite matching header for `refname`:  values derived from
    the reference file contents, overridden by any values as they appear in
    the rmap's match paths.
    """
    rmapping = rmap.asmapping(rmapping)
    # Fallback source of information:  the reference file's own header.
    header = rmapping.get_refactor_header(
        rmap.locate_file(refname, rmapping.observatory),
        extra_keys=("META.OBSERVATION.DATE", "META.OBSERVATION.TIME",
                    "DATE-OBS", "TIME-OBS") + required_keywords)
    # Primary source of information:  the original rmap's matching values.
    match_headers = matches.find_match_paths_as_dict(rmapping.filename, refname)
    assert len(match_headers) == 1, \
        "Can't refactor file with more than one match: " + srepr(refname)
    # rmap values override anything duplicated in the reference file header.
    header.update(match_headers[0])
    return header
def plugin_download(self, filename, localpath):
    """Run an external program defined by CRDS_DOWNLOAD_PLUGIN to download
    `filename` to `localpath`, substituting ${SOURCE_URL} and ${OUTPUT_PATH}
    in the plugin command template.
    """
    source_url = self.get_url(filename)
    plugin_cmd = config.get_download_plugin()
    plugin_cmd = plugin_cmd.replace("${SOURCE_URL}", source_url)
    plugin_cmd = plugin_cmd.replace("${OUTPUT_PATH}", localpath)
    log.verbose("Running download plugin:", repr(plugin_cmd))
    status = os.system(plugin_cmd)
    if status == 0:
        return
    # NOTE(review): os.system() returns a raw wait status on POSIX, so a
    # plugin exiting with code 2 usually surfaces as 512 here -- confirm
    # that status == 2 really identifies an interrupt on supported platforms.
    if status == 2:
        raise KeyboardInterrupt("Interrupted plugin.")
    raise CrdsDownloadError(
        "Plugin download fail status =", repr(status),
        "with command:", srepr(plugin_cmd))
def get_data_http(self, filename):
    """Generator yielding the data of `filename` fetched over HTTP in
    manageable chunks, logging transfer progress as it goes.
    """
    url = self.get_url(filename)
    try:
        infile = request.urlopen(url)
        file_size = utils.human_format_number(
            self.catalog_file_size(filename)).strip()
        stats = utils.TimingStats()
        while True:
            data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
            if not data:
                break
            stats.increment("bytes", len(data))
            status = stats.status("bytes")
            bytes_so_far = " ".join(status[0].split()[:-1])
            log.verbose("Transferred HTTP", repr(url), bytes_so_far, "/",
                        file_size, "bytes at", status[1], verbosity=20)
            yield data
    except Exception as exc:
        raise CrdsDownloadError(
            "Failed downloading", srepr(filename),
            "from url", srepr(url), ":", str(exc)) from exc
    finally:
        try:
            infile.close()
        except UnboundLocalError:  # maybe the open itself failed.
            pass
def _process_rmap(self, func, rmapping, *args, **keys):
    """Execute `func` on a single `rmapping` passing along *args and **keys.

    Determines the output filename (in place or a local copy in "."), resumes
    from an existing local copy if present, optionally applies --fixers
    first, then calls `func`.  Returns the output rmap filename.

    NOTE(review): this relies on `keywords.update(locals())` snapshots taken
    at specific points, so `func` and the fixers receive `rmapping`,
    `rmapping_org`, `new_filename`, etc. as keyword arguments;  statement
    order here is load-bearing.
    """
    keywords = dict(keys)
    # Preserve the original mapping object before any local-copy reloads.
    rmapping_org = rmapping
    new_filename = rmapping.filename if self.args.inplace else os.path.join(".", rmapping.basename)
    if os.path.exists(new_filename):
        # Resume refactoring from a previously written local copy.
        log.info("Continuing refactoring from local copy", srepr(new_filename))
        rmapping = rmap.load_mapping(new_filename)
    keywords.update(locals())
    fixers = self.args.fixers
    if fixers:
        # Reload and apply header fixers, refreshing the keyword snapshot
        # so fixers see the reloaded mapping.
        rmapping = rmap.load_mapping(rmapping.filename)
        keywords.update(locals())
        apply_rmap_fixers(*args, **keywords)
    func(*args, **keywords)
    return new_filename