def rewrite(self, filename, uniqname):
    """Add a FITS checksum to `filename`."""
    with data_file.fits_open(filename, mode="readonly", checksum=self.args.verify_file,
                             do_not_scale_image_data=True) as hdus:
        verify_mode = "fix+warn" if not self.args.fits_errors else "fix+exception"
        if self.args.verify_file:
            hdus.verify(verify_mode)
        basefile = os.path.basename(filename)
        baseuniq = os.path.basename(uniqname)
        if self.args.add_keywords:
            now = datetime.datetime.utcnow()
            hdus[0].header["FILENAME"] = baseuniq
            hdus[0].header["ROOTNAME"] = os.path.splitext(baseuniq)[0].upper()
            hdus[0].header["HISTORY"] = "{0} renamed to {1} on {2} {3} {4}".format(
                basefile, baseuniq, MONTHS[now.month - 1], now.day, now.year)
        if self.args.output_path:
            uniqname = os.path.join(self.args.output_path, baseuniq)
        try:
            log.info("Rewriting", self.format_file(filename), "-->", self.format_file(uniqname))
            hdus.writeto(uniqname, output_verify=verify_mode, checksum=self.args.add_checksum)
        except Exception as exc:
            if os.path.exists(uniqname):
                os.remove(uniqname)
            if "buffer is too small" in str(exc):
                raise CrdsError("Failed to rename/rewrite", repr(basefile), "as", repr(baseuniq), ":",
                                "probable file truncation", ":", str(exc)) from exc
            else:
                raise CrdsError("Failed to rename/rewrite", repr(basefile), "as", repr(baseuniq),
                                ":", str(exc)) from exc
def check_exptypes(self):
    """Based on EXP_TYPEs defined by CAL schema and the specified instrument
    contexts, print out log info on missing or unexpected coverage.
    """
    for imap_name in self.contexts:
        i_loaded = crds.get_cached_mapping(imap_name)
        s_exp_types = self.locator.get_exptypes(i_loaded.instrument)
        for exp_type in s_exp_types:
            reftypes = self.locator.get_reftypes(exp_type)
            for filekind in i_loaded.selections:
                ufilekind = (i_loaded.instrument.upper(), filekind.upper())
                rmap_name = i_loaded.selections[filekind]
                if rmap_name == 'N/A':
                    if filekind in reftypes:
                        log.verbose("Reftype rmap", repr(ufilekind), "is defined as N/A for", repr(exp_type))
                else:
                    r_loaded = i_loaded.get_rmap(filekind)
                    r_exp_types = r_loaded.get_parkey_map().get("META.EXPOSURE.TYPE", None)
                    if r_exp_types is None:   # ???
                        log.verbose("Reftype", repr(ufilekind), "does not match using EXP_TYPE.")
                    elif exp_type in r_exp_types:
                        if filekind in reftypes:
                            log.verbose("Reftype", repr(ufilekind), "explicitly mentions", repr(exp_type))
                        else:
                            log.warning("Reftype", repr(ufilekind), "has unexpected coverage for", repr(exp_type))
                    elif "ANY" in r_exp_types or "N/A" in r_exp_types:
                        log.verbose("Reftype", repr(ufilekind), "is satisfied by ANY or N/A for", repr(exp_type))
                    elif filekind in reftypes:
                        log.info("Reftype", repr(ufilekind), "is missing coverage for", repr(exp_type))
                    else:
                        log.verbose("Reftype", repr(ufilekind), "has no expected coverage for", repr(exp_type))
def wfpc2_flatfile_filter(kmap):
    """Hack the WFPC2 FLATFILE kmap so linear ramp filter (FR*) exposures map onto fixed
    flats selected by LRFWAVE range, and add a fallback_header hook.
    """
    log.info("Hacking WFPC2 Flatfile.")
    # : ('MODE', 'FILTER1', 'FILTER2', 'IMAGETYP', 'FILTNAM1', 'FILTNAM2', 'LRFWAVE'), ('DATE-OBS', 'TIME-OBS')),
    kmap[('*', '*', '*', 'EXT', 'FR*', '*', '# >3000 and <=4200 #')] = \
        [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c10045u.r4h', comment='')]
    kmap[('*', '*', '*', 'EXT', '*', 'FR*', '# >3000 and <=4200 #')] = \
        [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c10045u.r4h', comment='')]
    kmap[('*', '*', '*', 'EXT', 'FR*', '*', '# >4200 and <=5800 #')] = \
        [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c1004fu.r4h', comment='')]
    kmap[('*', '*', '*', 'EXT', '*', 'FR*', '# >4200 and <=5800 #')] = \
        [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c1004fu.r4h', comment='')]
    kmap[('*', '*', '*', 'EXT', 'FR*', '*', '# >5800 and <=7600 #')] = \
        [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c1004nu.r4h', comment='')]
    kmap[('*', '*', '*', 'EXT', '*', 'FR*', '# >5800 and <=7600 #')] = \
        [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c1004nu.r4h', comment='')]
    kmap[('*', '*', '*', 'EXT', 'FR*', '*', '# >7600 and <=10000 #')] = \
        [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c10052u.r4h', comment='')]
    kmap[('*', '*', '*', 'EXT', '*', 'FR*', '# >7600 and <=10000 #')] = \
        [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c10052u.r4h', comment='')]
    header_additions = [
        ("hooks", {
            "fallback_header": "fallback_header_wfpc2_flatfile_v1",
        }),
    ]
    return kmap, header_additions
def ingest_files(self):
    """Copy self.files into the user's ingest directory on the CRDS server."""
    stats = self._start_stats()
    destination = self.submission_info.ingest_dir
    host, path = destination.split(":")
    self.ensure_ingest_exists(host, path)
    total_size = utils.total_size(self.files)
    ingest_info = self.get_ingested_files()
    self.scan_for_nonsubmitted_ingests(ingest_info)
    remaining_files = self.keep_existing_files(ingest_info, self.files) \
        if self.args.keep_existing_files else self.files
    for i, filename in enumerate(remaining_files):
        file_size = utils.file_size(filename)
        log.info("Copy started", repr(filename),
                 "[", i + 1, "/", len(self.files), " files ]",
                 "[", utils.human_format_number(file_size), "/",
                 utils.human_format_number(total_size), " bytes ]")
        self.copy_file(filename, path, destination)
        stats.increment("bytes", file_size)
        stats.increment("files", 1)
        stats.log_status("files", "Copy complete", len(self.files))
        stats.log_status("bytes", "Copy complete", total_size)
    log.divider(func=log.verbose)
    stats.report()
    log.divider(char="=")
def polled(self):
    """Output the latest affected datasets taken from the history starting item onward.

    Since the history drives and ultimately precedes any affected datasets computation,
    there's no guarantee that every history item is available.
    """
    assert 0 <= self.history_start < len(self.history), \
        "Invalid history interval with starting index " + repr(self.history_start)
    assert 0 <= self.history_stop < len(self.history), \
        "Invalid history interval with stopping index " + repr(self.history_stop)
    assert self.history_start <= self.history_stop, "Invalid history interval, start >= stop."
    effects = []
    for i in range(self.history_start, self.history_stop):
        old_context = self.history[i][1]
        new_context = self.history[i + 1][1]
        if old_context > new_context:   # skip over backward transitions, no output.
            continue
        log.info("Fetching effects for", (i,) + self.history[i + 1])
        affected = self.get_affected(old_context, new_context)
        if affected:
            effects.append((i, affected))
    return effects
def sync_datasets(self):
    """Sync mappings and references for datasets with respect to `self.contexts`."""
    if not self.contexts:
        log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.")
        sys.exit(-1)
    active_references = []
    for context in self.contexts:
        if self.args.dataset_ids:
            if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                with open(self.args.dataset_ids[0][1:]) as pfile:
                    self.args.dataset_ids = pfile.read().splitlines()
            with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
        for dataset in self.args.dataset_files or self.args.dataset_ids:
            log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
            with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                if self.args.dataset_files:
                    headers = {dataset: data_file.get_conditioned_header(dataset, observatory=self.observatory)}
                else:
                    headers = {dataset_id: header for (dataset_id, header) in id_headers.items()
                               if dataset.upper() in dataset_id}
                for assc_dataset, header in headers.items():
                    with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset),
                                                "under context", repr(context)):
                        bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory,
                                                           ignore_cache=self.args.ignore_cache)
                        log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                        active_references.extend(bestrefs.values())
    active_references = [ref for ref in active_references if not ref.startswith("NOT FOUND")]
    log.verbose("Syncing references:", repr(active_references))
    return list(set(active_references))
def verify_context_change(self, old_context):
    """Verify that the starting and post-sync contexts are different, or issue an error."""
    new_context = heavy_client.load_server_info(self.observatory).operational_context
    if old_context == new_context:
        log.error("Expected operational context switch but starting and post-sync contexts are both",
                  repr(old_context))
    else:
        log.info("Operational context updated from", repr(old_context), "to", repr(new_context))
def wipe_files(self):
    """Delete all files from the user's ingest directory on the CRDS server."""
    log.divider(name="wipe files", char="=")
    ingest_info = self.get_ingested_files()
    for basename in ingest_info:
        log.info("Wiping file", repr(basename))
        self.connection.get(ingest_info[basename]["deleteUrl"])
def ingest_files(self):
    """Upload self.files to the user's ingest directory on the CRDS server."""
    stats = self._start_stats()
    total_size = utils.total_size(self.files)
    ingest_info = self.get_ingested_files()
    self.scan_for_nonsubmitted_ingests(ingest_info)
    remaining_files = self.keep_existing_files(ingest_info, self.files) \
        if self.args.keep_existing_files else self.files
    for i, filename in enumerate(remaining_files):
        file_size = utils.file_size(filename)
        log.info("Upload started", repr(filename),
                 "[", i + 1, "/", len(self.files), " files ]",
                 "[", utils.human_format_number(file_size), "/",
                 utils.human_format_number(total_size), " bytes ]")
        self.connection.upload_file("/upload/new/", filename)
        stats.increment("bytes", file_size)
        stats.increment("files", 1)
        stats.log_status("files", "Upload complete", len(self.files))
        stats.log_status("bytes", "Upload complete", total_size)
    log.divider(func=log.verbose)
    stats.report()
    log.divider(char="=")
def download_files(self, downloads, localpaths):
    """Serial file-by-file download."""
    download_metadata = get_download_metadata()
    self.info_map = {}
    for filename in downloads:
        self.info_map[filename] = download_metadata.get(filename, "NOT FOUND unknown to server")
    if config.writable_cache_or_verbose("Readonly cache, skipping download of (first 5):",
                                        repr(downloads[:5]), verbosity=70):
        bytes_so_far = 0
        total_files = len(downloads)
        total_bytes = get_total_bytes(self.info_map)
        for nth_file, name in enumerate(downloads):
            try:
                if "NOT FOUND" in self.info_map[name]:
                    raise CrdsDownloadError("file is not known to CRDS server.")
                bytes, path = self.catalog_file_size(name), localpaths[name]
                log.info(file_progress("Fetching", name, path, bytes, bytes_so_far, total_bytes,
                                       nth_file, total_files))
                self.download(name, path)
                bytes_so_far += os.stat(path).st_size
            except Exception as exc:
                if self.raise_exceptions:
                    raise
                else:
                    log.error("Failure downloading file", repr(name), ":", str(exc))
        return bytes_so_far
    return 0
def main(self):
    """Generate names corresponding to files listed on the command line."""
    if self.args.standard:
        self.args.add_keywords = True
        self.args.verify_file = True
    if not self.args.files:
        return
    for filename in self.files:
        assert config.is_reference(filename), \
            "File " + repr(filename) + " does not appear to be a reference file. Only references can be renamed."
        uniqname = naming.generate_unique_name(filename, self.observatory)
        if self.args.dry_run:
            log.info("Would rename", self.format_file(filename), "-->", self.format_file(uniqname))
        else:
            self.rewrite(filename, uniqname)
            if self.args.remove_original:
                os.remove(filename)
    # XXXX script returns filename result not suitable as program exit status
    # XXXX filename result is insufficient if multiple files are specified.
    # XXXX filename result supports embedded use on web server returning new name.
    return uniqname
def set_rmap_substitution(rmapping, new_filename, parameter_name, old_text, new_text, *args, **keys):
    """Add a substitution to the header of `rmapping` mapping `old_text` to `new_text`
    for `parameter_name` and write the result to `new_filename`.
    """
    log.info("Adding substitution for", srepr(parameter_name), "from", srepr(old_text), "to",
             srepr(new_text), "in", srepr(rmapping.basename))
    new_mapping = rmapping.copy()
    if "substitutions" not in new_mapping.header:
        new_mapping.header["substitutions"] = {}
    new_mapping.header["substitutions"][parameter_name] = {old_text: new_text}
    new_mapping.write(new_filename)
def push_context(self):
    """Push the final context recorded in the local cache to the CRDS server so it
    can be displayed as the operational state of a pipeline.
    """
    info = heavy_client.load_server_info(self.observatory)
    with log.error_on_exception("Failed pushing cached operational context name to CRDS server"):
        api.push_remote_context(self.observatory, "operational", self.args.push_context, info.operational_context)
        log.info("Pushed cached operational context name", repr(info.operational_context), "to CRDS server")
def error_and_repair(self, file, *args, **keys):
    """Issue an error message and repair `file` if requested by command line args."""
    log.error(*args, **keys)
    if self.args.repair_files:
        if config.writable_cache_or_info("Skipping remove and re-download of", repr(file)):
            log.info("Repairing file", repr(file))
            utils.remove(file, observatory=self.observatory)
            self.dump_files(self.default_context, [file])
def get_ingested_files(self):
    """Return the server-side JSON info on the files already in the submitter's ingest directory."""
    log.info("Determining existing files.")
    result = self.connection.get('/upload/list/').json()
    log.verbose("JSON info on existing ingested files:\n", log.PP(result))
    if "files" in result and isinstance(result["files"], list):
        return {info["name"]: info for info in result["files"]}
    else:
        return {info["name"]: info for info in result}
def _start_stats(self):
    """Helper method to initialize stats keeping for ingest."""
    total_bytes = utils.total_size(self.files)
    stats = utils.TimingStats(output=log.verbose)
    stats.start()
    log.divider(name="ingest files", char="=")
    log.info("Copying", len(self.files), "file(s) totalling", utils.human_format_number(total_bytes), "bytes")
    log.divider(func=log.verbose)
    return stats
def save_json_specs(specs, combined_specs_path):
    """Write out the specs dictionary returned by _load_specs() as .json in one combined file."""
    specs_json = json.dumps(specs, indent=4, sort_keys=True, separators=(',', ':'))
    with open(combined_specs_path, "w+") as specs_file:
        specs_file.write(specs_json)
    log.info("Saved combined type specs to", repr(combined_specs_path))
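# Illustrative usage sketch (not from the source): the spec entries and output path below
# are hypothetical; save_json_specs() only assumes a JSON-serializable dict and a writable path.
example_specs = {
    "miri_flat.rmap": {"suffix": "flat", "text_descr": "Flat Field"},    # hypothetical entries
    "miri_dark.rmap": {"suffix": "dark", "text_descr": "Dark Frame"},
}
save_json_specs(example_specs, "./combined_specs.json")                  # hypothetical output path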
def _start_stats(self):
    """Helper method to initialize stats keeping for ingest."""
    total_bytes = utils.total_size(self.files)
    stats = utils.TimingStats(output=log.verbose)
    stats.start()
    log.divider(name="ingest files", char="=")
    log.info("Uploading", len(self.files), "file(s) totalling", utils.human_format_number(total_bytes), "bytes")
    log.divider(func=log.verbose)
    return stats
def clear_pickles(self):
    """Remove all pickles."""
    log.info("Removing all context pickles. Use --save-pickles to recreate for specified contexts.")
    for path in rmap.list_pickles("*.pmap", self.observatory, full_path=True):
        if os.path.exists(path):
            utils.remove(path, self.observatory)
def _submission(self, relative_url):
    """Do a generic submission re-post to the specified relative_url."""
    assert self.args.description is not None, "You must supply a --description for this function."
    self.ingest_files()
    log.info("Posting web request for", srepr(relative_url))
    submission_args = self.get_submission_args()
    completion_args = self.connection.repost_start(relative_url, **submission_args)
    # give POST time to complete send, not response
    time.sleep(10)
    return completion_args
def update_header_names(name_map):
    """Update the .name and .derived_from fields in each new mapping's header so that the
    file at new_path records its new name and its derivation from old_path.
    """
    for old_path, new_path in sorted(name_map.items()):
        old_base, new_base = os.path.basename(old_path), os.path.basename(new_path)
        refactor.update_derivation(new_path, old_base)
        log.info("Adjusting name", repr(new_base), "derived_from", repr(old_base), "in", repr(new_path))
    return name_map   # no change
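# Illustrative call (hypothetical file names): `name_map` maps each old mapping path to the
# new path it was renamed to; update_header_names() then records that derivation in each new file.
update_header_names({
    "./hst_cos_deadtab_0250.rmap": "./hst_cos_deadtab_0251.rmap",   # hypothetical rename pair
})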
def hack_in_new_maps(old, new, updated_maps):
    """Given mapping named `old`, create a modified copy named `new` which installs each map
    of `updated_maps` in place of its predecessor.
    """
    copy_mapping(old, new)
    for mapping in sorted(updated_maps):
        key, replaced, replacement = insert_mapping(new, mapping)
        if replaced:
            log.info("Replaced", repr(replaced), "with", repr(replacement), "for", repr(key),
                     "in", repr(old), "producing", repr(new))
        else:
            log.info("Added", repr(replacement), "for", repr(key), "in", repr(old),
                     "producing", repr(new))
def get_data_model_flat_dict(filepath):
    """Get the header from `filepath` using the jwst data model."""
    datamodels = get_datamodels()
    log.info("Checking JWST datamodels.")
    # with log.error_on_exception("JWST Data Model (jwst.datamodels)"):
    try:
        with datamodels.open(filepath) as d_model:
            flat_dict = d_model.to_flat_dict(include_arrays=False)
    except Exception as exc:
        raise exceptions.ValidationError("JWST Data Models:", str(exc).replace("u'", "'")) from exc
    return flat_dict
def del_rmap_parameter(rmapping, new_filename, parameter, *args, **keys):
    """Delete `parameter` from the parkey item of the `types` of the specified
    `instruments` in `context`.
    """
    log.info("Deleting parameter", repr(parameter), "from", repr(rmapping.basename))
    parkey = rmapping.parkey
    i, j = get_parameter_index(parkey, parameter)
    del_parkey = parkey[:i] + ((parkey[i][:j] + parkey[i][j + 1:]),) + parkey[i + 1:]
    log.verbose("Replacing", srepr(parkey), "with", srepr(del_parkey), "in", srepr(rmapping.basename))
    rmapping.header["parkey"] = del_parkey
    rmapping.selector.delete_match_param(parameter)
    rmapping.write(new_filename)
def wipe_files(self):
    """Delete all files from the user's ingest directory on the CRDS server."""
    destination = self.submission_info.ingest_dir
    log.divider(name="wipe files", char="=")
    log.info("Wiping files at", repr(destination))
    host, path = destination.split(":")
    if destination.startswith(socket.gethostname()):
        output = pysh.out_err("rm -vf ${path}/*")
    else:
        output = pysh.out_err("ssh ${host} rm -vf ${path}/*")
    if output:
        log.verbose(output)
def remove_checksum(file_):
    """Remove checksums from `file_`."""
    log.info("Removing checksum for", repr(file_))
    if config.is_reference(file_):
        data_file.remove_checksum(file_)
    elif rmap.is_mapping(file_):
        raise exceptions.CrdsError("Mapping checksums cannot be removed for:", repr(file_))
    else:
        raise exceptions.CrdsError("File", repr(file_),
                                   "does not appear to be a CRDS reference or mapping file.")
def main(self):
    """Check files for availability from the archive."""
    self.require_server_connection()
    log.info("Mapping URL:", repr(self.mapping_url))
    log.info("Reference URL:", repr(self.reference_url))
    stats = utils.TimingStats()
    self.init_files(self.files)
    for filename in self.files:
        self.verify_archive_file(filename)
        stats.increment("files")
    self.print_files()
    stats.report_stat("files")
    log.standard_status()
def add_checksum(file_):
    """Add checksums to `file_`."""
    log.info("Adding checksum for", repr(file_))
    if config.is_reference(file_):
        with log.error_on_exception("Failed updating checksum for", repr(file_)):
            data_file.add_checksum(file_)
    elif rmap.is_mapping(file_):
        update_mapping_checksum(file_)
    else:
        raise exceptions.CrdsError("File", repr(file_),
                                   "does not appear to be a CRDS reference or mapping file.")
def _setup_source_context(self):
    """Default the --source-context if necessary and then translate any symbolic name
    to a literal .pmap name, e.g. jwst-edit --> jwst_0109.pmap.  Then optionally sync
    the files to a local cache.
    """
    if self.args.source_context is None:
        self.source_context = self.observatory + "-edit"
        log.info("Defaulting --source-context to", srepr(self.source_context))
    else:
        self.source_context = self.args.source_context
    self.source_context = self.resolve_context(self.source_context)
    if self.args.sync_files:
        errs = sync.SyncScript("crds.sync --contexts {}".format(self.source_context))()
        assert not errs, "Errors occurred while syncing all rules to CRDS cache."
def remove_dir(self, instrument):
    """Remove an instrument cache directory and any associated legacy link."""
    if config.writable_cache_or_info("Skipping remove instrument", repr(instrument), "directory."):
        crds_refpath = config.get_crds_refpath(self.observatory)
        prefix = self.locator.get_env_prefix(instrument)
        rootdir = os.path.join(crds_refpath, instrument)
        refdir = os.path.join(crds_refpath, prefix[:-1])
        if len(glob.glob(os.path.join(rootdir, "*"))):
            log.info("Residual files in '{}'. Not removing.".format(rootdir))
            return
        if os.path.exists(refdir):   # skip crds:// vs. oref
            utils.remove(refdir, observatory=self.observatory)
        utils.remove(rootdir, observatory=self.observatory)
def handle_done(self, message):
    """Generic "done" handler:  issue an info() message and stop monitoring / exit."""
    status = message.data["status"]
    result = message.data.get("result", None)
    if status == 0:
        log.info(self.format_remote("COMPLETED:", result))
    elif status == 1:
        log.error(self.format_remote("FAILED:", result))
    elif status == 2:
        log.error(self.format_remote("CANCELLED:", result))
    else:
        log.info(self.format_remote("DONE:", result))
    self.result = result
    return result
def handle_done(self, message):
    """Generic "done" handler:  issue an info() message and stop monitoring / exit."""
    status = message.data["status"]
    result = message.data.get("result", None)
    if status == 0:
        log.info(self.format_remote("COMPLETED:", result))
    elif status == 1:
        log.fatal_error(self.format_remote("FAILED:", result))
    elif status == 2:
        log.error(self.format_remote("CANCELLED:", result))
    else:
        log.info(self.format_remote("DONE:", result))
    self.result = result
    return result
def mapping_check_diffs(mapping, derived_from):
    """Issue warnings for *deletions* in self relative to parent derived_from mapping.
    Issue warnings for *reversions*, defined as replacements where the replacement is
    older than the original, as defined by the names.

    This is intended to check for missing modes and for inadvertent reversions to
    earlier versions of files.  For speed and simplicity, file time order is currently
    determined by the names themselves, not file contents, file system, or database info.
    """
    mapping = rmap.asmapping(mapping, cached="readonly")
    derived_from = rmap.asmapping(derived_from, cached="readonly")
    log.info("Checking diffs from", repr(derived_from.basename), "to", repr(mapping.basename))
    diffs = derived_from.difference(mapping)
    mapping_check_diffs_core(diffs)
def check_sha1sums(filepaths, observatory=None):
    """Given a list of filepaths which nominally will be submitted to CRDS for project
    `observatory`, check to see if any are bit-for-bit identical with existing files as
    determined by the CRDS server's catalog and sha1sum matching.

    filepaths     [str, ...]   paths of files to be checked for preexistence
    observatory   str          e.g. 'hst' or 'jwst'

    Returns count of duplicate files
    """
    log.info("Checking local file sha1sums vs. CRDS server to identify files already in CRDS.")
    sha1sums = get_all_sha1sums(observatory)
    for filepath in filepaths:
        check_sha1sum(filepath, sha1sums, observatory)
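# Minimal local-side sketch of the sha1 comparison described above.  This is not the CRDS
# helper itself, just an illustration of how a file's sha1sum could be computed for
# comparison against the server catalog; only the standard library is assumed.
import hashlib

def local_sha1sum(filepath, blocksize=2 ** 20):
    """Return the hex sha1 digest of `filepath`, read in 1 MB blocks."""
    digest = hashlib.sha1()
    with open(filepath, "rb") as handle:
        for block in iter(lambda: handle.read(blocksize), b""):
            digest.update(block)
    return digest.hexdigest()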
def __init__(self, context, datasets, datasets_since):
    """Contact the CRDS server and get headers for the list of `datasets` ids with respect to `context`."""
    super(DatasetHeaderGenerator, self).__init__(context, datasets, datasets_since)
    server = api.get_crds_server()
    log.info("Dumping dataset parameters from CRDS server at", repr(server), "for", repr(datasets))
    self.headers = api.get_dataset_headers_by_id(context, datasets)
    log.info("Dumped", len(self.headers), "of", len(datasets), "datasets from CRDS server at", repr(server))
    # every command line id should correspond to 1 or more headers
    for source in self.sources:
        if self.matching_two_part_id(source) not in self.headers.keys():
            log.warning("Dataset", repr(source), "isn't represented by downloaded parameters.")
    # Process according to downloaded 2-part ids, not command line ids.
    self.sources = sorted(self.headers.keys())
def set_rmap_parkey(rmapping, new_filename, parkey, *args, **keys):
    """Set the parkey of `rmapping` to `parkey` and write out to `new_filename`."""
    log.info("Setting parkey, removing all references from", srepr(rmapping.basename))
    pktuple = eval(parkey)
    required_keywords = tuple(utils.flatten(pktuple))
    refnames = rmapping.reference_names()
    references_headers = {
        refname: get_refactoring_header(rmapping.filename, refname, required_keywords)
        for refname in refnames
    }
    rmapping = rmap_delete_references(rmapping.filename, new_filename, refnames)
    log.info("Setting parkey", srepr(parkey), "in", srepr(rmapping.basename))
    rmapping.header["parkey"] = pktuple
    rmapping.write(new_filename)
    rmapping = rmap.load_mapping(new_filename)
    rmapping = rmap_insert_references_by_matches(new_filename, new_filename, references_headers)
    return rmapping
def _process_rmap(self, func, rmapping, *args, **keys):
    """Execute `func` on a single `rmapping` passing along *args and **keys."""
    keywords = dict(keys)
    rmapping_org = rmapping
    new_filename = rmapping.filename if self.args.inplace else os.path.join(".", rmapping.basename)
    if os.path.exists(new_filename):
        log.info("Continuing refactoring from local copy", srepr(new_filename))
        rmapping = rmap.load_mapping(new_filename)
    keywords.update(locals())
    fixers = self.args.fixers
    if fixers:
        rmapping = rmap.load_mapping(rmapping.filename)
        keywords.update(locals())
        apply_rmap_fixers(*args, **keywords)
    func(*args, **keywords)
    return new_filename
def get_affected(self, old_context, new_context):
    """Return the affected datasets Struct for the transition from old_context to new_context,
    or None if the results aren't ready yet.
    """
    try:
        affected = api.get_affected_datasets(self.observatory, old_context, new_context)
    except Exception as exc:
        if "No precomputed affected datasets results exist" in str(exc):
            if self.args.ignore_missing_results:
                log.info("No results for", old_context, "-->", new_context, "ignoring and proceeding.")
                affected = None
            else:
                self.fatal_error("Results for", old_context, "-->", new_context,
                                 "don't exist or are not yet complete.")
        else:
            self.fatal_error("get_affected_datasets failed: ", str(exc).replace("OtherError:", ""))
    return affected
def cat_rmap(rmapping, new_filename, header_key, *args, **keys):
    """Cat/print rmapping's source text or the value of `header_key` in the rmap header."""
    if header_key is not None:
        log.info("In", srepr(rmapping.basename), "parameter", srepr(header_key), "=",
                 srepr(rmapping.header[header_key]))
    else:
        log.info("-" * 80)
        log.info("Rmap", srepr(rmapping.basename), "is:")
        log.info("-" * 80)
        log.write(str(rmapping))
def insert_references(self):
    """Insert files specified by --references into the appropriate rmaps identified by --source-context."""
    self._setup_source_context()
    categorized = self.categorize_files(self.args.references)
    pmap = crds.get_pickled_mapping(self.source_context)   # reviewed
    self.args.rmaps = []
    for (instrument, filekind) in categorized:
        try:
            self.args.rmaps.append(pmap.get_imap(instrument).get_rmap(filekind).filename)
        except crexc.CrdsError:
            log.info("Existing rmap for", (instrument, filekind), "not found. Trying empty spec.")
            spec_file = os.path.join(
                os.path.dirname(self.obs_pkg.__file__), "specs", instrument + "_" + filekind + ".rmap")
            rmapping = rmap.asmapping(spec_file)
            log.info("Loaded spec file from", repr(spec_file))
            self.args.rmaps.append(spec_file)
    self.rmap_apply(insert_rmap_references, categorized=categorized)
def polled(self):
    """Output the latest affected datasets taken from the history starting item onward.

    Since the history drives and ultimately precedes any affected datasets computation,
    there's no guarantee that every history item is available.
    """
    assert 0 <= self.history_start < len(self.history), \
        "Invalid history interval with starting index " + repr(self.history_start)
    assert 0 <= self.history_stop < len(self.history), \
        "Invalid history interval with stopping index " + repr(self.history_stop)
    assert self.history_start <= self.history_stop, "Invalid history interval, start >= stop."
    effects = []
    for i in range(self.history_start, self.history_stop):
        log.info("Fetching effects for", (i,) + self.history[i + 1])
        old_context = self.history[i][1]
        new_context = self.history[i + 1][1]
        affected = self.get_affected(old_context, new_context)
        if affected:
            effects.append((i, affected))
    return effects
def _submission(self, relative_url):
    """Do a generic submission re-post to the specified relative_url."""
    assert self.args.description is not None, "You must supply a --description for this function."
    self.ingest_files()
    log.info("Posting web request for", srepr(relative_url))
    completion_args = self.connection.repost_start(
        relative_url,
        pmap_mode=self.pmap_mode,
        pmap_name=self.pmap_name,
        instrument=self.instrument,
        change_level=self.args.change_level,
        creator=self.args.creator,
        description=self.args.description,
        auto_rename=not self.args.dont_auto_rename,
        compare_old_reference=not self.args.dont_compare_old_reference,
    )
    # give POST time to complete send, not response
    time.sleep(10)
    return completion_args
def handle_misc_switches(self):
    """Handle command line switches with simple side-effects that should precede
    other sync operations.
    """
    if self.args.dry_run:
        config.set_cache_readonly(True)
    if self.args.repair_files:
        self.args.check_files = True
    if self.args.output_dir:
        os.environ["CRDS_MAPPATH_SINGLE"] = self.args.output_dir
        os.environ["CRDS_REFPATH_SINGLE"] = self.args.output_dir
        os.environ["CRDS_CFGPATH_SINGLE"] = self.args.output_dir
        os.environ["CRDS_PICKLEPATH_SINGLE"] = self.args.output_dir
    if self.readonly_cache:
        log.info("Syncing READONLY cache, only checking functions are enabled.")
        log.info("All cached updates, context changes, and file downloads are inhibited.")
def log_all_ids(self, effects, ids):
    """PLUGIN: Summary output after all contexts processed."""
    if self.args.quiet:
        return
    if not effects:
        log.info("No new results are available.")
    else:
        if not ids:
            log.info("No ids were affected.")
        print("#" * 100, file=sys.stderr)
        log.info("Contributing context switches =", len(effects))
        log.info("Total products affected =", len(ids))
    log.standard_status()