def _get_config_refpath(context, cal_ver):
    """Given CRDS `context` and calibration s/w version `cal_ver`, identify the
    applicable SYSTEM CRDSCFG reference file, cache it, and return the file path.
    """
    context = _get_missing_context(context)
    cal_ver = _get_missing_calver(cal_ver)
    i = 0
    while (i < len(REFPATHS) - 1 and not _versions_lt(cal_ver, REFPATHS[i + 1][0])):
        i += 1
    refpath = os.path.join(HERE, REFPATHS[i][1])
    try:  # Use a normal try/except because exceptions are expected.
        header = {
            "META.INSTRUMENT.NAME": "SYSTEM",
            "META.CALIBRATION_SOFTWARE_VERSION": cal_ver
        }
        pmap = crds.get_symbolic_mapping(context)
        imap = pmap.get_imap("system")
        rmapping = imap.get_rmap("crdscfg")
        ref = rmapping.get_best_ref(header)
        refpath = rmapping.locate_file(ref)
        api.dump_references(context, [ref])
    except Exception:
        log.verbose_warning("Failed locating SYSTEM CRDSCFG reference",
                            "under context", repr(context),
                            "and cal_ver", repr(cal_ver) + ".  Using built-in references.")
    log.verbose("Using", srepr(os.path.basename(refpath)),
                "to determine applicable default reftypes for", srepr(cal_ver))
    return refpath
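# Sketch of the assumed REFPATHS layout (illustrative values, not taken from the source):
# the selection loop above expects a list of (minimum_cal_ver, crdscfg_basename) pairs
# sorted by ascending version, and picks the last entry whose version does not exceed
# `cal_ver` as the built-in fallback file, e.g.:
#
#     REFPATHS = [
#         ("0.7.0", "system_crdscfg_b7.yaml"),
#         ("0.9.0", "system_crdscfg_b7.1.yaml"),
#     ]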
def get_pipeline_types(self, pipeline, exp_type):
    """Based on a pipeline .cfg filename and an EXP_TYPE, look up the Steps corresponding
    to the .cfg and extrapolate those to the reftypes used by those Steps.

    If there are exceptions to the reftypes assigned for a particular Step that depend
    on EXP_TYPE, return the revised types for that Step instead.

    Return [reftypes, ...]
    """
    steps = self.pipeline_cfgs_to_steps[pipeline]
    reftypes = []
    for step in steps:
        if step not in self.loaded_cfg.steps_to_reftypes_exceptions:
            reftypes.extend(self.steps_to_reftypes[step])
        else:
            for case in self.loaded_cfg.steps_to_reftypes_exceptions[step]:
                item = list(case.values())[0]
                more_reftypes = item["reftypes"][:]
                exptypes = item["exp_types"][:]
                found = False
                for exptype_pattern in exptypes:
                    if glob_match(exptype_pattern, exp_type):
                        log.verbose("Adding exceptional types", more_reftypes,
                                    "for step", srepr(step), "case", srepr(exptype_pattern),
                                    "based on exp_type", srepr(exp_type))
                        found = True
                        reftypes.extend(more_reftypes)
                        break
                if found:
                    break
            else:
                raise exceptions.CrdsPipelineTypeDeterminationError(
                    "Unhandled EXP_TYPE for exceptional Step", srepr(step))
    return reftypes
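# Hypothetical usage sketch (pipeline and EXP_TYPE values are illustrative):
#
#     reftypes = config_manager.get_pipeline_types("calwebb_image2.cfg", "NRC_IMAGE")
#
# returning the union of reftypes for every Step in that pipeline .cfg, with any
# EXP_TYPE-dependent exception reftypes substituted for the affected Steps.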
def is_applicable(self, header):
    """Return True IFF this Validator is applicable based upon header and the
    presence field of the TpnInfo.  The presence field can contain an expression
    which is evaluated in the context of `header`.

    There are variations of "True" which can be returned.  Some checks are designated
    optional (O), warning (W), or as only applying to FULL (F) frame or true
    SUBARRAY (S) cases.  These cases return the presence character which, as a
    non-zero length string, also evaluates to True but carries extra information,
    particularly "optional" or "warning".
    """
    SUBARRAY = header.get('SUBARRAY', 'UNDEFINED')
    if self._presence_condition_code:
        try:
            presence = eval(self._presence_condition_code, header, self._eval_namespace)
            log.verbose("Validator", self.info, "is",
                        "applicable." if presence else "not applicable.", verbosity=70)
            if not presence:
                return False
        except Exception as exc:
            log.warning("Failed checking applicability of", repr(self.info),
                        "skipping check : ", str(exc))
            return False
    else:
        presence = self.info.presence
    if presence in ["O", "W"]:
        return presence
        # return header.get(self.name, False) != "UNDEFINED"
    elif presence == "F":  # IF_FULL_FRAME
        return validator_helpers.is_full_frame(SUBARRAY)
    elif presence == "S":  # IF_SUBARRAY
        return validator_helpers.is_subarray(SUBARRAY)
    elif presence == "A":
        return validator_helpers.subarray_defined(header)
    else:
        return True
def _get_cache_filelist_and_report_errors(bestrefs):
    """Compute the list of files to download based on the `bestrefs` dictionary,
    skimming off and reporting errors, and raising an exception on the last error seen.

    Return the list of files to download, collapsing complex return types like
    tuples and dictionaries into a list of simple filenames.
    """
    wanted = []
    last_error = None
    for filetype, refname in bestrefs.items():
        if isinstance(refname, tuple):
            wanted.extend(list(refname))
        elif isinstance(refname, dict):
            wanted.extend(refname.values())
        elif isinstance(refname, str):
            if "NOT FOUND" in refname:
                if "n/a" in refname.lower():
                    log.verbose("Reference type", srepr(filetype),
                                "NOT FOUND.  Skipping reference caching/download.", verbosity=70)
                else:
                    last_error = CrdsLookupError(
                        "Error determining best reference for",
                        srepr(filetype), " = ", str(refname)[len("NOT FOUND"):])
                    log.error(str(last_error))
            else:
                log.verbose("Reference type", srepr(filetype), "defined as", srepr(refname))
                wanted.append(refname)
        else:
            last_error = CrdsLookupError(
                "Unhandled bestrefs return value type for", srepr(filetype))
            log.error(str(last_error))
    if last_error is not None:
        raise last_error
    return wanted
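# Illustrative shape of the `bestrefs` values handled above (filenames are made up):
#
#     bestrefs = {
#         "flat": "instrument_flat_0123.fits",                      # simple filename
#         "pars-jumpstep": ("instrument_pars_0001.asdf",),          # tuple of filenames
#         "distortion": {"A1": "instrument_distortion_0042.asdf"},  # dict of filenames
#         "gain": "NOT FOUND n/a",                                  # skipped, not an error
#     }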
def _write_last_processed(self, hist):
    """Write down the history tuple of the last context processed."""
    log.verbose("Saving last processed:", repr(hist))
    log.verbose("Storing last processed state at", repr(self.last_processed_path))
    utils.ensure_dir_exists(self.last_processed_path)
    with open(self.last_processed_path, "w+") as last:
        last.write(str(hist))
def locate_dir(instrument, mode=None):
    """Locate the instrument specific directory for a reference file."""
    if mode is None:
        mode = config.get_crds_ref_subdir_mode(observatory="hst")
    else:
        config.check_crds_ref_subdir_mode(mode)
    crds_refpath = config.get_crds_refpath("hst")
    prefix = get_env_prefix(instrument)
    if mode == "legacy":
        # Locate cached files at the appropriate CDBS-style iref$ locations
        try:
            rootdir = os.environ[prefix]
        except KeyError:
            try:
                rootdir = os.environ[prefix[:-1]]
            except KeyError:
                raise KeyError("Reference location not defined for " + repr(instrument) +
                               ".  Did you configure " + repr(prefix) + "?")
    elif mode == "instrument" and instrument != "synphot":
        # use simple names inside CRDS cache.
        rootdir = os.path.join(crds_refpath, instrument)
        refdir = os.path.join(crds_refpath, prefix[:-1])
        if not os.path.exists(refdir):
            if config.writable_cache_or_verbose("Skipping making instrument directory link for",
                                                repr(instrument)):
                log.verbose("Creating legacy cache link", repr(refdir), "-->", repr(rootdir))
                with log.verbose_warning_on_exception("Failed creating legacy symlink:",
                                                      refdir, "-->", rootdir):
                    utils.ensure_dir_exists(rootdir + "/locate_dir.fits")
                    os.symlink(rootdir, refdir)
    elif mode == "instrument" and instrument == "synphot":
        rootdir = os.path.join(crds_refpath, instrument)
    elif mode == "flat":
        # use original flat cache structure, all instruments in same directory.
        rootdir = crds_refpath
    else:
        raise ValueError("Unhandled reference file location mode " + repr(mode))
    return rootdir
def upload_file(self, filepath):
    """Upload the file at `filepath` to the CRDS chunked upload endpoint, posting it
    whole if it is smaller than _UPLOAD_CHUNK_SIZE and otherwise streaming it in
    Content-Range chunks.
    """
    abs_url = self.abs_url("/upload/chunked/")
    response = self.session.get(abs_url)
    log.verbose("COOKIES:", log.PP(response.cookies))
    csrf_token = response.cookies["csrftoken"]

    file_size = os.stat(filepath).st_size
    filename = os.path.basename(filepath)

    if file_size < _UPLOAD_CHUNK_SIZE:
        files = {"files": (filename, open(filepath, "rb"))}
        data = {"csrfmiddlewaretoken": csrf_token}
        self.session.post(abs_url, files=files, data=data)
    else:
        with open(filepath, "rb") as f:
            start_byte = 0
            while True:
                chunk = f.read(_UPLOAD_CHUNK_SIZE)
                if len(chunk) == 0:
                    break

                files = {"files": (filename, io.BytesIO(chunk))}
                data = {"csrfmiddlewaretoken": csrf_token}
                end_byte = start_byte + len(chunk) - 1
                content_range = f"bytes {start_byte}-{end_byte}/{file_size}"
                headers = {"Content-Range": content_range}

                response = self.session.post(abs_url, files=files, data=data, headers=headers)
                csrf_token = response.cookies["csrftoken"]

                start_byte = end_byte + 1
def check_header(self, filename, header):
    """Evaluate the header expression associated with this validator (as its sole value)
    with respect to the given `header`.

    Read `header` from `filename` if `header` is None.
    """
    # super(KernelunityValidator, self).check_header(filename, header)
    array_name = self.complex_name
    all_data = header[array_name].DATA.transpose()
    images = int(np.product(all_data.shape[:-2]))
    images_shape = (images,) + all_data.shape[-2:]
    images_data = np.reshape(all_data, images_shape)
    log.verbose("File=" + repr(os.path.basename(filename)),
                "Checking", len(images_data), repr(array_name), "kernel(s) of size",
                images_data[0].shape, "for individual sums of 1+-1e-6.  Center pixels >= 1.")

    center_0 = images_data.shape[-2] // 2
    center_1 = images_data.shape[-1] // 2
    center_pixels = images_data[..., center_0, center_1]
    if not np.all(center_pixels >= 1.0):
        log.warning("Possible bad IPC Kernel: One or more kernel center pixel value(s)"
                    " too small, should be >= 1.0")
        # raise BadKernelCenterPixelTooSmall(
        #     "One or more kernel center pixel value(s) too small, should be >= 1.0")

    for (i, image) in enumerate(images_data):
        if abs(image.sum() - 1.0) > 1.0e-6:
            raise BadKernelSumError("Kernel sum", image.sum(),
                                    "is not 1+-1e-6 for kernel #" + str(i), ":", repr(image))
def locate_dir(instrument, mode=None):
    """Locate the instrument specific directory for a reference file."""
    if mode is None:
        mode = config.get_crds_ref_subdir_mode(observatory="tobs")
    else:
        config.check_crds_ref_subdir_mode(mode)
    crds_refpath = config.get_crds_refpath("tobs")
    prefix = get_env_prefix(instrument)
    if mode == "legacy":
        # Locate cached files at the appropriate CDBS-style iref$ locations
        try:
            rootdir = os.environ[prefix]
        except KeyError:
            try:
                rootdir = os.environ[prefix[:-1]]
            except KeyError as exc:
                raise KeyError("Reference location not defined for " + repr(instrument) +
                               ".  Did you configure " + repr(prefix) + "?") from exc
    elif mode == "instrument":
        # use simple names inside CRDS cache.
        rootdir = os.path.join(crds_refpath, instrument)
        refdir = os.path.join(crds_refpath, prefix[:-1])
        if not os.path.exists(refdir):
            if config.writable_cache_or_verbose(
                    "Skipping making instrument directory link for", repr(instrument)):
                log.verbose("Creating legacy cache link", repr(refdir), "-->", repr(rootdir))
                utils.ensure_dir_exists(rootdir + "/locate_dir.fits")
                os.symlink(rootdir, refdir)
    elif mode == "flat":
        # use original flat cache structure, all instruments in same directory.
        rootdir = crds_refpath
    else:
        raise ValueError("Unhandled reference file location mode " + repr(mode))
    return rootdir
def fetch_files(self, context, files):
    """Download `files` as needed relative to `context`, nominally used to identify
    the observatory to the server.

    If the CRDS cache is currently configured as READONLY, print an estimate of
    which files would download and the total size.  The estimate does not (currently)
    include the nested mappings associated with the closure of `files`, but the
    dominant costs of downloads are reference files.
    """
    files = set([os.path.basename(_file) for _file in files])
    if config.get_cache_readonly():
        log.info("READONLY CACHE estimating required downloads.")
        if not self.args.ignore_cache:
            already_have = (set(rmap.list_references("*", self.observatory)) |
                            set(rmap.list_mappings("*", self.observatory)))
        else:
            already_have = set()
        fetched = [x for x in sorted(files - already_have) if not x.startswith("NOT FOUND")]
        for _file in files:
            if _file in already_have:
                log.verbose("File", repr(_file), "is already in the CRDS cache.", verbosity=55)
            else:
                log.verbose("File", repr(_file), "would be downloaded.", verbosity=55)
        if fetched:
            with log.info_on_exception("File size information not available."):
                info_map = api.get_file_info_map(self.observatory, fetched, fields=["size"])
                total_bytes = api.get_total_bytes(info_map)
                log.info("READONLY CACHE would download", len(fetched), "files totalling",
                         utils.human_format_number(total_bytes).strip(), "bytes.")
        else:
            log.info("READONLY CACHE no reference downloads expected.")
    else:
        self.dump_files(context, files, self.args.ignore_cache)
def sync_datasets(self):
    """Sync mappings and references for datasets with respect to `self.contexts`."""
    if not self.contexts:
        log.error("Define --contexts under which references are fetched for "
                  "--dataset-files or --dataset-ids.")
        sys.exit(-1)
    active_references = []
    for context in self.contexts:
        if self.args.dataset_ids:
            if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                with open(self.args.dataset_ids[0][1:]) as pfile:
                    self.args.dataset_ids = pfile.read().splitlines()
            with log.error_on_exception("Failed to get matching parameters for",
                                        self.args.dataset_ids):
                id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
        for dataset in self.args.dataset_files or self.args.dataset_ids:
            log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
            with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                if self.args.dataset_files:
                    headers = {dataset: data_file.get_conditioned_header(
                        dataset, observatory=self.observatory)}
                else:
                    headers = {dataset_id: header for (dataset_id, header) in id_headers.items()
                               if dataset.upper() in dataset_id}
                for assc_dataset, header in headers.items():
                    with log.error_on_exception("Failed syncing references for dataset",
                                                repr(assc_dataset), "under context", repr(context)):
                        bestrefs = crds.getrecommendations(
                            header, context=context, observatory=self.observatory,
                            ignore_cache=self.args.ignore_cache)
                        log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                        active_references.extend(bestrefs.values())
    active_references = [ref for ref in active_references if not ref.startswith("NOT FOUND")]
    log.verbose("Syncing references:", repr(active_references))
    return list(set(active_references))
def get_affected(self):
    """Examine the diffs between `old_pmap` and `new_pmap` and return sorted lists
    of affected instruments and types.

    Returns { affected_instrument : { affected_type, ... } }
    """
    instrs = defaultdict(set)
    diffs = self.mapping_diffs()
    diffs = remove_boring(diffs)
    for diff in diffs:
        for step in diff:
            # Walking down the diff steps 1-by-1 eventually hits an rmap comparison which
            # will define both instrument and type.  pmaps and imaps leave at least one blank.
            if len(step) == 2 and config.is_mapping(step[0]):
                instrument, filekind = utils.get_file_properties(self.observatory, step[0])
            # This is inefficient since diff doesn't vary by step, but set logic cleans up
            # the redundancy.  New rmaps imply reprocessing the entire type.
            elif isinstance(diff[-1], str) and diff[-1].startswith(("added", "deleted")) and \
                    diff[-1].endswith(".rmap'"):
                rmap_name = diff[-1].split()[-1].replace("'", "")
                rmapping = rmap.fetch_mapping(rmap_name, ignore_checksum=True)
                instrument, filekind = rmapping.instrument, rmapping.filekind
            if instrument.strip() and filekind.strip():
                if filekind not in instrs[instrument]:
                    log.verbose("Affected", (instrument, filekind), "based on diff", diff,
                                verbosity=20)
                    instrs[instrument].add(filekind)
    return {key: list(val) for (key, val) in instrs.items()}
def _flat_to_tpns(flat=None, schema_name=None):
    """Convert flat representation of DM schema to list of all TpnInfo objects."""
    if flat is None:
        flat = _schema_to_flat(_load_schema(schema_name))
    tpns = []
    for key, value in flat.items():
        if key.endswith(".TYPE"):
            basekey = str(key[:-len(".TYPE")])
            legal_values = [str(val) for val in flat.get(basekey + ".ENUM", [])]
            if legal_values:
                legal_values += ["ANY", "N/A"]
            legal_values = tuple(sorted(set(legal_values)))
            if isinstance(value, list):
                value = tuple(value)
            datatype = SCHEMA_TYPE_TO_TPN.get(value, None)
            if datatype is not None:
                tpn = TpnInfo(name=basekey.upper(), keytype="H", datatype=datatype[0],
                              presence=datatype[1], values=legal_values)
                log.verbose("Adding tpn constraint from DM schema:", repr(tpn), verbosity=65)
                tpns.append(tpn)
            else:
                log.warning("No TPN form for", repr(key), repr(value))
    return sorted(tpns)
def remove_file(self, localpath):
    """Removes file at `localpath`."""
    log.verbose("Removing file", repr(localpath))
    try:
        os.remove(localpath)
    except Exception:
        log.verbose("Exception during file removal of", repr(localpath))
def header_to_pipelines(header, context=None):
    """Given a dataset `header`, extract the EXP_TYPE or META.EXPOSURE.TYPE keyword
    and use it to look up the pipelines required to process it.

    Return a list of pipeline .cfg names.
    """
    with log.augment_exception("Failed determining exp_type, cal_ver from header",
                               log.PP(header)):
        exp_type, cal_ver = _header_to_exptype_calver(header)
    config_manager = _get_config_manager(context, cal_ver)
    pipelines = _get_pipelines(exp_type, cal_ver, context)  # uncorrected
    if config_manager.pipeline_exceptions:  # correction based on extra non-EXP_TYPE params
        pipelines2 = []
        for cfg in pipelines:
            for param, exceptions in config_manager.pipeline_exceptions.items():
                exceptions = dict(exceptions)
                dont_replace = exceptions.pop("dont_replace")
                default_missing = exceptions.pop("default_missing")
                paramval = header.get(param.upper(), default_missing)
                if paramval not in dont_replace:
                    cfg = exceptions.get(cfg, cfg)
            pipelines2.append(cfg)
        pipelines = pipelines2
    log.verbose("Applicable pipelines for", srepr(exp_type), "are", srepr(pipelines))
    return pipelines
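# Hypothetical usage sketch (keyword values are illustrative):
#
#     pipelines = header_to_pipelines(
#         {"META.EXPOSURE.TYPE": "NRC_IMAGE",
#          "META.CALIBRATION_SOFTWARE_VERSION": "0.13.0"})
#
# returning the ordered list of pipeline .cfg names to run for that exposure,
# after applying any non-EXP_TYPE parameter exceptions.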
def verify_files(self, files):
    """Check `files` against the CRDS server database to ensure integrity and check
    reject status.
    """
    basenames = [os.path.basename(file) for file in files]
    try:
        log.verbose("Downloading verification info for", len(basenames), "files.", verbosity=10)
        infos = api.get_file_info_map(
            observatory=self.observatory, files=basenames,
            fields=["size", "rejected", "blacklisted", "state", "sha1sum"])
    except Exception as exc:
        log.error("Failed getting file info.  CACHE VERIFICATION FAILED.  Exception:",
                  repr(str(exc)))
        return
    bytes_so_far = 0
    total_bytes = api.get_total_bytes(infos)
    for nth_file, file in enumerate(files):
        bfile = os.path.basename(file)
        if infos[bfile] == "NOT FOUND":
            log.error("CRDS has no record of file", repr(bfile))
        else:
            self.verify_file(file, infos[bfile], bytes_so_far, total_bytes, nth_file, len(files))
            bytes_so_far += int(infos[bfile]["size"])
def get_row_keys(self, instrument, filekind):
    """Return the row_keys which define unique table rows corresponding to mapping.

    These are used for "mode" checks to issue warnings when unique rows are deleted
    in a certify comparison check against the preceding version of a table.

    row_keys are now also utilized to perform "affected datasets" table row lookups,
    which essentially requires emulating that aspect of the calibration software.
    Consequently, row_keys now require a higher level of fidelity than when they were
    originally defined for mode checks, since the consequence of inadequate row keys
    is a failed "affects check" rather than merely an extraneous warning.

    In their capacity as affected datasets parameters, row_keys must be supported by
    the interface which connects the CRDS server to the appropriate system dataset
    parameter database, DADSOPS for HST.  That interface must be updated when
    row_keys.dat is changed.

    The certify mode checks have a shaky foundation since the concept of mode doesn't
    apply to all tables and sometimes "data" parameters are required to render rows
    unique.  The checks only issue warnings, however, so they can be ignored by file
    submitters.

    For HST calibration references, the mapping is an rmap.
    """
    try:
        return self.row_keys[instrument][filekind]
    except KeyError:
        log.verbose("No unique row keys defined for", repr((instrument, filekind)))
        return []
def get_affected(self):
    """Examine the diffs between `old_pmap` and `new_pmap` and return sorted lists
    of affected instruments and types.

    Returns { affected_instrument : { affected_type, ... } }
    """
    instrs = defaultdict(set)
    diffs = self.mapping_diffs()
    diffs = remove_boring(diffs)
    for diff in diffs:
        for step in diff:
            # Walking down the diff steps 1-by-1 eventually hits an rmap comparison which
            # will define both instrument and type.  pmaps and imaps leave at least one blank.
            if len(step) == 2 and rmap.is_mapping(step[0]):
                instrument, filekind = utils.get_file_properties(self.observatory, step[0])
            # This is inefficient since diff doesn't vary by step, but set logic cleans up
            # the redundancy.  New rmaps imply reprocessing the entire type.
            elif isinstance(diff[-1], str) and diff[-1].startswith(("added", "deleted")) and \
                    diff[-1].endswith(".rmap'"):
                rmap_name = diff[-1].split()[-1].replace("'", "")
                rmapping = rmap.fetch_mapping(rmap_name, ignore_checksum=True)
                instrument, filekind = rmapping.instrument, rmapping.filekind
            if instrument.strip() and filekind.strip():
                if filekind not in instrs[instrument]:
                    log.verbose("Affected", (instrument, filekind), "based on diff", diff,
                                verbosity=20)
                    instrs[instrument].add(filekind)
    return {key: list(val) for (key, val) in instrs.items()}
def get_best_references(pipeline_context, header, reftypes=None):
    """Get best references for dict-like `header` relative to `pipeline_context`.

    pipeline_context  CRDS context for lookup, e.g. 'hst_0001.pmap'
    header            dict-like mapping { lookup_parameter : value }
    reftypes          If None, return all reference types; otherwise return best
                      refs for the specified list of reftypes.

    Returns           { reftype : reference_basename ... }

    Raises            CrdsLookupError, typically for problems with header values
    """
    header = {str(key): str(value) for (key, value) in header.items()}
    try:
        bestrefs = S.get_best_references(pipeline_context, dict(header), reftypes)
    except Exception as exc:
        raise CrdsLookupError(str(exc)) from exc
    # Due to limitations of jsonrpc, exception handling is kludged in here.
    for filetype, refname in bestrefs.items():
        if "NOT FOUND" in refname:
            if refname.upper() == "NOT FOUND N/A":
                log.verbose("Reference type", srepr(filetype), "not applicable.", verbosity=80)
            else:
                exc_str = str(refname)[len("NOT FOUND"):]
                raise CrdsLookupError("Error determining best reference for",
                                      srepr(filetype), "=", repr(exc_str))
    return bestrefs
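# Hypothetical usage sketch (context and header values are illustrative):
#
#     refs = get_best_references(
#         "hst_0001.pmap",
#         {"INSTRUME": "ACS", "DETECTOR": "HRC", "DATE-OBS": "2002-03-19",
#          "TIME-OBS": "00:34:32"},
#         reftypes=["darkfile", "biasfile"])
#
# `refs` maps each requested reftype to a reference basename; a value of
# "NOT FOUND n/a" is logged as not applicable rather than raising.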
def _get_config_refpath(context, cal_ver):
    """Given CRDS `context` and calibration s/w version `cal_ver`, identify the
    applicable SYSTEM CRDSCFG reference file, cache it, and return the file path.
    """
    i = 0
    while (i < len(REFPATHS) - 1 and not _versions_lt(cal_ver, REFPATHS[i + 1][0])):
        i += 1
    refpath = os.path.join(HERE, REFPATHS[i][1])
    try:  # Use a normal try/except because exceptions are expected.
        header = {
            "META.INSTRUMENT.NAME": "SYSTEM",
            "META.CALIBRATION_SOFTWARE_VERSION": cal_ver
        }
        pmap = crds.get_symbolic_mapping(context)
        imap = pmap.get_imap("system")
        rmapping = imap.get_rmap("crdscfg")
        ref = rmapping.get_best_ref(header)
        refpath = rmapping.locate_file(ref)
        api.dump_references(context, [ref])
    except Exception:
        log.verbose_warning(
            "Failed locating SYSTEM CRDSCFG reference",
            "under context", repr(context),
            "and cal_ver", repr(cal_ver) + ".  Using built-in references.")
    log.verbose("Using", srepr(os.path.basename(refpath)),
                "to determine applicable default reftypes for", srepr(cal_ver))
    return refpath
def _call(self, *args, **kwargs):
    """Core of RPC dispatch without error interpretation, logging, or return value decoding."""
    params = kwargs if len(kwargs) else args
    # if Any.kind(params) == Object and self.__version != '2.0':
    #     raise Exception('Unsupport arg type for JSON-RPC 1.0 '
    #                     '(the default version for this client, '
    #                     'pass version="2.0" to use keyword arguments)')
    jsonrpc_params = {"jsonrpc": self.__version,
                      "method": self.__service_name,
                      'params': params,
                      'id': message_id()}
    parameters = json.dumps(jsonrpc_params)
    url = self._get_url(jsonrpc_params)
    if "serverless" in url or "server-less" in url:
        raise exceptions.ServiceError("Configured for server-less mode.  Skipping JSON RPC " +
                                      repr(self.__service_name))
    if log.get_verbose() <= 50:
        log.verbose("CRDS JSON RPC", self.__service_name,
                    params if len(str(params)) <= 60 else "(...)", "-->")
    else:
        log.verbose("CRDS JSON RPC to", url, "parameters", params, "-->")
    response = apply_with_retries(self._call_service, parameters, url)
    try:
        rval = json.loads(response)
    except Exception:
        log.warning("Invalid CRDS jsonrpc response:\n", response)
        raise
    return rval
def get_local_files(self, names):
    """Given a list of basenames `names` which are pertinent to the given
    `pipeline_context`, cache the mappings locally where they can be used by CRDS.
    """
    if isinstance(names, dict):
        names = names.values()
    localpaths = {}

    # Add in GEIS format "conjugate" data files, .rmaps specify only .rXh
    names2 = names[:]
    for refname in names2:
        if re.match(r"\w+\.r[0-9]h$", refname):
            names.append(refname[:-1] + "d")

    downloads = []
    for name in names:
        localpath = self.locate(name)
        if name.lower() in ["n/a", "undefined"]:
            continue
        if not os.path.exists(localpath):
            downloads.append(name)
        elif self.ignore_cache:
            downloads.append(name)
            utils.remove(localpath, observatory=self.observatory)
        localpaths[name] = localpath
    if downloads:
        n_bytes = self.download_files(downloads, localpaths)
    else:
        log.verbose("Skipping download for cached files", sorted(names), verbosity=60)
        n_bytes = 0
    return localpaths, len(downloads), n_bytes
def get_dataset_headers_by_instrument(context, instrument, datasets_since=None):
    """Return { dataset_id : header, ... } for every `dataset_id` for `instrument`."""
    log.verbose("Dumping datasets for", repr(instrument))
    ids = get_dataset_ids(context, instrument, datasets_since)
    return dict(get_dataset_headers_unlimited(context, ids))
def get_data_http(self, filename):
    """Yield the data returned from `filename` of `pipeline_context` in manageable chunks."""
    url = self.get_url(filename)
    try:
        infile = request.urlopen(url)
        file_size = utils.human_format_number(self.catalog_file_size(filename)).strip()
        stats = utils.TimingStats()
        data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
        while data:
            stats.increment("bytes", len(data))
            status = stats.status("bytes")
            bytes_so_far = " ".join(status[0].split()[:-1])
            log.verbose("Transferred HTTP", repr(url), bytes_so_far, "/", file_size,
                        "bytes at", status[1], verbosity=20)
            yield data
            data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
    except Exception as exc:
        raise CrdsDownloadError("Failed downloading", srepr(filename),
                                "from url", srepr(url), ":", str(exc)) from exc
    finally:
        try:
            infile.close()
        except UnboundLocalError:  # maybe the open failed.
            pass
def submit(self):
    '''Validate submission form, upload to CRDS staging, handle server-side
    submission errors.'''
    # Client-side validation
    self.validate()
    # Upload the form
    argv = ["crds.submit", "--files"] + self.files + [
        "--monitor-processing",
        "--wait-for-completion",
        "--wipe-existing-files",
        "--certify-files",
        "--log-time",
        "--stats",
        "--creator", "{} Team".format(self['instrument']),
        "--change-level", self["change_level"],
        "--description", self["description"],
    ]
    log.verbose(argv)
    script = RedCatApiScript(argv)
    script._extra_redcat_parameters = dict(self)
    script()
    # Handle returned server-side errors
    return SubmissionResult(
        error_count=script._error_count,
        warning_count=script._warning_count,
        ready_url=script._ready_url)
def _get_server_info():
    """Fetch the server info dict.

    If CRDS_CONFIG_URI is set then download that URL and load json from the contents.
    Otherwise, call the CRDS server JSONRPC get_server_info() API.

    Returns  server info dict
    """
    config_uri = config.get_uri("server_config")
    try:
        if config_uri != "none":
            log.verbose(f"Loading config from URI '{config_uri}'.")
            content = utils.get_uri_content(config_uri)
            info = ast.literal_eval(content)
            info["status"] = "uri"
            info["connected"] = False
        else:
            config_uri = f"JSON RPC service at '{get_crds_server()}'"
            info = S.get_server_info()
            log.verbose("Connected to server at", srepr(get_crds_server()))
            info["status"] = "server"
            info["connected"] = True
    except Exception as exc:
        raise CrdsNetworkError(
            f"Failed downloading cache config from: {config_uri}:", srepr(exc)) from exc
    return info
def check_exptypes(self):
    """Based on EXP_TYPEs defined by the CAL schema and the specified instrument
    contexts, print out log info on missing or unexpected coverage.
    """
    for imap_name in self.contexts:
        i_loaded = crds.get_cached_mapping(imap_name)
        s_exp_types = self.locator.get_exptypes(i_loaded.instrument)
        for exp_type in s_exp_types:
            reftypes = self.locator.get_reftypes(exp_type)
            for filekind in i_loaded.selections:
                ufilekind = (i_loaded.instrument.upper(), filekind.upper())
                rmap_name = i_loaded.selections[filekind]
                if rmap_name == 'N/A':
                    if filekind in reftypes:
                        log.verbose("Reftype rmap", repr(ufilekind),
                                    "is defined as N/A for", repr(exp_type))
                else:
                    r_loaded = i_loaded.get_rmap(filekind)
                    r_exp_types = r_loaded.get_parkey_map().get("META.EXPOSURE.TYPE", None)
                    if r_exp_types is None:  # ???
                        log.verbose("Reftype", repr(ufilekind), "does not match using EXP_TYPE.")
                    elif exp_type in r_exp_types:
                        if filekind in reftypes:
                            log.verbose("Reftype", repr(ufilekind),
                                        "explicitly mentions", repr(exp_type))
                        else:
                            log.warning("Reftype", repr(ufilekind),
                                        "has unexpected coverage for", repr(exp_type))
                    elif "ANY" in r_exp_types or "N/A" in r_exp_types:
                        log.verbose("Reftype", repr(ufilekind),
                                    "is satisfied by ANY or N/A for", repr(exp_type))
                    elif filekind in reftypes:
                        log.info("Reftype", repr(ufilekind),
                                 "is missing coverage for", repr(exp_type))
                    else:
                        log.verbose("Reftype", repr(ufilekind),
                                    "has no expected coverage for", repr(exp_type))
def check_header(self, filename, header):
    """Evaluate the header expression associated with this validator (as its sole value)
    with respect to the given `header`.

    Read `header` from `filename` if `header` is None.
    """
    # super(KernelunityValidator, self).check_header(filename, header)
    array_name = self.complex_name
    all_data = header[array_name].DATA.transpose()
    images = int(np.product(all_data.shape[:-2]))
    images_shape = (images,) + all_data.shape[-2:]
    images_data = np.reshape(all_data, images_shape)
    log.verbose("File=" + repr(os.path.basename(filename)),
                "Checking", len(images_data), repr(array_name), "kernel(s) of size",
                images_data[0].shape, "for individual sums of 1+-1e-6.  Center pixels >= 1.")

    center_0 = images_data.shape[-2] // 2
    center_1 = images_data.shape[-1] // 2
    center_pixels = images_data[..., center_0, center_1]
    if not np.all(center_pixels >= 1.0):
        log.warning("Possible bad IPC Kernel: One or more kernel center pixel value(s)"
                    " too small, should be >= 1.0")
        # raise BadKernelCenterPixelTooSmall(
        #     "One or more kernel center pixel value(s) too small, should be >= 1.0")

    for (i, image) in enumerate(images_data):
        if abs(image.sum() - 1.0) > 1.0e-6:
            raise BadKernelSumError("Kernel sum", image.sum(),
                                    "is not 1+-1e-6 for kernel #" + str(i), ":", repr(image))
def check_header(self, filename, header):
    """Evaluate the header expression associated with this validator (as its sole value)
    with respect to the given `header`.

    Note that array-based checkers are not automatically loaded during a classic header
    fetch and expressions can involve operations on multiple keywords or arrays.
    """
    log.verbose("File=" + repr(os.path.basename(filename)),
                "Checking", repr(self.name), "condition", str(self._expr))
    for keyword in expr_identifiers(self._expr):
        if header.get(keyword, "UNDEFINED") == "UNDEFINED":
            log.verbose_warning("Keyword or Array", repr(keyword),
                                "is 'UNDEFINED'.  Skipping", repr(self._expr))
            return True  # fake satisfied
    try:
        satisfied = eval(self._expr_code, header, self._eval_namespace)
    except Exception as exc:
        raise RequiredConditionError("Failed checking constraint",
                                     repr(self._expr), ":", str(exc))
    if not satisfied:
        raise RequiredConditionError("Constraint", str(self._expr), "is not satisfied.")
    elif satisfied == "W":  # from warn_only() helper
        log.warning("Constraint", str(self._expr), "is not satisfied.")
        satisfied = True
    return satisfied
def _call(self, *args, **kwargs):
    """Core of RPC dispatch without error interpretation, logging, or return value decoding."""
    params = kwargs if len(kwargs) else args
    jsonrpc_params = {"jsonrpc": self.__version,
                      "method": self.__service_name,
                      'params': params,
                      'id': message_id()}
    parameters = json.dumps(jsonrpc_params)
    url = self._get_url(jsonrpc_params)
    if "serverless" in url or "server-less" in url:
        raise exceptions.ServiceError("Configured for server-less mode.  Skipping JSON RPC " +
                                      repr(self.__service_name))
    if log.get_verbose() <= 50:
        log.verbose("CRDS JSON RPC", self.__service_name,
                    params if len(str(params)) <= 60 else "(...)", "-->")
    else:
        log.verbose("CRDS JSON RPC to", url, "parameters", params, "-->")
    response = apply_with_retries(self._call_service, parameters, url)
    try:
        rval = json.loads(response)
    except Exception:
        log.warning("Invalid CRDS jsonrpc response:\n", response)
        raise
    return rval
def handle_updates(self, all_updates):
    """Write best reference updates back to dataset file headers."""
    super(FileHeaderGenerator, self).handle_updates(all_updates)
    for source in sorted(all_updates):
        updates = all_updates[source]
        if updates:
            log.verbose("-" * 120)
            update_file_bestrefs(self.context, source, updates)
def condition(self, value):
    """Condition a TMC/TMT filename column `value` down to a bare CRDS filename."""
    crds_name = value
    if "$" in value:
        # remove IRAF-style path prefix from SYNPHOT TMC and TMT filename column values
        crds_name = crds_name.split("$")[-1]
    if "[" in value:
        # split off HDU index trailer, or SYNPHOT parameterization trailer
        crds_name = crds_name.split("[")[0]
    log.verbose("Conditioned filepath", repr(value), "to", repr(crds_name))
    return crds_name
def upload_file(self, relative_url, filepath):
    """Upload the file at `filepath` to `relative_url` in a single POST, using the
    session's CSRF token.
    """
    abs_url = self.abs_url(relative_url)
    response = self.session.get(abs_url)
    log.verbose("COOKIES:", log.PP(response.cookies))
    csrf_token = response.cookies['csrftoken']
    files = {"files": open(filepath, "rb")}
    data = {'csrfmiddlewaretoken': csrf_token}
    self.session.post(abs_url, files=files, data=data)
def upload_file(self, relative_url, filepath):
    """Upload the file at `filepath` to `relative_url` in a single POST, using the
    session's CSRF token.
    """
    abs_url = self.abs_url(relative_url)
    response = self.session.get(abs_url)
    log.verbose("COOKIES:", log.PP(response.cookies))
    csrf_token = response.cookies['csrftoken']
    files = {"files": open(filepath, "rb")}
    data = {'csrfmiddlewaretoken': csrf_token}
    self.session.post(abs_url, files=files, data=data)
def fail_if_existing_lock(self):
    """Issue a fatal error if this user already holds an instrument lock."""
    response = self.get("/lock_status/" + self.username + "/")
    log.verbose("lock_status:", response)
    json_dict = utils.Struct(response.json())
    if (json_dict.name and (not json_dict.is_expired) and
            (json_dict.type == "instrument") and (json_dict.user == self.username)):
        log.fatal_error("User", repr(self.username), "has already locked", repr(json_dict.name),
                        ".  Failing to avert collisions.  Use --logout or logout on the"
                        " website to bypass.")
def del_rmap_header(rmapping, new_filename, header_key):
    """Delete `header_key` from the header of `rmapping` and write the result to
    `new_filename`.  This is potentially lossy since rewriting the rmap may/will
    lose comments and formatting quirks.
    """
    log.verbose("Deleting header value in", srepr(rmapping.basename), "for", srepr(header_key))
    del rmapping.header[header_key]
    rmapping.write(new_filename)
def get_ingested_files(self):
    """Return the server-side JSON info on the files already in the submitter's
    ingest directory.
    """
    log.verbose("Querying for existing files.")
    result = self.connection.get('/upload/list/').json()
    log.verbose("JSON info on existing ingested files:\n", log.PP(result))
    if "files" in result and isinstance(result["files"], list):
        return {info["name"]: info for info in result["files"]}
    return {info["name"]: info for info in result}
def possibly_remote_command(self, host, cmd, verbosity=65):
    """If `host` is the localhost, execute `cmd` in a subshell.  Otherwise execute
    `cmd` via ssh.
    """
    if host.startswith(socket.gethostname()):
        output = pysh.out_err(cmd, trace_commands=log.get_verbose() >= verbosity)
    else:
        output = pysh.out_err("ssh ${host} ${cmd}", trace_commands=log.get_verbose() >= verbosity)
    if output:
        log.verbose(output, verbosity=verbosity)
def get_validators(observatory, refpath):
    """Given `observatory` and a path to a reference file `refpath`, load the
    corresponding validators that define the individual constraints the reference
    should satisfy.
    """
    tpns = get_reffile_tpninfos(observatory, refpath)
    checkers = [validator(x) for x in tpns]
    log.verbose("Validators for", repr(refpath), "(" + str(len(checkers)) + "):\n",
                log.PP(checkers), verbosity=65)
    return checkers
def run_query(self, query):
    """Run the string `query` on the downloaded CRDS sqlite database."""
    connection = sqlite3.connect(self.sqlite_db_path)
    cursor = connection.cursor()
    log.verbose("querying:", repr(query))
    for row in cursor.execute(query):
        print(self.format_row(row))
    connection.commit()
    connection.close()
def from_filekind(cls, instrument, filekind):
    """Create the appropriate object for the type of reference file"""
    name = (instrument + '_' + filekind).lower()
    log.verbose('Instantiating rules for reference type {}.'.format(name), verbosity=25)
    if name in cls.rules:
        return cls.rules[name]()
    else:
        raise DeepLookError('No rules for instrument {} and reference file kind {}'.format(
            instrument, filekind))
def get_tables(self):
    """Return the list of database table names."""
    connection = sqlite3.connect(self.sqlite_db_path)
    cursor = connection.cursor()
    query = "select name from sqlite_master where type='table'"
    log.verbose("querying:", repr(query))
    tables = [row[0] for row in cursor.execute(query)]
    connection.close()
    return tables
def rmdir(self, subdir):
    """If it exists, remove `subdir` of the <synphot_dir> specified.

    subdir    string    subdirectory of <synphot_dir> to remove.
    """
    path = os.path.join(self.args.synphot_dir, subdir)
    log.verbose("rmdir:", repr(path))
    with log.verbose_warning_on_exception("Failed removing", repr(path)):
        shutil.rmtree(path)
def get_ingested_files(self):
    """Return the server-side JSON info on the files already in the submitter's
    ingest directory.
    """
    log.info("Determining existing files.")
    result = self.connection.get('/upload/list/').json()
    log.verbose("JSON info on existing ingested files:\n", log.PP(result))
    if "files" in result and isinstance(result["files"], list):
        return {info["name"]: info for info in result["files"]}
    else:
        return {info["name"]: info for info in result}
def test_2_delete_fails(self):
    log.verbose("-" * 60)
    r = rmap.ReferenceMapping.from_string(self.rmap_str, "./test.rmap", ignore_checksum=True)
    try:
        result = r.delete("shazaam.fits")
    except crds.CrdsError:
        pass
    else:
        assert False, "Expected delete to fail."
def fallback_header_wfpc2_flatfile_v1(rmap, header):
    """Compute a fallback header for WFPC2 FLATFILE."""
    filter1 = header["FILTER1"]
    filter2 = header["FILTER2"]
    log.verbose("Computing fallback header wfpc2", rmap.filekind,
                "swapping filter1 was", filter1, "filter2 was", filter2)
    header["FILTER1"] = filter2
    header["FILTER2"] = filter1
    return header
def get_extra_parameters(self):
    '''Return the form dictionary mapping form variables to value strings for
    new variables being added by the streamlining project.'''
    with open(self.args.redcat_parameters) as f:
        text = f.read()
        log.verbose("Raw YAML read:\n", text, verbosity=75)
        loaded = yaml.safe_load(text)
        log.verbose("ReDCaT parameters:\n", log.PP(loaded))
        return loaded
def precondition_header_wfc3_biasfile_v1(rmap, header_in):
    """Mutate the incoming dataset header based upon hard coded rules and the
    header's contents.
    """
    header = dict(header_in)
    if header["SUBARRAY"] == "T" and "SUB" not in header["APERTURE"]:
        header["APERTURE"] = "N/A"
        log.verbose("Mutated APERTURE to", repr(header["APERTURE"]),
                    "based on SUBARRAY='T' and 'SUB' not in APERTURE.")
    return header
def scan_exp_type_coverage():
    """Verify that there is some get_reftypes() response for all available exp_types."""
    from . import schema as crds_schema
    exp_types = crds_schema.get_exptypes()
    for exp_type in exp_types:
        if exp_type in ["ANY", "N/A"]:
            continue
        with log.warn_on_exception("failed determining reftypes for", repr(exp_type)):
            reftypes = get_reftypes(exp_type)
            log.verbose("Reftypes for", repr(exp_type), "=", repr(reftypes))