def gen_hybrid_insert(dbconfig, datadict, table_name):
    """
    Return appropriate data structures for inserting the given data dictionary
    into a database via a database access library. Currently using Psycopg2,
    so return a tuple of an INSERT template string and a sequence of values.
    Raises ProcessingError if the given data dictionary is empty or does not
    contain values for all of the fields required by the hybrid table (the
    JSON 'metadata' field is generated from the dictionary itself).
    """
    if (not datadict):                      # sanity check
        errMsg = "(gen_hybrid_insert): Empty data dictionary cannot be inserted into table."
        raise errors.ProcessingError(errMsg)

    schema_clean = clean_id(dbconfig.get('db_schema_name'))
    table_clean = clean_id(table_name)

    required = SQL_FIELDS_HYBRID.copy()
    fieldnames = [clean_id(field) for field in required]
    num_keys = len(fieldnames)              # count of required keys (w/o metadata)
    fieldnames.append('metadata')           # add name of the JSON metadata field
    keys = ', '.join(fieldnames)            # made from cleaned fieldnames

    values = [datadict.get(key) for key in required if datadict.get(key) is not None]
    num_vals = len(values)                  # count of values found (no metadata yet)

    if (num_keys == num_vals):              # must have a value for each key
        values.append(to_JSON(datadict, sort_keys=True))  # add the JSON for the metadata field
        place_holders = ', '.join(['%s' for v in values])
        sql_fmt_str = f"insert into {schema_clean}.{table_clean} ({keys}) values ({place_holders});"
        return (sql_fmt_str, values)
    else:                                   # there was a mismatch of keys and values
        errMsg = f"Unable to find values for all {num_keys} required fields: {required}"
        raise errors.ProcessingError(errMsg)

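# A brief sketch of the returned template/values pair, assuming (hypothetically)
# that SQL_FIELDS_HYBRID lists ['obs_id', 's_ra', 's_dec'] and that the schema
# name in dbconfig is 'sia'. Note that the final value is the JSON encoding of
# the whole metadata dictionary:
#
#   datadict = {'obs_id': 'jw00042', 's_ra': 150.1163, 's_dec': 2.2001}
#   sql, values = gen_hybrid_insert({'db_schema_name': 'sia'}, datadict, 'hybrid_obs')
#   # sql    -> "insert into sia.hybrid_obs (obs_id, s_ra, s_dec, metadata) values (%s, %s, %s, %s);"
#   # values -> ['jw00042', 150.1163, 2.2001, '{"obs_id": "jw00042", "s_dec": 2.2001, "s_ra": 150.1163}']
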
def load_aliases(alias_file, debug=False, tool_name=''):
    """ Load field name aliases from the given alias filepath. """
    if (debug):
        print("({}): Loading from aliases file '{}'".format(tool_name, alias_file), file=sys.stderr)

    try:
        config = configparser.ConfigParser(strict=False, empty_lines_in_values=False)
        config.optionxform = lambda option: option  # preserve the case of option names
        with open(alias_file) as cfile:             # close the file even on a parse error
            config.read_file(cfile)
    except FileNotFoundError:
        errMsg = "Aliases file '{}' not found or not readable.".format(alias_file)
        raise errors.ProcessingError(errMsg)

    try:
        aliases = config['aliases']
    except KeyError:
        errMsg = "No 'aliases' section found in aliases file '{}'.".format(alias_file)
        raise errors.ProcessingError(errMsg)

    if (debug):
        print("({}): Read {} field name aliases.".format(tool_name, len(aliases)), file=sys.stderr)
    return dict(aliases)

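# A minimal usage sketch: the [aliases] section name is fixed by load_aliases,
# but the keyword/field pairs below are illustrative only.
import tempfile

_SAMPLE_ALIASES = """
[aliases]
OBJECT = target_name
DATE-OBS = t_min
"""

with tempfile.NamedTemporaryFile('w', suffix='.ini', delete=False) as tmp:
    tmp.write(_SAMPLE_ALIASES)

aliases = load_aliases(tmp.name, debug=True, tool_name='demo')
# aliases -> {'OBJECT': 'target_name', 'DATE-OBS': 't_min'}
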
def process(self, metadata):
    """
    Perform the main work of the task and return the results as a Python data
    structure. This method overrides the JWST_ObsCoreCalcTask method to use
    iRods file access.
    """
    if (self._DEBUG):
        print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args), file=sys.stderr)

    # use the specified HDU of the FITS file to compute the WCS information
    which_hdu = self.args.get('which_hdu', 0)

    # get the iRods file path argument of the file to be opened
    irff_path = self.args.get('irods_fits_file')

    try:
        # get the FITS file at the specified path
        irff = self.irods.getf(irff_path, absolute=True)

        # sanity check on the given FITS file
        if (irff.size < FITS_BLOCK_SIZE):
            errMsg = "Skipping file too small to be a valid FITS file: '{}'".format(irff_path)
            raise errors.UnsupportedType(errMsg)

        if (self._DEBUG):
            print("({}): Reading iRods FITS file '{}'.".format(self.TOOL_NAME, irff_path), file=sys.stderr)

        # try to get the specified header and read WCS info from it
        header = self.irods.get_header(irff, which_hdu)
        if (header):
            wcs_info = self.irods.get_WCS(header)
        else:                           # unable to read the specified header
            errMsg = "Unable to find or read HDU {} of FITS file '{}'.".format(which_hdu, irff_path)
            raise errors.ProcessingError(errMsg)

    except DataObjectDoesNotExist as dodne:
        errMsg = "Unable to find the specified iRods FITS file '{}'.".format(irff_path)
        raise errors.ProcessingError(errMsg) from dodne

    except OSError as oserr:
        errMsg = "Unable to read WCS info from iRods FITS file '{}': {}.".format(irff_path, oserr)
        raise errors.ProcessingError(errMsg) from oserr

    # check that we got the WCS information from the file
    if (wcs_info is None):
        errMsg = "No WCS info found in iRods FITS file '{}'.".format(irff_path)
        raise errors.ProcessingError(errMsg)

    # try to produce values for each of the desired result fields
    calculated = self.calculate_results(wcs_info, metadata)
    metadata['calculated'] = calculated  # add calculations to metadata

    return metadata                     # return the results of processing

def process(self, _):
    """ Perform the main work of the task and return the results as a Python data structure. """
    if (self._DEBUG):
        print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args), file=sys.stderr)

    # process the given, already validated FITS file
    fits_file = self.args.get('fits_file')
    ignore_list = self.args.get('ignore_list') or fits_utils.FITS_IGNORE_KEYS
    which_hdu = self.args.get('which_hdu', 0)

    try:
        with fits.open(fits_file) as hdus_list:
            if (not fits_utils.has_image_data(hdus_list)):
                errMsg = f"Skipping FITS file '{fits_file}': no image data in primary HDU"
                raise errors.UnsupportedType(errMsg)
            hdrs = fits_utils.get_header_fields(hdus_list, which_hdu, ignore_list)

    except OSError as oserr:
        errMsg = "Unable to read image metadata from FITS file '{}': {}.".format(fits_file, oserr)
        raise errors.ProcessingError(errMsg)

    metadata = dict()                   # create overall metadata structure
    finfo = gather_file_info(fits_file)
    if (finfo is not None):             # add common file information
        metadata['file_info'] = finfo
    if (hdrs is not None):              # add the headers to the metadata
        metadata['headers'] = hdrs

    return metadata                     # return the results of processing

def update_metadata(self, imd_path, sink_data, remove_only=False):
    """
    Attach or remove the items in the given data dictionary to/from the iRods
    file at the specified path. If the remove_only flag is True, file metadata
    items with keys matching input item keys are removed from the iRods file.
    """
    if (self._DEBUG):
        print(f"({self.TOOL_NAME}.update_metadata): imd_path={imd_path}, remove_only={remove_only}, metadata={sink_data}", file=sys.stderr)

    try:
        if (remove_only):               # try to remove the specified metadata from the iRods file node
            self.irods.remove_metaf(imd_path, sink_data, absolute=True)
            action = 'removed from'
        else:                           # try to attach the given metadata to the iRods file node
            self.irods.put_metaf(imd_path, sink_data, absolute=True)
            action = 'attached to'
    except Exception as ex:             # includes irods NetworkException
        errMsg = f"Unable to alter the metadata of the iRods file at '{imd_path}'. Exception: {ex}"
        raise errors.ProcessingError(errMsg)

    if (self._VERBOSE):
        print(f"({self.TOOL_NAME}): Metadata {action} iRods file '{imd_path}'", file=sys.stderr)

def output_results(self, metadata):
    """ Output the given metadata in the configured output format. """
    genfile = self.args.get('gen_file_path')
    outfile = self.args.get('output_file')
    out_fmt = self.args.get('output_format') or DEFAULT_OUTPUT_FORMAT

    if (out_fmt == 'json'):
        if (genfile):                   # if generating the output filename/path
            file_info = md_utils.get_file_info(metadata)
            fname = file_info.get('file_name') if file_info else "NO_FILENAME"
            outfile = self.gen_output_file_path(fname, out_fmt, self.TOOL_NAME)
            self.output_JSON(metadata, outfile)
        elif (outfile is not None):     # else if using the given filepath
            self.output_JSON(metadata, outfile)
        else:                           # else using standard output
            self.output_JSON(metadata)
    else:
        errMsg = "({}.output_results): Invalid output format '{}'.".format(self.TOOL_NAME, out_fmt)
        raise errors.ProcessingError(errMsg)

    if (self._VERBOSE):
        out_dest = outfile if (outfile) else STDOUT_NAME
        print("({}): Results output to '{}'".format(self.TOOL_NAME, out_dest), file=sys.stderr)

def input_data(self):
    """ Read data from a file or stream and return it as a Python data structure. """
    if (self._DEBUG):
        print("({}.input_data): ARGS={}".format(self.TOOL_NAME, self.args), file=sys.stderr)

    # process the given, already validated input file
    input_file = self.args.get('input_file')
    if (self._VERBOSE):
        if (input_file is None):
            print("({}): Reading data from {}".format(self.TOOL_NAME, STDIN_NAME), file=sys.stderr)
        else:
            print("({}): Reading data file '{}'".format(self.TOOL_NAME, input_file), file=sys.stderr)

    input_format = self.args.get('input_format') or DEFAULT_INPUT_FORMAT
    if (input_format == 'json'):
        data = self.input_JSON(input_file)
    else:                               # currently, no other input formats
        errMsg = "({}.input_data): Invalid input format '{}'.".format(self.TOOL_NAME, input_format)
        raise errors.ProcessingError(errMsg)

    return data                         # return the input data

def output_results(self, indata):
    """
    Store the given data into the configured database OR just output SQL
    to do so, depending on the 'output-only' flag.
    """
    if (self._DEBUG):
        print("({}.output_results): ARGS={}".format(self.TOOL_NAME, self.args), file=sys.stderr)

    # load the database configuration from a given or default file path
    dbconfig_file = self.args.get('dbconfig_file') or DEFAULT_DBCONFIG_FILEPATH
    dbconfig = self.load_sql_db_config(dbconfig_file)

    # check that the table to be filled actually exists in the database
    catalog_table = self.args.get('catalog_table')
    if (not self.table_exists(dbconfig, catalog_table)):
        errMsg = "Catalog table to fill '{}' does not exist.".format(catalog_table)
        raise errors.ProcessingError(errMsg)

    # file information is needed by the SQL generation methods below
    file_info = md_utils.get_file_info(indata)

    # read the catalog table data from the input data structure
    data = md_utils.get_data(indata)

    # decide whether we are filling a table in the DB or just outputting SQL statements
    sql_only = self.args.get('output_only')
    if (sql_only):                      # if just outputting SQL
        self.write_table(dbconfig, data, catalog_table, file_info)
    else:                               # else filling the table in the database
        self.fill_table(dbconfig, data, catalog_table)

def output_results(self, metadata):
    """
    Attach or remove metadata items to/from the specified iRods file OR just
    output the metadata, depending on the 'output-only' flag.
    """
    if (self._DEBUG):
        print("({}.output_results): ARGS={}".format(self.TOOL_NAME, self.args), file=sys.stderr)

    # get the iRods file path argument of the file to be annotated
    imd_path = self.args.get('irods_md_file', self.args.get('irods_fits_file'))  # default is iRods input file
    if ((imd_path is None) or (not imd_path.strip())):
        errMsg = "A full iRods path to an annotatable iRods file must be specified."
        raise errors.ProcessingError(errMsg)

    # check the iRods metadata target file path for validity
    try:
        self.irods.getf(imd_path, absolute=True)
    except (CollectionDoesNotExist, DataObjectDoesNotExist, NoResultFound):
        errMsg = "Unable to find iRods file for metadata alteration at '{}'.".format(imd_path)
        raise errors.ProcessingError(errMsg)

    # extract the data to be output from the given metadata
    sink_data = self.get_data_for_output(metadata)

    # when adding/replacing metadata, skip the items named in the skip list;
    # when removing, skip no items: all user-specified items are removed
    remove_only = self.args.get('remove_only') or False
    if (not remove_only):
        remove_entries(sink_data, ignore=self.skip_list)

    # decide whether we are changing metadata in iRods or just outputting it
    output_only = self.args.get('output_only')
    if (not output_only):               # if storing metadata to iRods
        self.update_metadata(imd_path, sink_data, remove_only)
    else:                               # else just outputting metadata
        oodata = dict()
        oodata['file_info'] = md_utils.get_file_info(metadata)
        oodata['to_sink'] = sink_data
        super().output_results(oodata)

def load_sql_db_config(self, dbconfig_file):
    """
    Load the database configuration from the given filepath. Returns a
    dictionary of database configuration parameters.
    """
    if (self._DEBUG):
        print("({}): Reading DB configuration file '{}'".format(self.TOOL_NAME, dbconfig_file), file=sys.stderr)

    try:
        config = configparser.ConfigParser(
            interpolation=configparser.ExtendedInterpolation(),
            strict=False, empty_lines_in_values=False)
        with open(dbconfig_file) as cfile:  # try to read the DB config file
            config.read_file(cfile)
    except FileNotFoundError:
        errMsg = "DB configuration file '{}' not found or not readable.".format(dbconfig_file)
        raise errors.ProcessingError(errMsg)

    try:
        dbconfig = dict(config['db_properties'])  # try to fetch DB properties
    except KeyError:
        e1 = 'DB storage specified but no database parameters (db_properties) found'
        errMsg = "{} in DB configuration file '{}'.".format(e1, dbconfig_file)
        raise errors.ProcessingError(errMsg)

    if (dbconfig.get('db_uri') is None):
        e1 = 'DB storage specified but no database URI (db_uri) parameter found'
        errMsg = "{} in DB configuration file '{}'.".format(e1, dbconfig_file)
        raise errors.ProcessingError(errMsg)

    if (self._DEBUG):
        print("({}): Read {} DB configuration properties.".format(self.TOOL_NAME, len(dbconfig)), file=sys.stderr)

    return dbconfig

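# A minimal sketch of the expected DB configuration file: an INI file whose
# [db_properties] section must at least define db_uri. The db_schema_name
# property is read by the SQL generation functions in this module; the values
# shown here are illustrative only.
#
#   [db_properties]
#   db_schema_name = sia
#   db_uri = postgresql://user:password@localhost:5432/metadata
#
#   dbconfig = self.load_sql_db_config('/etc/dbconfig.ini')
#   dbconfig.get('db_uri')  # -> 'postgresql://user:password@localhost:5432/metadata'
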
def check_missing_parameters(config, required=REQUIRED_DB_PARAMETERS):
    """
    Check the given configuration dictionary for all database parameters
    required by this module.
    :raises ProcessingError if a required database parameter is missing.
    """
    missing = missing_entries(config, required)
    if (missing):
        errMsg = f"Missing required parameters: {missing}"
        raise errors.ProcessingError(errMsg)

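# A minimal sketch of the helper semantics assumed above; the real
# missing_entries utility may differ in detail:
def missing_entries(config, required):
    """ Return a list of the required keys which are absent from config. """
    return [key for key in required if key not in config]

# check_missing_parameters({'db_uri': 'postgresql://...'}, required=['db_uri', 'db_schema_name'])
# -> raises ProcessingError: "Missing required parameters: ['db_schema_name']"
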
def clean_id(identifier, allowed=DB_ID_CHARS):
    """
    Clean the given SQL identifier to prevent SQL injection attacks. Note that
    this method is specifically for simple SQL identifiers and is NOT a general
    solution which prevents SQL injection attacks.
    """
    if (identifier):
        return keep_characters(identifier, allowed)
    else:
        errMsg = "Identifier to be cleaned cannot be empty or None."
        raise errors.ProcessingError(errMsg)

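# A minimal sketch of the assumed helper behavior: keep_characters drops every
# character not in the allowed set. The exact contents of DB_ID_CHARS are an
# assumption here (likely letters, digits, and underscore):
def keep_characters(text, allowed):
    """ Return text with all characters not in the allowed set removed. """
    return ''.join(ch for ch in text if ch in allowed)

# clean_id("obs_table; DROP TABLE users")  # -> 'obs_tableDROPTABLEusers'
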
def load_fields_info(self, fields_file):
    """
    Load the fields info dictionary from the given filepath and return it.
    The fields info file is assumed to define a single dictionary in TOML format.
    """
    try:
        return toml.load(fields_file)   # load fields info file as a dictionary
    except Exception:
        errMsg = "Field Information file '{}' not found or not readable.".format(fields_file)
        raise errors.ProcessingError(errMsg)

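# A hypothetical sketch of a fields info file: each top-level TOML table names
# a result field and holds its properties. The field names here are
# illustrative; the 'required' property is the one read by check_missing below.
#
#   [s_ra]
#   required = true
#
#   [obs_title]
#   required = false
#
#   fields_info = self.load_fields_info('/etc/jwst-fields.toml')
#   fields_info['s_ra']['required']  # -> True
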
def select_data_for_output(self, metadata):
    """
    Select a subset of data, from the given metadata, for output.
    Returns a single dictionary of selected data.
    """
    calculated = md_utils.get_calculated(metadata)
    if (not calculated):
        errMsg = "The 'calculated' data, required by this program, is missing from the input."
        raise errors.ProcessingError(errMsg)

    selected = calculated.copy()
    return selected                     # return selected dataset

def get_data_for_output(self, metadata):
    """
    Extract the data to be output from the given metadata.
    Currently, just returns a copy of the "calculated" dictionary.
    """
    calculated = md_utils.get_calculated(metadata)
    if (not calculated):
        errMsg = "The 'calculated' data, required by this program, is missing from the input."
        raise errors.ProcessingError(errMsg)

    copied = calculated.copy()
    return copied

def fill_table(dbconfig, data, catalog_table):
    """
    Insert the given list of data row lists into the named catalog table
    using the given DB parameters.
    """
    if (not data):                      # sanity check
        errMsg = "(fill_table): Empty data list cannot be inserted into table."
        raise errors.ProcessingError(errMsg)

    sql_fmt_str = pg_gen.gen_insert_rows(dbconfig, catalog_table)
    insert_rows_sql(dbconfig, sql_fmt_str, data)  # data already in the correct form
    return len(data)                    # assume all rows correctly inserted

def test_pe_code(self):
    pe = xcpt.ProcessingError(self.EMSG, self.ECODE)
    print(pe)
    print(type(pe))
    assert pe.error_code == self.ECODE

    pedict = pe.to_dict()
    assert 'message' in pedict
    assert 'error_code' in pedict
    assert pedict.get('message') == self.EMSG
    assert pedict.get('error_code') == self.ECODE

    petup = pe.to_tuple()
    assert petup[0] == self.EMSG
    assert petup[1] == self.ECODE

def output_report(self, report):
    """ Output the given list of report strings in the selected format. """
    rpt_fmt = self.args.get('report_format') or 'text'
    if (rpt_fmt == 'json'):
        self.output_JSON(report, sys.stderr)
    elif (rpt_fmt == 'text'):
        for line in report:
            print(line, file=sys.stderr)
    else:
        errMsg = "Invalid report format '{}'.".format(rpt_fmt)
        raise errors.ProcessingError(errMsg)

def process(self, metadata):
    """
    Perform the main work of the task on the given metadata and return
    the results as a Python data structure.
    """
    if (self._DEBUG):
        print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args), file=sys.stderr)

    # process the given, already validated FITS file
    fits_file = self.args.get('fits_file')
    if (self._DEBUG):
        print("({}): Reading FITS file '{}'".format(self.TOOL_NAME, fits_file), file=sys.stderr)

    # compute the WCS information from the specified HDU of the FITS file
    which_hdu = self.args.get('which_hdu', 0)
    try:
        with fits.open(fits_file) as hdus_list:
            wcs_info = fits_utils.get_WCS(hdus_list, which_hdu)
    except OSError as oserr:
        errMsg = "Unable to read WCS info from FITS file '{}': {}.".format(fits_file, oserr)
        raise errors.ProcessingError(errMsg)

    if (wcs_info is None):
        errMsg = "No WCS info found in FITS file '{}'.".format(fits_file)
        raise errors.ProcessingError(errMsg)

    # try to produce values for each of the desired result fields
    calculated = self.calculate_results(wcs_info, metadata)
    metadata['calculated'] = calculated  # add calculations to metadata

    return metadata                     # return the results of processing

def calc_wcs_coordinates(wcs_info, metadata, calculations):
    """
    Extract the WCS coordinates for the reference pixel of the current image file.
    Sets both s_ra and s_dec fields simultaneously when either field is processed.
    This method assumes that neither the s_ra nor s_dec field has a value yet and
    it will overwrite current values for both s_ra and s_dec if that assumption
    is not valid.
    """
    if (('s_ra' in calculations) and ('s_dec' in calculations)):  # avoid repetition
        return                          # exit out now

    ctype = list(wcs_info.wcs.ctype)

    md = metadata.get('headers')
    if (md is None):
        return                          # exit out now

    crval1 = md.get('CRVAL1')
    crval2 = md.get('CRVAL2')
    if ((crval1 is not None) and (crval2 is not None)):
        try:
            val1 = float(crval1)
            val2 = float(crval2)
        except ValueError:              # on string to number conversion
            errMsg = "(calc_wcs_coords) Unable to convert CRVAL1 or CRVAL2 to a number."
            raise errors.ProcessingError(errMsg)

        if (ctype[0].startswith('RA')):      # if CRVAL1 has the RA value
            calculations['s_ra'] = val1      # put CRVAL1 value into s_ra
            calculations['s_dec'] = val2     # put CRVAL2 value into s_dec
        elif (ctype[0].startswith('DEC')):   # else if CRVAL1 has the DEC value
            calculations['s_dec'] = val1     # put CRVAL1 value into s_dec
            calculations['s_ra'] = val2      # put CRVAL2 value into s_ra
        else:
            errMsg = "(calc_wcs_coords) Unable to assign RA/DEC axes from ctype={}".format(ctype)
            raise errors.ProcessingError(errMsg)

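# A minimal usage sketch, assuming astropy is available. The CTYPE values
# follow the common FITS convention ('RA---TAN' on the first axis means
# CRVAL1 carries the RA); the coordinate values are illustrative:
from astropy.wcs import WCS

w = WCS(naxis=2)
w.wcs.ctype = ['RA---TAN', 'DEC--TAN']

metadata = {'headers': {'CRVAL1': '150.1163', 'CRVAL2': '2.2001'}}
calculations = {}
calc_wcs_coordinates(w, metadata, calculations)
# calculations -> {'s_ra': 150.1163, 's_dec': 2.2001}
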
def create_table_sql(args, dbconfig, column_names, column_formats):
    """
    Create a new table with the given table name, columns, and types as specified
    by the given catalog metadata dictionary, using the given DB parameters.
    Returns a list of cleaned SQL strings to be executed to create the table.
    Raises ProcessingError if the column name or format vectors are not present
    in the input OR if the vectors are not the same size.
    """
    if (column_names is not None and column_formats is not None
            and (len(column_names) == len(column_formats))):
        return pg_gen.gen_create_table_sql(args, dbconfig, column_names, column_formats)
    else:
        errMsg = 'Column name and format lists must be the same length.'
        raise errors.ProcessingError(errMsg)

def test_pe(self):
    pe = xcpt.ProcessingError(self.EMSG)
    print(pe)
    print(type(pe))
    assert pe.error_code == xcpt.ProcessingError.ERROR_CODE

    pedict = pe.to_dict()
    assert 'message' in pedict
    assert 'error_code' in pedict
    assert pedict.get('message') == self.EMSG
    assert pedict.get('error_code') == xcpt.ProcessingError.ERROR_CODE

    petup = pe.to_tuple()
    assert petup[0] == self.EMSG
    assert petup[1] == xcpt.ProcessingError.ERROR_CODE

    pestr = str(pe)
    print(pestr)
    assert str(xcpt.ProcessingError.ERROR_CODE) in pestr
    assert 'wrong' in pestr

def process(self, _):
    """ Perform the main work of the task and return the results as a Python data structure. """
    if (self._DEBUG):
        print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args), file=sys.stderr)

    # process the given, already validated FITS file
    fits_file = self.args.get('fits_file')
    ignore_list = self.args.get('ignore_list') or fits_utils.FITS_IGNORE_KEYS
    catalog_hdu = self.args.get('catalog_hdu', 1)

    try:
        with fits.open(fits_file) as hdus_list:
            if (not fits_utils.has_catalog_data(hdus_list)):
                errMsg = f"Skipping FITS file '{fits_file}': no catalog in HDU 1"
                raise errors.UnsupportedType(errMsg)
            hdrs = fits_utils.get_header_fields(hdus_list, catalog_hdu, ignore_list)
            cinfo = fits_utils.get_column_info(hdus_list, catalog_hdu)
            fits_rec = hdus_list[catalog_hdu].data
            data = fits_utils.rows_from_data(fits_rec)
            table = Table.read(hdus_list, hdu=catalog_hdu)
            meta = fits_utils.get_table_meta_attribute(table)

    except OSError as oserr:
        errMsg = "Unable to read catalog data from FITS file '{}': {}.".format(fits_file, oserr)
        raise errors.ProcessingError(errMsg)

    outdata = dict()                    # create overall output structure
    finfo = gather_file_info(fits_file)
    if (finfo is not None):             # add common file information
        outdata['file_info'] = finfo
    if (hdrs is not None):              # add the headers to the output
        outdata['headers'] = hdrs
    if (cinfo is not None):             # add column metadata to the output
        outdata['column_info'] = cinfo
    outdata['meta'] = meta              # add extra table metadata to the output
    outdata['data'] = data              # add the data table to the output

    return outdata                      # return the results of processing

def fits_format_to_sql(tform):
    """
    Map the given FITS column format field into the corresponding SQL type declaration.
    :param tform: a FITS column format field for translation.
    :return an SQL type declaration string, corresponding to the given FITS format code.
    :raises ProcessingError if tform specifies a type not supported by the database.
    """
    fmt_code = tform
    if (tform and len(tform) > 1):
        fmt_code = tform[0:1]           # use only the leading type code character

    sql_decl = _FITS_FORMAT_TO_SQL.get(fmt_code, UNSUPPORTED)
    if (sql_decl != UNSUPPORTED):
        return sql_decl
    else:
        errMsg = f"FITS data column format '{tform}' is not supported."
        raise errors.ProcessingError(errMsg)

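# A hypothetical sketch of the module-level mapping consulted above; the real
# _FITS_FORMAT_TO_SQL table and its PostgreSQL type choices may differ:
UNSUPPORTED = 'UNSUPPORTED'
_FITS_FORMAT_TO_SQL = {
    'A': 'text',                    # character
    'D': 'double precision',        # double-precision float
    'E': 'real',                    # single-precision float
    'I': 'smallint',                # 16-bit integer
    'J': 'integer',                 # 32-bit integer
    'K': 'bigint',                  # 64-bit integer
    'L': 'boolean',                 # logical
}

# fits_format_to_sql('D25.17')  # -> 'double precision'
# fits_format_to_sql('X')       # -> raises ProcessingError
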
def gen_column_decls_sql(column_names, column_formats):
    """
    Generate the SQL column declarations for a table, given lists of column
    names and FITS format specs.
    :param column_names: a list of column name strings
    :param column_formats: a list of FITS format specifier strings
    :return a list of SQL declaration strings for the table columns (no trailing commas!)
    :raises ProcessingError if the given column name and format lists are not the same size.
    """
    if (len(column_names) != len(column_formats)):
        errMsg = "Column name and format lists must be the same length."
        raise errors.ProcessingError(errMsg)

    col_types = [fits_format_to_sql(fmt) for fmt in column_formats]
    col_names_clean = [clean_id(name) for name in column_names]  # clean the column names
    return ["{0} {1}".format(n, t) for n, t in zip(col_names_clean, col_types)]

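# A brief usage sketch; the resulting SQL types assume the hypothetical
# _FITS_FORMAT_TO_SQL mapping sketched above:
decls = gen_column_decls_sql(['id', 'ra', 'dec'], ['K', 'D', 'D'])
# decls -> ['id bigint', 'ra double precision', 'dec double precision']
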
def check_missing(self, metadata):
    """
    Check for missing fields in the results part of the given metadata. Returns
    a list of warning message strings, to be later formatted and output.
    """
    report = []
    calculated = md_utils.get_calculated(metadata)
    if (not calculated):
        errMsg = "The 'calculated' data, required by this program, is missing from the input."
        raise errors.ProcessingError(errMsg)

    fields_info = md_utils.get_fields_info(metadata)
    for field_name, props in fields_info.items():
        if (field_name not in calculated):
            req_fld = 'Required' if props.get('required') else 'Optional'
            msg = "WARNING: {0} field '{1}' still does not have a value.".format(req_fld, field_name)
            report.append(msg)

    return report                       # return list of message strings

def gen_insert_row(dbconfig, datadict, table_name):
    """
    Return appropriate data structures for inserting the given data dictionary
    into a database via a database access library. Currently using Psycopg2,
    so return a tuple of an INSERT template string and a sequence of values.
    """
    if (not datadict):                  # sanity check
        errMsg = "(gen_insert_row): Empty data dictionary cannot be inserted into table."
        raise errors.ProcessingError(errMsg)

    schema_clean = clean_id(dbconfig.get('db_schema_name'))
    table_clean = clean_id(table_name)
    keys_clean = [clean_id(key) for key in datadict.keys()]
    keys = ', '.join(keys_clean)

    values = list(datadict.values())
    place_holders = ', '.join(['%s' for v in values])

    sql_fmt_str = f"insert into {schema_clean}.{table_clean} ({keys}) values ({place_holders});"
    return (sql_fmt_str, values)

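# A brief usage sketch with Psycopg2. The table and column names are
# illustrative; only the template/values pairing is fixed by gen_insert_row:
import psycopg2

row = {'obs_id': 'jw00042', 's_ra': 150.1163, 's_dec': 2.2001}
sql, values = gen_insert_row(dbconfig, row, 'observations')
# sql    -> "insert into <schema>.observations (obs_id, s_ra, s_dec) values (%s, %s, %s);"
# values -> ['jw00042', 150.1163, 2.2001]

with psycopg2.connect(dbconfig.get('db_uri')) as conn:
    with conn.cursor() as cursor:
        cursor.execute(sql, values)     # Psycopg2 binds the values safely
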
def output_results(self, metadata):
    """ Output the given metadata in the configured output format. """
    genfile = self.args.get('gen_file_path')
    outfile = self.args.get('output_file')

    if (genfile):                       # if generating the output filename/path
        file_info = md_utils.get_file_info(metadata)
        fname = file_info.get('file_name') if file_info else "NO_FILENAME"
        outfile = self.gen_output_file_path(fname, PICKLE_EXTENSION, self.TOOL_NAME)
        self.output_pickle(metadata, outfile)
    elif (outfile is not None):         # else if using the given filepath
        self.output_pickle(metadata, outfile)
    else:                               # else trying to use standard output
        errMsg = "Pickle cannot be written to {}.".format(STDOUT_NAME)
        raise errors.ProcessingError(errMsg)

    if (self._VERBOSE):
        print("({}): Pickled data output to '{}'".format(self.TOOL_NAME, outfile), file=sys.stderr)

def process(self, _):
    """ Perform the main work of the task and return the results as a Python data structure. """
    if (self._DEBUG):
        print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args), file=sys.stderr)

    # get the selection and filtering arguments
    which_hdu = self.args.get('which_hdu', 0)
    ignore_list = self.args.get('ignore_list') or fits_utils.FITS_IGNORE_KEYS

    # get the iRods file path argument of the file to be opened
    irff_path = self.args.get('irods_fits_file')

    try:
        # get the FITS file at the specified path
        irff = self.irods.getf(irff_path, absolute=True)

        # sanity check on the given FITS file
        if (irff.size < FITS_BLOCK_SIZE):
            errMsg = "Skipping file too small to be a valid FITS file: '{}'".format(irff_path)
            raise errors.UnsupportedType(errMsg)

        # actually read the file to get the specified header
        header = self.irods.get_header(irff, which_hdu)
        if (header):
            if (not self.irods.is_image_header(header)):
                errMsg = "HDU {} is not an image header. Skipping FITS file '{}'.".format(which_hdu, irff_path)
                raise errors.ProcessingError(errMsg)

            # get and save some common file information
            file_info = self.irods.get_irods_file_info(irff)

            # get additional metadata ABOUT the iRods file itself
            irods_metadata = self.irods.get_irods_metadata(irff)

            # get any content metadata attached to the file
            content_metadata = self.irods.get_content_metadata(irff)

            # now try to read the FITS header from the FITS file
            hdrs = fits_utils.get_fields_from_header(header, ignore_list)

        else:                           # unable to read the specified header
            errMsg = "Unable to read image metadata from HDU {} of FITS file '{}'.".format(which_hdu, irff_path)
            raise errors.ProcessingError(errMsg)

    except DataObjectDoesNotExist as dodne:
        errMsg = "Unable to find the specified iRods FITS file '{}'.".format(irff_path)
        raise errors.ProcessingError(errMsg) from dodne

    except OSError as oserr:
        errMsg = "Unable to read image metadata from iRods FITS file '{}': {}.".format(irff_path, oserr)
        raise errors.ProcessingError(errMsg) from oserr

    metadata = dict()                   # create overall metadata structure
    metadata['file_info'] = file_info   # add previously gathered remote file information
    metadata['irods_metadata'] = irods_metadata
    metadata['content_metadata'] = content_metadata
    if (hdrs is not None):
        metadata['headers'] = hdrs      # add the headers to the metadata

    return metadata                     # return the results of processing