def __init__(self, wclfile, debug=1):
    """ Initialize the wrapper state from an input wcl file.

        wclfile -- path of the input wcl file; it is opened for reading and
                   parsed into self.inputwcl
        debug   -- debug level stored on the object (default 1)

        Besides the parsed input, an output wcl skeleton is created holding
        an empty 'wrapper' section, the provenance section (with empty
        "used" and "was derived from" sub-sections) and an empty
        outputs-by-section dictionary.
    """
    self.input_filename = wclfile

    # parse the input wcl before touching any other state
    parsed_input = WCL()
    self.inputwcl = parsed_input
    with open(wclfile, 'r') as wcl_handle:
        parsed_input.read(wcl_handle)

    self.debug = debug

    # note: WGB handled by file registration using OW_OUTPUTS_BY_SECT
    provenance = collections.OrderedDict()
    provenance[provdefs.PROV_USED] = collections.OrderedDict()
    provenance[provdefs.PROV_WDF] = collections.OrderedDict()

    skeleton = {
        'wrapper': collections.OrderedDict(),
        intgdefs.OW_PROV_SECT: provenance,
        intgdefs.OW_OUTPUTS_BY_SECT: {},
    }
    self.outputwcl = WCL(skeleton)

    # bookkeeping counters and current task/exec trackers
    self.last_num_derived = 0
    self.last_num_meta = 0
    self.curr_task = []
    self.curr_exec = None
def get_filemgmt_class(args):
    """ Figure out which filemgmt class to use.

        The class name is taken from the first source that applies:
          1. args['classmgmt'], when truthy
          2. the 'filemgmt' value of the archive named args['archive_name']
             in the wcl file args['wclfile'], when a wcl file is given
          3. the filemgmt column of the ops_archive table, queried through a
             connection made with args['des_services'] and args['section']

        args -- dictionary; 'archive_name', 'classmgmt' and 'wclfile' are
                always read, 'des_services' and 'section' only for the
                database lookup

        Returns the class name string.  miscutils.fwdie is called when the
        archive is unknown or when the resulting name is missing or does
        not contain a period.
    """
    filemgmt_class = None
    archive = args['archive_name']

    if args['classmgmt']:
        filemgmt_class = args['classmgmt']
    elif args['wclfile']:
        # a truthy wclfile can never be None, so no second check is needed
        from intgutils.wcl import WCL
        config = WCL()
        with open(args['wclfile'], 'r') as configfh:
            config.read(configfh)
        if archive in config['archive']:
            filemgmt_class = config['archive'][archive]['filemgmt']
        else:
            miscutils.fwdie(f"Invalid archive name ({archive})", 1)
    else:
        import despydmdb.desdmdbi as desdmdbi
        with desdmdbi.DesDmDbi(args['des_services'], args['section']) as dbh:
            curs = dbh.cursor()
            # bind the archive name rather than pasting caller-supplied text
            # into the statement
            sql = ("select filemgmt from ops_archive "
                   f"where name={dbh.get_named_bind_string('name')}")
            curs.execute(sql, {'name': archive})
            rows = curs.fetchall()
            curs.close()
            if rows:
                filemgmt_class = rows[0][0]
            else:
                miscutils.fwdie(f"Invalid archive name ({archive})", 1)

    if filemgmt_class is None or '.' not in filemgmt_class:
        print(f"Error: Invalid filemgmt class name ({filemgmt_class})", flush=True)
        print("\tMake sure it contains at least 1 period.", flush=True)
        miscutils.fwdie("Invalid filemgmt class name", 1)

    return filemgmt_class
def __init__(self, initvals=None, fullconfig=None):
    """ Connect to the database and assemble the configuration.

        initvals   -- dictionary of initial values.  Keys read here:
                      'des_services', 'des_db_section' (or 'section'),
                      'threaded', 'connection', 'get_db_config', 'wclfile'
        fullconfig -- optional configuration merged over the db/file config

        A connection passed in initvals['connection'] is tried first; if it
        cannot be used a new connection is opened from the services file and
        section.  The configuration is layered in this order (later wins):
        database, wcl file, fullconfig, initvals.
    """
    if not miscutils.use_db(initvals):
        miscutils.fwdie("Error: FileMgmtDB class requires DB but was told not to use DB", 1)

    self.desservices = None
    if 'des_services' in initvals:
        self.desservices = initvals['des_services']

    self.section = None
    if 'des_db_section' in initvals:
        self.section = initvals['des_db_section']
    elif 'section' in initvals:
        self.section = initvals['section']

    if 'threaded' in initvals:
        self.threaded = initvals['threaded']

    have_connect = False
    if 'connection' in initvals:
        try:
            desdmdbi.DesDmDbi.__init__(self, connection=initvals['connection'])
            have_connect = True
        except Exception:
            # only real errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are no longer swallowed here
            miscutils.fwdebug_print('Could not connect to DB using transferred connection, falling back to new connection.')

    if not have_connect:
        try:
            desdmdbi.DesDmDbi.__init__(self, self.desservices, self.section)
        except Exception as err:
            miscutils.fwdie((f"Error: problem connecting to database: {err}\n" +
                             "\tCheck desservices file and environment variables"), 1)

    # precedence - db, file, params
    self.config = WCL()
    if miscutils.checkTrue('get_db_config', initvals, False):
        self._get_config_from_db()

    if 'wclfile' in initvals and initvals['wclfile'] is not None:
        fileconfig = WCL()
        with open(initvals['wclfile'], 'r') as infh:
            fileconfig.read(infh)
            self.config.update(fileconfig)

    if fullconfig is not None:
        self.config.update(fullconfig)
    self.config.update(initvals)

    self.filetype = None
    self.ftmgmt = None
    self.filepat = None
def _get_config_from_db(self):
    """ Replace self.config with configuration values read from the database.

        Three sections are filled in, in this order: 'archive',
        'filetype_metadata' and the file header info section, the last one
        coming from the OPS_FILE_HEADER table keyed by 'name'.
    """
    # self.config is reset first so the helper queries below run against
    # the fresh (empty) configuration object
    self.config = WCL()
    dbconfig = self.config

    dbconfig['archive'] = self.get_archive_info()
    dbconfig['filetype_metadata'] = self.get_all_filetype_metadata()

    header_sql = 'select * from OPS_FILE_HEADER'
    dbconfig[fmdefs.FILE_HEADER_INFO] = self.query_results_dict(header_sql, 'name')
class FileMgmtDB(desdmdbi.DesDmDbi):
    """ Extend core DM db class with functionality for managing files
        (metadata ingestion, "location" registering)
    """

    ###########################################################################
    @staticmethod
    def requested_config_vals():
        """ return dictionary describing what values this class uses along
            with whether they are optional or required """
        return {'use_db': 'opt', 'archive': 'req', fmdefs.FILE_HEADER_INFO: 'opt',
                'filetype_metadata': 'req', 'des_services': 'opt', 'des_db_section': 'req',
                'connection': 'opt', 'threaded': 'opt'}

    ###########################################################################
    def __init__(self, initvals=None, fullconfig=None):
        """ Connect to the database and assemble the configuration.

            initvals   -- dictionary of initial values.  Keys read here:
                          'des_services', 'des_db_section' (or 'section'),
                          'threaded', 'connection', 'get_db_config', 'wclfile'
            fullconfig -- optional configuration merged over the db/file config

            A connection passed in initvals['connection'] is tried first; if
            it cannot be used a new connection is opened.  The configuration
            is layered in this order (later wins): database, wcl file,
            fullconfig, initvals.
        """
        if not miscutils.use_db(initvals):
            miscutils.fwdie("Error: FileMgmtDB class requires DB but was told not to use DB", 1)

        self.desservices = None
        if 'des_services' in initvals:
            self.desservices = initvals['des_services']

        self.section = None
        if 'des_db_section' in initvals:
            self.section = initvals['des_db_section']
        elif 'section' in initvals:
            self.section = initvals['section']

        if 'threaded' in initvals:
            self.threaded = initvals['threaded']

        have_connect = False
        if 'connection' in initvals:
            try:
                desdmdbi.DesDmDbi.__init__(self, connection=initvals['connection'])
                have_connect = True
            except Exception:
                # only real errors trigger the fallback; KeyboardInterrupt and
                # SystemExit are not swallowed
                miscutils.fwdebug_print('Could not connect to DB using transferred connection, falling back to new connection.')

        if not have_connect:
            try:
                desdmdbi.DesDmDbi.__init__(self, self.desservices, self.section)
            except Exception as err:
                miscutils.fwdie((f"Error: problem connecting to database: {err}\n" +
                                 "\tCheck desservices file and environment variables"), 1)

        # precedence - db, file, params
        self.config = WCL()
        if miscutils.checkTrue('get_db_config', initvals, False):
            self._get_config_from_db()

        if 'wclfile' in initvals and initvals['wclfile'] is not None:
            fileconfig = WCL()
            with open(initvals['wclfile'], 'r') as infh:
                fileconfig.read(infh)
                self.config.update(fileconfig)

        if fullconfig is not None:
            self.config.update(fullconfig)
        self.config.update(initvals)

        self.filetype = None
        self.ftmgmt = None
        self.filepat = None

    ###########################################################################
    def _get_config_from_db(self):
        """ reads some configuration values from the database """
        self.config = WCL()
        self.config['archive'] = self.get_archive_info()
        self.config['filetype_metadata'] = self.get_all_filetype_metadata()
        self.config[fmdefs.FILE_HEADER_INFO] = self.query_results_dict('select * from OPS_FILE_HEADER', 'name')

    ###########################################################################
    def register_file_in_archive(self, filelist, archive_name):
        """ Saves filesystem information about file like relative path
            in archive, compression extension, etc

            filelist     -- a single fullname string, or a list whose items
                            are fullname strings or dicts holding either
                            'filename', 'path' and 'compression', or 'fullname'
            archive_name -- key into the 'archive' section of the config

            Raises ValueError when a file has no matching desfile row.
            Errors from the insert are printed and re-raised.
        """
        # assumes files have already been declared to database (i.e., metadata)
        # caller of program must have already verified given filelist matches given archive
        # if giving fullnames, must include archive root
        # keys to each file dict must be lowercase column names, missing data must be None

        archivedict = self.config['archive'][archive_name]
        archiveroot = archivedict['root']

        origfilelist = filelist
        if isinstance(origfilelist, str):
            filelist = [origfilelist]

        if filelist:
            # get id from desfile table
            gtt_name = self.load_filename_gtt(filelist)
            idsql = f"""select d.filename, d.compression, d.id
                        from desfile d, {gtt_name} g
                        where d.filename=g.filename and
                        nullcmp(d.compression, g.compression) = 1"""
            ids = {}
            curs = self.cursor()
            curs.execute(idsql)
            for row in curs:
                # accumulate per filename so a file stored with several
                # compressions keeps every (compression -> id) pair
                ids.setdefault(row[0], {})[row[1]] = row[2]

            # create dict of info to insert into file_archive_info
            parsemask = miscutils.CU_PARSE_PATH | \
                        miscutils.CU_PARSE_FILENAME | \
                        miscutils.CU_PARSE_COMPRESSION
            insfilelist = []
            for onefile in filelist:
                nfiledict = {}
                nfiledict['archive_name'] = archive_name
                if isinstance(onefile, dict):
                    if 'filename' in onefile and 'path' in onefile and 'compression' in onefile:
                        nfiledict['filename'] = onefile['filename']
                        nfiledict['compression'] = onefile['compression']
                        path = onefile['path']
                    elif 'fullname' in onefile:
                        (path, nfiledict['filename'], nfiledict['compression']) = \
                            miscutils.parse_fullname(onefile['fullname'], parsemask)
                    else:
                        miscutils.fwdie(f"Error:   Incomplete info for a file to register.   Given {onefile}", 1)
                elif isinstance(onefile, str):  # fullname
                    (path, nfiledict['filename'], nfiledict['compression']) = \
                        miscutils.parse_fullname(onefile, parsemask)

                # make sure compression starts with .
                if nfiledict['compression'] is not None and \
                        not nfiledict['compression'].startswith('.'):
                    nfiledict['compression'] = '.' + nfiledict['compression']

                # get matching desfile id
                if nfiledict['filename'] in ids:
                    if nfiledict['compression'] in ids[nfiledict['filename']]:
                        nfiledict['desfile_id'] = int(ids[nfiledict['filename']][nfiledict['compression']])
                    else:
                        raise ValueError(f'Missing desfile id for file - no matching compression ({onefile})')
                else:
                    raise ValueError(f'Missing desfile id for file - no matching filename ({onefile})')

                if path.startswith('/'):   # if path is absolute
                    # get rid of the archive root from the path to store
                    # (plain prefix test: the root is a literal path, not a
                    # regular expression)
                    if path.startswith(archiveroot + '/'):
                        nfiledict['path'] = path[len(archiveroot) + 1:]
                    else:
                        canon_archroot = os.path.realpath(archiveroot)
                        canon_path = os.path.realpath(path)

                        # get rid of the archive root from the path to store
                        if canon_path.startswith(canon_archroot + '/'):
                            nfiledict['path'] = canon_path[len(canon_archroot) + 1:]
                        else:
                            miscutils.fwdie((f"Error: file's absolute path ({path}) does not " +
                                             f"contain the archive root ({archiveroot}) (filedict:{nfiledict})"), 1)
                else:
                    nfiledict['path'] = path  # assume only contains the relative path within the archive

                insfilelist.append(nfiledict)

            colnames = ['desfile_id', 'filename', 'compression', 'path', 'archive_name']
            try:
                self.insert_many_indiv('FILE_ARCHIVE_INFO', colnames, insfilelist)
            except Exception:
                print("Error from insert_many_indiv in register_file_archive")
                print("colnames =", colnames)
                print("filelist =", insfilelist)
                raise

    ###########################################################################
    def has_metadata_ingested(self, filetype, fullnames):
        """ Check whether metadata has been ingested for given file

            fullnames may be a single string or a list; for a single string
            only that file's entry of the result is returned.
        """
        self.dynam_load_ftmgmt(filetype)

        listfullnames = fullnames
        if isinstance(fullnames, str):
            listfullnames = [fullnames]

        results = self.ftmgmt.has_metadata_ingested(listfullnames)
        if isinstance(fullnames, str):
            results = results[fullnames]
        return results

    ###########################################################################
    def check_valid(self, filetype, fullnames):
        """ Check whether file is a valid file for the given filetype

            fullnames may be a single string or a list; for a single string
            only that file's entry of the result is returned.
        """
        self.dynam_load_ftmgmt(filetype)

        listfullnames = fullnames
        if isinstance(fullnames, str):
            listfullnames = [fullnames]

        results = self.ftmgmt.check_valid(listfullnames)
        if isinstance(fullnames, str):
            results = results[fullnames]
        return results

    ###########################################################################
    def has_contents_ingested(self, filetype, fullnames):
        """ Check whether contents have been ingested for given files

            fullnames may be a single string or a list; for a single string
            only that file's entry of the result is returned.
        """
        self.dynam_load_ftmgmt(filetype)

        listfullnames = fullnames
        if isinstance(fullnames, str):
            listfullnames = [fullnames]

        results = self.ftmgmt.has_contents_ingested(listfullnames)
        if isinstance(fullnames, str):
            results = results[fullnames]
        return results

    ######################################################################
    def ingest_contents(self, filetype, fullnames):
        """ Call filetype specific function to ingest contents for the
            files that do not already have their contents ingested """
        listfullnames = fullnames
        if isinstance(fullnames, str):
            listfullnames = [fullnames]

        results = self.has_contents_ingested(filetype, listfullnames)
        newlist = [fname for fname in results if not results[fname]]

        self.dynam_load_ftmgmt(filetype)
        self.ftmgmt.ingest_contents(newlist)

    ###########################################################################
    def is_file_in_archive(self, filelist, archive_name):
        """ Checks whether given files are in the specified archive according to the DB

            Returns the list of filename||compression values the query reports.
        """
        # TODO change to return count(*) = 0 or 1 which would preserve array
        #      another choice is to return path, but how to make it return null for path that doesn't exist
        gtt_name = self.load_filename_gtt(filelist)

        # join to GTT_FILENAME for query
        sql = (f"select filename||compression from {gtt_name} g where exists " +
               f"(select filename from file_archive_info fai where " +
               f"fai.archive_name={self.get_named_bind_string('archive_name')} and fai.filename=g.filename)")
        curs = self.cursor()
        curs.execute(sql, {'archive_name': archive_name})
        existslist = [row[0] for row in curs]
        return existslist

    ###########################################################################
    @staticmethod
    def _get_required_headers(filetype_dict):
        """
        For use by ingest_file_metadata. Collects the list of required header values.
        """
        REQUIRED = "r"
        all_req_headers = set()
        for hdu_dict in filetype_dict['hdus'].values():
            if REQUIRED in hdu_dict:
                for cat_dict in hdu_dict[REQUIRED].values():
                    all_req_headers.update(cat_dict.keys())
        return all_req_headers

    ###########################################################################
    @staticmethod
    def _get_optional_metadata(filetype_dict):
        """
        For use by ingest_file_metadata. Collects the list of optional metadata values.
        """
        OPTIONAL = "o"
        all_opt_meta = set()
        for hdu_dict in filetype_dict['hdus'].values():
            if OPTIONAL in hdu_dict:
                for cat_dict in hdu_dict[OPTIONAL].values():
                    all_opt_meta.update(cat_dict.keys())
        return all_opt_meta

    ###########################################################################
    @staticmethod
    def _get_column_map(filetype_dict):
        """
        For use by ingest_file_metadata. Creates a lookup from column to header.

        A header mapped to several comma separated columns is recorded as
        "header:position" for each of its columns.
        """
        columnMap = collections.OrderedDict()
        for hdu_dict in filetype_dict['hdus'].values():
            for status_dict in hdu_dict.values():
                for cat_dict in status_dict.values():
                    for header, columns in cat_dict.items():
                        collist = columns.split(',')
                        for position, column in enumerate(collist):
                            if len(collist) > 1:
                                columnMap[column] = header + ":" + str(position)
                            else:
                                columnMap[column] = header
        return columnMap

    ###########################################################################
    def ingest_file_metadata(self, filemeta):
        """
        Ingests the file metadata stored in <filemeta> into the database,
        using <dbdict> to determine where each element belongs.
        This will throw an error and abort if any of the following are missing
        for any file: the filename, filetype, or other required header value.
        It will also throw an error if the filetype given in the input data
        is not found in <dbdict>
        Any exception will abort the entire upload.

        Raises TypeError when filemeta is not a dict, KeyError for a missing
        filename/filetype/required value and ValueError for an unknown
        filetype.
        """
        dbdict = self.config[fmdefs.FILETYPE_METADATA]
        FILETYPE = "filetype"
        FILENAME = "filename"
        METATABLE = "metadata_table"
        COLMAP = "column_map"
        ROWS = "rows"
        metadataTables = collections.OrderedDict()

        try:
            if not isinstance(filemeta, dict):
                raise TypeError(f"Invalid type for filemeta (should be dict): {type(filemeta)}")

            if FILENAME not in filemeta:
                raise KeyError("File metadata missing FILENAME")
            if FILETYPE not in filemeta:
                raise KeyError(f"File metadata missing FILETYPE (file: {filemeta[FILENAME]})")
            if filemeta[FILETYPE] not in dbdict:
                raise ValueError(f"Unknown FILETYPE (file: {filemeta[FILENAME]}, filetype: {filemeta[FILETYPE]})")

            # check that all required are present
            all_req_headers = self._get_required_headers(dbdict[filemeta[FILETYPE]])
            for dbkey in all_req_headers:
                if dbkey not in filemeta or filemeta[dbkey] == "":
                    raise KeyError(f"Missing required data ({dbkey}) (file: {filemeta[FILENAME]})")

            # now load structures needed for upload
            rowdata = collections.OrderedDict()
            mapped_headers = set()
            filemetatable = dbdict[filemeta[FILETYPE]][METATABLE]

            if filemetatable not in metadataTables:
                metadataTables[filemetatable] = collections.OrderedDict()
                metadataTables[filemetatable][COLMAP] = self._get_column_map(dbdict[filemeta[FILETYPE]])
                metadataTables[filemetatable][ROWS] = []

            colmap = metadataTables[filemetatable][COLMAP]
            for column, header in colmap.items():
                compheader = header.split(':')
                if len(compheader) > 1:
                    # "header:position" entry - pick one piece of a comma
                    # separated header value
                    hdr = compheader[0]
                    pos = int(compheader[1])
                    if hdr in filemeta:
                        rowdata[column] = filemeta[hdr].split(',')[pos]
                        mapped_headers.add(hdr)
                else:
                    if header in filemeta:
                        rowdata[column] = filemeta[header]
                        mapped_headers.add(header)
                    else:
                        rowdata[column] = None

            # report elements that were in the file that do not map to a DB column
            for notmapped in set(filemeta.keys()) - mapped_headers:
                if notmapped != 'fullname':
                    print("WARN: file " + filemeta[FILENAME] + " header item "
                          + notmapped + " does not match column for filetype "
                          + filemeta[FILETYPE])

            # add the new data to the table set of rows
            metadataTables[filemetatable][ROWS].append(rowdata)

            for tablename, metadict in metadataTables.items():
                if tablename.lower() != 'genfile' and tablename.lower() != 'desfile':
                    self.insert_many_indiv(tablename, list(metadict[COLMAP].keys()), metadict[ROWS])

        except (KeyError, ValueError, TypeError):
            print("filemeta:", filemeta)
            print("metadataTables = ", metadataTables)
            raise
    # end ingest_file_metadata

    ###########################################################################
    def is_valid_filetype(self, ftype):
        """ Checks filetype definitions to determine if given filetype exists """
        return ftype.lower() in self.config[fmdefs.FILETYPE_METADATA]

    ###########################################################################
    def is_valid_archive(self, arname):
        """ Checks archive definitions to determine if given archive exists """
        return arname.lower() in self.config['archive']

    ###########################################################################
    def get_file_location(self, filelist, arname, compress_order=fmdefs.FM_PREFER_COMPRESSED):
        """ Return relative archive paths and filename including any compression extension """
        fileinfo = self.get_file_archive_info(filelist, arname, compress_order)
        rel_filenames = {}
        for fname, finfo in fileinfo.items():
            rel_filenames[fname] = finfo['rel_filename']
        return rel_filenames

    ###########################################################################
    def _check_archive_args(self, arname, compress_order):
        """ Sanity checks shared by the get_file_archive_info* methods """
        if 'archive' not in self.config:
            miscutils.fwdie('Error: Missing archive section in config', 1)

        if arname not in self.config['archive']:
            miscutils.fwdie(f'Error: Invalid archive name ({arname})', 1)

        if 'root' not in self.config['archive'][arname]:
            miscutils.fwdie(f"Error: Missing root in archive def ({self.config['archive'][arname]})", 1)

        if not isinstance(compress_order, list):
            miscutils.fwdie('Error:  Invalid compress_order.  '
                            'It must be a list of compression extensions (including None)', 1)

    ###########################################################################
    def get_file_archive_info(self, filelist, arname, compress_order=fmdefs.FM_PREFER_COMPRESSED):
        """ Return information about file stored in archive
            (e.g., filename, size, rel_filename, ...)

            filelist       -- filenames to look up
            arname         -- archive name (must be in the config)
            compress_order -- list of compression extensions (including
                              None) in order of preference

            Returns a dict keyed by filename holding the row of the first
            compression in compress_order found for that file.  Rows whose
            compression is not listed in compress_order are ignored.
        """
        self._check_archive_args(arname, compress_order)

        # query DB getting all files regardless of compression
        #     Can't just use 'in' expression because could be more than 1000 filenames in list
        #           ORA-01795: maximum number of expressions in a list is 1000

        # insert filenames into filename global temp table to use in join for query
        gtt_name = self.load_filename_gtt(filelist)

        # join to GTT_FILENAME for query
        sql = ("select d.filetype,fai.path,fai.filename,fai.compression, " +
               "d.filesize, d.md5sum from desfile d, file_archive_info fai, " +
               f"{gtt_name} g where fai.archive_name={self.get_named_bind_string('archive_name')} and fai.desfile_id=d.id and " +
               "d.filename=g.filename")
        curs = self.cursor()
        curs.execute(sql, {'archive_name': arname})
        desc = [d[0].lower() for d in curs.description]

        fullnames = {}
        for comp in compress_order:
            fullnames[comp] = {}

        for line in curs:
            ldict = dict(zip(desc, line))
            if ldict['compression'] is None:
                compext = ""
            else:
                compext = ldict['compression']
            ldict['rel_filename'] = f"{ldict['path']}/{ldict['filename']}{compext}"
            # setdefault: the query returns every compression, including
            # ones the caller did not list in compress_order
            fullnames.setdefault(ldict['compression'], {})[ldict['filename']] = ldict
        curs.close()

        # go through given list of filenames and find archive location and compression
        archiveinfo = {}
        for name in filelist:
            for cmpord in compress_order:    # follow compression preference
                if name in fullnames[cmpord]:
                    archiveinfo[name] = fullnames[cmpord][name]
                    break

        return archiveinfo

    ###########################################################################
    def get_file_archive_info_path(self, path, arname, compress_order=fmdefs.FM_PREFER_COMPRESSED):
        """ Return information about file stored in archive
            (e.g., filename, size, rel_filename, ...) for the files whose
            archive path matches <path>/.*

            Returns a dict keyed by filename holding the row of the first
            compression in compress_order found for that file.  Rows whose
            compression is not listed in compress_order are ignored.
        """
        self._check_archive_args(arname, compress_order)

        likestr = self.get_regex_clause('path', f'{path}/.*')

        # query DB getting all files regardless of compression
        # (arname has been checked against the configured archives above)
        sql = ("select filetype,file_archive_info.* from desfile, file_archive_info " +
               f"where archive_name='{arname}' and desfile.id=file_archive_info.desfile_id " +
               f"and {likestr}")
        curs = self.cursor()
        curs.execute(sql)
        desc = [d[0].lower() for d in curs.description]

        fullnames = {}
        for cmpord in compress_order:
            fullnames[cmpord] = {}

        list_by_name = {}
        for line in curs:
            ldict = dict(zip(desc, line))
            if ldict['compression'] is None:
                compext = ""
            else:
                compext = ldict['compression']
            ldict['rel_filename'] = f"{ldict['path']}/{ldict['filename']}{compext}"
            # setdefault: tolerate compressions missing from compress_order
            fullnames.setdefault(ldict['compression'], {})[ldict['filename']] = ldict
            list_by_name[ldict['filename']] = True

        # go through given list of filenames and find archive location and compression
        archiveinfo = {}
        for name in list_by_name:
            for cmpord in compress_order:    # follow compression preference
                if name in fullnames[cmpord]:
                    archiveinfo[name] = fullnames[cmpord][name]
                    break

        return archiveinfo

    ######################################################################
    def dynam_load_ftmgmt(self, filetype, filepat=None):
        """ Dynamically load a filetype mgmt class

            Nothing is done when a mgmt object for the same filetype is
            already loaded.  The generic mgmt class is used unless the
            filetype definition names its own 'filetype_mgmt' class.
        """
        if self.ftmgmt is None or self.filetype is None or filetype != self.filetype:
            classname = 'filemgmt.ftmgmt_generic.FtMgmtGeneric'
            if filetype in self.config['filetype_metadata']:
                if 'filetype_mgmt' in self.config['filetype_metadata'][filetype] and \
                      self.config['filetype_metadata'][filetype]['filetype_mgmt'] is not None:
                    classname = self.config['filetype_metadata'][filetype]['filetype_mgmt']
            else:
                miscutils.fwdie(f'Error: Invalid filetype ({filetype})', 1)

            # dynamically load class for the filetype
            filetype_mgmt = None
            filetype_mgmt_class = miscutils.dynamically_load_class(classname)
            try:
                filetype_mgmt = filetype_mgmt_class(filetype, self, self.config, filepat)
            except Exception as err:
                print(f"ERROR\nError: creating filemgmt object\n{err}")
                raise

            self.filetype = filetype
            self.filepat = filepat
            self.ftmgmt = filetype_mgmt

    ######################################################################
    def register_file_data(self, ftype, fullnames, pfw_attempt_id, wgb_task_id,
                           do_update, update_info=None, filepat=None):
        """ Save artifact, metadata, wgb provenance, and simple contents for given files

            Returns a dict keyed by filename holding
            {'diskinfo': ..., 'metadata': ...}, or None for a file that
            could not be processed.
        """
        self.dynam_load_ftmgmt(ftype, filepat)

        results = {}

        for fname in fullnames:
            metadata = {}
            fileinfo = {}

            try:
                metadata = self.ftmgmt.perform_metadata_tasks(fname, do_update, update_info)
                if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                    miscutils.fwdebug_print("INFO: metadata to ingest" + str(metadata))
                fileinfo = diskutils.get_single_file_disk_info(fname,
                                                               save_md5sum=True,
                                                               archive_root=None)
            except IOError:
                miscutils.fwdebug_print(f"\n\nError: Problem gathering data for file {fname}")
                traceback.print_exc(1, sys.stdout)
                results[fname] = None
                continue

            try:
                fileinfo['filetype'] = ftype
                fileinfo['wgb_task_id'] = int(wgb_task_id)
                if pfw_attempt_id is None:
                    fileinfo['pfw_attempt_id'] = None
                else:
                    fileinfo['pfw_attempt_id'] = int(pfw_attempt_id)

                del fileinfo['path']

                has_metadata = self.has_metadata_ingested(ftype, fname)
                if not has_metadata:
                    self.save_file_info(fileinfo, metadata)
                has_contents = self.ftmgmt.has_contents_ingested([fname])
                if not has_contents[fname]:
                    self.ftmgmt.ingest_contents([fname])
                elif miscutils.fwdebug_check(3, 'FILEMGMT_DEBUG'):
                    miscutils.fwdebug_print(f"INFO: {fname} already has contents ingested")
                results[fname] = {'diskinfo': fileinfo, 'metadata': metadata}
            except Exception:
                # per-file best effort: record the failure and move on, but
                # let KeyboardInterrupt/SystemExit through
                miscutils.fwdebug_print(f"\n\nError: Problem gathering metadata for file {fname}")
                traceback.print_exc(1, sys.stdout)
                results[fname] = None

        return results

    ######################################################################
    def basic_register_file_data(self, ftype, fullnames, pfw_attempt_id, wgb_task_id,
                                 do_update, update_info=None, filepat=None):
        """ Gather artifact (disk) information and metadata for given files
            without saving anything to the database

            Returns a dict keyed by filename holding
            {'diskinfo': ..., 'metadata': ...}, or None for a file whose
            data could not be gathered (IOError).
        """
        self.dynam_load_ftmgmt(ftype, filepat)

        results = {}

        for fname in fullnames:
            metadata = {}
            fileinfo = {}

            try:
                metadata = self.ftmgmt.perform_metadata_tasks(fname, do_update, update_info)
                if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                    # str() is required: metadata is not a string
                    miscutils.fwdebug_print("INFO: metadata to ingest" + str(metadata))
                fileinfo = diskutils.get_single_file_disk_info(fname,
                                                               save_md5sum=True,
                                                               archive_root=None)
                fileinfo['filetype'] = ftype
                fileinfo['wgb_task_id'] = int(wgb_task_id)
                if pfw_attempt_id is None:
                    fileinfo['pfw_attempt_id'] = None
                else:
                    fileinfo['pfw_attempt_id'] = int(pfw_attempt_id)

                del fileinfo['path']
                results[fname] = {'diskinfo': fileinfo, 'metadata': metadata}
            except IOError:
                miscutils.fwdebug_print(f"\n\nError: Problem gathering data for file {fname}")
                traceback.print_exc(1, sys.stdout)
                results[fname] = None
                continue

        return results

    ######################################################################
    def mass_register_files(self, ftype, filedata):
        """ Save file info/metadata and contents for many files

            ftype    -- filetype of all the given files
            filedata -- dict keyed by filename holding dicts with
                        'diskinfo' and 'metadata' entries

            Returns the list of filenames for which saving the metadata or
            ingesting the contents failed.
        """
        self.dynam_load_ftmgmt(ftype)
        badfiles = set()
        for fname, data in filedata.items():
            try:
                has_metadata = self.has_metadata_ingested(ftype, fname)
                if not has_metadata:
                    self.save_file_info(data['diskinfo'], data['metadata'])
                elif miscutils.fwdebug_check(3, 'FILEMGMT_DEBUG'):
                    miscutils.fwdebug_print(f"INFO: {fname} already has metadata ingested")
            except Exception:
                miscutils.fwdebug_print(f"\n\nError: Problem gathering metadata for file {fname}")
                traceback.print_exc(1, sys.stdout)
                badfiles.add(fname)

            try:
                has_contents = self.ftmgmt.has_contents_ingested([fname])
                if not has_contents[fname]:
                    self.ftmgmt.ingest_contents([fname])
                elif miscutils.fwdebug_check(3, 'FILEMGMT_DEBUG'):
                    miscutils.fwdebug_print(f"INFO: {fname} already has contents ingested")
            except Exception:
                miscutils.fwdebug_print(f"\n\nError: Problem gathering metadata for file {fname}")
                traceback.print_exc(1, sys.stdout)
                badfiles.add(fname)
        return list(badfiles)

    ######################################################################
    def save_file_info(self, fileinfo, metadata):
        """ save non-location information about file """
        if miscutils.fwdebug_check(3, 'FILEMGMT_DEBUG'):
            miscutils.fwdebug_print(f"fileinfo = {fileinfo}")
            miscutils.fwdebug_print(f"metadata = {metadata}")

        self.save_desfile(fileinfo)

        # None and empty metadata are both skipped
        if metadata:
            self.ingest_file_metadata(metadata)

    ###########################################################################
    def save_desfile(self, fileinfo):
        """ save non-location information about files

            Errors from the insert are printed and re-raised.
        """
        if miscutils.fwdebug_check(3, 'FILEMGMT_DEBUG'):
            miscutils.fwdebug_print(f"fileinfo = {fileinfo}")

        colnames = ['pfw_attempt_id', 'filetype', 'filename', 'compression',
                    'filesize', 'md5sum', 'wgb_task_id']
        try:
            self.insert_many_indiv('DESFILE', colnames, [fileinfo])
        except Exception:
            print("Error: problems saving to table desfile")
            print("colnames =", colnames)
            print("fileinfo =", fileinfo)
            raise

    ###########################################################################
    def get_filename_id_map(self, prov):
        """ Return a mapping of filename to desfile id

            All filenames named in the "used" and "was derived from"
            provenance sections are looked up.  An empty dict is returned
            when the provenance names no files.
        """
        if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
            miscutils.fwdebug_print(f"prov = {prov}")

        allfiles = set()
        if provdefs.PROV_USED in prov:
            for filenames in prov[provdefs.PROV_USED].values():
                for fname in filenames.split(provdefs.PROV_DELIM):
                    allfiles.add(fname.strip())
        if provdefs.PROV_WDF in prov:
            for tuples in prov[provdefs.PROV_WDF].values():
                for filenames in tuples.values():
                    for fname in filenames.split(provdefs.PROV_DELIM):
                        allfiles.add(fname.strip())

        if not allfiles:
            # same type as the non-empty case so callers can always index it
            return {}

        # build a map between filenames (with compression extension) and desfile ID
        gtt_name = self.load_filename_gtt(allfiles)
        sqlstr = f"""SELECT f.filename || f.compression, d.ID
                     FROM DESFILE d, {gtt_name} f
                     WHERE d.filename=f.filename and
                     nullcmp(d.compression, f.compression) = 1"""
        cursor = self.cursor()
        cursor.execute(sqlstr)
        result = cursor.fetchall()
        cursor.close()

        return dict(result)
    # end get_filename_id_map

    ###########################################################################
    def ingest_provenance(self, prov, execids):
        """ Save provenance to OPM tables

            prov    -- provenance dictionary with optional "used" and
                       "was derived from" sections
            execids -- dict mapping exec names to their task ids

            Returns the list of exceptions hit while building the
            "was derived from" rows (empty when there were none).
        """
        excepts = []
        insert_sql = """insert into {} ({}) select {},{} {} where not exists(
                        select * from {} n where n.{}={} and n.{}={})"""

        data = []
        bind_str = self.get_positional_bind_string()
        cursor = self.cursor()
        filemap = self.get_filename_id_map(prov)
        if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
            miscutils.fwdebug_print(f"filemap = {filemap}")

        if provdefs.PROV_USED in prov:
            if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                miscutils.fwdebug_print("ingesting used provenance")

            for execname, filenames in prov[provdefs.PROV_USED].items():
                for fname in filenames.split(provdefs.PROV_DELIM):
                    # values appear twice: once for the select list and once
                    # for the "not exists" test
                    rowdata = []
                    rowdata.append(execids[execname])
                    rowdata.append(filemap[fname.strip()])
                    rowdata.append(execids[execname])
                    rowdata.append(filemap[fname.strip()])
                    data.append(rowdata)
            if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                miscutils.fwdebug_print(f"Number of used records to ingest = {len(data)}")
            exec_sql = insert_sql.format(fmdefs.PROV_USED_TABLE,
                                         fmdefs.PROV_TASK_ID + "," + fmdefs.PROV_FILE_ID,
                                         bind_str, bind_str, self.from_dual(),
                                         fmdefs.PROV_USED_TABLE, fmdefs.PROV_TASK_ID, bind_str,
                                         fmdefs.PROV_FILE_ID, bind_str)
            # skip the round trip when there is nothing to insert (same guard
            # as the wdf branch below)
            if data:
                cursor.executemany(exec_sql, data)
                if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                    miscutils.fwdebug_print(f"Number of used rows inserted = {cursor.rowcount}")
            data = []

        if provdefs.PROV_WDF in prov:
            if miscutils.fwdebug_check(3, 'FILEMGMT_DEBUG'):
                miscutils.fwdebug_print("ingesting wdf provenance")
            for tuples in prov[provdefs.PROV_WDF].values():
                if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                    miscutils.fwdebug_print(f"tuples = {tuples}")

                if provdefs.PROV_PARENTS not in tuples:
                    miscutils.fwdie(f"Error: missing {provdefs.PROV_PARENTS} in one of {provdefs.PROV_WDF}",
                                    fmdefs.FM_EXIT_FAILURE)
                elif provdefs.PROV_CHILDREN not in tuples:
                    miscutils.fwdie(f"Error: missing {provdefs.PROV_CHILDREN} in one of {provdefs.PROV_WDF}",
                                    fmdefs.FM_EXIT_FAILURE)
                else:
                    for parentfile in tuples[provdefs.PROV_PARENTS].split(provdefs.PROV_DELIM):
                        for childfile in tuples[provdefs.PROV_CHILDREN].split(provdefs.PROV_DELIM):
                            try:
                                rowdata = []
                                rowdata.append(filemap[parentfile.strip()])
                                rowdata.append(filemap[childfile.strip()])
                                rowdata.append(filemap[parentfile.strip()])
                                rowdata.append(filemap[childfile.strip()])
                                data.append(rowdata)
                            except Exception as ex:
                                miscutils.fwdebug_print(f"Error ingesting provenance for {childfile.strip()} with parent {parentfile.strip()}")
                                (extype, exvalue, trback) = sys.exc_info()
                                traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
                                excepts.append(ex)

            exec_sql = insert_sql.format(fmdefs.PROV_WDF_TABLE,
                                         fmdefs.PROV_PARENT_ID + "," + fmdefs.PROV_CHILD_ID,
                                         bind_str, bind_str, self.from_dual(),
                                         fmdefs.PROV_WDF_TABLE, fmdefs.PROV_PARENT_ID, bind_str,
                                         fmdefs.PROV_CHILD_ID, bind_str)
            if data:
                if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                    miscutils.fwdebug_print(f"Number of wdf rows to insert = {len(data)}")
                cursor.executemany(exec_sql, data)
                if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                    miscutils.fwdebug_print(f"Number of wdf rows inserted = {cursor.rowcount}")
            elif miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                miscutils.fwdebug_print(f"Warn: {provdefs.PROV_WDF} section given but had 0 valid entries")
        return excepts
class BasicWrapper:
    """ Basic wrapper class

    Reads an input wcl file, then for every exec section found in it runs a
    fixed sequence of tasks (check inputs, build the command line, run the
    executable, check outputs, save provenance) and records the timing and
    status of each task in an output wcl.

    Parameters
    ----------
    wclfile : str
        Name of the input wcl file to read.
    debug : int, optional
        Debug level stored on the instance. Default is 1.
    """

    ######################################################################
    def __init__(self, wclfile, debug=1):
        """ Read input wcl to initialize object """
        self.input_filename = wclfile
        self.inputwcl = WCL()
        with open(wclfile, 'r') as infh:
            self.inputwcl.read(infh)
        self.debug = debug

        # note: WGB handled by file registration using OW_OUTPUTS_BY_SECT
        provdict = collections.OrderedDict({
            provdefs.PROV_USED: collections.OrderedDict(),
            provdefs.PROV_WDF: collections.OrderedDict()
        })
        self.outputwcl = WCL({
            'wrapper': collections.OrderedDict(),
            intgdefs.OW_PROV_SECT: provdict,
            intgdefs.OW_OUTPUTS_BY_SECT: {}
        })

        self.last_num_derived = 0
        self.last_num_meta = 0
        self.curr_task = []     # stack of names of tasks currently running
        self.curr_exec = None   # output section of the exec being processed

    ######################################################################
    def determine_status(self):
        """ Check all task status to determine wrapper status

        Returns
        -------
        int
            0 when every exec section has an output section whose tasks all
            have status 0.  Otherwise a non-zero task status, or 1 when an
            output section, its task_info or a task status is missing.
        """
        status = 0

        execs = intgmisc.get_exec_sections(self.inputwcl, intgdefs.IW_EXEC_PREFIX)
        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"INFO: exec sections = {execs}",
                                    WRAPPER_OUTPUT_PREFIX)

        for ekey in sorted(execs.keys()):
            if ekey in self.outputwcl:
                if 'task_info' in self.outputwcl[ekey]:
                    for taskd in self.outputwcl[ekey]['task_info'].values():
                        if 'status' in taskd:
                            if taskd['status'] != 0:
                                status = taskd['status']
                        else:
                            if miscutils.fwdebug_check(3, "BASICWRAP_DEBUG"):
                                miscutils.fwdebug_print(
                                    f"WARN: Missing status in outputwcl task_info for {ekey}",
                                    WRAPPER_OUTPUT_PREFIX)
                            status = 1
                else:
                    if miscutils.fwdebug_check(3, "BASICWRAP_DEBUG"):
                        miscutils.fwdebug_print(
                            f"WARN: Missing task_info in outputwcl for {ekey}",
                            WRAPPER_OUTPUT_PREFIX)
                    status = 1
            else:
                # exec section never produced an output section
                status = 1

        return status

    ######################################################################
    def get_status(self):
        """ Return status of wrapper execution

        Returns
        -------
        int
            The status saved in the 'wrapper' section of the output wcl,
            or 1 when no status has been saved yet.
        """
        status = 1
        if 'status' in self.outputwcl['wrapper']:
            status = self.outputwcl['wrapper']['status']

        return status

    ######################################################################
    def check_command_line(self, exsect, exwcl):
        """ Ensure that certain command line arguments are specified

        Currently only records the task; no arguments are checked.

        Parameters
        ----------
        exsect : str
            Unused.
        exwcl : dict
            Unused.

        Returns
        -------
        int
            Always 0.
        """
        # pylint: disable=unused-argument

        self.start_exec_task('check_command_line')
        self.end_exec_task(0)

        return 0

    ######################################################################
    @staticmethod
    def _insert_positional_args(cmdlist, posargs):
        """ Insert positional arguments into cmdlist in numeric position order

        Parameters
        ----------
        cmdlist : list
            Command line pieces; modified in place.
        posargs : dict
            Maps position (string of digits) to the argument text.
        """
        # Positions are digit strings; sort them as numbers so that '10' is
        # handled after '2'.  Inserting a high index first would shift it
        # when a lower index is inserted afterwards.
        for pos in sorted(posargs, key=int):
            cmdlist.insert(int(pos), f"{posargs[pos]}")

    ######################################################################
    def create_command_line(self, execnum, exwcl):
        """ Create command line string handling hyphens appropriately

        The resulting string is saved in the current exec's 'cmdline' entry.

        Parameters
        ----------
        execnum : str
            Identifier of the exec, used only in messages.
        exwcl : dict
            The exec section.  Must contain 'execname'; may contain
            'cmdline' (arguments) and 'cmd_hyphen' (hyphen style).

        Raises
        ------
        KeyError
            If 'execname' is missing from exwcl.
        ValueError
            If a cmdline key starts with '_' but is not '_' plus digits.
        """
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(
                f"execnum = '{execnum}', exwcl = '{exwcl}'",
                WRAPPER_OUTPUT_PREFIX)
        self.start_exec_task('create_command_line')

        cmdstr = ""
        if 'execname' in exwcl:
            cmdlist = [exwcl['execname']]

            if 'cmdline' in exwcl:
                posargs = {}  # save positional args to insert later

                hyphen_type = 'allsingle'
                if 'cmd_hyphen' in exwcl:
                    hyphen_type = exwcl['cmd_hyphen']

                # loop through command line args
                for key, val in exwcl['cmdline'].items():
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            f"key = '{key}', val = '{val}'",
                            WRAPPER_OUTPUT_PREFIX)

                    # replace any variables
                    expandval = replfuncs.replace_vars(val, self.inputwcl)[0]
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(f"expandval = '{expandval}'",
                                                WRAPPER_OUTPUT_PREFIX)

                    if key.startswith('_'):
                        patmatch = re.match(r'_(\d+)', key)
                        if patmatch:
                            posargs[patmatch.group(1)] = expandval  # save for later
                        else:
                            raise ValueError(
                                f'Invalid positional argument name: {key}')
                    else:
                        hyphen = intgmisc.get_cmd_hyphen(hyphen_type, key)

                        if expandval == '_flag':
                            cmdlist.append(f" {hyphen}{key}")
                        else:
                            cmdlist.append(f" {hyphen}{key} {expandval}")

                # insert position sensitive arguments into specified location
                # in argument list
                self._insert_positional_args(cmdlist, posargs)

            # convert list of args into string
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(f"cmdlist = '{cmdlist}'",
                                        WRAPPER_OUTPUT_PREFIX)
            cmdstr = ' '.join(cmdlist)
        else:
            print(f"Error: missing execname in wcl for exec {execnum}")
            print(f"exec wcl = {exwcl}")
            raise KeyError(f'Missing execname in wcl for exec {execnum}')

        self.curr_exec['cmdline'] = cmdstr
        self.end_exec_task(0)

    ######################################################################
    def save_exec_version(self, exwcl):
        """ Run command with version flag and parse output for version information

        When a version is found it is saved in the current exec's 'version'
        entry.  Assumes the exit code of the version command is 0.

        Parameters
        ----------
        exwcl : dict
            The exec section.  Must contain 'execname'; the version is only
            looked up when both 'version_flag' and 'version_pattern' are
            present.
        """
        # assumes exit code for version is 0

        self.start_exec_task('save_exec_version')

        ver = None

        execname = exwcl['execname']
        if 'version_flag' in exwcl and 'version_pattern' in exwcl:
            verflag = exwcl['version_flag']
            verpat = exwcl['version_pattern']

            cmd = f"{execname} {verflag}"
            try:
                process = subprocess.Popen(shlex.split(cmd),
                                           shell=False,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.STDOUT,
                                           text=True)
            except Exception:
                (exc_type, exc_value) = sys.exc_info()[0:2]
                print("********************")
                print(f"Unexpected error: {exc_type} - {exc_value}")
                print(f"cmd> {cmd}")
                print(f"Probably could not find {shlex.split(cmd)[0]} in path")
                print("Check for misspelled execname in submit wcl or")
                print(
                    " make sure that the corresponding eups package is in the metapackage "
                )
                print(" and it sets up the path correctly")
                raise

            # communicate() both drains the pipe and waits for the process.
            # Calling wait() first can deadlock once the child has written
            # enough output to fill the OS pipe buffer.
            out = process.communicate()[0]
            if process.returncode != 0:
                miscutils.fwdebug_print(
                    "INFO: problem when running code to get version",
                    WRAPPER_OUTPUT_PREFIX)
                miscutils.fwdebug_print(f"\t{execname} {verflag} {verpat}",
                                        WRAPPER_OUTPUT_PREFIX)
                miscutils.fwdebug_print(f"\tcmd> {cmd}", WRAPPER_OUTPUT_PREFIX)
                miscutils.fwdebug_print(f"\t{out}", WRAPPER_OUTPUT_PREFIX)
                ver = None
            else:
                # parse output with verpat
                try:
                    vmatch = re.search(verpat, out)
                    if vmatch:
                        ver = vmatch.group(1)
                    else:
                        if miscutils.fwdebug_check(0, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print(
                                f"re.search didn't find version for exec {execname}",
                                WRAPPER_OUTPUT_PREFIX)
                        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print(f"\tcmd output={out}",
                                                    WRAPPER_OUTPUT_PREFIX)
                            miscutils.fwdebug_print(f"\tcmd verpat={verpat}",
                                                    WRAPPER_OUTPUT_PREFIX)
                except Exception as err:
                    ver = None
                    print(
                        f"Error: Exception from re.match. Didn't find version: {err}"
                    )
                    raise
        else:
            miscutils.fwdebug_print(
                f"INFO: Could not find version info for exec {execname}",
                WRAPPER_OUTPUT_PREFIX)
            ver = None

        if ver is not None:
            self.curr_exec['version'] = ver
        self.end_exec_task(0)

    ######################################################################
    def create_output_dirs(self, exwcl):
        """ Make directories for output files

        Parameters
        ----------
        exwcl : dict
            The exec section; its outputs entry lists the file and list
            sections whose files need a directory.

        Raises
        ------
        ValueError
            If an output fullname still contains '$RNMLST{'.
        IOError
            If an output list file does not exist.
        """
        self.start_exec_task('create_output_dirs')

        if intgdefs.IW_OUTPUTS in exwcl:
            for sect in miscutils.fwsplit(exwcl[intgdefs.IW_OUTPUTS]):
                sectkeys = sect.split('.')
                if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(f"INFO: sectkeys={sectkeys}",
                                            WRAPPER_OUTPUT_PREFIX)

                if sectkeys[0] == intgdefs.IW_FILE_SECT:
                    sectname = sectkeys[1]
                    filesects = self.inputwcl[intgdefs.IW_FILE_SECT]
                    if sectname in filesects:
                        if 'fullname' in filesects[sectname]:
                            fullnames = self.inputwcl[
                                intgdefs.IW_FILE_SECT][sectname]['fullname']
                            if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                                miscutils.fwdebug_print(
                                    f"INFO: fullname = {fullnames}",
                                    WRAPPER_OUTPUT_PREFIX)
                            if '$RNMLST{' in fullnames:
                                raise ValueError(
                                    'Deprecated $RNMLST in output filename')

                            for fname in miscutils.fwsplit(fullnames, ','):
                                outdir = os.path.dirname(fname)
                                miscutils.coremakedirs(outdir)
                elif sectkeys[0] == intgdefs.IW_LIST_SECT:
                    (_, _, filesect) = sect.split('.')
                    ldict = self.inputwcl[intgdefs.IW_LIST_SECT][sectkeys[1]]

                    # check list itself exists
                    listname = ldict['fullname']
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            f"\tINFO: Checking existence of '{listname}'",
                            WRAPPER_OUTPUT_PREFIX)

                    if not os.path.exists(listname):
                        miscutils.fwdebug_print(
                            f"\tError: list '{listname}' does not exist.",
                            WRAPPER_OUTPUT_PREFIX)
                        raise IOError(
                            f"List not found: {listname} does not exist")

                    # get list format: space separated, csv, wcl, etc
                    listfmt = intgdefs.DEFAULT_LIST_FORMAT
                    if intgdefs.LIST_FORMAT in ldict:
                        listfmt = ldict[intgdefs.LIST_FORMAT]

                    # read fullnames from list file
                    fullnames = intgmisc.read_fullnames_from_listfile(
                        listname, listfmt, ldict['columns'])
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            f"\tINFO: fullnames={fullnames}",
                            WRAPPER_OUTPUT_PREFIX)
                    for fname in fullnames[filesect]:
                        outdir = os.path.dirname(fname)
                        miscutils.coremakedirs(outdir)

        self.end_exec_task(0)

    ######################################################################
    def run_exec(self):
        """ Run given command line

        Runs the command saved in the current exec's 'cmdline' entry and
        saves its exit code ('status') and process information ('procinfo')
        in the current exec.

        Raises
        ------
        OSError
            Re-raised from running the command; extra hints are printed
            first when the error is ENOENT.
        """
        self.start_exec_task('run_exec')
        cmdline = self.curr_exec['cmdline']

        retcode = None
        procinfo = None

        miscutils.fwdebug_print(f"INFO: cmd = {cmdline}", WRAPPER_OUTPUT_PREFIX)
        print('*' * 70)
        sys.stdout.flush()
        try:
            (retcode, procinfo) = intgmisc.run_exec(cmdline)
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise

            print("********************")
            (exc_type, exc_value, _) = sys.exc_info()
            print(f"{exc_type} - {exc_value}")
            print(f"cmd> {cmdline}")
            print(f"Probably could not find {cmdline.split()[0]} in path")
            print("Check for mispelled execname in submit wcl or")
            print(" make sure that the corresponding eups package is in ")
            print(" the metapackage and it sets up the path correctly")
            raise

        sys.stdout.flush()

        if retcode != 0:
            if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(
                    f"\tINFO: cmd exited with non-zero exit code = {retcode}",
                    WRAPPER_OUTPUT_PREFIX)
                miscutils.fwdebug_print(f"\tINFO: failed cmd = {cmdline}",
                                        WRAPPER_OUTPUT_PREFIX)
        else:
            if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(
                    "\tINFO: cmd exited with exit code = 0",
                    WRAPPER_OUTPUT_PREFIX)

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("END", WRAPPER_OUTPUT_PREFIX)
        print('*' * 70)
        self.curr_exec['status'] = retcode
        self.curr_exec['procinfo'] = procinfo

        self.end_exec_task(retcode)

    ######################################################################
    def check_inputs(self, ekey):
        """ Check which input files/lists do not exist

        Calls sys.exit(3) as soon as a section has a missing input, after
        printing the working directory and a listing of the files below it.

        Parameters
        ----------
        ekey : str
            Name of the exec section whose inputs are checked.

        Returns
        -------
        dict
            Existing input files keyed by section.
        """
        self.start_exec_task('check_inputs')

        existfiles = {}

        ins, _ = intgmisc.get_fullnames(self.inputwcl,
                                        self.inputwcl,
                                        ekey,
                                        get_outputs=False)
        for sect in ins:
            exists, missing = intgmisc.check_files(ins[sect])
            existfiles[sect] = exists

            if missing:
                for mfile in missing:
                    miscutils.fwdebug_print(
                        f"ERROR: input '{mfile}' does not exist.",
                        WRAPPER_OUTPUT_PREFIX)
                # diagnostics to help see what the job actually has on disk
                os.system("pwd")
                os.system("find . -type f")
                # if missing inputs, just abort
                sys.exit(3)

        self.end_exec_task(0)
        return existfiles

    ######################################################################
    def get_optout(self, sect):
        """ Return whether file(s) are optional outputs

        Parameters
        ----------
        sect : str
            Dotted section name; its first part must be the file section
            name or the list section name.

        Returns
        -------
        bool
            True when the matching file section marks its output optional.

        Raises
        ------
        KeyError
            If the first part of sect is neither a file nor a list section.
        """
        optout = False
        sectkeys = sect.split('.')
        if sectkeys[0] == intgdefs.IW_FILE_SECT:
            if intgdefs.IW_OUTPUT_OPTIONAL in self.inputwcl.get(sect):
                optout = miscutils.convertBool(
                    self.inputwcl.get(sect)[intgdefs.IW_OUTPUT_OPTIONAL])
        elif sectkeys[0] == intgdefs.IW_LIST_SECT:
            # for lists the flag is read from the file section named by the
            # third part of the section name
            filesect = f"{intgdefs.IW_FILE_SECT}.{sectkeys[2]}"
            if intgdefs.IW_OUTPUT_OPTIONAL in self.inputwcl.get(filesect):
                optout = miscutils.convertBool(
                    self.inputwcl.get(filesect)[intgdefs.IW_OUTPUT_OPTIONAL])
        else:
            raise KeyError(f"Unknown data section {sectkeys[0]}")

        return optout

    ######################################################################
    def check_outputs(self, ekey, exitcode):
        """ Check which output files were created, renaming if necessary

        The task status is 1 when a required output is missing after a zero
        exit code, otherwise 0.

        Parameters
        ----------
        ekey : str
            Name of the exec section whose outputs are checked.
        exitcode : int
            Exit code of the executable.  When non-zero, missing outputs
            are not reported as errors.

        Returns
        -------
        dict
            Existing output files keyed by section.
        """
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: Beg", WRAPPER_OUTPUT_PREFIX)

        self.start_exec_task('check_outputs')

        existfiles = {}
        missingfiles = {}

        _, outs = intgmisc.get_fullnames(self.inputwcl, self.inputwcl, ekey)
        for sect in outs:
            if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(f"INFO: sect={sect}",
                                        WRAPPER_OUTPUT_PREFIX)

            exists, missing = intgmisc.check_files(outs[sect])
            existfiles.update({sect: exists})
            if missing:
                optout = self.get_optout(sect)
                if optout:
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            f"\tINFO: optional output file '{missing}' does not exist (sect: {sect}).",
                            WRAPPER_OUTPUT_PREFIX)
                elif exitcode != 0:
                    if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            f"INFO: skipping missing output due to non-zero exit code ({sect}: {missing})",
                            WRAPPER_OUTPUT_PREFIX)
                else:
                    miscutils.fwdebug_print(
                        f"ERROR: Missing required output file(s) ({sect}:{missing})",
                        WRAPPER_OUTPUT_PREFIX)
                    missingfiles.update({sect: missing})

        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"INFO: existfiles={existfiles}",
                                    WRAPPER_OUTPUT_PREFIX)
            miscutils.fwdebug_print(f"INFO: missingfiles={missingfiles}",
                                    WRAPPER_OUTPUT_PREFIX)

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: end", WRAPPER_OUTPUT_PREFIX)

        if missingfiles:
            status = 1
        else:
            status = 0
        self.end_exec_task(status)
        return existfiles

    ######################################################################
    def transform_inputs(self, exwcl):
        """ Transform inputs stored by DESDM into form needed by exec

        Only records the task here; exwcl is unused.
        """
        # pylint: disable=unused-argument
        self.start_exec_task('transform_inputs')
        self.end_exec_task(0)

    ######################################################################
    def transform_outputs(self, exwcl):
        """ Transform outputs created by exec into form needed by DESDM

        Only records the task here; exwcl is unused.
        """
        # pylint: disable=unused-argument
        self.start_exec_task('transform_outputs')
        self.end_exec_task(0)

    ######################################################################
    def save_provenance(self, execsect, exwcl, infiles, outfiles, exitcode):
        """ Create provenance wcl

        Fills the 'used' and 'was derived from' parts of the provenance
        section of the output wcl.  The task status is the number of
        errors found.

        Parameters
        ----------
        execsect : str
            Name of the exec section; key for the 'used' entry.
        exwcl : dict
            The exec section; its derivation entry lists parent:child
            section pairs.
        infiles : dict
            Existing input files keyed by section.
        outfiles : dict
            Existing output files keyed by section.
        exitcode : int
            Exit code of the executable.  When non-zero, missing child
            outputs are not counted as errors.

        Returns
        -------
        dict
            The provenance section of the output wcl.
        """
        #pylint: disable=unbalanced-tuple-unpacking
        self.start_exec_task('save_provenance')

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: Beg", WRAPPER_OUTPUT_PREFIX)
        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"INFO: infiles = {infiles}",
                                    WRAPPER_OUTPUT_PREFIX)
            miscutils.fwdebug_print(f"INFO: outfiles = {outfiles}",
                                    WRAPPER_OUTPUT_PREFIX)

        num_errs = 0

        # convert probably fullnames in outexist to filename+compression
        new_outfiles = collections.OrderedDict()
        for exlabel, exlist in outfiles.items():
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(
                    f"INFO: exlabel={exlabel} exlist={exlist}",
                    WRAPPER_OUTPUT_PREFIX)
            newlist = []
            for fullname in exlist:
                basename = miscutils.parse_fullname(
                    fullname, miscutils.CU_PARSE_BASENAME)
                newlist.append(basename)
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(f"INFO: newlist={newlist}",
                                        WRAPPER_OUTPUT_PREFIX)

            new_outfiles[exlabel] = newlist

        prov = self.outputwcl[intgdefs.OW_PROV_SECT]

        # used
        new_infiles = {}
        if infiles:
            all_infiles = []
            for key, sublist in infiles.items():
                new_infiles[key] = []
                for fullname in sublist:
                    basename = miscutils.parse_fullname(
                        fullname, miscutils.CU_PARSE_BASENAME)
                    all_infiles.append(basename)
                    new_infiles[key].append(basename)
            prov[provdefs.PROV_USED][execsect] = provdefs.PROV_DELIM.join(
                all_infiles)

        # was_generated_by - done by PFW when saving metadata

        # was_derived_from
        if intgdefs.IW_DERIVATION in exwcl:
            # The section is deleted below when it ends up empty, so an
            # earlier exec may have removed it; put it back before use.
            if provdefs.PROV_WDF not in prov:
                prov[provdefs.PROV_WDF] = collections.OrderedDict()
            wdf = prov[provdefs.PROV_WDF]
            derived_pairs = miscutils.fwsplit(exwcl[intgdefs.IW_DERIVATION],
                                              provdefs.PROV_DELIM)
            for dpair in derived_pairs:
                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(f"INFO: dpair = {dpair}",
                                            WRAPPER_OUTPUT_PREFIX)
                (parent_sect, child_sect) = miscutils.fwsplit(dpair, ':')[:2]
                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(
                        f"INFO: parent_sect = {parent_sect}",
                        WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(f"INFO: child_sect = {child_sect}",
                                            WRAPPER_OUTPUT_PREFIX)

                optout = self.get_optout(child_sect)

                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(f"INFO: optout = {optout}",
                                            WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(
                        f"INFO: new_outfiles.keys = {list(new_outfiles.keys())}",
                        WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(
                        f"INFO: new_outfiles = {new_outfiles}",
                        WRAPPER_OUTPUT_PREFIX)

                if child_sect not in new_outfiles or \
                        new_outfiles[child_sect] is None or \
                        not new_outfiles[child_sect]:
                    if optout:
                        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print(
                                f"INFO: skipping missing optional output {parent_sect}:{child_sect}",
                                WRAPPER_OUTPUT_PREFIX)
                    elif exitcode != 0:
                        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print(
                                f"INFO: skipping missing output due to non-zero exit code {parent_sect}:{child_sect}",
                                WRAPPER_OUTPUT_PREFIX)
                    else:
                        miscutils.fwdebug_print(
                            f"ERROR: Missing child output files in wdf tuple ({parent_sect}:{child_sect})",
                            WRAPPER_OUTPUT_PREFIX)
                        num_errs += 1
                else:
                    self.last_num_derived += 1
                    key = 'derived_%d' % self.last_num_derived
                    if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(f"INFO: key = {key}",
                                                WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"INFO: before wdf = {prov[provdefs.PROV_WDF]}",
                            WRAPPER_OUTPUT_PREFIX)

                    if parent_sect not in infiles and parent_sect not in new_outfiles:
                        miscutils.fwdebug_print(f"parent_sect = {parent_sect}",
                                                WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"infiles.keys() = {list(infiles.keys())}",
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"outfiles.keys() = {list(outfiles.keys())}",
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"used = {exwcl[intgdefs.IW_INPUTS]}",
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"ERROR: Could not find parent files for {dpair}",
                            WRAPPER_OUTPUT_PREFIX)
                        num_errs += 1
                    else:
                        wdf[key] = collections.OrderedDict()
                        wdf[key][provdefs.PROV_CHILDREN] = \
                            provdefs.PROV_DELIM.join(new_outfiles[child_sect])
                        if parent_sect in infiles:
                            wdf[key][provdefs.PROV_PARENTS] = \
                                provdefs.PROV_DELIM.join(new_infiles[parent_sect])
                        elif parent_sect in new_outfiles:
                            # this output was generated within same
                            # program/wrapper from other output files
                            parents = []
                            for outparent in outfiles[parent_sect]:
                                parents.append(
                                    miscutils.parse_fullname(
                                        outparent,
                                        miscutils.CU_PARSE_FILENAME))
                            wdf[key][provdefs.PROV_PARENTS] = \
                                provdefs.PROV_DELIM.join(parents)

                    if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            f"INFO: after wdf = {prov[provdefs.PROV_WDF]}",
                            WRAPPER_OUTPUT_PREFIX)

            # do not leave an empty section in the output wcl
            if not wdf:
                del prov[provdefs.PROV_WDF]

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"INFO: End (num_errs = {num_errs:d})",
                                    WRAPPER_OUTPUT_PREFIX)

        self.end_exec_task(num_errs)
        return prov

    ######################################################################
    def write_outputwcl(self, outfilename=None):
        """ Write output wcl to file

        Parameters
        ----------
        outfilename : str, optional
            File to write.  Defaults to the 'outputwcl' entry of the
            'wrapper' section of the input wcl.
        """
        if outfilename is None:
            outfilename = self.inputwcl['wrapper']['outputwcl']

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"outfilename = {outfilename}",
                                    WRAPPER_OUTPUT_PREFIX)

        # create output wcl directory if needed
        outwcldir = miscutils.parse_fullname(outfilename,
                                             miscutils.CU_PARSE_PATH)
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"outwcldir = {outwcldir}",
                                    WRAPPER_OUTPUT_PREFIX)
        miscutils.coremakedirs(outwcldir)

        with open(outfilename, 'w') as wclfh:
            self.outputwcl.write(wclfh, True)

    ######################################################################
    def start_exec_task(self, name):
        """ Save start execution info

        Pushes name on the stack of running tasks and records its start
        time in the current exec's task_info.
        """
        self.curr_task.append(name)
        self.curr_exec['task_info'][name] = {'start_time': time.time()}

    ######################################################################
    def end_exec_task(self, status):
        """ Save end execution info

        Pops the most recently started task and records its status, end
        time and wall time.
        """
        name = self.curr_task.pop()

        task_info = self.curr_exec['task_info'][name]
        task_info['status'] = status
        task_info['end_time'] = time.time()

        # just for human reading convenience
        task_info['walltime'] = task_info['end_time'] - task_info['start_time']

    ######################################################################
    def end_all_tasks(self, status):
        """ End all exec tasks in case of exiting nested tasks

        Every task still on the stack gets the same status and end time,
        then the stack is emptied.
        """
        end_time = time.time()
        for name in reversed(self.curr_task):
            task_info = self.curr_exec['task_info'][name]
            task_info['status'] = status
            task_info['end_time'] = end_time

            # just for human reading convenience
            task_info['walltime'] = task_info['end_time'] - task_info['start_time']

        self.curr_task = []

    ######################################################################
    def save_outputs_by_section(self, ekey, outexist):
        """ save fullnames from outexist to outputs by section

        Parameters
        ----------
        ekey : str
            Name of the exec section that produced the files.
        outexist : dict
            Existing output files keyed by section.
        """
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(
                f"INFO: before adding outputs_by_sect={self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT]}",
                WRAPPER_OUTPUT_PREFIX)

        for exlabel, exlist in outexist.items():
            if exlist:
                if exlabel not in self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT]:
                    self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT][exlabel] = {}
                if ekey not in self.outputwcl[
                        intgdefs.OW_OUTPUTS_BY_SECT][exlabel]:
                    self.outputwcl[
                        intgdefs.OW_OUTPUTS_BY_SECT][exlabel][ekey] = []

                if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(
                        f"INFO: adding to sect={exlabel}: {exlist}",
                        WRAPPER_OUTPUT_PREFIX)
                self.outputwcl[
                    intgdefs.OW_OUTPUTS_BY_SECT][exlabel][ekey].extend(exlist)
            else:
                miscutils.fwdebug_print(
                    f"WARN: 0 output files in exlist for {exlabel}",
                    WRAPPER_OUTPUT_PREFIX)

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(
                f"INFO: after adding outputs_by_sect={self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT]}",
                WRAPPER_OUTPUT_PREFIX)

    ######################################################################
    def cleanup(self):
        """ Remove intermediate files from wrapper execution

        Nothing is removed here; only the cleanup start and end times are
        recorded in the 'wrapper' section of the output wcl.
        """
        self.outputwcl['wrapper']['cleanup_start'] = time.time()
        self.outputwcl['wrapper']['cleanup_end'] = time.time()

    ######################################################################
    @staticmethod
    def _exit_code_to_status(code):
        """ Convert the code of a SystemExit into an integer wrapper status

        Follows the interpreter's convention: None means success (0), a
        value that reads as an integer is used as is, anything else (for
        example a message string) counts as failure (1).
        """
        if code is None:
            return 0
        try:
            return int(str(code))
        except ValueError:
            return 1

    ######################################################################
    def run_wrapper(self):
        """ Workflow for this wrapper

        Runs the task sequence for every exec section in sorted order and
        saves the overall status, start time and end time in the 'wrapper'
        section of the output wcl.  A sys.exit or any other exception
        raised along the way stops the loop, ends all running tasks with
        status 1 and is not propagated.
        """
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: Begin", WRAPPER_OUTPUT_PREFIX)
        self.outputwcl['wrapper']['start_time'] = time.time()
        try:
            execs = intgmisc.get_exec_sections(self.inputwcl,
                                               intgdefs.IW_EXEC_PREFIX)
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(f"INFO: exec sections = {execs}",
                                        WRAPPER_OUTPUT_PREFIX)

            for ekey, iw_exec in sorted(execs.items()):
                ow_exec = {'task_info': {}}
                self.outputwcl[ekey] = ow_exec
                self.curr_exec = ow_exec

                self.transform_inputs(iw_exec)
                inputs = self.check_inputs(ekey)
                self.check_command_line(ekey, iw_exec)
                self.save_exec_version(iw_exec)
                self.create_command_line(ekey, iw_exec)
                self.create_output_dirs(iw_exec)
                self.run_exec()
                self.transform_outputs(iw_exec)
                outexist = self.check_outputs(ekey, ow_exec['status'])
                self.save_outputs_by_section(ekey, outexist)
                self.save_provenance(ekey, iw_exec, inputs, outexist,
                                     ow_exec['status'])

                ow_exec['status'] = 0

            self.cleanup()
            self.outputwcl['wrapper']['status'] = self.determine_status()
        except SystemExit as exc:
            miscutils.fwdebug_print(
                f"INFO: wrapper called sys.exit ({str(exc)}). Halting.",
                WRAPPER_OUTPUT_PREFIX)
            # sys.exit() may carry None or a message instead of an integer;
            # converting blindly would raise here and skip the bookkeeping
            self.outputwcl['wrapper']['status'] = \
                self._exit_code_to_status(exc.code)
            self.end_all_tasks(1)
        except Exception:
            (exc_type, exc_value, exc_trback) = sys.exc_info()
            traceback.print_exception(exc_type,
                                      exc_value,
                                      exc_trback,
                                      file=sys.stdout)
            self.outputwcl['wrapper']['status'] = 1
            self.end_all_tasks(1)

        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(
                f"INFO: outputwcl[intgdefs.OW_OUTPUTS_BY_SECT]={self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT]}",
                WRAPPER_OUTPUT_PREFIX)
        # turn each list of output files into one delimited string
        for fsname, fssect in self.outputwcl[
                intgdefs.OW_OUTPUTS_BY_SECT].items():
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(
                    f"INFO: making string for sect {fsname}: {fssect}",
                    WRAPPER_OUTPUT_PREFIX)
            for exname, exlist in fssect.items():
                self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT][fsname][
                    exname] = provdefs.PROV_DELIM.join(exlist)
        self.outputwcl['wrapper']['end_time'] = time.time()

        miscutils.fwdebug_print(
            f"INFO: end - exit status = {self.get_status()}",
            WRAPPER_OUTPUT_PREFIX)
def __init__(self, args):
    """ Initialize configuration object, typically reading from wclfile

    Values are layered so that later sources override earlier ones: the
    config from the software install (only when 'usePFWconfig' is in args)
    and then the submit wcl together with the values given in args.

    Parameters
    ----------
    args : dict
        Keys read here: 'wclfile', 'submit_des_services',
        'submit_des_db_section', 'usePFWconfig' and the dry-run and
        verify-files keys named by pfwdefs.
    """
    WCL.__init__(self)

    # data which needs to be kept across programs must go in self
    # data which needs to be searched also must go in self
    self.set_search_order(PFW_SEARCH_ORDER)

    wclobj = WCL()
    if 'wclfile' in args:
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print(f"Reading wclfile: {args['wclfile']}")
        try:
            starttime = time.time()
            # end=' ' keeps the following "DONE" on the same line
            print("\tReading submit wcl...", end=' ')
            with open(args['wclfile'], "r") as wclfh:
                wclobj.read(wclfh, filename=args['wclfile'])
            print(f"DONE ({time.time() - starttime:0.2f} secs)")
        except IOError as err:
            miscutils.fwdie(
                f"Error: Problem reading wcl file '{args['wclfile']}' : {err}",
                pfwdefs.PF_EXIT_FAILURE)

    # location of des services file
    if 'submit_des_services' in args and args['submit_des_services'] is not None:
        wclobj['submit_des_services'] = args['submit_des_services']
    elif 'submit_des_services' not in wclobj:
        if 'DES_SERVICES' in os.environ:
            wclobj['submit_des_services'] = os.environ['DES_SERVICES']
        else:
            # let it default to $HOME/.desservices.init
            wclobj['submit_des_services'] = None

    # which section to use in des services file
    if 'submit_des_db_section' in args and args['submit_des_db_section'] is not None:
        wclobj['submit_des_db_section'] = args['submit_des_db_section']
    elif 'submit_des_db_section' not in wclobj:
        if 'DES_DB_SECTION' in os.environ:
            wclobj['submit_des_db_section'] = os.environ['DES_DB_SECTION']
        else:
            # let DB connection code print error message
            wclobj['submit_des_db_section'] = None

    # for values passed in on command line, set top-level config
    for var in (pfwdefs.PF_DRYRUN, pfwdefs.PF_VERIFY_FILES):
        if var in args and args[var] is not None:
            wclobj[var] = args[var]

    if 'usePFWconfig' in args:
        pfwconfig = os.environ['PROCESSINGFW_DIR'] + '/etc/pfwconfig.des'
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print(f"Reading pfwconfig: {pfwconfig}")
        starttime = time.time()
        print("\tReading config from software install...", end=' ')
        pfwcfg_wcl = WCL()
        with open(pfwconfig, "r") as wclfh:
            pfwcfg_wcl.read(wclfh, filename=pfwconfig)
        self.update(pfwcfg_wcl)
        print(f"DONE ({time.time() - starttime:0.2f} secs)")

    # wclfile overrides all, so must be added last
    if 'wclfile' in args:
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print(f"Reading wclfile: {args['wclfile']}")
        self.update(wclobj)

    self.set_names()

    # store the file name of the top-level submitwcl in dict:
    if 'submitwcl' not in self and 'wclfile' in args:
        self['submitwcl'] = args['wclfile']

    if 'processingfw_dir' not in self and \
            'PROCESSINGFW_DIR' in os.environ:
        self['processingfw_dir'] = os.environ['PROCESSINGFW_DIR']

    if 'current' not in self:
        self['current'] = OrderedDict({'curr_block': '',
                                       'curr_archive': '',
                                       #'curr_software': '',
                                       'curr_site': ''})
        self[pfwdefs.PF_WRAPNUM] = '0'
        self[pfwdefs.PF_TASKNUM] = '0'
        self[pfwdefs.PF_JOBNUM] = '0'

    self.set_block_info()
def output_lines_wcl(filename, dataset):
    """ Write dataset to the named file in WCL format

    The dataset is wrapped in a WCL object first and the file is opened
    (and truncated) only afterwards.
    """
    wcl_data = WCL(dataset)
    with open(filename, "w") as out_handle:
        # print it sorted (the meaning of the trailing 4 is not visible
        # here - presumably an indent width; confirm against WCL.write)
        wcl_data.write(out_handle, True, 4)
def __init__(self, args):
    """ Build the configuration object, usually starting from a submit wcl

    Sources are applied so that later ones win: config from the software
    install (when 'usePFWconfig' is in args), defaults from the database
    (when DB input is enabled and 'get_db_config' is set in args), and
    finally the submit wcl together with the values given in args.
    """
    WCL.__init__(self)

    # data which needs to be kept across programs must go in self
    # data which needs to be searched also must go in self
    self.set_search_order(PFW_SEARCH_ORDER)

    submit_wcl = WCL()
    if 'wclfile' in args:
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print(f"Reading wclfile: {args['wclfile']}")
        try:
            t_begin = time.time()
            print("\tReading submit wcl...")
            with open(args['wclfile'], "r") as submit_fh:
                submit_wcl.read(submit_fh, filename=args['wclfile'])
            elapsed = time.time() - t_begin
            print(f"DONE ({elapsed:0.2f} secs)")
        except IOError as err:
            miscutils.fwdie(
                f"Error: Problem reading wcl file '{args['wclfile']}' : {err}",
                pfwdefs.PF_EXIT_FAILURE)

    # Location of the des services file, then which section of it to use.
    # Precedence for each: args, submit wcl, environment, None.  With None
    # the services file defaults to $HOME/.desservices.init and a missing
    # section is left for the DB connection code to report.
    for cfg_key, env_name in (('submit_des_services', 'DES_SERVICES'),
                              ('submit_des_db_section', 'DES_DB_SECTION')):
        if cfg_key in args and args[cfg_key] is not None:
            submit_wcl[cfg_key] = args[cfg_key]
        elif cfg_key not in submit_wcl:
            submit_wcl[cfg_key] = os.environ.get(env_name)

    # for values passed in on command line, set top-level config
    cmdline_keys = (pfwdefs.PF_DRYRUN, pfwdefs.PF_USE_DB_IN,
                    pfwdefs.PF_USE_DB_OUT, pfwdefs.PF_USE_QCF,
                    pfwdefs.PF_VERIFY_FILES)
    for cfg_key in cmdline_keys:
        if cfg_key in args and args[cfg_key] is not None:
            submit_wcl[cfg_key] = args[cfg_key]

    if 'usePFWconfig' in args:
        install_cfg = os.environ['PROCESSINGFW_DIR'] + '/etc/pfwconfig.des'
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print(f"Reading pfwconfig: {install_cfg}")
        t_begin = time.time()
        print("\tReading config from software install...")
        install_wcl = WCL()
        with open(install_cfg, "r") as install_fh:
            install_wcl.read(install_fh, filename=install_cfg)
        self.update(install_wcl)
        elapsed = time.time() - t_begin
        print(f"DONE ({elapsed:0.2f} secs)")

    # the submit wcl setting wins over the one read so far into self
    self.use_db_in = None
    if pfwdefs.PF_USE_DB_IN in submit_wcl:
        self.use_db_in = miscutils.convertBool(
            submit_wcl[pfwdefs.PF_USE_DB_IN])
    elif pfwdefs.PF_USE_DB_IN in self:
        self.use_db_in = miscutils.convertBool(self[pfwdefs.PF_USE_DB_IN])

    if self.use_db_in and 'get_db_config' in args and args['get_db_config']:
        print("\tGetting defaults from DB...")
        sys.stdout.flush()
        t_begin = time.time()
        self.dbh = pfwdb.PFWDB(submit_wcl['submit_des_services'],
                               submit_wcl['submit_des_db_section'])
        elapsed = time.time() - t_begin
        print(f"DONE ({elapsed:0.2f} secs)")
        self.update(self.dbh.get_database_defaults())
    else:
        self.dbh = None

    # wclfile overrides all, so must be added last
    if 'wclfile' in args:
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print(f"Reading wclfile: {args['wclfile']}")
        self.update(submit_wcl)

    self.set_names()

    # store the file name of the top-level submitwcl in dict:
    if 'submitwcl' not in self and 'wclfile' in args:
        self['submitwcl'] = args['wclfile']

    if 'processingfw_dir' not in self and 'PROCESSINGFW_DIR' in os.environ:
        self['processingfw_dir'] = os.environ['PROCESSINGFW_DIR']

    if 'current' not in self:
        self['current'] = collections.OrderedDict({
            'curr_block': '',
            'curr_archive': '',
            #'curr_software': '',
            'curr_site': ''
        })
        self[pfwdefs.PF_WRAPNUM] = '0'
        self[pfwdefs.PF_BLKNUM] = '1'
        self[pfwdefs.PF_TASKNUM] = '0'
        self[pfwdefs.PF_JOBNUM] = '0'

    if pfwdefs.SW_BLOCKLIST in self:
        blocks = miscutils.fwsplit(self[pfwdefs.SW_BLOCKLIST])
        # only set block info while the block number is within the list
        if int(self[pfwdefs.PF_BLKNUM]) <= len(blocks):
            self.set_block_info()
class BasicWrapper(object):
    """ Basic wrapper class for running 3rd party executables within the
        DES framework.

        Parameters
        ----------
        wclfile : str
            The name of the wcl file containing the instructions for running
            the executable.

        debug : int, optional
            The debug level. Default is 1.
    """

    ######################################################################
    def __init__(self, wclfile, debug=1):
        """ Read input wcl to initialize object """
        self.input_filename = wclfile
        self.inputwcl = WCL()
        with open(wclfile, 'r') as infh:
            self.inputwcl.read(infh)
        self.debug = debug

        # note: WGB handled by file registration using OW_OUTPUTS_BY_SECT
        provdict = OrderedDict({
            provdefs.PROV_USED: OrderedDict(),
            provdefs.PROV_WDF: OrderedDict()
        })
        self.outputwcl = WCL({
            'wrapper': OrderedDict(),
            intgdefs.OW_PROV_SECT: provdict,
            intgdefs.OW_OUTPUTS_BY_SECT: {}
        })

        self.last_num_derived = 0
        self.last_num_meta = 0
        self.curr_task = []    # stack of names of the tasks currently open
        self.curr_exec = None  # output wcl section of the exec being run

    ######################################################################
    @staticmethod
    def _insert_positional_args(cmdlist, posargs):
        """ Insert positional arguments into cmdlist (in place).

            Keys of posargs are the digit strings taken from the ``_<n>``
            cmdline keys.  They are processed in numeric order; a plain
            string sort would handle '10' before '2' and shift the earlier
            insert to the wrong index.
        """
        for pos in sorted(posargs, key=int):
            cmdlist.insert(int(pos), "%s" % posargs[pos])

    ######################################################################
    @staticmethod
    def _exit_status_from(exc):
        """ Convert a SystemExit into an integer status.

            Follows the interpreter's own rules: no code means 0, an integer
            (or integer-like string) is used as is, anything else means 1.
        """
        code = exc.code
        if code is None:
            return 0
        try:
            return int(code)
        except (TypeError, ValueError):
            return 1

    ######################################################################
    def determine_status(self):
        """ Check all task status' to determine wrapper status

            Returns
            -------
            int
                The exit status of the wrapper, 0 is success.
        """
        status = 0

        execs = intgmisc.get_exec_sections(self.inputwcl,
                                           intgdefs.IW_EXEC_PREFIX)
        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: exec sections = %s" % execs,
                                    WRAPPER_OUTPUT_PREFIX)

        for ekey in sorted(execs.keys()):
            if ekey in self.outputwcl:
                if 'task_info' in self.outputwcl[ekey]:
                    for taskd in self.outputwcl[ekey]['task_info'].values():
                        if 'status' in taskd:
                            if taskd['status'] != 0:
                                status = taskd['status']
                        else:
                            # a task that never reported a status is a failure
                            if miscutils.fwdebug_check(3, "BASICWRAP_DEBUG"):
                                miscutils.fwdebug_print(
                                    "WARN: Missing status in outputwcl task_info for %s"
                                    % ekey, WRAPPER_OUTPUT_PREFIX)
                            status = 1
                else:
                    if miscutils.fwdebug_check(3, "BASICWRAP_DEBUG"):
                        miscutils.fwdebug_print(
                            "WARN: Missing task_info in outputwcl for %s" % ekey,
                            WRAPPER_OUTPUT_PREFIX)
                    status = 1
            else:
                # exec section was never started
                status = 1

        return status

    ######################################################################
    def get_status(self):
        """ Return status of wrapper execution

            Returns
            -------
            int
                The exit status of the wrapper, 0 is success.  1 is returned
                when no status has been stored yet.
        """
        status = 1
        if 'status' in self.outputwcl['wrapper']:
            status = self.outputwcl['wrapper']['status']

        return status

    ######################################################################
    def check_command_line(self, exsect, exwcl):
        """ Ensure that certain command line arguments are specified

            This base implementation only records the task as successful.

            Parameters
            ----------
            exsect : str
                Unused

            exwcl : str
                Unused

            Returns
            -------
            int
                Always returns 0
        """
        # pylint: disable=unused-argument

        self.start_exec_task('check_command_line')
        self.end_exec_task(0)

        return 0

    ######################################################################
    def create_command_line(self, execnum, exwcl):
        """ Create command line for the executable based on entries in the
            wcl file, handling hyphens appropriately

            The resulting string is stored in ``self.curr_exec['cmdline']``.

            Parameters
            ----------
            execnum : int or str
                Identifier of the exec, used only for debug/error messages.

            exwcl : str
                The WCL to use for constructing the command line

            Raises
            ------
            KeyError
                If there is a missing execname in the WCL

            ValueError
                If a cmdline key starts with '_' but is not '_<number>'
        """
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(
                "execnum = '%s', exwcl = '%s'" % (execnum, exwcl),
                WRAPPER_OUTPUT_PREFIX)
        self.start_exec_task('create_command_line')

        cmdstr = ""
        if 'execname' in exwcl:
            cmdlist = [exwcl['execname']]

            if 'cmdline' in exwcl:
                posargs = {}  # save positional args to insert later

                hyphen_type = 'allsingle'
                if 'cmd_hyphen' in exwcl:
                    hyphen_type = exwcl['cmd_hyphen']

                # loop through command line args
                for key, val in exwcl['cmdline'].items():
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            "key = '%s', val = '%s'" % (key, val),
                            WRAPPER_OUTPUT_PREFIX)

                    # replace any variables
                    expandval = replfuncs.replace_vars(val, self.inputwcl)[0]
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            "expandval = '%s'" % (expandval),
                            WRAPPER_OUTPUT_PREFIX)

                    if key.startswith('_'):
                        patmatch = re.match(r'_(\d+)', key)
                        if patmatch:
                            posargs[patmatch.group(1)] = expandval  # save for later
                        else:
                            raise ValueError(
                                'Invalid positional argument name: %s' % key)
                    else:
                        hyphen = intgmisc.get_cmd_hyphen(hyphen_type, key)

                        if expandval == '_flag':
                            # option without a value
                            cmdlist.append(" %s%s" % (hyphen, key))
                        else:
                            cmdlist.append(" %s%s %s" % (hyphen, key, expandval))

                # insert position sensitive arguments into specified location
                # in argument list
                self._insert_positional_args(cmdlist, posargs)

            # convert list of args into string
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print("cmdlist = '%s'" % (cmdlist),
                                        WRAPPER_OUTPUT_PREFIX)
            cmdstr = ' '.join(cmdlist)
        else:
            # %s rather than %d: the caller passes the exec section key
            print("Error: missing execname in wcl for exec #%s" % execnum)
            print("exec wcl = %s" % exwcl)
            raise KeyError('Missing execname in wcl for exec #%s' % execnum)

        self.curr_exec['cmdline'] = cmdstr
        self.end_exec_task(0)

    ######################################################################
    def save_exec_version(self, exwcl):
        """ Run command with version flag and parse output for version
            information

            When a version is found it is stored in
            ``self.curr_exec['version']``.

            Parameters
            ----------
            exwcl : str
                The WCL to use for constructing the command line

            Raises
            ------
            Exception
                If the version command cannot be started or the version
                pattern cannot be applied
        """
        # assumes exit code for version is 0
        self.start_exec_task('save_exec_version')

        ver = None

        execname = exwcl['execname']
        if 'version_flag' in exwcl and 'version_pattern' in exwcl:
            verflag = exwcl['version_flag']
            verpat = exwcl['version_pattern']

            cmd = "%s %s" % (execname, verflag)
            try:
                process = subprocess.Popen(shlex.split(cmd),
                                           shell=False,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.STDOUT)
            except Exception:
                (exc_type, exc_value) = sys.exc_info()[0:2]
                print("********************")
                print("Unexpected error: %s - %s" % (exc_type, exc_value))
                print("cmd> %s" % cmd)
                print("Probably could not find %s in path" % shlex.split(cmd)[0])
                print("Check for mispelled execname in submit wcl or")
                print(" make sure that the corresponding eups package is in the metapackage ")
                print(" and it sets up the path correctly")
                raise

            # communicate() both drains the pipe and waits for the process;
            # waiting first can block forever once the pipe buffer fills up
            out = process.communicate()[0]
            if isinstance(out, bytes):
                # the version pattern is a str, so the output must be too
                out = out.decode('utf-8', errors='replace')

            if process.returncode != 0:
                miscutils.fwdebug_print(
                    "INFO: problem when running code to get version",
                    WRAPPER_OUTPUT_PREFIX)
                miscutils.fwdebug_print(
                    "\t%s %s %s" % (execname, verflag, verpat),
                    WRAPPER_OUTPUT_PREFIX)
                miscutils.fwdebug_print("\tcmd> %s" % cmd,
                                        WRAPPER_OUTPUT_PREFIX)
                miscutils.fwdebug_print("\t%s" % out, WRAPPER_OUTPUT_PREFIX)
                ver = None
            else:
                # parse output with verpat
                try:
                    vmatch = re.search(verpat, out)
                    if vmatch:
                        ver = vmatch.group(1)
                    else:
                        if miscutils.fwdebug_check(1, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print(
                                "re.search didn't find version for exec %s" % execname,
                                WRAPPER_OUTPUT_PREFIX)
                        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print("\tcmd output=%s" % out,
                                                    WRAPPER_OUTPUT_PREFIX)
                            miscutils.fwdebug_print("\tcmd verpat=%s" % verpat,
                                                    WRAPPER_OUTPUT_PREFIX)
                except Exception as err:
                    ver = None
                    print("Error: Exception from re.match. Didn't find version: %s" % err)
                    raise
        else:
            miscutils.fwdebug_print(
                "INFO: Could not find version info for exec %s" % execname,
                WRAPPER_OUTPUT_PREFIX)
            ver = None

        if ver is not None:
            self.curr_exec['version'] = ver
        self.end_exec_task(0)

    ######################################################################
    def create_output_dirs(self, exwcl):
        """ Make directories for output files

            Parameters
            ----------
            exwcl : str
                The WCL to use for determining the output directories

            Raises
            ------
            ValueError
                If a deprecated format is used

            IOError
                If an output list file does not exist
        """
        self.start_exec_task('create_output_dirs')

        if intgdefs.IW_OUTPUTS in exwcl:
            for sect in miscutils.fwsplit(exwcl[intgdefs.IW_OUTPUTS]):
                sectkeys = sect.split('.')
                if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print("INFO: sectkeys=%s" % sectkeys,
                                            WRAPPER_OUTPUT_PREFIX)
                if sectkeys[0] == intgdefs.IW_FILE_SECT:
                    sectname = sectkeys[1]
                    if sectname in self.inputwcl[intgdefs.IW_FILE_SECT]:
                        filesect_wcl = self.inputwcl[intgdefs.IW_FILE_SECT][sectname]
                        if 'fullname' in filesect_wcl:
                            fullnames = filesect_wcl['fullname']
                            if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                                miscutils.fwdebug_print(
                                    "INFO: fullname = %s" % fullnames,
                                    WRAPPER_OUTPUT_PREFIX)
                            if '$RNMLST{' in fullnames:
                                raise ValueError(
                                    'Deprecated $RNMLST in output filename')

                            for fname in miscutils.fwsplit(fullnames, ','):
                                outdir = os.path.dirname(fname)
                                miscutils.coremakedirs(outdir)
                elif sectkeys[0] == intgdefs.IW_LIST_SECT:
                    # list sections are named <list sect>.<list name>.<file sect>
                    (_, _, filesect) = sect.split('.')
                    ldict = self.inputwcl[intgdefs.IW_LIST_SECT][sectkeys[1]]

                    # check list itself exists
                    listname = ldict['fullname']
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            "\tINFO: Checking existence of '%s'" % listname,
                            WRAPPER_OUTPUT_PREFIX)

                    if not os.path.exists(listname):
                        miscutils.fwdebug_print(
                            "\tError: list '%s' does not exist." % listname,
                            WRAPPER_OUTPUT_PREFIX)
                        raise IOError("List not found: %s does not exist" % listname)

                    # get list format: space separated, csv, wcl, etc
                    listfmt = intgdefs.DEFAULT_LIST_FORMAT
                    if intgdefs.LIST_FORMAT in ldict:
                        listfmt = ldict[intgdefs.LIST_FORMAT]

                    # read fullnames from list file
                    fullnames = intgmisc.read_fullnames_from_listfile(
                        listname, listfmt, ldict['columns'])
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            "\tINFO: fullnames=%s" % fullnames,
                            WRAPPER_OUTPUT_PREFIX)
                    for fname in fullnames[filesect]:
                        outdir = os.path.dirname(fname)
                        miscutils.coremakedirs(outdir)

        self.end_exec_task(0)

    ######################################################################
    def run_exec(self):
        """ Run the generated command line

            The exit code and process info are stored in ``self.curr_exec``.

            Raises
            ------
            OSError
                If there is an error running the command, but not if the
                command ran and returned a non zero exit status.
        """
        self.start_exec_task('run_exec')
        cmdline = self.curr_exec['cmdline']

        retcode = None
        procinfo = None

        miscutils.fwdebug_print("INFO: cmd = %s" % cmdline,
                                WRAPPER_OUTPUT_PREFIX)
        print('*' * 70)
        sys.stdout.flush()
        try:
            (retcode, procinfo) = intgmisc.run_exec(cmdline)
        except OSError as exc:
            # only "file not found" gets the extra hints below
            if exc.errno != errno.ENOENT:
                raise

            print("********************")
            (exc_type, exc_value, _) = sys.exc_info()
            print("%s - %s" % (exc_type, exc_value))
            print("cmd> %s" % cmdline)
            print("Probably could not find %s in path" % cmdline.split()[0])
            print("Check for mispelled execname in submit wcl or")
            print(" make sure that the corresponding eups package is in ")
            print(" the metapackage and it sets up the path correctly")
            raise

        sys.stdout.flush()

        if retcode != 0:
            if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(
                    "\tINFO: cmd exited with non-zero exit code = %s" % retcode,
                    WRAPPER_OUTPUT_PREFIX)
                miscutils.fwdebug_print("\tINFO: failed cmd = %s" % cmdline,
                                        WRAPPER_OUTPUT_PREFIX)
        else:
            if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(
                    "\tINFO: cmd exited with exit code = 0",
                    WRAPPER_OUTPUT_PREFIX)

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("END", WRAPPER_OUTPUT_PREFIX)
        print('*' * 70)
        self.curr_exec['status'] = retcode
        self.curr_exec['procinfo'] = procinfo

        self.end_exec_task(retcode)

    ######################################################################
    def check_inputs(self, ekey):
        """ Check which input files/lists do not exist

            Exits the program with status 3 as soon as a section with
            missing inputs is found.

            Parameters
            ----------
            ekey : str
                The section of the WCL to look in.

            Returns
            -------
            dict
                The input files that were found, keyed by section.
        """
        self.start_exec_task('check_inputs')

        existfiles = {}

        ins, _ = intgmisc.get_fullnames(self.inputwcl, self.inputwcl, ekey)
        for sect in ins:
            exists, missing = intgmisc.check_files(ins[sect])
            existfiles[sect] = exists

            if missing:
                for mfile in missing:
                    miscutils.fwdebug_print(
                        "ERROR: input '%s' does not exist." % mfile,
                        WRAPPER_OUTPUT_PREFIX)
                # dump working directory contents to help diagnose the problem
                os.system("pwd")
                os.system("find . -type f")
                # if missing inputs, just abort
                sys.exit(3)

        self.end_exec_task(0)
        return existfiles

    ######################################################################
    def get_optout(self, sect):
        """ Return whether file(s) are optional outputs

            Parameters
            ----------
            sect : str
                The section of the WCL to use for finding file data

            Returns
            -------
            bool
                Whether or not the files in the specified section are
                optional.

            Raises
            ------
            KeyError
                If the specified `sect` is neither a file nor a list section.
        """
        optout = False
        sectkeys = sect.split('.')
        if sectkeys[0] == intgdefs.IW_FILE_SECT:
            if intgdefs.IW_OUTPUT_OPTIONAL in self.inputwcl.get(sect):
                optout = miscutils.convertBool(
                    self.inputwcl.get(sect)[intgdefs.IW_OUTPUT_OPTIONAL])
        elif sectkeys[0] == intgdefs.IW_LIST_SECT:
            # for a list the flag lives in the file section the list refers to
            filekey = "%s.%s" % (intgdefs.IW_FILE_SECT, sectkeys[2])
            if intgdefs.IW_OUTPUT_OPTIONAL in self.inputwcl.get(filekey):
                optout = miscutils.convertBool(
                    self.inputwcl.get(filekey)[intgdefs.IW_OUTPUT_OPTIONAL])
        else:
            raise KeyError("Unknown data section %s" % sectkeys[0])

        return optout

    ######################################################################
    def check_outputs(self, ekey, exitcode):
        """ Check which output files were created, renaming if necessary

            The task status is 1 when a required output is missing after a
            successful (zero exit code) run, 0 otherwise.

            Parameters
            ----------
            ekey : str
                The exec section to use from the WCL

            exitcode : int
                The exit code of the executable run

            Returns
            -------
            dict
                Dictionary containing the files that exist, keyed by section
        """
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: Beg", WRAPPER_OUTPUT_PREFIX)

        self.start_exec_task('check_outputs')

        existfiles = {}
        missingfiles = {}

        _, outs = intgmisc.get_fullnames(self.inputwcl, self.inputwcl, ekey)
        for sect in outs:
            if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print("INFO: sect=%s" % sect,
                                        WRAPPER_OUTPUT_PREFIX)

            exists, missing = intgmisc.check_files(outs[sect])
            existfiles.update({sect: exists})
            if missing:
                optout = self.get_optout(sect)
                if optout:
                    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            "\tINFO: optional output file '%s' does not exist (sect: %s)."
                            % (missing, sect), WRAPPER_OUTPUT_PREFIX)
                elif exitcode != 0:
                    if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            "INFO: skipping missing output due to non-zero exit code (%s: %s)"
                            % (sect, missing), WRAPPER_OUTPUT_PREFIX)
                else:
                    miscutils.fwdebug_print(
                        "ERROR: Missing required output file(s) (%s:%s)"
                        % (sect, missing), WRAPPER_OUTPUT_PREFIX)
                    missingfiles.update({sect: missing})

        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: existfiles=%s" % existfiles,
                                    WRAPPER_OUTPUT_PREFIX)
            miscutils.fwdebug_print("INFO: missingfiles=%s" % missingfiles,
                                    WRAPPER_OUTPUT_PREFIX)

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: end", WRAPPER_OUTPUT_PREFIX)

        status = 1 if missingfiles else 0

        self.end_exec_task(status)
        return existfiles

    ######################################################################
    def save_provenance(self, execsect, exwcl, infiles, outfiles, exitcode):
        #pylint: disable=unbalanced-tuple-unpacking
        """ Create provenance wcl

            Parameters
            ----------
            execsect : str
                The exec section the "used" entry is stored under.

            exwcl : str
                The WCL of the exec, read for the derivation and input
                entries.

            infiles : dict
                Existing input file names keyed by section.

            outfiles : dict
                Existing output file names keyed by section.

            exitcode : int
                The exit code of the executable run.

            Returns
            -------
            dict
                The provenance section of the output wcl.
        """
        self.start_exec_task('save_provenance')

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: Beg", WRAPPER_OUTPUT_PREFIX)
        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: infiles = %s" % infiles,
                                    WRAPPER_OUTPUT_PREFIX)
            miscutils.fwdebug_print("INFO: outfiles = %s" % outfiles,
                                    WRAPPER_OUTPUT_PREFIX)

        num_errs = 0

        # convert probably fullnames in outexist to filename+compression
        new_outfiles = OrderedDict()
        for exlabel, exlist in outfiles.items():
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(
                    "INFO: exlabel=%s exlist=%s" % (exlabel, exlist),
                    WRAPPER_OUTPUT_PREFIX)
            newlist = [miscutils.parse_fullname(fullname,
                                                miscutils.CU_PARSE_BASENAME)
                       for fullname in exlist]
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print("INFO: newlist=%s" % (newlist),
                                        WRAPPER_OUTPUT_PREFIX)

            new_outfiles[exlabel] = newlist

        prov = self.outputwcl[intgdefs.OW_PROV_SECT]

        # used
        new_infiles = {}
        if infiles:
            all_infiles = []
            for key, sublist in infiles.items():
                new_infiles[key] = []
                for fullname in sublist:
                    basename = miscutils.parse_fullname(
                        fullname, miscutils.CU_PARSE_BASENAME)
                    all_infiles.append(basename)
                    new_infiles[key].append(basename)
            prov[provdefs.PROV_USED][execsect] = provdefs.PROV_DELIM.join(
                all_infiles)

        # was_generated_by - done by PFW when saving metadata

        # was_derived_from
        if intgdefs.IW_DERIVATION in exwcl:
            # an earlier exec may have removed the (then empty) section
            if provdefs.PROV_WDF not in prov:
                prov[provdefs.PROV_WDF] = OrderedDict()
            wdf = prov[provdefs.PROV_WDF]
            derived_pairs = miscutils.fwsplit(exwcl[intgdefs.IW_DERIVATION],
                                              provdefs.PROV_DELIM)
            for dpair in derived_pairs:
                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print("INFO: dpair = %s" % dpair,
                                            WRAPPER_OUTPUT_PREFIX)
                (parent_sect, child_sect) = miscutils.fwsplit(dpair, ':')[:2]
                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(
                        "INFO: parent_sect = %s" % parent_sect,
                        WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(
                        "INFO: child_sect = %s" % child_sect,
                        WRAPPER_OUTPUT_PREFIX)

                optout = self.get_optout(child_sect)

                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print("INFO: optout = %s" % optout,
                                            WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(
                        "INFO: new_outfiles.keys = %s" % new_outfiles.keys(),
                        WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(
                        "INFO: new_outfiles = %s" % new_outfiles,
                        WRAPPER_OUTPUT_PREFIX)

                if child_sect not in new_outfiles or new_outfiles[child_sect] is None or \
                        not new_outfiles[child_sect]:
                    # no child files: only an error when they were required
                    # and the executable claimed success
                    if optout:
                        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print(
                                "INFO: skipping missing optional output %s:%s"
                                % (parent_sect, child_sect),
                                WRAPPER_OUTPUT_PREFIX)
                    elif exitcode != 0:
                        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print(
                                "INFO: skipping missing output due to non-zero exit code %s:%s"
                                % (parent_sect, child_sect),
                                WRAPPER_OUTPUT_PREFIX)
                    else:
                        miscutils.fwdebug_print(
                            "ERROR: Missing child output files in wdf tuple (%s:%s)"
                            % (parent_sect, child_sect),
                            WRAPPER_OUTPUT_PREFIX)
                        num_errs += 1
                else:
                    self.last_num_derived += 1
                    key = 'derived_%d' % self.last_num_derived
                    if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print("INFO: key = %s" % key,
                                                WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            "INFO: before wdf = %s" % prov[provdefs.PROV_WDF],
                            WRAPPER_OUTPUT_PREFIX)

                    if parent_sect not in infiles and parent_sect not in new_outfiles:
                        miscutils.fwdebug_print(
                            "parent_sect = %s" % parent_sect,
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            "infiles.keys() = %s" % infiles.keys(),
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            "outfiles.keys() = %s" % outfiles.keys(),
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            "used = %s" % exwcl[intgdefs.IW_INPUTS],
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            "ERROR: Could not find parent files for %s" % (dpair),
                            WRAPPER_OUTPUT_PREFIX)
                        num_errs += 1
                    else:
                        wdf[key] = OrderedDict()
                        wdf[key][provdefs.PROV_CHILDREN] = provdefs.PROV_DELIM.join(
                            new_outfiles[child_sect])
                        if parent_sect in infiles:
                            wdf[key][provdefs.PROV_PARENTS] = provdefs.PROV_DELIM.join(
                                new_infiles[parent_sect])
                        elif parent_sect in new_outfiles:
                            # this output was generated within same
                            # program/wrapper from other output files
                            parents = [miscutils.parse_fullname(
                                outparent, miscutils.CU_PARSE_FILENAME)
                                       for outparent in outfiles[parent_sect]]
                            wdf[key][provdefs.PROV_PARENTS] = provdefs.PROV_DELIM.join(
                                parents)

                    if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(
                            "INFO: after wdf = %s" % prov[provdefs.PROV_WDF],
                            WRAPPER_OUTPUT_PREFIX)

            # do not leave an empty section in the output wcl
            if not wdf:
                del prov[provdefs.PROV_WDF]

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: End (num_errs = %d)" % num_errs,
                                    WRAPPER_OUTPUT_PREFIX)

        self.end_exec_task(num_errs)
        return prov

    ######################################################################
    def write_outputwcl(self, outfilename=None):
        """ Write output wcl to file

            Parameters
            ----------
            outfilename : str, optional
                The name of the output wcl file to write. Default is
                ``None`` which indicates that the file name is stored in the
                inputwcl.
        """
        if outfilename is None:
            outfilename = self.inputwcl['wrapper']['outputwcl']

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("outfilename = %s" % outfilename,
                                    WRAPPER_OUTPUT_PREFIX)

        # create output wcl directory if needed
        outwcldir = miscutils.parse_fullname(outfilename,
                                             miscutils.CU_PARSE_PATH)
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("outwcldir = %s" % outwcldir,
                                    WRAPPER_OUTPUT_PREFIX)
        miscutils.coremakedirs(outwcldir)

        with open(outfilename, 'w') as wclfh:
            self.outputwcl.write(wclfh, True)

    ######################################################################
    def start_exec_task(self, name):
        """ Save start execution info

            Parameters
            ----------
            name : str
                The name of the task.
        """
        self.curr_task.append(name)
        self.curr_exec['task_info'][name] = {'start_time': time.time()}

    ######################################################################
    def end_exec_task(self, status):
        """ Save end execution info for the most recently started task

            Parameters
            ----------
            status : int
                The exit status of the task.
        """
        name = self.curr_task.pop()

        task_info = self.curr_exec['task_info'][name]
        task_info['status'] = status
        task_info['end_time'] = time.time()

        # just for human reading convenience
        task_info['walltime'] = task_info['end_time'] - task_info['start_time']

    ######################################################################
    def end_all_tasks(self, status):
        """ End all exec tasks in case of exiting nested tasks

            Parameters
            ----------
            status : int
                The exit status for the tasks
        """
        end_time = time.time()
        for name in reversed(self.curr_task):
            task_info = self.curr_exec['task_info'][name]
            task_info['status'] = status
            task_info['end_time'] = end_time

            # just for human reading convenience
            task_info['walltime'] = task_info['end_time'] - task_info['start_time']

        self.curr_task = []

    ######################################################################
    def save_outputs_by_section(self, ekey, outexist):
        """ Save full file names from outexist to outputs by section

            Parameters
            ----------
            ekey : str
                The exec section of the WCL to use.

            outexist : dict
                Dictionary of the existing output files keyed by section.
        """
        by_sect = self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT]
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(
                "INFO: before adding outputs_by_sect=%s" % (by_sect),
                WRAPPER_OUTPUT_PREFIX)

        for exlabel, exlist in outexist.items():
            if exlist:
                if exlabel not in by_sect:
                    by_sect[exlabel] = {}
                if ekey not in by_sect[exlabel]:
                    by_sect[exlabel][ekey] = []

                if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(
                        "INFO: adding to sect=%s: %s" % (exlabel, exlist),
                        WRAPPER_OUTPUT_PREFIX)
                by_sect[exlabel][ekey].extend(exlist)
            else:
                miscutils.fwdebug_print(
                    "WARN: 0 output files in exlist for %s" % (exlabel),
                    WRAPPER_OUTPUT_PREFIX)

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(
                "INFO: after adding outputs_by_sect=%s"
                % (self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT]),
                WRAPPER_OUTPUT_PREFIX)

    ######################################################################
    def cleanup(self):
        """ Remove intermediate files from wrapper execution

            This base implementation removes nothing; it only records the
            start and end time of the cleanup step.
        """
        self.outputwcl['wrapper']['cleanup_start'] = time.time()
        self.outputwcl['wrapper']['cleanup_end'] = time.time()

    ######################################################################
    def run_wrapper(self):
        """ Complete workflow for the wrapper. This includes input checking,
            execution, and output checking.

            Exceptions (including SystemExit) raised by the individual steps
            are caught and turned into a wrapper status.
        """
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: Begin", WRAPPER_OUTPUT_PREFIX)
        self.outputwcl['wrapper']['start_time'] = time.time()
        try:
            execs = intgmisc.get_exec_sections(self.inputwcl,
                                               intgdefs.IW_EXEC_PREFIX)
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print("INFO: exec sections = %s" % execs,
                                        WRAPPER_OUTPUT_PREFIX)

            for ekey, iw_exec in sorted(execs.items()):
                ow_exec = {'task_info': {}}
                self.outputwcl[ekey] = ow_exec
                self.curr_exec = ow_exec

                inputs = self.check_inputs(ekey)
                self.check_command_line(ekey, iw_exec)
                self.save_exec_version(iw_exec)
                self.create_command_line(ekey, iw_exec)
                self.create_output_dirs(iw_exec)
                self.run_exec()
                outexist = self.check_outputs(ekey, ow_exec['status'])
                self.save_outputs_by_section(ekey, outexist)
                self.save_provenance(ekey, iw_exec, inputs, outexist,
                                     ow_exec['status'])

                # all steps for this exec were carried out; failures are
                # tracked through the task_info statuses
                ow_exec['status'] = 0

            self.cleanup()
            self.outputwcl['wrapper']['status'] = self.determine_status()
        except SystemExit as e:
            miscutils.fwdebug_print(
                "INFO: wrapper called sys.exit (%s). Halting." % str(e),
                WRAPPER_OUTPUT_PREFIX)
            self.outputwcl['wrapper']['status'] = self._exit_status_from(e)
            self.end_all_tasks(1)
        except Exception:
            (exc_type, exc_value, exc_trback) = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_trback,
                                      file=sys.stdout)
            self.outputwcl['wrapper']['status'] = 1
            self.end_all_tasks(1)

        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(
                "INFO: outputwcl[intgdefs.OW_OUTPUTS_BY_SECT]=%s"
                % (self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT]),
                WRAPPER_OUTPUT_PREFIX)

        # the per-exec lists of file names are written out as delimited strings
        for fsname, fssect in self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT].items():
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(
                    "INFO: making string for sect %s: %s" % (fsname, fssect),
                    WRAPPER_OUTPUT_PREFIX)
            for exname, exlist in fssect.items():
                self.outputwcl[intgdefs.OW_OUTPUTS_BY_SECT][fsname][exname] = \
                    provdefs.PROV_DELIM.join(exlist)
        self.outputwcl['wrapper']['end_time'] = time.time()

        miscutils.fwdebug_print(
            "INFO: end - exit status = %s" % self.get_status(),
            WRAPPER_OUTPUT_PREFIX)