def endrun(configfile):
    """Program entry point for end-of-run processing.

    Reads the run config from ``configfile``, optionally mass-copies the
    attempt's output directory from the target archive back to the home
    archive (when use_home_archive_output == 'run'), and closes out the
    attempt task in the DB when DB output is enabled.

    Returns a pfwdefs.PF_EXIT_* status code.
    """
    miscutils.fwdebug_print("BEG")
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    # endrun is expected to be launched from a sibling directory of uberctrl
    os.chdir('../uberctrl')

    retval = pfwdefs.PF_EXIT_SUCCESS
    if pfwdefs.USE_HOME_ARCHIVE_OUTPUT in config and \
       config[pfwdefs.USE_HOME_ARCHIVE_OUTPUT].lower() == 'run':
        if pfwdefs.ATTEMPT_ARCHIVE_PATH not in config:
            print("Error: Cannot find %s in config" % pfwdefs.ATTEMPT_ARCHIVE_PATH)
            print("\tIt is needed for the mass copy of the run back to the " \
                  "home archive at the end of the run")
            return pfwdefs.PF_EXIT_FAILURE

        # NOTE(review): this passes the config *value* back into getfull as a
        # key, unlike endblock which calls config.getfull(pfwdefs.<KEY>)
        # directly — confirm this double lookup is intended.
        archpath = config.getfull(config[pfwdefs.ATTEMPT_ARCHIVE_PATH])
        print("archpath =", archpath)

        # call archive transfer for target archive to home archive
        # check if using target archive
        target_info = None
        if pfwdefs.USE_TARGET_ARCHIVE_OUTPUT in config and \
           config.getfull(pfwdefs.USE_TARGET_ARCHIVE_OUTPUT).lower() != 'never':
            if pfwdefs.TARGET_ARCHIVE in config and \
               config.getfull(pfwdefs.TARGET_ARCHIVE) in config[pfwdefs.SW_ARCHIVESECT]:
                target_info = config[pfwdefs.SW_ARCHIVESECT][config.getfull(pfwdefs.TARGET_ARCHIVE)]
            else:
                print("Error: cannot determine info for target archive")
                return pfwdefs.PF_EXIT_FAILURE
        else:
            print("Error: Asked to transfer outputs at end of run, but not using target archive")
            return pfwdefs.PF_EXIT_FAILURE

        # home archive info is optional here; transfer util receives None if absent
        home_info = None
        print(config[pfwdefs.HOME_ARCHIVE])
        if pfwdefs.HOME_ARCHIVE in config and \
           config[pfwdefs.HOME_ARCHIVE] in config[pfwdefs.SW_ARCHIVESECT]:
            home_info = config[pfwdefs.SW_ARCHIVESECT][config.getfull(pfwdefs.HOME_ARCHIVE)]

        # call transfer
        archive_transfer_utils.archive_copy_dir(target_info, home_info,
                                                config.getfull('archive_transfer'),
                                                archpath, config)

    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        # close out the attempt-level task row with the final status
        miscutils.fwdebug_print("Calling update_attempt_end: retval = %s" % retval)
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        dbh.end_task(config['task_id']['attempt'], retval, True)
        dbh.commit()
        dbh.close()

    miscutils.fwdebug_print("END - exiting with code %s" % retval)
    return retval
def endblock(configfile):
    """Program entry point for end-of-block processing.

    When use_home_archive_output == 'block', copies this block's output
    files (list obtained from the DB) from the target archive to the home
    archive.  Requires DB usage for the file list; without a DB the
    transfer is unsupported and the function fails.

    Returns a pfwdefs.PF_EXIT_* status code.
    """
    miscutils.fwdebug_print("BEG")
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    blkdir = config.getfull('block_dir')
    os.chdir(blkdir)

    if pfwdefs.USE_HOME_ARCHIVE_OUTPUT in config and \
       config[pfwdefs.USE_HOME_ARCHIVE_OUTPUT].lower() == 'block':
        # check if using target archive
        target_info = None
        if pfwdefs.USE_TARGET_ARCHIVE_OUTPUT in config and \
           config[pfwdefs.USE_TARGET_ARCHIVE_OUTPUT].lower() != 'never':
            print(config[pfwdefs.TARGET_ARCHIVE])
            # NOTE(review): here SW_ARCHIVESECT is fetched via getfull while
            # the sibling endrun() indexes config[...] directly — confirm
            # both forms return the same archive-section mapping.
            if pfwdefs.TARGET_ARCHIVE in config and \
               config[pfwdefs.TARGET_ARCHIVE] in config.getfull(pfwdefs.SW_ARCHIVESECT):
                target_info = config.getfull(pfwdefs.SW_ARCHIVESECT)[config.getfull(pfwdefs.TARGET_ARCHIVE)]
            else:
                print("Error: cannot determine info for target archive")
                return pfwdefs.PF_EXIT_FAILURE
        else:
            print("Error: Asked to transfer outputs at end of block, but not using target archive")
            return pfwdefs.PF_EXIT_FAILURE

        # home archive info is optional; transfer util receives None if absent
        home_info = None
        print(config.getfull(pfwdefs.HOME_ARCHIVE))
        if pfwdefs.HOME_ARCHIVE in config and \
           config.getfull(pfwdefs.HOME_ARCHIVE) in config[pfwdefs.SW_ARCHIVESECT]:
            home_info = config[pfwdefs.SW_ARCHIVESECT][config.getfull(pfwdefs.HOME_ARCHIVE)]

        # get file list of files to transfer
        if pfwdefs.PF_USE_DB_OUT in config and miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
            dbh = pfwdb.PFWDB()
            filelist = dbh.get_run_filelist(config.getfull(pfwdefs.REQNUM),
                                            config.getfull(pfwdefs.UNITNAME),
                                            config.getfull(pfwdefs.ATTNUM),
                                            config.getfull(pfwdefs.PF_BLKNUM),
                                            config.getfull(pfwdefs.TARGET_ARCHIVE))
        else:
            print("Error: Asked to transfer outputs at end of block, but not using database.")
            print(" Currently not supported.")
            return pfwdefs.PF_EXIT_FAILURE

        # call transfer
        archive_transfer_utils.archive_copy(target_info, home_info,
                                            config.getfull('archive_transfer'),
                                            filelist, config)

    miscutils.fwdebug_print("END - exiting with code %s" % pfwdefs.PF_EXIT_SUCCESS)
    return pfwdefs.PF_EXIT_SUCCESS
def print_job_info(argv):
    """Print block/job/wrapper status for each processing attempt named on the command line."""
    args = parse_args(argv)

    try:
        dbh = pfwdb.PFWDB(args['des_services'], args['section'])
    except configparser.NoSectionError:
        print("Can't determine section of services file to get DB connection info")
        print("\tEither set environment variable DES_DB_SECTION or add command-line option --section")
        sys.exit(1)

    for attempt_str in args['runs']:
        print(attempt_str)
        reqnum, unitname, attnum = parse_attempt_str(attempt_str)

        # look up the attempt itself; bail out early if the DB knows nothing
        attinfo = dbh.get_attempt_info(reqnum, unitname, attnum)
        if attinfo is None:
            print("No DB information about the processing attempt")
            print("(Double check which DB querying vs which DB the attempt used)")
            continue

        if 'endtime' in attinfo and attinfo['endtime'] is not None:
            print("Note: run has finished with status %s" % attinfo['status'])

        attempt_id = attinfo['id']

        # gather block, job, job-wrapper, and wrapper-instance info,
        # then index each by the key the per-block printer expects
        blockinfo = dbh.get_block_info(pfw_attempt_id=attempt_id)
        job_byblk = pfwutils.index_job_info(dbh.get_job_info({'pfw_attempt_id': attempt_id}))
        jwrap_byjob, _jwrap_bywrap = pfwutils.index_jobwrapper_info(
            dbh.get_jobwrapper_info(id=attempt_id))
        wrap_byjob, _wrap_bymod = pfwutils.index_wrapper_info(
            dbh.get_wrapper_info(pfw_attempt_id=attempt_id))

        for blknum in sorted(blockinfo):
            print_single_block(blknum, blockinfo[blknum], job_byblk,
                               jwrap_byjob, wrap_byjob, args['verbose'])
def get_task_info(args):
    """Query the DB for all tasks belonging to a processing attempt.

    Args:
        args: mapping with keys 'des_services', 'section', 'reqnum',
            'unitname', 'attnum' identifying the attempt.

    Returns:
        (attid, tasks): the attempt's root task id and a dict mapping
        task id -> row dict (lower-cased column names) with an empty
        'children' list added to each row for later tree building.
    """
    dbh = pfwdb.PFWDB(args['des_services'], args['section'])

    # get the run info
    attinfo = dbh.get_attempt_info(args['reqnum'], args['unitname'], args['attnum'])
    attid = attinfo['task_id']
    print("attempt task id = ", attid)

    # attid comes from the DB (an integer task id), so %d interpolation is safe here
    sql = "select * from task where root_task_id=%d order by id" % attid
    print(sql)
    curs = dbh.cursor()
    try:
        curs.execute(sql)
        # lower-case column names so callers can index rows consistently
        desc = [d[0].lower() for d in curs.description]
        tasks = {}
        for line in curs:
            lined = dict(zip(desc, line))
            lined['children'] = []  # filled in later when building the task tree
            tasks[lined['id']] = lined
    finally:
        # fix: cursor was previously never closed (resource leak)
        curs.close()

    return attid, tasks
def begrun(argv):
    """Perform submit-machine steps at the beginning of a processing attempt.

    Reads the attempt config (argv[0] is the wcl config file), optionally
    opens the PFW DB and begins the attempt task, registers and copies the
    three wcl files (orig/exp/full) to the home archive unless this is a
    dryrun, and records the attempt's archive path in the PFW tables.

    On any failure a message is recorded in the DB (when available), a
    failure email is sent, and the exception is re-raised.
    """
    pfw_dbh = None
    try:
        configfile = argv[0]
        config = pfwconfig.PfwConfig({'wclfile': configfile})
        if miscutils.fwdebug_check(6, 'BEGRUN_DEBUG'):
            miscutils.fwdebug_print('use_home_archive_output = %s' %
                                    config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT))

        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            # deferred import so non-DB runs never need the DB module
            import processingfw.pfwdb as pfwdb
            pfw_dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                                  config.getfull('submit_des_db_section'))
            pfw_dbh.begin_task(config['task_id']['attempt'], True)

        # the three wcl files to copy to the home archive
        origwcl = config['origwcl']
        expwcl = config['expwcl']
        fullwcl = config['fullwcl']

        # if not a dryrun and using a home archive for output
        if (config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT) != 'never' and
                'submit_files_mvmt' in config and
                (pfwdefs.PF_DRYRUN not in config or
                 not miscutils.convertBool(config.getfull(pfwdefs.PF_DRYRUN)))):
            # get home archive info
            home_archive = config.getfull('home_archive')
            archive_info = config[pfwdefs.SW_ARCHIVESECT][home_archive]

            # load filemgmt class
            attempt_tid = config['task_id']['attempt']
            filemgmt = pfwutils.pfw_dynam_load_class(pfw_dbh, config,
                                                     attempt_tid, attempt_tid,
                                                     "filemgmt",
                                                     archive_info['filemgmt'],
                                                     archive_info)

            # save file information
            filemgmt.register_file_data('wcl', [origwcl, expwcl, fullwcl],
                                        config['pfw_attempt_id'], attempt_tid,
                                        False, None, None)
            copy_files_home(config, archive_info, filemgmt)
            filemgmt.commit()

        if pfw_dbh is not None:
            print("Saving attempt's archive path into PFW tables...", end=' ')
            pfw_dbh.update_attempt_archive_path(config)
            pfw_dbh.commit()
    except Exception as exc:
        msg = "begrun: %s: %s" % (exc.__class__.__name__, str(exc))
        if pfw_dbh is not None:
            # NOTE(review): pfw_utils.PFW_DB_WARN — confirm module/constant
            # name; other framework code uses pfwdefs-style message levels.
            Messaging.pfw_message(pfw_dbh, config['pfw_attempt_id'],
                                  config['task_id']['attempt'],
                                  msg, pfw_utils.PFW_DB_WARN, 'begrun.out', 0)
        send_failed_email(config, msg)
        raise
    # SystemExit is a BaseException, so it is NOT caught by the Exception
    # handler above; this clause records sys.exit() calls separately.
    except SystemExit as exc:
        msg = "begrun: SysExit=%s" % str(exc)
        if pfw_dbh is not None:
            Messaging.pfw_message(pfw_dbh, config['pfw_attempt_id'],
                                  config['task_id']['attempt'],
                                  msg, pfw_utils.PFW_DB_WARN, 'begrun.out', 0)
        send_failed_email(config, msg)
        raise
def create_master_list(config, configfile, modname, moddict, search_name,
                       search_dict, search_type):
    """Create master data list for a module's list or file def.

    Builds the query command either from an explicit 'exec'/'args' pair in
    search_dict or, for 'query_fields', from the framework's genquerydb.py,
    then runs it (optionally under QCF / DB task tracking).  The output
    file and log names are stored back into search_dict ('qoutfile',
    'qlog').  Dies via miscutils.fwdie on a non-zero exit code.
    """
    miscutils.fwdebug_print("BEG")

    # output format for the query results; fall back to framework default
    if 'qouttype' in search_dict:
        qouttype = search_dict['qouttype']
    else:
        qouttype = intgdefs.DEFAULT_QUERY_OUTPUT_FORMAT

    qoutfile = config.get_filename('qoutput',
                                   {pfwdefs.PF_CURRVALS: {'modulename': modname,
                                                          'searchname': search_name,
                                                          'suffix': qouttype}})
    qlog = config.get_filename('qoutput',
                               {pfwdefs.PF_CURRVALS: {'modulename': modname,
                                                      'searchname': search_name,
                                                      'suffix': 'out'}})

    prog = None
    if 'exec' in search_dict:
        # user-supplied query executable
        prog = search_dict['exec']
        if 'args' not in search_dict:
            print("\t\tWarning: %s in module %s does not have args defined\n" % \
                  (search_name, modname))
            args = ""
        else:
            args = search_dict['args']
    elif 'query_fields' in search_dict:
        # fall back to the framework's generic DB query generator
        if 'processingfw_dir' in config:
            dirgenquery = config['processingfw_dir']
        elif 'PROCESSINGFW_DIR' in os.environ:
            dirgenquery = os.environ['PROCESSINGFW_DIR']
        else:
            miscutils.fwdie("Error: Could not determine base path for genquerydb.py",
                            pfwdefs.PF_EXIT_FAILURE)
        prog = "%s/libexec/genquerydb.py" % (dirgenquery)
        args = "--qoutfile %s --qouttype %s --config %s --module %s --search %s" % \
               (qoutfile, qouttype, configfile, modname, search_name)

    if not prog:
        # nothing to run; caller treats a missing master list as non-fatal here
        print("\tWarning: %s in module %s does not have exec or %s defined" % \
              (search_name, modname, pfwdefs.SW_QUERYFIELDS))
        return

    search_dict['qoutfile'] = qoutfile
    search_dict['qlog'] = qlog

    # expand wcl variables in the program path and its arguments
    prog = replfuncs.replace_vars_single(prog, config,
                                         {pfwdefs.PF_CURRVALS: {pfwdefs.SW_MODULESECT: modname},
                                          'searchobj': search_dict})
    # handle both outputxml and outputfile args
    args = replfuncs.replace_vars_single(args, config,
                                         {pfwdefs.PF_CURRVALS: {pfwdefs.SW_MODULESECT: modname,
                                                                'outputxml': qoutfile,
                                                                'outputfile': qoutfile,
                                                                'qoutfile': qoutfile},
                                          #intgdefs.REPLACE_VARS: True,
                                          'searchobj': search_dict})

    # get version for query code
    query_version = None
    if prog in config[pfwdefs.SW_EXEC_DEF]:
        query_version = pfwutils.get_version(prog, config[pfwdefs.SW_EXEC_DEF])

    # single-letter data type code recorded with the DB query task
    if search_type == pfwdefs.SW_LISTSECT:
        datatype = 'L'
    elif search_type == pfwdefs.SW_FILESECT:
        datatype = 'F'
    else:
        datatype = search_type[0].upper()

    # call code
    query_tid = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        pfw_dbh = pfwdb.PFWDB()
        query_tid = pfw_dbh.insert_data_query(config, modname, datatype,
                                              search_name, prog, args,
                                              query_version)
        #pfw_dbh.close()
    else:
        pfw_dbh = None

    cwd = os.getcwd()
    print("\t\tCalling code to create master list for obj %s in module %s" % \
          (search_name, modname))
    print("\t\t", prog, args)
    print("\t\tSee output in %s/%s" % (cwd, qlog))
    print("\t\tSee master list will be in %s/%s" % (cwd, qoutfile))
    print("\t\tCreating master list - start ", time.time())

    cmd = "%s %s" % (prog, args)
    exitcode = None
    try:
        exitcode = pfwutils.run_cmd_qcf(cmd, qlog, query_tid,
                                        os.path.basename(prog),
                                        config.getfull(pfwdefs.PF_USE_QCF),
                                        pfw_dbh, config['pfw_attempt_id'])
        #exitcode = pfwutils.run_cmd_qcf(cmd, qlog, query_tid, os.path.basename(prog),
        #                                5000, config.getfull(pfwdefs.PF_USE_QCF))
    # NOTE(review): deliberately broad catch — any failure in the query run
    # is printed with a traceback and converted to PF_EXIT_FAILURE so the
    # fwdie below reports it; a bare except also swallows KeyboardInterrupt.
    except:
        print("******************************")
        print("Error: ")
        (extype, exvalue, trback) = sys.exc_info()
        print("******************************")
        traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
        exitcode = pfwdefs.PF_EXIT_FAILURE

    print("\t\tCreating master list - end ", time.time())
    sys.stdout.flush()

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        # fresh connection: run_cmd_qcf may have taken a long time
        pfw_dbh = pfwdb.PFWDB()
        pfw_dbh.end_task(query_tid, exitcode, True)
        pfw_dbh.close()

    if exitcode != 0:
        miscutils.fwdie("Error: problem creating master list (exitcode = %s)" % (exitcode),
                        exitcode)
    miscutils.fwdebug_print("END")
def begblock(argv):
    """Program entry point for beginning-of-block processing.

    For each module in the block: runs data queries, builds master lists,
    creates wrapper instances and their wcl, and divides the work into
    jobs.  Then validates that every input file either exists in the
    archive or is produced by an earlier module, writes per-job task/wcl
    files, and creates the job-manager DAG.  The (updated) config is
    written back to ``argv[0]`` on both success and failure.

    Args:
        argv: list whose first element is the wcl config file name; if
            None, sys.argv is used.

    Returns:
        pfwdefs.PF_EXIT_SUCCESS or pfwdefs.PF_EXIT_DRYRUN; raises on error
        (after recording failure in the DB when DB output is enabled).
    """
    if argv is None:  # fix: was `argv == None`
        argv = sys.argv

    configfile = argv[0]
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    config.set_block_info()

    blknum = config[pfwdefs.PF_BLKNUM]
    blkdir = config.getfull('block_dir')
    os.chdir(blkdir)

    # propagate submit-side DB settings into the environment for child code
    (exists, submit_des_services) = config.search('submit_des_services')
    if exists and submit_des_services is not None:
        os.environ['DES_SERVICES'] = submit_des_services
    (exists, submit_des_db_section) = config.search('submit_des_db_section')
    if exists and submit_des_db_section is not None:
        os.environ['DES_DB_SECTION'] = submit_des_db_section

    dbh = None
    blktid = -1
    if miscutils.fwdebug_check(3, 'PFWBLOCK_DEBUG'):
        miscutils.fwdebug_print("blknum = %s" % (config[pfwdefs.PF_BLKNUM]))
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(submit_des_services, submit_des_db_section)
        dbh.insert_block(config)
        blktid = config['task_id']['block'][str(blknum)]
        config['task_id']['begblock'] = dbh.create_task(
            name='begblock',
            info_table=None,
            parent_task_id=blktid,
            root_task_id=int(config['task_id']['attempt']),
            label=None,
            do_begin=True,
            do_commit=True)

    try:
        modulelist = miscutils.fwsplit(config.getfull(pfwdefs.SW_MODULELIST).lower())
        modules_prev_in_list = {}
        joblist = {}
        parlist = OrderedDict()
        masterdata = OrderedDict()
        # map filename -> module index where it was first requested/produced
        filelist = {'infiles': {}, 'outfiles': {}}

        for num, modname in enumerate(modulelist):
            print("XXXXXXXXXXXXXXXXXXXX %s XXXXXXXXXXXXXXXXXXXX" % modname)
            if modname not in config[pfwdefs.SW_MODULESECT]:
                miscutils.fwdie("Error: Could not find module description for module %s\n" % (modname),
                                pfwdefs.PF_EXIT_FAILURE)
            moddict = config[pfwdefs.SW_MODULESECT][modname]

            runqueries(config, configfile, modname, modules_prev_in_list)
            pfwblock.read_master_lists(config, modname, masterdata, modules_prev_in_list)
            (infsect, outfsect) = pfwblock.get_datasect_types(config, modname)
            pfwblock.fix_master_lists(config, modname, masterdata, outfsect)

            # skip wrapper creation for no-op modules
            if pfwdefs.PF_NOOP not in moddict or not miscutils.convertBool(moddict[pfwdefs.PF_NOOP]):
                pfwblock.create_fullnames(config, modname, masterdata)
                if miscutils.fwdebug_check(9, 'PFWBLOCK_DEBUG') and modname in masterdata:
                    with open('%s-masterdata.txt' % modname, 'w') as fh:
                        miscutils.pretty_print_dict(masterdata[modname], fh)

                pfwblock.add_file_metadata(config, modname)
                sublists = pfwblock.create_sublists(config, modname, masterdata)
                if sublists is not None:
                    if miscutils.fwdebug_check(3, 'PFWBLOCK_DEBUG'):
                        miscutils.fwdebug_print("sublists.keys() = %s" % (list(sublists.keys())))

                loopvals = pfwblock.get_wrapper_loopvals(config, modname)
                wrapinst = pfwblock.create_wrapper_inst(config, modname, loopvals)

                wcnt = 1
                for winst in list(wrapinst.values()):
                    # fix: stime/etime were referenced in the END debug print
                    # but never assigned (NameError when PFWBLOCK_DEBUG >= 6)
                    stime = time.time()
                    if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
                        miscutils.fwdebug_print("winst %d - BEG" % wcnt)

                    pfwblock.assign_data_wrapper_inst(config, modname, winst,
                                                      masterdata, sublists,
                                                      infsect, outfsect)
                    pfwblock.finish_wrapper_inst(config, modname, winst, outfsect)
                    tempfiles = pfwblock.create_module_wrapper_wcl(config, modname, winst)

                    # record first module that requests each input file and the
                    # last module that produces each output file
                    for fl in tempfiles['infiles']:
                        if fl not in filelist['infiles']:
                            filelist['infiles'][fl] = num
                    for fl in tempfiles['outfiles']:
                        filelist['outfiles'][fl] = num

                    pfwblock.divide_into_jobs(config, modname, winst, joblist, parlist)

                    etime = time.time()
                    if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
                        miscutils.fwdebug_print("winst %d - %s - END" % (wcnt, etime - stime))
                    wcnt += 1

            modules_prev_in_list[modname] = True
            if miscutils.fwdebug_check(9, 'PFWBLOCK_DEBUG') and modname in masterdata:
                with open('%s-masterdata.txt' % modname, 'w') as fh:
                    miscutils.pretty_print_dict(masterdata[modname], fh)

        scriptfile = pfwblock.write_runjob_script(config)

        # files that are both consumed and produced within this block must be
        # produced by an earlier module than the one that requests them
        intersect = set(filelist['infiles'].keys()) & set(filelist['outfiles'].keys())
        finallist = []
        for fl in filelist['infiles']:
            if fl not in intersect:
                finallist.append(fl)
            elif filelist['infiles'][fl] <= filelist['outfiles'][fl]:
                raise Exception('Input file %s requested before it is generated.' % (fl))

        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            missingfiles = dbh.check_files(config, finallist)
            if len(missingfiles) > 0:
                raise Exception("The following input files cannot be found in the archive:"
                                + ",".join(missingfiles))

        miscutils.fwdebug_print("Creating job files - BEG")
        for jobkey, jobdict in sorted(joblist.items()):
            jobdict['jobnum'] = pfwutils.pad_jobnum(config.inc_jobnum())
            jobdict['jobkeys'] = jobkey
            jobdict['numexpwrap'] = len(jobdict['tasks'])
            if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
                # fix: format args were swapped (jobkey printed as jobnum)
                miscutils.fwdebug_print("jobnum = %s, jobkey = %s:" %
                                        (jobdict['jobnum'], jobkey))
            jobdict['tasksfile'] = write_workflow_taskfile(config,
                                                           jobdict['jobnum'],
                                                           jobdict['tasks'])

            # copy input lists to home archive unless dryrun / disabled
            if (len(jobdict['inlist']) > 0 and
                    config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT) != 'never' and
                    'submit_files_mvmt' in config and
                    (pfwdefs.PF_DRYRUN not in config or
                     not miscutils.convertBool(config.getfull(pfwdefs.PF_DRYRUN)))):
                # get home archive info
                home_archive = config.getfull('home_archive')
                archive_info = config[pfwdefs.SW_ARCHIVESECT][home_archive]

                # load filemgmt class
                attempt_tid = config['task_id']['attempt']
                filemgmt = pfwutils.pfw_dynam_load_class(dbh, config,
                                                         attempt_tid, attempt_tid,
                                                         "filemgmt",
                                                         archive_info['filemgmt'],
                                                         archive_info)

                # save file information
                filemgmt.register_file_data('list', jobdict['inlist'],
                                            config['pfw_attempt_id'], attempt_tid,
                                            False, None, None)
                pfwblock.copy_input_lists_home_archive(config, filemgmt,
                                                       archive_info, jobdict['inlist'])
                filemgmt.commit()

            jobdict['inputwcltar'] = pfwblock.tar_inputfiles(
                config, jobdict['jobnum'], jobdict['inwcl'] + jobdict['inlist'])
            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
                dbh.insert_job(config, jobdict)
            pfwblock.write_jobwcl(config, jobkey, jobdict)
            if ('glidein_use_wall' in config and
                    miscutils.convertBool(config.getfull('glidein_use_wall')) and
                    'jobwalltime' in config):
                jobdict['wall'] = config['jobwalltime']
        miscutils.fwdebug_print("Creating job files - END")

        numjobs = len(joblist)
        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            dbh.update_block_numexpjobs(config, numjobs)

        dagfile = config.get_filename('jobdag')
        pfwblock.create_jobmngr_dag(config, dagfile, scriptfile, joblist)
    except:
        # record the failure in the DB and persist the (partially updated)
        # config before re-raising so the caller sees the original error
        retval = pfwdefs.PF_EXIT_FAILURE
        with open(configfile, 'w') as cfgfh:
            config.write(cfgfh)  # save config, have updated jobnum, wrapnum, etc
        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            dbh.end_task(config['task_id']['begblock'], retval, True)
            dbh.end_task(blktid, retval, True)
        raise

    # save config, have updated jobnum, wrapnum, etc
    with open(configfile, 'w') as cfgfh:
        config.write(cfgfh)

    (exists, dryrun) = config.search(pfwdefs.PF_DRYRUN)
    if exists and miscutils.convertBool(dryrun):
        retval = pfwdefs.PF_EXIT_DRYRUN
    else:
        retval = pfwdefs.PF_EXIT_SUCCESS
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh.end_task(config['task_id']['begblock'], retval, True)
    miscutils.fwdebug_print("END - exiting with code %s" % retval)
    return retval
def blockpost(argv=None):
    """Program entry point for post-block processing.

    Redirects stdout/stderr to a log file (later renamed into the block
    directory), optionally verifies archive file sizes, gathers block /
    job / wrapper status from the DB to build a status email, sends the
    appropriate success/failure/dryrun email, closes out the block (and,
    on failure, attempt) tasks in the DB, and advances the block number
    on success.

    Args:
        argv: [progname, configfile, retval]; defaults to sys.argv.

    Returns:
        int exit status (retval possibly overridden by DB task statuses,
        plus the file-verification status).
    """
    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = open('blockpost.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))  # print command line for debugging
    print("running on %s" % (socket.gethostname()))

    if len(argv) != 3:
        print('Usage: blockpost.py configfile retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    retval = int(argv[2])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("configfile = %s" % configfile)
        miscutils.fwdebug_print("retval = %s" % retval)

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename('block',
                                       {pfwdefs.PF_CURRVALS: {'flabel': 'blockpost',
                                                              'fsuffix': 'out'}})
    new_log_name = "%s/%s" % (blkdir, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    debugfh.close()
    os.chmod('blockpost.out', 0o666)
    os.rename('blockpost.out', new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh
    os.chdir(blkdir)

    log_pfw_event(config, blockname, 'blockpost', 'j', ['posttask', retval])

    dryrun = config.getfull(pfwdefs.PF_DRYRUN)
    run = config.getfull('run')
    attid = config['pfw_attempt_id']
    reqnum = config.getfull(pfwdefs.REQNUM)
    unitname = config.getfull(pfwdefs.UNITNAME)
    attnum = config.getfull(pfwdefs.ATTNUM)
    blknum = int(config.getfull(pfwdefs.PF_BLKNUM))
    blktid = None
    msg2 = ""          # body of the status email, accumulated below
    dbh = None
    job_byblk = {}
    wrap_byjob = {}
    wrap_bymod = {}
    wrapinfo = {}
    jobinfo = {}
    failedwraps = {}   # per-job wrapnums of the last module that failed
    whyfailwraps = {}  # mod failures for other modname, shouldn't happen
    usedb = miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT))
    verify_files = miscutils.convertBool(config.getfull('verify_files'))
    verify_status = 0
    if verify_files and not usedb:
        print('Skipping file verification due to lack of database connection')
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        sem = None
        try:
            miscutils.fwdebug_print("Connecting to DB")
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                              config.getfull('submit_des_db_section'))

            if verify_files:
                # look up the archive root, then compare on-disk file sizes
                # with DB entries under a DB semaphore
                curs = dbh.cursor()
                curs.execute("select root from ops_archive where name='%s'" % (config.getfull('home_archive')))
                rows = curs.fetchall()
                if rows is None or len(rows) != 1:
                    raise Exception("Invalid archive name (%s). Found %s rows in ops_archive" % (config.getfull('home_archive'), len(rows)))
                root = rows[0][0]
                if not os.path.isdir(root):
                    print("Cannot read archive root directory:%s This program must be run on an NCSA machine with access to the archive storage system." % (config.getfull('home_archive')))
                sem = dbsem.DBSemaphore('verify_files_10', None,
                                        config.getfull('submit_des_services'),
                                        config.getfull('submit_des_db_section'))
                print("\n\nVerifying archive file sizes on disk (0 is success)")
                verify_status = cu.compare(dbh=dbh,
                                           archive=config.getfull('home_archive'),
                                           pfwid=attid, filesize=True,
                                           md5sum=False, quick=True,
                                           debug=False, script=False,
                                           verbose=False, silent=True)
                if sem is not None:
                    del sem
                print(" Verification of files returned status %i" % (verify_status))
                if verify_status != 0:
                    print(" This indicates that one or more files do not have the correct file size (based on DB entries). Run")
                    print("\n compare_db.py --des_services %s --section %s --archive %s --pfwid %i --filesize --verbose" %
                          (config.getfull('submit_des_services'),
                           config.getfull('submit_des_db_section'),
                           config.getfull('home_archive'), int(attid)))
                    print("\n to see the details.")

            # NOTE(review): qdbh is only bound when PF_USE_QCF is true, but it
            # is read unconditionally below (`if qdbh is not None`, and in
            # get_qcf_messages) — NameError risk when QCF is disabled; confirm.
            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_QCF)):
                import qcframework.qcfdb as qcfdb
                qdbh = qcfdb.QCFDB(config.getfull('submit_des_services'),
                                   config.getfull('submit_des_db_section'))

            print("\n\nChecking non-job block task status from task table in DB (%s is success)" % \
                  pfwdefs.PF_EXIT_SUCCESS)
            num_bltasks_failed = 0
            bltasks = {}
            blktid = None
            if ('block' in config['task_id'] and
                    str(blknum) in config['task_id']['block']):
                blktid = int(config['task_id']['block'][str(blknum)])
                miscutils.fwdebug_print("Getting block task info from DB")
                start_time = time.time()
                bltasks = dbh.get_block_task_info(blktid)
                end_time = time.time()
                miscutils.fwdebug_print("Done getting block task info from DB (%s secs)" % (end_time - start_time))
                for bltdict in list(bltasks.values()):
                    print("Block status = ", bltdict['status'])
                    if bltdict['status'] == pfwdefs.PF_EXIT_DRYRUN:
                        print("setting return value to dryrun")
                        retval = bltdict['status']
                    elif bltdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        num_bltasks_failed += 1
                        msg2 += "\t%s" % (bltdict['name'])
                        if bltdict['label'] is not None:
                            msg2 += " - %s" % (bltdict['label'])
                        msg2 += " failed\n"

                        if bltdict['name'] == 'begblock':
                            # try to read the begblock.out and begblock.err files
                            print("Trying to get begblock.out and begblock.err")
                            msg2 += get_subblock_output("begblock")

                            # try to get QCF messages (especially from query codes)
                            begblock_tid = int(config['task_id']['begblock'])
                            sql = "select id from task where parent_task_id=%i and status!=0" % (begblock_tid)
                            curs = dbh.cursor()
                            curs.execute(sql)
                            res = curs.fetchall()
                            msg2 += "\n===== QCF Messages =====\n"
                            msg2 += "\n begblock\n"
                            wrapids = [blktid, begblock_tid]
                            for r in res:
                                wrapids.append(r[0])

                            wrapmsg = {}
                            if qdbh is not None:
                                miscutils.fwdebug_print("Querying QCF messages")
                                start_time = time.time()
                                wrapmsg = qdbh.get_qcf_messages_for_wrappers(wrapids)
                                end_time = time.time()
                                miscutils.fwdebug_print("Done querying QCF messages (%s secs)" % (end_time - start_time))
                                miscutils.fwdebug_print("wrapmsg = %s" % wrapmsg)
                            if len(wrapmsg) == 0:
                                msg2 += " No QCF messages\n"
                            else:
                                for msgs in list(wrapmsg.values()):
                                    for m in msgs:
                                        msg2 += " " + m['message'] + "\n"

                        retval = pfwdefs.PF_EXIT_FAILURE

                if retval != pfwdefs.PF_EXIT_DRYRUN:
                    print("\n\nChecking job status from pfw_job table in DB (%s is success)" % \
                          pfwdefs.PF_EXIT_SUCCESS)
                    miscutils.fwdebug_print("Getting job info from DB")
                    start_time = time.time()
                    jobinfo = dbh.get_job_info({'pfw_block_task_id': blktid})
                    end_time = time.time()
                    miscutils.fwdebug_print("Done getting job info from DB (%s secs)" % (end_time - start_time))

                    miscutils.fwdebug_print("Getting wrapper info from DB")
                    start_time = time.time()
                    wrapinfo = dbh.get_wrapper_info(pfw_attempt_id=attid,
                                                    pfw_block_task_id=blktid)
                    end_time = time.time()
                    miscutils.fwdebug_print("Done getting wrapper info from DB (%s secs)" % (end_time - start_time))
            else:
                # block task id missing from config: record the problem in the DB
                msg = "Could not find task id for block %s in config.des" % blockname
                print("Error:", msg)
                if 'attempt' in config['task_id']:
                    miscutils.fwdebug_print("Saving pfw message")
                    start_time = time.time()
                    # NOTE(review): pfw_utils.PFW_DB_INFO — confirm module/
                    # constant name (cf. pfwdefs message-level constants).
                    Messaging.pfw_message(dbh, attid, config['task_id']['attempt'],
                                          msg, pfw_utils.PFW_DB_INFO, 'blockpost.out', 0)
                    end_time = time.time()
                    miscutils.fwdebug_print("Done saving pfw message (%s secs)" % (end_time - start_time))
                print("all the task ids:", config['task_id'])

            archive = None
            if pfwdefs.HOME_ARCHIVE in config:
                archive = config.getfull(pfwdefs.HOME_ARCHIVE)
            logfullnames = dbh.get_fail_log_fullnames(attid, archive)
            dbh.close()

            print("len(jobinfo) = ", len(jobinfo))
            print("len(wrapinfo) = ", len(wrapinfo))
            job_byblk = pfwutils.index_job_info(jobinfo)
            print("blktid: ", blktid)
            print("job_byblk:", job_byblk)

            if blktid not in job_byblk:
                print("Warn: could not find jobs for block %s" % blknum)
                print(" This is ok if attempt died before jobs ran")
                # NOTE(review): the format string below has no %-specifier, so
                # the % operator raises TypeError here — likely meant
                # "...job_byblk: %s" — confirm and fix.
                print(" block task_ids in job_byblk:" % list(job_byblk.keys()))
            else:
                wrap_byjob, wrap_bymod = pfwutils.index_wrapper_info(wrapinfo)
                for jobtid, jobdict in sorted(job_byblk[blktid].items()):
                    failedwraps[jobtid] = []
                    whyfailwraps[jobtid] = []
                    jobkeys = ""

                    # don't print out successful wrappers
                    if jobtid in wrap_byjob and jobdict['status'] == pfwdefs.PF_EXIT_SUCCESS:
                        continue

                    if jobdict['jobkeys'] is not None:
                        jobkeys = jobdict['jobkeys']

                    submit_job_path = "%s/B%02d-%s/%04d" % (
                        config.getfull('work_dir'),
                        int(config.getfull('blknum')),
                        config.getfull('blockname'),
                        int(jobdict['jobnum']))
                    msg2 += "\n\t%s (%s) " % (pfwutils.pad_jobnum(jobdict['jobnum']), jobkeys)

                    if jobtid not in wrap_byjob:
                        msg2 += "\tNo wrapper instances"
                    else:
                        # report progress as <#wrappers run>/<#expected> <last module>
                        maxwrap = max(wrap_byjob[jobtid].keys())
                        modname = wrap_byjob[jobtid][maxwrap]['modname']
                        msg2 += "%d/%s %s" % (len(wrap_byjob[jobtid]),
                                              jobdict['expect_num_wrap'], modname)

                        # determine wrappers for this job without success exit
                        for wrapnum, wdict in list(wrap_byjob[jobtid].items()):
                            if wdict['status'] is None or wdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                                if wdict['modname'] == modname:
                                    failedwraps[jobtid].append(wrapnum)
                                else:
                                    whyfailwraps[jobtid].append(wrapnum)

                    # translate job status to a human-readable failure label
                    if jobdict['status'] == pfwdefs.PF_EXIT_EUPS_FAILURE:
                        msg2 += " - FAIL - EUPS setup failure"
                        retval = jobdict['status']
                    elif jobdict['status'] == pfwdefs.PF_EXIT_CONDOR:
                        msg2 += " - FAIL - Condor/Globus failure"
                        retval = jobdict['status']
                    elif jobdict['status'] is None:
                        msg2 += " - FAIL - NULL status"
                        retval = pfwdefs.PF_EXIT_FAILURE
                    elif jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += " - FAIL - Non-zero status"
                        retval = jobdict['status']

                    if jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += "\n\t\t%s/runjob.out " % (submit_job_path)
                    msg2 += '\n'

                    # print pfw_messages
                    if 'message' in jobdict:
                        print(jobdict['message'])
                        for msgdict in sorted(jobdict['message'], key=lambda k: k['message_time']):
                            level = int(msgdict['message_lvl'])
                            levelstr = 'info'
                            if level == pfwdefs.PFWDB_MSG_WARN:
                                levelstr = 'WARN'
                            elif level == pfwdefs.PFWDB_MSG_ERROR:
                                levelstr = 'ERROR'
                            msg2 += "\t\t%s - %s\n" % (levelstr,
                                                       msgdict['message'].replace('\n', '\n\t\t\t'))

                    if jobtid in wrap_byjob:
                        # print log file name for failed/unfinished wrappers
                        for wrapnum in failedwraps[jobtid]:
                            wrapdict = wrap_byjob[jobtid][wrapnum]
                            if wrapdict['log'] in logfullnames:
                                msg2 += "\t\t%s - %s\n" % (wrapnum, logfullnames[wrapdict['log']])
                            else:
                                msg2 += "\t\t%s - Could not find log in archive (%s)\n" % (wrapnum, wrapdict['log'])
                            wrapmsg = get_qcf_messages(qdbh, config, [wrapdict['task_id']])
                            msg2 = print_qcf_messages(config, wrapdict, wrapmsg, msg2)
                        msg2 += '\n'

                    # If weirdness happened in run, print a message
                    if len(whyfailwraps[jobtid]) > 0:
                        msg2 += "\n*** Contact framework developers. Wrappers ran after at least 1 wrapper from a previous module that doesn't have success status.\n"
                        msg2 += "\t%s\n" % ','.join(whyfailwraps[jobtid])
        except Exception as exc:
            # best effort: release the semaphore, note the problem in the
            # email body, dump the traceback to the log, and continue so the
            # email/DB-closeout below still runs
            if sem is not None:
                del sem
            msg2 += "\n\nEncountered error trying to gather status information for email."
            msg2 += "\nCheck output for blockpost for further details."
            print("\n\nEncountered error trying to gather status information for email")
            print("%s: %s" % (exc.__class__.__name__, str(exc)))
            (extype, exvalue, trback) = sys.exc_info()
            traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
            retval = pfwdefs.PF_EXIT_FAILURE

    # fold the file-verification result into the exit status
    retval = int(retval) + verify_status
    print("before email retval =", retval)

    when_to_email = 'run'
    if 'when_to_email' in config:
        when_to_email = config.getfull('when_to_email').lower()

    if miscutils.convertBool(dryrun):
        if when_to_email != 'never':
            print("dryrun = ", dryrun)
            print("Sending dryrun email")
            if retval == pfwdefs.PF_EXIT_DRYRUN:
                msg1 = "%s: In dryrun mode, block %s has finished successfully." % (run, blockname)
            else:
                msg1 = "%s: In dryrun mode, block %s has failed." % (run, blockname)
            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending dryrun email")
            print("retval = ", retval)
        retval = pfwdefs.PF_EXIT_DRYRUN
    elif retval:
        if when_to_email != 'never':
            print("Sending block failed email\n")
            msg1 = "%s: block %s has failed." % (run, blockname)
            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending failed email")
            print("retval = ", retval)
    elif retval == pfwdefs.PF_EXIT_SUCCESS:
        if when_to_email == 'block':
            msg1 = "%s: block %s has finished successfully." % (run, blockname)
            msg2 = ""
            print("Sending success email\n")
            send_email(config, blockname, retval, "", msg1, msg2)
        elif when_to_email == 'run':
            # only email at the end of the run's final block
            numblocks = len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ','))
            if int(config[pfwdefs.PF_BLKNUM]) == numblocks:
                msg1 = "%s: run has finished successfully." % (run)
                msg2 = ""
                print("Sending success email\n")
                send_email(config, blockname, retval, "", msg1, msg2)
            else:
                print("Not sending run email because not last block")
                print("retval = ", retval)
        else:
            print("Not sending success email")
            print("retval = ", retval)
    else:
        # NOTE(review): unreachable — `elif retval:` above captures every
        # non-zero value and PF_EXIT_SUCCESS handles zero; confirm.
        print("Not sending email")
        print("retval = ", retval)

    # Store values in DB and hist file
    dbh = None
    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        if blktid is not None:
            print("Updating end of block task", blktid)
            dbh.end_task(blktid, retval, True)
        else:
            print("Could not update end of block task without block task id")
        if retval != pfwdefs.PF_EXIT_SUCCESS:
            print("Updating end of attempt", config['task_id']['attempt'])
            dbh.end_task(config['task_id']['attempt'], retval, True)
        dbh.commit()
        dbh.close()

    print("before next block retval = ", retval)
    if retval == pfwdefs.PF_EXIT_SUCCESS:
        # Get ready for next block
        config.inc_blknum()
        with open(configfile, 'w') as cfgfh:
            config.write(cfgfh)
        print("new blknum = ", config[pfwdefs.PF_BLKNUM])
        print("number of blocks = ", len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')))

    miscutils.fwdebug_print("Returning retval = %s (%s)" % (retval, type(retval)))
    miscutils.fwdebug_print("END")
    debugfh.close()
    return int(retval)
def summary(argv=None):
    """Create and send the end-of-run summary email.

    argv: command line [prog, configfile, status]; defaults to sys.argv.
    Returns the run status (int once parsed, None when not supplied, or
    PF_EXIT_FAILURE on usage error).
    """
    if argv is None:
        argv = sys.argv

    # capture all output in a log file for post-mortem debugging
    debugfh = open('summary.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))
    if len(argv) < 2:
        print("Usage: summary configfile status")
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    if len(argv) == 3:
        # bugfix: argv values are strings, so the original "status == 1"
        # comparison could never be true; parse to int first
        status = int(argv[2])

        # dagman always exits with 0 or 1
        if status == 1:
            status = pfwdefs.PF_EXIT_FAILURE
    else:
        print("summary: Missing status value")
        status = None

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': argv[1]})

    log_pfw_event(config, 'process', 'mngr', 'j', ['posttask', status])

    msgstr = ""
    msg1 = ""
    subject = ""
    if status is None:
        msg1 = f"Processing finished with unknown results.\n{msgstr}"
    elif pfwdefs.PF_DRYRUN in config and miscutils.convertBool(
            config.getfull(pfwdefs.PF_DRYRUN)):
        msg1 = f"Processing ended after DRYRUN\n{msgstr}"

    # bugfix: guard the int() conversion — status may legitimately be None
    # (missing command-line arg), which previously raised TypeError here
    if status is not None and int(status) == pfwdefs.PF_EXIT_SUCCESS:
        msg1 = "Processing has successfully completed.\n"
        subject = ""
    else:
        print(f"status = '{status}'")
        print("type(status) =", type(status))
        print(f"SUCCESS = '{pfwdefs.PF_EXIT_SUCCESS}'")
        print("type(SUCCESS) =", type(pfwdefs.PF_EXIT_SUCCESS))
        msg1 = f"Processing aborted with status {status}.\n"
        subject = ""

    pfwemail.send_email(config, "processing", status, subject, msg1, '')

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        dbh.update_attempt_end_vals(config['pfw_attempt_id'], status)

    print(f"summary: status = '{status}'")
    print("summary:", msg1)
    print("summary: End")
    debugfh.close()
    return status
def jobpre(argv=None):
    """Program entry point for per-job pre-task processing.

    argv: command line [prog, configfile, jobnum]; defaults to sys.argv.
    Renames its scratch log into the job directory, then (when DB output
    is enabled) records condor/target submit timestamps for the job.
    Returns PF_EXIT_SUCCESS, or PF_EXIT_FAILURE on usage error.
    """
    if argv is None:
        argv = sys.argv

    #debugfh = tempfile.NamedTemporaryFile(prefix='jobpre_', dir='.', delete=False)
    # random suffix avoids collisions between concurrent jobpre invocations
    default_log = f"jobpre_{random.randint(1,10000000):08d}.out"
    debugfh = open(default_log, 'w')
    tmpfn = debugfh.name

    # save the real streams so they can be restored before the rename
    outorig = sys.stdout
    errorig = sys.stderr
    sys.stdout = debugfh
    sys.stderr = debugfh
    print(' '.join(argv))   # command line for debugging
    print(os.getcwd())

    if len(argv) < 3:
        print("Usage: jobpre configfile jobnum")
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    # bugfix: honor the argv parameter instead of reading global sys.argv
    configfile = argv[1]
    jobnum = argv[2]   # could also be uberctrl

    # read wcl file
    config = pfwconfig.PfwConfig({'wclfile': configfile})

    blockname = config.getfull('blockname')
    blkdir = config.get('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename('job',
                                       {pfwdefs.PF_CURRVALS: {pfwdefs.PF_JOBNUM: jobnum,
                                                              'flabel': 'jobpre',
                                                              'fsuffix': 'out'}})
    new_log_name = f"{blkdir}/{tjpad}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")

    # close and move the scratch log to its final location
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)

    dbh = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        # reuse an existing config DB handle when one is available
        if config.dbh is None:
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                              config.getfull('submit_des_db_section'))
        else:
            dbh = config.dbh

    # reopen logging on the renamed file, via Messaging when QCF is on
    if 'use_qcf' in config and config['use_qcf']:
        debugfh = Messaging.Messaging(new_log_name, 'jobpre.py',
                                      config['pfw_attempt_id'], dbh=dbh,
                                      mode='a+', usedb=dbh is not None)
    else:
        debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        ctstr = dbh.get_current_timestamp_str()
        dbh.update_job_info(config, tjpad, {'condor_submit_time': ctstr,
                                            'target_submit_time': ctstr})

    log_pfw_event(config, blockname, tjpad, 'j', ['pretask'])

    miscutils.fwdebug_print("jobpre done")
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    return pfwdefs.PF_EXIT_SUCCESS
def jobpost(argv=None):
    """Performs steps needed after a pipeline job.

    argv: command line [prog, configfile, block, jobnum, inputtar,
    outputtar, retval]; defaults to sys.argv.
    Renames the scratch log into the job directory, updates job/task rows
    in the DB (when enabled), folds in information parsed from the condor
    user log, and unpacks the output wcl tarball.
    Returns the (possibly normalized) job exit status as an int.
    """
    # mapping from condor user-log keys to pfw job DB column names
    condor2db = {'jobid': 'condor_job_id',
                 'csubmittime': 'condor_submit_time',
                 'gsubmittime': 'target_submit_time',
                 'starttime': 'condor_start_time',
                 'endtime': 'condor_end_time'}

    if argv is None:
        argv = sys.argv

    # capture all output in a temp log until the real log name is known
    debugfh = tempfile.NamedTemporaryFile(mode='w+', prefix='jobpost_',
                                          dir='.', delete=False)
    tmpfn = debugfh.name
    sys.stdout = debugfh
    sys.stderr = debugfh

    miscutils.fwdebug_print("temp log name = %s" % tmpfn)
    print('cmd>', ' '.join(argv))   # print command line for debugging

    if len(argv) < 7:
        # open file to catch error messages about command line
        print('Usage: jobpost.py configfile block jobnum inputtar outputtar retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    blockname = argv[2]
    jobnum = argv[3]
    inputtar = argv[4]
    outputtar = argv[5]
    retval = pfwdefs.PF_EXIT_FAILURE
    if len(argv) == 7:
        # bugfix: read retval from the argv parameter, not global sys.argv
        retval = int(argv[6])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("configfile = %s" % configfile)
        miscutils.fwdebug_print("block = %s" % blockname)
        miscutils.fwdebug_print("jobnum = %s" % jobnum)
        miscutils.fwdebug_print("inputtar = %s" % inputtar)
        miscutils.fwdebug_print("outputtar = %s" % outputtar)
        miscutils.fwdebug_print("retval = %s" % retval)

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")

    # now that have more information, rename output file
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("before get_filename")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # work from inside the job directory so relative paths resolve
    os.chdir("%s/%s" % (blkdir, tjpad))
    new_log_name = config.get_filename('job',
                                       {pfwdefs.PF_CURRVALS: {pfwdefs.PF_JOBNUM: jobnum,
                                                              'flabel': 'jobpost',
                                                              'fsuffix': 'out'}})
    new_log_name = "%s" % (new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    debugfh.close()
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    dbh = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))

    # get job information from the job stdout if exists
    (tjobinfo, tjobinfo_task) = parse_job_output(config, jobnum, dbh, retval)

    if dbh and len(tjobinfo) > 0:
        print("tjobinfo: ", tjobinfo)
        dbh.update_tjob_info(config['task_id']['job'][jobnum], tjobinfo)

    # get job information from the condor job log
    logfilename = 'runjob.log'
    if os.path.exists(logfilename) and os.path.getsize(logfilename) > 0:
        # if made it to submitting/running jobs
        try:
            # update job info in DB from condor log
            print("Updating job info in DB from condor log")
            condorjobinfo = pfwcondor.parse_condor_user_log(logfilename)
            if len(list(condorjobinfo.keys())) > 1:
                print("More than single job in job log")
            # only the first job entry is processed
            j = list(condorjobinfo.keys())[0]
            cjobinfo = condorjobinfo[j]
            djobinfo = {}
            for ckey, dkey in list(condor2db.items()):
                if ckey in cjobinfo:
                    djobinfo[dkey] = cjobinfo[ckey]
            print(djobinfo)
            # NOTE(review): dbh may be None here if DB output is disabled;
            # the surrounding try/except absorbs the resulting error
            dbh.update_job_info(config, cjobinfo['jobname'], djobinfo)

            if 'holdreason' in cjobinfo and cjobinfo['holdreason'] is not None:
                msg = "Condor HoldReason: %s" % cjobinfo['holdreason']
                print(msg)
                if dbh:
                    Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                          config['task_id']['job'][jobnum],
                                          msg, pfwdefs.PFWDB_MSG_WARN)

            if 'abortreason' in cjobinfo and cjobinfo['abortreason'] is not None:
                tjobinfo_task['start_time'] = cjobinfo['starttime']
                tjobinfo_task['end_time'] = cjobinfo['endtime']
                # distinguish operator delete from generic condor failure
                if 'condor_rm' in cjobinfo['abortreason']:
                    tjobinfo_task['status'] = pfwdefs.PF_EXIT_OPDELETE
                else:
                    tjobinfo_task['status'] = pfwdefs.PF_EXIT_CONDOR
            else:
                pass
        except Exception:
            # best-effort: log the traceback but continue with cleanup
            (extype, exvalue, trback) = sys.exc_info()
            traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
    else:
        print("Warning: no job condor log file")

    if dbh:
        # update job task, filling in defaults for missing fields
        if 'status' not in tjobinfo_task:
            tjobinfo_task['status'] = pfwdefs.PF_EXIT_CONDOR
        if 'end_time' not in tjobinfo_task:
            tjobinfo_task['end_time'] = datetime.now()
        wherevals = {'id': config['task_id']['job'][jobnum]}
        dbh.basic_update_row('task', tjobinfo_task, wherevals)
        dbh.commit()

    log_pfw_event(config, blockname, jobnum, 'j', ['posttask', retval])

    # input wcl should already exist in untar form
    if os.path.exists(inputtar):
        print("found inputtar: %s" % inputtar)
        os.unlink(inputtar)
    else:
        print("Could not find inputtar: %s" % inputtar)

    # untar output wcl tar and delete tar
    if os.path.exists(outputtar):
        print("Size of output wcl tar:", os.path.getsize(outputtar))
        if os.path.getsize(outputtar) > 0:
            print("found outputtar: %s" % outputtar)
            pfwutils.untar_dir(outputtar, '..')
            os.unlink(outputtar)
        else:
            msg = "Warn: outputwcl tarball (%s) is 0 bytes." % outputtar
            print(msg)
            if dbh:
                Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                      config['task_id']['job'][jobnum],
                                      msg, pfwdefs.PFWDB_MSG_WARN)
    else:
        msg = "Warn: outputwcl tarball (%s) does not exist." % outputtar
        print(msg)
        if dbh:
            Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                  config['task_id']['job'][jobnum],
                                  msg, pfwdefs.PFWDB_MSG_WARN)

    # collapse any non-success status to the generic failure code
    if retval != pfwdefs.PF_EXIT_SUCCESS:
        miscutils.fwdebug_print("Setting failure retval")
        retval = pfwdefs.PF_EXIT_FAILURE

    miscutils.fwdebug_print("Returning retval = %s" % retval)
    miscutils.fwdebug_print("jobpost done")
    debugfh.close()
    return int(retval)
def __init__(self, args):
    """ Initialize configuration object, typically reading from wclfile

    args: dict of startup options; recognized keys include 'wclfile',
    'submit_des_services', 'submit_des_db_section', 'usePFWconfig',
    'get_db_config', and the pfwdefs command-line override keys.

    Precedence (lowest to highest): software-install pfwconfig, DB
    defaults, then the submit wcl file (merged last so it overrides all).
    """
    WCL.__init__(self)

    # data which needs to be kept across programs must go in self
    # data which needs to be searched also must go in self
    self.set_search_order(PFW_SEARCH_ORDER)

    # submit wcl is read into a temporary WCL and merged into self later,
    # so its values override the software/DB defaults read in between
    wclobj = WCL()

    if 'wclfile' in args:
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print("Reading wclfile: %s" % (args['wclfile']))
        try:
            starttime = time.time()
            print("\tReading submit wcl...", end=' ')
            with open(args['wclfile'], "r") as wclfh:
                wclobj.read(wclfh, filename=args['wclfile'])
            print("DONE (%0.2f secs)" % (time.time() - starttime))
            #wclobj['wclfile'] = args['wclfile']
        except IOError as err:
            miscutils.fwdie(
                "Error: Problem reading wcl file '%s' : %s" % (args['wclfile'], err),
                pfwdefs.PF_EXIT_FAILURE)

    # location of des services file: arg overrides wcl, then environment,
    # then fall through to the library default
    if 'submit_des_services' in args and args[
            'submit_des_services'] is not None:
        wclobj['submit_des_services'] = args['submit_des_services']
    elif 'submit_des_services' not in wclobj:
        if 'DES_SERVICES' in os.environ:
            wclobj['submit_des_services'] = os.environ['DES_SERVICES']
        else:
            # let it default to $HOME/.desservices.init
            wclobj['submit_des_services'] = None

    # which section to use in des services file (same precedence as above)
    if 'submit_des_db_section' in args and args[
            'submit_des_db_section'] is not None:
        wclobj['submit_des_db_section'] = args['submit_des_db_section']
    elif 'submit_des_db_section' not in wclobj:
        if 'DES_DB_SECTION' in os.environ:
            wclobj['submit_des_db_section'] = os.environ['DES_DB_SECTION']
        else:
            # let DB connection code print error message
            wclobj['submit_des_db_section'] = None

    # for values passed in on command line, set top-level config
    for var in (pfwdefs.PF_DRYRUN, pfwdefs.PF_USE_DB_IN,
                pfwdefs.PF_USE_DB_OUT, pfwdefs.PF_USE_QCF,
                pfwdefs.PF_VERIFY_FILES):
        if var in args and args[var] is not None:
            wclobj[var] = args[var]

    # optionally seed self with the framework's installed default config
    if 'usePFWconfig' in args:
        pfwconfig = os.environ['PROCESSINGFW_DIR'] + '/etc/pfwconfig.des'
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print("Reading pfwconfig: %s" % (pfwconfig))
        starttime = time.time()
        print("\tReading config from software install...", end=' ')
        pfwcfg_wcl = WCL()
        with open(pfwconfig, "r") as wclfh:
            pfwcfg_wcl.read(wclfh, filename=pfwconfig)
        self.update(pfwcfg_wcl)
        print("DONE (%0.2f secs)" % (time.time() - starttime))

    # determine whether DB input is enabled (submit wcl wins over defaults)
    use_db_in = None
    if pfwdefs.PF_USE_DB_IN in wclobj:
        use_db_in = miscutils.convertBool(wclobj[pfwdefs.PF_USE_DB_IN])
    elif pfwdefs.PF_USE_DB_IN in self:
        use_db_in = miscutils.convertBool(self[pfwdefs.PF_USE_DB_IN])

    if (use_db_in and 'get_db_config' in args and args['get_db_config']):
        print("\tGetting defaults from DB...", end=' ')
        sys.stdout.flush()
        starttime = time.time()
        dbh = pfwdb.PFWDB(wclobj['submit_des_services'],
                          wclobj['submit_des_db_section'])
        print("DONE (%0.2f secs)" % (time.time() - starttime))
        self.update(dbh.get_database_defaults())

    # wclfile overrides all, so must be added last
    if 'wclfile' in args:
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print("Reading wclfile: %s" % (args['wclfile']))
        self.update(wclobj)

    self.set_names()

    # store the file name of the top-level submitwcl in dict:
    if 'submitwcl' not in self and 'wclfile' in args:
        self['submitwcl'] = args['wclfile']

    if 'processingfw_dir' not in self and \
            'PROCESSINGFW_DIR' in os.environ:
        self['processingfw_dir'] = os.environ['PROCESSINGFW_DIR']

    # first-time initialization: create the 'current' tracking section and
    # seed the wrapper/block/task/job counters (stored as strings)
    if 'current' not in self:
        self['current'] = OrderedDict({'curr_block': '',
                                       'curr_archive': '',
                                       #'curr_software': '',
                                       'curr_site': ''})
        self[pfwdefs.PF_WRAPNUM] = '0'
        self[pfwdefs.PF_BLKNUM] = '1'
        self[pfwdefs.PF_TASKNUM] = '0'
        self[pfwdefs.PF_JOBNUM] = '0'
def set_block_info(self):
    """ Set current vals to match current block number

    Updates the 'current' section (block, site, archive) and derived keys
    (block_dir, runsite, des_services, des_db_section) from the current
    block number.  Dies via miscutils.fwdie on unresolvable values.
    """
    if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
        miscutils.fwdebug_print("BEG")

    curdict = self['current']
    if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
        miscutils.fwdebug_print(f"\tcurdict = {curdict}")

    # current block number
    blknum = self[pfwdefs.PF_BLKNUM]

    # update current block name for accessing block information
    blockname = self.get_block_name(blknum)
    if not blockname:
        miscutils.fwdie(
            f"Error: Cannot determine block name value for blknum={blknum}",
            pfwdefs.PF_EXIT_FAILURE)
    curdict['curr_block'] = blockname
    # per-block working directory, e.g. ../B01-<blockname>
    self['block_dir'] = f'../B{int(blknum):02d}-{blockname}'

    # update current target site name
    (exists, site) = self.search('target_site')
    if not exists:
        miscutils.fwdie("Error: Cannot determine target site.",
                        pfwdefs.PF_EXIT_FAILURE)
    site = site.lower()
    if site not in self[pfwdefs.SW_SITESECT]:
        print(f"Error: invalid site value ({site})")
        print("\tsite defs contain entries for sites: ",
              list(self[pfwdefs.SW_SITESECT].keys()))
        miscutils.fwdie(f"Error: Invalid site value ({site})",
                        pfwdefs.PF_EXIT_FAILURE)
    curdict['curr_site'] = site
    self['runsite'] = site

    # update current target archive name if using archive
    if ((pfwdefs.USE_TARGET_ARCHIVE_INPUT in self and
         miscutils.convertBool(self[pfwdefs.USE_TARGET_ARCHIVE_INPUT])) or
            (pfwdefs.USE_TARGET_ARCHIVE_OUTPUT in self and
             miscutils.convertBool(self[pfwdefs.USE_TARGET_ARCHIVE_OUTPUT]))):
        (exists, archive) = self.search(pfwdefs.TARGET_ARCHIVE)
        if not exists:
            miscutils.fwdie("Error: Cannot determine target_archive value. \n" \
                            f"\tEither set target_archive or set to FALSE both {pfwdefs.USE_TARGET_ARCHIVE_INPUT} and {pfwdefs.USE_TARGET_ARCHIVE_OUTPUT}",
                            pfwdefs.PF_EXIT_FAILURE)

        archive = archive.lower()
        if archive not in self[pfwdefs.SW_ARCHIVESECT]:
            print(f"Error: invalid target_archive value ({archive})")
            print("\tarchive contains: ", self[pfwdefs.SW_ARCHIVESECT])
            miscutils.fwdie(
                f"Error: Invalid target_archive value ({archive})",
                pfwdefs.PF_EXIT_FAILURE)

        curdict['curr_archive'] = archive

        # keep a running comma-separated list of all target archives used
        if 'list_target_archives' in self:
            if not archive in self['list_target_archives']:
                # assumes target archive names are not substrings of one another
                self['list_target_archives'] += ',' + archive
        else:
            self['list_target_archives'] = archive
    elif ((pfwdefs.USE_HOME_ARCHIVE_INPUT in self and
           self[pfwdefs.USE_HOME_ARCHIVE_INPUT] != 'never') or
          (pfwdefs.USE_HOME_ARCHIVE_OUTPUT in self and
           self[pfwdefs.USE_HOME_ARCHIVE_OUTPUT] != 'never')):
        (exists, archive) = self.search(pfwdefs.HOME_ARCHIVE)
        if not exists:
            miscutils.fwdie("Error: Cannot determine home_archive value.\n" \
                            f"\tEither set home_archive or set correctly both {pfwdefs.USE_HOME_ARCHIVE_INPUT} and {pfwdefs.USE_HOME_ARCHIVE_OUTPUT}",
                            pfwdefs.PF_EXIT_FAILURE)

        archive = archive.lower()
        if archive not in self[pfwdefs.SW_ARCHIVESECT]:
            print(f"Error: invalid home_archive value ({archive})")
            print("\tarchive contains: ", self[pfwdefs.SW_ARCHIVESECT])
            miscutils.fwdie(
                f"Error: Invalid home_archive value ({archive})",
                pfwdefs.PF_EXIT_FAILURE)

        # dynamically choose a transfer node if a list is given
        # (requires DB access to look up per-server transfer data)
        if 'transfer_server' in self[pfwdefs.SW_ARCHIVESECT][archive]:
            if self.use_db_in:
                if self.dbh is None:
                    self.dbh = pfwdb.PFWDB(self['submit_des_services'],
                                           self['submit_des_db_section'])
                servers = self[pfwdefs.SW_ARCHIVESECT][archive][
                    'transfer_server'].replace(' ', '').split(',')
                # pick one server at random from the list
                server = servers[random.randint(0, len(servers) - 1)]
                self[pfwdefs.SW_ARCHIVESECT][archive].update(
                    self.dbh.get_transfer_data(server, archive))
            else:
                miscutils.fwdie(
                    f"Error: transfer_servers was specified, but {pfwdefs.PF_USE_DB_IN} was set to False. Must be able to use database to use transfer_servers option.",
                    pfwdefs.PF_EXIT_FAILURE)

        curdict['curr_archive'] = archive
    else:
        # make sure to reset curr_archive from possible prev block value
        curdict['curr_archive'] = None

    # expose submit-side service settings under the generic key names
    if 'submit_des_services' in self:
        self['des_services'] = self['submit_des_services']

    if 'submit_des_db_section' in self:
        self['des_db_section'] = self['submit_des_db_section']

    if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
        miscutils.fwdebug_print("END")
def jobpre(argv=None):
    """Program entry point.

    argv: command line [prog, configfile, jobnum]; defaults to sys.argv.
    Renames its scratch log into the job directory and, when DB output is
    enabled, records condor/target submit timestamps for the job.
    Returns PF_EXIT_SUCCESS, or PF_EXIT_FAILURE on usage error.
    """
    if argv is None:
        argv = sys.argv

    # capture all output in a temp log until the real log name is known
    debugfh = tempfile.NamedTemporaryFile(mode='w+', prefix='jobpre_',
                                          dir='.', delete=False)
    tmpfn = debugfh.name
    sys.stdout = debugfh
    sys.stderr = debugfh

    # bugfix: honor the argv parameter instead of reading global sys.argv
    print(' '.join(argv))   # command line for debugging
    print(os.getcwd())

    if len(argv) < 3:
        print('Usage: jobpre configfile jobnum')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    jobnum = argv[2]   # could also be uberctrl

    # read wcl file
    config = pfwconfig.PfwConfig({'wclfile': configfile})

    blockname = config.getfull('blockname')
    blkdir = config.get('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename(
        'job', {
            pfwdefs.PF_CURRVALS: {
                pfwdefs.PF_JOBNUM: jobnum,
                'flabel': 'jobpre',
                'fsuffix': 'out'
            }
        })
    new_log_name = "%s/%s/%s" % (blkdir, tjpad, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    debugfh.close()
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        ctstr = dbh.get_current_timestamp_str()
        dbh.update_job_info(config, tjpad, {
            'condor_submit_time': ctstr,
            'target_submit_time': ctstr
        })

    log_pfw_event(config, blockname, tjpad, 'j', ['pretask'])

    miscutils.fwdebug_print("jobpre done")
    debugfh.close()
    return pfwdefs.PF_EXIT_SUCCESS
def main(argv):
    """ Program entry point

    Builds a file-metadata query from the module's list/file definition in
    the submit wcl and writes the resulting file list via queryutils.
    Raises Exception on any unresolvable input; returns 0 on success.
    """
    parser = argparse.ArgumentParser(description='genquery.py')
    parser.add_argument('--qoutfile', action='store')
    parser.add_argument('--qouttype', action='store')
    parser.add_argument('--config', action='store', dest='configfile')
    parser.add_argument('--module', action='store', dest='modulename')
    parser.add_argument('--search', action='store', dest='searchname')
    args = parser.parse_args(argv)

    if args.modulename is None:
        raise Exception("Error: Must specify module\n")

    print(args.configfile)
    config = pfwconfig.PfwConfig({'wclfile':args.configfile})

    if args.modulename not in config[pfwdefs.SW_MODULESECT]:
        raise Exception(f"Error: module '{args.modulename}' does not exist.\n")
    module_dict = config[pfwdefs.SW_MODULESECT][args.modulename]

    # locate the list or file entry that defines this query
    if args.searchname is not None:
        if pfwdefs.SW_LISTSECT in module_dict and \
                args.searchname in module_dict[pfwdefs.SW_LISTSECT]:
            search_dict = module_dict[pfwdefs.SW_LISTSECT][args.searchname]
        elif pfwdefs.SW_FILESECT in module_dict and \
                args.searchname in module_dict[pfwdefs.SW_FILESECT]:
            search_dict = module_dict[pfwdefs.SW_FILESECT][args.searchname]
        else:
            raise Exception(f"Error: Could not find either list or file by name {args.searchname} in module {args.modulename}\n")
    else:
        raise Exception("Error: need to define either list or file or search\n")

    # only archives enabled for input are included in the query
    archive_names = []
    if config.getfull(pfwdefs.USE_HOME_ARCHIVE_INPUT) != 'never':
        archive_names.append(config.getfull(pfwdefs.HOME_ARCHIVE))
    if config.getfull(pfwdefs.USE_TARGET_ARCHIVE_INPUT) != 'never':
        archive_names.append(config.getfull(pfwdefs.TARGET_ARCHIVE))

    fields = miscutils.fwsplit(search_dict[pfwdefs.SW_QUERYFIELDS].lower())

    # optionally restrict the query to the current run
    if ('query_run' in config and
            'fileclass' in search_dict and
            'fileclass' in config and
            search_dict['fileclass'] == config['fileclass']):
        query_run = config['query_run'].lower()
        if query_run == 'current':
            fields.append('run')
        elif query_run == 'allbutfirstcurrent':
            if 'current' not in config:
                raise Exception("Internal Error: Current object doesn't exist\n")
            if 'curr_blocknum' not in config['current']:
                raise Exception("Internal Error: current->curr_blocknum doesn't exist\n")
            block_num = config['current']['curr_blocknum']
            if block_num > 0:
                fields.append('run')

    # build query dict: {table: {'key_vals': ..., 'select_fields': ..., 'join': ...}}
    query = {}
    qtable = search_dict['query_table']
    for fld in fields:
        table = qtable
        # fields may be qualified as table.field
        if '.' in fld:
            table, fld = fld.split('.')

        # value resolution precedence: search entry, module, then config
        if fld in search_dict:
            value = search_dict[fld]
        elif fld in module_dict:
            value = module_dict[fld]
        elif fld in config:
            value = config.getfull(fld)
        else:
            raise Exception(f"Error: genquery could not find value for query field {fld}\n")

        value = replfuncs.replace_vars(value, config,
                                       {pfwdefs.PF_CURRVALS: {'modulename': args.modulename},
                                        'searchobj': search_dict,
                                        intgdefs.REPLACE_VARS: True,
                                        'expand': True})[0]
        if value is None:
            raise Exception(f"Value=None for query field {fld}\n")

        if ',' in value:
            value = miscutils.fwsplit(value)
        # NOTE(review): if the previous branch ran, value is already a list
        # and "':' in value" tests list membership, not substring — confirm
        # whether both branches are ever meant to fire for the same value
        if ':' in value:
            value = miscutils.fwsplit(value)

        if table not in query:
            query[table] = {}
        if 'key_vals' not in query[table]:
            query[table]['key_vals'] = {}
        query[table]['key_vals'][fld] = value

    # if specified, insert join into query hash
    if 'join' in search_dict:
        joins = miscutils.fwsplit(search_dict['join'].lower())
        for j in joins:
            jmatch = re.search(r"(\S+)\.(\S+)\s*=\s*(\S+)", j)
            if jmatch:
                table = jmatch.group(1)
                if table not in query:
                    query[table] = {}
                if 'join' not in query[table]:
                    query[table]['join'] = j
                else:
                    # accumulate multiple join clauses comma-separated
                    query[jmatch.group(1)]['join'] += "," + j
        #query[table]['join']=search_dict['join']

    query[qtable]['select_fields'] = ['filename']

    # check output fields for fields from other tables.
    if 'output_fields' in search_dict:
        output_fields = miscutils.fwsplit(search_dict['output_fields'].lower())
        for ofield in output_fields:
            ofmatch = re.search(r"(\S+)\.(\S+)", ofield)
            if ofmatch:
                table = ofmatch.group(1)
                field = ofmatch.group(2)
            else:
                table = qtable
                field = ofield

            if table not in query:
                query[table] = {}
            if 'select_fields' not in query[table]:
                query[table]['select_fields'] = []
            if field not in query[table]['select_fields']:
                query[table]['select_fields'].append(field)

    # gen_file_list expects select_fields as a comma-separated string
    for tbl in query:
        if 'select_fields' in query[tbl]:
            query[tbl]['select_fields'] = ','.join(query[tbl]['select_fields'])

    if archive_names:
        #query[qtable]['join'] = "%s.filename=file_archive_info.filename" % qtable
        query['file_archive_info'] = {'select_fields': 'compression'}
        query['file_archive_info']['join'] = f"file_archive_info.filename={qtable}.filename"
        query['file_archive_info']['key_vals'] = {'archive_name': ','.join(archive_names)}

    print("Calling gen_file_list with the following query:\n")
    miscutils.pretty_print_dict(query, out_file=None, sortit=False, indent=4)
    print("\n\n")

    dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                      config.getfull('submit_des_db_section'))
    files = queryutils.gen_file_list(dbh, query)

    if not files:
        raise Exception(f"genquery: query returned zero results for {args.searchname}\nAborting\n")

    ## output list
    lines = queryutils.convert_single_files_to_lines(files)
    queryutils.output_lines(args.qoutfile, lines, args.qouttype)

    return 0
def blockpost(argv=None): """ Program entry point """ realstdout = sys.stdout realstderr = sys.stderr if argv is None: argv = sys.argv # open file to catch error messages about command line debugfh = open('blockpost.out', 'w') sys.stdout = debugfh sys.stderr = debugfh print(' '.join(argv)) # print command line for debugging print(f"running on {socket.gethostname()}") if len(argv) != 3: print('Usage: blockpost.py configfile retval') debugfh.close() return pfwdefs.PF_EXIT_FAILURE configfile = argv[1] retval = int(argv[2]) if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'): miscutils.fwdebug_print(f"configfile = {configfile}") miscutils.fwdebug_print(f"retval = {retval}") # read sysinfo file config = pfwconfig.PfwConfig({'wclfile': configfile}) if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'): miscutils.fwdebug_print("done reading config file") blockname = config.getfull('blockname') blkdir = config.getfull('block_dir') # now that have more information, can rename output file miscutils.fwdebug_print("getting new_log_name") new_log_name = config.get_filename('block', {pfwdefs.PF_CURRVALS: {'flabel': 'blockpost', 'fsuffix':'out'}}) new_log_name = f"{blkdir}/{new_log_name}" miscutils.fwdebug_print(f"new_log_name = {new_log_name}") debugfh.close() os.chmod('blockpost.out', 0o666) os.rename('blockpost.out', new_log_name) debugfh = open(new_log_name, 'a+') sys.stdout = debugfh sys.stderr = debugfh os.chdir(blkdir) log_pfw_event(config, blockname, 'blockpost', 'j', ['posttask', retval]) dryrun = config.getfull(pfwdefs.PF_DRYRUN) run = config.getfull('run') attid = config['pfw_attempt_id'] blknum = int(config.getfull(pfwdefs.PF_BLKNUM)) blktid = None msg2 = "" dbh = None qdbh = None job_byblk = {} wrap_byjob = {} wrapinfo = {} jobinfo = {} failedwraps = {} whyfailwraps = {} # mod failures for other modname, shouldn't happen usedb = miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)) verify_files = miscutils.convertBool(config.getfull('verify_files')) verify_status = 0 sem 
= None if verify_files and not usedb: print('Skipping file verification due to lack of database connection') if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)): try: miscutils.fwdebug_print("Connecting to DB") if config.dbh is None: dbh = pfwdb.PFWDB(config.getfull('submit_des_services'), config.getfull('submit_des_db_section')) else: dbh = config.dbh if verify_files: curs = dbh.cursor() curs.execute(f"select root from ops_archive where name='{config.getfull('home_archive')}'") rows = curs.fetchall() if rows is None or len(rows) != 1: raise Exception(f"Invalid archive name ({config.getfull('home_archive')}). Found {len(rows)} rows in ops_archive") root = rows[0][0] if not os.path.isdir(root): print(f"Cannot read archive root directory:{config.getfull('home_archive')} This program must be run on an NCSA machine with access to the archive storage system.") sem = dbsem.DBSemaphore('verify_files_10', None, config.getfull('submit_des_services'), config.getfull('submit_des_db_section'), connection=dbh) print("\n\nVerifying archive file sizes on disk (0 is success)") verify_status = cu.compare(dbh=dbh, archive=config.getfull('home_archive'), pfwid=attid, md5sum=False, debug=False, script=False, verbose=False, silent=True) if sem is not None: del sem sem = None print(f" Verification of files returned status {verify_status:d}") if verify_status != 0: print(" This indicates that one or more files do not have the correct file size (based on DB entries). 
Run") print(f"\n compare_db.py --des_services {config.getfull('submit_des_services')} --section {config.getfull('submit_des_db_section')} --archive {config.getfull('home_archive')} --pfwid {int(attid):d} --verbose") print("\n to see the details.") if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_QCF)): import qcframework.qcfdb as qcfdb #qdbh = qcfdb.QCFDB(config.getfull('submit_des_services'), # config.getfull('submit_des_db_section')) qdbh = qcfdb.QCFDB(connection=dbh) print(f"\n\nChecking non-job block task status from task table in DB ({pfwdefs.PF_EXIT_SUCCESS} is success)") num_bltasks_failed = 0 bltasks = {} blktid = None if ('block' in config['task_id'] and str(blknum) in config['task_id']['block']): blktid = int(config['task_id']['block'][str(blknum)]) miscutils.fwdebug_print("Getting block task info from DB") start_time = time.time() bltasks = dbh.get_block_task_info(blktid) end_time = time.time() miscutils.fwdebug_print(f"Done getting block task info from DB ({end_time - start_time} secs)") for bltdict in bltasks.values(): print("Block status = ", bltdict['status']) if bltdict['status'] == pfwdefs.PF_EXIT_DRYRUN: print("setting return value to dryrun") retval = bltdict['status'] elif bltdict['status'] != pfwdefs.PF_EXIT_SUCCESS: num_bltasks_failed += 1 msg2 += f"\t{bltdict['name']}" if bltdict['label'] is not None: msg2 += f" - {bltdict['label']}" msg2 += " failed\n" if bltdict['name'] == 'begblock': # try to read the begblock.out and begblock.err files print("Trying to get begblock.out and begblock.err") msg2 += get_subblock_output("begblock") # try to get QCF messages (especially from query codes) begblock_tid = int(config['task_id']['begblock']) sql = f"select id from task where parent_task_id={begblock_tid:d} and status!=0" curs = dbh.cursor() curs.execute(sql) res = curs.fetchall() msg2 += "\n===== QCF Messages =====\n" msg2 += "\n begblock\n" wrapids = [blktid, begblock_tid] for r in res: wrapids.append(r[0]) wrapmsg = {} if qdbh is not None: 
miscutils.fwdebug_print("Querying QCF messages") start_time = time.time() wrapmsg = qdbh.get_qcf_messages_for_wrappers(wrapids) end_time = time.time() miscutils.fwdebug_print(f"Done querying QCF messages ({end_time-start_time} secs)") miscutils.fwdebug_print(f"wrapmsg = {wrapmsg}") if not wrapmsg: msg2 += " No QCF messages\n" else: for msgs in wrapmsg.values(): for m in msgs: msg2 += " " + m['message'] + "\n" retval = pfwdefs.PF_EXIT_FAILURE if retval != pfwdefs.PF_EXIT_DRYRUN: print(f"\n\nChecking job status from pfw_job table in DB ({pfwdefs.PF_EXIT_SUCCESS} is success)") miscutils.fwdebug_print("Getting job info from DB") start_time = time.time() jobinfo = dbh.get_job_info({'pfw_block_task_id': blktid}) end_time = time.time() miscutils.fwdebug_print(f"Done getting job info from DB ({end_time - start_time} secs)") miscutils.fwdebug_print("Getting wrapper info from DB") start_time = time.time() wrapinfo = dbh.get_wrapper_info(pfw_attempt_id=attid, pfw_block_task_id=blktid) if retval != pfwdefs.PF_EXIT_SUCCESS: jobwrap = dbh.get_jobwrapper_info(id=attid) else: jobwrap = {} end_time = time.time() miscutils.fwdebug_print(f"Done getting wrapper info from DB ({end_time - start_time} secs)") else: msg = f"Could not find task id for block {blockname} in config.des" print("Error:", msg) if 'attempt' in config['task_id']: miscutils.fwdebug_print("Saving pfw message") start_time = time.time() Messaging.pfw_message(dbh, attid, config['task_id']['attempt'], msg, pfwdefs.PFWDB_MSG_INFO, 'blockpost.out', 0) end_time = time.time() miscutils.fwdebug_print(f"Done saving pfw message ({end_time - start_time} secs)") print("all the task ids:", config['task_id']) archive = None if pfwdefs.HOME_ARCHIVE in config: archive = config.getfull(pfwdefs.HOME_ARCHIVE) logfullnames = dbh.get_log_fullnames(attid, archive) #dbh.close() print("len(jobinfo) = ", len(jobinfo)) print("len(wrapinfo) = ", len(wrapinfo)) job_byblk = pfwutils.index_job_info(jobinfo) print("blktid: ", blktid) 
print("job_byblk:", job_byblk) if blktid not in job_byblk: print(f"Warn: could not find jobs for block {blknum}") print(" This is ok if attempt died before jobs ran") print(" block task_ids in job_byblk:", list(job_byblk.keys())) else: wrap_byjob, _ = pfwutils.index_wrapper_info(wrapinfo) #for wid,jwr in jobwrap.iteritems(): #print wid,jwr # in case the post wrapper stuff failed, internally mark the task # as failed to retrieve the info later for wrapb in wrap_byjob.values(): for wrapper in wrapb.values(): if wrapper['parent_task_id'] in jobwrap and jobwrap[wrapper['parent_task_id']]['status'] is not None \ and wrapper['status'] is not None and jobwrap[wrapper['parent_task_id']]['status'] > wrapper['status']: wrapper['status'] = jobwrap[wrapper['parent_task_id']]['status'] #print "wrap_bymod:", wrap_bymod jobtid = '' jobdict = {} for jobtid, jobdict in sorted(job_byblk[blktid].items()): failedwraps[jobtid] = [] whyfailwraps[jobtid] = [] jobkeys = "" # don't print out successful wrappers if jobtid in wrap_byjob and jobdict['status'] == pfwdefs.PF_EXIT_SUCCESS: continue if jobdict['jobkeys'] is not None: jobkeys = jobdict['jobkeys'] #print "jobkeys = ", jobkeys, type(jobkeys) submit_job_path = f"{config.getfull('work_dir')}/B{int(config.getfull('blknum')):02d}-{config.getfull('blockname'):s}/{int(jobdict['jobnum']):04d}" msg2 += f"\n\t{pfwutils.pad_jobnum(jobdict['jobnum'])} ({jobkeys}) " if jobtid not in wrap_byjob: msg2 += "\tNo wrapper instances" else: #print "wrapnum in job =", wrap_byjob[jobtid].keys() maxwrap = max(wrap_byjob[jobtid]) #print "maxwrap =", maxwrap modname = wrap_byjob[jobtid][maxwrap]['modname'] #print "modname =", modname msg2 += f"{len(wrap_byjob[jobtid]):d}/{jobdict['expect_num_wrap']} {modname}" # determine wrappers for this job without success exit for wrapnum, wdict in wrap_byjob[jobtid].items(): if wdict['status'] is None or wdict['status'] != pfwdefs.PF_EXIT_SUCCESS: if wdict['modname'] == modname: failedwraps[jobtid].append(wrapnum) 
else: whyfailwraps[jobtid].append(wrapnum) if jobdict['status'] == pfwdefs.PF_EXIT_EUPS_FAILURE: msg2 += " - FAIL - EUPS setup failure" retval = jobdict['status'] elif jobdict['status'] == pfwdefs.PF_EXIT_CONDOR: msg2 += " - FAIL - Condor/Globus failure" retval = jobdict['status'] elif jobdict['status'] is None: msg2 += " - FAIL - NULL status" retval = pfwdefs.PF_EXIT_FAILURE elif jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS: msg2 += " - FAIL - Non-zero status" retval = jobdict['status'] if jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS: msg2 += f"\n\t\t{submit_job_path}/runjob.out " msg2 += '\n' # print pfw_messages if 'message' in jobdict: print('\nmessages: ', jobdict['message']) for msgdict in sorted(jobdict['message'], key=lambda k: k['message_time']): level = int(msgdict['message_lvl']) levelstr = 'info' if level == pfwdefs.PFWDB_MSG_WARN: levelstr = 'WARN' elif level == pfwdefs.PFWDB_MSG_ERROR: levelstr = 'ERROR' msg2 += "\t\t{} - {}\n".format(levelstr, msgdict['message'].replace('\n', '\n\t\t\t')) if jobtid in wrap_byjob: # print log file name for failed/unfinished wrappers for wrapnum in failedwraps[jobtid]: wrapdict = wrap_byjob[jobtid][wrapnum] if wrapdict['log'] in logfullnames: msg2 += f"\t\t{wrapnum} - {logfullnames[wrapdict['log']]}\n" else: msg2 += f"\t\t{wrapnum} - Could not find log in archive {wrapdict['log']})\n" wrapmsg = get_qcf_messages(qdbh, [wrapdict['task_id']]) msg2 += print_qcf_messages(wrapdict, wrapmsg) msg2 += '\n' # If weirdness happened in run, print a message if whyfailwraps[jobtid]: msg2 += "\n*** Contact framework developers. Wrappers ran after at least 1 wrapper from a previous module that doesn't have success status.\n" msg2 += f"\t{','.join(whyfailwraps[jobtid])}\n" except Exception as exc: if sem is not None: del sem msg2 += "\n\nEncountered error trying to gather status information for email." msg2 += "\nCheck output for blockpost for further details." 
print("\n\nEncountered error trying to gather status information for email") print(f"{exc.__class__.__name__}: {str(exc)}") (extype, exvalue, trback) = sys.exc_info() traceback.print_exception(extype, exvalue, trback, file=sys.stdout) retval = pfwdefs.PF_EXIT_FAILURE retval = int(retval) + verify_status print("before email retval =", retval) when_to_email = 'run' if 'when_to_email' in config: when_to_email = config.getfull('when_to_email').lower() if miscutils.convertBool(dryrun): if when_to_email != 'never': print("dryrun = ", dryrun) print("Sending dryrun email") if retval == pfwdefs.PF_EXIT_DRYRUN: msg1 = f"{run}: In dryrun mode, block {blockname} has finished successfully." else: msg1 = f"{run}: In dryrun mode, block {blockname} has failed." send_email(config, blockname, retval, "", msg1, msg2) else: print("Not sending dryrun email") print("retval = ", retval) retval = pfwdefs.PF_EXIT_DRYRUN elif retval: if when_to_email != 'never': print("Sending block failed email\n") msg1 = f"{run}: block {blockname} has failed." send_email(config, blockname, retval, "", msg1, msg2) else: print("Not sending failed email") print("retval = ", retval) elif retval == pfwdefs.PF_EXIT_SUCCESS: if when_to_email == 'block': msg1 = f"{run}: block {blockname} has finished successfully." msg2 = "" print("Sending success email\n") send_email(config, blockname, retval, "", msg1, msg2) elif when_to_email == 'run': numblocks = len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')) if int(config[pfwdefs.PF_BLKNUM]) == numblocks: msg1 = f"{run}: run has finished successfully." 
msg2 = "" print("Sending success email\n") send_email(config, blockname, retval, "", msg1, msg2) else: print("Not sending run email because not last block") print("retval = ", retval) else: print("Not sending success email") print("retval = ", retval) else: print("Not sending email") print("retval = ", retval) # Store values in DB and hist file #dbh = None if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]): if dbh is None: dbh = pfwdb.PFWDB(config.getfull('submit_des_services'), config.getfull('submit_des_db_section')) if blktid is not None: print("Updating end of block task", blktid) dbh.end_task(blktid, retval, True) else: print("Could not update end of block task without block task id") if retval != pfwdefs.PF_EXIT_SUCCESS: print("Updating end of attempt", config['task_id']['attempt']) dbh.end_task(config['task_id']['attempt'], retval, True) dbh.commit() #dbh.close() print("before next block retval = ", retval) if retval == pfwdefs.PF_EXIT_SUCCESS: # Get ready for next block config.inc_blknum() with open(configfile, 'w') as cfgfh: config.write(cfgfh) print("new blknum = ", config[pfwdefs.PF_BLKNUM]) print("number of blocks = ", len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ','))) if int(config[pfwdefs.PF_BLKNUM]) > len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')) and miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]): #dbh = pfwdb.PFWDB(config.getfull('submit_des_services'), config.getfull('submit_des_db_section')) updatevals = {'PROCESSING_STATE': 'PASS'} wherevals = {'PFW_ATTEMPT_ID': attid} dbh.basic_update_row('ATTEMPT_STATE', updatevals, wherevals) dbh.commit() #dbh.close() elif miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]): #dbh = pfwdb.PFWDB(config.getfull('submit_des_services'), config.getfull('submit_des_db_section')) updatevals = {'PROCESSING_STATE': 'FAIL'} wherevals = {'PFW_ATTEMPT_ID': attid} dbh.basic_update_row('ATTEMPT_STATE', updatevals, wherevals) dbh.commit() #dbh.close() if dbh is not None: dbh.close() 
miscutils.fwdebug_print(f"Returning retval = {retval} ({type(retval)})") miscutils.fwdebug_print("END") debugfh.close() if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'): miscutils.fwdebug_print(f"Exiting with = {exitcode}") miscutils.fwdebug_print(f"type of exitcode = {type(exitcode)}") sys.stdout = realstdout sys.stderr = realstderr return int(retval)