Example #1
0
def main(argv=None):
    """Run the queries for every module listed in the config.

    ``argv`` (default: ``sys.argv``) must hold exactly three entries: the
    program name, the config file and the condor job id.  Fatal problems are
    reported through ``miscutils.fwdie``; on normal completion 0 is returned.
    """
    args = sys.argv if argv is None else argv

    if len(args) != 3:
        miscutils.fwdie("Usage: runqueries.pl configfile condorjobid\n", pfwdefs.PF_EXIT_FAILURE)

    configfile, condorid = args[1], args[2]

    config = pfwconfig.PfwConfig({'wclfile': configfile})
    # record which condor job is executing this step
    log_pfw_event(config, config['curr_block'], 'runqueries', 'j', ['cid', condorid])

    if pfwdefs.SW_MODULELIST not in config:
        miscutils.fwdie("Error:  No modules to run.", pfwdefs.PF_EXIT_FAILURE)

    # Walk the (lower-cased) module list in order; runqueries is told which
    # modules came earlier in the list via the ``handled`` mapping.
    handled = {}
    for modname in miscutils.fwsplit(config[pfwdefs.SW_MODULELIST].lower()):
        if modname not in config[pfwdefs.SW_MODULESECT]:
            miscutils.fwdie(
                "Error: Could not find module description for module %s\n" % modname,
                pfwdefs.PF_EXIT_FAILURE)
        runqueries(config, configfile, modname, handled)
        handled[modname] = True

    return 0
Example #2
0
def main():
    """Entry point when called as an executable.

    Parses the command line, builds a ``PfwConfig`` from the parsed options,
    runs ``pfwcheck.check`` on it and prints the three counts it returns
    (errors, warnings, items fixed).
    """

    parser = argparse.ArgumentParser(
        description='Run check on given submit wcl')
    parser.add_argument('--verbose', action='store', default=True)
    parser.add_argument('--des_db_section', action='store')
    parser.add_argument('--des_services', action='store')
    parser.add_argument(
        '--expandwcl',
        action='store',
        default=True,
        help='set to False if running on an uberctrl/config.des')
    parser.add_argument('wclfile', action='store')

    args = vars(parser.parse_args())  # convert the Namespace to a plain dict

    # Option values arrive as strings (or the True default); normalise them.
    args['verbose'] = miscutils.convertBool(args['verbose'])
    args['usePFWconfig'] = miscutils.convertBool(args['expandwcl'])
    args['get_db_config'] = miscutils.convertBool(args['expandwcl'])

    # usePFWconfig and get_db_config set to True because dessubmit does
    #   (work only done at submit time)
    #   use_db_in=False in submit wcl overrides get_db_config
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    print("Gathering wcl...")
    config = pfwconfig.PfwConfig(args)

    config[pfwdefs.ATTNUM] = '0'  # must be string as if read from wcl file
    testcnts = pfwcheck.check(config, '')

    print("\nTest Summary")
    print("\tErrors: %d" % testcnts[0])
    print("\tWarnings: %d" % testcnts[1])
    print("\tItems fixed: %d" % testcnts[2])
Example #3
0
def endrun(configfile):
    """Finish a run: optional end-of-run archive copy, then end the attempt task.

    When the config asks for home-archive output at 'run' granularity, the
    attempt's archive path is copied from the target archive to the home
    archive with ``archive_transfer_utils.archive_copy_dir``.  Afterwards, if
    DB output is enabled, the attempt task is ended in the database.

    Args:
        configfile: name of the wcl config file to load.

    Returns:
        ``pfwdefs.PF_EXIT_FAILURE`` when required archive settings are missing
        from the config, otherwise ``pfwdefs.PF_EXIT_SUCCESS``.  The result of
        the copy itself is not inspected.
    """
    miscutils.fwdebug_print("BEG")

    config = pfwconfig.PfwConfig({'wclfile': configfile})
    os.chdir('../uberctrl')

    retval = pfwdefs.PF_EXIT_SUCCESS

    if pfwdefs.USE_HOME_ARCHIVE_OUTPUT in config and \
       config[pfwdefs.USE_HOME_ARCHIVE_OUTPUT].lower() == 'run':
        if pfwdefs.ATTEMPT_ARCHIVE_PATH not in config:
            print("Error:  Cannot find %s in config" % pfwdefs.ATTEMPT_ARCHIVE_PATH)
            print("\tIt is needed for the mass copy of the run back to the " \
                  "home archive at the end of the run")
            return pfwdefs.PF_EXIT_FAILURE

        archpath = config.getfull(config[pfwdefs.ATTEMPT_ARCHIVE_PATH])
        print("archpath =", archpath)

        # call archive transfer for target archive to home archive
        # check if using target archive
        target_info = None
        if pfwdefs.USE_TARGET_ARCHIVE_OUTPUT in config and \
           config.getfull(pfwdefs.USE_TARGET_ARCHIVE_OUTPUT).lower() != 'never':
            if pfwdefs.TARGET_ARCHIVE in config and \
                    config.getfull(pfwdefs.TARGET_ARCHIVE) in config[pfwdefs.SW_ARCHIVESECT]:
                target_info = config[pfwdefs.SW_ARCHIVESECT][config.getfull(pfwdefs.TARGET_ARCHIVE)]
            else:
                print("Error:  cannot determine info for target archive")
                return pfwdefs.PF_EXIT_FAILURE
        else:
            print("Error:  Asked to transfer outputs at end of run, but not using target archive")
            return pfwdefs.PF_EXIT_FAILURE

        home_info = None
        # NOTE(review): this debug print indexes the config before the
        # membership check below, so the check cannot protect it -- confirm
        # whether a missing home archive is expected to be tolerated here.
        print(config[pfwdefs.HOME_ARCHIVE])
        if pfwdefs.HOME_ARCHIVE in config and \
                config[pfwdefs.HOME_ARCHIVE] in config[pfwdefs.SW_ARCHIVESECT]:
            home_info = config[pfwdefs.SW_ARCHIVESECT][config.getfull(pfwdefs.HOME_ARCHIVE)]

        # call transfer
        archive_transfer_utils.archive_copy_dir(target_info, home_info,
                                                config.getfull('archive_transfer'),
                                                archpath, config)

    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        miscutils.fwdebug_print("Calling update_attempt_end: retval = %s" % retval)
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        try:
            dbh.end_task(config['task_id']['attempt'], retval, True)
            dbh.commit()
        finally:
            # release the connection even if ending the task fails
            dbh.close()

    miscutils.fwdebug_print("END - exiting with code %s" % retval)
    return retval
Example #4
0
def logpre(argv=None):
    """Log the 'pretask' event for a subblock.

    Output is first captured in 'logpre.out' in the current directory.  Once
    the config has been read, that file is moved into the block directory
    under the name produced by ``config.get_filename`` and further output is
    appended there.

    Args:
        argv: ``[program, configfile, block, subblocktype, subblock]``;
            defaults to ``sys.argv``.  The block argument is accepted but the
            block name actually used is read from the config.

    Returns:
        ``pfwdefs.PF_EXIT_FAILURE`` when fewer than five arguments are given,
        otherwise ``pfwdefs.PF_EXIT_SUCCESS``.

    Note:
        ``sys.stdout`` and ``sys.stderr`` are left pointing at the (closed)
        log file when this function returns.
    """
    if argv is None:
        argv = sys.argv

    default_log = 'logpre.out'
    debugfh = open(default_log, 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))  # command line for debugging

    if len(argv) < 5:
        print('Usage: logpre configfile block subblocktype subblock')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    # Read from argv (not sys.argv) so an explicitly passed argument list is
    # honoured; argv[2] (block, could also be uberctrl) is not needed because
    # the block name is taken from the config below.
    configfile = argv[1]
    subblocktype = argv[3]
    subblock = argv[4]

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    new_log_name = config.get_filename(
        'block', {
            pfwdefs.PF_CURRVALS: {
                'subblock': subblock,
                'flabel': '${subblock}_logpre',
                'fsuffix': 'out'
            }
        })
    new_log_name = "%s/%s" % (blkdir, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)
    debugfh.close()

    os.chmod(default_log, 0o666)
    os.rename(default_log, new_log_name)

    # reopen in append mode so the messages written so far are kept
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    log_pfw_event(config, blockname, subblock, subblocktype, ['pretask'])

    print("logpre done")
    debugfh.close()
    return pfwdefs.PF_EXIT_SUCCESS
Example #5
0
def endblock(configfile):
    """Finish a block: optionally copy the block's files to the home archive.

    When the config asks for home-archive output at 'block' granularity, the
    list of files for this request/unit/attempt/block is fetched from the
    database and handed to ``archive_transfer_utils.archive_copy``.

    Args:
        configfile: name of the wcl config file to load.

    Returns:
        ``pfwdefs.PF_EXIT_FAILURE`` when the target archive cannot be
        determined or DB output is not enabled while a block transfer was
        requested; otherwise ``pfwdefs.PF_EXIT_SUCCESS``.  The result of the
        copy itself is not inspected.
    """
    miscutils.fwdebug_print("BEG")

    config = pfwconfig.PfwConfig({'wclfile': configfile})
    blkdir = config.getfull('block_dir')
    os.chdir(blkdir)

    if pfwdefs.USE_HOME_ARCHIVE_OUTPUT in config and \
            config[pfwdefs.USE_HOME_ARCHIVE_OUTPUT].lower() == 'block':

        # check if using target archive
        target_info = None
        if pfwdefs.USE_TARGET_ARCHIVE_OUTPUT in config and \
                config[pfwdefs.USE_TARGET_ARCHIVE_OUTPUT].lower() != 'never':
            # NOTE(review): this debug print indexes the config before the
            # membership check below -- confirm that is intended.
            print(config[pfwdefs.TARGET_ARCHIVE])
            if pfwdefs.TARGET_ARCHIVE in config and \
                    config[pfwdefs.TARGET_ARCHIVE] in config.getfull(pfwdefs.SW_ARCHIVESECT):
                target_info = config.getfull(pfwdefs.SW_ARCHIVESECT)[config.getfull(pfwdefs.TARGET_ARCHIVE)]
            else:
                print("Error:  cannot determine info for target archive")
                return pfwdefs.PF_EXIT_FAILURE
        else:
            print("Error:  Asked to transfer outputs at end of block, but not using target archive")
            return pfwdefs.PF_EXIT_FAILURE

        home_info = None
        print(config.getfull(pfwdefs.HOME_ARCHIVE))
        if pfwdefs.HOME_ARCHIVE in config and \
                config.getfull(pfwdefs.HOME_ARCHIVE) in config[pfwdefs.SW_ARCHIVESECT]:
            home_info = config[pfwdefs.SW_ARCHIVESECT][config.getfull(pfwdefs.HOME_ARCHIVE)]

        # get file list of files to transfer
        if pfwdefs.PF_USE_DB_OUT in config and miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
            dbh = pfwdb.PFWDB()
            try:
                filelist = dbh.get_run_filelist(config.getfull(pfwdefs.REQNUM),
                                                config.getfull(pfwdefs.UNITNAME),
                                                config.getfull(pfwdefs.ATTNUM),
                                                config.getfull(pfwdefs.PF_BLKNUM),
                                                config.getfull(pfwdefs.TARGET_ARCHIVE))
            finally:
                # the handle is not used after this query; do not leak it
                dbh.close()
        else:
            print("Error:  Asked to transfer outputs at end of block, but not using database.")
            print("        Currently not supported.")
            return pfwdefs.PF_EXIT_FAILURE

        # call transfer
        archive_transfer_utils.archive_copy(target_info, home_info,
                                            config.getfull('archive_transfer'), filelist, config)

    miscutils.fwdebug_print("END - exiting with code %s" % pfwdefs.PF_EXIT_SUCCESS)
    return pfwdefs.PF_EXIT_SUCCESS
Example #6
0
def begblock(argv):
    """Prepare a block: run queries, build wrapper inputs, jobs and the job DAG.

    For every module in the config's module list this runs the module's
    queries, reads and fixes its master lists and, unless the module is
    marked no-op, creates wrapper instances and divides them into jobs.  It
    then checks that no input file is requested before the module that
    produces it, writes the per-job files and the job-manager DAG, and saves
    the updated config back to ``configfile``.

    Args:
        argv: argument list whose first element is the config (wcl) file
            name.  When None, the process command line without the program
            name is used.

    Returns:
        ``pfwdefs.PF_EXIT_DRYRUN`` when the config's dry-run setting is true,
        otherwise ``pfwdefs.PF_EXIT_SUCCESS``.

    Raises:
        Exception: if an input file is requested at or before the module that
            generates it, or if the database reports input files missing from
            the archive.  Any exception raised while building the block is
            re-raised after the config has been saved and, when DB output is
            enabled, the begblock and block tasks have been ended with
            ``pfwdefs.PF_EXIT_FAILURE``.
    """
    import time  # local import: only used for the per-wrapper debug timing

    if argv is None:
        # argv[0] is read as the config file below, so drop the program name
        argv = sys.argv[1:]

    configfile = argv[0]
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    config.set_block_info()
    blknum = config[pfwdefs.PF_BLKNUM]

    blkdir = config.getfull('block_dir')
    os.chdir(blkdir)

    # export DB connection settings for code that reads them from the environment
    (exists, submit_des_services) = config.search('submit_des_services')
    if exists and submit_des_services is not None:
        os.environ['DES_SERVICES'] = submit_des_services
    (exists, submit_des_db_section) = config.search('submit_des_db_section')
    if exists and submit_des_db_section is not None:
        os.environ['DES_DB_SECTION'] = submit_des_db_section

    dbh = None
    blktid = -1
    if miscutils.fwdebug_check(3, 'PFWBLOCK_DEBUG'):
        miscutils.fwdebug_print("blknum = %s" % (config[pfwdefs.PF_BLKNUM]))
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(submit_des_services, submit_des_db_section)
        dbh.insert_block(config)
        blktid = config['task_id']['block'][str(blknum)]
        config['task_id']['begblock'] = dbh.create_task(
            name='begblock',
            info_table=None,
            parent_task_id=blktid,
            root_task_id=int(config['task_id']['attempt']),
            label=None,
            do_begin=True,
            do_commit=True)

    try:
        modulelist = miscutils.fwsplit(
            config.getfull(pfwdefs.SW_MODULELIST).lower())
        modules_prev_in_list = {}

        joblist = {}
        parlist = OrderedDict()
        masterdata = OrderedDict()
        # file name -> index (in modulelist) of the module using it:
        #   infiles keeps the FIRST module that requests the file,
        #   outfiles keeps the LAST module that produces it.
        filelist = {'infiles': {}, 'outfiles': {}}
        for num, modname in enumerate(modulelist):
            print("XXXXXXXXXXXXXXXXXXXX %s XXXXXXXXXXXXXXXXXXXX" % modname)
            if modname not in config[pfwdefs.SW_MODULESECT]:
                miscutils.fwdie(
                    "Error: Could not find module description for module %s\n"
                    % (modname), pfwdefs.PF_EXIT_FAILURE)
            moddict = config[pfwdefs.SW_MODULESECT][modname]

            runqueries(config, configfile, modname, modules_prev_in_list)
            pfwblock.read_master_lists(config, modname, masterdata,
                                       modules_prev_in_list)

            (infsect, outfsect) = pfwblock.get_datasect_types(config, modname)
            pfwblock.fix_master_lists(config, modname, masterdata, outfsect)

            if pfwdefs.PF_NOOP not in moddict or not miscutils.convertBool(
                    moddict[pfwdefs.PF_NOOP]):
                pfwblock.create_fullnames(config, modname, masterdata)
                if miscutils.fwdebug_check(
                        9, 'PFWBLOCK_DEBUG') and modname in masterdata:
                    with open('%s-masterdata.txt' % modname, 'w') as fh:
                        miscutils.pretty_print_dict(masterdata[modname], fh)

                pfwblock.add_file_metadata(config, modname)
                sublists = pfwblock.create_sublists(config, modname,
                                                    masterdata)
                if sublists is not None:
                    if miscutils.fwdebug_check(3, 'PFWBLOCK_DEBUG'):
                        miscutils.fwdebug_print("sublists.keys() = %s" %
                                                (list(sublists.keys())))
                loopvals = pfwblock.get_wrapper_loopvals(config, modname)
                wrapinst = pfwblock.create_wrapper_inst(
                    config, modname, loopvals)
                wcnt = 1
                for winst in list(wrapinst.values()):
                    stime = time.time()
                    if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
                        miscutils.fwdebug_print("winst %d - BEG" % wcnt)
                    pfwblock.assign_data_wrapper_inst(config, modname, winst,
                                                      masterdata, sublists,
                                                      infsect, outfsect)
                    pfwblock.finish_wrapper_inst(config, modname, winst,
                                                 outfsect)
                    tempfiles = pfwblock.create_module_wrapper_wcl(
                        config, modname, winst)
                    for fl in tempfiles['infiles']:
                        if fl not in filelist['infiles']:
                            filelist['infiles'][fl] = num

                    for fl in tempfiles['outfiles']:
                        filelist['outfiles'][fl] = num
                    pfwblock.divide_into_jobs(config, modname, winst, joblist,
                                              parlist)
                    if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
                        etime = time.time()
                        miscutils.fwdebug_print("winst %d - %s - END" %
                                                (wcnt, etime - stime))
                    wcnt += 1
            modules_prev_in_list[modname] = True

            if miscutils.fwdebug_check(
                    9, 'PFWBLOCK_DEBUG') and modname in masterdata:
                with open('%s-masterdata.txt' % modname, 'w') as fh:
                    miscutils.pretty_print_dict(masterdata[modname], fh)

        scriptfile = pfwblock.write_runjob_script(config)

        # Files that are both consumed and produced inside this block do not
        # have to exist beforehand, but must be produced before first use.
        intersect = set(filelist['infiles']) & set(filelist['outfiles'])
        finallist = []

        for fl in filelist['infiles']:
            if fl not in intersect:
                finallist.append(fl)
            else:
                if filelist['infiles'][fl] <= filelist['outfiles'][fl]:
                    raise Exception(
                        'Input file %s requested before it is generated.' %
                        (fl))

        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            missingfiles = dbh.check_files(config, finallist)
            if len(missingfiles) > 0:
                raise Exception(
                    "The following input files cannot be found in the archive:"
                    + ",".join(missingfiles))
        miscutils.fwdebug_print("Creating job files - BEG")
        for jobkey, jobdict in sorted(joblist.items()):
            jobdict['jobnum'] = pfwutils.pad_jobnum(config.inc_jobnum())
            jobdict['jobkeys'] = jobkey
            jobdict['numexpwrap'] = len(jobdict['tasks'])
            if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
                miscutils.fwdebug_print("jobnum = %s, jobkey = %s:" %
                                        (jobdict['jobnum'], jobkey))
            jobdict['tasksfile'] = write_workflow_taskfile(
                config, jobdict['jobnum'], jobdict['tasks'])
            if (len(jobdict['inlist']) > 0 and
                    config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT) != 'never'
                    and 'submit_files_mvmt' in config and
                (pfwdefs.PF_DRYRUN not in config or not miscutils.convertBool(
                    config.getfull(pfwdefs.PF_DRYRUN)))):
                # get home archive info
                home_archive = config.getfull('home_archive')
                archive_info = config[pfwdefs.SW_ARCHIVESECT][home_archive]

                # load filemgmt class
                attempt_tid = config['task_id']['attempt']
                filemgmt = pfwutils.pfw_dynam_load_class(
                    dbh, config, attempt_tid, attempt_tid, "filemgmt",
                    archive_info['filemgmt'], archive_info)
                # save file information
                filemgmt.register_file_data('list', jobdict['inlist'],
                                            config['pfw_attempt_id'],
                                            attempt_tid, False, None, None)
                pfwblock.copy_input_lists_home_archive(config, filemgmt,
                                                       archive_info,
                                                       jobdict['inlist'])
                filemgmt.commit()
            jobdict['inputwcltar'] = pfwblock.tar_inputfiles(
                config, jobdict['jobnum'],
                jobdict['inwcl'] + jobdict['inlist'])
            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
                dbh.insert_job(config, jobdict)
            pfwblock.write_jobwcl(config, jobkey, jobdict)
            if ('glidein_use_wall' in config and miscutils.convertBool(
                    config.getfull('glidein_use_wall'))
                    and 'jobwalltime' in config):
                jobdict['wall'] = config['jobwalltime']

        miscutils.fwdebug_print("Creating job files - END")

        numjobs = len(joblist)
        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            dbh.update_block_numexpjobs(config, numjobs)

        dagfile = config.get_filename('jobdag')
        pfwblock.create_jobmngr_dag(config, dagfile, scriptfile, joblist)
    except BaseException:
        # Deliberately catches everything (same reach as a bare except): the
        # failure is recorded and the exception is always re-raised.
        retval = pfwdefs.PF_EXIT_FAILURE
        with open(configfile, 'w') as cfgfh:
            config.write(
                cfgfh)  # save config, have updated jobnum, wrapnum, etc
        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            dbh.end_task(config['task_id']['begblock'], retval, True)
            dbh.end_task(blktid, retval, True)
        raise

    # save config, have updated jobnum, wrapnum, etc
    with open(configfile, 'w') as cfgfh:
        config.write(cfgfh)

    (exists, dryrun) = config.search(pfwdefs.PF_DRYRUN)
    if exists and miscutils.convertBool(dryrun):
        retval = pfwdefs.PF_EXIT_DRYRUN
    else:
        retval = pfwdefs.PF_EXIT_SUCCESS
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh.end_task(config['task_id']['begblock'], retval, True)
    miscutils.fwdebug_print("END - exiting with code %s" % retval)

    return retval
Example #7
0
def blockpost(argv=None):
    """Program entry point.
    """
    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = open('blockpost.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))  # print command line for debugging

    print("running on %s" % (socket.gethostname()))

    if len(argv) != 3:
        print('Usage: blockpost.py configfile retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    retval = int(argv[2])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("configfile = %s" % configfile)
    miscutils.fwdebug_print("retval = %s" % retval)

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename(
        'block',
        {pfwdefs.PF_CURRVALS: {
            'flabel': 'blockpost',
            'fsuffix': 'out'
        }})
    new_log_name = "%s/%s" % (blkdir, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    debugfh.close()
    os.chmod('blockpost.out', 0o666)
    os.rename('blockpost.out', new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    os.chdir(blkdir)

    log_pfw_event(config, blockname, 'blockpost', 'j', ['posttask', retval])

    dryrun = config.getfull(pfwdefs.PF_DRYRUN)
    run = config.getfull('run')
    attid = config['pfw_attempt_id']
    reqnum = config.getfull(pfwdefs.REQNUM)
    unitname = config.getfull(pfwdefs.UNITNAME)
    attnum = config.getfull(pfwdefs.ATTNUM)
    blknum = int(config.getfull(pfwdefs.PF_BLKNUM))
    blktid = None

    msg2 = ""
    dbh = None
    job_byblk = {}
    wrap_byjob = {}
    wrap_bymod = {}
    wrapinfo = {}
    jobinfo = {}
    failedwraps = {}
    whyfailwraps = {}  # mod failures for other modname, shouldn't happen
    usedb = miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT))
    verify_files = miscutils.convertBool(config.getfull('verify_files'))
    verify_status = 0
    if verify_files and not usedb:
        print('Skipping file verification due to lack of database connection')
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        sem = None
        try:
            miscutils.fwdebug_print("Connecting to DB")
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                              config.getfull('submit_des_db_section'))
            if verify_files:
                curs = dbh.cursor()
                curs.execute("select root from ops_archive where name='%s'" %
                             (config.getfull('home_archive')))
                rows = curs.fetchall()
                if rows is None or len(rows) != 1:
                    raise Exception(
                        "Invalid archive name (%s).   Found %s rows in ops_archive"
                        % (config.getfull('home_archive'), len(rows)))
                root = rows[0][0]
                if not os.path.isdir(root):
                    print(
                        "Cannot read archive root directory:%s This program must be run on an NCSA machine with access to the archive storage system."
                        % (config.getfull('home_archive')))
                sem = dbsem.DBSemaphore(
                    'verify_files_10', None,
                    config.getfull('submit_des_services'),
                    config.getfull('submit_des_db_section'))
                print(
                    "\n\nVerifying archive file sizes on disk (0 is success)")
                verify_status = cu.compare(
                    dbh=dbh,
                    archive=config.getfull('home_archive'),
                    pfwid=attid,
                    filesize=True,
                    md5sum=False,
                    quick=True,
                    debug=False,
                    script=False,
                    verbose=False,
                    silent=True)
                if sem is not None:
                    del sem
                print("  Verification of files returned status %i" %
                      (verify_status))
                if verify_status != 0:
                    print(
                        "  This indicates that one or more files do not have the correct file size (based on DB entries). Run"
                    )
                    print(
                        "\n    compare_db.py --des_services %s --section %s --archive %s --pfwid %i --filesize --verbose"
                        % (config.getfull('submit_des_services'),
                           config.getfull('submit_des_db_section'),
                           config.getfull('home_archive'), int(attid)))
                    print("\n  to see the details.")

            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_QCF)):
                import qcframework.qcfdb as qcfdb
                qdbh = qcfdb.QCFDB(config.getfull('submit_des_services'),
                                   config.getfull('submit_des_db_section'))

            print("\n\nChecking non-job block task status from task table in DB (%s is success)" % \
                  pfwdefs.PF_EXIT_SUCCESS)
            num_bltasks_failed = 0
            bltasks = {}
            blktid = None
            if ('block' in config['task_id']
                    and str(blknum) in config['task_id']['block']):
                blktid = int(config['task_id']['block'][str(blknum)])
                miscutils.fwdebug_print("Getting block task info from DB")
                start_time = time.time()
                bltasks = dbh.get_block_task_info(blktid)
                end_time = time.time()
                miscutils.fwdebug_print(
                    "Done getting block task info from DB (%s secs)" %
                    (end_time - start_time))
                for bltdict in list(bltasks.values()):
                    print("Block status = ", bltdict['status'])
                    if bltdict['status'] == pfwdefs.PF_EXIT_DRYRUN:
                        print("setting return value to dryrun")
                        retval = bltdict['status']
                    elif bltdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        num_bltasks_failed += 1
                        msg2 += "\t%s" % (bltdict['name'])
                        if bltdict['label'] is not None:
                            msg2 += " - %s" % (bltdict['label'])
                        msg2 += " failed\n"

                        if bltdict['name'] == 'begblock':
                            # try to read the begblock.out and begblock.err files
                            print(
                                "Trying to get begblock.out and begblock.err")
                            msg2 += get_subblock_output("begblock")

                            # try to get QCF messages (especially from query codes)
                            begblock_tid = int(config['task_id']['begblock'])
                            sql = "select id from task where parent_task_id=%i and status!=0" % (
                                begblock_tid)
                            curs = dbh.cursor()
                            curs.execute(sql)
                            res = curs.fetchall()
                            msg2 += "\n===== QCF Messages =====\n"
                            msg2 += "\n begblock\n"
                            wrapids = [blktid, begblock_tid]
                            for r in res:
                                wrapids.append(r[0])

                            wrapmsg = {}
                            if qdbh is not None:
                                miscutils.fwdebug_print(
                                    "Querying QCF messages")
                                start_time = time.time()
                                wrapmsg = qdbh.get_qcf_messages_for_wrappers(
                                    wrapids)
                                end_time = time.time()
                                miscutils.fwdebug_print(
                                    "Done querying QCF messages (%s secs)" %
                                    (end_time - start_time))
                                miscutils.fwdebug_print("wrapmsg = %s" %
                                                        wrapmsg)
                            if len(wrapmsg) == 0:
                                msg2 += "    No QCF messages\n"
                            else:
                                for msgs in list(wrapmsg.values()):
                                    for m in msgs:
                                        msg2 += "    " + m['message'] + "\n"

                        retval = pfwdefs.PF_EXIT_FAILURE

                if retval != pfwdefs.PF_EXIT_DRYRUN:
                    print("\n\nChecking job status from pfw_job table in DB (%s is success)" % \
                        pfwdefs.PF_EXIT_SUCCESS)

                    miscutils.fwdebug_print("Getting job info from DB")
                    start_time = time.time()
                    jobinfo = dbh.get_job_info({'pfw_block_task_id': blktid})
                    end_time = time.time()
                    miscutils.fwdebug_print(
                        "Done getting job info from DB (%s secs)" %
                        (end_time - start_time))

                    miscutils.fwdebug_print("Getting wrapper info from DB")
                    start_time = time.time()
                    wrapinfo = dbh.get_wrapper_info(pfw_attempt_id=attid,
                                                    pfw_block_task_id=blktid)
                    end_time = time.time()
                    miscutils.fwdebug_print(
                        "Done getting wrapper info from DB (%s secs)" %
                        (end_time - start_time))
            else:
                msg = "Could not find task id for block %s in config.des" % blockname
                print("Error:", msg)
                if 'attempt' in config['task_id']:
                    miscutils.fwdebug_print("Saving pfw message")
                    start_time = time.time()
                    Messaging.pfw_message(dbh, attid,
                                          config['task_id']['attempt'], msg,
                                          pfw_utils.PFW_DB_INFO,
                                          'blockpost.out', 0)
                    end_time = time.time()
                    miscutils.fwdebug_print(
                        "Done saving pfw message (%s secs)" %
                        (end_time - start_time))
                print("all the task ids:", config['task_id'])

            archive = None
            if pfwdefs.HOME_ARCHIVE in config:
                archive = config.getfull(pfwdefs.HOME_ARCHIVE)
            logfullnames = dbh.get_fail_log_fullnames(attid, archive)
            dbh.close()
            print("len(jobinfo) = ", len(jobinfo))
            print("len(wrapinfo) = ", len(wrapinfo))
            job_byblk = pfwutils.index_job_info(jobinfo)
            print("blktid: ", blktid)
            print("job_byblk:", job_byblk)

            if blktid not in job_byblk:
                print("Warn: could not find jobs for block %s" % blknum)
                print("      This is ok if attempt died before jobs ran")
                print("      block task_ids in job_byblk:" %
                      list(job_byblk.keys()))
            else:
                wrap_byjob, wrap_bymod = pfwutils.index_wrapper_info(wrapinfo)
                #print "wrap_byjob:", wrap_byjob
                #print "wrap_bymod:", wrap_bymod
                for jobtid, jobdict in sorted(job_byblk[blktid].items()):
                    failedwraps[jobtid] = []
                    whyfailwraps[jobtid] = []

                    jobkeys = ""

                    # don't print out successful wrappers
                    if jobtid in wrap_byjob and jobdict[
                            'status'] == pfwdefs.PF_EXIT_SUCCESS:
                        continue

                    if jobdict['jobkeys'] is not None:
                        jobkeys = jobdict['jobkeys']
                        #print "jobkeys = ", jobkeys, type(jobkeys)

                    submit_job_path = "%s/B%02d-%s/%04d" % (
                        config.getfull('work_dir'),
                        int(config.getfull('blknum')),
                        config.getfull('blockname'), int(jobdict['jobnum']))
                    msg2 += "\n\t%s (%s) " % (pfwutils.pad_jobnum(
                        jobdict['jobnum']), jobkeys)

                    if jobtid not in wrap_byjob:
                        msg2 += "\tNo wrapper instances"
                    else:
                        #print "wrapnum in job =", wrap_byjob[jobtid].keys()
                        maxwrap = max(wrap_byjob[jobtid].keys())
                        #print "maxwrap =", maxwrap
                        modname = wrap_byjob[jobtid][maxwrap]['modname']
                        #print "modname =", modname

                        msg2 += "%d/%s  %s" % (len(
                            wrap_byjob[jobtid]), jobdict['expect_num_wrap'],
                                               modname)

                        # determine wrappers for this job without success exit
                        for wrapnum, wdict in list(wrap_byjob[jobtid].items()):
                            if wdict['status'] is None or wdict[
                                    'status'] != pfwdefs.PF_EXIT_SUCCESS:
                                if wdict['modname'] == modname:
                                    failedwraps[jobtid].append(wrapnum)
                                else:
                                    whyfailwraps[jobtid].append(wrapnum)

                    if jobdict['status'] == pfwdefs.PF_EXIT_EUPS_FAILURE:
                        msg2 += " - FAIL - EUPS setup failure"
                        retval = jobdict['status']
                    elif jobdict['status'] == pfwdefs.PF_EXIT_CONDOR:
                        msg2 += " - FAIL - Condor/Globus failure"
                        retval = jobdict['status']
                    elif jobdict['status'] is None:
                        msg2 += " - FAIL - NULL status"
                        retval = pfwdefs.PF_EXIT_FAILURE
                    elif jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += " - FAIL - Non-zero status"
                        retval = jobdict['status']

                    if jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += "\n\t\t%s/runjob.out " % (submit_job_path)

                    msg2 += '\n'

                    # print pfw_messages
                    if 'message' in jobdict:
                        print(jobdict['message'])
                        for msgdict in sorted(jobdict['message'],
                                              key=lambda k: k['message_time']):
                            level = int(msgdict['message_lvl'])
                            levelstr = 'info'
                            if level == pfwdefs.PFWDB_MSG_WARN:
                                levelstr = 'WARN'
                            elif level == pfwdefs.PFWDB_MSG_ERROR:
                                levelstr = 'ERROR'

                            msg2 += "\t\t%s - %s\n" % (
                                levelstr, msgdict['message'].replace(
                                    '\n', '\n\t\t\t'))

                    if jobtid in wrap_byjob:
                        # print log file name for failed/unfinished wrappers
                        for wrapnum in failedwraps[jobtid]:
                            wrapdict = wrap_byjob[jobtid][wrapnum]
                            if wrapdict['log'] in logfullnames:
                                msg2 += "\t\t%s - %s\n" % (
                                    wrapnum, logfullnames[wrapdict['log']])
                            else:
                                msg2 += "\t\t%s - Could not find log in archive (%s)\n" % (
                                    wrapnum, wrapdict['log'])
                            wrapmsg = get_qcf_messages(qdbh, config,
                                                       [wrapdict['task_id']])
                            msg2 = print_qcf_messages(config, wrapdict,
                                                      wrapmsg, msg2)

                        msg2 += '\n'

                        # If weirdness happened in run, print a message
                        if len(whyfailwraps[jobtid]) > 0:
                            msg2 += "\n*** Contact framework developers.   Wrappers ran after at least 1 wrapper from a previous module that doesn't have success status.\n"
                            msg2 += "\t%s\n" % ','.join(whyfailwraps[jobtid])

        except Exception as exc:
            if sem is not None:
                del sem
            msg2 += "\n\nEncountered error trying to gather status information for email."
            msg2 += "\nCheck output for blockpost for further details."
            print(
                "\n\nEncountered error trying to gather status information for email"
            )
            print("%s: %s" % (exc.__class__.__name__, str(exc)))
            (extype, exvalue, trback) = sys.exc_info()
            traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
            retval = pfwdefs.PF_EXIT_FAILURE
    retval = int(retval) + verify_status
    print("before email retval =", retval)

    when_to_email = 'run'
    if 'when_to_email' in config:
        when_to_email = config.getfull('when_to_email').lower()

    if miscutils.convertBool(dryrun):
        if when_to_email != 'never':
            print("dryrun = ", dryrun)
            print("Sending dryrun email")
            if retval == pfwdefs.PF_EXIT_DRYRUN:
                msg1 = "%s:  In dryrun mode, block %s has finished successfully." % (
                    run, blockname)
            else:
                msg1 = "%s:  In dryrun mode, block %s has failed." % (
                    run, blockname)

            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending dryrun email")
            print("retval = ", retval)
        retval = pfwdefs.PF_EXIT_DRYRUN
    elif retval:
        if when_to_email != 'never':
            print("Sending block failed email\n")
            msg1 = "%s:  block %s has failed." % (run, blockname)
            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending failed email")
            print("retval = ", retval)
    elif retval == pfwdefs.PF_EXIT_SUCCESS:
        if when_to_email == 'block':
            msg1 = "%s:  block %s has finished successfully." % (run,
                                                                 blockname)
            msg2 = ""
            print("Sending success email\n")
            send_email(config, blockname, retval, "", msg1, msg2)
        elif when_to_email == 'run':
            numblocks = len(
                miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ','))
            if int(config[pfwdefs.PF_BLKNUM]) == numblocks:
                msg1 = "%s:  run has finished successfully." % (run)
                msg2 = ""
                print("Sending success email\n")
                send_email(config, blockname, retval, "", msg1, msg2)
            else:
                print("Not sending run email because not last block")
                print("retval = ", retval)
        else:
            print("Not sending success email")
            print("retval = ", retval)
    else:
        print("Not sending email")
        print("retval = ", retval)

    # Store values in DB and hist file
    dbh = None
    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        if blktid is not None:
            print("Updating end of block task", blktid)
            dbh.end_task(blktid, retval, True)
        else:
            print("Could not update end of block task without block task id")
        if retval != pfwdefs.PF_EXIT_SUCCESS:
            print("Updating end of attempt", config['task_id']['attempt'])
            dbh.end_task(config['task_id']['attempt'], retval, True)
        dbh.commit()
        dbh.close()

    print("before next block retval = ", retval)
    if retval == pfwdefs.PF_EXIT_SUCCESS:
        # Get ready for next block
        config.inc_blknum()
        with open(configfile, 'w') as cfgfh:
            config.write(cfgfh)
        print("new blknum = ", config[pfwdefs.PF_BLKNUM])
        print("number of blocks = ",
              len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')))

    miscutils.fwdebug_print("Returning retval = %s (%s)" %
                            (retval, type(retval)))
    miscutils.fwdebug_print("END")
    debugfh.close()
    return int(retval)
Example #8
0
def summary(argv=None):
    """ Create and send summary email

    While running, stdout and stderr are redirected to ``summary.out`` in
    the current working directory.

    Parameters
    ----------
    argv : list of str, optional
        Command line in the form ``[prog, configfile, status]``.  Defaults
        to ``sys.argv``.

    Returns
    -------
    ``pfwdefs.PF_EXIT_FAILURE`` when no config file was given; otherwise
    the status value that was used for the email (``None`` if it was not
    supplied on the command line).
    """
    if argv is None:
        argv = sys.argv

    debugfh = open('summary.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))

    if len(argv) < 2:
        print("Usage: summary configfile status")
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    if len(argv) == 3:
        status = argv[2]
        # dagman always exits with 0 or 1
        # Command-line values are strings, so the comparison has to be
        # textual; comparing against the integer 1 could never match.
        if str(status) == '1':
            status = pfwdefs.PF_EXIT_FAILURE
    else:
        print("summary: Missing status value")
        status = None

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': argv[1]})

    log_pfw_event(config, 'process', 'mngr', 'j', ['posttask', status])

    msgstr = ""
    subject = ""

    # Exactly one of these messages is chosen.  The success/abort cases are
    # siblings of the DRYRUN case (not nested inside it), so a dryrun keeps
    # its own message and normal runs are no longer sent an empty body.
    if not status:
        msg1 = f"Processing finished with unknown results.\n{msgstr}"
    elif pfwdefs.PF_DRYRUN in config and miscutils.convertBool(
            config.getfull(pfwdefs.PF_DRYRUN)):
        msg1 = f"Processing ended after DRYRUN\n{msgstr}"
    elif int(status) == pfwdefs.PF_EXIT_SUCCESS:
        msg1 = "Processing has successfully completed.\n"
    else:
        print(f"status = '{status}'")
        print("type(status) =", type(status))
        print(f"SUCCESS = '{pfwdefs.PF_EXIT_SUCCESS}'")
        print("type(SUCCESS) =", type(pfwdefs.PF_EXIT_SUCCESS))
        msg1 = f"Processing aborted with status {status}.\n"

    pfwemail.send_email(config, "processing", status, subject, msg1, '')

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        dbh.update_attempt_end_vals(config['pfw_attempt_id'], status)
    print(f"summary: status = '{status}'")
    print("summary:", msg1)
    print("summary: End")
    debugfh.close()
    return status
Example #9
0
def jobpre(argv=None):
    """ Program entry point

    Runs the pre-task step for a job: logs to a temporary ``jobpre_*.out``
    file, renames that log into the job directory once the config has been
    read, optionally records submit timestamps in the database and logs a
    ``pretask`` event.  stdout/stderr are redirected while running and
    restored before returning.

    Parameters
    ----------
    argv : list of str, optional
        Command line in the form ``[prog, configfile, jobnum]``.  Defaults
        to ``sys.argv``.

    Returns
    -------
    ``pfwdefs.PF_EXIT_FAILURE`` on a usage error, otherwise
    ``pfwdefs.PF_EXIT_SUCCESS``.
    """
    if argv is None:
        argv = sys.argv

    default_log = f"jobpre_{random.randint(1,10000000):08d}.out"
    debugfh = open(default_log, 'w')

    tmpfn = debugfh.name
    outorig = sys.stdout
    errorig = sys.stderr
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv)) # command line for debugging
    print(os.getcwd())

    if len(argv) < 3:
        print("Usage: jobpre configfile jobnum")
        debugfh.close()
        # put the real streams back so the caller is not left writing to a
        # closed file
        sys.stdout = outorig
        sys.stderr = errorig
        return pfwdefs.PF_EXIT_FAILURE

    # use the argv handed to this function, not the process-wide sys.argv
    configfile = argv[1]
    jobnum = argv[2]    # could also be uberctrl

    # read wcl file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    blockname = config.getfull('blockname')
    blkdir = config.get('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename('job', {pfwdefs.PF_CURRVALS: {pfwdefs.PF_JOBNUM:jobnum,
                                                                     'flabel': 'jobpre',
                                                                     'fsuffix':'out'}})
    new_log_name = f"{blkdir}/{tjpad}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")

    # the log must be closed and the streams restored before moving the file
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)

    # reuse the config's DB handle when it has one, otherwise open a new one
    dbh = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        if config.dbh is None:
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                              config.getfull('submit_des_db_section'))
        else:
            dbh = config.dbh

    # reopen the renamed log in append mode, through Messaging when QCF is on
    if 'use_qcf' in config and config['use_qcf']:
        debugfh = Messaging.Messaging(new_log_name, 'jobpre.py', config['pfw_attempt_id'], dbh=dbh, mode='a+', usedb=dbh is not None)
    else:
        debugfh = open(new_log_name, 'a+')

    sys.stdout = debugfh
    sys.stderr = debugfh

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        # the same timestamp is stored for both submit-time columns
        ctstr = dbh.get_current_timestamp_str()
        dbh.update_job_info(config, tjpad, {'condor_submit_time': ctstr,
                                            'target_submit_time': ctstr})

    log_pfw_event(config, blockname, tjpad, 'j', ['pretask'])

    miscutils.fwdebug_print("jobpre done")
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    return pfwdefs.PF_EXIT_SUCCESS
Example #10
0
def jobpost(argv=None):
    """Performs steps needed after a pipeline job.

    Redirects stdout/stderr to a temporary ``jobpost_*`` log (renamed once
    the config has been read), optionally updates job and task rows in the
    database from the job output and ``runjob.log``, deletes the input
    tarball and unpacks/deletes the output tarball.

    Parameters
    ----------
    argv : list of str, optional
        Command line in the form
        ``[prog, configfile, block, jobnum, inputtar, outputtar, retval]``.
        Defaults to ``sys.argv``.

    Returns
    -------
    ``pfwdefs.PF_EXIT_FAILURE`` on a usage error; otherwise the integer
    value of ``pfwdefs.PF_EXIT_SUCCESS`` when the job's retval was success,
    or of ``pfwdefs.PF_EXIT_FAILURE`` for any other retval.
    """
    # maps keys of the parsed condor log record to the DB column names
    condor2db = {'jobid': 'condor_job_id',
                 'csubmittime': 'condor_submit_time',
                 'gsubmittime': 'target_submit_time',
                 'starttime': 'condor_start_time',
                 'endtime': 'condor_end_time'}

    if argv is None:
        argv = sys.argv

    debugfh = tempfile.NamedTemporaryFile(mode='w+', prefix='jobpost_', dir='.', delete=False)
    tmpfn = debugfh.name
    sys.stdout = debugfh
    sys.stderr = debugfh

    miscutils.fwdebug_print("temp log name = %s" % tmpfn)
    print('cmd>', ' '.join(argv))  # print command line for debugging

    if len(argv) < 7:
        # open file to catch error messages about command line
        print('Usage: jobpost.py configfile block jobnum inputtar outputtar retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    blockname = argv[2]
    jobnum = argv[3]
    inputtar = argv[4]
    outputtar = argv[5]
    retval = pfwdefs.PF_EXIT_FAILURE
    if len(argv) == 7:
        # take the value from the argv given to this function rather than
        # the process-wide sys.argv
        retval = int(argv[6])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("configfile = %s" % configfile)
        miscutils.fwdebug_print("block = %s" % blockname)
        miscutils.fwdebug_print("jobnum = %s" % jobnum)
        miscutils.fwdebug_print("inputtar = %s" % inputtar)
        miscutils.fwdebug_print("outputtar = %s" % outputtar)
        miscutils.fwdebug_print("retval = %s" % retval)

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")

    # now that have more information, rename output file
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("before get_filename")
    # the block name from the config replaces the one given on the command line
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # everything below uses paths relative to the job's directory
    os.chdir("%s/%s" % (blkdir, tjpad))
    new_log_name = config.get_filename('job', {pfwdefs.PF_CURRVALS: {pfwdefs.PF_JOBNUM: jobnum,
                                                                     'flabel': 'jobpost',
                                                                     'fsuffix': 'out'}})
    new_log_name = "%s" % (new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    debugfh.close()
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    dbh = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))

        # get job information from the job stdout if exists
        (tjobinfo, tjobinfo_task) = parse_job_output(config, jobnum, dbh, retval)

        if dbh and len(tjobinfo) > 0:
            print("tjobinfo: ", tjobinfo)
            dbh.update_tjob_info(config['task_id']['job'][jobnum], tjobinfo)

        # get job information from the condor job log
        logfilename = 'runjob.log'
        if os.path.exists(logfilename) and os.path.getsize(logfilename) > 0:  # if made it to submitting/running jobs
            # best effort: any failure while reading the condor log is
            # printed to the log and the rest of the post step continues
            try:
                # update job info in DB from condor log
                print("Updating job info in DB from condor log")
                condorjobinfo = pfwcondor.parse_condor_user_log(logfilename)
                if len(list(condorjobinfo.keys())) > 1:
                    print("More than single job in job log")
                j = list(condorjobinfo.keys())[0]
                cjobinfo = condorjobinfo[j]
                djobinfo = {}
                for ckey, dkey in condor2db.items():
                    if ckey in cjobinfo:
                        djobinfo[dkey] = cjobinfo[ckey]
                print(djobinfo)
                dbh.update_job_info(config, cjobinfo['jobname'], djobinfo)

                if 'holdreason' in cjobinfo and cjobinfo['holdreason'] is not None:
                    msg = "Condor HoldReason: %s" % cjobinfo['holdreason']
                    print(msg)
                    if dbh:
                        Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                              config['task_id']['job'][jobnum],
                                              msg, pfwdefs.PFWDB_MSG_WARN)

                if 'abortreason' in cjobinfo and cjobinfo['abortreason'] is not None:
                    tjobinfo_task['start_time'] = cjobinfo['starttime']
                    tjobinfo_task['end_time'] = cjobinfo['endtime']
                    # an abort reason mentioning condor_rm is recorded as an
                    # operator delete, anything else as a condor failure
                    if 'condor_rm' in cjobinfo['abortreason']:
                        tjobinfo_task['status'] = pfwdefs.PF_EXIT_OPDELETE
                    else:
                        tjobinfo_task['status'] = pfwdefs.PF_EXIT_CONDOR
            except Exception:
                (extype, exvalue, trback) = sys.exc_info()
                traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
        else:
            print("Warning:  no job condor log file")

        if dbh:
            # update job task
            # fill in defaults for fields that were not determined above
            if 'status' not in tjobinfo_task:
                tjobinfo_task['status'] = pfwdefs.PF_EXIT_CONDOR
            if 'end_time' not in tjobinfo_task:
                tjobinfo_task['end_time'] = datetime.now()
            wherevals = {'id': config['task_id']['job'][jobnum]}
            dbh.basic_update_row('task', tjobinfo_task, wherevals)
            dbh.commit()

    log_pfw_event(config, blockname, jobnum, 'j', ['posttask', retval])

    # input wcl should already exist in untar form
    if os.path.exists(inputtar):
        print("found inputtar: %s" % inputtar)
        os.unlink(inputtar)
    else:
        print("Could not find inputtar: %s" % inputtar)

    # untar output wcl tar and delete tar
    if os.path.exists(outputtar):
        print("Size of output wcl tar:", os.path.getsize(outputtar))
        if os.path.getsize(outputtar) > 0:
            print("found outputtar: %s" % outputtar)
            pfwutils.untar_dir(outputtar, '..')
            os.unlink(outputtar)
        else:
            msg = "Warn: outputwcl tarball (%s) is 0 bytes." % outputtar
            print(msg)
            if dbh:
                Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                      config['task_id']['job'][jobnum],
                                      msg, pfwdefs.PFWDB_MSG_WARN)
    else:
        msg = "Warn: outputwcl tarball (%s) does not exist." % outputtar
        print(msg)
        if dbh:
            Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                                  config['task_id']['job'][jobnum],
                                  msg, pfwdefs.PFWDB_MSG_WARN)

    # every non-success value is reported as the generic failure code
    if retval != pfwdefs.PF_EXIT_SUCCESS:
        miscutils.fwdebug_print("Setting failure retval")
        retval = pfwdefs.PF_EXIT_FAILURE

    miscutils.fwdebug_print("Returning retval = %s" % retval)
    miscutils.fwdebug_print("jobpost done")
    debugfh.close()
    return int(retval)
Example #11
0
def blockpre(argv=None):
    """Program entry point.

    Runs the pre-task step for a block: sets the block-dependent config
    values, writes the config back to ``configfile``, moves the log into the
    block directory, writes the block's condor description and logs a
    ``pretask`` event.  stdout/stderr are redirected to the log file.

    Parameters
    ----------
    argv : list of str, optional
        Command line in the form ``[prog, configfile]`` (one extra argument
        is tolerated).  Defaults to ``sys.argv``.

    Returns
    -------
    ``pfwdefs.PF_EXIT_FAILURE`` on a usage error, otherwise
    ``pfwdefs.PF_EXIT_SUCCESS``.
    """
    if argv is None:
        argv = sys.argv

    default_log = 'blockpre.out'

    debugfh = open(default_log, 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    # log the argv actually being processed, not the process-wide sys.argv
    print(' '.join(argv))  # command line for debugging

    if len(argv) < 2 or len(argv) > 3:
        print('Usage: blockpre configfile')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})

    # make sure values which depend upon block are set correctly
    config.set_block_info()
    miscutils.fwdebug_print("blknum = %s" % config[pfwdefs.PF_BLKNUM])

    # persist the updated block values back into the config file
    with open(configfile, 'w') as cfgfh:
        config.write(cfgfh)

    blockname = config.getfull('blockname')
    miscutils.fwdebug_print("blockname = %s" % blockname)

    blkdir = config.getfull('block_dir')

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename(
        'block',
        {pfwdefs.PF_CURRVALS: {
            'flabel': 'blockpre',
            'fsuffix': 'out'
        }})
    new_log_name = "%s/%s" % (blkdir, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    # close before moving the log, then reopen it in append mode
    debugfh.close()
    os.chmod(default_log, 0o666)
    os.rename(default_log, new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    os.chdir(blkdir)

    write_block_condor(config)

    log_pfw_event(config, blockname, 'blockpre', 'j', ['pretask'])

    miscutils.fwdebug_print("blockpre done")
    debugfh.close()

    return pfwdefs.PF_EXIT_SUCCESS
Example #12
0
def logpost(argv=None):
    """ Program entry point

    Runs the post-task logging step for a sub-block: logs to ``logpost.out``,
    renames that log into the block directory once the config has been read,
    and logs a ``posttask`` event.  stdout/stderr are redirected while
    running and restored before returning.

    Parameters
    ----------
    argv : list of str, optional
        Command line in the form
        ``[prog, configfile, block, subblocktype, subblock, retval]``.
        Defaults to ``sys.argv``.

    Returns
    -------
    ``pfwdefs.PF_EXIT_FAILURE`` on a usage error; otherwise ``int(retval)``,
    where a non-success retval is replaced by ``pfwdefs.PF_EXIT_FAILURE``
    unless the sub-block is ``begblock``.
    """
    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = open('logpost.out', 'w')
    outorig = sys.stdout
    errorig = sys.stderr
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))  # print command line for debugging

    if len(argv) < 5:
        print("Usage: logpost configfile block subblocktype subblock retval")
        debugfh.close()
        # put the real streams back so the caller is not left writing to a
        # closed file
        sys.stdout = outorig
        sys.stderr = errorig
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    blockname = argv[2]
    subblocktype = argv[3]
    subblock = argv[4]
    retval = pfwdefs.PF_EXIT_FAILURE
    if len(argv) == 6:
        # take the value from the argv given to this function rather than
        # the process-wide sys.argv
        retval = int(argv[5])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print(f"configfile = {configfile}")
        miscutils.fwdebug_print(f"block = {blockname}")
        miscutils.fwdebug_print(f"subblock = {subblock}")
        miscutils.fwdebug_print(f"retval = {retval}")

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")

    # now that have more information, rename output file
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("before get_filename")
    # the block name from the config replaces the one given on the command line
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    new_log_name = config.get_filename(
        'block', {
            pfwdefs.PF_CURRVALS: {
                'flabel': '${subblock}_logpost',
                'subblock': subblock,
                'fsuffix': 'out'
            }
        })
    new_log_name = f"{blkdir}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")

    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig

    os.chmod('logpost.out', 0o666)
    os.rename('logpost.out', new_log_name)
    # reopen the renamed log in append mode, through Messaging when QCF is on
    if 'use_qcf' in config and config['use_qcf']:
        if config.dbh is None:
            # no handle on the config: export the DB settings through the
            # environment before constructing Messaging without a handle
            if 'submit_des_services' in config:
                os.environ['DES_SERVICES'] = config.getfull(
                    'submit_des_services')
            os.environ['DES_DB_SECTION'] = config.getfull(
                'submit_des_db_section')

            debugfh = Messaging.Messaging(new_log_name,
                                          'logpost.py',
                                          config['pfw_attempt_id'],
                                          mode='a+')
        else:
            debugfh = Messaging.Messaging(new_log_name,
                                          'logpost.py',
                                          config['pfw_attempt_id'],
                                          dbh=config.dbh,
                                          mode='a+')

    else:
        debugfh = open(new_log_name, 'a+')

    sys.stdout = debugfh
    sys.stderr = debugfh

    log_pfw_event(config, blockname, subblock, subblocktype,
                  ['posttask', retval])

    # In order to continue, make pipelines dagman jobs exit with success status
    #if 'pipelinesmngr' not in subblock:
    #    retval = pfwdefs.PF_EXIT_SUCCESS

    #    # If error at non-manager level, send failure email
    #    if retval != pfwdefs.PF_EXIT_SUCCESS and \
    #        'mngr' not in subblock:
    #        send_subblock_email(config, blockname, subblock, retval)

    # begblock keeps its original retval; every other sub-block reports any
    # non-success value as the generic failure code
    if subblock != 'begblock' and retval != pfwdefs.PF_EXIT_SUCCESS:
        miscutils.fwdebug_print("Setting failure retval")
        retval = pfwdefs.PF_EXIT_FAILURE

    miscutils.fwdebug_print(f"returning retval = {retval}")
    miscutils.fwdebug_print("logpost done")
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    miscutils.fwdebug_print(f"Exiting with = {retval}")
    return int(retval)
Example #13
0
def jobpre(argv=None):
    """Program entry point.

    Runs the pre-task step for a job: logs to a temporary ``jobpre_*`` file,
    renames that log into the job directory once the config has been read,
    optionally records submit timestamps in the database and logs a
    ``pretask`` event.  stdout/stderr are redirected to the log file.

    Parameters
    ----------
    argv : list of str, optional
        Command line in the form ``[prog, configfile, jobnum]``.  Defaults
        to ``sys.argv``.

    Returns
    -------
    ``pfwdefs.PF_EXIT_FAILURE`` on a usage error, otherwise
    ``pfwdefs.PF_EXIT_SUCCESS``.
    """
    if argv is None:
        argv = sys.argv

    debugfh = tempfile.NamedTemporaryFile(mode='w+',
                                          prefix='jobpre_',
                                          dir='.',
                                          delete=False)
    tmpfn = debugfh.name
    sys.stdout = debugfh
    sys.stderr = debugfh

    # log the argv actually being processed, not the process-wide sys.argv
    print(' '.join(argv))  # command line for debugging
    print(os.getcwd())

    if len(argv) < 3:
        print('Usage: jobpre configfile jobnum')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    jobnum = argv[2]  # could also be uberctrl

    # read wcl file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    blockname = config.getfull('blockname')
    blkdir = config.get('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename(
        'job', {
            pfwdefs.PF_CURRVALS: {
                pfwdefs.PF_JOBNUM: jobnum,
                'flabel': 'jobpre',
                'fsuffix': 'out'
            }
        })
    new_log_name = "%s/%s/%s" % (blkdir, tjpad, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    # close before moving the log, then reopen it in append mode
    debugfh.close()
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        # the same timestamp is stored for both submit-time columns
        ctstr = dbh.get_current_timestamp_str()
        dbh.update_job_info(config, tjpad, {
            'condor_submit_time': ctstr,
            'target_submit_time': ctstr
        })

    log_pfw_event(config, blockname, tjpad, 'j', ['pretask'])

    miscutils.fwdebug_print("jobpre done")
    debugfh.close()
    return pfwdefs.PF_EXIT_SUCCESS
def main(argv):
    """Build a file query from a module's list/file definition and run it.

    The search definition named by --search is looked up in the module's
    list section first, then in its file section.  Every query field listed
    in that definition is resolved (search definition, then module, then
    global config), variable-expanded and stored under the table it belongs
    to.  Optional joins and output fields are merged in, the query is run
    through queryutils.gen_file_list and the result is written to --qoutfile
    in the --qouttype format.

    Args:
        argv: command line arguments without the program name.

    Returns:
        0 on success.

    Raises:
        Exception: if the module or search is missing/unknown, a query field
            has no value, or the query returns no files.
    """
    parser = argparse.ArgumentParser(description='genquery.py')
    parser.add_argument('--qoutfile', action='store')
    parser.add_argument('--qouttype', action='store')
    parser.add_argument('--config', action='store', dest='configfile')
    parser.add_argument('--module', action='store', dest='modulename')
    parser.add_argument('--search', action='store', dest='searchname')
    args = parser.parse_args(argv)

    if args.modulename is None:
        raise Exception("Error: Must specify module\n")

    print(args.configfile)
    config = pfwconfig.PfwConfig({'wclfile':args.configfile})

    if args.modulename not in config[pfwdefs.SW_MODULESECT]:
        raise Exception(f"Error: module '{args.modulename}' does not exist.\n")

    module_dict = config[pfwdefs.SW_MODULESECT][args.modulename]

    if args.searchname is None:
        raise Exception("Error: need to define either list or file or search\n")

    # a list definition takes precedence over a file definition of the same name
    if pfwdefs.SW_LISTSECT in module_dict and \
       args.searchname in module_dict[pfwdefs.SW_LISTSECT]:
        search_dict = module_dict[pfwdefs.SW_LISTSECT][args.searchname]
    elif pfwdefs.SW_FILESECT in module_dict and \
         args.searchname in module_dict[pfwdefs.SW_FILESECT]:
        search_dict = module_dict[pfwdefs.SW_FILESECT][args.searchname]
    else:
        raise Exception(f"Error: Could not find either list or file by name {args.searchname} in module {args.modulename}\n")

    archive_names = []

    if config.getfull(pfwdefs.USE_HOME_ARCHIVE_INPUT) != 'never':
        archive_names.append(config.getfull(pfwdefs.HOME_ARCHIVE))

    if config.getfull(pfwdefs.USE_TARGET_ARCHIVE_INPUT) != 'never':
        archive_names.append(config.getfull(pfwdefs.TARGET_ARCHIVE))

    fields = miscutils.fwsplit(search_dict[pfwdefs.SW_QUERYFIELDS].lower())

    # optionally restrict the query to the current run
    if ('query_run' in config and 'fileclass' in search_dict and
            'fileclass' in config and search_dict['fileclass'] == config['fileclass']):
        query_run = config['query_run'].lower()
        if query_run == 'current':
            fields.append('run')
        elif query_run == 'allbutfirstcurrent':
            if 'current' not in config:
                raise Exception("Internal Error:  Current object doesn't exist\n")
            if 'curr_blocknum' not in config['current']:
                raise Exception("Internal Error:  current->curr_blocknum doesn't exist\n")

            # convert explicitly: comparing a str block number with 0 raises
            # TypeError on Python 3
            block_num = int(config['current']['curr_blocknum'])
            if block_num > 0:
                fields.append('run')

    query = {}
    qtable = search_dict['query_table']
    for fld in fields:
        # "table.field" targets another table; bare fields belong to qtable
        table = qtable
        if '.' in fld:
            table, fld = fld.split('.')

        if fld in search_dict:
            value = search_dict[fld]
        elif fld in module_dict:
            value = module_dict[fld]
        elif fld in config:
            value = config.getfull(fld)
        else:
            raise Exception(f"Error: genquery could not find value for query field {fld}\n")

        value = replfuncs.replace_vars(value, config,
                                       {pfwdefs.PF_CURRVALS: {'modulename': args.modulename},
                                        'searchobj': search_dict,
                                        intgdefs.REPLACE_VARS: True,
                                        'expand': True})[0]
        if value is None:
            raise Exception(f"Value=None for query field {fld}\n")

        # Split exactly once.  Testing for ':' after a ',' split would look
        # inside the resulting list and could pass that list (not a string)
        # back to fwsplit.
        if ',' in value or ':' in value:
            value = miscutils.fwsplit(value)

        query.setdefault(table, {}).setdefault('key_vals', {})[fld] = value

    # if specified, insert join into query hash
    if 'join' in search_dict:
        for j in miscutils.fwsplit(search_dict['join'].lower()):
            jmatch = re.search(r"(\S+)\.(\S+)\s*=\s*(\S+)", j)
            if jmatch:
                tinfo = query.setdefault(jmatch.group(1), {})
                if 'join' in tinfo:
                    tinfo['join'] += "," + j
                else:
                    tinfo['join'] = j

    # qtable may have no entry yet when every query field was qualified with
    # another table (or there were no fields), so create it on demand
    query.setdefault(qtable, {})['select_fields'] = ['filename']

    # check output fields for fields from other tables.
    if 'output_fields' in search_dict:
        for ofield in miscutils.fwsplit(search_dict['output_fields'].lower()):
            ofmatch = re.search(r"(\S+)\.(\S+)", ofield)
            if ofmatch:
                table = ofmatch.group(1)
                field = ofmatch.group(2)
            else:
                table = qtable
                field = ofield
            select_fields = query.setdefault(table, {}).setdefault('select_fields', [])
            if field not in select_fields:
                select_fields.append(field)

    # the select lists are stored as comma separated strings in the query
    for tinfo in query.values():
        if 'select_fields' in tinfo:
            tinfo['select_fields'] = ','.join(tinfo['select_fields'])

    if archive_names:
        query['file_archive_info'] = {'select_fields': 'compression'}
        query['file_archive_info']['join'] = f"file_archive_info.filename={qtable}.filename"
        query['file_archive_info']['key_vals'] = {'archive_name': ','.join(archive_names)}

    print("Calling gen_file_list with the following query:\n")
    miscutils.pretty_print_dict(query, out_file=None, sortit=False, indent=4)
    print("\n\n")
    dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                      config.getfull('submit_des_db_section'))
    try:
        files = queryutils.gen_file_list(dbh, query)
    finally:
        # release the connection even when the query itself fails
        dbh.close()

    if not files:
        raise Exception(f"genquery: query returned zero results for {args.searchname}\nAborting\n")

    ## output list
    lines = queryutils.convert_single_files_to_lines(files)
    queryutils.output_lines(args.qoutfile, lines, args.qouttype)

    return 0
def blockpost(argv=None):
    """Summarize a finished block, send notification email and update state.

    Output is captured in 'blockpost.out', which is renamed into the block
    directory once the config file has been read.  When DB output is enabled
    the block's task, job and wrapper records are inspected to refine the
    return value and to build the body of the notification email; afterwards
    the block task (and, on failure, the attempt) is ended in the DB.  On
    success the block number in the config file is incremented and the file
    is rewritten for the next block.

    Args:
        argv: [program, configfile, retval]; defaults to sys.argv.

    Returns:
        pfwdefs.PF_EXIT_FAILURE on a usage error, pfwdefs.PF_EXIT_DRYRUN for
        dry runs, otherwise the (possibly adjusted) block status as an int.
    """
    realstdout = sys.stdout
    realstderr = sys.stderr

    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = open('blockpost.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh
    print(' '.join(argv))  # print command line for debugging

    print(f"running on {socket.gethostname()}")

    if len(argv) != 3:
        print('Usage: blockpost.py configfile retval')
        debugfh.close()
        # do not leave the process streams pointing at a closed file
        sys.stdout = realstdout
        sys.stderr = realstderr
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    retval = int(argv[2])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print(f"configfile = {configfile}")
    miscutils.fwdebug_print(f"retval = {retval}")

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename('block',
                                       {pfwdefs.PF_CURRVALS: {'flabel': 'blockpost',
                                                              'fsuffix':'out'}})
    new_log_name = f"{blkdir}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")

    debugfh.close()
    os.chmod('blockpost.out', 0o666)
    os.rename('blockpost.out', new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    os.chdir(blkdir)

    log_pfw_event(config, blockname, 'blockpost', 'j', ['posttask', retval])

    dryrun = config.getfull(pfwdefs.PF_DRYRUN)
    run = config.getfull('run')
    attid = config['pfw_attempt_id']
    blknum = int(config.getfull(pfwdefs.PF_BLKNUM))
    blktid = None

    msg2 = ""           # body of the notification email, built up below
    dbh = None
    qdbh = None
    job_byblk = {}
    wrap_byjob = {}
    wrapinfo = {}
    jobinfo = {}
    jobwrap = {}
    failedwraps = {}
    whyfailwraps = {}   # mod failures for other modname, shouldn't happen
    usedb = miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT))
    verify_files = miscutils.convertBool(config.getfull('verify_files'))
    verify_status = 0
    sem = None
    if verify_files and not usedb:
        print('Skipping file verification due to lack of database connection')
    if usedb:
        try:
            miscutils.fwdebug_print("Connecting to DB")
            if config.dbh is None:
                dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                                  config.getfull('submit_des_db_section'))
            else:
                dbh = config.dbh
            if verify_files:
                curs = dbh.cursor()
                # NOTE(review): statement is assembled from a config value;
                # consider a bind variable if the DB layer supports it.
                curs.execute(f"select root from ops_archive where name='{config.getfull('home_archive')}'")
                rows = curs.fetchall()
                # count defensively so a None result still yields the
                # intended error message instead of a TypeError
                num_rows = 0 if rows is None else len(rows)
                if num_rows != 1:
                    raise Exception(f"Invalid archive name ({config.getfull('home_archive')}).   Found {num_rows} rows in ops_archive")
                root = rows[0][0]
                if not os.path.isdir(root):
                    print(f"Cannot read archive root directory:{config.getfull('home_archive')} This program must be run on an NCSA machine with access to the archive storage system.")
                sem = dbsem.DBSemaphore('verify_files_10', None, config.getfull('submit_des_services'), config.getfull('submit_des_db_section'), connection=dbh)
                print("\n\nVerifying archive file sizes on disk (0 is success)")
                verify_status = cu.compare(dbh=dbh, archive=config.getfull('home_archive'), pfwid=attid, md5sum=False, debug=False, script=False, verbose=False, silent=True)
                if sem is not None:
                    del sem
                    sem = None
                print(f"  Verification of files returned status {verify_status:d}")
                if verify_status != 0:
                    print("  This indicates that one or more files do not have the correct file size (based on DB entries). Run")
                    print(f"\n    compare_db.py --des_services {config.getfull('submit_des_services')} --section {config.getfull('submit_des_db_section')} --archive {config.getfull('home_archive')} --pfwid {int(attid):d} --verbose")
                    print("\n  to see the details.")

            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_QCF)):
                import qcframework.qcfdb as qcfdb
                # share the existing connection with the QCF layer
                qdbh = qcfdb.QCFDB(connection=dbh)

            print(f"\n\nChecking non-job block task status from task table in DB ({pfwdefs.PF_EXIT_SUCCESS} is success)")
            num_bltasks_failed = 0
            bltasks = {}
            blktid = None
            if ('block' in config['task_id'] and
                    str(blknum) in config['task_id']['block']):
                blktid = int(config['task_id']['block'][str(blknum)])
                miscutils.fwdebug_print("Getting block task info from DB")
                start_time = time.time()
                bltasks = dbh.get_block_task_info(blktid)
                end_time = time.time()
                miscutils.fwdebug_print(f"Done getting block task info from DB ({end_time - start_time} secs)")
                for bltdict in bltasks.values():
                    print("Block status = ", bltdict['status'])
                    if bltdict['status'] == pfwdefs.PF_EXIT_DRYRUN:
                        print("setting return value to dryrun")
                        retval = bltdict['status']
                    elif bltdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        num_bltasks_failed += 1
                        msg2 += f"\t{bltdict['name']}"
                        if bltdict['label'] is not None:
                            msg2 += f" - {bltdict['label']}"
                        msg2 += " failed\n"

                        if bltdict['name'] == 'begblock':
                            # try to read the begblock.out and begblock.err files
                            print("Trying to get begblock.out and begblock.err")
                            msg2 += get_subblock_output("begblock")

                            # try to get QCF messages (especially from query codes)
                            begblock_tid = int(config['task_id']['begblock'])
                            sql = f"select id from task where parent_task_id={begblock_tid:d} and status!=0"
                            curs = dbh.cursor()
                            curs.execute(sql)
                            res = curs.fetchall()
                            msg2 += "\n===== QCF Messages =====\n"
                            msg2 += "\n begblock\n"
                            wrapids = [blktid, begblock_tid]
                            for r in res:
                                wrapids.append(r[0])

                            wrapmsg = {}
                            if qdbh is not None:
                                miscutils.fwdebug_print("Querying QCF messages")
                                start_time = time.time()
                                wrapmsg = qdbh.get_qcf_messages_for_wrappers(wrapids)
                                end_time = time.time()
                                miscutils.fwdebug_print(f"Done querying QCF messages ({end_time-start_time} secs)")
                                miscutils.fwdebug_print(f"wrapmsg = {wrapmsg}")
                            if not wrapmsg:
                                msg2 += "    No QCF messages\n"
                            else:
                                for msgs in wrapmsg.values():
                                    for m in msgs:
                                        msg2 += "    " + m['message'] + "\n"

                        retval = pfwdefs.PF_EXIT_FAILURE

                if retval != pfwdefs.PF_EXIT_DRYRUN:
                    print(f"\n\nChecking job status from pfw_job table in DB ({pfwdefs.PF_EXIT_SUCCESS} is success)")

                    miscutils.fwdebug_print("Getting job info from DB")
                    start_time = time.time()
                    jobinfo = dbh.get_job_info({'pfw_block_task_id': blktid})
                    end_time = time.time()
                    miscutils.fwdebug_print(f"Done getting job info from DB ({end_time - start_time} secs)")
                    miscutils.fwdebug_print("Getting wrapper info from DB")
                    start_time = time.time()
                    wrapinfo = dbh.get_wrapper_info(pfw_attempt_id=attid, pfw_block_task_id=blktid)
                    # job wrapper details are only needed when something failed
                    if retval != pfwdefs.PF_EXIT_SUCCESS:
                        jobwrap = dbh.get_jobwrapper_info(id=attid)
                    end_time = time.time()
                    miscutils.fwdebug_print(f"Done getting wrapper info from DB ({end_time - start_time} secs)")
            else:
                msg = f"Could not find task id for block {blockname} in config.des"
                print("Error:", msg)
                if 'attempt' in config['task_id']:
                    miscutils.fwdebug_print("Saving pfw message")
                    start_time = time.time()
                    Messaging.pfw_message(dbh, attid, config['task_id']['attempt'],
                                          msg, pfwdefs.PFWDB_MSG_INFO, 'blockpost.out', 0)
                    end_time = time.time()
                    miscutils.fwdebug_print(f"Done saving pfw message ({end_time - start_time} secs)")
                print("all the task ids:", config['task_id'])


            archive = None
            if pfwdefs.HOME_ARCHIVE in config:
                archive = config.getfull(pfwdefs.HOME_ARCHIVE)
            logfullnames = dbh.get_log_fullnames(attid, archive)
            print("len(jobinfo) = ", len(jobinfo))
            print("len(wrapinfo) = ", len(wrapinfo))
            job_byblk = pfwutils.index_job_info(jobinfo)
            print("blktid: ", blktid)
            print("job_byblk:", job_byblk)

            if blktid not in job_byblk:
                print(f"Warn: could not find jobs for block {blknum}")
                print("      This is ok if attempt died before jobs ran")
                print("      block task_ids in job_byblk:", list(job_byblk.keys()))
            else:
                wrap_byjob, _ = pfwutils.index_wrapper_info(wrapinfo)

                # in case the post wrapper stuff failed, internally mark the task
                # as failed to retrieve the info later
                for wrapb in wrap_byjob.values():
                    for wrapper in wrapb.values():
                        if wrapper['parent_task_id'] in jobwrap and jobwrap[wrapper['parent_task_id']]['status'] is not None \
                           and wrapper['status'] is not None and jobwrap[wrapper['parent_task_id']]['status'] > wrapper['status']:
                            wrapper['status'] = jobwrap[wrapper['parent_task_id']]['status']

                # One report section per job.  Everything below stays inside
                # the loop so that every unsuccessful job (not just the last
                # one iterated) gets its path, messages and wrapper logs.
                for jobtid, jobdict in sorted(job_byblk[blktid].items()):
                    failedwraps[jobtid] = []
                    whyfailwraps[jobtid] = []

                    jobkeys = ""

                    # don't print out successful wrappers
                    if jobtid in wrap_byjob and jobdict['status'] == pfwdefs.PF_EXIT_SUCCESS:
                        continue

                    if jobdict['jobkeys'] is not None:
                        jobkeys = jobdict['jobkeys']

                    submit_job_path = f"{config.getfull('work_dir')}/B{int(config.getfull('blknum')):02d}-{config.getfull('blockname'):s}/{int(jobdict['jobnum']):04d}"
                    msg2 += f"\n\t{pfwutils.pad_jobnum(jobdict['jobnum'])} ({jobkeys}) "


                    if jobtid not in wrap_byjob:
                        msg2 += "\tNo wrapper instances"
                    else:
                        # the module of the highest numbered wrapper is the
                        # one the job was working on when it stopped
                        maxwrap = max(wrap_byjob[jobtid])
                        modname = wrap_byjob[jobtid][maxwrap]['modname']

                        msg2 += f"{len(wrap_byjob[jobtid]):d}/{jobdict['expect_num_wrap']}  {modname}"

                        # determine wrappers for this job without success exit
                        for wrapnum, wdict in wrap_byjob[jobtid].items():
                            if wdict['status'] is None or wdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                                if wdict['modname'] == modname:
                                    failedwraps[jobtid].append(wrapnum)
                                else:
                                    whyfailwraps[jobtid].append(wrapnum)

                    if jobdict['status'] == pfwdefs.PF_EXIT_EUPS_FAILURE:
                        msg2 += " - FAIL - EUPS setup failure"
                        retval = jobdict['status']
                    elif jobdict['status'] == pfwdefs.PF_EXIT_CONDOR:
                        msg2 += " - FAIL - Condor/Globus failure"
                        retval = jobdict['status']
                    elif jobdict['status'] is None:
                        msg2 += " - FAIL - NULL status"
                        retval = pfwdefs.PF_EXIT_FAILURE
                    elif jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += " - FAIL - Non-zero status"
                        retval = jobdict['status']

                    if jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += f"\n\t\t{submit_job_path}/runjob.out "

                    msg2 += '\n'

                    # print pfw_messages
                    if 'message' in jobdict:
                        print('\nmessages: ', jobdict['message'])
                        for msgdict in sorted(jobdict['message'], key=lambda k: k['message_time']):
                            level = int(msgdict['message_lvl'])
                            levelstr = 'info'
                            if level == pfwdefs.PFWDB_MSG_WARN:
                                levelstr = 'WARN'
                            elif level == pfwdefs.PFWDB_MSG_ERROR:
                                levelstr = 'ERROR'

                            msg2 += "\t\t{} - {}\n".format(levelstr, msgdict['message'].replace('\n', '\n\t\t\t'))

                    if jobtid in wrap_byjob:
                        # print log file name for failed/unfinished wrappers
                        for wrapnum in failedwraps[jobtid]:
                            wrapdict = wrap_byjob[jobtid][wrapnum]
                            if wrapdict['log'] in logfullnames:
                                msg2 += f"\t\t{wrapnum} - {logfullnames[wrapdict['log']]}\n"
                            else:
                                msg2 += f"\t\t{wrapnum} - Could not find log in archive {wrapdict['log']})\n"
                            wrapmsg = get_qcf_messages(qdbh, [wrapdict['task_id']])
                            msg2 += print_qcf_messages(wrapdict, wrapmsg)

                        msg2 += '\n'

                        # If weirdness happened in run, print a message
                        if whyfailwraps[jobtid]:
                            msg2 += "\n*** Contact framework developers.   Wrappers ran after at least 1 wrapper from a previous module that doesn't have success status.\n"
                            # wrapper numbers need not be strings
                            msg2 += f"\t{','.join(str(wnum) for wnum in whyfailwraps[jobtid])}\n"

        except Exception as exc:
            # Status gathering is best effort: report the problem in the log
            # and the email, and mark the block as failed.
            if sem is not None:
                del sem
            msg2 += "\n\nEncountered error trying to gather status information for email."
            msg2 += "\nCheck output for blockpost for further details."
            print("\n\nEncountered error trying to gather status information for email")
            print(f"{exc.__class__.__name__}: {str(exc)}")
            (extype, exvalue, trback) = sys.exc_info()
            traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
            retval = pfwdefs.PF_EXIT_FAILURE
    # a non-zero file verification status turns the block into a failure
    retval = int(retval) + verify_status
    print("before email retval =", retval)

    when_to_email = 'run'
    if 'when_to_email' in config:
        when_to_email = config.getfull('when_to_email').lower()

    if miscutils.convertBool(dryrun):
        if when_to_email != 'never':
            print("dryrun = ", dryrun)
            print("Sending dryrun email")
            if retval == pfwdefs.PF_EXIT_DRYRUN:
                msg1 = f"{run}:  In dryrun mode, block {blockname} has finished successfully."
            else:
                msg1 = f"{run}:  In dryrun mode, block {blockname} has failed."

            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending dryrun email")
            print("retval = ", retval)
        retval = pfwdefs.PF_EXIT_DRYRUN
    elif retval:
        if when_to_email != 'never':
            print("Sending block failed email\n")
            msg1 = f"{run}:  block {blockname} has failed."
            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending failed email")
            print("retval = ", retval)
    elif retval == pfwdefs.PF_EXIT_SUCCESS:
        if when_to_email == 'block':
            msg1 = f"{run}:  block {blockname} has finished successfully."
            msg2 = ""
            print("Sending success email\n")
            send_email(config, blockname, retval, "", msg1, msg2)
        elif when_to_email == 'run':
            # only the last block of the run triggers the success email
            numblocks = len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ','))
            if int(config[pfwdefs.PF_BLKNUM]) == numblocks:
                msg1 = f"{run}:  run has finished successfully."
                msg2 = ""
                print("Sending success email\n")
                send_email(config, blockname, retval, "", msg1, msg2)
            else:
                print("Not sending run email because not last block")
                print("retval = ", retval)
        else:
            print("Not sending success email")
            print("retval = ", retval)
    else:
        print("Not sending email")
        print("retval = ", retval)

    # Store values in DB and hist file
    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        if dbh is None:
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'), config.getfull('submit_des_db_section'))
        if blktid is not None:
            print("Updating end of block task", blktid)
            dbh.end_task(blktid, retval, True)
        else:
            print("Could not update end of block task without block task id")
        if retval != pfwdefs.PF_EXIT_SUCCESS:
            print("Updating end of attempt", config['task_id']['attempt'])
            dbh.end_task(config['task_id']['attempt'], retval, True)
        dbh.commit()

    print("before next block retval = ", retval)
    if retval == pfwdefs.PF_EXIT_SUCCESS:
        # Get ready for next block
        config.inc_blknum()
        with open(configfile, 'w') as cfgfh:
            config.write(cfgfh)
        print("new blknum = ", config[pfwdefs.PF_BLKNUM])
        print("number of blocks = ", len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')))
        # past the last block: the whole attempt passed
        if int(config[pfwdefs.PF_BLKNUM]) > len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')) and  miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
            updatevals = {'PROCESSING_STATE': 'PASS'}
            wherevals = {'PFW_ATTEMPT_ID': attid}
            dbh.basic_update_row('ATTEMPT_STATE', updatevals, wherevals)
            dbh.commit()
    elif miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        updatevals = {'PROCESSING_STATE': 'FAIL'}
        wherevals = {'PFW_ATTEMPT_ID': attid}
        dbh.basic_update_row('ATTEMPT_STATE', updatevals, wherevals)
        dbh.commit()
    if dbh is not None:
        dbh.close()
    miscutils.fwdebug_print(f"Returning retval = {retval} ({type(retval)})")
    miscutils.fwdebug_print("END")
    # Emit the final debug lines before the log file is closed; they report
    # the value that is actually returned.
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print(f"Exiting with = {retval}")
        miscutils.fwdebug_print(f"type of exitcode = {type(retval)}")
    debugfh.close()

    sys.stdout = realstdout
    sys.stderr = realstderr

    return int(retval)
Example #16
0
def logpre(argv=None):
    """Record the 'pretask' event for a sub-block.

    Output first goes to 'logpre.out'; once the config file has been read the
    log is renamed to the block-specific name inside the block directory and
    reopened in append mode (through Messaging when QCF is enabled).  The
    process-wide stdout/stderr are restored before returning.

    Args:
        argv: [program, configfile, block, subblocktype, subblock]; defaults
            to sys.argv.  The block name is taken from the config file, not
            from argv.

    Returns:
        pfwdefs.PF_EXIT_FAILURE on a usage error, else pfwdefs.PF_EXIT_SUCCESS.
    """
    if argv is None:
        argv = sys.argv

    default_log = 'logpre.out'
    debugfh = open(default_log, 'w')
    outorig = sys.stdout
    errorig = sys.stderr
    sys.stdout = debugfh
    sys.stderr = debugfh

    # log the arguments actually being processed, which differ from
    # sys.argv when the caller passes argv explicitly
    print(' '.join(argv)) # command line for debugging

    if len(argv) < 5:
        print("Usage: logpre configfile block subblocktype subblock")
        debugfh.close()
        # do not leave the process streams pointing at a closed file
        sys.stdout = outorig
        sys.stderr = errorig
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    subblocktype = argv[3]
    subblock = argv[4]

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    new_log_name = config.get_filename('block',
                                       {pfwdefs.PF_CURRVALS: {'subblock': subblock,
                                                              'flabel': '${subblock}_logpre',
                                                              'fsuffix':'out'}})
    new_log_name = f"{blkdir}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")
    debugfh.close()

    os.chmod(default_log, 0o666)
    os.rename(default_log, new_log_name)

    # restore the real streams while the final log handle is being set up
    sys.stdout = outorig
    sys.stderr = errorig

    # NOTE(review): this is a plain truthiness test, so a non-empty string
    # such as 'false' enables QCF here -- confirm whether convertBool is wanted
    if 'use_qcf' in config and config['use_qcf']:
        if config.dbh is None:
            # no shared connection: export the DB settings through the environment
            if 'submit_des_services' in config:
                os.environ['DES_SERVICES'] = config.getfull('submit_des_services')
            os.environ['DES_DB_SECTION'] = config.getfull('submit_des_db_section')

            debugfh = Messaging.Messaging(new_log_name, 'logpre.py', config['pfw_attempt_id'], mode='a+')
        else:
            debugfh = Messaging.Messaging(new_log_name, 'logpre.py', config['pfw_attempt_id'], dbh=config.dbh, mode='a+')

    else:
        debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh


    log_pfw_event(config, blockname, subblock, subblocktype, ['pretask'])

    print("logpre done")
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    return pfwdefs.PF_EXIT_SUCCESS
Example #17
0
def _begrun_report_failure(pfw_dbh, config, msg):
    """Report a begrun failure: PFW DB message (if connected) plus email."""
    if config is None:
        # the config file itself could not be loaded, so there is no attempt
        # information to report against; the caller re-raises the error
        return
    if pfw_dbh is not None:
        Messaging.pfw_message(pfw_dbh, config['pfw_attempt_id'],
                              config['task_id']['attempt'], msg,
                              pfwdefs.PFWDB_MSG_WARN, 'begrun.out', 0)
    send_failed_email(config, msg)


def begrun(argv):
    """Perform the submit-side steps at the beginning of a processing attempt.

    When DB output is enabled the attempt task is marked as begun.  Unless
    this is a dry run, if home-archive output is in use and
    'submit_files_mvmt' is configured, the three WCL files of the attempt are
    registered with the home archive's file manager and copied home.  Finally
    the attempt's archive path is saved in the PFW tables.

    Args:
        argv: argument list whose first element is the config (WCL) file name.

    Raises:
        Exception, SystemExit: any failure is reported through the DB (when
            connected) and by email, then re-raised unchanged.
    """
    pfw_dbh = None
    # bound before the try so the handlers can tell "config never loaded"
    # apart from a later failure
    config = None
    try:
        configfile = argv[0]
        config = pfwconfig.PfwConfig({'wclfile': configfile})

        if miscutils.fwdebug_check(6, 'BEGRUN_DEBUG'):
            miscutils.fwdebug_print(
                'use_home_archive_output = %s' %
                config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT))

        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            import processingfw.pfwdb as pfwdb
            pfw_dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                                  config.getfull('submit_des_db_section'))
            pfw_dbh.begin_task(config['task_id']['attempt'], True)

        # the three wcl files to copy to the home archive
        origwcl = config['origwcl']
        expwcl = config['expwcl']
        fullwcl = config['fullwcl']

        is_dryrun = (pfwdefs.PF_DRYRUN in config and
                     miscutils.convertBool(config.getfull(pfwdefs.PF_DRYRUN)))

        # if not a dryrun and using a home archive for output
        if (config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT) != 'never'
                and 'submit_files_mvmt' in config
                and not is_dryrun):

            # get home archive info
            home_archive = config.getfull('home_archive')
            archive_info = config[pfwdefs.SW_ARCHIVESECT][home_archive]

            # load filemgmt class
            attempt_tid = config['task_id']['attempt']
            filemgmt = pfwutils.pfw_dynam_load_class(pfw_dbh, config,
                                                     attempt_tid, attempt_tid,
                                                     "filemgmt",
                                                     archive_info['filemgmt'],
                                                     archive_info)

            # save file information
            filemgmt.register_file_data('wcl', [origwcl, expwcl, fullwcl],
                                        config['pfw_attempt_id'], attempt_tid,
                                        False, None, None)
            copy_files_home(config, archive_info, filemgmt)
            filemgmt.commit()

        if pfw_dbh is not None:
            print("Saving attempt's archive path into PFW tables...", end=' ')
            pfw_dbh.update_attempt_archive_path(config)
            pfw_dbh.commit()
    except Exception as exc:
        msg = "begrun: %s: %s" % (exc.__class__.__name__, str(exc))
        _begrun_report_failure(pfw_dbh, config, msg)
        raise
    except SystemExit as exc:
        # SystemExit does not derive from Exception, so it needs its own handler
        msg = "begrun: SysExit=%s" % str(exc)
        _begrun_report_failure(pfw_dbh, config, msg)
        raise
Example #18
0
def logpost(argv=None):
    """Program entry point: log the post-task event for a (sub)block.

    Redirects stdout/stderr to a scratch file ``logpost.out`` while the
    command line and config are processed, then renames that file to the
    block-specific log name obtained from the config and keeps appending
    to it.

    Args:
        argv: Command-line style list laid out as
            ``[prog, configfile, block, subblocktype, subblock, retval]``.
            Defaults to ``sys.argv``.  ``retval`` is only read when exactly
            six entries are given; otherwise ``pfwdefs.PF_EXIT_FAILURE`` is
            assumed.

    Returns:
        int: ``pfwdefs.PF_EXIT_FAILURE`` when fewer than five entries are
        supplied; otherwise the (possibly normalized) return value.  Any
        non-success value is mapped to ``pfwdefs.PF_EXIT_FAILURE`` unless
        the subblock is ``'begblock'``.

    Raises:
        ValueError: if the ``retval`` entry is not an integer string.
    """
    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    # NOTE(review): sys.stdout/sys.stderr are left pointing at debugfh (even
    # after it is closed on return); the file is also not closed on an
    # exception, so an uncaught traceback is written into the log.
    debugfh = open('logpost.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))  # print command line for debugging

    if len(argv) < 5:
        print('Usage: logpost configfile block subblocktype subblock retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    blockname = argv[2]
    subblocktype = argv[3]
    subblock = argv[4]
    retval = pfwdefs.PF_EXIT_FAILURE
    if len(argv) == 6:
        # Read from the argv we were given, not the process-wide sys.argv,
        # so callers passing an explicit argument list get their own value.
        retval = int(argv[5])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("configfile = %s" % configfile)
        miscutils.fwdebug_print("block = %s" % blockname)
        miscutils.fwdebug_print("subblock = %s" % subblock)
        miscutils.fwdebug_print("retval = %s" % retval)

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")

    # now that have more information, rename output file
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("before get_filename")
    # From here on the block name comes from the config, replacing argv[2].
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    new_log_name = config.get_filename(
        'block', {
            pfwdefs.PF_CURRVALS: {
                'flabel': '${subblock}_logpost',
                'subblock': subblock,
                'fsuffix': 'out'
            }
        })
    new_log_name = "%s/%s" % (blkdir, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    # Close the scratch log, move it to its final name, and reopen it in
    # append mode so the earlier output is preserved.
    debugfh.close()
    os.chmod('logpost.out', 0o666)
    os.rename('logpost.out', new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    log_pfw_event(config, blockname, subblock, subblocktype,
                  ['posttask', retval])

    # In order to continue, make pipelines dagman jobs exit with success status
    #if 'pipelinesmngr' not in subblock:
    #    retval = pfwdefs.PF_EXIT_SUCCESS

    #    # If error at non-manager level, send failure email
    #    if retval != pfwdefs.PF_EXIT_SUCCESS and \
    #        'mngr' not in subblock:
    #        send_subblock_email(config, blockname, subblock, retval)

    # Collapse every non-success code to the single failure code, except for
    # the begblock subblock whose original value is passed through.
    if subblock != 'begblock' and retval != pfwdefs.PF_EXIT_SUCCESS:
        miscutils.fwdebug_print("Setting failure retval")
        retval = pfwdefs.PF_EXIT_FAILURE

    miscutils.fwdebug_print("returning retval = %s" % retval)
    miscutils.fwdebug_print("logpost done")
    debugfh.close()
    return int(retval)