Esempio n. 1
0
def endrun(configfile):
    """Finish a processing attempt: optional mass copy home, then end the task.

    Loads the workflow configuration from ``configfile`` and changes the
    working directory to ``../uberctrl``.  When the home-archive output
    setting lowercases to ``'run'``, the attempt's archive path is copied
    from the target archive to the home archive through
    ``archive_transfer_utils.archive_copy_dir``.  When DB output is enabled,
    the attempt task is then ended and committed in the PFW database.

    Args:
        configfile: Path of the WCL file passed to ``pfwconfig.PfwConfig``.

    Returns:
        ``pfwdefs.PF_EXIT_SUCCESS`` on the normal path, or
        ``pfwdefs.PF_EXIT_FAILURE`` when the configuration lacks the attempt
        archive path or usable target-archive information.
    """
    miscutils.fwdebug_print("BEG")

    config = pfwconfig.PfwConfig({'wclfile': configfile})
    os.chdir('../uberctrl')

    retval = pfwdefs.PF_EXIT_SUCCESS

    if pfwdefs.USE_HOME_ARCHIVE_OUTPUT in config and \
       config[pfwdefs.USE_HOME_ARCHIVE_OUTPUT].lower() == 'run':
        if pfwdefs.ATTEMPT_ARCHIVE_PATH not in config:
            print("Error:  Cannot find %s in config" % pfwdefs.ATTEMPT_ARCHIVE_PATH)
            print("\tIt is needed for the mass copy of the run back to the " \
                  "home archive at the end of the run")
            return pfwdefs.PF_EXIT_FAILURE

        # NOTE(review): getfull() receives the stored *value* here, whereas
        # every other call in this function passes a key -- confirm intent.
        archpath = config.getfull(config[pfwdefs.ATTEMPT_ARCHIVE_PATH])
        print("archpath =", archpath)

        # call archive transfer for target archive to home archive
        # check if using target archive
        target_info = None
        if pfwdefs.USE_TARGET_ARCHIVE_OUTPUT in config and \
           config.getfull(pfwdefs.USE_TARGET_ARCHIVE_OUTPUT).lower() != 'never':
            archives = config[pfwdefs.SW_ARCHIVESECT]
            if pfwdefs.TARGET_ARCHIVE in config and \
                    config.getfull(pfwdefs.TARGET_ARCHIVE) in archives:
                target_info = archives[config.getfull(pfwdefs.TARGET_ARCHIVE)]
            else:
                print("Error:  cannot determine info for target archive")
                return pfwdefs.PF_EXIT_FAILURE
        else:
            print("Error:  Asked to transfer outputs at end of run, but not using target archive")
            return pfwdefs.PF_EXIT_FAILURE

        home_info = None
        if pfwdefs.HOME_ARCHIVE in config:
            # Only touch the entry once it is known to exist; the debug print
            # used to run before the membership check.
            print(config[pfwdefs.HOME_ARCHIVE])
            # Test membership with the same resolved name that is used as the
            # index, so the check and the lookup cannot disagree.
            home_archive = config.getfull(pfwdefs.HOME_ARCHIVE)
            if home_archive in config[pfwdefs.SW_ARCHIVESECT]:
                home_info = config[pfwdefs.SW_ARCHIVESECT][home_archive]

        # call transfer
        archive_transfer_utils.archive_copy_dir(target_info, home_info,
                                                config.getfull('archive_transfer'),
                                                archpath, config)

    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        miscutils.fwdebug_print("Calling update_attempt_end: retval = %s" % retval)
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        try:
            dbh.end_task(config['task_id']['attempt'], retval, True)
            dbh.commit()
        finally:
            # Release the connection even when ending the task fails.
            dbh.close()

    miscutils.fwdebug_print("END - exiting with code %s" % retval)
    return retval
Esempio n. 2
0
def endblock(configfile):
    """Finish a block: optionally copy the block's outputs to the home archive.

    Loads the workflow configuration from ``configfile`` and changes into the
    block directory.  When the home-archive output setting lowercases to
    ``'block'``, the list of files for the current request/unit/attempt/block
    is read from the PFW database and handed to
    ``archive_transfer_utils.archive_copy``.

    Args:
        configfile: Path of the WCL file passed to ``pfwconfig.PfwConfig``.

    Returns:
        ``pfwdefs.PF_EXIT_SUCCESS`` on the normal path, or
        ``pfwdefs.PF_EXIT_FAILURE`` when the target archive cannot be
        resolved or DB output is not enabled (the file list comes from the DB).
    """
    miscutils.fwdebug_print("BEG")

    config = pfwconfig.PfwConfig({'wclfile': configfile})
    blkdir = config.getfull('block_dir')
    os.chdir(blkdir)

    if pfwdefs.USE_HOME_ARCHIVE_OUTPUT in config and \
            config[pfwdefs.USE_HOME_ARCHIVE_OUTPUT].lower() == 'block':

        # check if using target archive
        target_info = None
        if pfwdefs.USE_TARGET_ARCHIVE_OUTPUT in config and \
                config[pfwdefs.USE_TARGET_ARCHIVE_OUTPUT].lower() != 'never':
            if pfwdefs.TARGET_ARCHIVE in config:
                # Print only after the key is known to exist, so a missing
                # entry reaches the error branch below.
                print(config[pfwdefs.TARGET_ARCHIVE])
                # Use the same resolved name for the membership test and the
                # lookup so the two cannot disagree.
                target_archive = config.getfull(pfwdefs.TARGET_ARCHIVE)
                archives = config.getfull(pfwdefs.SW_ARCHIVESECT)
                if target_archive in archives:
                    target_info = archives[target_archive]
            if target_info is None:
                print("Error:  cannot determine info for target archive")
                return pfwdefs.PF_EXIT_FAILURE
        else:
            print("Error:  Asked to transfer outputs at end of block, but not using target archive")
            return pfwdefs.PF_EXIT_FAILURE

        home_info = None
        print(config.getfull(pfwdefs.HOME_ARCHIVE))
        if pfwdefs.HOME_ARCHIVE in config and \
                config.getfull(pfwdefs.HOME_ARCHIVE) in config[pfwdefs.SW_ARCHIVESECT]:
            home_info = config[pfwdefs.SW_ARCHIVESECT][config.getfull(pfwdefs.HOME_ARCHIVE)]

        # the list of files to transfer is only available from the database
        if not (pfwdefs.PF_USE_DB_OUT in config and
                miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT])):
            print("Error:  Asked to transfer outputs at end of block, but not using database.")
            print("        Currently not supported.")
            return pfwdefs.PF_EXIT_FAILURE

        dbh = pfwdb.PFWDB()
        try:
            filelist = dbh.get_run_filelist(config.getfull(pfwdefs.REQNUM),
                                            config.getfull(pfwdefs.UNITNAME),
                                            config.getfull(pfwdefs.ATTNUM),
                                            config.getfull(pfwdefs.PF_BLKNUM),
                                            config.getfull(pfwdefs.TARGET_ARCHIVE))

            # call transfer
            archive_transfer_utils.archive_copy(target_info, home_info,
                                                config.getfull('archive_transfer'),
                                                filelist, config)
        finally:
            # The handle used to be left open; it is closed after the
            # transfer in case filelist still depends on the connection.
            dbh.close()

    miscutils.fwdebug_print("END - exiting with code %s" % pfwdefs.PF_EXIT_SUCCESS)
    return pfwdefs.PF_EXIT_SUCCESS
Esempio n. 3
0
def print_job_info(argv):
    """Print per-block job information for each requested processing attempt.

    Parses ``argv`` with ``parse_args``, opens a PFW DB connection and, for
    every entry in ``args['runs']``, looks up the attempt, its blocks, jobs,
    job wrappers and wrapper instances, then prints each block with
    ``print_single_block``.  Exits with status 1 when the services-file
    section for the DB connection cannot be determined.
    """
    args = parse_args(argv)

    try:
        dbh = pfwdb.PFWDB(args['des_services'], args['section'])
    except configparser.NoSectionError:
        print("Can't determine section of services file to get DB connection info")
        print("\tEither set environment variable DES_DB_SECTION or add command-line option --section")
        sys.exit(1)

    for run in args['runs']:
        print(run)
        reqnum, unitname, attnum = parse_attempt_str(run)
        attinfo = dbh.get_attempt_info(reqnum, unitname, attnum)

        # Nothing to report for attempts the DB does not know about.
        if attinfo is None:
            print("No DB information about the processing attempt")
            print("(Double check which DB querying vs which DB the attempt used)")
            continue

        if 'endtime' in attinfo:
            if attinfo['endtime'] is not None:
                print("Note:  run has finished with status %s" % attinfo['status'])

        attempt_id = attinfo['id']

        # block, job, job-wrapper and wrapper-instance records for the attempt
        blockinfo = dbh.get_block_info(pfw_attempt_id=attempt_id)
        job_byblk = pfwutils.index_job_info(
            dbh.get_job_info({'pfw_attempt_id': attempt_id}))
        jwrap_byjob, _ = pfwutils.index_jobwrapper_info(
            dbh.get_jobwrapper_info(id=attempt_id))
        wrap_byjob, _ = pfwutils.index_wrapper_info(
            dbh.get_wrapper_info(pfw_attempt_id=attempt_id))

        verbose = args['verbose']
        for blknum in list(blockinfo.keys()):
            print_single_block(blknum, blockinfo[blknum], job_byblk,
                               jwrap_byjob, wrap_byjob, verbose)
Esempio n. 4
0
def get_task_info(args):
    """Fetch every task row that belongs to one processing attempt.

    Looks up the attempt identified by ``args['reqnum']``,
    ``args['unitname']`` and ``args['attnum']``, then selects all rows of the
    ``task`` table whose ``root_task_id`` is the attempt's task id.

    Returns:
        A tuple ``(attid, tasks)`` where ``tasks`` maps each task id to a dict
        of its columns (lower-cased names) plus an empty ``'children'`` list.
    """
    dbh = pfwdb.PFWDB(args['des_services'], args['section'])

    # get the run info
    attempt = dbh.get_attempt_info(args['reqnum'], args['unitname'],
                                   args['attnum'])
    attid = attempt['task_id']
    print("attempt task id = ", attid)

    # Disabled alternative: recursive query that also computes the task level.
    #sql = """WITH alltasks (id, parent_task_id, root_task_id, lvl,
    #name, info_table, start_time, end_time, status, exec_host,label)
    #AS (SELECT c.id, c.parent_task_id, c.root_task_id, 1, c.name,
    #c.info_table, c.start_time, c.end_time, c.status, c.exec_host,c.label
    #FROM task c WHERE c.id=%s UNION ALL SELECT r.id, r.parent_task_id,
    #r.root_task_id, a.lvl+1, r.name, r.info_table, r.start_time,
    #r.end_time, r.status, r.exec_host,r.label FROM alltasks a INNER
    #JOIN task r ON r.parent_task_id = a.id) SELECT id, parent_task_id,
    #root_task_id, lvl, name, info_table, start_time, end_time, status,
    #exec_host, label FROM alltasks ORDER BY lvl ASC""" % (attid)

    sql = "select * from task where root_task_id=%d order by id" % attid
    print(sql)

    curs = dbh.cursor()
    curs.execute(sql)
    columns = [col[0].lower() for col in curs.description]

    tasks = {}
    for row in curs:
        record = dict(zip(columns, row))
        record['children'] = []
        tasks[record['id']] = record

    return attid, tasks
Esempio n. 5
0
def _begrun_report_failure(pfw_dbh, config, msg):
    """Record a begrun failure in the PFW DB (if connected) and e-mail it.

    ``config`` may be None when the failure happened before the configuration
    was loaded; in that case there is nothing to address the e-mail with and
    only the caller's re-raise reports the problem.
    """
    if pfw_dbh is not None:
        # NOTE(review): the rest of begrun uses ``pfwutils``; confirm that
        # ``pfw_utils`` is really a name imported by this file.
        Messaging.pfw_message(pfw_dbh, config['pfw_attempt_id'],
                              config['task_id']['attempt'], msg,
                              pfw_utils.PFW_DB_WARN, 'begrun.out', 0)
    if config is not None:
        send_failed_email(config, msg)


def begrun(argv):
    """Perform the submit-machine steps at the beginning of an attempt.

    Loads the configuration named by ``argv[0]``.  When DB output is enabled,
    opens a PFW DB connection and begins the attempt task.  Unless home
    archive output is ``'never'``, ``submit_files_mvmt`` is missing or this is
    a dry run, the three submit WCL files are registered with the home
    archive's file-management class and copied home.  Finally, when connected
    to the DB, the attempt's archive path is saved.

    Args:
        argv: Sequence whose first element is the path of the WCL config file.

    Raises:
        Any exception (or ``SystemExit``) raised by the steps above is
        reported through the PFW message table / failure e-mail where
        possible and then re-raised unchanged.
    """
    pfw_dbh = None
    # Bound up front so the handlers below can tell "config never loaded"
    # apart from a later failure instead of hitting an unbound local.
    config = None
    try:
        configfile = argv[0]
        config = pfwconfig.PfwConfig({'wclfile': configfile})

        if miscutils.fwdebug_check(6, 'BEGRUN_DEBUG'):
            miscutils.fwdebug_print(
                'use_home_archive_output = %s' %
                config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT))

        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            import processingfw.pfwdb as pfwdb
            pfw_dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                                  config.getfull('submit_des_db_section'))
            pfw_dbh.begin_task(config['task_id']['attempt'], True)

        # the three wcl files to copy to the home archive
        origwcl = config['origwcl']
        expwcl = config['expwcl']
        fullwcl = config['fullwcl']

        # if not a dryrun and using a home archive for output
        if (config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT) != 'never'
                and 'submit_files_mvmt' in config and
            (pfwdefs.PF_DRYRUN not in config
             or not miscutils.convertBool(config.getfull(pfwdefs.PF_DRYRUN)))):

            # get home archive info
            home_archive = config.getfull('home_archive')
            archive_info = config[pfwdefs.SW_ARCHIVESECT][home_archive]

            # load filemgmt class
            attempt_tid = config['task_id']['attempt']
            filemgmt = pfwutils.pfw_dynam_load_class(pfw_dbh, config,
                                                     attempt_tid, attempt_tid,
                                                     "filemgmt",
                                                     archive_info['filemgmt'],
                                                     archive_info)

            # save file information
            filemgmt.register_file_data('wcl', [origwcl, expwcl, fullwcl],
                                        config['pfw_attempt_id'], attempt_tid,
                                        False, None, None)
            copy_files_home(config, archive_info, filemgmt)
            filemgmt.commit()

        if pfw_dbh is not None:
            print("Saving attempt's archive path into PFW tables...", end=' ')
            pfw_dbh.update_attempt_archive_path(config)
            pfw_dbh.commit()
    except Exception as exc:
        _begrun_report_failure(
            pfw_dbh, config,
            "begrun: %s: %s" % (exc.__class__.__name__, str(exc)))
        raise
    except SystemExit as exc:
        # SystemExit is not an Exception subclass, so it needs its own clause.
        _begrun_report_failure(pfw_dbh, config,
                               "begrun: SysExit=%s" % str(exc))
        raise
Esempio n. 6
0
def create_master_list(config, configfile, modname, moddict,
                       search_name, search_dict, search_type):
    """Run the program that builds the master data list for one list/file def.

    The program is either the ``exec`` named in ``search_dict`` (with its
    ``args``) or, when ``query_fields`` is present, ``genquerydb.py`` from the
    ProcessingFW directory.  The output and log file names are stored back
    into ``search_dict`` as ``qoutfile`` and ``qlog``.  When DB output is
    enabled the query is recorded as a task and its exit code saved.  A
    non-zero exit code ends the program through ``miscutils.fwdie``.

    Returns None; also returns early (after a warning) when ``search_dict``
    defines neither ``exec`` nor ``query_fields``.
    """
    miscutils.fwdebug_print("BEG")

    qouttype = (search_dict['qouttype'] if 'qouttype' in search_dict
                else intgdefs.DEFAULT_QUERY_OUTPUT_FORMAT)

    def _qoutput_name(suffix):
        # File name of the 'qoutput' pattern for this module/search pair.
        return config.get_filename('qoutput',
                                   {pfwdefs.PF_CURRVALS: {'modulename': modname,
                                                          'searchname': search_name,
                                                          'suffix': suffix}})

    qoutfile = _qoutput_name(qouttype)
    qlog = _qoutput_name('out')

    prog = None
    if 'exec' in search_dict:
        prog = search_dict['exec']
        if 'args' in search_dict:
            args = search_dict['args']
        else:
            print("\t\tWarning:  %s in module %s does not have args defined\n" %
                  (search_name, modname))
            args = ""
    elif 'query_fields' in search_dict:
        if 'processingfw_dir' in config:
            dirgenquery = config['processingfw_dir']
        elif 'PROCESSINGFW_DIR' in os.environ:
            dirgenquery = os.environ['PROCESSINGFW_DIR']
        else:
            miscutils.fwdie("Error: Could not determine base path for genquerydb.py",
                            pfwdefs.PF_EXIT_FAILURE)

        prog = "%s/libexec/genquerydb.py" % (dirgenquery)
        args = "--qoutfile %s --qouttype %s --config %s --module %s --search %s" % \
               (qoutfile, qouttype, configfile, modname, search_name)

    if not prog:
        print("\tWarning: %s in module %s does not have exec or %s defined" %
              (search_name, modname, pfwdefs.SW_QUERYFIELDS))
        return

    search_dict['qoutfile'] = qoutfile
    search_dict['qlog'] = qlog

    prog_opts = {pfwdefs.PF_CURRVALS: {pfwdefs.SW_MODULESECT: modname},
                 'searchobj': search_dict}
    prog = replfuncs.replace_vars_single(prog, config, prog_opts)

    # handle both outputxml and outputfile args
    args_opts = {pfwdefs.PF_CURRVALS: {pfwdefs.SW_MODULESECT: modname,
                                       'outputxml': qoutfile,
                                       'outputfile': qoutfile,
                                       'qoutfile': qoutfile},
                 'searchobj': search_dict}
    args = replfuncs.replace_vars_single(args, config, args_opts)

    # version of the query code, when the program has an exec definition
    query_version = None
    if prog in config[pfwdefs.SW_EXEC_DEF]:
        query_version = pfwutils.get_version(prog, config[pfwdefs.SW_EXEC_DEF])

    # one-letter data type: L for lists, F for files, else first letter
    datatype = ('L' if search_type == pfwdefs.SW_LISTSECT
                else 'F' if search_type == pfwdefs.SW_FILESECT
                else search_type[0].upper())

    # record the query as a task when writing to the DB
    query_tid = None
    pfw_dbh = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        pfw_dbh = pfwdb.PFWDB()
        query_tid = pfw_dbh.insert_data_query(config, modname, datatype, search_name,
                                              prog, args, query_version)

    cwd = os.getcwd()
    print("\t\tCalling code to create master list for obj %s in module %s" %
          (search_name, modname))
    print("\t\t", prog, args)
    print("\t\tSee output in %s/%s" % (cwd, qlog))
    print("\t\tSee master list will be in %s/%s" % (cwd, qoutfile))

    print("\t\tCreating master list - start ", time.time())

    cmd = "%s %s" % (prog, args)
    exitcode = None
    try:
        exitcode = pfwutils.run_cmd_qcf(cmd, qlog, query_tid, os.path.basename(prog),
                                        config.getfull(pfwdefs.PF_USE_QCF), pfw_dbh,
                                        config['pfw_attempt_id'])
    except:
        # Deliberately catches everything so the task end is still recorded.
        print("******************************")
        print("Error: ")
        (extype, exvalue, trback) = sys.exc_info()
        print("******************************")
        traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
        exitcode = pfwdefs.PF_EXIT_FAILURE

    print("\t\tCreating master list - end ", time.time())
    sys.stdout.flush()
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        # NOTE(review): this opens a second connection; the handle created
        # before the command ran is never closed here -- confirm whether that
        # is intentional or a leak.
        pfw_dbh = pfwdb.PFWDB()
        pfw_dbh.end_task(query_tid, exitcode, True)
        pfw_dbh.close()

    if exitcode != 0:
        miscutils.fwdie("Error: problem creating master list (exitcode = %s)" %
                        (exitcode), exitcode)

    miscutils.fwdebug_print("END")
Esempio n. 7
0
def begblock(argv):
    """Prepare all job files for one block of a processing attempt.

    Loads the configuration named by ``argv[0]``, changes into the block
    directory and, for every module in the module list, runs its queries,
    reads and fixes its master lists and (unless the module is a no-op)
    creates wrapper instances, their WCL and the job assignment.  It then
    checks that no input file is requested before the module that generates
    it, verifies remaining inputs against the archive (DB mode only), writes
    the per-job files and the job-manager DAG, and saves the updated
    configuration back to the config file.

    Args:
        argv: Argument list whose first element is the WCL config file path;
            ``None`` means ``sys.argv``.

    Returns:
        ``pfwdefs.PF_EXIT_DRYRUN`` for a dry run, otherwise
        ``pfwdefs.PF_EXIT_SUCCESS``.

    Raises:
        Any exception raised while building the block is re-raised after the
        configuration has been saved and, in DB mode, the begblock and block
        tasks have been ended with ``pfwdefs.PF_EXIT_FAILURE``.
    """
    # Local import: only needed for the per-wrapper timing debug message.
    import time

    if argv is None:
        argv = sys.argv

    configfile = argv[0]
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    config.set_block_info()
    blknum = config[pfwdefs.PF_BLKNUM]

    blkdir = config.getfull('block_dir')
    os.chdir(blkdir)

    (exists, submit_des_services) = config.search('submit_des_services')
    if exists and submit_des_services is not None:
        os.environ['DES_SERVICES'] = submit_des_services
    (exists, submit_des_db_section) = config.search('submit_des_db_section')
    if exists and submit_des_db_section is not None:
        os.environ['DES_DB_SECTION'] = submit_des_db_section

    dbh = None
    blktid = -1
    if miscutils.fwdebug_check(3, 'PFWBLOCK_DEBUG'):
        miscutils.fwdebug_print("blknum = %s" % (config[pfwdefs.PF_BLKNUM]))
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(submit_des_services, submit_des_db_section)
        dbh.insert_block(config)
        blktid = config['task_id']['block'][str(blknum)]
        config['task_id']['begblock'] = dbh.create_task(
            name='begblock',
            info_table=None,
            parent_task_id=blktid,
            root_task_id=int(config['task_id']['attempt']),
            label=None,
            do_begin=True,
            do_commit=True)

    try:
        modulelist = miscutils.fwsplit(
            config.getfull(pfwdefs.SW_MODULELIST).lower())
        modules_prev_in_list = {}

        joblist = {}
        parlist = OrderedDict()
        masterdata = OrderedDict()
        # file name -> index of the module that first reads / last writes it
        filelist = {'infiles': {}, 'outfiles': {}}
        for num, modname in enumerate(modulelist):
            print("XXXXXXXXXXXXXXXXXXXX %s XXXXXXXXXXXXXXXXXXXX" % modname)
            if modname not in config[pfwdefs.SW_MODULESECT]:
                miscutils.fwdie(
                    "Error: Could not find module description for module %s\n"
                    % (modname), pfwdefs.PF_EXIT_FAILURE)
            moddict = config[pfwdefs.SW_MODULESECT][modname]

            runqueries(config, configfile, modname, modules_prev_in_list)
            pfwblock.read_master_lists(config, modname, masterdata,
                                       modules_prev_in_list)

            (infsect, outfsect) = pfwblock.get_datasect_types(config, modname)
            pfwblock.fix_master_lists(config, modname, masterdata, outfsect)

            if pfwdefs.PF_NOOP not in moddict or not miscutils.convertBool(
                    moddict[pfwdefs.PF_NOOP]):
                pfwblock.create_fullnames(config, modname, masterdata)
                if miscutils.fwdebug_check(
                        9, 'PFWBLOCK_DEBUG') and modname in masterdata:
                    with open('%s-masterdata.txt' % modname, 'w') as fh:
                        miscutils.pretty_print_dict(masterdata[modname], fh)

                pfwblock.add_file_metadata(config, modname)
                sublists = pfwblock.create_sublists(config, modname,
                                                    masterdata)
                if sublists is not None:
                    if miscutils.fwdebug_check(3, 'PFWBLOCK_DEBUG'):
                        miscutils.fwdebug_print("sublists.keys() = %s" %
                                                (list(sublists.keys())))
                loopvals = pfwblock.get_wrapper_loopvals(config, modname)
                wrapinst = pfwblock.create_wrapper_inst(
                    config, modname, loopvals)
                wcnt = 1
                for winst in list(wrapinst.values()):
                    # Start of this wrapper instance, for the END debug line.
                    stime = time.time()
                    if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
                        miscutils.fwdebug_print("winst %d - BEG" % wcnt)
                    pfwblock.assign_data_wrapper_inst(config, modname, winst,
                                                      masterdata, sublists,
                                                      infsect, outfsect)
                    pfwblock.finish_wrapper_inst(config, modname, winst,
                                                 outfsect)
                    tempfiles = pfwblock.create_module_wrapper_wcl(
                        config, modname, winst)
                    # keep the first module that reads each input ...
                    for fl in tempfiles['infiles']:
                        if fl not in filelist['infiles']:
                            filelist['infiles'][fl] = num

                    # ... and the last module that writes each output
                    for fl in tempfiles['outfiles']:
                        filelist['outfiles'][fl] = num
                    #filelist['infiles'] += tempfiles['infiles']
                    #filelist['outfiles'] += tempfiles['outfiles']
                    pfwblock.divide_into_jobs(config, modname, winst, joblist,
                                              parlist)
                    if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
                        etime = time.time()
                        miscutils.fwdebug_print("winst %d - %s - END" %
                                                (wcnt, etime - stime))
                    wcnt += 1
            modules_prev_in_list[modname] = True

            if miscutils.fwdebug_check(
                    9, 'PFWBLOCK_DEBUG') and modname in masterdata:
                with open('%s-masterdata.txt' % modname, 'w') as fh:
                    miscutils.pretty_print_dict(masterdata[modname], fh)

        scriptfile = pfwblock.write_runjob_script(config)

        intersect = list(
            set(filelist['infiles'].keys()) & set(filelist['outfiles'].keys()))
        finallist = []

        # Inputs never produced in this block must already exist; inputs that
        # are produced here must come from an earlier module.
        for fl in list(filelist['infiles'].keys()):
            if fl not in intersect:
                finallist.append(fl)
            else:
                if filelist['infiles'][fl] <= filelist['outfiles'][fl]:
                    raise Exception(
                        'Input file %s requested before it is generated.' %
                        (fl))

        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            missingfiles = dbh.check_files(config, finallist)
            if len(missingfiles) > 0:
                raise Exception(
                    "The following input files cannot be found in the archive:"
                    + ",".join(missingfiles))
        miscutils.fwdebug_print("Creating job files - BEG")
        for jobkey, jobdict in sorted(joblist.items()):
            jobdict['jobnum'] = pfwutils.pad_jobnum(config.inc_jobnum())
            jobdict['jobkeys'] = jobkey
            jobdict['numexpwrap'] = len(jobdict['tasks'])
            if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
                # arguments now follow the order of the labels in the message
                miscutils.fwdebug_print("jobnum = %s, jobkey = %s:" %
                                        (jobdict['jobnum'], jobkey))
            jobdict['tasksfile'] = write_workflow_taskfile(
                config, jobdict['jobnum'], jobdict['tasks'])
            if (len(jobdict['inlist']) > 0 and
                    config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT) != 'never'
                    and 'submit_files_mvmt' in config and
                (pfwdefs.PF_DRYRUN not in config or not miscutils.convertBool(
                    config.getfull(pfwdefs.PF_DRYRUN)))):
                # get home archive info
                home_archive = config.getfull('home_archive')
                archive_info = config[pfwdefs.SW_ARCHIVESECT][home_archive]

                # load filemgmt class
                attempt_tid = config['task_id']['attempt']
                filemgmt = pfwutils.pfw_dynam_load_class(
                    dbh, config, attempt_tid, attempt_tid, "filemgmt",
                    archive_info['filemgmt'], archive_info)
                # save file information
                filemgmt.register_file_data('list', jobdict['inlist'],
                                            config['pfw_attempt_id'],
                                            attempt_tid, False, None, None)
                pfwblock.copy_input_lists_home_archive(config, filemgmt,
                                                       archive_info,
                                                       jobdict['inlist'])
                filemgmt.commit()
            jobdict['inputwcltar'] = pfwblock.tar_inputfiles(
                config, jobdict['jobnum'],
                jobdict['inwcl'] + jobdict['inlist'])
            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
                dbh.insert_job(config, jobdict)
            pfwblock.write_jobwcl(config, jobkey, jobdict)
            if ('glidein_use_wall' in config and miscutils.convertBool(
                    config.getfull('glidein_use_wall'))
                    and 'jobwalltime' in config):
                jobdict['wall'] = config['jobwalltime']

        miscutils.fwdebug_print("Creating job files - END")

        numjobs = len(joblist)
        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            dbh.update_block_numexpjobs(config, numjobs)

        #if miscutils.fwdebug_check(6, 'PFWBLOCK_DEBUG'):
        #    miscutils.fwdebug_print("inputfiles: %s, %s" % (type(inputfiles), inputfiles))
        #    miscutils.fwdebug_print("outputfiles: %s, %s" % (type(outputfiles), outputfiles))
        #files2stage = set(inputfiles) - set(outputfiles)
        #pfwblock.stage_inputs(config, files2stage)

        #if pfwdefs.USE_HOME_ARCHIVE_OUTPUT in config and \
        #   config.getfull(pfwdefs.USE_HOME_ARCHIVE_OUTPUT).lower() == 'block':
        #    config['block_outputlist'] = 'potential_outputfiles.list'
        #    pfwblock.write_output_list(config, outputfiles)

        dagfile = config.get_filename('jobdag')
        pfwblock.create_jobmngr_dag(config, dagfile, scriptfile, joblist)
    except:
        # Bare except is intentional: state is saved and the tasks are ended
        # for any failure, and the original exception is re-raised below.
        retval = pfwdefs.PF_EXIT_FAILURE
        with open(configfile, 'w') as cfgfh:
            config.write(
                cfgfh)  # save config, have updated jobnum, wrapnum, etc
        if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
            dbh.end_task(config['task_id']['begblock'], retval, True)
            dbh.end_task(blktid, retval, True)
        raise

    # save config, have updated jobnum, wrapnum, etc
    with open(configfile, 'w') as cfgfh:
        config.write(cfgfh)

    (exists, dryrun) = config.search(pfwdefs.PF_DRYRUN)
    if exists and miscutils.convertBool(dryrun):
        retval = pfwdefs.PF_EXIT_DRYRUN
    else:
        retval = pfwdefs.PF_EXIT_SUCCESS
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh.end_task(config['task_id']['begblock'], retval, True)
    miscutils.fwdebug_print("END - exiting with code %s" % retval)

    return retval
Esempio n. 8
0
def blockpost(argv=None):
    """Program entry point: gather status for a finished block and report it.

    Redirects stdout/stderr to a log file, reads the run configuration,
    optionally verifies archive file sizes and collects block/job/wrapper
    status from the database, sends the appropriate email, records the end
    of the block task in the database and, on success, advances the block
    number stored in the config file.

    Parameters
    ----------
    argv : list of str, optional
        Command line in the form ``[prog, configfile, retval]``.  Defaults
        to ``sys.argv``.

    Returns
    -------
    int
        Exit status for the block (``pfwdefs.PF_EXIT_FAILURE`` on a usage
        error, ``pfwdefs.PF_EXIT_DRYRUN`` in dryrun mode, otherwise the
        status accumulated while gathering information).
    """
    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = open('blockpost.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))  # print command line for debugging

    print("running on %s" % (socket.gethostname()))

    if len(argv) != 3:
        print('Usage: blockpost.py configfile retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    retval = int(argv[2])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("configfile = %s" % configfile)
    miscutils.fwdebug_print("retval = %s" % retval)

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename(
        'block',
        {pfwdefs.PF_CURRVALS: {
            'flabel': 'blockpost',
            'fsuffix': 'out'
        }})
    new_log_name = "%s/%s" % (blkdir, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    # move the temporary log to its final name and keep appending to it
    debugfh.close()
    os.chmod('blockpost.out', 0o666)
    os.rename('blockpost.out', new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    os.chdir(blkdir)

    log_pfw_event(config, blockname, 'blockpost', 'j', ['posttask', retval])

    dryrun = config.getfull(pfwdefs.PF_DRYRUN)
    run = config.getfull('run')
    attid = config['pfw_attempt_id']
    reqnum = config.getfull(pfwdefs.REQNUM)
    unitname = config.getfull(pfwdefs.UNITNAME)
    attnum = config.getfull(pfwdefs.ATTNUM)
    blknum = int(config.getfull(pfwdefs.PF_BLKNUM))
    blktid = None

    msg2 = ""
    dbh = None
    # QCF handle stays None unless QCF is enabled; later code tests it
    qdbh = None
    job_byblk = {}
    wrap_byjob = {}
    wrap_bymod = {}
    wrapinfo = {}
    jobinfo = {}
    failedwraps = {}
    whyfailwraps = {}  # mod failures for other modname, shouldn't happen
    usedb = miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT))
    verify_files = miscutils.convertBool(config.getfull('verify_files'))
    verify_status = 0
    if verify_files and not usedb:
        print('Skipping file verification due to lack of database connection')
    if usedb:
        sem = None
        try:
            miscutils.fwdebug_print("Connecting to DB")
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                              config.getfull('submit_des_db_section'))
            if verify_files:
                curs = dbh.cursor()
                # NOTE(review): SQL is built by string interpolation from a
                # config value; consider a bind variable if the DB API allows.
                curs.execute("select root from ops_archive where name='%s'" %
                             (config.getfull('home_archive')))
                rows = curs.fetchall()
                # guard the count so a None result cannot break the message
                numrows = 0 if rows is None else len(rows)
                if numrows != 1:
                    raise Exception(
                        "Invalid archive name (%s).   Found %s rows in ops_archive"
                        % (config.getfull('home_archive'), numrows))
                root = rows[0][0]
                if not os.path.isdir(root):
                    # report the directory that could not be read
                    print(
                        "Cannot read archive root directory:%s This program must be run on an NCSA machine with access to the archive storage system."
                        % (root))
                sem = dbsem.DBSemaphore(
                    'verify_files_10', None,
                    config.getfull('submit_des_services'),
                    config.getfull('submit_des_db_section'))
                print(
                    "\n\nVerifying archive file sizes on disk (0 is success)")
                verify_status = cu.compare(
                    dbh=dbh,
                    archive=config.getfull('home_archive'),
                    pfwid=attid,
                    filesize=True,
                    md5sum=False,
                    quick=True,
                    debug=False,
                    script=False,
                    verbose=False,
                    silent=True)
                # Drop the reference (presumably releasing the semaphore)
                # by rebinding rather than `del`, so the name stays bound
                # for the exception handler below.
                sem = None
                print("  Verification of files returned status %i" %
                      (verify_status))
                if verify_status != 0:
                    print(
                        "  This indicates that one or more files do not have the correct file size (based on DB entries). Run"
                    )
                    print(
                        "\n    compare_db.py --des_services %s --section %s --archive %s --pfwid %i --filesize --verbose"
                        % (config.getfull('submit_des_services'),
                           config.getfull('submit_des_db_section'),
                           config.getfull('home_archive'), int(attid)))
                    print("\n  to see the details.")

            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_QCF)):
                import qcframework.qcfdb as qcfdb
                qdbh = qcfdb.QCFDB(config.getfull('submit_des_services'),
                                   config.getfull('submit_des_db_section'))

            print("\n\nChecking non-job block task status from task table in DB (%s is success)" % \
                  pfwdefs.PF_EXIT_SUCCESS)
            num_bltasks_failed = 0
            bltasks = {}
            blktid = None
            if ('block' in config['task_id']
                    and str(blknum) in config['task_id']['block']):
                blktid = int(config['task_id']['block'][str(blknum)])
                miscutils.fwdebug_print("Getting block task info from DB")
                start_time = time.time()
                bltasks = dbh.get_block_task_info(blktid)
                end_time = time.time()
                miscutils.fwdebug_print(
                    "Done getting block task info from DB (%s secs)" %
                    (end_time - start_time))
                for bltdict in list(bltasks.values()):
                    print("Block status = ", bltdict['status'])
                    if bltdict['status'] == pfwdefs.PF_EXIT_DRYRUN:
                        print("setting return value to dryrun")
                        retval = bltdict['status']
                    elif bltdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        num_bltasks_failed += 1
                        msg2 += "\t%s" % (bltdict['name'])
                        if bltdict['label'] is not None:
                            msg2 += " - %s" % (bltdict['label'])
                        msg2 += " failed\n"

                        if bltdict['name'] == 'begblock':
                            # try to read the begblock.out and begblock.err files
                            print(
                                "Trying to get begblock.out and begblock.err")
                            msg2 += get_subblock_output("begblock")

                            # try to get QCF messages (especially from query codes)
                            begblock_tid = int(config['task_id']['begblock'])
                            sql = "select id from task where parent_task_id=%i and status!=0" % (
                                begblock_tid)
                            curs = dbh.cursor()
                            curs.execute(sql)
                            res = curs.fetchall()
                            msg2 += "\n===== QCF Messages =====\n"
                            msg2 += "\n begblock\n"
                            wrapids = [blktid, begblock_tid]
                            for r in res:
                                wrapids.append(r[0])

                            wrapmsg = {}
                            if qdbh is not None:
                                miscutils.fwdebug_print(
                                    "Querying QCF messages")
                                start_time = time.time()
                                wrapmsg = qdbh.get_qcf_messages_for_wrappers(
                                    wrapids)
                                end_time = time.time()
                                miscutils.fwdebug_print(
                                    "Done querying QCF messages (%s secs)" %
                                    (end_time - start_time))
                                miscutils.fwdebug_print("wrapmsg = %s" %
                                                        wrapmsg)
                            if len(wrapmsg) == 0:
                                msg2 += "    No QCF messages\n"
                            else:
                                for msgs in list(wrapmsg.values()):
                                    for m in msgs:
                                        msg2 += "    " + m['message'] + "\n"

                        retval = pfwdefs.PF_EXIT_FAILURE

                if retval != pfwdefs.PF_EXIT_DRYRUN:
                    print("\n\nChecking job status from pfw_job table in DB (%s is success)" % \
                        pfwdefs.PF_EXIT_SUCCESS)

                    miscutils.fwdebug_print("Getting job info from DB")
                    start_time = time.time()
                    jobinfo = dbh.get_job_info({'pfw_block_task_id': blktid})
                    end_time = time.time()
                    miscutils.fwdebug_print(
                        "Done getting job info from DB (%s secs)" %
                        (end_time - start_time))

                    miscutils.fwdebug_print("Getting wrapper info from DB")
                    start_time = time.time()
                    wrapinfo = dbh.get_wrapper_info(pfw_attempt_id=attid,
                                                    pfw_block_task_id=blktid)
                    end_time = time.time()
                    miscutils.fwdebug_print(
                        "Done getting wrapper info from DB (%s secs)" %
                        (end_time - start_time))
            else:
                msg = "Could not find task id for block %s in config.des" % blockname
                print("Error:", msg)
                if 'attempt' in config['task_id']:
                    miscutils.fwdebug_print("Saving pfw message")
                    start_time = time.time()
                    Messaging.pfw_message(dbh, attid,
                                          config['task_id']['attempt'], msg,
                                          pfw_utils.PFW_DB_INFO,
                                          'blockpost.out', 0)
                    end_time = time.time()
                    miscutils.fwdebug_print(
                        "Done saving pfw message (%s secs)" %
                        (end_time - start_time))
                print("all the task ids:", config['task_id'])

            archive = None
            if pfwdefs.HOME_ARCHIVE in config:
                archive = config.getfull(pfwdefs.HOME_ARCHIVE)
            logfullnames = dbh.get_fail_log_fullnames(attid, archive)
            dbh.close()
            print("len(jobinfo) = ", len(jobinfo))
            print("len(wrapinfo) = ", len(wrapinfo))
            job_byblk = pfwutils.index_job_info(jobinfo)
            print("blktid: ", blktid)
            print("job_byblk:", job_byblk)

            if blktid not in job_byblk:
                print("Warn: could not find jobs for block %s" % blknum)
                print("      This is ok if attempt died before jobs ran")
                # format string needs a placeholder for the ids to show up
                print("      block task_ids in job_byblk: %s" %
                      list(job_byblk.keys()))
            else:
                wrap_byjob, wrap_bymod = pfwutils.index_wrapper_info(wrapinfo)
                for jobtid, jobdict in sorted(job_byblk[blktid].items()):
                    failedwraps[jobtid] = []
                    whyfailwraps[jobtid] = []

                    jobkeys = ""

                    # don't print out successful wrappers
                    if jobtid in wrap_byjob and jobdict[
                            'status'] == pfwdefs.PF_EXIT_SUCCESS:
                        continue

                    if jobdict['jobkeys'] is not None:
                        jobkeys = jobdict['jobkeys']

                    submit_job_path = "%s/B%02d-%s/%04d" % (
                        config.getfull('work_dir'),
                        int(config.getfull('blknum')),
                        config.getfull('blockname'), int(jobdict['jobnum']))
                    msg2 += "\n\t%s (%s) " % (pfwutils.pad_jobnum(
                        jobdict['jobnum']), jobkeys)

                    if jobtid not in wrap_byjob:
                        msg2 += "\tNo wrapper instances"
                    else:
                        # the module of the highest-numbered wrapper is the
                        # one the job was running when it stopped
                        maxwrap = max(wrap_byjob[jobtid].keys())
                        modname = wrap_byjob[jobtid][maxwrap]['modname']

                        msg2 += "%d/%s  %s" % (len(
                            wrap_byjob[jobtid]), jobdict['expect_num_wrap'],
                                               modname)

                        # determine wrappers for this job without success exit
                        for wrapnum, wdict in list(wrap_byjob[jobtid].items()):
                            if wdict['status'] is None or wdict[
                                    'status'] != pfwdefs.PF_EXIT_SUCCESS:
                                if wdict['modname'] == modname:
                                    failedwraps[jobtid].append(wrapnum)
                                else:
                                    whyfailwraps[jobtid].append(wrapnum)

                    if jobdict['status'] == pfwdefs.PF_EXIT_EUPS_FAILURE:
                        msg2 += " - FAIL - EUPS setup failure"
                        retval = jobdict['status']
                    elif jobdict['status'] == pfwdefs.PF_EXIT_CONDOR:
                        msg2 += " - FAIL - Condor/Globus failure"
                        retval = jobdict['status']
                    elif jobdict['status'] is None:
                        msg2 += " - FAIL - NULL status"
                        retval = pfwdefs.PF_EXIT_FAILURE
                    elif jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += " - FAIL - Non-zero status"
                        retval = jobdict['status']

                    if jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += "\n\t\t%s/runjob.out " % (submit_job_path)

                    msg2 += '\n'

                    # print pfw_messages
                    if 'message' in jobdict:
                        print(jobdict['message'])
                        for msgdict in sorted(jobdict['message'],
                                              key=lambda k: k['message_time']):
                            level = int(msgdict['message_lvl'])
                            levelstr = 'info'
                            if level == pfwdefs.PFWDB_MSG_WARN:
                                levelstr = 'WARN'
                            elif level == pfwdefs.PFWDB_MSG_ERROR:
                                levelstr = 'ERROR'

                            msg2 += "\t\t%s - %s\n" % (
                                levelstr, msgdict['message'].replace(
                                    '\n', '\n\t\t\t'))

                    if jobtid in wrap_byjob:
                        # print log file name for failed/unfinished wrappers
                        for wrapnum in failedwraps[jobtid]:
                            wrapdict = wrap_byjob[jobtid][wrapnum]
                            if wrapdict['log'] in logfullnames:
                                msg2 += "\t\t%s - %s\n" % (
                                    wrapnum, logfullnames[wrapdict['log']])
                            else:
                                msg2 += "\t\t%s - Could not find log in archive (%s)\n" % (
                                    wrapnum, wrapdict['log'])
                            wrapmsg = get_qcf_messages(qdbh, config,
                                                       [wrapdict['task_id']])
                            msg2 = print_qcf_messages(config, wrapdict,
                                                      wrapmsg, msg2)

                        msg2 += '\n'

                        # If weirdness happened in run, print a message
                        if len(whyfailwraps[jobtid]) > 0:
                            msg2 += "\n*** Contact framework developers.   Wrappers ran after at least 1 wrapper from a previous module that doesn't have success status.\n"
                            # wrapper numbers may not be strings; convert so
                            # join() cannot raise TypeError
                            msg2 += "\t%s\n" % ','.join(
                                str(wnum) for wnum in whyfailwraps[jobtid])

        except Exception as exc:
            # drop any semaphore still held; the name is always bound here
            sem = None
            msg2 += "\n\nEncountered error trying to gather status information for email."
            msg2 += "\nCheck output for blockpost for further details."
            print(
                "\n\nEncountered error trying to gather status information for email"
            )
            print("%s: %s" % (exc.__class__.__name__, str(exc)))
            (extype, exvalue, trback) = sys.exc_info()
            traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
            retval = pfwdefs.PF_EXIT_FAILURE
    # a non-zero verification status turns an otherwise clean block into a failure
    retval = int(retval) + verify_status
    print("before email retval =", retval)

    when_to_email = 'run'
    if 'when_to_email' in config:
        when_to_email = config.getfull('when_to_email').lower()

    if miscutils.convertBool(dryrun):
        if when_to_email != 'never':
            print("dryrun = ", dryrun)
            print("Sending dryrun email")
            if retval == pfwdefs.PF_EXIT_DRYRUN:
                msg1 = "%s:  In dryrun mode, block %s has finished successfully." % (
                    run, blockname)
            else:
                msg1 = "%s:  In dryrun mode, block %s has failed." % (
                    run, blockname)

            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending dryrun email")
            print("retval = ", retval)
        retval = pfwdefs.PF_EXIT_DRYRUN
    elif retval:
        if when_to_email != 'never':
            print("Sending block failed email\n")
            msg1 = "%s:  block %s has failed." % (run, blockname)
            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending failed email")
            print("retval = ", retval)
    elif retval == pfwdefs.PF_EXIT_SUCCESS:
        if when_to_email == 'block':
            msg1 = "%s:  block %s has finished successfully." % (run,
                                                                 blockname)
            msg2 = ""
            print("Sending success email\n")
            send_email(config, blockname, retval, "", msg1, msg2)
        elif when_to_email == 'run':
            # only the last block of the run triggers the success email
            numblocks = len(
                miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ','))
            if int(config[pfwdefs.PF_BLKNUM]) == numblocks:
                msg1 = "%s:  run has finished successfully." % (run)
                msg2 = ""
                print("Sending success email\n")
                send_email(config, blockname, retval, "", msg1, msg2)
            else:
                print("Not sending run email because not last block")
                print("retval = ", retval)
        else:
            print("Not sending success email")
            print("retval = ", retval)
    else:
        print("Not sending email")
        print("retval = ", retval)

    # Store values in DB and hist file
    dbh = None
    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        if blktid is not None:
            print("Updating end of block task", blktid)
            dbh.end_task(blktid, retval, True)
        else:
            print("Could not update end of block task without block task id")
        if retval != pfwdefs.PF_EXIT_SUCCESS:
            print("Updating end of attempt", config['task_id']['attempt'])
            dbh.end_task(config['task_id']['attempt'], retval, True)
        dbh.commit()
        dbh.close()

    print("before next block retval = ", retval)
    if retval == pfwdefs.PF_EXIT_SUCCESS:
        # Get ready for next block
        config.inc_blknum()
        with open(configfile, 'w') as cfgfh:
            config.write(cfgfh)
        print("new blknum = ", config[pfwdefs.PF_BLKNUM])
        print("number of blocks = ",
              len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')))

    miscutils.fwdebug_print("Returning retval = %s (%s)" %
                            (retval, type(retval)))
    miscutils.fwdebug_print("END")
    debugfh.close()
    return int(retval)
Esempio n. 9
0
def summary(argv=None):
    """Create and send the summary email for the whole processing attempt.

    Redirects stdout/stderr to ``summary.out``, reads the configuration,
    logs the post-task event, sends an email describing the outcome and,
    when database output is enabled, records the end status of the attempt.

    Parameters
    ----------
    argv : list of str, optional
        Command line in the form ``[prog, configfile, status]``.  Defaults
        to ``sys.argv``.

    Returns
    -------
    int or None
        The numeric status (``None`` when it is missing or not numeric), or
        ``pfwdefs.PF_EXIT_FAILURE`` when no config file was given.
    """
    if argv is None:
        argv = sys.argv

    debugfh = open('summary.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv))

    if len(argv) < 2:
        print("Usage: summary configfile status")
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    status = None
    if len(argv) == 3:
        # Command-line values are strings; convert so the numeric
        # comparisons below (and the returned exit status) are meaningful.
        try:
            status = int(argv[2])
        except ValueError:
            print(f"summary: Invalid status value '{argv[2]}'")
        else:
            # dagman always exits with 0 or 1
            if status == 1:
                status = pfwdefs.PF_EXIT_FAILURE
    else:
        print("summary: Missing status value")

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': argv[1]})

    log_pfw_event(config, 'process', 'mngr', 'j', ['posttask', status])

    # Exactly one branch chooses the message; a dryrun is reported as such
    # regardless of the numeric status.
    if status is None:
        msg1 = "Processing finished with unknown results.\n"
    elif pfwdefs.PF_DRYRUN in config and miscutils.convertBool(
            config.getfull(pfwdefs.PF_DRYRUN)):
        msg1 = "Processing ended after DRYRUN\n"
    elif status == pfwdefs.PF_EXIT_SUCCESS:
        msg1 = "Processing has successfully completed.\n"
    else:
        print(f"status = '{status}'")
        print("type(status) =", type(status))
        print(f"SUCCESS = '{pfwdefs.PF_EXIT_SUCCESS}'")
        print("type(SUCCESS) =", type(pfwdefs.PF_EXIT_SUCCESS))
        msg1 = f"Processing aborted with status {status}.\n"

    subject = ""
    pfwemail.send_email(config, "processing", status, subject, msg1, '')

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        dbh.update_attempt_end_vals(config['pfw_attempt_id'], status)
    print(f"summary: status = '{status}'")
    print("summary:", msg1)
    print("summary: End")
    debugfh.close()
    return status
Esempio n. 10
0
def jobpre(argv=None):
    """Program entry point: bookkeeping performed before a pipeline job.

    Captures output in a log file (first under a temporary name, then under
    the job's final log name), records the submit times for the job in the
    database when database output is enabled, and logs the pre-task event.
    The original ``sys.stdout``/``sys.stderr`` are restored before returning.

    Parameters
    ----------
    argv : list of str, optional
        Command line in the form ``[prog, configfile, jobnum]``.  Defaults
        to ``sys.argv``.

    Returns
    -------
    int
        ``pfwdefs.PF_EXIT_FAILURE`` on a usage error, otherwise
        ``pfwdefs.PF_EXIT_SUCCESS``.
    """
    if argv is None:
        argv = sys.argv

    # the final log name needs the config, so start with a throwaway name
    default_log = f"jobpre_{random.randint(1,10000000):08d}.out"
    debugfh = open(default_log, 'w')

    tmpfn = debugfh.name
    outorig = sys.stdout
    errorig = sys.stderr
    sys.stdout = debugfh
    sys.stderr = debugfh

    print(' '.join(argv)) # command line for debugging
    print(os.getcwd())

    if len(argv) < 3:
        print("Usage: jobpre configfile jobnum")
        debugfh.close()
        # don't leave the standard streams pointing at a closed file
        sys.stdout = outorig
        sys.stderr = errorig
        return pfwdefs.PF_EXIT_FAILURE

    # use the argv that was passed in, not the process-wide sys.argv
    configfile = argv[1]
    jobnum = argv[2]    # could also be uberctrl

    # read wcl file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    blockname = config.getfull('blockname')
    blkdir = config.get('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename('job', {pfwdefs.PF_CURRVALS: {pfwdefs.PF_JOBNUM:jobnum,
                                                                     'flabel': 'jobpre',
                                                                     'fsuffix':'out'}})
    new_log_name = f"{blkdir}/{tjpad}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")

    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)

    dbh = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        # reuse the config's DB handle when it already has one
        if config.dbh is None:
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                              config.getfull('submit_des_db_section'))
        else:
            dbh = config.dbh

    # reopen the renamed log in append mode, via Messaging when QCF is on
    if 'use_qcf' in config and config['use_qcf']:
        debugfh = Messaging.Messaging(new_log_name, 'jobpre.py', config['pfw_attempt_id'], dbh=dbh, mode='a+', usedb=dbh is not None)
    else:
        debugfh = open(new_log_name, 'a+')

    sys.stdout = debugfh
    sys.stderr = debugfh

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        ctstr = dbh.get_current_timestamp_str()
        dbh.update_job_info(config, tjpad, {'condor_submit_time': ctstr,
                                            'target_submit_time': ctstr})

    log_pfw_event(config, blockname, tjpad, 'j', ['pretask'])

    miscutils.fwdebug_print("jobpre done")
    debugfh.close()
    sys.stdout = outorig
    sys.stderr = errorig
    return pfwdefs.PF_EXIT_SUCCESS
Esempio n. 11
0
def _jobpost_warn(dbh, config, jobnum, msg):
    """Print a warning and, if a DB handle is given, store it as a PFW warning message."""
    print(msg)
    if dbh:
        Messaging.pfw_message(dbh, config['pfw_attempt_id'],
                              config['task_id']['job'][jobnum],
                              msg, pfwdefs.PFWDB_MSG_WARN)


def jobpost(argv=None):
    """Perform the steps needed after a pipeline job.

    stdout/stderr are redirected to a temporary log file which is renamed
    into the job directory once the config has been read.  When DB output is
    enabled, job information parsed from the job output and from the condor
    log (``runjob.log``) is written to the database.  Afterwards the
    ``posttask`` event is logged, the input tarball is deleted and the
    output tarball is unpacked and deleted.

    Args:
        argv: Argument list of the form
            ``[prog, configfile, block, jobnum, inputtar, outputtar, retval]``.
            Defaults to ``sys.argv``.

    Returns:
        ``pfwdefs.PF_EXIT_FAILURE`` for a usage error; otherwise the incoming
        ``retval`` as an int if it equals ``pfwdefs.PF_EXIT_SUCCESS``, else
        ``pfwdefs.PF_EXIT_FAILURE`` as an int.

    Note:
        ``sys.stdout`` and ``sys.stderr`` are left bound to the (closed) log
        file on return; they are not restored here.
    """
    # mapping from condor log keys to DB column names
    condor2db = {'jobid': 'condor_job_id',
                 'csubmittime': 'condor_submit_time',
                 'gsubmittime': 'target_submit_time',
                 'starttime': 'condor_start_time',
                 'endtime': 'condor_end_time'}

    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = tempfile.NamedTemporaryFile(mode='w+', prefix='jobpost_', dir='.', delete=False)
    tmpfn = debugfh.name
    sys.stdout = debugfh
    sys.stderr = debugfh

    miscutils.fwdebug_print("temp log name = %s" % tmpfn)
    print('cmd>', ' '.join(argv))  # print command line for debugging

    if len(argv) < 7:
        print('Usage: jobpost.py configfile block jobnum inputtar outputtar retval')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    blockname = argv[2]
    jobnum = argv[3]
    inputtar = argv[4]
    outputtar = argv[5]
    retval = pfwdefs.PF_EXIT_FAILURE
    if len(argv) == 7:
        # read from argv (not sys.argv) so an explicitly passed list is honoured
        retval = int(argv[6])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("configfile = %s" % configfile)
        miscutils.fwdebug_print("block = %s" % blockname)
        miscutils.fwdebug_print("jobnum = %s" % jobnum)
        miscutils.fwdebug_print("inputtar = %s" % inputtar)
        miscutils.fwdebug_print("outputtar = %s" % outputtar)
        miscutils.fwdebug_print("retval = %s" % retval)

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")

    # now that have more information, rename output file
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("before get_filename")
    # the block name from the config replaces the command-line value
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # everything below uses paths relative to the job directory
    os.chdir("%s/%s" % (blkdir, tjpad))
    new_log_name = config.get_filename('job', {pfwdefs.PF_CURRVALS: {pfwdefs.PF_JOBNUM: jobnum,
                                                                     'flabel': 'jobpost',
                                                                     'fsuffix': 'out'}})
    new_log_name = "%s" % (new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    # move the temporary log to its final name and keep appending to it
    debugfh.close()
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    dbh = None
    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))

        # get job information from the job stdout if exists
        (tjobinfo, tjobinfo_task) = parse_job_output(config, jobnum, dbh, retval)

        if dbh and len(tjobinfo) > 0:
            print("tjobinfo: ", tjobinfo)
            dbh.update_tjob_info(config['task_id']['job'][jobnum], tjobinfo)

        # get job information from the condor job log
        logfilename = 'runjob.log'
        if os.path.exists(logfilename) and os.path.getsize(logfilename) > 0:  # if made it to submitting/running jobs
            # best effort: a problem with the condor log is reported in the
            # job log but must not abort the rest of the post-processing
            try:
                # update job info in DB from condor log
                print("Updating job info in DB from condor log")
                condorjobinfo = pfwcondor.parse_condor_user_log(logfilename)
                if len(condorjobinfo) > 1:
                    print("More than single job in job log")
                j = list(condorjobinfo)[0]
                cjobinfo = condorjobinfo[j]
                djobinfo = {dkey: cjobinfo[ckey]
                            for ckey, dkey in condor2db.items()
                            if ckey in cjobinfo}
                print(djobinfo)
                dbh.update_job_info(config, cjobinfo['jobname'], djobinfo)

                if 'holdreason' in cjobinfo and cjobinfo['holdreason'] is not None:
                    _jobpost_warn(dbh, config, jobnum,
                                  "Condor HoldReason: %s" % cjobinfo['holdreason'])

                if 'abortreason' in cjobinfo and cjobinfo['abortreason'] is not None:
                    tjobinfo_task['start_time'] = cjobinfo['starttime']
                    tjobinfo_task['end_time'] = cjobinfo['endtime']
                    if 'condor_rm' in cjobinfo['abortreason']:
                        tjobinfo_task['status'] = pfwdefs.PF_EXIT_OPDELETE
                    else:
                        tjobinfo_task['status'] = pfwdefs.PF_EXIT_CONDOR
            except Exception:
                (extype, exvalue, trback) = sys.exc_info()
                traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
        else:
            print("Warning:  no job condor log file")

        if dbh:
            # update job task, filling in defaults for anything not found above
            if 'status' not in tjobinfo_task:
                tjobinfo_task['status'] = pfwdefs.PF_EXIT_CONDOR
            if 'end_time' not in tjobinfo_task:
                tjobinfo_task['end_time'] = datetime.now()
            wherevals = {'id': config['task_id']['job'][jobnum]}
            dbh.basic_update_row('task', tjobinfo_task, wherevals)
            dbh.commit()

    log_pfw_event(config, blockname, jobnum, 'j', ['posttask', retval])

    # input wcl should already exist in untar form
    if os.path.exists(inputtar):
        print("found inputtar: %s" % inputtar)
        os.unlink(inputtar)
    else:
        print("Could not find inputtar: %s" % inputtar)

    # untar output wcl tar and delete tar
    if os.path.exists(outputtar):
        print("Size of output wcl tar:", os.path.getsize(outputtar))
        if os.path.getsize(outputtar) > 0:
            print("found outputtar: %s" % outputtar)
            pfwutils.untar_dir(outputtar, '..')
            os.unlink(outputtar)
        else:
            _jobpost_warn(dbh, config, jobnum,
                          "Warn: outputwcl tarball (%s) is 0 bytes." % outputtar)
    else:
        _jobpost_warn(dbh, config, jobnum,
                      "Warn: outputwcl tarball (%s) does not exist." % outputtar)

    # collapse every non-success value into the generic failure code
    if retval != pfwdefs.PF_EXIT_SUCCESS:
        miscutils.fwdebug_print("Setting failure retval")
        retval = pfwdefs.PF_EXIT_FAILURE

    miscutils.fwdebug_print("Returning retval = %s" % retval)
    miscutils.fwdebug_print("jobpost done")
    debugfh.close()
    return int(retval)
Esempio n. 12
0
    def __init__(self, args):
        """ Build the configuration object, usually seeded from the wcl file named in args.

        Values are layered in this order, later ones overriding earlier ones:
        the pfwconfig.des file of the software install (only when
        'usePFWconfig' is in args), DB defaults (only when DB input is enabled
        and args['get_db_config'] is true), and finally the submit wcl together
        with the values given directly in args (only when 'wclfile' is in args).
        """

        WCL.__init__(self)

        # anything that has to persist across programs, or has to be
        # searchable, must be stored in self
        self.set_search_order(PFW_SEARCH_ORDER)

        submit_wcl = WCL()
        have_wclfile = 'wclfile' in args

        if have_wclfile:
            wclfile = args['wclfile']
            if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
                miscutils.fwdebug_print("Reading wclfile: %s" % (wclfile))
            try:
                t_begin = time.time()
                print("\tReading submit wcl...", end=' ')
                with open(wclfile, "r") as infh:
                    submit_wcl.read(infh, filename=wclfile)
                print("DONE (%0.2f secs)" % (time.time() - t_begin))
            except IOError as exc:
                miscutils.fwdie(
                    "Error: Problem reading wcl file '%s' : %s" % (wclfile, exc),
                    pfwdefs.PF_EXIT_FAILURE)

        # des services file location and the section to use inside it:
        # an explicit argument wins, then a value already in the submit wcl,
        # then the environment.  A missing environment variable leaves None
        # (services file: default to $HOME/.desservices.init; DB section: let
        # the DB connection code print the error message).
        for key, envname in (('submit_des_services', 'DES_SERVICES'),
                             ('submit_des_db_section', 'DES_DB_SECTION')):
            if key in args and args[key] is not None:
                submit_wcl[key] = args[key]
            elif key not in submit_wcl:
                submit_wcl[key] = os.environ.get(envname)

        # command-line values become top-level config entries
        cmdline_vars = (pfwdefs.PF_DRYRUN, pfwdefs.PF_USE_DB_IN,
                        pfwdefs.PF_USE_DB_OUT, pfwdefs.PF_USE_QCF,
                        pfwdefs.PF_VERIFY_FILES)
        for name in cmdline_vars:
            if name in args and args[name] is not None:
                submit_wcl[name] = args[name]

        if 'usePFWconfig' in args:
            install_cfg = os.environ['PROCESSINGFW_DIR'] + '/etc/pfwconfig.des'
            if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
                miscutils.fwdebug_print("Reading pfwconfig: %s" % (install_cfg))
            t_begin = time.time()
            print("\tReading config from software install...", end=' ')
            install_wcl = WCL()
            with open(install_cfg, "r") as infh:
                install_wcl.read(infh, filename=install_cfg)
            self.update(install_wcl)
            print("DONE (%0.2f secs)" % (time.time() - t_begin))

        # the submit wcl takes precedence over the install config
        use_db_in = None
        for source in (submit_wcl, self):
            if pfwdefs.PF_USE_DB_IN in source:
                use_db_in = miscutils.convertBool(source[pfwdefs.PF_USE_DB_IN])
                break

        if use_db_in and 'get_db_config' in args and args['get_db_config']:
            print("\tGetting defaults from DB...", end=' ')
            sys.stdout.flush()
            t_begin = time.time()
            dbh = pfwdb.PFWDB(submit_wcl['submit_des_services'],
                              submit_wcl['submit_des_db_section'])
            print("DONE (%0.2f secs)" % (time.time() - t_begin))
            self.update(dbh.get_database_defaults())

        # the wclfile overrides everything else, so it is merged in last
        if have_wclfile:
            if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
                miscutils.fwdebug_print("Reading wclfile: %s" % (args['wclfile']))
            self.update(submit_wcl)

        self.set_names()

        # remember the file name of the top-level submitwcl
        if have_wclfile and 'submitwcl' not in self:
            self['submitwcl'] = args['wclfile']

        if 'PROCESSINGFW_DIR' in os.environ and 'processingfw_dir' not in self:
            self['processingfw_dir'] = os.environ['PROCESSINGFW_DIR']

        # first use of this config: start all counters and current values
        if 'current' not in self:
            self['current'] = OrderedDict([('curr_block', ''),
                                           ('curr_archive', ''),
                                           ('curr_site', '')])
            self[pfwdefs.PF_WRAPNUM] = '0'
            self[pfwdefs.PF_BLKNUM] = '1'
            self[pfwdefs.PF_TASKNUM] = '0'
            self[pfwdefs.PF_JOBNUM] = '0'
Esempio n. 13
0
    def set_block_info(self):
        """ Set current vals to match current block number.

        Updates ``self['current']`` (``curr_block``, ``curr_site``,
        ``curr_archive``), ``self['block_dir']``, ``self['runsite']`` and,
        when a target archive is in use, ``self['list_target_archives']``.
        When only the home archive is in use and it defines
        ``transfer_server``, one server is picked at random and its transfer
        data (looked up in the DB) is merged into the archive definition.
        Invalid or missing block, site or archive values are reported through
        ``miscutils.fwdie`` with ``pfwdefs.PF_EXIT_FAILURE``.
        """
        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print("BEG")

        curdict = self['current']

        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print(f"\tcurdict = {curdict}")

        # current block number
        blknum = self[pfwdefs.PF_BLKNUM]

        # update current block name for accessing block information
        blockname = self.get_block_name(blknum)
        if not blockname:
            miscutils.fwdie(
                f"Error: Cannot determine block name value for blknum={blknum}",
                pfwdefs.PF_EXIT_FAILURE)
        curdict['curr_block'] = blockname

        self['block_dir'] = f'../B{int(blknum):02d}-{blockname}'

        # update current target site name
        (exists, site) = self.search('target_site')
        if not exists:
            miscutils.fwdie("Error:  Cannot determine target site.",
                            pfwdefs.PF_EXIT_FAILURE)

        site = site.lower()
        if site not in self[pfwdefs.SW_SITESECT]:
            print(f"Error: invalid site value ({site})")
            print("\tsite defs contain entries for sites: ",
                  list(self[pfwdefs.SW_SITESECT].keys()))
            miscutils.fwdie(f"Error: Invalid site value ({site})",
                            pfwdefs.PF_EXIT_FAILURE)
        curdict['curr_site'] = site
        self['runsite'] = site

        # update current target archive name if using archive
        if ((pfwdefs.USE_TARGET_ARCHIVE_INPUT in self
             and miscutils.convertBool(self[pfwdefs.USE_TARGET_ARCHIVE_INPUT]))
                or
            (pfwdefs.USE_TARGET_ARCHIVE_OUTPUT in self and
             miscutils.convertBool(self[pfwdefs.USE_TARGET_ARCHIVE_OUTPUT]))):
            (exists, archive) = self.search(pfwdefs.TARGET_ARCHIVE)
            if not exists:
                miscutils.fwdie("Error: Cannot determine target_archive value.   \n" \
                                f"\tEither set target_archive or set to FALSE both {pfwdefs.USE_TARGET_ARCHIVE_INPUT} and {pfwdefs.USE_TARGET_ARCHIVE_OUTPUT}",
                                pfwdefs.PF_EXIT_FAILURE)

            archive = archive.lower()
            if archive not in self[pfwdefs.SW_ARCHIVESECT]:
                print(f"Error: invalid target_archive value ({archive})")
                print("\tarchive contains: ", self[pfwdefs.SW_ARCHIVESECT])
                miscutils.fwdie(
                    f"Error: Invalid target_archive value ({archive})",
                    pfwdefs.PF_EXIT_FAILURE)

            curdict['curr_archive'] = archive

            if 'list_target_archives' in self:
                # Compare against whole names: a plain substring test on the
                # comma-separated string would skip an archive whose name is
                # contained in the name of an archive already listed.
                known_archives = [name.strip() for name in
                                  self['list_target_archives'].split(',')]
                if archive not in known_archives:
                    self['list_target_archives'] += ',' + archive
            else:
                self['list_target_archives'] = archive

        elif ((pfwdefs.USE_HOME_ARCHIVE_INPUT in self
               and self[pfwdefs.USE_HOME_ARCHIVE_INPUT] != 'never')
              or (pfwdefs.USE_HOME_ARCHIVE_OUTPUT in self
                  and self[pfwdefs.USE_HOME_ARCHIVE_OUTPUT] != 'never')):
            (exists, archive) = self.search(pfwdefs.HOME_ARCHIVE)
            if not exists:
                miscutils.fwdie("Error: Cannot determine home_archive value.\n" \
                                f"\tEither set home_archive or set correctly both {pfwdefs.USE_HOME_ARCHIVE_INPUT} and {pfwdefs.USE_HOME_ARCHIVE_OUTPUT}",
                                pfwdefs.PF_EXIT_FAILURE)

            archive = archive.lower()
            if archive not in self[pfwdefs.SW_ARCHIVESECT]:
                print(f"Error: invalid home_archive value ({archive})")
                print("\tarchive contains: ", self[pfwdefs.SW_ARCHIVESECT])
                miscutils.fwdie(
                    f"Error: Invalid home_archive value ({archive})",
                    pfwdefs.PF_EXIT_FAILURE)
            # dynamically choose a transfer node if a list is given
            if 'transfer_server' in self[pfwdefs.SW_ARCHIVESECT][archive]:
                if self.use_db_in:
                    # open the DB connection lazily and keep it for later use
                    if self.dbh is None:
                        self.dbh = pfwdb.PFWDB(self['submit_des_services'],
                                               self['submit_des_db_section'])
                    servers = self[pfwdefs.SW_ARCHIVESECT][archive][
                        'transfer_server'].replace(' ', '').split(',')
                    server = random.choice(servers)
                    self[pfwdefs.SW_ARCHIVESECT][archive].update(
                        self.dbh.get_transfer_data(server, archive))
                else:
                    miscutils.fwdie(
                        f"Error: transfer_servers was specified, but {pfwdefs.PF_USE_DB_IN} was set to False. Must be able to use database to use transfer_servers option.",
                        pfwdefs.PF_EXIT_FAILURE)

            curdict['curr_archive'] = archive
        else:
            # make sure to reset curr_archive from possible prev block value
            curdict['curr_archive'] = None

        if 'submit_des_services' in self:
            self['des_services'] = self['submit_des_services']

        if 'submit_des_db_section' in self:
            self['des_db_section'] = self['submit_des_db_section']

        if miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print("END")
Esempio n. 14
0
def jobpre(argv=None):
    """Perform the steps needed before a pipeline job.

    stdout/stderr are redirected to a temporary log file which is renamed to
    the job's ``jobpre`` log once the config has been read.  When DB output
    is enabled, the current DB timestamp is stored as both the condor and
    the target submit time of the job.  Finally the ``pretask`` event is
    logged.

    Args:
        argv: Argument list of the form ``[prog, configfile, jobnum]``.
            Defaults to ``sys.argv``.

    Returns:
        ``pfwdefs.PF_EXIT_FAILURE`` for a usage error, otherwise
        ``pfwdefs.PF_EXIT_SUCCESS``.

    Note:
        ``sys.stdout`` and ``sys.stderr`` are left bound to the (closed) log
        file on return; they are not restored here.
    """
    if argv is None:
        argv = sys.argv

    # temporary log until the real log name can be derived from the config
    debugfh = tempfile.NamedTemporaryFile(mode='w+',
                                          prefix='jobpre_',
                                          dir='.',
                                          delete=False)
    tmpfn = debugfh.name
    sys.stdout = debugfh
    sys.stderr = debugfh

    # use argv (not sys.argv) throughout so an explicitly passed list is honoured
    print(' '.join(argv))  # command line for debugging
    print(os.getcwd())

    if len(argv) < 3:
        print('Usage: jobpre configfile jobnum')
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    jobnum = argv[2]  # could also be uberctrl

    # read wcl file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    blockname = config.getfull('blockname')
    blkdir = config.get('block_dir')
    tjpad = pfwutils.pad_jobnum(jobnum)

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename(
        'job', {
            pfwdefs.PF_CURRVALS: {
                pfwdefs.PF_JOBNUM: jobnum,
                'flabel': 'jobpre',
                'fsuffix': 'out'
            }
        })
    new_log_name = "%s/%s/%s" % (blkdir, tjpad, new_log_name)
    miscutils.fwdebug_print("new_log_name = %s" % new_log_name)

    # move the temporary log to its final name and keep appending to it
    debugfh.close()
    os.chmod(tmpfn, 0o666)
    os.rename(tmpfn, new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT)):
        dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                          config.getfull('submit_des_db_section'))
        # the same timestamp is recorded for both submit times
        ctstr = dbh.get_current_timestamp_str()
        dbh.update_job_info(config, tjpad, {
            'condor_submit_time': ctstr,
            'target_submit_time': ctstr
        })

    log_pfw_event(config, blockname, tjpad, 'j', ['pretask'])

    miscutils.fwdebug_print("jobpre done")
    debugfh.close()
    return pfwdefs.PF_EXIT_SUCCESS
Esempio n. 15
0
def main(argv):
    """ Program entry point.

    Builds a query dictionary from the list or file definition named by
    ``--search`` inside the module named by ``--module`` (query fields,
    optional joins, optional output fields, optional archive restriction),
    runs it through ``queryutils.gen_file_list`` and writes the resulting
    lines with ``queryutils.output_lines``.

    Args:
        argv: Command-line arguments to parse (``--qoutfile``, ``--qouttype``,
            ``--config``, ``--module``, ``--search``).

    Returns:
        0 on success.

    Raises:
        Exception: if the module or search is missing or unknown, a query
            field has no value, required ``current`` information is missing
            from the config, or the query returns no files.
    """
    parser = argparse.ArgumentParser(description='genquery.py')
    parser.add_argument('--qoutfile', action='store')
    parser.add_argument('--qouttype', action='store')
    parser.add_argument('--config', action='store', dest='configfile')
    parser.add_argument('--module', action='store', dest='modulename')
    parser.add_argument('--search', action='store', dest='searchname')
    args = parser.parse_args(argv)

    if args.modulename is None:
        raise Exception("Error: Must specify module\n")

    print(args.configfile)
    config = pfwconfig.PfwConfig({'wclfile':args.configfile})

    if args.modulename not in config[pfwdefs.SW_MODULESECT]:
        raise Exception(f"Error: module '{args.modulename}' does not exist.\n")

    module_dict = config[pfwdefs.SW_MODULESECT][args.modulename]

    if args.searchname is None:
        raise Exception("Error: need to define either list or file or search\n")

    # a list definition takes precedence over a file definition of the same name
    if pfwdefs.SW_LISTSECT in module_dict and \
       args.searchname in module_dict[pfwdefs.SW_LISTSECT]:
        search_dict = module_dict[pfwdefs.SW_LISTSECT][args.searchname]
    elif pfwdefs.SW_FILESECT in module_dict and \
         args.searchname in module_dict[pfwdefs.SW_FILESECT]:
        search_dict = module_dict[pfwdefs.SW_FILESECT][args.searchname]
    else:
        raise Exception(f"Error: Could not find either list or file by name {args.searchname} in module {args.modulename}\n")

    archive_names = []

    if config.getfull(pfwdefs.USE_HOME_ARCHIVE_INPUT) != 'never':
        archive_names.append(config.getfull(pfwdefs.HOME_ARCHIVE))

    if config.getfull(pfwdefs.USE_TARGET_ARCHIVE_INPUT) != 'never':
        archive_names.append(config.getfull(pfwdefs.TARGET_ARCHIVE))

    fields = miscutils.fwsplit(search_dict[pfwdefs.SW_QUERYFIELDS].lower())

    # optionally restrict the query to the current run
    if ('query_run' in config and 'fileclass' in search_dict and
            'fileclass' in config and search_dict['fileclass'] == config['fileclass']):
        query_run = config['query_run'].lower()
        if query_run == 'current':
            fields.append('run')
        elif query_run == 'allbutfirstcurrent':
            if 'current' not in config:
                raise Exception("Internal Error:  Current object doesn't exist\n")
            if 'curr_blocknum' not in config['current']:
                raise Exception("Internal Error:  current->curr_blocknum doesn't exist\n")

            # the config value may be a string; comparing str with int
            # raises TypeError on Python 3, so convert first
            block_num = int(config['current']['curr_blocknum'])
            if block_num > 0:
                fields.append('run')

    query = {}
    qtable = search_dict['query_table']
    for fld in fields:
        # "table.field" overrides the default query table for this field
        table = qtable
        if '.' in fld:
            table, fld = fld.split('.')

        # most specific definition wins: search, then module, then config
        if fld in search_dict:
            value = search_dict[fld]
        elif fld in module_dict:
            value = module_dict[fld]
        elif fld in config:
            value = config.getfull(fld)
        else:
            raise Exception(f"Error: genquery could not find value for query field {fld}\n")

        value = replfuncs.replace_vars(value, config,
                                       {pfwdefs.PF_CURRVALS: {'modulename': args.modulename},
                                        'searchobj': search_dict,
                                        intgdefs.REPLACE_VARS: True,
                                        'expand': True})[0]
        if value is None:
            raise Exception(f"Value=None for query field {fld}\n")

        # Split once.  Previously the ':' test ran against the list produced
        # by the ',' split and could pass that list to fwsplit again.
        if ',' in value or ':' in value:
            value = miscutils.fwsplit(value)

        query.setdefault(table, {}).setdefault('key_vals', {})[fld] = value

    # if specified, insert join into query hash
    if 'join' in search_dict:
        joins = miscutils.fwsplit(search_dict['join'].lower())
        for j in joins:
            jmatch = re.search(r"(\S+)\.(\S+)\s*=\s*(\S+)", j)
            if jmatch:
                table_query = query.setdefault(jmatch.group(1), {})
                if 'join' in table_query:
                    table_query['join'] += "," + j
                else:
                    table_query['join'] = j

    # the query table may not have an entry yet if no field or join used it
    query.setdefault(qtable, {})['select_fields'] = ['filename']

    # check output fields for fields from other tables.
    if 'output_fields' in search_dict:
        output_fields = miscutils.fwsplit(search_dict['output_fields'].lower())

        for ofield in output_fields:
            ofmatch = re.search(r"(\S+)\.(\S+)", ofield)
            if ofmatch:
                table = ofmatch.group(1)
                field = ofmatch.group(2)
            else:
                table = qtable
                field = ofield
            select_fields = query.setdefault(table, {}).setdefault('select_fields', [])
            if field not in select_fields:
                select_fields.append(field)

    # select fields were collected as lists; the query wants comma-joined strings
    for tbl in query:
        if 'select_fields' in query[tbl]:
            query[tbl]['select_fields'] = ','.join(query[tbl]['select_fields'])

    if archive_names:
        query['file_archive_info'] = {'select_fields': 'compression'}
        query['file_archive_info']['join'] = f"file_archive_info.filename={qtable}.filename"
        query['file_archive_info']['key_vals'] = {'archive_name': ','.join(archive_names)}

    print("Calling gen_file_list with the following query:\n")
    miscutils.pretty_print_dict(query, out_file=None, sortit=False, indent=4)
    print("\n\n")
    dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                      config.getfull('submit_des_db_section'))
    files = queryutils.gen_file_list(dbh, query)

    if not files:
        raise Exception(f"genquery: query returned zero results for {args.searchname}\nAborting\n")

    ## output list
    lines = queryutils.convert_single_files_to_lines(files)
    queryutils.output_lines(args.qoutfile, lines, args.qouttype)

    return 0
Esempio n. 16
0
def blockpost(argv=None):
    """ Program entry point.

    Post-processing step for a block.  While it runs, stdout and stderr are
    redirected to a log file (first ``blockpost.out`` in the current
    directory, then renamed into the block directory once the config has
    been read).  When DB output is enabled it gathers block-task, job and
    wrapper status from the database to build the body of a status email,
    optionally verifies archive file sizes, sends an email according to
    ``when_to_email``, records the end of the block task (and of the attempt
    on failure) in the database and, on success, increments the block number
    and rewrites the config file.

    Args:
        argv: command line as ``[program, configfile, retval]`` where
            ``retval`` is the integer status handed to this step.
            Defaults to ``sys.argv``.

    Returns:
        int: final status.  ``pfwdefs.PF_EXIT_FAILURE`` is returned for a
        malformed command line; in dryrun mode the result is
        ``pfwdefs.PF_EXIT_DRYRUN``.
    """
    realstdout = sys.stdout
    realstderr = sys.stderr

    if argv is None:
        argv = sys.argv

    # open file to catch error messages about command line
    debugfh = open('blockpost.out', 'w')
    sys.stdout = debugfh
    sys.stderr = debugfh
    print(' '.join(argv))  # print command line for debugging

    print(f"running on {socket.gethostname()}")

    if len(argv) != 3:
        print('Usage: blockpost.py configfile retval')
        # give the real streams back before closing the log so the caller
        # is not left writing to a closed file
        sys.stdout = realstdout
        sys.stderr = realstderr
        debugfh.close()
        return pfwdefs.PF_EXIT_FAILURE

    configfile = argv[1]
    retval = int(argv[2])

    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print(f"configfile = {configfile}")
    miscutils.fwdebug_print(f"retval = {retval}")

    # read sysinfo file
    config = pfwconfig.PfwConfig({'wclfile': configfile})
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print("done reading config file")
    blockname = config.getfull('blockname')
    blkdir = config.getfull('block_dir')

    # now that have more information, can rename output file
    miscutils.fwdebug_print("getting new_log_name")
    new_log_name = config.get_filename('block',
                                       {pfwdefs.PF_CURRVALS: {'flabel': 'blockpost',
                                                              'fsuffix':'out'}})
    new_log_name = f"{blkdir}/{new_log_name}"
    miscutils.fwdebug_print(f"new_log_name = {new_log_name}")

    # move the log to its final name and keep appending to it
    debugfh.close()
    os.chmod('blockpost.out', 0o666)
    os.rename('blockpost.out', new_log_name)
    debugfh = open(new_log_name, 'a+')
    sys.stdout = debugfh
    sys.stderr = debugfh

    os.chdir(blkdir)

    log_pfw_event(config, blockname, 'blockpost', 'j', ['posttask', retval])

    dryrun = config.getfull(pfwdefs.PF_DRYRUN)
    run = config.getfull('run')
    attid = config['pfw_attempt_id']
    blknum = int(config.getfull(pfwdefs.PF_BLKNUM))
    blktid = None

    msg2 = ""           # detailed body of the status email
    dbh = None
    qdbh = None
    job_byblk = {}
    wrap_byjob = {}
    wrapinfo = {}
    jobinfo = {}
    jobwrap = {}        # bound up front: only filled on some paths below
    logfullnames = {}
    failedwraps = {}
    whyfailwraps = {}   # mod failures for other modname, shouldn't happen
    usedb = miscutils.convertBool(config.getfull(pfwdefs.PF_USE_DB_OUT))
    verify_files = miscutils.convertBool(config.getfull('verify_files'))
    verify_status = 0
    sem = None
    if verify_files and not usedb:
        print('Skipping file verification due to lack of database connection')
    if usedb:
        try:
            miscutils.fwdebug_print("Connecting to DB")
            if config.dbh is None:
                dbh = pfwdb.PFWDB(config.getfull('submit_des_services'),
                                  config.getfull('submit_des_db_section'))
            else:
                dbh = config.dbh
            if verify_files:
                curs = dbh.cursor()
                # NOTE(review): the archive name is interpolated into the SQL text;
                # consider bind variables if this value can ever be untrusted.
                curs.execute(f"select root from ops_archive where name='{config.getfull('home_archive')}'")
                rows = curs.fetchall()
                numrows = 0 if rows is None else len(rows)
                if numrows != 1:
                    raise Exception(f"Invalid archive name ({config.getfull('home_archive')}).   Found {numrows} rows in ops_archive")
                root = rows[0][0]
                if not os.path.isdir(root):
                    print(f"Cannot read archive root directory:{config.getfull('home_archive')} This program must be run on an NCSA machine with access to the archive storage system.")
                # the semaphore is held only while the comparison runs
                # (presumably it throttles concurrent verifications -- confirm)
                sem = dbsem.DBSemaphore('verify_files_10', None, config.getfull('submit_des_services'), config.getfull('submit_des_db_section'), connection=dbh)
                print("\n\nVerifying archive file sizes on disk (0 is success)")
                verify_status = cu.compare(dbh=dbh, archive=config.getfull('home_archive'), pfwid=attid, md5sum=False, debug=False, script=False, verbose=False, silent=True)
                if sem is not None:
                    del sem
                    sem = None
                print(f"  Verification of files returned status {verify_status:d}")
                if verify_status != 0:
                    print("  This indicates that one or more files do not have the correct file size (based on DB entries). Run")
                    print(f"\n    compare_db.py --des_services {config.getfull('submit_des_services')} --section {config.getfull('submit_des_db_section')} --archive {config.getfull('home_archive')} --pfwid {int(attid):d} --verbose")
                    print("\n  to see the details.")

            if miscutils.convertBool(config.getfull(pfwdefs.PF_USE_QCF)):
                import qcframework.qcfdb as qcfdb
                # share the existing connection instead of opening a second one
                qdbh = qcfdb.QCFDB(connection=dbh)

            print(f"\n\nChecking non-job block task status from task table in DB ({pfwdefs.PF_EXIT_SUCCESS} is success)")
            num_bltasks_failed = 0
            bltasks = {}
            blktid = None
            if ('block' in config['task_id'] and
                    str(blknum) in config['task_id']['block']):
                blktid = int(config['task_id']['block'][str(blknum)])
                miscutils.fwdebug_print("Getting block task info from DB")
                start_time = time.time()
                bltasks = dbh.get_block_task_info(blktid)
                end_time = time.time()
                miscutils.fwdebug_print(f"Done getting block task info from DB ({end_time - start_time} secs)")
                for bltdict in bltasks.values():
                    print("Block status = ", bltdict['status'])
                    if bltdict['status'] == pfwdefs.PF_EXIT_DRYRUN:
                        print("setting return value to dryrun")
                        retval = bltdict['status']
                    elif bltdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        num_bltasks_failed += 1
                        msg2 += f"\t{bltdict['name']}"
                        if bltdict['label'] is not None:
                            msg2 += f" - {bltdict['label']}"
                        msg2 += " failed\n"

                        if bltdict['name'] == 'begblock':
                            # try to read the begblock.out and begblock.err files
                            print("Trying to get begblock.out and begblock.err")
                            msg2 += get_subblock_output("begblock")

                            # try to get QCF messages (especially from query codes)
                            begblock_tid = int(config['task_id']['begblock'])
                            sql = f"select id from task where parent_task_id={begblock_tid:d} and status!=0"
                            curs = dbh.cursor()
                            curs.execute(sql)
                            res = curs.fetchall()
                            msg2 += "\n===== QCF Messages =====\n"
                            msg2 += "\n begblock\n"
                            # look up messages for the block task, begblock itself
                            # and every non-zero-status child task of begblock
                            wrapids = [blktid, begblock_tid]
                            for r in res:
                                wrapids.append(r[0])

                            wrapmsg = {}
                            if qdbh is not None:
                                miscutils.fwdebug_print("Querying QCF messages")
                                start_time = time.time()
                                wrapmsg = qdbh.get_qcf_messages_for_wrappers(wrapids)
                                end_time = time.time()
                                miscutils.fwdebug_print(f"Done querying QCF messages ({end_time-start_time} secs)")
                                miscutils.fwdebug_print(f"wrapmsg = {wrapmsg}")
                            if not wrapmsg:
                                msg2 += "    No QCF messages\n"
                            else:
                                for msgs in wrapmsg.values():
                                    for m in msgs:
                                        msg2 += "    " + m['message'] + "\n"

                        retval = pfwdefs.PF_EXIT_FAILURE

                if retval != pfwdefs.PF_EXIT_DRYRUN:
                    print(f"\n\nChecking job status from pfw_job table in DB ({pfwdefs.PF_EXIT_SUCCESS} is success)")

                    miscutils.fwdebug_print("Getting job info from DB")
                    start_time = time.time()
                    jobinfo = dbh.get_job_info({'pfw_block_task_id': blktid})
                    end_time = time.time()
                    miscutils.fwdebug_print(f"Done getting job info from DB ({end_time - start_time} secs)")
                    miscutils.fwdebug_print("Getting wrapper info from DB")
                    start_time = time.time()
                    wrapinfo = dbh.get_wrapper_info(pfw_attempt_id=attid, pfw_block_task_id=blktid)
                    # job-wrapper info is only fetched when something already failed
                    if retval != pfwdefs.PF_EXIT_SUCCESS:
                        jobwrap = dbh.get_jobwrapper_info(id=attid)
                    else:
                        jobwrap = {}
                    end_time = time.time()
                    miscutils.fwdebug_print(f"Done getting wrapper info from DB ({end_time - start_time} secs)")
            else:
                msg = f"Could not find task id for block {blockname} in config.des"
                print("Error:", msg)
                if 'attempt' in config['task_id']:
                    miscutils.fwdebug_print("Saving pfw message")
                    start_time = time.time()
                    Messaging.pfw_message(dbh, attid, config['task_id']['attempt'],
                                          msg, pfwdefs.PFWDB_MSG_INFO, 'blockpost.out', 0)
                    end_time = time.time()
                    miscutils.fwdebug_print(f"Done saving pfw message ({end_time - start_time} secs)")
                print("all the task ids:", config['task_id'])

            archive = None
            if pfwdefs.HOME_ARCHIVE in config:
                archive = config.getfull(pfwdefs.HOME_ARCHIVE)
            logfullnames = dbh.get_log_fullnames(attid, archive)
            print("len(jobinfo) = ", len(jobinfo))
            print("len(wrapinfo) = ", len(wrapinfo))
            job_byblk = pfwutils.index_job_info(jobinfo)
            print("blktid: ", blktid)
            print("job_byblk:", job_byblk)

            if blktid not in job_byblk:
                print(f"Warn: could not find jobs for block {blknum}")
                print("      This is ok if attempt died before jobs ran")
                print("      block task_ids in job_byblk:", list(job_byblk.keys()))
            else:
                wrap_byjob, _ = pfwutils.index_wrapper_info(wrapinfo)

                # in case the post wrapper stuff failed, internally mark the task
                # as failed to retrieve the info later
                for wrapb in wrap_byjob.values():
                    for wrapper in wrapb.values():
                        if wrapper['parent_task_id'] in jobwrap and jobwrap[wrapper['parent_task_id']]['status'] is not None \
                           and wrapper['status'] is not None and jobwrap[wrapper['parent_task_id']]['status'] > wrapper['status']:
                            wrapper['status'] = jobwrap[wrapper['parent_task_id']]['status']

                # build one report section per job; everything below is per-job
                # and therefore has to stay inside this loop
                for jobtid, jobdict in sorted(job_byblk[blktid].items()):
                    failedwraps[jobtid] = []
                    whyfailwraps[jobtid] = []

                    jobkeys = ""

                    # don't print out successful wrappers
                    if jobtid in wrap_byjob and jobdict['status'] == pfwdefs.PF_EXIT_SUCCESS:
                        continue

                    if jobdict['jobkeys'] is not None:
                        jobkeys = jobdict['jobkeys']

                    submit_job_path = f"{config.getfull('work_dir')}/B{int(config.getfull('blknum')):02d}-{config.getfull('blockname'):s}/{int(jobdict['jobnum']):04d}"
                    msg2 += f"\n\t{pfwutils.pad_jobnum(jobdict['jobnum'])} ({jobkeys}) "

                    if jobtid not in wrap_byjob:
                        msg2 += "\tNo wrapper instances"
                    else:
                        # the wrapper with the highest number identifies the
                        # module the job had reached
                        maxwrap = max(wrap_byjob[jobtid])
                        modname = wrap_byjob[jobtid][maxwrap]['modname']

                        msg2 += f"{len(wrap_byjob[jobtid]):d}/{jobdict['expect_num_wrap']}  {modname}"

                        # determine wrappers for this job without success exit
                        for wrapnum, wdict in wrap_byjob[jobtid].items():
                            if wdict['status'] is None or wdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                                if wdict['modname'] == modname:
                                    failedwraps[jobtid].append(wrapnum)
                                else:
                                    whyfailwraps[jobtid].append(wrapnum)

                    if jobdict['status'] == pfwdefs.PF_EXIT_EUPS_FAILURE:
                        msg2 += " - FAIL - EUPS setup failure"
                        retval = jobdict['status']
                    elif jobdict['status'] == pfwdefs.PF_EXIT_CONDOR:
                        msg2 += " - FAIL - Condor/Globus failure"
                        retval = jobdict['status']
                    elif jobdict['status'] is None:
                        msg2 += " - FAIL - NULL status"
                        retval = pfwdefs.PF_EXIT_FAILURE
                    elif jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += " - FAIL - Non-zero status"
                        retval = jobdict['status']

                    if jobdict['status'] != pfwdefs.PF_EXIT_SUCCESS:
                        msg2 += f"\n\t\t{submit_job_path}/runjob.out "

                    msg2 += '\n'

                    # print pfw_messages
                    if 'message' in jobdict:
                        print('\nmessages: ', jobdict['message'])
                        for msgdict in sorted(jobdict['message'], key=lambda k: k['message_time']):
                            level = int(msgdict['message_lvl'])
                            levelstr = 'info'
                            if level == pfwdefs.PFWDB_MSG_WARN:
                                levelstr = 'WARN'
                            elif level == pfwdefs.PFWDB_MSG_ERROR:
                                levelstr = 'ERROR'

                            msg2 += "\t\t{} - {}\n".format(levelstr, msgdict['message'].replace('\n', '\n\t\t\t'))

                    if jobtid in wrap_byjob:
                        # print log file name for failed/unfinished wrappers
                        for wrapnum in failedwraps[jobtid]:
                            wrapdict = wrap_byjob[jobtid][wrapnum]
                            if wrapdict['log'] in logfullnames:
                                msg2 += f"\t\t{wrapnum} - {logfullnames[wrapdict['log']]}\n"
                            else:
                                msg2 += f"\t\t{wrapnum} - Could not find log in archive {wrapdict['log']})\n"
                            wrapmsg = get_qcf_messages(qdbh, [wrapdict['task_id']])
                            msg2 += print_qcf_messages(wrapdict, wrapmsg)

                        msg2 += '\n'

                        # If weirdness happened in run, print a message
                        if whyfailwraps[jobtid]:
                            msg2 += "\n*** Contact framework developers.   Wrappers ran after at least 1 wrapper from a previous module that doesn't have success status.\n"
                            # wrapper numbers may not be strings, so convert before joining
                            msg2 += f"\t{','.join(str(wnum) for wnum in whyfailwraps[jobtid])}\n"

        except Exception as exc:
            # best effort: a failure while collecting status must not stop the
            # email and DB bookkeeping below, so record it and carry on
            if sem is not None:
                del sem
            msg2 += "\n\nEncountered error trying to gather status information for email."
            msg2 += "\nCheck output for blockpost for further details."
            print("\n\nEncountered error trying to gather status information for email")
            print(f"{exc.__class__.__name__}: {str(exc)}")
            (extype, exvalue, trback) = sys.exc_info()
            traceback.print_exception(extype, exvalue, trback, file=sys.stdout)
            retval = pfwdefs.PF_EXIT_FAILURE
    # a non-zero verification status turns an otherwise good block into a failure
    retval = int(retval) + verify_status
    print("before email retval =", retval)

    when_to_email = 'run'
    if 'when_to_email' in config:
        when_to_email = config.getfull('when_to_email').lower()

    if miscutils.convertBool(dryrun):
        if when_to_email != 'never':
            print("dryrun = ", dryrun)
            print("Sending dryrun email")
            if retval == pfwdefs.PF_EXIT_DRYRUN:
                msg1 = f"{run}:  In dryrun mode, block {blockname} has finished successfully."
            else:
                msg1 = f"{run}:  In dryrun mode, block {blockname} has failed."

            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending dryrun email")
            print("retval = ", retval)
        retval = pfwdefs.PF_EXIT_DRYRUN
    elif retval:
        if when_to_email != 'never':
            print("Sending block failed email\n")
            msg1 = f"{run}:  block {blockname} has failed."
            send_email(config, blockname, retval, "", msg1, msg2)
        else:
            print("Not sending failed email")
            print("retval = ", retval)
    elif retval == pfwdefs.PF_EXIT_SUCCESS:
        if when_to_email == 'block':
            msg1 = f"{run}:  block {blockname} has finished successfully."
            msg2 = ""
            print("Sending success email\n")
            send_email(config, blockname, retval, "", msg1, msg2)
        elif when_to_email == 'run':
            # with 'run', only the last block of the run triggers a success email
            numblocks = len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ','))
            if int(config[pfwdefs.PF_BLKNUM]) == numblocks:
                msg1 = f"{run}:  run has finished successfully."
                msg2 = ""
                print("Sending success email\n")
                send_email(config, blockname, retval, "", msg1, msg2)
            else:
                print("Not sending run email because not last block")
                print("retval = ", retval)
        else:
            print("Not sending success email")
            print("retval = ", retval)
    else:
        print("Not sending email")
        print("retval = ", retval)

    # Store values in DB and hist file
    if miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        if dbh is None:
            dbh = pfwdb.PFWDB(config.getfull('submit_des_services'), config.getfull('submit_des_db_section'))
        if blktid is not None:
            print("Updating end of block task", blktid)
            dbh.end_task(blktid, retval, True)
        else:
            print("Could not update end of block task without block task id")
        if retval != pfwdefs.PF_EXIT_SUCCESS:
            print("Updating end of attempt", config['task_id']['attempt'])
            dbh.end_task(config['task_id']['attempt'], retval, True)
        dbh.commit()

    print("before next block retval = ", retval)
    if retval == pfwdefs.PF_EXIT_SUCCESS:
        # Get ready for next block
        config.inc_blknum()
        with open(configfile, 'w') as cfgfh:
            config.write(cfgfh)
        print("new blknum = ", config[pfwdefs.PF_BLKNUM])
        print("number of blocks = ", len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')))
        # past the last block: the whole attempt passed
        if int(config[pfwdefs.PF_BLKNUM]) > len(miscutils.fwsplit(config[pfwdefs.SW_BLOCKLIST], ',')) and  miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
            updatevals = {'PROCESSING_STATE': 'PASS'}
            wherevals = {'PFW_ATTEMPT_ID': attid}
            dbh.basic_update_row('ATTEMPT_STATE', updatevals, wherevals)
            dbh.commit()
    elif miscutils.convertBool(config[pfwdefs.PF_USE_DB_OUT]):
        updatevals = {'PROCESSING_STATE': 'FAIL'}
        wherevals = {'PFW_ATTEMPT_ID': attid}
        dbh.basic_update_row('ATTEMPT_STATE', updatevals, wherevals)
        dbh.commit()
    if dbh is not None:
        dbh.close()
    miscutils.fwdebug_print(f"Returning retval = {retval} ({type(retval)})")
    miscutils.fwdebug_print("END")
    if miscutils.fwdebug_check(3, 'PFWPOST_DEBUG'):
        miscutils.fwdebug_print(f"Exiting with = {retval}")
        miscutils.fwdebug_print(f"type of exitcode = {type(retval)}")

    # restore the real streams first, then close the log: nothing may be
    # printed to the redirected stdout once the file is closed
    sys.stdout = realstdout
    sys.stderr = realstderr
    debugfh.close()

    return int(retval)