Beispiel #1
0
def syncCampaign(Session):
    """Import jobs the server reports that are missing from the local database.

    Fetches the job list with ``Client.getAllJobs()``. For every reported job
    whose panda ID and job name are both unknown locally, and whose job name
    can be unpacked by ``unpackServerName``, a ``Job`` row is created (plus a
    ``Campaign`` row if no campaign with that name exists yet). Each affected
    campaign is then asked to refresh its jobs, and a warning is printed for
    every output file recorded on more than one job.

    Args:
        Session: database session used for all queries, commits and
            rollbacks.

    Returns:
        None.

    Raises:
        SystemExit: with exit code 1 when the job list cannot be fetched or
            decoded; the traceback is logged and the session rolled back
            first.
    """
    try:
        output = Client.getAllJobs()
        # The first element is a status code; anything other than 0 is
        # treated as a server failure.
        if output[0] != 0:
            raise Exception("Server error")
        serverJobs = json.loads(output[1])['jobs']
    except Exception:
        logging.error(traceback.format_exc())
        Session.rollback()
        sys.exit(1)

    # campaign id -> panda IDs of the jobs that were added for that campaign
    jobsToRepopulate = {}
    for j in serverJobs:
        try:
            if not (j['pandaid'] and j['jobname']):
                continue

            # Check for a pre-existing job with this panda ID or name. The
            # queries are only built here and executed lazily in the
            # short-circuiting condition below, so the second one is skipped
            # when the first already finds a match.
            # NOTE(review): LIKE treats '_' and '%' in the value as
            # wildcards; confirm whether exact matching is intended here.
            byPandaID = Session.query(Job).filter(
                Job.pandaID.like(j['pandaid']))
            byJobName = Session.query(Job).filter(
                Job.serverName.like(j['jobname']))
            if (byPandaID.first() is not None
                    or byJobName.first() is not None):
                continue

            # Shorter names are skipped without attempting to unpack them.
            if len(j['jobname']) <= 37:
                continue

            # See if the job name fits the expected format
            campaignName, iterable, _ = unpackServerName(j['jobname'])
            if not campaignName:
                continue

            campaign = Session.query(Campaign).filter(
                Campaign.name.like(campaignName)).first()
            if campaign is None:
                campaign = Campaign(name=campaignName,
                                    lastUpdate=datetime.datetime.utcnow())
                Session.add(campaign)
                Session.commit()

            # The job script cannot be recovered from the monitor output; it
            # is filled in by the per-campaign update further down.
            # The server can report a null substatus, which is stored as an
            # empty string to satisfy the database rules.
            job = Job(script="unknown",
                      campaignID=campaign.id,
                      pandaID=j['pandaid'],
                      serverName=j['jobname'],
                      status=j['jobstatus'],
                      subStatus=j['jobsubstatus'] or "")
            if iterable:
                job.iterable = iterable
            Session.add(job)
            Session.commit()

            # Record the missing campaign/job pair only after the commit
            # succeeded.
            jobsToRepopulate.setdefault(campaign.id, []).append(job.pandaID)
        except Exception:
            logging.error(traceback.format_exc())
            Session.rollback()

    # Each job has to be queried individually to get its job parameters
    for campaignID, pandaIDs in jobsToRepopulate.items():
        try:
            camp = Session.query(Campaign).get(campaignID)
            # Recreate the jobs that were missing
            camp.updateJobs(Session, recreate=True, jobs_to_query=pandaIDs)
            # Now update them all so the whole campaign is consistent
            camp.updateJobs(Session)
            _warnSharedOutputFiles(Session)
        except Exception:
            logging.error(traceback.format_exc())
            Session.rollback()
    return None


def _warnSharedOutputFiles(Session):
    """Print a red warning for each output file recorded on more than one job."""
    for OF in Session.query(Job).with_entities(
            Job.outputFile).group_by(Job.outputFile).all():
        outputFile = OF[0]
        # Jobs without an output file cannot clash with one another.
        if outputFile is None:
            continue
        # Exact comparison: with LIKE, a '_' or '%' inside the stored path
        # would act as a wildcard and count unrelated files as duplicates.
        jobsThisOF = Session.query(Job).filter(
            Job.outputFile == outputFile).count()
        if jobsThisOF > 1:
            print(
                coloured(
                    'Warning:' + str(jobsThisOF) +
                    ' job(s) have shared output file: \n' + outputFile +
                    '\n', 'red'))
Beispiel #2
0
def submitCampaign(Session, jobsFile):
    """Submit every job listed in a campaign description and record it.

    The description is parsed with ``submissionTools.PandaJobsJSONParser``.
    The campaign row is looked up by name and created if absent. For each job
    entry a ``Job`` row is built, a job spec is submitted through
    ``Client.submitJobs``, and the row is stored with status ``submitted``
    (including the returned panda ID) or ``failed``.

    Args:
        Session: database session used for queries and commits.
        jobsFile: campaign description handed to the parser.

    Returns:
        None.

    Raises:
        SystemExit: with exit code 1 when the description cannot be parsed
            or the campaign row cannot be found or created; the traceback is
            logged and the session rolled back first.
    """
    try:
        campdef = submissionTools.PandaJobsJSONParser.parse(jobsFile)
        # Colons delimit the fields of the server name built below, so they
        # are not allowed in campaign names. The lookup uses the stripped
        # name as well; otherwise a campaign whose requested name contains a
        # colon is never found again and is re-created on every submission.
        campName = re.sub(':', '', campdef['campaign'])
        campaign = Session.query(Campaign).filter(
            Campaign.name.like(campName)).first()
        if campaign is None:
            campaign = Campaign(name=campName,
                                lastUpdate=datetime.datetime.utcnow())
            Session.add(campaign)
            Session.commit()
    except Exception:
        logging.error(traceback.format_exc())
        Session.rollback()
        sys.exit(1)

    for j in campdef['jobs']:
        nodes = j['nodes']
        walltime = j['walltime']
        command = j['command']

        # 'outputFile' and 'iterable' are optional: a missing key or a null
        # value both mean "not set".
        try:
            outputFile = j['outputFile'].strip()
        except (KeyError, AttributeError):
            outputFile = None
        try:
            iterable = j['iterable'].strip()
        except (KeyError, AttributeError):
            iterable = None

        # Check to see if this is a duplicate output file. Exact comparison:
        # with LIKE, a '_' or '%' in the path would act as a wildcard.
        if outputFile is not None:
            jobsThisOF = Session.query(Job).filter(
                Job.outputFile == outputFile).count()
            if jobsThisOF > 0:
                print(
                    coloured(
                        'Warning:' + str(jobsThisOF) +
                        ' job(s) already exist with output file: \n' +
                        outputFile + '\n', 'red'))

        dbJob = Job(script=command,
                    nodes=nodes,
                    wallTime=walltime,
                    status="To Submit",
                    subStatus="To Submit",
                    campaignID=campaign.id,
                    outputFile=outputFile)

        # Server name layout: c:<campaign>:[i:<iterable>:][oF:<file>:]<uuid>
        serverName = 'c:' + campaign.name + ':'
        if iterable:
            serverName += 'i:' + iterable + ':'
        if outputFile:
            # Panda Server doesn't like slashes in its job names
            serverName += 'oF:' + re.sub('/', ';', outputFile) + ':'
        uuidText = subprocess.check_output('uuidgen')
        # check_output returns bytes on Python 3, which cannot be appended to
        # a str. The output is otherwise kept unchanged, trailing newline
        # included.
        # NOTE(review): confirm whether unpackServerName depends on that
        # newline before stripping it.
        if not isinstance(uuidText, str):
            uuidText = uuidText.decode()
        serverName += uuidText
        dbJob.serverName = serverName
        dbJob.iterable = iterable

        jobSpec = submissionTools.createJobSpec(walltime=walltime,
                                                command=command,
                                                outputFile=outputFile,
                                                nodes=nodes,
                                                jobName=dbJob.serverName)

        # The iterable is optional, so console messages fall back to an empty
        # label instead of calling .strip() on None.
        label = iterable or ''
        try:
            s, o = Client.submitJobs([jobSpec])
            print(o)
            # A non-zero status means the request itself failed; in that case
            # the output is not a list of job IDs.
            if s != 0:
                raise Exception("Server error")
            dbJob.pandaID = o[0][0]
            dbJob.status = 'submitted'
            dbJob.subStatus = 'submitted'
            print(coloured(label + ", " + str(o[0][0]) + "\n", 'green'))
        except Exception:
            logging.error(traceback.format_exc())
            print(coloured(label + " job failed to submit\n", 'red'))
            dbJob.status = 'failed'
            dbJob.subStatus = 'failed'
        # The row is stored either way so failed submissions stay visible.
        Session.add(dbJob)
        Session.commit()

    return None
Beispiel #3
0
def submitCampaign(Session, campSpecFile, listFile):
    """Submit one job per list entry from a campaign job template.

    The campaign specification is parsed with
    ``submissionTools.PandaJobsJSONParser``. The campaign row is looked up by
    name and created if absent. When ``listFile`` is given, each of its lines
    replaces the ``<iter>`` placeholder in the template's command and output
    file to produce one job per line; otherwise a single job is submitted
    from the template as-is. Every job is committed before submission and
    updated afterwards with status ``submitted`` (including the returned
    panda ID) or ``failed``.

    Args:
        Session: database session used for queries and commits.
        campSpecFile: campaign specification handed to the parser.
        listFile: path of a text file with one iterable value per line, or a
            falsy value to submit the template once.

    Returns:
        None.

    Raises:
        SystemExit: with exit code 1 when the specification cannot be parsed
            or the campaign row cannot be found or created; the traceback is
            logged and the session rolled back first.
    """
    try:
        campdef = submissionTools.PandaJobsJSONParser.parse(campSpecFile)
        campaign = Session.query(Campaign).filter(
            Campaign.name.like(campdef['campaign'])).first()
        if campaign is None:
            campaign = Campaign(name=campdef['campaign'],
                                lastUpdate=datetime.datetime.utcnow())
            Session.add(campaign)
            Session.commit()
    except Exception:
        logging.error(traceback.format_exc())
        Session.rollback()
        sys.exit(1)

    template = campdef['jobtemplate']
    nodes = template['nodes']
    walltime = template['walltime']
    command = template['command']
    # The output file is optional in the template.
    try:
        outputFile = template['outputFile']
    except KeyError:
        outputFile = None

    if listFile:
        with open(listFile, 'r') as f:
            iterList = [re.sub("\n", "", line) for line in f]
    else:
        # A single empty iterable submits the template exactly once.
        iterList = ['']

    for iterable in iterList:
        if listFile:
            # Plain substring replacement: the list entry is data, so
            # backslashes in it must not be read as regex replacement
            # escapes.
            jobCommand = command.replace('<iter>', iterable)
            # A template without an output file stays without one instead of
            # failing on the substitution.
            if outputFile is None:
                jobOutput = None
            else:
                jobOutput = outputFile.replace('<iter>', iterable)
        else:
            jobCommand = command
            jobOutput = outputFile

        dbJob = Job(script=jobCommand,
                    nodes=nodes,
                    wallTime=walltime,
                    status="To Submit",
                    campaignID=campaign.id,
                    outputFile=jobOutput)
        uuidText = subprocess.check_output('uuidgen')
        # check_output returns bytes on Python 3, which cannot be appended to
        # a str. The output is otherwise kept unchanged, trailing newline
        # included.
        if not isinstance(uuidText, str):
            uuidText = uuidText.decode()
        # NOTE(review): attribute is spelled 'servername' here; confirm it
        # matches the column name on the Job model.
        dbJob.servername = campaign.name + uuidText
        if listFile:
            dbJob.iterable = iterable

        # The row is committed before submission, so a job exists locally
        # even if the submission below fails.
        Session.add(dbJob)
        Session.commit()

        jobSpec = submissionTools.createJobSpec(walltime=walltime,
                                                command=jobCommand,
                                                outputFile=jobOutput,
                                                nodes=nodes,
                                                jobName=dbJob.servername)
        try:
            s, o = Client.submitJobs([jobSpec])
            # A non-zero status means the request itself failed; in that case
            # the output is not a list of job IDs.
            if s != 0:
                raise Exception("Server error")
            dbJob.pandaID = o[0][0]
            dbJob.status = 'submitted'
            dbJob.subStatus = 'submitted'
            print(
                coloured(iterable.strip() + ", " + str(o[0][0]) + "\n",
                         'green'))
        except Exception:
            logging.error(traceback.format_exc())
            print(coloured(iterable.strip() + " job failed to submit\n",
                           'red'))
            dbJob.status = 'failed'
            dbJob.subStatus = 'failed'
        Session.commit()

    return None