예제 #1
0
def howManyJobs(eM, tableId, jobInt):
    tableStr = str(tableId)
    tableInt = int(tableId)
    eM = jobDB.JobState(tableInt)
    tableStr = str(tableId)
    t0 = eM.queryState(tableStr + 'NumJobs')
    if t0 == None: t0 = 0
    else: t0 = int(t0)
    print 'howManyJobs: Current num: ', t0
    return t0
예제 #2
0
from lsst.sims.catalogs.generation.db import jobDB

import sys

eM = jobDB.JobState(sys.argv[1])
eM.updateState(sys.argv[1] + 'blah', 'blah')
eM.showStates()
t0 = eM.queryState(sys.argv[1] + 'NumJobs')
print t0
        possibleTestModeStr = ' testMode'
        print '>>> Using test mode.'

obsList = []
f = open('inList.txt', 'r')
for line in f:
    t0 = line.split()
    if len(t0) != 1: continue
    obsList.append(t0[0])

f.close()
print 'obsList:', obsList

numJobs = len(obsList)

# Open the job-state DB without an explicit job ID and fetch its job ID.
executionDBManager = jobDB.JobState()
t0 = executionDBManager.getJobId()

# Prefix shared by every per-entry job ID below: '<owner>_<id>'.
nFN = '%s_%s' % (t0.getOwner(), t0.getId())
print 'Using job ID: %s' % nFN

# Register one job per entry of obsList, keyed '<nFN>_<index>'.
# NOTE(review): only the registration step is visible here; the code that
# actually starts each job is not part of this excerpt.
for i in range(len(obsList)):
    jobId = '%s_%i' % (nFN, i)
    # presumably blocks while maxNumJobs jobs are already registered,
    # re-checking every waitTime -- confirm against throttleUtils.throttle
    throttleUtils.throttle(executionDBManager, maxNumJobs, waitTime)
    # For now, call addJob() before actually starting the job,
    #  because there could be a race condition if addJob()
    #  and removeJob() are called simultaneously.
    # t0 is reused here as the registration timestamp (whole seconds).
    t0 = int(time.time())
    # Value stored for the job: '<obsList entry>_<timestamp>'.
    t1 = '%s_%i' % (obsList[i], t0)
    throttleUtils.addJob(executionDBManager, jobId, t1)
if __name__ == '__main__':
    print 'Registering signal handler.'
    signal.signal(signal.SIGTERM, signalHandler)

    print 'Started with args:'
    for i in range(1, len(sys.argv)):
        print sys.argv[i]

    testMode = False
    if sys.argv[-1].lower() == 'testmode':
        testMode = True

    t0 = sys.argv[1].split('_')
    jobId = jobDB.JobId(int(t0[1]), owner=t0[0])
    d = jobDB.JobState(jobId)
    procId = sys.argv[2]
    obsId = sys.argv[3]
    rad = sys.argv[4]
    startTime = time.time()
    t0 = int(startTime)
    d.updateState(procId, 'JobRunning_%s_%i' % (obsId, t0))
    print 'Update state: %s to JobRunning_%s_%i' % (procId, obsId, t0)
    #HACK this should be changed to just call the catalog generation classes
    #Rob says this may be an issue because he checks the error code on exit.
    if testMode == False:
        t0 = 'python $CATALOGS_GENERATION_DIR/bin/runFiles.py %s %s'
        t1 = t0 % (obsId, rad)
    else:
        t0 = None
        t1 = 'python $CATALOGS_GENERATION_DIR/bin/fakeRunFiles.py'
def getCopyDBM(iD):
    """Return a newly constructed jobDB.JobState for iD."""
    return jobDB.JobState(iD)
예제 #6
0
            print 'Waking to check again.'
        else:
            done = True


if not len(sys.argv) == 4:
    print "usage: %python myJobMonitor.py tableId state jobId"
    quit()

tableId = sys.argv[1]
state = sys.argv[2]
jobId = sys.argv[3]

if state == 'qsubbed':
    tableId = int(tableId)
    eM = jobDB.JobState(tableId)
    qsubJob(eM, tableId, jobId)

if state == 'running':
    tableId = int(tableId)
    eM = jobDB.JobState(tableId)
    jobRunning(eM, tableId, jobId)

if state == 'finished':
    #jid = jobDB.JobId(id, owner)
    tableId = int(tableId)
    eM = jobDB.JobState(tableId)
    jobFinished(eM, tableId, jobId)

if state == 'howmany':
    tableId = int(tableId)
예제 #7
0
# Raft lookup: two-digit key 'xy' -> 'x,y' for a 5x5 grid, leaving out the
# four corner positions (00, 04, 40, 44).
raftmap = dict(
    ("%d%d" % (rx, ry), "%d,%d" % (rx, ry))
    for rx in range(5) for ry in range(5)
    if (rx, ry) not in ((0, 0), (0, 4), (4, 0), (4, 4)))

# Sensor lookup: two-digit key 'xy' -> 'x,y' for a full 3x3 grid.
sensormap = dict(
    ("%d%d" % (sx, sy), "%d,%d" % (sx, sy))
    for sx in range(3) for sy in range(3))

# constructed to have the form "R:rx,ry S:sx,sy:snap"
# which is how the fpaFig.map keys are constructed
sensorid = "".join(
    ["R:", raftmap[rxry], " ", "S:", sensormap[sxsy], ":", ex])

# Open the job-state DB for this (obshistid, username) job.
jobid = jobDB.JobId(id=obshistid, owner=username)
jobStr = str(jobid)
eM = jobDB.JobState(jobid=jobid)
stateKey = '%s_%s_JS' % (jobStr, sensorid)

# Call the handler that matches the requested state; any other value of
# state does nothing.
if state == 'qsubbed':
    qsubJob(eM, sensorid, jobStr)
elif state == 'running':
    jobRunning(eM, sensorid, jobStr)
elif state == 'finished':
    jobFinished(eM, sensorid, jobStr)
elif state == 'error':
    jobError(eM, sensorid, jobStr)
예제 #8
0
    def doOneCatalogType(self, catalogType, queryTypes, obsHistID):
        #nFN = self.getNextGoodFileNum()
        fullTimeStart = time.time()
        self.executionDBManager = jobDB.JobState()
        t0 = self.executionDBManager.getJobId()

        nFN = '%s_%s' % (t0.getOwner(), t0.getId())
        print 'Using job ID: %s' % nFN
        print 'queryTypes:', queryTypes
        jobNum = 0
        jobTypes = []; jobNums = []; jobPickleFiles = []; useTypes = []
        allOutputFiles = []; curMD = None
        self.metaDataManager.reset()
        os.system('free -m')
        for objectType in queryTypes:
            if objectType not in useTypes: useTypes.append(objectType)
            print 'Getting first %s instance catalog of size %i...' % (
                objectType, self.chunkSize)
            t0 = time.time()
            myQDB = queryDB.queryDB(
                chunksize=self.chunkSize, objtype=objectType)
            print '   ...setting up QDB took %i sec.' % (time.time() - t0)
            t0 = time.time()
            instanceCat = myQDB.getInstanceCatalogById(obsHistID)
            print '   ...and getting catalog took %i sec.' % (time.time() - t0)

            numCats = 0
            if instanceCat != None:
                # This code adds some needed fields to the metadata
                mUtils.trimGeneration.derivedTrimMetadata(instanceCat)
                os.system('free -m')
                # Deep copy so we can store this after instanceCat disappears
                if curMD == None:
                    curMD = copy.deepcopy(instanceCat.metadata)
                else:
                    curMD.mergeMetadata(instanceCat.metadata)

            while instanceCat != None:
                t0 = self.WorkDir + 'catData%s_%i.ja' % (nFN, jobNum)
                t1 = self.WorkDir + 'catData%s_%i.p' % (nFN, jobNum)
                print 'Now pickling query type: %s' % objectType
                # Store job data files in instance
                time0 = time.time()
                instanceCat.jobAllocatorDataFile = t0
                allOutputFiles.append(t0) # Order is important
                instanceCat.jobAllocatorCatalogType = catalogType
                instanceCat.jobAllocatorObjectType = objectType
                cPickle.dump(instanceCat, open(t1, 'w'))
                print '   ...pickling took %i sec.' % (time.time() - time0)
                jobTypes.append(catalogType)
                jobNums.append(jobNum)
                jobPickleFiles.append(t1)
                jobNum += 1
                if numCats > 0:
                    curMD.mergeMetadata(instanceCat.metadata)

                # *** RRG:  Free up memory somehow here for instanceCat...
                del(instanceCat); instanceCat = None
                os.system('free -m')
                if self.maxCats >= 0 and (numCats + 1) >= self.maxCats:
                    instanceCat = None
                else:
                    print 'Querying DB for next chunk.'
                    t0 = time.time()
                    instanceCat = myQDB.getNextChunk()
                    print '   ...took %i sec.' % (time.time() - t0)
                    if instanceCat != None:
                        # This code adds some needed fields to the metadata
                        mUtils.trimGeneration.derivedTrimMetadata(instanceCat)
                    os.system('free -m')
                    numCats += 1

        # RRG:  For now this must be disabled
        #curMD.validateMetadata(catalogType, myQDB.opsim)
        mFName = self.WorkDir + 'metaData%s_%s.ja' % (nFN, catalogType)
        curMD.writeMetadata(mFName, catalogType, myQDB.opsim, newfile=True)

        # Finished with queryDB; clean up nicely.
        myQDB.closeSession()
        
        # For debug mode, don't start the clients
        if self.QueryOnly == True:
            print 'Full time for this file: %i sec' % (time.time()-fullTimeStart)
            print 'DEBUG:  Finished, no client processes started.'

        # Now fire off the jobs
        for i in range(len(jobNums)):
            jobId = '%s_%i' % (nFN, jobNums[i])
            self.executionDBManager.updateState(jobId, 'JAAdded')
            print 'Added job to execution DB: %s' % jobId
            #t0 = '/astro/apps/pkg/python64/bin/python jobAllocatorRun.py %i %s %s&' % (nFN, jobId, jobPickleFiles[i])
            #t0 = 'qsub ./runOneAthena.csh %i %s %s&' % (nFN, jobId, jobPickleFiles[i])
            #t0 = 'ssh minerva0 "(cd $PBS_O_WORKDIR; qsub ./runOneAthena.csh %i %s %s)"' % (nFN, jobId, jobPickleFiles[i])
            cwd0 = os.getcwd()
            f0 = open('tmpJA%s.csh' % jobId, 'w')
	    f0.write('#!/bin/csh\n#PBS -N jA%s\n#PBS -l walltime=1:00:00\n#PBS -e jA%s.err\n#PBS -o jA%s.out\ncd %s\nsource setupAthena.csh\npython jobAllocatorRun.py %s %s %s\necho Finished.' % (jobId, jobId, jobId, cwd0, nFN, jobId, jobPickleFiles[i]))
            f0.close()
            t0 = 'ssh minerva0 "(cd %s; /opt/torque/bin/qsub tmpJA%s.csh)"' % (cwd0, jobId)
            print t0
            os.system(t0)

        # Check that everything started within a certain time limit
        # On minerva, jobs may be queued indefinitely, so this won't work
        for i in range(len(jobNums)):
            jobId = '%s_%i' % (nFN, jobNums[i])
            tryNum = 0
            t0 = self.executionDBManager.queryState(jobId)
            while t0 != 'JAFinished':
                print 'Try %i: JA sees state for %s: %s' % (tryNum, jobId, t0)
                time.sleep(10)
                # Give it up to a day
                if tryNum > 60 * 60 * 24:
                    raise RuntimeError, '*** Job not started: %s' % jobId
                tryNum += 1
                t0 = self.executionDBManager.queryState(jobId)
            print 'Finished (Try %i):  JA sees state for %s: %s' % (tryNum, jobId, t0)

        # Finally, merge the output trim file
        trimFile = self.WorkDir + 'trim%s_%s.ja' % (nFN, catalogType)
        t0 = 'cat %s > %s' % (mFName, trimFile)
        print t0
        os.system(t0)
        for f in allOutputFiles:
            t0 = 'cat %s >> %s' % (f, trimFile)
            print t0
            os.system(t0)
        print 'Full time for this file: %i sec' % (time.time()-fullTimeStart)
        print 'Finished catting trim file: ', trimFile