def parseWL( MainDir, XMLhandler ):
    
    DictionaryOfApps = XMLhandler.getDictionaryOfApplications()
    ListOfApps = AIStorageUtils.dict_sortbyvalue_dict( DictionaryOfApps, 'runTime', 
                                                       AIStorageUtils.SORT_TYPE_FLOAT, 
                                                       AIStorageUtils.SORT_ASCENDING )
    NTotalJobs = len( ListOfApps )
    print "Found", NTotalJobs, "apps. Sorting...done."
    
    RunDir = os.path.join( MainDir, "run" )
    JDFsDir = os.path.join( MainDir, "jdfs" )
    
    Timers = {}
    Timers['TurnAround'] = ItemTimer()
    Timers['SuccessfulTurnAround'] = ItemTimer()
    Timers['SuccessfulRun'] = ItemTimer()
    Timers['SuccessfulRunMPI'] = ItemTimer()
    
    AllowedFirstChar = ['2','3']
    DataList = []
    for (id, App) in ListOfApps: 
        #-- generate item
        DataItem = {}
        DataItem['id'] = id
        if DataItem['id'][0] not in AllowedFirstChar: continue
        DataItem['name'] = App['name']
        DataItem['jdf'] = App['jdf']
        DataItem['SubmitStdOut'] = os.path.join( RunDir, "%s.out" % id ).replace(':', '-')
        DataItem['SubmitStdErr'] = os.path.join( RunDir, "%s.err" % id ).replace(':', '-')
        TurnAroundTime, RunnerJobID, JobResult, GlobusMessages = parseKOALASubmissionFile( DataItem['SubmitStdOut'] )
        #print RunnerJobID, JobResult, TurnAroundTime
        if RunnerJobID:
            DataItem['RunnerJobID'] = RunnerJobID
            DataItem['JobResult'] = JobResult
            DataItem['TurnAroundTime'] = TurnAroundTime
            DataItem['GlobusMessages'] = GlobusMessages
            DataList.append( DataItem )
            if TurnAroundTime:
                Timers['TurnAround'].addValue(TurnAroundTime)
                if JobResult == 'SUCCESS': 
                    Timers['SuccessfulTurnAround'].addValue(TurnAroundTime)
                    if os.path.exists(DataItem['jdf']):
                        OutputFilesList = getOutputFiles(DataItem['jdf'])
                        for FileName in OutputFilesList:
                            DirFileName = os.path.join(JDFsDir, FileName)
                            RunTime, RunTimes = parseSMPIOutputFile( DirFileName )
                            if RunTime and RunTimes['Overall']['Max']:
                                Timers['SuccessfulRun'].addValue(RunTime)
                                Timers['SuccessfulRunMPI'].addValue(RunTimes['Overall']['Max'])
                            ##else:
                            ##    print "RunTime", RunTime, "RunTimes", RunTimes
                
    print "All     TurnAround time [s]: avg=%8.3f | min=%8d | max=%8d | #=%8d" % Timers['TurnAround'].getInfo()
    print "SUCCESS TurnAround time [s]: avg=%8.3f | min=%8d | max=%8d | #=%8d" % Timers['SuccessfulTurnAround'].getInfo()
    print "SUCCESS Run time        [s]: avg=%8.3f | min=%8d | max=%8d | #=%8d" % Timers['SuccessfulRun'].getInfo()
    print "SUCCESS Run time MPI    [s]: avg=%8.3f | min=%8d | max=%8d | #=%8d" % Timers['SuccessfulRunMPI'].getInfo()
Example #2
def usage(progname):
    global Defaults
    
    ReplaceDic = {}
    VarsDic = vars()
    for Key in VarsDic.keys():
        ReplaceDic[Key] = VarsDic[Key]
    ListOfTestsData = AIStorageUtils.dict_sortbykey( TestData, AIStorageUtils.SORT_ASCENDING )
    ListOfSortedNames = []
    for Name, Dummy in ListOfTestsData:
        ListOfSortedNames.append(Name)
    ReplaceDic['AvailableTests'] = ','.join(ListOfSortedNames)
    
    print __doc__ % ReplaceDic
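usage() fills placeholders in the module docstring via string formatting; an illustration of that pattern with an invented template text (only the 'progname' and 'AvailableTests' keys come from the code above):

# Illustration only: a template with named placeholders, filled the same way
# usage() fills __doc__; the wording of the template is hypothetical.
DOC_TEMPLATE = """Usage: %(progname)s [options] <sites-file>

Available tests: %(AvailableTests)s
"""
print DOC_TEMPLATE % {'progname': 'wl-submit.py', 'AvailableTests': 'test-a,test-b'}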
Example #3
def main(argv):                  

    try:                                
        opts, args = getopt.getopt(argv, "h", ["help", "version"])
    except getopt.GetoptError:
        print "Error while converting options: unknown option(s) encountered.\n\n"
        usage(os.path.basename(sys.argv[0]))
        sys.exit(2)
    
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage(os.path.basename(sys.argv[0]))
            sys.exit()
        elif opt in ["--version"]:
            version()
            sys.exit()
        else:
            print "Unknown parameter", opt
    
    if len(args) < 1:
        #print "Error: No sites file given.\n\n"
        #usage(os.path.basename(sys.argv[0]))
        #sys.exit(3)
        SitesFileName = "grid-sites.xml"
    else:
        SitesFileName = args[0]
        
    if not os.path.isfile( SitesFileName ):
        if os.path.exists( SitesFileName ):
            print "\n\n****\nError: %s is not a file!\n****\n" % SitesFileName
        else:
            print "\n\n****\nError: %s does not exist!\n****\n" % SitesFileName
        usage(os.path.basename(sys.argv[0]))
        sys.exit(1)
        
    #---Read a sites file
    #print "%s Parsing sites file %s" % \
    #      ( time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(time.time())), SitesFileName )
    handlerXML = WLDocHandlers.readSiteFile(SitesFileName)
    #print "%s Sites file processed, proceeding to submission"  % \
    #      time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(time.time()))  
    
    DictionaryOfSites = handlerXML.getDictionaryOfSites()
    ListOfSites = AIStorageUtils.dict_sortbykey( DictionaryOfSites, AIStorageUtils.SORT_DESCENDING )
    for (id, Site) in ListOfSites: 
        print id, Site['location'], Site['machines']
    print
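AIStorageUtils.dict_sortbykey is assumed here to return the dictionary as a list of (key, value) pairs ordered by key; a sketch under that assumption (the constant values are made up):

# Hypothetical stand-in for AIStorageUtils.dict_sortbykey.
SORT_ASCENDING, SORT_DESCENDING = 0, 1

def dict_sortbykey(Dictionary, SortOrder):
    Items = sorted(Dictionary.items())
    if SortOrder == SORT_DESCENDING:
        Items.reverse()
    return Items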
Example #4
def printWorkload( XMLhandler ):
    
    DictionaryOfApps = XMLhandler.getDictionaryOfApplications()
        
    ListOfApps = AIStorageUtils.dict_sortbyvalue_dict( DictionaryOfApps, 'jdf', 
                                                       AIStorageUtils.SORT_TYPE_STRING, 
                                                       AIStorageUtils.SORT_ASCENDING )
    NTotalJobs = len( ListOfApps )
    print "Found", NTotalJobs, "apps. Sorting...done."

    for (id, App) in ListOfApps:
        print id, App['jdf'],
        if 'dependsOn' in App:
            print 'dependsOn=', App['dependsOn']
        else:
            print
        
    print "--Full description----------"
        
    for (id, App) in ListOfApps:
        print id, App
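Similarly, AIStorageUtils.dict_sortbyvalue_dict is assumed to order the (id, App) pairs by one field of the inner dictionaries, coerced to the requested type; a sketch under that assumption (constant values again made up):

# Hypothetical stand-in for AIStorageUtils.dict_sortbyvalue_dict.
SORT_TYPE_STRING, SORT_TYPE_FLOAT = 0, 1
SORT_ASCENDING, SORT_DESCENDING = 0, 1

def dict_sortbyvalue_dict(Dictionary, FieldName, SortType, SortOrder):
    Coerce = float if SortType == SORT_TYPE_FLOAT else str
    Items = sorted(Dictionary.items(), key=lambda Pair: Coerce(Pair[1][FieldName]))
    if SortOrder == SORT_DESCENDING:
        Items.reverse()
    return Items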
Example #5
def runWL( OutputDir, XMLhandler, NPoolThreads, NoSubmit=0, NoBackground=0, OneFile=0, testid=0, projectid="default", testerid=0, timediff=0.0 ):
    
    DictionaryOfApps = XMLhandler.getDictionaryOfApplications()

    ListOfApps = AIStorageUtils.dict_sortbyvalue_dict( DictionaryOfApps, 'runTime',
                                                       AIStorageUtils.SORT_TYPE_FLOAT,
                                                       AIStorageUtils.SORT_ASCENDING )

    #    ListOfApps = AIStorageUtils.dict_sortbykey( DictionaryOfApps, AIStorageUtils.SORT_DESCENDING )
    for (id, App) in ListOfApps: 
        print "Found ", App['id'], "due to start at", App['runTime']
        
    NTotalJobs = len( ListOfApps )
    #print "### Found", NTotalJobs, "apps. Sorting...done."
                                                    
    startTime = float(ListOfApps[0][1]['runTime'])
    if startTime < 0.0: startTime = 0.0
    for (id, App) in ListOfApps: 
        App['runTime'] = float(App['runTime']) - startTime
        #print "### runTime:", App['runTime'], "\n"
        if App['runTime'] < 0.0: App['runTime'] = 0.0
        #print "ID", id, "starts in %.3fs." % float(App['runTime']/1000.0)
        
    #-- generate all work units
    try:
        os.mkdir( OutputDir )
    except OSError:
        pass    # output directory already exists
    FirstSubmission = time.time()
    CommandLinesList = []
    for (id, App) in ListOfApps: 
        #-- generate item
        CommandLineItem = {}
        CommandLineItem['id'] = id
        CommandLineItem['firstSubmission'] = FirstSubmission
        CommandLineItem['startTime'] = float(App['runTime']/1000.0)
        #CommandLineItem['commandLine'] = "drunner -g -e -o -f %s 1> %s.out 2> %s.err &" % (App['jdf'], id, id)
        
        if OneFile == 0:
            StdOutFile = os.path.join( OutputDir, "%s.out" % id )
            StdErrFile = os.path.join( OutputDir, "%s.err" % id )
            ActualCommand = "%s 1> %s 2> %s" % ( App['submitCommand'], StdOutFile, StdErrFile )
            #ActualCommand = "%s 2>%s" % ( App['submitCommand'], StdErrFile )
        else:
            StdOutFile = os.path.join( OutputDir, "onefile.out" )
            StdErrFile = os.path.join( OutputDir, "onefile.err" )
            ActualCommand = "%s 1>> %s 2>> %s" % ( App['submitCommand'], StdOutFile, StdErrFile )
            #ActualCommand = "%s" % ( App['submitCommand'] )

        if NoBackground == 0:
            CommandLineItem['commandLine'] = ActualCommand #+ ' &'
        else:
            CommandLineItem['commandLine'] = ActualCommand
        
        CommandLineItem['stdout'] = StdOutFile
        CommandLineItem['stderr'] = StdErrFile
        CommandLineItem['onefile'] = OneFile

        #-- amod v.0.12: just generate commands
        CommandLineItem['NoSubmit'] = NoSubmit
        CommandLineItem['testid'] = testid
        CommandLineItem['projectid'] = projectid
        CommandLineItem['testerid'] = testerid
        CommandLineItem['timediff'] = timediff
            
        #-- append item
        #if os.path.exists(App['jdf']):
        CommandLinesList.append(CommandLineItem)
        #else:
        #    print "Could not locate JDF", App['jdf'], "... skipping job"
        
    #-- build a WorkRequest object for each work unit
    requests = ASPNThreadPool.makeRequests(submitJob, CommandLinesList, printSubmitJobResults)
    
    #-- create a pool of NPoolThreads worker threads
    StdOutLock.acquire()
    print "[wl-submit.py] Starting a thread pool with", NPoolThreads, "threads"
    StdOutLock.release()

    submitThreadPool = ASPNThreadPool.ThreadPool(NPoolThreads, StdOutLock)
    
    
    StartSubmissionTime = time.time()
    #-- add all work units into the thread pool
    #   NOTE: We expect the thread pool to be based on Queues,
    #         because our applications need to be run at specified times
    #         and the submit job waits until the current work unit is done
    #         -> if we are NOT using Queues, it may happen that a work unit
    #         that needs to be submitted at time T will get submitted much 
    #         later, due to other jobs starting the submission before it,
    #         but waiting for their later start time
    for req in requests:
        submitThreadPool.putRequest(req)
        #DEBUG:print req.args

        StdOutLock.acquire()
        print "[Pool] Work request #%s added (id=%s, start time=%.3f)." % \
              (req.requestID, req.args[0]['id'], req.args[0]['startTime'])
        StdOutLock.release()
    
    #-- wait for all submissions to be completed
    submitThreadPool.wait()
    while 1:
        try:
            submitThreadPool.poll()
            EndSubmissionTime = time.time()
            #print "Main thread working..."
            time.sleep(0.5)
        except (KeyboardInterrupt, ASPNThreadPool.NoResultsPending):
            break
    
    EndSubmissionTime = time.time()
    NTotalJobsInQueue = len(submitThreadPool.workRequests)

    # send the contents of 'onefile.out' and 'onefile.err' to the database (not tested)
    if OneFile != 0:
        for LogFileName in ( os.path.join(OutputDir, "onefile.out"),
                             os.path.join(OutputDir, "onefile.err") ):
            try:
                fin = open(LogFileName)
                lines = fin.read().split("\n")
                fin.close()

                for line in lines:
                    if len(line) > 1:
                        sLine = "\n" + LOGFILE_PREFIX + str(testid) + "\1" + str(projectid) + "\1" \
                                + str(testerid) + "\1" + "0" + "\1" + line + "\n"

                        StdOutLock.acquire()
                        sys.stdout.write(sLine)
                        StdOutLock.release()
            except:
                pass    # missing or unreadable log file: nothing to forward

    return StartSubmissionTime, EndSubmissionTime, NTotalJobs, NTotalJobsInQueue
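submitJob and printSubmitJobResults are defined elsewhere; a minimal sketch of a work unit compatible with the CommandLineItem dictionaries built above (wait until the job's relative start time, then run the prepared command line), purely as an assumption about what the real functions do:

# Hypothetical work-unit callable and result callback for the ASPN-style pool;
# the real submitJob/printSubmitJobResults may differ.
import os, time

def submitJob(Item):
    Delay = Item['firstSubmission'] + Item['startTime'] - time.time()
    if Delay > 0.0:
        time.sleep(Delay)              # honour the job's relative start time
    if Item['NoSubmit']:
        return (Item['id'], 0)         # dry run: do not actually submit
    return (Item['id'], os.system(Item['commandLine']))

def printSubmitJobResults(request, result):
    print "[Pool] Job %s finished with exit status %s" % result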
Example #6
def runWL(OutputDir, XMLhandler, NPoolThreads, NoSubmit=0, Background=0, OneFile=0):

    # --- get applications
    DictionaryOfApps = XMLhandler.getDictionaryOfApplications()

    # --- create composite structure manager
    TheCompositeApplicationData = CompositeApplicationData()

    ##    ListOfApps = AIStorageUtils.dict_sortbykey( DictionaryOfApps, AIStorageUtils.SORT_DESCENDING )
    ##    for (id, App) in ListOfApps:
    ##        print "Found ", App['id'], "due to start at", App['runTime']
    ##
    # -- sort jobs
    ListOfApps = AIStorageUtils.dict_sortbyvalue_dict(
        DictionaryOfApps, "runTime", AIStorageUtils.SORT_TYPE_FLOAT, AIStorageUtils.SORT_ASCENDING
    )
    NTotalJobs = len(ListOfApps)
    print "Found", NTotalJobs, "apps. Sorting...done."

    # Modification - C.S.: make all the tasks have the start time 0
    # startTime = float(ListOfApps[0][1]['runTime'])
    # if startTime < 0.0: startTime = 0.0
    startTime = 0.0

    # -- correct start times and add all applications to the composite structure manager
    for (id, App) in ListOfApps:
        App["runTime"] = 0
        # App['runTime'] = float(App['runTime']) - startTime
        # if App['runTime'] < 0.0: App['runTime'] = 0.0
        print "ID", id, "starts in %.3fs." % float(App["runTime"] / 1000.0)
        # add the 'dependsOn' key if missing
        if "dependsOn" not in App:
            App["dependsOn"] = []
        TheCompositeApplicationData.addJob(id, App)

    # -- create all 'enables' relations
    TheCompositeApplicationData.buildEnablesRelations()
    # -- mark all the starting jobs as 'can run'
    for id in TheCompositeApplicationData.JobsWithDeps:
        TheCompositeApplicationData.triggerCanRunCheck(id)

    # --- generate all work units
    try:
        os.mkdir(OutputDir)
    except OSError:
        pass  # output directory already exists

    # --- build a WorkRequest object for each work unit
    FirstSubmission = time.time()
    CommandLinesList = []
    for (id, App) in ListOfApps:
        # -- generate item
        CommandLineItem = {}
        CommandLineItem[".CompositeApplicationData"] = TheCompositeApplicationData
        CommandLineItem["id"] = id
        CommandLineItem["firstSubmission"] = FirstSubmission
        CommandLineItem["startTime"] = float(App["runTime"] / 1000.0)
        # CommandLineItem['commandLine'] = "drunner -g -e -o -f %s 1> %s.out 2> %s.err &" % (App['jdf'], id, id)

        if OneFile == 0:
            StdOutFile = os.path.join(OutputDir, "%s.out" % id)
            StdErrFile = os.path.join(OutputDir, "%s.err" % id)
            ActualCommand = "%s 1> %s 2> %s" % (App["submitCommand"], StdOutFile, StdErrFile)
        else:
            StdOutFile = os.path.join(OutputDir, "onefile.out")
            StdErrFile = os.path.join(OutputDir, "onefile.err")
            ActualCommand = "%s 1>> %s 2>> %s" % (App["submitCommand"], StdOutFile, StdErrFile)
        if Background == 1:
            CommandLineItem["commandLine"] = ActualCommand + " &"
        else:
            CommandLineItem["commandLine"] = ActualCommand

        # -- amod v.0.12: just generate commands
        CommandLineItem["NoSubmit"] = NoSubmit

        # -- append item
        if os.path.exists(App["jdf"]):
            CommandLinesList.append(CommandLineItem)
        else:
            print "Could not locate JDF", App["jdf"], "... skipping job"

    requests = ASPNThreadPool.makeRequests(runJob, CommandLinesList, printJobResults)

    # --- create a pool of NPoolThreads worker threads
    print "[wl-exec-dagman.py] Starting a thread pool with", NPoolThreads, "threads"
    submitThreadPool = ASPNThreadPool.ThreadPool(NPoolThreads, StdOutLock)

    StartSubmissionTime = time.time()
    # --- add all work units into the thread pool
    #   NOTE: We expect the thread pool to be based on Queues,
    #         because our applications need to be run at specified times
    #         and the submit job waits until the current work unit is done
    #         -> if we are NOT using Queues, it may happen that a work unit
    #         that needs to be submitted at time T will get submitted much
    #         later, due to other jobs starting the submission before it,
    #         but waiting for their later start time

    # Modification - corina: the requests are put in the thread pool only when
    # their dependencies are satisfied
    requestsBkp = requests[:]
    for req in requestsBkp:
        reqId = req.args[0]["id"]
        # take only the runnable jobs
        if TheCompositeApplicationData.isRunnable(reqId):
            submitThreadPool.putRequest(req)
            # remove the request from the list if it was submitted to the pool
            requests.remove(req)
            # DEBUG:print req.args
            print "[Pool] Work request #%s added (id=%s, start time=%.3f)." % (
                req.requestID,
                req.args[0]["id"],
                req.args[0]["startTime"],
            )

    # --- wait for all submissions to be completed
    # submitThreadPool.wait()
    while 1:
        try:
            submitThreadPool.poll()
            EndSubmissionTime = time.time()
            time.sleep(0.5)
        ##            if TheCompositeApplicationData.isCompositeApplicationFinished():
        ##                #submitThreadPool.wait()
        ##                EndSubmissionTime = time.time()
        ##                break
        ##            time.sleep(1)
        ##            #print "Main thread working..."

        except ASPNThreadPool.NoResultsPending:
            # -- check that all jobs have actually finished or failed
            if TheCompositeApplicationData.isCompositeApplicationFinished():
                EndSubmissionTime = time.time()
                break
            else:
                # see if we have some more runnable jobs and add them to the pool
                requestsBkp2 = requests[:]
                for req in requestsBkp2:
                    reqId = req.args[0]["id"]
                    if TheCompositeApplicationData.isRunnable(reqId):
                        submitThreadPool.putRequest(req)
                        requests.remove(req)
                        # DEBUG:print req.args
                        print "[Pool] Work request #%s added (id=%s, start time=%.3f)." % (
                            req.requestID,
                            req.args[0]["id"],
                            req.args[0]["startTime"],
                        )

                print "[wl-exec-dagman] Got ASPNThreadPool.NoResultsPending"
                print "         All:", TheCompositeApplicationData.TotalJobs, "Done:", TheCompositeApplicationData.TotalSuccessful, "Failed:", TheCompositeApplicationData.TotalFailed
                time.sleep(2)
        except KeyboardInterrupt:
            break
        except:
            traceback.print_exc()
            raise Exception("unexpected error in the submission loop")

    NTotalJobsInQueue = len(submitThreadPool.workRequests)
    ##    print ">>>", "NTotalJobsInQueue:", NTotalJobsInQueue
    ##
    ##    #-- mark all the starting jobs as 'can run'
    ##    for id in TheCompositeApplicationData.JobsWithDeps:
    ##        print "ID", id, "isFailed:", TheCompositeApplicationData.isFailed(id), "isSuccessful:", TheCompositeApplicationData.isSuccessful(id)

    return StartSubmissionTime, EndSubmissionTime, NTotalJobs, NTotalJobsInQueue
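CompositeApplicationData is not part of this listing; a minimal sketch of the dependency bookkeeping the loops above rely on (addJob, buildEnablesRelations, isRunnable, the JobsWithDeps container and the Total* counters), under the assumption that a job becomes runnable once all of its 'dependsOn' jobs have succeeded:

# Hypothetical minimal CompositeApplicationData, covering only the attributes
# and methods used in runWL above; the real class may differ.
class CompositeApplicationData:
    def __init__(self):
        self.JobsWithDeps = {}          # id -> application dictionary
        self.Enables = {}               # id -> list of ids it unlocks
        self.Done = set()
        self.Failed = set()
        self.TotalJobs = self.TotalSuccessful = self.TotalFailed = 0

    def addJob(self, id, App):
        self.JobsWithDeps[id] = App
        self.TotalJobs += 1

    def buildEnablesRelations(self):
        for JobId, App in self.JobsWithDeps.items():
            for Dep in App['dependsOn']:
                self.Enables.setdefault(Dep, []).append(JobId)

    def triggerCanRunCheck(self, id):
        pass                            # this sketch checks on demand in isRunnable()

    def isRunnable(self, id):
        App = self.JobsWithDeps[id]
        return (id not in self.Done and id not in self.Failed and
                all(Dep in self.Done for Dep in App['dependsOn']))

    def markFinished(self, id, successful):
        if successful:
            self.Done.add(id)
            self.TotalSuccessful += 1
        else:
            self.Failed.add(id)
            self.TotalFailed += 1

    def isCompositeApplicationFinished(self):
        return self.TotalSuccessful + self.TotalFailed == self.TotalJobs

In the code above, something like markFinished() would presumably be driven from printJobResults() when a work unit completes; that callback is not shown in this listing.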