Example #1
def parse_jobs_list(jobs_list):
    ''' parse a jobs list by first getting the status of all jobs
    '''
    from DIRAC.Interfaces.API.Dirac import Dirac
    dirac = Dirac()
    # status of all jobs
    status = dirac.status(jobs_list)
    # parse it
    sites_dict = {}
    status_dict = copy.copy(BASE_STATUS_DIR)
    for job in jobs_list:
        site = status['Value'][int(job)]['Site']
        minstatus = status['Value'][int(job)]['MinorStatus']
        majstatus = status['Value'][int(job)]['Status']
        if majstatus not in status_dict.keys():
            DIRAC.gLogger.notice('Add %s to BASE_STATUS_DIR' % majstatus)
            DIRAC.sys.exit(1)
        status_dict[majstatus] += 1
        status_dict['Total'] += 1
        if site not in sites_dict.keys():
            if site.find('.') == -1:
                site = '    None'  # note that blank spaces are needed
            sites_dict[site] = copy.copy(BASE_STATUS_DIR)
            sites_dict[site][majstatus] = 1
            sites_dict[site]["Total"] = 1
        else:
            sites_dict[site]["Total"] += 1
            if majstatus not in sites_dict[site].keys():
                sites_dict[site][majstatus] = 1
            else:
                sites_dict[site][majstatus] += 1
    return status_dict, sites_dict
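Example #1 relies on a few names defined outside the excerpt: the copy module, the top-level DIRAC package, and a BASE_STATUS_DIR template dictionary of counters. A minimal sketch of that assumed context is shown below; the exact set of status keys is a guess, not taken from the original module.

# Assumed context for parse_jobs_list(); the real module defines its own
# BASE_STATUS_DIR, so the keys below are only illustrative.
import copy

import DIRAC

# one counter per major DIRAC job status, plus a running total
BASE_STATUS_DIR = {
    'Received': 0, 'Checking': 0, 'Waiting': 0, 'Running': 0, 'Stalled': 0,
    'Done': 0, 'Completed': 0, 'Failed': 0, 'Killed': 0, 'Deleted': 0,
    'Total': 0,
}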
Example #2
j.setCPUTime(500)
j.setExecutable('/bin/echo hello')
j.setExecutable('/bin/hostname')
j.setExecutable('/bin/echo hello again')
j.setName('API')

result = dirac.submitJob(j)
print 'Submission Result: '
pprint.pprint(result)

jobid = result['JobID']

# print job id to file for future reference
joblog = open("jobid.log", "a")
joblog.write(str(jobid) + '\n')
joblog.close()

# to interactively check on job status do:
# dirac-wms-job-status -f jobid.log
print "\nThe current status of this job is:"
pprint.pprint(dirac.status(jobid))

joblog = open("jobid.log", "r")
# list comprehension :-D
all_jobids = [jobid.strip() for jobid in joblog.readlines()]

print "\nThe current status of all jobs is:"
all_status = dirac.status(all_jobids)
pprint.pprint(all_status)
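The snippet above assumes the usual DIRAC submission preamble has already run, so that j, dirac and pprint exist. A sketch of the setup it expects, mirroring the pattern in Example #8:

# Assumed setup for the snippet above.
import pprint

from DIRAC.Core.Base import Script
Script.parseCommandLine()

from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac

j = Job()
dirac = Dirac()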
Example #3
    args = visit + ' ' + insidename + ' ' + str(startsensor) + ' ' + str(
        numsensors) + ' ' + str(idx)
    outputname = 'fits_' + visit + '_' + str(idx) + '.tar'

    j.setCPUTime(1209600)
    j.setExecutable('runimsim2.1.sh', arguments=args)
    j.stderr = "std.err"
    j.stdout = "std.out"
    #!!! May need the 2.1i directory here depending on visit number !!!
    j.setInputSandbox([
        "runimsim2.1.sh", "run_imsim_nersc.py",
        "LFN:/lsst/user/j/james.perry/instcats/2.1i/" + instcatname
    ])
    j.setOutputSandbox(["std.out", "std.err"])
    j.setTag(["8Processors"])
    #j.setOutputData([visit + "/" + outputname], outputPath="", outputSE=["IN2P3-CC-disk"])
    j.setOutputData([visit + "/" + outputname],
                    outputPath="",
                    outputSE=["UKI-NORTHGRID-LANCS-HEP-disk"])
    j.setPlatform("AnyPlatform")

    j.setDestination(site)

    jobID = dirac.submitJob(j)
    print("Submitted job to " + site + " as ID " + str(jobID))
    print "Status is:", dirac.status(jobID['JobID'])

    joblistfile.write(str(jobID['JobID']) + '\n')

joblistfile.close()
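Once the submitted IDs are on disk they can be fed back to Dirac().status() in a later session. A hedged sketch of such a follow-up; the joblist.txt name is an assumption, since the excerpt never shows how joblistfile was opened:

# Hypothetical follow-up: read the saved job IDs back and query their status
# in a single call.
from DIRAC.Interfaces.API.Dirac import Dirac

dirac = Dirac()
with open("joblist.txt") as joblistfile:
    jobids = [int(line.strip()) for line in joblistfile if line.strip()]

result = dirac.status(jobids)
if result['OK']:
    for jobid, info in result['Value'].items():
        print jobid, info['Status'], info['MinorStatus'], info['Site']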
Example #4
class CEBaseTest( TestBase ):
  """
    CEBaseTest is base class for all the CE test classes. Real  CE test should
    implement its _judge method.
  """

  def __init__( self, args = None, apis = None ):
    super( CEBaseTest, self ).__init__( args, apis )

    self.timeout = self.args.get( 'timeout', 1800 )
    self.vo = self.args.get( 'VO' )
    self.testType = self.args[ 'TestType' ]
    self.executable = self.args[ 'executable' ]
    self.__logPath = '/opt/dirac/pro/BESDIRAC/ResourceStatusSystem/SAM/log'
    self.__scriptPath = '/opt/dirac/pro/BESDIRAC/ResourceStatusSystem/SAM/sam_script'

    if 'WMSAdministrator' in self.apis:
      self.wmsAdmin = self.apis[ 'WMSAdministrator' ]
    else:
      self.wmsAdmin = RPCClient( 'WorkloadManagement/WMSAdministrator' )

    if 'Dirac' in self.apis:
      self.dirac = self.apis[ 'Dirac' ]
    else:
      self.dirac = Dirac()


  def doTest( self, elementDict ):
    """
      submit test job to the specified ce or cloud..
    """

    elementName = elementDict[ 'ElementName' ]
    elementType = elementDict[ 'ElementType' ]
    vos = elementDict[ 'VO' ]

    site = None; ce = None
    if elementType == 'ComputingElement':
      ce = elementName
    if elementType == 'CLOUD':
      site = elementName

    if self.vo:
      submitVO = self.vo
    elif vos:
      submitVO = vos[ 0 ]
    else:
      submitVO = 'bes'

    submissionTime = datetime.utcnow().replace( microsecond = 0 )
    sendRes = self.__submit( site, ce, submitVO )
    if not sendRes[ 'OK' ]:
      return sendRes
    jobID = sendRes[ 'Value' ]

    result = { 'Result' : { 'JobID' : jobID,
                           'VO' : submitVO,
                           'SubmissionTime' : submissionTime },
              'Finish' : False }

    return S_OK( result )


  def __submit( self, site, CE, vo ):
    """
      set the job and submit.
    """

    job = Job()
    job.setName( self.testType )
    job.setJobGroup( 'CE-Test' )
    job.setExecutable( self.executable )
    job.setInputSandbox( '%s/%s' % ( self.__scriptPath, self.executable ) )
    if site and not CE:
      job.setDestination( site )
    if CE:
      job.setDestinationCE( CE )

    LOCK.acquire()
    proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
    if not proxyPath[ 'OK' ]:
      LOCK.release()
      return proxyPath
    proxyPath = proxyPath[ 'Value' ]
    oldProxy = os.environ.get( 'X509_USER_PROXY' )
    os.environ[ 'X509_USER_PROXY' ] = proxyPath
    result = self.dirac.submit( job )
    if oldProxy is None:
      del os.environ[ 'X509_USER_PROXY' ]
    else:
      os.environ[ 'X509_USER_PROXY' ] = oldProxy
    LOCK.release()

    return result


  def getTestResult( self, elementName, vo, jobID, submissionTime ):
    """
      download output sandbox and judge the test status from the log file.
    """

    isFinish = False

    res = self.__getJobOutput( jobID, vo )
    if not res[ 'OK' ]:
      return res
    output = res[ 'Value' ]
    status = res[ 'Status' ]

    resDict = { 'CompletionTime' : None, 'Status' : None, 'Log' : None, 'ApplicationTime' : None }
    utcNow = datetime.utcnow().replace( microsecond = 0 )

    if output:
      isFinish = True
      resDict[ 'CompletionTime' ] = utcNow
      log = output[ 'Log' ]
      if not output[ 'Download' ]:
        resDict[ 'Status' ] = 'Unknown'
        resDict[ 'Log' ] = 'Fail to download log file for job %s: %s' % ( jobID, log )
      else:
        resDict[ 'Log' ] = log
        resDict[ 'Status' ] = self._judge( log )
        resDict[ 'ApplicationTime' ] = self.__getAppRunningTime( log )

    else:
      if utcNow - submissionTime >= timedelta( seconds = self.timeout ):
        isFinish = True
        if elementName.split( '.' )[ 0 ] == 'CLOUD':
          site = elementName
        else:
          site = BESUtils.getSiteForCE( elementName )
        jobCount = self.wmsAdmin.getSiteSummaryWeb( { 'Site' : site }, [], 0, 0 )
        if not jobCount[ 'OK' ]:
          return jobCount
        params = jobCount[ 'Value' ][ 'ParameterNames' ]
        records = jobCount[ 'Value' ][ 'Records' ][ 0 ]
        run = records[ params.index( 'Running' ) ]
        done = records[ params.index( 'Done' ) ]
        if status == 'Waiting' and run == 0 and done == 0:
          resDict[ 'Status' ] = 'Bad'
          resDict[ 'Log' ] = 'The test job is waiting for %d seconds, but no running and done jobs at this site.' % self.timeout
        else:
          if run != 0:
            resDict[ 'Status' ] = 'Busy'
            resDict[ 'Log' ] = 'Site %s is too busy to execute this test job, job status is %s' % ( site, status )
          else:
            resDict[ 'Status' ] = 'Unknown'
            resDict[ 'Log' ] = 'Test did not complete within the timeout of %d seconds, job status is %s' % ( self.timeout, status )
        self.dirac.kill( jobID )

    if not isFinish:
      return S_OK()
    else:
      return S_OK( resDict )


  def __getJobOutput( self, jobID, vo ):
    status = self.dirac.status( jobID )
    if not status[ 'OK' ]:
      return status
    status = status[ 'Value' ][ jobID ][ 'Status' ]

    if status in ( 'Done', 'Failed' ):
      LOCK.acquire()
      proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
      if not proxyPath[ 'OK' ]:
        LOCK.release()
        return proxyPath
      proxyPath = proxyPath[ 'Value' ]
      oldProxy = os.environ.get( 'X509_USER_PROXY' )
      os.environ[ 'X509_USER_PROXY' ] = proxyPath
      outputRes = self.dirac.getOutputSandbox( jobID, self.__logPath )
      if oldProxy is None:
        del os.environ[ 'X509_USER_PROXY' ]
      else:
        os.environ[ 'X509_USER_PROXY' ] = oldProxy
      LOCK.release()

      if not outputRes[ 'OK' ]:
        ret = S_OK( { 'Download'  : False, 'Log' : outputRes[ 'Message' ] } )
      else:
        try:
          logfile = open( '%s/%d/Script1_CodeOutput.log' % ( self.__logPath, jobID ), 'r' )
          log = logfile.read()
          logfile.close()
        except IOError, e:
          raise
        os.system( 'rm -rf %s/%d' % ( self.__logPath, jobID ) )
        ret = S_OK( { 'Download' : True, 'Log' : log } )
    else:
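As the class docstring says, a concrete CE test supplies its own _judge method. A minimal hypothetical subclass might look like the following; the log marker is an assumption, and the returned strings simply mirror the values used in getTestResult:

class EchoCETest( CEBaseTest ):
  """ Hypothetical concrete test: judge the job by scanning its log output. """

  def _judge( self, log ):
    # illustrative marker only; a real test would look for whatever its own
    # executable prints
    if 'ERROR' in log:
      return 'Bad'
    return 'OK'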
Example #5
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

if len(args) < 1:
    Script.showHelp()

from DIRAC.Interfaces.API.Dirac import Dirac
dirac = Dirac()
exitCode = 0

try:
    jobs = [int(job) for job in args]
except Exception, x:
    print 'Expected integer for jobID'
    exitCode = 2
    DIRAC.exit(exitCode)

result = dirac.status(jobs)
if result['OK']:
    for job in result['Value']:
        print 'JobID=' + str(job),
        for status in result['Value'][job]:
            print status + '=' + result['Value'][job][status] + ';',
        print
else:
    exitCode = 2
    print "ERROR: %s" % error

DIRAC.exit(exitCode)
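This fragment opens with `]))`, the tail of an elided Script.setUsageMessage() call. A hedged reconstruction of the preamble such a status script usually starts with (the usage text is illustrative, not the original):

# Assumed preamble for the script above.
import DIRAC
from DIRAC.Core.Base import Script

Script.setUsageMessage('\n'.join([
    'Retrieve status information for the given DIRAC job IDs',
    'Usage:',
    '  %s [option|cfgfile] ... JobID ...' % Script.scriptName,
]))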
Example #6
    if not result['OK']:
      print "Error:", result['Message']
      DIRACExit( -1 )
    jobs += result['Value']  
        
if len( args ) < 1 and not jobs:
  Script.showHelp()

if len(args) > 0:
  jobs += args

try:
  jobs = [ int( job ) for job in jobs ]
except Exception, x:
  print 'Expected integer for jobID'
  exitCode = 2
  DIRAC.exit( exitCode )

result = dirac.status( jobs )
if result['OK']:
  for job in result['Value']:
    print 'JobID=' + str( job ),
    for status in result['Value'][job]:
      print status + '=' + result['Value'][job][status] + ';',
    print
else:
  exitCode = 2
  print "ERROR: %s" % result['Message']

DIRAC.exit( exitCode )
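Example #6 differs from Example #5 mainly in how the job list is assembled: the elided top of the script evidently pre-fills jobs from one or more job groups before the positional arguments are appended. A hedged sketch of that elided part; the job-group value is a placeholder, and in the real script it would presumably come from a command-line switch:

# Hypothetical elided top of the script above.
import DIRAC
from DIRAC import exit as DIRACExit
from DIRAC.Core.Base import Script

Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

from DIRAC.Interfaces.API.Dirac import Dirac
dirac = Dirac()

exitCode = 0
jobs = []
for group in ['my_job_group']:  # placeholder value
    result = dirac.selectJobs(jobGroup=group)
    # ... the "if not result['OK']" check at the top of this example follows here ...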
Example #7
import sys
import os
import json

from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac

if len(sys.argv) < 2:
    print 'the input file with ids should be specified'
    print 'if the 3rd argument is <get_output> the job output sandbox will be downloaded'
    sys.exit(os.EX_USAGE)

id_list_filename = sys.argv[1]

get_output = False
if (len(sys.argv) > 2):
    if (sys.argv[2] == 'get_output'): get_output = True

id_list_file = open(id_list_filename, 'r')

for line in id_list_file:
##    line = line.strip().decode("utf-8").replace("True","true").replace("False","false")
    line = line.replace("True","true").replace("False","false")
    line = line.replace("'","\"")
    j = json.loads(line)

    dirac = Dirac()
    print dirac.status(j['Value'])

    if get_output: print dirac.getOutputSandbox(j['Value'])

id_list_file.close()

Example #8
#!/bin/env python

import sys

from DIRAC.Core.Base import Script
Script.parseCommandLine()

from DIRAC.Interfaces.API.Job import Job
from DIRAC.Interfaces.API.Dirac import Dirac

dirac = Dirac()
jobid = sys.argv[1]

print dirac.status(jobid)

summary_file = str(jobid) + "_summary.txt"
dirac.getJobSummary(jobid, outputFile=summary_file, printOutput=True)

print dirac.getJobDebugOutput(jobid)

print dirac.getJobLoggingInfo(jobid, printOutput=False)
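Once the job has reached a final state, the same Dirac object can retrieve its output as well. A short hedged sketch using calls that also appear in Examples #4 and #7; note that the keys of the status dictionary are integer job IDs:

# Hypothetical follow-up: fetch the output sandbox once the job has finished.
res = dirac.status(jobid)
if res['OK'] and res['Value'][int(jobid)]['Status'] in ('Done', 'Failed'):
    # the second argument is the directory the sandbox is unpacked into
    print dirac.getOutputSandbox(jobid, '.')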

Example #9
    line = line.strip()
    bits = line.split(' ')

    # visit, idx, job ID, site
    jobid = int(bits[2])
    site = bits[3]
    jobids.append(jobid)

    # tally up failures by site
    if site in sitefailures:
        sitefailures[site] = sitefailures[site] + 1
    else:
        sitefailures[site] = 1

# get statuses
statuslist = dirac.status(jobids)
if 'Value' not in statuslist:
    print "Error getting job status from DIRAC!"
    sys.exit(1)

cvmfsProblemCount = 0

# tally up minor status
for i in jobids:
    minorstatus = statuslist['Value'][i]['MinorStatus']
    if minorstatus in minorstatuses:
        minorstatuses[minorstatus] = minorstatuses[minorstatus] + 1
    else:
        minorstatuses[minorstatus] = 1

    if minorstatus == "Application Finished With Errors":
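The loop above is cut from a longer script and assumes its surrounding setup. A hedged sketch of that context; the joblist.txt name and its four-column "visit idx jobID site" line format are inferred from the comment inside the loop:

# Hypothetical setup for the fragment above.
import sys

from DIRAC.Interfaces.API.Dirac import Dirac

dirac = Dirac()
jobids = []
sitefailures = {}
minorstatuses = {}

for line in open("joblist.txt"):
    # ... the per-line handling shown above goes here ...
    pass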
Example #10
onehour = datetime.timedelta(hours = 1)
now=datetime.datetime.now()
Script.gLogger.notice(now)

results=dirac.selectJobs(jobGroup=jobGroup, owner=owner, date=now-nHours*onehour)
if not results.has_key('Value'):
    Script.gLogger.notice("No job found for group \"%s\" and owner \"%s\" in the past %s hours"%
       (jobGroup, owner, nHours))
    Script.sys.exit(0)

# Found some jobs, print information
jobsList=results['Value']
Script.gLogger.notice("%s jobs found for group \"%s\" and owner \"%s\" in the past %s hours\n"%
       (len(jobsList), jobGroup, owner, nHours))

status=dirac.status(jobsList)

# for details
#print dirac.getJobSummary(3075536)

# print out my favourite tables
SitesDict={}

for job in jobsList:
#    print job, status['Value'][int(job)]
    site=status['Value'][int(job)]['Site']
#    site=status['Value'][int(job)]['CE']
    minstatus=status['Value'][int(job)]['MinorStatus']
    majstatus=status['Value'][int(job)]['Status']

    if majstatus not in {'Done', 'Failed'}:
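        # Hypothetical continuation (not from the original script): skip jobs
        # that are still running, then tally the finished ones per site, in
        # the spirit of parse_jobs_list() from Example #1.
        continue
    SitesDict.setdefault(site, {'Total': 0})
    SitesDict[site]['Total'] += 1
    SitesDict[site][majstatus] = SitesDict[site].get(majstatus, 0) + 1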
Example #11
		j.setExecutable(EXEC,arguments="-W 600 -H 600 -X -0.77568377 -Y -0.13646737 -P %f -M 500 %s" % (pitch, out_bmp))
		pitch += step
		
	j.setOutputSandbox(out_bmp_list + ["StdOut"] + ["StdErr"])
	result = dirac.submit(j)
	print 'Submission Result: ',result
	return result

n_produced_pict = 0
jobinfo_list = []
while n_produced_pict < N_PICT:
	pitch_start = n_produced_pict * P_STEP + P_START
	jobinfo = launch_batch_pict(pitch_start, P_STEP, N_PICT_BATCH)
	jobinfo_list.append(jobinfo)
	n_produced_pict += N_PICT_BATCH


job_state_dict = dict()
job_in_flight = ['start'] 
while len(job_in_flight) != 0:
	for jobinfo in jobinfo_list :
		jobid=jobinfo['Value']
		status = dirac.status(jobid)
		state = status['Value'][jobid]['Status']
		job_state_dict[jobid] = state
	job_in_flight = [k for k, v in job_state_dict.items() if v != 'Done']
	print "Job still running :"
	print job_in_flight
	time.sleep(2)
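The polling loop above calls dirac.status() once per job on every pass. Since Dirac().status() also accepts a list of IDs, the same check can be done with a single call per iteration; a sketch of that variant, reusing the names from the example:

# Alternative polling sketch: one status() call for the whole batch.
all_ids = [jobinfo['Value'] for jobinfo in jobinfo_list]
job_in_flight = list(all_ids)
while job_in_flight:
    status = dirac.status(all_ids)
    if status['OK']:
        states = status['Value']
        job_in_flight = [jid for jid in all_ids if states[jid]['Status'] != 'Done']
    print "Jobs still running:", job_in_flight
    time.sleep(2)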

Example #12
     local_time = datetime.datetime.utcnow()
     timedelta = local_time-datetime.timedelta(seconds=86400)
     if specialOptions.has_key("dayspassed"):
         timedelta = local_time-datetime.timedelta(seconds=float(specialOptions["dayspassed"])*3600)
     res = w.getJobs(my_dict,timedelta.strftime( '%Y-%m-%d %H:%M:%S' ))
 
     if not res['OK']:
         gLogger.error("Could not get list of running jobs.",res['Message'])
         dexit(1)
     
     job_list = res['Value']
 else:
     job_list = specialOptions["JobID"].split(",")
     doLogging = True
 #for j in job_list:
 res = d.status(job_list)   
 
 if not res['OK']:
     gLogger.error("Could not get status of job_list,",res['Message'])
     dexit(1)
 
 status = res['Value']
 # get sites info
 sites = None
 res = w.getJobsSites(job_list)
 if not res['OK']:
     gLogger.error("Could not get sites;",res['Message'])
 else:
     sites = res['Value']
 
 if not do_xml:
Example #13
j.setCPUTime(500)
j.setExecutable('/bin/echo hello')
j.setExecutable('/bin/hostname')
j.setExecutable('/bin/echo hello again')
j.setName('API')

result = dirac.submitJob(j)
print 'Submission Result: '
pprint.pprint(result)

jobid = result['JobID']

# print job id to file for future reference
joblog = open("jobid.log", "a")
joblog.write(str(jobid)+'\n')
joblog.close()

# to interactively check on job status do:
# dirac-wms-job-status -f jobid.log
print "\nThe current status of this job is:"
pprint.pprint(dirac.status(jobid))

joblog = open("jobid.log", "r")
# list comprehension :-D
all_jobids = [jobid.strip() for jobid in joblog.readlines()]

print "\nThe current status of all jobs is:"
all_status = dirac.status(all_jobids)
pprint.pprint(all_status)
Example #14
class DiracDaemon(Daemonize):
    """Dirac Daemon."""
    def __init__(self, address, **kwargs):
        """Initialise."""
        super(DiracDaemon, self).__init__(action=self.main, **kwargs)
        self._address = address
        self._dirac_api = Dirac()

    def main(self):
        """Daemon main."""
        # Defer creation of server to inside the daemon context otherwise the socket will be
        # closed when daemonising
        dirac_server = SimpleXMLRPCServer(self._address)
        dirac_server.register_introspection_functions()
        dirac_server.register_instance(self._dirac_api)
        # override Dirac().status to make sure that the keys are strings.
        dirac_server.register_function(self.status)
        dirac_server.register_function(self.submit_job)
        dirac_server.serve_forever()

    def status(self, ids):
        """
        Return the status of Dirac jobs with ids.

        This method will essentially be overriding Dirac().status to ensure that the dict
        keys which are the ids of the jobs are cast to strings such that they can be sent
        over xmlrpc socket.
        """
        return {
            str(k): v
            for k, v in self._dirac_api.status(ids).get("Value",
                                                        {}).iteritems()
        }

    def submit_job(self,
                   request_id,
                   executable,
                   macro,
                   starting_seed=8000000,
                   njobs=10,
                   platform='ANY',
                   output_data_site='UKI-LT2-IC-HEP-disk',
                   output_log='lzproduction_output.log'):
        """
        Submit LZProduction job to DIRAC.

        Args:
            request_id (int): The id number of the associated request
            executable (str): The full path to the executable job script
            macro (str): The full path to the macro for this job
            starting_seed (int): The random seed for the first of the parametric jobs
            njobs (int): The number of parametric jobs to create
            platform (str): The required platform
            output_data_site (str): The name of the grid site to store the output data at
            output_log (str): The file name for the output log file

        Returns:
           list: The list of created parametric job DIRAC ids
        """
        j = Job()
        j.setName(os.path.splitext(os.path.basename(macro))[0] + '%(args)s')
        j.setExecutable(os.path.basename(executable),
                        os.path.basename(macro) + ' %(args)s', output_log)
        j.setInputSandbox([executable, macro])
        j.setOutputData('*.root', output_data_site, str(request_id))
        j.setParameterSequence(
            "args",
            [str(i) for i in xrange(starting_seed, starting_seed + njobs)],
            addToWorkflow=True)
        j.setPlatform(platform)

        return self.status(self._dirac_api.submit(j).get("Value", []))
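A daemon like this is driven from another process over XML-RPC. A hedged sketch of the client side; the address and file paths are placeholders, and xmlrpclib is the Python 2 client counterpart of the SimpleXMLRPCServer used above:

# Hypothetical client for DiracDaemon; host, port and paths are placeholders.
import xmlrpclib

server = xmlrpclib.ServerProxy('http://localhost:8000')

# submit_job returns the status dict of the newly created parametric jobs
new_jobs = server.submit_job(42, '/path/to/job.sh', '/path/to/job.mac')
print new_jobs

# later, poll the same ids (keys are strings, as the daemon guarantees)
print server.status(new_jobs.keys())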
Example #15
     if specialOptions.has_key("dayspassed"):
         timedelta = local_time-datetime.timedelta(seconds=float(specialOptions["dayspassed"])*3600)
     res = w.getJobs(my_dict,timedelta.strftime( '%Y-%m-%d %H:%M:%S' ))
 
     if not res['OK']:
         gLogger.error("Could not get list of running jobs.",res['Message'])
         dexit(1)
     
     job_list = res['Value']
 else:
     job_list = specialOptions["JobID"].split(",")
     doLogging = True
 status = {}
 sites = {} 
 for chunk in breakListIntoChunks(job_list,1000):
     res = d.status(chunk)   
     if not res['OK']:
         gLogger.error("Could not get status of job list chunk,",res['Message'])
         continue
     status.update(res['Value'])
 # get sites info
     res = w.getJobsSites(chunk)
     if not res['OK']:
         gLogger.error("Could not get sites;",res['Message'])
     sites.update(res['Value'])
 
 if not do_xml:
     print('# ID\thostname\tStatus\tSubmitted\tStarted\tEnded\tCPUtime\tMemory')
 for j in job_list:
     status_j=status[int(j)]
     if doLogging: