# Kill the DIRAC jobs whose IDs are given as positional arguments, acting with
# the pipeline shifter's proxy.
#
# Names used below but bound outside this fragment: gLogger, dexit, environ,
# Script and args.
from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.FrameworkSystem.Client.ProxyManagerClient import gProxyManager
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations

# get necessary credentials
op = Operations("glast.org")
shifter = op.getValue("Pipeline/Shifter", "/DC=org/DC=doegrids/OU=People/CN=Stephan Zimmer 799865")
shifter_group = op.getValue("Pipeline/ShifterGroup", "glast_user")
result = gProxyManager.downloadProxyToFile(shifter, shifter_group, requiredTimeLeft=10000)
if not result['OK']:
    gLogger.error("ERROR: No valid proxy found; ", result['Message'])
    dexit(1)

# Point the environment at the downloaded proxy file before the Dirac API
# object is created.
proxy = result['Value']
environ['X509_USER_PROXY'] = proxy
gLogger.info("using proxy %s" % proxy)

dirac = Dirac(True, "myRepo.rep")
exitCode = 0
errorList = []

if len(args) < 1:
    Script.showHelp()

# Try every job; remember the failures so that all of them get reported.
for job in args:
    result = dirac.kill(job)
    if result['OK']:
        gLogger.info('Killed job %s' % (job))
    else:
        errorList.append((job, result['Message']))
        exitCode = 2

for error in errorList:
    gLogger.error("ERROR %s: %s" % error)

# NOTE(review): the original ended with `dexit.exit( exitCode )` although the
# proxy-failure path above calls `dexit(1)`; both cannot be right.  This
# assumes `dexit` is the exit function itself (e.g. `from DIRAC import exit as
# dexit`) -- confirm against the import that binds it.
dexit(exitCode)
class CEBaseTest(TestBase):
    """
    CEBaseTest is base class for all the CE test classes.

    It submits a test job to a computing element or cloud site and later
    turns the job's log (or its lack of progress) into a status dictionary.
    Real CE test should implement its _judge method.
    """

    def __init__(self, args=None, apis=None):
        """
        Read the test configuration and pick up the service clients.

        :param args: test parameters; 'TestType' and 'executable' are read
            as mandatory keys, 'timeout' (default 1800) and 'VO' are optional.
        :param apis: optional pre-built clients keyed by 'WMSAdministrator'
            and 'Dirac'; a missing one is created here.
        """
        super(CEBaseTest, self).__init__(args, apis)

        self.timeout = self.args.get('timeout', 1800)
        self.vo = self.args.get('VO')
        self.testType = self.args['TestType']
        self.executable = self.args['executable']
        self.__logPath = '/opt/dirac/pro/BESDIRAC/ResourceStatusSystem/SAM/log'
        self.__scriptPath = '/opt/dirac/pro/BESDIRAC/ResourceStatusSystem/SAM/sam_script'

        if 'WMSAdministrator' in self.apis:
            self.wmsAdmin = self.apis['WMSAdministrator']
        else:
            self.wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')

        if 'Dirac' in self.apis:
            self.dirac = self.apis['Dirac']
        else:
            self.dirac = Dirac()

    def doTest(self, elementDict):
        """
        submit test job to the specified ce or cloud..

        :param elementDict: dictionary providing 'ElementName', 'ElementType'
            and 'VO' (a sequence of VO names, possibly empty).
        :return: the failed submission result as is, otherwise S_OK of
            { 'Result' : { 'JobID', 'VO', 'SubmissionTime' }, 'Finish' : False }.
        """
        elementName = elementDict['ElementName']
        elementType = elementDict['ElementType']
        vos = elementDict['VO']

        site = None
        ce = None
        if elementType == 'ComputingElement':
            ce = elementName
        if elementType == 'CLOUD':
            site = elementName

        # VO precedence: the configured one, then the element's first VO,
        # then the 'bes' default.
        if self.vo:
            submitVO = self.vo
        elif vos:
            submitVO = vos[0]
        else:
            submitVO = 'bes'

        submissionTime = datetime.utcnow().replace(microsecond=0)
        sendRes = self.__submit(site, ce, submitVO)
        if not sendRes['OK']:
            return sendRes
        jobID = sendRes['Value']

        result = {'Result': {'JobID': jobID,
                             'VO': submitVO,
                             'SubmissionTime': submissionTime},
                  'Finish': False}
        return S_OK(result)

    def __submit(self, site, CE, vo):
        """
        set the job and submit.

        :param site: destination site, used only when no CE is given.
        :param CE: destination computing element, may be None.
        :param vo: VO whose proxy is used for the submission.
        :return: the failed proxy lookup result, otherwise the result of
            self.dirac.submit.
        """
        job = Job()
        job.setName(self.testType)
        job.setJobGroup('CE-Test')
        job.setExecutable(self.executable)
        job.setInputSandbox('%s/%s' % (self.__scriptPath, self.executable))
        if site and not CE:
            job.setDestination(site)
        if CE:
            job.setDestinationCE(CE)

        # X509_USER_PROXY is process-wide state, so it is swapped under LOCK.
        # try/finally guarantees the lock is released and the environment is
        # restored even when the lookup or the submission raises.
        LOCK.acquire()
        try:
            proxyPath = BESUtils.getProxyByVO('zhangxm', vo)
            if not proxyPath['OK']:
                return proxyPath
            proxyPath = proxyPath['Value']

            oldProxy = os.environ.get('X509_USER_PROXY')
            os.environ['X509_USER_PROXY'] = proxyPath
            try:
                result = self.dirac.submit(job)
            finally:
                if oldProxy is None:
                    os.environ.pop('X509_USER_PROXY', None)
                else:
                    os.environ['X509_USER_PROXY'] = oldProxy
        finally:
            LOCK.release()

        return result

    def getTestResult(self, elementName, vo, jobID, submissionTime):
        """
        download output sandbox and judge the test status from the log file.

        :param elementName: name of the tested element; a name whose first
            dot-separated field is 'CLOUD' is used directly as the site.
        :param vo: VO whose proxy is used to fetch the job output.
        :param jobID: ID of the test job.
        :param submissionTime: datetime of the submission, compared with the
            current UTC time to detect a timeout.
        :return: a failed result from a helper call as is; S_OK() while the
            test is neither finished nor timed out; otherwise S_OK of a dict
            with 'CompletionTime', 'Status', 'Log' and 'ApplicationTime'.
        """
        isFinish = False

        res = self.__getJobOutput(jobID, vo)
        if not res['OK']:
            return res
        output = res['Value']
        status = res['Status']

        resDict = {'CompletionTime': None, 'Status': None, 'Log': None, 'ApplicationTime': None}
        utcNow = datetime.utcnow().replace(microsecond=0)

        if output:
            isFinish = True
            resDict['CompletionTime'] = utcNow
            log = output['Log']
            if not output['Download']:
                resDict['Status'] = 'Unknown'
                resDict['Log'] = 'Fail to download log file for job %s: %s' % (jobID, log)
            else:
                resDict['Log'] = log
                resDict['Status'] = self._judge(log)
                # The original stored this under the misspelled key
                # 'AppliactionTime', so 'ApplicationTime' always stayed None.
                resDict['ApplicationTime'] = self.__getAppRunningTime(log)

        else:
            if utcNow - submissionTime >= timedelta(seconds=self.timeout):
                isFinish = True
                if elementName.split('.')[0] == 'CLOUD':
                    site = elementName
                else:
                    site = BESUtils.getSiteForCE(elementName)
                jobCount = self.wmsAdmin.getSiteSummaryWeb({'Site': site}, [], 0, 0)
                if not jobCount['OK']:
                    return jobCount
                params = jobCount['Value']['ParameterNames']
                records = jobCount['Value']['Records'][0]
                run = records[params.index('Running')]
                done = records[params.index('Done')]
                if status == 'Waiting' and run == 0 and done == 0:
                    resDict['Status'] = 'Bad'
                    resDict['Log'] = 'The test job is waiting for %d seconds, but no running and done jobs at this site.' % self.timeout
                else:
                    if run != 0:
                        resDict['Status'] = 'Busy'
                        resDict['Log'] = 'Site %s is too busy to execute this test job, job status is %s' % (site, status)
                    else:
                        resDict['Status'] = 'Unknown'
                        resDict['Log'] = 'Test did not complete within the timeout of %d seconds, job status is %s' % (self.timeout, status)
                # The timed-out test job is removed whatever the verdict was.
                self.dirac.kill(jobID)

        if not isFinish:
            return S_OK()
        else:
            return S_OK(resDict)

    def __getJobOutput(self, jobID, vo):
        """
        Look up the job status and, for a 'Done' or 'Failed' job, fetch its
        output sandbox and read Script1_CodeOutput.log from it.

        NOTE(review): the visible source stops at the final `else:` of this
        method, so the handling of the other statuses and the return
        statement are not shown here and are not reconstructed.
        """
        status = self.dirac.status(jobID)
        if not status['OK']:
            return status
        status = status['Value'][jobID]['Status']

        if status in ('Done', 'Failed'):
            # Same lock-protected proxy swap as for the submission.
            LOCK.acquire()
            try:
                proxyPath = BESUtils.getProxyByVO('zhangxm', vo)
                if not proxyPath['OK']:
                    return proxyPath
                proxyPath = proxyPath['Value']

                oldProxy = os.environ.get('X509_USER_PROXY')
                os.environ['X509_USER_PROXY'] = proxyPath
                try:
                    outputRes = self.dirac.getOutputSandbox(jobID, self.__logPath)
                finally:
                    if oldProxy is None:
                        os.environ.pop('X509_USER_PROXY', None)
                    else:
                        os.environ['X509_USER_PROXY'] = oldProxy
            finally:
                LOCK.release()

            if not outputRes['OK']:
                ret = S_OK({'Download': False, 'Log': outputRes['Message']})
            else:
                # An IOError from open/read propagates unchanged (the original
                # re-raised a bare IOError and lost the message); the handle
                # is closed even when read() fails.
                with open('%s/%d/Script1_CodeOutput.log' % (self.__logPath, jobID), 'r') as logfile:
                    log = logfile.read()
                os.system('rm -rf %s/%d' % (self.__logPath, jobID))
                ret = S_OK({'Download': True, 'Log': log})
        else:
            # NOTE(review): the body of this branch and the rest of the method
            # lie beyond the visible text; `pass` is only a placeholder that
            # keeps this fragment parseable.
            pass
# NOTE(review): the statements below arrived collapsed onto one physical line,
# so their original nesting cannot be recovered from this text alone.  They use
# names bound outside the visible fragment (logs, logging_obj, job, status_j,
# stdout, d, sites, do_xml, firstChild, xmlfile).
# What the statements do, in order:
#   * wrap every entry of `logs` in a LoggingRecord and append it to logging_obj;
#   * build logging_info: 'Submitted' is the time of the first record,
#     'Started' is taken from records whose major_status is 'Application',
#     'Ended' is the time of the last record when status_j['Status'] is 'Done',
#     'JobID' is str(job);
#   * merge logging_info into status_j and wrap it in InternalJobStatus;
#   * for a "Failed" status: call setEndTime() when getEndTime() is falsy and
#     request a kill through d.kill(job) -- whether the kill sits under the
#     missing-end-time check cannot be told from here (TODO confirm);
#   * set the site from sites[job]['Site'] when the job is present in `sites`;
#   * when do_xml is set append the status' XML node to firstChild, otherwise
#     print the status object;
#   * finally assign `stdout` to sys.stdout and, when do_xml is set, print the
#     pretty-printed XML document.
for l in logs: logging_obj.append( LoggingRecord(l) ) logging_info = {'Submitted': logging_obj[0].time, 'Started': None, 'Ended': None, 'JobID': str(job)} for record in logging_obj: if record.major_status == 'Application': logging_info['Started'] = record.time if status_j['Status'] == 'Done': logging_info['Ended']=logging_obj[-1].time status_j.update(logging_info) new_stat = InternalJobStatus(job,status_j) sys.stdout = stdout if new_stat.getStatus()=="Failed": if not new_stat.getEndTime(): gLogger.info("Time stamp for ended job %i not provided, setting it to 1 day in the past!" %job) new_stat.setEndTime() gLogger.info("Requesting to kill job %i" %job) d.kill(job) if job in sites: new_stat.setSite(sites[job]['Site']) #print new_stat._toxml().toprettyxml() if do_xml: firstChild.appendChild(new_stat._toxml()) else: print(new_stat) # TODO: # pretty print & parse in java sys.stdout = stdout if do_xml: print(xmlfile.toprettyxml())
# Command-line entry: kill every DIRAC job whose ID is given as a positional
# argument and exit with status 2 when at least one kill failed.
# `Script`, `DIRAC` and the module docstring are defined outside this fragment.
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1],
    'Usage:',
    ' %s [option|cfgfile] ... JobID ...' % Script.scriptName,
    'Arguments:',
    ' JobID: DIRAC Job ID',
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

if not args:
    Script.showHelp()

# Kept after the command-line parsing, exactly where the original placed it.
from DIRAC.Interfaces.API.Dirac import Dirac
dirac = Dirac()
exitCode = 0
errorList = []

# Try every job; remember the failures so that all of them get reported.
for job in args:
    result = dirac.kill(job)
    if result['OK']:
        # Single-argument print(...) behaves the same on Python 2 and also
        # parses on Python 3, unlike the former print statement.
        print('Killed job %s' % (job))
    else:
        errorList.append((job, result['Message']))
        exitCode = 2

for error in errorList:
    print("ERROR %s: %s" % error)

DIRAC.exit(exitCode)
# NOTE(review): this fragment begins inside a dict literal (its opening and the
# earlier entries are outside the visible text) and the statements arrived
# collapsed onto one physical line, so their nesting cannot be recovered here.
# After the literal is closed the statements, in order:
#   * set logging_info['Started'] from records whose major_status is
#     'Application' and logging_info['Ended'] from the last record when
#     status_j['Status'] is 'Done';
#   * merge logging_info into status_j and wrap it in InternalJobStatus;
#   * for a "Failed" status: call setEndTime() when getEndTime() is falsy and
#     request a kill through d.kill(job) -- the exact nesting of the kill is
#     not visible (TODO confirm);
#   * set the site from sites[job]['Site'] when the job is present in `sites`;
#   * when do_xml is set append the status' XML node to firstChild, otherwise
#     print the status object;
#   * finally assign `stdout` to sys.stdout and, when do_xml is set, print the
#     pretty-printed XML document.
'JobID': str(job) } for record in logging_obj: if record.major_status == 'Application': logging_info['Started'] = record.time if status_j['Status'] == 'Done': logging_info['Ended'] = logging_obj[-1].time status_j.update(logging_info) new_stat = InternalJobStatus(job, status_j) sys.stdout = stdout if new_stat.getStatus() == "Failed": if not new_stat.getEndTime(): gLogger.info( "Time stamp for ended job %i not provided, setting it to 1 day in the past!" % job) new_stat.setEndTime() gLogger.info("Requesting to kill job %i" % job) d.kill(job) if job in sites: new_stat.setSite(sites[job]['Site']) #print new_stat._toxml().toprettyxml() if do_xml: firstChild.appendChild(new_stat._toxml()) else: print(new_stat) # TODO: # pretty print & parse in java sys.stdout = stdout if do_xml: print(xmlfile.toprettyxml())