Beispiel #1
0
  def __submit( self, site, CE, vo ):
    """
      Build the test job and submit it with the proxy of the given VO.

      The job is named after self.testType, placed in the 'CE-Test' job group,
      runs self.executable and ships that script in its input sandbox.

      :param site: destination site; only applied when no CE is given
      :param CE: destination computing element; takes precedence over site
      :param vo: VO handed to BESUtils.getProxyByVO to select the proxy
      :return: the result of self.dirac.submit( job ), or the failed result of
               BESUtils.getProxyByVO when no proxy could be obtained
    """

    job = Job()
    job.setName( self.testType )
    job.setJobGroup( 'CE-Test' )
    job.setExecutable( self.executable )
    job.setInputSandbox( '%s/%s' % ( self.__scriptPath, self.executable ) )
    if CE:
      job.setDestinationCE( CE )
    elif site:
      job.setDestination( site )

    # X509_USER_PROXY lives in the process-wide environment, so it is swapped
    # only while LOCK is held. The try/finally pairs guarantee that the lock is
    # released and the previous value restored even if the proxy lookup or the
    # submission raises; otherwise every other caller would block forever.
    LOCK.acquire()
    try:
      proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
      if not proxyPath[ 'OK' ]:
        return proxyPath

      oldProxy = os.environ.get( 'X509_USER_PROXY' )
      os.environ[ 'X509_USER_PROXY' ] = proxyPath[ 'Value' ]
      try:
        return self.dirac.submit( job )
      finally:
        if oldProxy is None:
          # pop() tolerates the variable having been removed in the meantime
          os.environ.pop( 'X509_USER_PROXY', None )
        else:
          os.environ[ 'X509_USER_PROXY' ] = oldProxy
    finally:
      LOCK.release()
Beispiel #2
0
  def __getJobOutput( self, jobID, vo ):
    """
      Download the output sandbox of a finished job and read its log file.

      The job status is queried first. Only when it is 'Done' or 'Failed' is
      the output sandbox downloaded into self.__logPath (using the proxy of
      the given VO) and the file 'Script1_CodeOutput.log' read.

      :param jobID: ID of the job; formatted with %d when building paths
      :param vo: VO handed to BESUtils.getProxyByVO to select the proxy
      :return: the failed result of self.dirac.status or
               BESUtils.getProxyByVO when one of them fails
      :raise IOError: when the downloaded log file cannot be opened or read

      NOTE(review): the visible source ends right after ``ret`` is built and
      contains no return statement for it; the remainder of the method is not
      shown here and has deliberately not been reconstructed.
    """
    import shutil

    status = self.dirac.status( jobID )
    if not status[ 'OK' ]:
      return status
    status = status[ 'Value' ][ jobID ][ 'Status' ]

    if status in ( 'Done', 'Failed' ):
      # X509_USER_PROXY is process-wide, so it is swapped only while LOCK is
      # held; try/finally makes sure the lock is released and the environment
      # restored even if the proxy lookup or the download raises.
      LOCK.acquire()
      try:
        proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
        if not proxyPath[ 'OK' ]:
          return proxyPath

        oldProxy = os.environ.get( 'X509_USER_PROXY' )
        os.environ[ 'X509_USER_PROXY' ] = proxyPath[ 'Value' ]
        try:
          outputRes = self.dirac.getOutputSandbox( jobID, self.__logPath )
        finally:
          if oldProxy is None:
            os.environ.pop( 'X509_USER_PROXY', None )
          else:
            os.environ[ 'X509_USER_PROXY' ] = oldProxy
      finally:
        LOCK.release()

      if not outputRes[ 'OK' ]:
        ret = S_OK( { 'Download'  : False, 'Log' : outputRes[ 'Message' ] } )
      else:
        jobLogDir = '%s/%d' % ( self.__logPath, jobID )
        # An IOError from open/read propagates unchanged, keeping errno and
        # file name; the file handle is closed in every case.
        with open( '%s/Script1_CodeOutput.log' % jobLogDir, 'r' ) as logfile:
          log = logfile.read()
        # Remove the downloaded sandbox directory without going through a shell.
        shutil.rmtree( jobLogDir, ignore_errors = True )
        ret = S_OK( { 'Download' : True, 'Log' : log } )
Beispiel #3
0
  def getTestResult( self, elementName, vo, jobID, submissionTime ):
    """
      Download the output sandbox and judge the test status from the log file.

      When the job produced output, the log is evaluated with self._judge and
      the application running time is extracted from it. When there is no
      output yet and self.timeout seconds have passed since submissionTime,
      the job is killed and a status is derived from the job summary of the
      site ('Bad', 'Busy' or 'Unknown').

      :param elementName: name of the tested element; names whose first
                          dot-separated field is 'CLOUD' are used as the site
                          name directly, others are resolved with
                          BESUtils.getSiteForCE
      :param vo: VO passed on to self.__getJobOutput
      :param jobID: ID of the test job
      :param submissionTime: datetime the job was submitted, compared against
                             datetime.utcnow()
      :return: a failed result from a helper call; S_OK() without value while
               the test is still pending; otherwise S_OK( resDict ) where
               resDict has the keys 'CompletionTime', 'Status', 'Log' and
               'ApplicationTime'
    """

    isFinish = False

    res = self.__getJobOutput( jobID, vo )
    if not res[ 'OK' ]:
      return res
    output = res[ 'Value' ]
    status = res[ 'Status' ]

    resDict = { 'CompletionTime' : None, 'Status' : None, 'Log' : None, 'ApplicationTime' : None }
    utcNow = datetime.utcnow().replace( microsecond = 0 )

    if output:
      isFinish = True
      resDict[ 'CompletionTime' ] = utcNow
      log = output[ 'Log' ]
      if not output[ 'Download' ]:
        resDict[ 'Status' ] = 'Unknown'
        resDict[ 'Log' ] = 'Fail to download log file for job %s: %s' % ( jobID, log )
      else:
        resDict[ 'Log' ] = log
        resDict[ 'Status' ] = self._judge( log )
        # The key was previously misspelled, which left 'ApplicationTime' at None.
        resDict[ 'ApplicationTime' ] = self.__getAppRunningTime( log )

    elif utcNow - submissionTime >= timedelta( seconds = self.timeout ):
      isFinish = True
      if elementName.split( '.' )[ 0 ] == 'CLOUD':
        site = elementName
      else:
        site = BESUtils.getSiteForCE( elementName )
      jobCount = self.wmsAdmin.getSiteSummaryWeb( { 'Site' : site }, [], 0, 0 )
      if not jobCount[ 'OK' ]:
        return jobCount
      params = jobCount[ 'Value' ][ 'ParameterNames' ]
      records = jobCount[ 'Value' ][ 'Records' ]
      if records:
        run = records[ 0 ][ params.index( 'Running' ) ]
        done = records[ 0 ][ params.index( 'Done' ) ]
      else:
        # No summary record for this site: treat it as no running and no done jobs.
        run = done = 0

      if status == 'Waiting' and run == 0 and done == 0:
        resDict[ 'Status' ] = 'Bad'
        resDict[ 'Log' ] = 'The test job is waiting for %d seconds, but no running and done jobs at this site.' % self.timeout
      elif run != 0:
        resDict[ 'Status' ] = 'Busy'
        resDict[ 'Log' ] = 'Site %s is too busy to execute this test job, job status is %s' % ( site, status )
      else:
        resDict[ 'Status' ] = 'Unknown'
        resDict[ 'Log' ] = 'Test did not complete within the timeout of %d seconds, job status is %s' % ( self.timeout, status )
      # The test timed out, so the job is removed from the system.
      self.dirac.kill( jobID )

    if not isFinish:
      return S_OK()
    return S_OK( resDict )
  def export_getSiteVO(self, siteName):
    """
    Look up the VO information of a site.

    :param siteName: name of the site handed to BESUtils.getSiteVO
    :return: S_OK wrapping whatever BESUtils.getSiteVO returns for the site
    """

    gLogger.info('getSiteVO')
    return S_OK( BESUtils.getSiteVO( siteName ) )
Beispiel #5
0
  def doTest( self, elementDict ):
    """
      Test upload and download for the specified SE.

      A small local test file is created if it does not exist yet, then the
      helper script self.__scriptName is run through systemCall (first
      argument 300) with X509_USER_PROXY pointing to the proxy of the
      element's VO. The test is 'Bad' when the call fails or the script exits
      with a non-zero code, 'OK' otherwise.

      :param elementDict: dictionary with the keys 'ElementName' (the SE name)
                          and 'VO'
      :return: the failed result of BESUtils.getProxyByVO when no proxy is
               available; otherwise S_OK( { 'Result' : {...}, 'Finish' : True } )
               where 'Result' holds 'Status', 'Log', 'SubmissionTime',
               'CompletionTime' and 'ApplicationTime' (seconds)
    """

    elementName = elementDict[ 'ElementName' ]
    vo = elementDict[ 'VO' ]

    testFilePath = self.__localPath + self.__testFile
    if not os.path.isfile( testFilePath ):
      with open( testFilePath, 'w' ) as testFile:
        testFile.write( 'hello' )

    status = 'OK'
    log = ''
    lfnPath = self.__lfnPath.format(vo=vo) + elementName + '-' + self.__testFile
    submissionTime = datetime.utcnow().replace( microsecond = 0 )

    proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
    if not proxyPath[ 'OK' ]:
      gLogger.error('Can not get proxy for VO %s' % vo)
      return proxyPath
    proxyPath = proxyPath[ 'Value' ]

    # The proxy is passed through a private copy of the environment, so the
    # environment of this process is left untouched.
    env_test = os.environ.copy()
    env_test[ 'X509_USER_PROXY' ] = proxyPath
    cmd = [os.path.join(self.__scriptPath, self.__scriptName), '-o', '/DIRAC/Security/UseServerCertificate=no', lfnPath, testFilePath, elementName]
    result = systemCall(300, cmd, env=env_test)
    gLogger.debug( 'Result of %s: %s' % ( self.__scriptName, result ) )
    if not result['OK']:
      status = 'Bad'
      log += 'Call %s failed: %s' % (self.__scriptName, result['Message'])
    elif result['Value'][0] != 0:
      status = 'Bad'
      log += '%s exit with error %s:\n%s' % (self.__scriptName, result['Value'][0], result['Value'][1])
    else:
      log += '%s exit successfully:\n%s' % (self.__scriptName, result['Value'][1])

    completionTime = datetime.utcnow().replace( microsecond = 0 )
    applicationTime = ( completionTime - submissionTime ).total_seconds()

    result = { 'Result' : { 'Status' : status,
                            'Log' : log,
                            'SubmissionTime' : submissionTime,
                            'CompletionTime' : completionTime,
                            'ApplicationTime' : applicationTime },
               'Finish' : True }

    # The shared test file at testFilePath is kept so later runs can reuse it;
    # only the per-element local file (presumably written by the script's
    # download step - TODO confirm) is removed.
    localFile = self.__localPath + elementName +'-' + self.__testFile
    if os.path.isfile( localFile ):
      os.remove( localFile )

    return S_OK( result )
Beispiel #6
0
  def execute(self):
    """
      The main method of the agent.

      It loads the test configuration, instantiates TestExecutor and
      StatusEvaluator, collects the elements to test (the computing elements
      or CLOUD sites of every active site that is not listed in the
      'noTestSite' option, plus the storage elements of the 'SE-USER' group),
      runs self._execute for every element in its own thread and finally
      evaluates the status of every active site per VO.

      :return: the failed result of getSiteMask when the site mask cannot be
               retrieved, S_OK() otherwise
    """

    # Imported here rather than at module level, as in the original code.
    from BESDIRAC.ResourceStatusSystem.SAM.SAMTest import TestConfiguration
    self.tests = TestConfiguration.TESTS
    self.__loadTestObj()

    self.testExecutor = TestExecutor( self.tests, self.apis )
    self.statusEvaluator = StatusEvaluator( self.apis )

    elements = []
    sitesCEs = {}

    # CE tests
    # Entries are stripped before the emptiness check so that blank items
    # such as ' ' do not end up as '' in the list.
    noTestSites = [ site.strip() for site in self.am_getOption( 'noTestSite', '' ).split( ',' ) if site.strip() ]
    diracAdmin = DiracAdmin()
    activeSites = diracAdmin.getSiteMask()
    if not activeSites[ 'OK' ]:
      return activeSites
    activeSites = [ site for site in activeSites[ 'Value' ] if site not in noTestSites ]
    gLogger.info( 'Active sites: %s' % activeSites )

    for siteName in activeSites:
      domain = siteName.split('.')[ 0 ]
      vos = BESUtils.getSiteVO( siteName )
      if 'CLOUD' != domain:
        siteCEs = CSHelpers.getSiteComputingElements( siteName )
        sitesCEs[ siteName ] = siteCEs
        for ce in siteCEs:
          elements.append( { 'ElementName' : ce,
                             'ElementType' : 'ComputingElement',
                             'VO' : vos } )
          gLogger.debug("List of elements: %s" % ce)

      else:
        # A CLOUD site is tested as a single element named after the site.
        sitesCEs[ siteName ] = [ siteName ]
        elements.append( { 'ElementName' : siteName,
                           'ElementType' : 'CLOUD',
                           'VO' : vos } )

    # SE tests
    # A missing option yields no SEs instead of an AttributeError, and the
    # list is parsed regardless of the whitespace around the commas.
    ses = gConfig.getValue( 'Resources/StorageElementGroups/SE-USER' ) or ''
    for se in [ seName.strip() for seName in ses.split( ',' ) if seName.strip() ]:
      seSites = BESUtils.getSitesForSE( se )
      for seSite in seSites:
        gLogger.debug( 'Site for SE %s: %s' % (se, seSite) )
        if seSite not in activeSites:
          continue
        vos = BESUtils.getSiteVO( seSite )
        gLogger.debug( 'vos for SE %s under site %s: %s' % (se, seSite, vos) )
        if len(vos) == 0:
          continue
        # The SE is tested once, with the first VO of the first active site
        # that has any VO.
        vo = vos[0]
        elements.append( { 'ElementName' : se,
                           'ElementType' : 'StorageElement',
                           'VO' : vo } )
        gLogger.info( 'VO for SE %s: %s' % ( se, vo ) )
        break

    lastCheckTime = datetime.utcnow().replace(microsecond = 0)
    self.elementsStatus = {}

    # One thread per element; all of them are joined before the evaluation.
    threads = []
    for elementDict in elements:
      t = threading.Thread( target = self._execute, args = ( elementDict, ) )
      threads.append( t )
      t.start()

    for thread in threads:
      thread.join()

    for siteName in activeSites:
      # Only the first storage element of the site is taken into account.
      seList = CSHelpers.getSiteStorageElements( siteName )
      se = ''
      if [] != seList:
        se = seList[ 0 ]
      try:
        seStatus = self.elementsStatus[ se ][ 'all' ]
      except KeyError:
        seStatus = None

      # Group the CE statuses of the site by VO.
      voStatus = { 'all' : [] }
      for ce in sitesCEs[ siteName ]:
        if ce not in self.elementsStatus:
          continue
        for vo, status in self.elementsStatus[ ce ].items():
          voStatus.setdefault( vo, [] ).append( status )

      for vo, ceStatusList in voStatus.items():
        if ceStatusList == [] and seStatus is None:
          continue
        res = self.statusEvaluator.evaluateSiteStatus( siteName, ceStatusList, seStatus, vo = vo, lastCheckTime = lastCheckTime)
        if not res[ 'OK' ]:
          gLogger.error( 'StatusEvaluator.evaluateSiteStatus: %s' % res[ 'Message' ] )
          # Stop evaluating the remaining VOs of this site.
          break

    return S_OK()
Beispiel #7
0
  def execute(self):
    """
      The main method of the agent.

      It collects the elements to test (the storage elements of the 'SE-USER'
      group and the computing elements or CLOUD sites of every site in the
      site mask), runs self._execute for every element in its own thread and
      finally evaluates the status of every active site per VO with
      self.statusEvaluator.

      :return: the failed result of getSiteMask when the site mask cannot be
               retrieved, S_OK() otherwise
    """

    elements = []
    sitesCEs = {}

    # A missing option yields no SEs instead of an AttributeError, and the
    # list is parsed regardless of the whitespace around the commas.
    ses = gConfig.getValue( 'Resources/StorageElementGroups/SE-USER' ) or ''
    for se in [ seName.strip() for seName in ses.split( ',' ) if seName.strip() ]:
      elements.append( { 'ElementName' : se,
                         'ElementType' : 'StorageElement' } )

    wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
    activeSites = wmsAdmin.getSiteMask()
    if not activeSites[ 'OK' ]:
      return activeSites
    activeSites = activeSites[ 'Value' ]

    for siteName in activeSites:
      domain = siteName.split('.')[ 0 ]
      vos = BESUtils.getSiteVO( siteName )
      if 'CLOUD' != domain:
        siteCEs = CSHelpers.getSiteComputingElements( siteName )
        sitesCEs[ siteName ] = siteCEs
        for ce in siteCEs:
          elements.append( { 'ElementName' : ce,
                             'ElementType' : 'ComputingElement',
                             'VO' : vos } )
      else:
        # A CLOUD site is tested as a single element named after the site.
        sitesCEs[ siteName ] = [ siteName ]
        elements.append( { 'ElementName' : siteName,
                           'ElementType' : 'CLOUD',
                           'VO' : vos } )

    lastCheckTime = datetime.utcnow().replace(microsecond = 0)
    self.elementsStatus = {}

    # One thread per element; all of them are joined before the evaluation.
    threads = []
    for elementDict in elements:
      t = threading.Thread( target = self._execute, args = ( elementDict, ) )
      threads.append( t )
      t.start()

    for thread in threads:
      thread.join()

    for siteName in activeSites:
      # Only the first storage element of the site is taken into account.
      seList = CSHelpers.getSiteStorageElements( siteName )
      se = ''
      if [] != seList:
        se = seList[ 0 ]
      try:
        seStatus = self.elementsStatus[ se ][ 'all' ]
      except KeyError:
        seStatus = None

      # Group the CE statuses of the site by VO.
      voStatus = { 'all' : [] }
      for ce in sitesCEs[ siteName ]:
        if ce not in self.elementsStatus:
          continue
        for vo, status in self.elementsStatus[ ce ].items():
          voStatus.setdefault( vo, [] ).append( status )

      for vo, ceStatusList in voStatus.items():
        if ceStatusList == [] and seStatus is None:
          continue
        res = self.statusEvaluator.evaluateSiteStatus( siteName, ceStatusList, seStatus, vo = vo, lastCheckTime = lastCheckTime)
        if not res[ 'OK' ]:
          gLogger.error( 'StatusEvaluator.evaluateSiteStatus: %s' % res[ 'Message' ] )
          # Stop evaluating the remaining VOs of this site.
          break

    return S_OK()