コード例 #1
0
ファイル: GetSRMFile.py プロジェクト: LCDsoft/ILCDIRAC
  def execute(self):
    """ Run this.
    """
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
      LOG.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'], self.stepStatus['OK']))
      return S_OK('Workflow status is not OK')
    result = self.resolveInputVariables()
    if not result['OK']:
      LOG.error("Failed to resolve input parameters:", result["Message"])
      return result
    if not self.srmfiles:
      LOG.error('Files txt was not found correctly: %s' % self.srmfiles)
      return S_ERROR('Files txt was not found correctly')
    
    if not isinstance( self.files[0], dict ):
      LOG.error('Files were not found correctly: %s' % self.files)
      return S_ERROR('Files were not found correctly')

    ##Now need to check that there are not that many concurrent jobs getting the overlay at the same time
    max_concurrent_running = self.ops.getValue('/GetSRM/MaxConcurrentRunning', 100)
    error_count = 0
    while 1:
      if error_count > 10 :
        LOG.error('JobDB Content does not return expected dictionary')
        return S_ERROR('Failed to get number of concurrent overlay jobs')
      jobMonitor = JobMonitoringClient()
      res = jobMonitor.getCurrentJobCounters({'ApplicationStatus':'Downloading SRM files'})
      if not res['OK']:
        error_count += 1 
        time.sleep(60)
        continue
      running = 0
      if 'Running' in res['Value']:
        running = res['Value']['Running']
      if running < max_concurrent_running:
        break
      else:
        time.sleep(60)        

    self.setApplicationStatus('Downloading SRM files')
    for filed in self.files:
      if 'file' not in filed or 'site' not in filed:
        LOG.error('Dictionnary does not contain correct keys')
        return S_ERROR('Dictionnary does not contain correct keys')
      start = os.getcwd()
      downloadDir = tempfile.mkdtemp(prefix = 'InputData_%s' % (self.counter), dir = start)
      os.chdir(downloadDir)
      storageElement = StorageElement( filed['site'] )
      result = storageElement.getFile( filed['file'] )
      if result['Value']['Failed']:
        result = storageElement.getFile( filed['file'] )
      os.chdir(start)
      if result['Value']['Failed']:
        LOG.error("Failed to get the file from storage:", result['Value']['Failed'])
        return result
      self.counter += 1
      
       
    return S_OK()
コード例 #2
0
ファイル: ResetCounters.py プロジェクト: hamzazafar/ILCDIRAC
class ResetCounters(AgentModule):
    """ Reset the number of jobs at all sites: some sites are not updated properly, so 
  once in a while it's needed to restore the correct number of jobs.
  It does not need to be exact, but enough to clear some of the jobs.
  """
    def initialize(self):
        """ Initialize the agent.
    """
        self.am_setOption("PollingTime", 60)
        self.ovc = OverlaySystemClient()
        self.jobmon = JobMonitoringClient()
        return S_OK()

    def execute(self):
        """ This is called by the Agent Reactor
    """
        res = self.ovc.getSites()
        if not res['OK']:
            return res
        sitedict = {}
        sites = res['Value']
        gLogger.info("Will update info for sites %s" % sites)
        for site in sites:
            attribdict = {
                "Site": site,
                "ApplicationStatus": 'Getting overlay files'
            }
            res = self.jobmon.getCurrentJobCounters(attribdict)
            if not res['OK']:
                continue
            if 'Running' in res['Value']:
                sitedict[site] = res['Value']['Running']
            else:
                sitedict[site] = 0
        gLogger.info("Setting new values %s" % sitedict)
        res = self.ovc.setJobsAtSites(sitedict)
        if not res['OK']:
            gLogger.error(res['Message'])
            return res

        return S_OK()
コード例 #3
0
class ResetCounters ( AgentModule ):
  """ Reset the number of jobs at all sites: some sites are not updated properly, so 
  once in a while it's needed to restore the correct number of jobs.
  It does not need to be exact, but enough to clear some of the jobs.
  """
  def initialize(self):
    """ Initialize the agent.
    """
    self.am_setOption( "PollingTime", 60 )
    self.ovc = OverlaySystemClient()
    self.jobmon = JobMonitoringClient()
    return S_OK()
  
  def execute(self):
    """ This is called by the Agent Reactor
    """
    res = self.ovc.getSites()
    if not res['OK']:
      return res
    sitedict = {}
    sites = res['Value']
    gLogger.info("Will update info for sites %s" % sites)
    for site in sites:
      attribdict = {"Site" : site, "ApplicationStatus": 'Getting overlay files'}
      res = self.jobmon.getCurrentJobCounters(attribdict)
      if not res['OK']:
        continue
      if res['Value'].has_key('Running'):
        sitedict[site] = res['Value']['Running']
      else:
        sitedict[site] = 0
    gLogger.info("Setting new values %s" % sitedict)    
    res = self.ovc.setJobsAtSites(sitedict)
    if not res['OK']:
      gLogger.error(res['Message'])
      return res
    
    return S_OK()
コード例 #4
0
    def test_JobStateUpdateAndJobMonitoringMultuple(self):
        """ # Now, let's submit some jobs. Different sites, types, inputs
    """
        wmsClient = WMSClient()
        jobMonitor = JobMonitoringClient()
        jobStateUpdate = RPCClient('WorkloadManagement/JobStateUpdate')

        jobIDs = []
        dests = ['DIRAC.site1.org', 'DIRAC.site2.org']
        lfnss = [['/a/1.txt', '/a/2.txt'],
                 ['/a/1.txt', '/a/3.txt', '/a/4.txt'], []]
        types = ['User', 'Test']
        for dest in dests:
            for lfns in lfnss:
                for jobType in types:
                    job = helloWorldJob()
                    job.setDestination(dest)
                    job.setInputData(lfns)
                    job.setType(jobType)
                    jobDescription = createFile(job)
                    res = wmsClient.submitJob(
                        job._toJDL(xmlFile=jobDescription))
                    self.assert_(res['OK'])
                    jobID = res['Value']
                    jobIDs.append(jobID)

        res = jobMonitor.getSites()
        self.assert_(res['OK'])
        self.assert_(
            set(res['Value']) <= set(dests + ['ANY', 'DIRAC.Jenkins.ch']))
        res = jobMonitor.getJobTypes()
        self.assert_(res['OK'])
        self.assertEqual(sorted(res['Value']), sorted(types))
        res = jobMonitor.getApplicationStates()
        self.assert_(res['OK'])
        self.assertEqual(sorted(res['Value']), sorted(['Unknown']))

        res = jobMonitor.getOwners()
        self.assert_(res['OK'])
        res = jobMonitor.getOwnerGroup()
        self.assert_(res['OK'])
        res = jobMonitor.getProductionIds()
        self.assert_(res['OK'])
        res = jobMonitor.getJobGroups()
        self.assert_(res['OK'])
        res = jobMonitor.getStates()
        self.assert_(res['OK'])
        self.assert_(
            sorted(res['Value']) in [['Received'],
                                     sorted(['Received', 'Waiting'])])
        res = jobMonitor.getMinorStates()
        self.assert_(res['OK'])
        self.assert_(
            sorted(res['Value']) in [['Job accepted'],
                                     sorted(['Job accepted', 'matching'])])
        self.assert_(res['OK'])
        res = jobMonitor.getJobs()
        self.assert_(res['OK'])
        self.assert_(set([str(x) for x in jobIDs]) <= set(res['Value']))
        #     res = jobMonitor.getCounters(attrList)
        #     self.assert_( res['OK'] )
        res = jobMonitor.getCurrentJobCounters()
        self.assert_(res['OK'])
        try:
            self.assert_(
                res['Value'].get('Received') + res['Value'].get('Waiting') >=
                long(len(dests) * len(lfnss) * len(types)))
        except TypeError:
            pass
        res = jobMonitor.getJobsSummary(jobIDs)
        self.assert_(res['OK'])
        res = jobMonitor.getJobPageSummaryWeb({}, [], 0, 100)
        self.assert_(res['OK'])

        res = jobStateUpdate.setJobStatusBulk(
            jobID, {
                str(datetime.datetime.utcnow()): {
                    'Status': 'Running',
                    'MinorStatus': 'MinorStatus',
                    'ApplicationStatus': 'ApplicationStatus',
                    'Source': 'Unknown'
                }
            })
        self.assert_(res['OK'])
        res = jobStateUpdate.setJobsParameter({jobID: ['Status', 'Running']})
        self.assert_(res['OK'])

        # delete the jobs - this will just set its status to "deleted"
        wmsClient.deleteJob(jobIDs)
コード例 #5
0
ファイル: TestClientWMS.py プロジェクト: avedaee/TestDIRAC
  def test_JobStateUpdateAndJobMonitoringMultuple( self ):
    """ # Now, let's submit some jobs. Different sites, types, inputs
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = RPCClient( 'WorkloadManagement/JobStateUpdate' )

    jobIDs = []
    dests = ['DIRAC.site1.org', 'DIRAC.site2.org']
    lfnss = [['/a/1.txt', '/a/2.txt'], ['/a/1.txt', '/a/3.txt', '/a/4.txt'], []]
    types = ['User', 'Test']
    for dest in dests:
      for lfns in lfnss:
        for jobType in types:
          job = helloWorldJob()
          job.setDestination( dest )
          job.setInputData( lfns )
          job.setType( jobType )
          jobDescription = createFile( job )
          res = wmsClient.submitJob( job._toJDL( xmlFile = jobDescription ) )
          self.assert_( res['OK'] )
          jobID = res['Value']
          jobIDs.append( jobID )

    res = jobMonitor.getSites()
    self.assert_( res['OK'] )
    self.assert_( set( res['Value'] ) <= set( dests + ['ANY', 'DIRAC.Jenkins.org'] ) )
    res = jobMonitor.getJobTypes()
    self.assert_( res['OK'] )
    self.assertEqual( sorted( res['Value'] ), sorted( types ) )
    res = jobMonitor.getApplicationStates()
    self.assert_( res['OK'] )
    self.assertEqual( sorted( res['Value'] ), sorted( ['Unknown'] ) )

    res = jobMonitor.getOwners()
    self.assert_( res['OK'] )
    res = jobMonitor.getOwnerGroup()
    self.assert_( res['OK'] )
    res = jobMonitor.getProductionIds()
    self.assert_( res['OK'] )
    res = jobMonitor.getJobGroups()
    self.assert_( res['OK'] )
    res = jobMonitor.getStates()
    self.assert_( res['OK'] )
    self.assert_( sorted( res['Value'] ) in [['Received'], sorted( ['Received', 'Waiting'] )] )
    res = jobMonitor.getMinorStates()
    self.assert_( res['OK'] )
    self.assert_( sorted( res['Value'] ) in [['Job accepted'], sorted( ['Job accepted', 'matching'] ) ] )
    self.assert_( res['OK'] )
    res = jobMonitor.getJobs()
    self.assert_( res['OK'] )
    self.assert_( set( [str( x ) for x in jobIDs] ) <= set( res['Value'] ) )
#     res = jobMonitor.getCounters(attrList)
#     self.assert_( res['OK'] )
    res = jobMonitor.getCurrentJobCounters()
    self.assert_( res['OK'] )
    try:
      self.assert_( res['Value'].get( 'Received' ) + res['Value'].get( 'Waiting' ) >= long( len( dests ) * len( lfnss ) * len( types ) ) )
    except TypeError:
      pass
    res = jobMonitor.getJobsSummary( jobIDs )
    self.assert_( res['OK'] )
    res = jobMonitor.getJobPageSummaryWeb( {}, [], 0, 100 )
    self.assert_( res['OK'] )

    res = jobStateUpdate.setJobStatusBulk( jobID, {str( datetime.datetime.utcnow() ):{'Status': 'Running',
                                                                                      'MinorStatus': 'MinorStatus',
                                                                                      'ApplicationStatus': 'ApplicationStatus',
                                                                                      'Source': 'Unknown'}} )
    self.assert_( res['OK'] )
    res = jobStateUpdate.setJobsParameter( {jobID:['Status', 'Running']} )
    self.assert_( res['OK'] )

    # delete the jobs - this will just set its status to "deleted"
    wmsClient.deleteJob( jobIDs )
コード例 #6
0
    def test_JobStateUpdateAndJobMonitoringMultuple(self):
        """ # Now, let's submit some jobs. Different sites, types, inputs
    """
        wmsClient = WMSClient()
        jobMonitor = JobMonitoringClient()
        jobStateUpdate = JobStateUpdateClient()

        jobIDs = []
        lfnss = [['/a/1.txt', '/a/2.txt'],
                 ['/a/1.txt', '/a/3.txt', '/a/4.txt'], []]
        types = ['User', 'Test']
        for lfns in lfnss:
            for jobType in types:
                job = helloWorldJob()
                job.setDestination('DIRAC.Jenkins.ch')
                job.setInputData(lfns)
                job.setType(jobType)
                jobDescription = createFile(job)
                res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
                self.assertTrue(res['OK'], res.get('Message'))
                jobID = res['Value']
            jobIDs.append(jobID)

        res = jobMonitor.getSites()
        print(res)
        self.assertTrue(res['OK'], res.get('Message'))
        self.assertTrue(set(res['Value']) <= {'ANY', 'DIRAC.Jenkins.ch'},
                        msg="Got %s" % res['Value'])
        res = jobMonitor.getJobTypes()
        self.assertTrue(res['OK'], res.get('Message'))
        self.assertEqual(sorted(res['Value']),
                         sorted(types),
                         msg="Got %s" % str(sorted(res['Value'])))
        res = jobMonitor.getApplicationStates()
        self.assertTrue(res['OK'], res.get('Message'))
        self.assertEqual(sorted(res['Value']),
                         sorted(['Unknown']),
                         msg="Got %s" % sorted(str(res['Value'])))

        res = jobMonitor.getOwners()
        self.assertTrue(res['OK'], res.get('Message'))
        res = jobMonitor.getOwnerGroup()
        self.assertTrue(res['OK'], res.get('Message'))
        res = jobMonitor.getProductionIds()
        self.assertTrue(res['OK'], res.get('Message'))
        res = jobMonitor.getJobGroups()
        self.assertTrue(res['OK'], res.get('Message'))
        resJG_empty = res['Value']
        res = jobMonitor.getJobGroups(None, datetime.datetime.utcnow())
        self.assertTrue(res['OK'], res.get('Message'))
        resJG_olderThanNow = res['Value']
        self.assertEqual(resJG_empty, resJG_olderThanNow)
        res = jobMonitor.getJobGroups(
            None,
            datetime.datetime.utcnow() - datetime.timedelta(days=365))
        self.assertTrue(res['OK'], res.get('Message'))
        resJG_olderThanOneYear = res['Value']
        self.assertTrue(
            set(resJG_olderThanOneYear).issubset(set(resJG_olderThanNow)))
        res = jobMonitor.getStates()
        self.assertTrue(res['OK'], res.get('Message'))
        self.assertTrue(
            sorted(res['Value']) in [['Received'],
                                     sorted(['Received', 'Waiting'])])
        res = jobMonitor.getMinorStates()
        self.assertTrue(res['OK'], res.get('Message'))
        self.assertTrue(
            sorted(res['Value']) in [['Job accepted'],
                                     sorted(
                                         ['Job accepted', 'Job Rescheduled'])])
        self.assertTrue(res['OK'], res.get('Message'))
        res = jobMonitor.getJobs()
        self.assertTrue(res['OK'], res.get('Message'))
        self.assertTrue(set([str(x) for x in jobIDs]) <= set(res['Value']))
        #     res = jobMonitor.getCounters(attrList)
        #     self.assertTrue(res['OK'], res.get('Message'))
        res = jobMonitor.getCurrentJobCounters()
        self.assertTrue(res['OK'], res.get('Message'))
        try:
            self.assertTrue(
                res['Value'].get('Received') +
                res['Value'].get('Waiting') >= int(len(lfnss) * len(types)))
        except TypeError:
            pass
        res = jobMonitor.getJobsSummary(jobIDs)
        self.assertTrue(res['OK'], res.get('Message'))
        res = jobMonitor.getJobPageSummaryWeb({}, [], 0, 100)
        self.assertTrue(res['OK'], res.get('Message'))

        res = jobStateUpdate.setJobStatusBulk(
            jobID, {
                str(datetime.datetime.utcnow()): {
                    'Status': 'Running',
                    'MinorStatus': 'MinorStatus',
                    'ApplicationStatus': 'ApplicationStatus',
                    'Source': 'Unknown'
                }
            })
        self.assertTrue(res['OK'], res.get('Message'))
        res = jobStateUpdate.setJobsParameter({jobID: ['Status', 'Running']})
        self.assertTrue(res['OK'], res.get('Message'))

        # delete the jobs - this will just set its status to "deleted"
        wmsClient.deleteJob(jobIDs)