コード例 #1
0
    def __init__(self, *args, **kwargs):
        """Construct the agent and reset all of its bookkeeping attributes.

        Every positional and keyword argument is passed on unchanged to
        ``AgentModule.__init__``.
        """
        AgentModule.__init__(self, *args, **kwargs)

        # VM-type bookkeeping: three independent empty dicts plus a counter
        # of failures that defaults to 0 for unseen keys.
        self.vmTypeDict, self.vmTypeCECache, self.vmTypeSlots = {}, {}, {}
        self.failedVMTypes = defaultdict(int)
        self.firstPass = True

        # Identity-related settings start out blank.
        self.vo = ''
        self.group = ''
        # self.voGroups contain all the eligible user groups for clouds submitted by this SiteDirector
        self.voGroups = []
        self.cloudDN = ''
        self.cloudGroup = ''
        self.platforms, self.sites = [], []

        # Client used to talk to the WMSAdministrator service.
        self.wmsClient = WMSAdministratorClient()

        self.proxy = None

        # Behaviour switches.
        self.updateStatus = True
        self.getOutput = False
        self.sendAccounting = True
コード例 #2
0
ファイル: JobCommand.py プロジェクト: DIRACGrid/DIRAC
    def __init__(self, args=None, clients=None):
        """Initialise the command and pick the WMSAdministrator client to use.

        :param args: forwarded to the parent constructor
        :param clients: forwarded to the parent constructor
        """
        super(JobsWMSCommand, self).__init__(args, clients)

        # Reuse a client already registered in self.apis; only build a new
        # WMSAdministratorClient when none is present.
        self.wmsAdmin = (
            self.apis["WMSAdministrator"]
            if "WMSAdministrator" in self.apis
            else WMSAdministratorClient()
        )
コード例 #3
0
ファイル: SiteStatus.py プロジェクト: DIRACGrid/DIRAC
  def getSiteStatuses(self, siteNames=None):
    """
    Return the status of one, several or all sites.

    When ``self.rssFlag`` is set the request is delegated to
    ``self.__getRSSSiteStatus``. Otherwise a WMSAdministratorClient is asked
    through ``getSiteMaskStatus``: once per requested site, or once without
    arguments when no site names are given.

    For an explicit list of sites the successful value looks like::

      {
       'test1.test1.org': 'Active',
       'test2.test2.org': 'Banned',
      }

    :param siteNames: name(s) of the sites to be matched; a single string is
                      treated as a one-element list, ``None`` or an empty
                      value means "all sites"
    :type siteNames: list, str
    :return: S_OK( statuses ) || the first failing result from the client
    """
    if self.rssFlag:
      return self.__getRSSSiteStatus(siteNames)

    wmsAdmin = WMSAdministratorClient()

    if not siteNames:
      # No selection: whatever the service returns for "all" is passed through.
      allSites = wmsAdmin.getSiteMaskStatus()
      if not allSites['OK']:
        return allSites
      return S_OK(allSites['Value'])

    if isinstance(siteNames, basestring):
      siteNames = [siteNames]

    statuses = {}
    for siteName in siteNames:
      answer = wmsAdmin.getSiteMaskStatus(siteName)
      if not answer['OK']:
        # Abort on the first site that cannot be resolved.
        return answer
      statuses[siteName] = answer['Value']

    return S_OK(statuses)
コード例 #4
0
ファイル: SiteStatus.py プロジェクト: rob-c/DIRAC
    def getSiteStatuses(self, siteNames=None):
        """
        Return the status of one, several or all sites.

        When ``self.rssFlag`` is set the request is delegated to
        ``self.__getRSSSiteStatus``. Otherwise a WMSAdministratorClient is
        asked through ``getSiteMaskStatus``: once per requested site, or once
        without arguments when no site names are given.

        For an explicit list of sites the successful value looks like::

          {
           'test1.test1.org': 'Active',
           'test2.test2.org': 'Banned',
          }

        :param siteNames: name(s) of the sites to be matched; a single string
                          is treated as a one-element list, ``None`` or an
                          empty value means "all sites"
        :type siteNames: list, str
        :return: S_OK( statuses ) || the first failing result from the client
        """
        if self.rssFlag:
            return self.__getRSSSiteStatus(siteNames)

        wmsAdmin = WMSAdministratorClient()

        if not siteNames:
            # No selection: the service answer for "all" is passed through.
            allSites = wmsAdmin.getSiteMaskStatus()
            if not allSites['OK']:
                return allSites
            return S_OK(allSites['Value'])

        if isinstance(siteNames, six.string_types):
            siteNames = [siteNames]

        statuses = {}
        for siteName in siteNames:
            answer = wmsAdmin.getSiteMaskStatus(siteName)
            if not answer['OK']:
                # Abort on the first site that cannot be resolved.
                return answer
            statuses[siteName] = answer['Value']

        return S_OK(statuses)
コード例 #5
0
    def __init__(self, args=None, clients=None):
        """Initialise the command and select the service clients it will use.

        :param args: forwarded to the parent constructor
        :param clients: forwarded to the parent constructor
        """
        super(PilotCommand, self).__init__(args, clients)

        # For each client, prefer the instance registered in self.apis and
        # create a fresh one only when it is missing.
        self.wmsAdmin = (self.apis['WMSAdministrator']
                         if 'WMSAdministrator' in self.apis
                         else WMSAdministratorClient())

        self.rmClient = (self.apis['ResourceManagementClient']
                         if 'ResourceManagementClient' in self.apis
                         else ResourceManagementClient())
コード例 #6
0
ファイル: JobCommand.py プロジェクト: DIRACGrid/DIRAC
    def __init__(self, args=None, clients=None):
        """Initialise the command and select the service clients it will use.

        :param args: forwarded to the parent constructor
        :param clients: forwarded to the parent constructor
        """
        super(JobCommand, self).__init__(args, clients)

        # A client found in self.apis is reused; otherwise a new one is built.
        self.wmsAdmin = (
            self.apis["WMSAdministrator"]
            if "WMSAdministrator" in self.apis
            else WMSAdministratorClient()
        )

        self.rmClient = (
            self.apis["ResourceManagementClient"]
            if "ResourceManagementClient" in self.apis
            else ResourceManagementClient()
        )
コード例 #7
0
def initSites():
    '''
    Initializes Sites statuses in the Resource Status System.

    The current statuses are fetched with
    ``WMSAdministratorClient().getAllSiteMaskStatus()`` (per the original
    documentation these come from the "SiteMask" table of the "JobDB"
    database) and each site is written with
    ``ResourceStatusClient.addOrModifyStatusElement``.

    On any failure the error message is logged and ``DIRACExit(1)`` is called.

    :return: S_OK()
    '''

    rssClient = ResourceStatusClient.ResourceStatusClient()

    sites = WMSAdministratorClient().getAllSiteMaskStatus()

    if not sites['OK']:
        subLogger.error(sites['Message'])
        DIRACExit(1)

    # .items() instead of the Python-2-only .iteritems(): works on both
    # Python 2 and Python 3.
    for site, elements in sites['Value'].items():
        result = rssClient.addOrModifyStatusElement(
            "Site",
            "Status",
            name=site,
            statusType='all',
            # First entry of the per-site record is used as the status.
            status=elements[0],
            # Element type is the part of the site name before the first dot.
            elementType=site.split('.')[0],
            tokenOwner='rs_svc',
            reason='dirac-rss-sync')
        if not result['OK']:
            subLogger.error(result['Message'])
            DIRACExit(1)

    return S_OK()
コード例 #8
0
    def initialize(self):
        """Set default agent options and create the DB/client handles.

        :return: S_OK()
        """
        # Default option values, applied in this order.
        for optionName, optionValue in (('PollingTime', 120),
                                        ('GridEnv', ''),
                                        ('PilotStalledDays', 3)):
            self.am_setOption(optionName, optionValue)

        self.pilotDB = PilotAgentsDB()
        self.diracadmin = DiracAdmin()
        self.jobDB = JobDB()

        # Delays are read from the agent options with fallbacks of 30 and 7.
        self.clearPilotsDelay = self.am_getOption('ClearPilotsDelay', 30)
        self.clearAbortedDelay = self.am_getOption('ClearAbortedPilotsDelay', 7)

        self.WMSAdministrator = WMSAdministratorClient()

        return S_OK()
コード例 #9
0
    def banSite(self, site, comment, printOutput=False):
        """Set the status of a site to Banned.

        The site is validated first; if it already appears in the Banned site
        mask nothing is changed. The change goes through
        ``self.sitestatus.setSiteStatus`` when ``self.rssFlag`` is set and
        through ``WMSAdministratorClient().banSite`` otherwise.

        Example usage:

          >>> gLogger.notice(diracAdmin.banSite(site, comment))

        :param site: name of the site to ban
        :param comment: comment passed along with the status change
        :param printOutput: when True, also report the outcome via gLogger.notice
        :return: S_OK,S_ERROR
        """
        validity = self.__checkSiteIsValid(site)
        if not validity['OK']:
            return validity

        bannedMask = self.getSiteMask(status='Banned')
        if not bannedMask['OK']:
            return bannedMask

        if site in bannedMask['Value']:
            alreadyMessage = 'Site %s is already Banned' % site
            if printOutput:
                gLogger.notice(alreadyMessage)
            return S_OK(alreadyMessage)

        if self.rssFlag:
            outcome = self.sitestatus.setSiteStatus(site, 'Banned', comment)
        else:
            outcome = WMSAdministratorClient().banSite(site, comment)

        # Success and failure both return the raw outcome; only success is
        # announced.
        if outcome['OK'] and printOutput:
            gLogger.notice('Site %s status is set to Banned' % site)

        return outcome
コード例 #10
0
ファイル: dirac_rss_sync.py プロジェクト: TaykYoku/DIRAC
def initSites():
    """
    Initializes Sites statuses in the Resource Status System.

    Statuses are fetched with ``getAllSiteMaskStatus`` of the
    WMSAdministratorClient (per the original documentation: the "SiteMask"
    table of the "JobDB" database) and stored one by one with
    ``addOrModifyStatusElement``. Any failure is logged and followed by
    ``DIRACExit(1)``.

    :return: S_OK()
    """
    from DIRAC.WorkloadManagementSystem.Client.WMSAdministratorClient import WMSAdministratorClient
    from DIRAC.ResourceStatusSystem.Client import ResourceStatusClient

    rssClient = ResourceStatusClient.ResourceStatusClient()

    maskResult = WMSAdministratorClient().getAllSiteMaskStatus()
    if not maskResult["OK"]:
        subLogger.error(maskResult["Message"])
        DIRACExit(1)

    for siteName, siteRecord in maskResult["Value"].items():
        stored = rssClient.addOrModifyStatusElement(
            "Site",
            "Status",
            name=siteName,
            statusType="all",
            # First entry of the record is used as the status.
            status=siteRecord[0],
            # Element type is the site-name prefix before the first dot.
            elementType=siteName.split(".")[0],
            tokenOwner="rs_svc",
            reason="dirac-rss-sync",
        )
        if stored["OK"]:
            continue
        subLogger.error(stored["Message"])
        DIRACExit(1)

    return S_OK()
コード例 #11
0
    def allowSite(self, site, comment, printOutput=False):
        """Set the status of a site to Active.

        The site is validated first; if it already appears in the Active site
        mask nothing is changed. The change goes through
        ``self.sitestatus.setSiteStatus`` when ``self.rssFlag`` is set and
        through ``WMSAdministratorClient().allowSite`` otherwise.

        Example usage:

          >>> gLogger.notice(diracAdmin.allowSite(site, comment))

        :param site: name of the site to allow
        :param comment: comment passed along with the status change
        :param printOutput: when True, also report the outcome via gLogger.notice
        :return: S_OK,S_ERROR
        """
        validity = self.__checkSiteIsValid(site)
        if not validity['OK']:
            return validity

        activeMask = self.getSiteMask(status='Active')
        if not activeMask['OK']:
            return activeMask

        if site in activeMask['Value']:
            alreadyMessage = 'Site %s is already Active' % site
            if printOutput:
                gLogger.notice(alreadyMessage)
            return S_OK(alreadyMessage)

        if self.rssFlag:
            outcome = self.sitestatus.setSiteStatus(site, 'Active', comment)
        else:
            outcome = WMSAdministratorClient().allowSite(site, comment)

        # Success and failure both return the raw outcome; only success is
        # announced.
        if outcome['OK'] and printOutput:
            gLogger.notice('Site %s status is set to Active' % site)

        return outcome
コード例 #12
0
ファイル: CacheFeederAgent.py プロジェクト: sparsh35/DIRAC
  def initialize(self):
    """ Register the commands to run and create the clients they share.

    The ResourceStatusClient and ResourceManagementClient classes are resolved
    through ObjectLoader; if either cannot be loaded the error is logged and
    the failing result is returned.

    :return: S_OK() or the failing ObjectLoader result
    """

    loaded = ObjectLoader().loadObject('DIRAC.ResourceStatusSystem.Client.ResourceStatusClient',
                                       'ResourceStatusClient')
    if not loaded['OK']:
      self.log.error('Failed to load ResourceStatusClient class: %s' % loaded['Message'])
      return loaded
    statusClientClass = loaded['Value']

    loaded = ObjectLoader().loadObject('DIRAC.ResourceStatusSystem.Client.ResourceManagementClient',
                                       'ResourceManagementClient')
    if not loaded['OK']:
      self.log.error('Failed to load ResourceManagementClient class: %s' % loaded['Message'])
      return loaded
    managementClientClass = loaded['Value']

    # Active commands: each entry is a list with one {commandName: arguments} dict.
    self.commands['Downtime'] = [{'Downtime': {}}]
    self.commands['GOCDBSync'] = [{'GOCDBSync': {}}]
    self.commands['FreeDiskSpace'] = [{'FreeDiskSpace': {}}]

    # Disabled command definitions, kept for reference.
    # PilotsCommand
    #    self.commands[ 'Pilots' ] = [
    #                                 { 'PilotsWMS' : { 'element' : 'Site', 'siteName' : None } },
    #                                 { 'PilotsWMS' : { 'element' : 'Resource', 'siteName' : None } }
    #                                 ]

    # FIXME: do not forget about hourly vs Always ...etc
    # AccountingCacheCommand
    #    self.commands[ 'AccountingCache' ] = [
    #                                          {'SuccessfullJobsBySiteSplitted'    :{'hours' :24, 'plotType' :'Job' }},
    #                                          {'FailedJobsBySiteSplitted'         :{'hours' :24, 'plotType' :'Job' }},
    #                                          {'SuccessfullPilotsBySiteSplitted'  :{'hours' :24, 'plotType' :'Pilot' }},
    #                                          {'FailedPilotsBySiteSplitted'       :{'hours' :24, 'plotType' :'Pilot' }},
    #                                          {'SuccessfullPilotsByCESplitted'    :{'hours' :24, 'plotType' :'Pilot' }},
    #                                          {'FailedPilotsByCESplitted'         :{'hours' :24, 'plotType' :'Pilot' }},
    #                                          {'RunningJobsBySiteSplitted'        :{'hours' :24, 'plotType' :'Job' }},
    # #                                          {'RunningJobsBySiteSplitted'        :{'hours' :168, 'plotType' :'Job' }},
    # #                                          {'RunningJobsBySiteSplitted'        :{'hours' :720, 'plotType' :'Job' }},
    # #                                          {'RunningJobsBySiteSplitted'        :{'hours' :8760, 'plotType' :'Job' }},
    #                                          ]

    # VOBOXAvailability
    #    self.commands[ 'VOBOXAvailability' ] = [
    #                                            { 'VOBOXAvailability' : {} }
    #

    # One instance per client, stored in self.clients so commands can reuse them.
    self.clients['GOCDBClient'] = GOCDBClient()
    self.clients['ReportsClient'] = ReportsClient()
    self.clients['ResourceStatusClient'] = statusClientClass()
    self.clients['ResourceManagementClient'] = managementClientClass()
    self.clients['WMSAdministrator'] = WMSAdministratorClient()
    self.clients['Pilots'] = PilotManagerClient()

    self.cCaller = CommandCaller

    return S_OK()
コード例 #13
0
    def getPilotOutput(self, gridReference, directory=''):
        """Retrieve the pilot output (std.out and std.err) for a pilot reference.

        The output is fetched with ``WMSAdministratorClient().getPilotOutput``
        and written into a new ``pilot_<last part of the reference>``
        directory below ``directory``. If that directory already exists an
        error is returned and nothing is written.

          >>> print dirac.getPilotOutput(gridReference)

        :param gridReference: pilot reference; must be a string
        :type gridReference: string
        :param directory: existing directory to write into; defaults to
                          ``self.currentDir`` when empty
        :type directory: string
        :return: S_OK,S_ERROR
        """
        if not isinstance(gridReference, basestring):
            return self._errorReport('Expected string for pilot reference')

        targetDir = directory if directory else self.currentDir
        if not os.path.exists(targetDir):
            return self._errorReport('Directory %s does not exist' % targetDir)

        result = WMSAdministratorClient().getPilotOutput(gridReference)
        if not result['OK']:
            return result

        # Use the last path component of the reference, or a fixed fallback
        # when the reference ends with a slash.
        shortReference = gridReference.split('/')[-1] or 'reference'
        outputPath = '%s/pilot_%s' % (targetDir, shortReference)

        if os.path.exists(outputPath):
            self.log.info('Remove %s and retry to continue' % outputPath)
            return S_ERROR('Remove %s and retry to continue' % outputPath)

        if not os.path.exists(outputPath):
            self.log.verbose('Creating directory %s' % outputPath)
            os.mkdir(outputPath)

        outputs = result['Value']
        # (key in the result, file path template, success message, missing message)
        streams = (
            ('StdOut', '%s/std.out', 'Standard output written to %s', 'No standard output returned'),
            ('StdErr', '%s/std.err', 'Standard error written to %s', 'No standard error returned'),
        )
        for key, pathTemplate, writtenMessage, missingMessage in streams:
            if key not in outputs:
                self.log.warn(missingMessage)
                continue
            filePath = pathTemplate % (outputPath)
            with open(filePath, 'w') as handle:
                handle.write(outputs[key])
            self.log.info(writtenMessage % (filePath))

        self.log.always('Outputs retrieved in %s' % outputPath)
        return result
コード例 #14
0
ファイル: PilotCommand.py プロジェクト: DIRACGrid/DIRAC
  def __init__(self, args=None, clients=None):
    """Initialise the command and select the service clients it will use.

    :param args: forwarded to the parent constructor
    :param clients: forwarded to the parent constructor
    """
    super(PilotCommand, self).__init__(args, clients)

    # A client found in self.apis is reused; otherwise a new one is built.
    self.wmsAdmin = (self.apis['WMSAdministrator']
                     if 'WMSAdministrator' in self.apis
                     else WMSAdministratorClient())

    self.rmClient = (self.apis['ResourceManagementClient']
                     if 'ResourceManagementClient' in self.apis
                     else ResourceManagementClient())
コード例 #15
0
    def finalize(self):
        """ Job Agent finalization method.

        Reports the pilot identified by ``self.pilotReference`` as 'Done' to
        the WMSAdministrator service. A failed report is only logged as a
        warning; the method returns S_OK() in every case.

        :return: S_OK()
        """
        gridCE = gConfig.getValue('/LocalSite/GridCE', '')
        queue = gConfig.getValue('/LocalSite/CEQueue', '')

        wmsAdmin = WMSAdministratorClient()
        report = wmsAdmin.setPilotStatus(str(self.pilotReference),
                                         'Done',
                                         gridCE,
                                         'Report from JobAgent',
                                         self.siteName,
                                         queue)
        if not report['OK']:
            # Best effort: do not fail finalization because of the report.
            self.log.warn(report['Message'])

        return S_OK()
コード例 #16
0
    def initialize(self):
        """ Register the commands to run and create the clients they share.

        Also sets the 'shifterProxy' agent option to 'DataManager'.

        :return: S_OK()
        """

        self.am_setOption('shifterProxy', 'DataManager')

        self.rmClient = ResourceManagementClient()

        # Active commands: each entry is a list with one {commandName: arguments} dict.
        self.commands['Downtime'] = [{'Downtime': {}}]
        self.commands['GOCDBSync'] = [{'GOCDBSync': {}}]
        self.commands['FreeDiskSpace'] = [{'FreeDiskSpace': {}}]

        # Disabled command definitions, kept for reference.
        # PilotsCommand
        #    self.commands[ 'Pilots' ] = [
        #                                 { 'PilotsWMS' : { 'element' : 'Site', 'siteName' : None } },
        #                                 { 'PilotsWMS' : { 'element' : 'Resource', 'siteName' : None } }
        #                                 ]

        # FIXME: do not forget about hourly vs Always ...etc
        # AccountingCacheCommand
        #    self.commands[ 'AccountingCache' ] = [
        #                                          {'SuccessfullJobsBySiteSplitted'    :{'hours' :24, 'plotType' :'Job' }},
        #                                          {'FailedJobsBySiteSplitted'         :{'hours' :24, 'plotType' :'Job' }},
        #                                          {'SuccessfullPilotsBySiteSplitted'  :{'hours' :24, 'plotType' :'Pilot' }},
        #                                          {'FailedPilotsBySiteSplitted'       :{'hours' :24, 'plotType' :'Pilot' }},
        #                                          {'SuccessfullPilotsByCESplitted'    :{'hours' :24, 'plotType' :'Pilot' }},
        #                                          {'FailedPilotsByCESplitted'         :{'hours' :24, 'plotType' :'Pilot' }},
        #                                          {'RunningJobsBySiteSplitted'        :{'hours' :24, 'plotType' :'Job' }},
        # #                                          {'RunningJobsBySiteSplitted'        :{'hours' :168, 'plotType' :'Job' }},
        # #                                          {'RunningJobsBySiteSplitted'        :{'hours' :720, 'plotType' :'Job' }},
        # #                                          {'RunningJobsBySiteSplitted'        :{'hours' :8760, 'plotType' :'Job' }},
        #                                          ]

        # VOBOXAvailability
        #    self.commands[ 'VOBOXAvailability' ] = [
        #                                            { 'VOBOXAvailability' : {} }
        #

        # One instance per client, stored in self.clients so commands can
        # reuse them. Note that the ResourceManagementClient stored here is a
        # second instance, separate from self.rmClient above.
        self.clients['GOCDBClient'] = GOCDBClient()
        reportGenerator = RPCClient('Accounting/ReportGenerator')
        self.clients['ReportGenerator'] = reportGenerator
        self.clients['ReportsClient'] = ReportsClient()
        self.clients['ResourceStatusClient'] = ResourceStatusClient()
        self.clients['ResourceManagementClient'] = ResourceManagementClient()
        self.clients['WMSAdministrator'] = WMSAdministratorClient()

        self.cCaller = CommandCaller

        return S_OK()
コード例 #17
0
    def killPilot(self, gridReference):
        """Kill the pilot specified.

        The request is forwarded to ``WMSAdministratorClient().killPilot``.

          >>> print dirac.killPilot(gridReference)

        :param gridReference: Pilot Job Reference; must be a string
        :return: S_OK,S_ERROR
        """
        if isinstance(gridReference, basestring):
            return WMSAdministratorClient().killPilot(gridReference)

        return self._errorReport('Expected string for pilot reference')
コード例 #18
0
    def getJobPilotOutput(self, jobID, directory=""):
        """Retrieve the pilot output for an existing job in the WMS.

        The output is fetched with ``WMSAdministratorClient().getJobPilotOutput``
        and written into a new ``pilot_<jobID>`` directory below ``directory``.
        If that directory already exists an error is returned and nothing is
        written.

          >>> gLogger.notice(dirac.getJobPilotOutput(12345))

        :param jobID: JobID
        :type jobID: integer or string
        :param directory: existing directory to write into; defaults to
                          ``self.currentDir`` when empty
        :type directory: string
        :return: S_OK,S_ERROR
        """
        targetDir = directory if directory else self.currentDir
        if not os.path.exists(targetDir):
            return self._errorReport("Directory %s does not exist" % targetDir)

        result = WMSAdministratorClient().getJobPilotOutput(jobID)
        if not result["OK"]:
            return result

        outputPath = "%s/pilot_%s" % (targetDir, jobID)
        if os.path.exists(outputPath):
            self.log.info("Remove %s and retry to continue" % outputPath)
            return S_ERROR("Remove %s and retry to continue" % outputPath)

        if not os.path.exists(outputPath):
            self.log.verbose("Creating directory %s" % outputPath)
            os.mkdir(outputPath)

        outputs = result["Value"]
        # (key in the result, file path template, success message, missing message)
        streams = (
            ("StdOut", "%s/std.out", "Standard output written to %s", "No standard output returned"),
            ("StdError", "%s/std.err", "Standard error written to %s", "No standard error returned"),
        )
        for key, pathTemplate, writtenMessage, missingMessage in streams:
            if key not in outputs:
                self.log.warn(missingMessage)
                continue
            filePath = pathTemplate % (outputPath)
            with open(filePath, "w") as handle:
                handle.write(outputs[key])
            self.log.verbose(writtenMessage % (filePath))

        self.log.always("Outputs retrieved in %s" % outputPath)
        return result
コード例 #19
0
    def getPilotLoggingInfo(self, gridReference):
        """Retrieve the pilot logging info for a pilot reference.

        The request is forwarded to
        ``WMSAdministratorClient().getPilotLoggingInfo``.

          >>> print dirac.getPilotLoggingInfo(gridReference)

        :param gridReference: Grid pilot job reference Id; must be a string
        :type gridReference: string
        :return: S_OK,S_ERROR
        """
        referenceIsString = isinstance(gridReference, basestring)
        if not referenceIsString:
            return self._errorReport('Expected string for pilot reference')

        wmsAdmin = WMSAdministratorClient()
        return wmsAdmin.getPilotLoggingInfo(gridReference)
コード例 #20
0
    def getJobPilotOutput(self, jobID, directory=''):
        """Retrieve the pilot output for an existing job in the WMS.

        The output is fetched with ``WMSAdministratorClient().getJobPilotOutput``
        and written into a new ``pilot_<jobID>`` directory below ``directory``.
        If that directory already exists an error is returned and nothing is
        written.

          >>> gLogger.notice(dirac.getJobPilotOutput(12345))

        :param jobID: JobID
        :type jobID: integer or string
        :param directory: existing directory to write into; defaults to
                          ``self.currentDir`` when empty
        :type directory: string
        :return: S_OK,S_ERROR
        """
        targetDir = directory if directory else self.currentDir
        if not os.path.exists(targetDir):
            return self._errorReport('Directory %s does not exist' % targetDir)

        result = WMSAdministratorClient().getJobPilotOutput(jobID)
        if not result['OK']:
            return result

        outputPath = '%s/pilot_%s' % (targetDir, jobID)
        if os.path.exists(outputPath):
            self.log.info('Remove %s and retry to continue' % outputPath)
            return S_ERROR('Remove %s and retry to continue' % outputPath)

        if not os.path.exists(outputPath):
            self.log.verbose('Creating directory %s' % outputPath)
            os.mkdir(outputPath)

        outputs = result['Value']
        # (key in the result, file path template, success message, missing message)
        streams = (
            ('StdOut', '%s/std.out', 'Standard output written to %s', 'No standard output returned'),
            ('StdError', '%s/std.err', 'Standard error written to %s', 'No standard error returned'),
        )
        for key, pathTemplate, writtenMessage, missingMessage in streams:
            if key not in outputs:
                self.log.warn(missingMessage)
                continue
            filePath = pathTemplate % (outputPath)
            with open(filePath, 'w') as handle:
                handle.write(outputs[key])
            self.log.verbose(writtenMessage % (filePath))

        self.log.always('Outputs retrieved in %s' % outputPath)
        return result
コード例 #21
0
    def getSiteMaskLogging(self, site=None, printOutput=False):
        """Retrieves site mask logging information and optionally prints it.

        The history is read with ``ResourceStatusClient().selectStatusElement``
        when ``self.rssFlag`` is set and with
        ``WMSAdministratorClient().getSiteMaskLogging`` otherwise.

        Note that the retrieved data is only printed, never returned: on
        success the method returns an empty S_OK().

        Example usage:

          >>> gLogger.notice(diracAdmin.getSiteMaskLogging('LCG.AUVER.fr'))
          {'OK': True, 'Value': }

        :param site: site name to query; None asks for all sites
        :param printOutput: when True, print the logging records via gLogger.notice
        :return: S_OK,S_ERROR
        """
        result = self.__checkSiteIsValid(site)
        if not result['OK']:
            return result

        if self.rssFlag:
            result = ResourceStatusClient().selectStatusElement('Site',
                                                                'History',
                                                                name=site)
        else:
            result = WMSAdministratorClient().getSiteMaskLogging(site)

        if not result['OK']:
            return result

        if printOutput:
            if site:
                gLogger.notice('\nSite Mask Logging Info for %s\n' % site)
            else:
                gLogger.notice('\nAll Site Mask Logging Info\n')

            sitesLogging = result['Value']
            if isinstance(sitesLogging, dict):
                for siteName, tupleList in sitesLogging.items():
                    # Per-site header is only needed when no single site was
                    # requested. The previous guard tested the dict key
                    # (`if not siteName`), so the header was printed only for
                    # an empty site name.
                    if not site:
                        gLogger.notice('\n===> %s\n' % siteName)
                    for tup in tupleList:
                        # Fixed-width first two columns, then the remaining
                        # two fields in parentheses and quotes.
                        stup = str(tup[0]).ljust(8) + str(tup[1]).ljust(20)
                        stup += '( ' + str(tup[2]) + ' )  "' + str(tup[3]) + '"'
                        gLogger.notice(stup)
                    gLogger.notice(' ')
            elif isinstance(sitesLogging, list):
                # List-shaped results: print fields 1, 3 and 4 of each record.
                sitesLoggingList = [(sl[1], sl[3], sl[4])
                                    for sl in sitesLogging]
                for siteLog in sitesLoggingList:
                    gLogger.notice(siteLog)

        return S_OK()
コード例 #22
0
ファイル: PilotStatusAgent.py プロジェクト: DIRACGrid/DIRAC
  def initialize(self):
    """Set default agent options and create the DB/client handles.

    :return: S_OK()
    """
    # Default option values, applied in this order.
    defaultOptions = (('PollingTime', 120), ('GridEnv', ''), ('PilotStalledDays', 3))
    for optionName, optionValue in defaultOptions:
      self.am_setOption(optionName, optionValue)

    self.pilotDB = PilotAgentsDB()
    self.diracadmin = DiracAdmin()
    self.jobDB = JobDB()

    # Delays are read from the agent options with fallbacks of 30 and 7.
    self.clearPilotsDelay = self.am_getOption('ClearPilotsDelay', 30)
    self.clearAbortedDelay = self.am_getOption('ClearAbortedPilotsDelay', 7)

    self.WMSAdministrator = WMSAdministratorClient()

    return S_OK()
コード例 #23
0
    def getPilotInfo(self, gridReference):
        """Retrieve info relative to a pilot reference.

        The request is forwarded to ``WMSAdministratorClient().getPilotInfo``.

          >>> print dirac.getPilotInfo(gridReference)

        :param gridReference: Pilot Job Reference; must be a string
        :type gridReference: string
        :return: S_OK,S_ERROR
        """
        if isinstance(gridReference, basestring):
            pilotInfo = WMSAdministratorClient().getPilotInfo(gridReference)
            return pilotInfo

        return self._errorReport('Expected string for pilot reference')
コード例 #24
0
    def getJobPilots(self, jobID):
        """Extract the list of submitted pilots and their status for a given
       jobID from the WMS.  Useful information is printed to the screen.

         >>> print dirac.getJobPilots()
         {'OK': True, 'Value': {PilotID:{StatusDict}}}

       :param job: JobID
       :type job: integer or string
       :return: S_OK,S_ERROR

    """
        if isinstance(jobID, basestring):
            try:
                jobID = int(jobID)
            except Exception as x:
                return self._errorReport(
                    str(x), 'Expected integer or string for existing jobID')

        result = WMSAdministratorClient().getPilots(jobID)
        if result['OK']:
            print self.pPrint.pformat(result['Value'])
        return result
コード例 #25
0
    def __getJobPilotStatus(self, jobID):
        """ Get the job pilot status.

        The pilot reference is read from the job parameter 'Pilot_Reference'
        and its status is then looked up with ``getPilotInfo``.

        :param jobID: job identifier
        :return: S_OK( status ), with 'NoPilot' when the job has no pilot
                 reference or the service reports "No pilots found";
                 a failing result otherwise
        """
        parameter = JobMonitoringClient().getJobParameter(jobID,
                                                          'Pilot_Reference')
        if not parameter['OK']:
            return parameter

        pilotReference = parameter['Value'].get('Pilot_Reference')
        if not pilotReference:
            # There is no pilot reference, hence its status is unknown
            return S_OK('NoPilot')

        pilotInfo = WMSAdministratorClient().getPilotInfo(pilotReference)
        if pilotInfo['OK']:
            return S_OK(pilotInfo['Value'][pilotReference]['Status'])

        if "No pilots found" in pilotInfo['Message']:
            # Treated as "no pilot" rather than as an error.
            self.log.warn(pilotInfo['Message'])
            return S_OK('NoPilot')

        self.log.error('Failed to get pilot information',
                       'for job %d: ' % jobID + pilotInfo['Message'])
        return S_ERROR('Failed to get the pilot status')
コード例 #26
0
    def getPilotSummary(self, startDate='', endDate=''):
        """Retrieve the pilot output for an existing job in the WMS.  Summary is
       printed at INFO level, full dictionary of results also returned.

         >>> print dirac.getPilotSummary()
         {'OK': True, 'Value': {CE:{Status:Count}}}

       :param job: JobID
       :type job: integer or string
       :return: S_OK,S_ERROR
    """
        result = WMSAdministratorClient().getPilotSummary(startDate, endDate)
        if not result['OK']:
            return result

        ceDict = result['Value']
        headers = 'CE'.ljust(28)
        i = 0
        for ce, summary in ceDict.iteritems():
            states = summary.keys()
            if len(states) > i:
                i = len(states)

        for i in xrange(i):
            headers += 'Status'.ljust(12) + 'Count'.ljust(12)
        print headers

        for ce, summary in ceDict.iteritems():
            line = ce.ljust(28)
            states = sorted(summary)
            for state in states:
                count = str(summary[state])
                line += state.ljust(12) + count.ljust(12)
            print line

        return result
コード例 #27
0
ファイル: PilotStatusAgent.py プロジェクト: DIRACGrid/DIRAC
class PilotStatusAgent(AgentModule):
  """
      Agent that cleans up stale pilots.

      On every cycle execute() selects the pilots that are still in one of the
      queryStateList states but were not updated for PilotStalledDays days,
      declares them 'Deleted' (sending accounting records when enabled), tries
      to kill them, and finally asks the WMSAdministrator service to clear old
      pilots.

      The specific agents must provide the following methods:
        - initialize() for initial settings
        - beginExecution()
        - execute() - the main method called in the agent cycle
        - endExecution()
        - finalize() - the graceful exit of the method, this one is usually used
                   for the agent restart
  """

  # States of pilots that are still considered "alive" and therefore selected
  # by handleOldPilots()
  queryStateList = ['Ready', 'Submitted', 'Running', 'Waiting', 'Scheduled']
  # States that accountPilots() never overwrites in the accounting data
  finalStateList = ['Done', 'Aborted', 'Cleared', 'Deleted', 'Failed']

  def __init__(self, *args, **kwargs):
    """ c'tor
    """
    AgentModule.__init__(self, *args, **kwargs)

    self.jobDB = None
    self.pilotDB = None
    self.diracadmin = None

    # The attributes below get their real values in initialize() / execute();
    # they are declared here so that all instance state is visible in one place.
    self.WMSAdministrator = None
    self.clearPilotsDelay = 30
    self.clearAbortedDelay = 7
    self.pilotStalledDays = 3
    self.gridEnv = ''

  #############################################################################
  def initialize(self):
    """Sets defaults

    Registers the default agent options, instantiates the DB / client objects
    used by the agent and reads the clearing delays.

    :return: S_OK()
    """

    self.am_setOption('PollingTime', 120)
    self.am_setOption('GridEnv', '')
    self.am_setOption('PilotStalledDays', 3)
    self.pilotDB = PilotAgentsDB()
    self.diracadmin = DiracAdmin()
    self.jobDB = JobDB()
    self.clearPilotsDelay = self.am_getOption('ClearPilotsDelay', 30)
    self.clearAbortedDelay = self.am_getOption('ClearAbortedPilotsDelay', 7)
    self.WMSAdministrator = WMSAdministratorClient()

    return S_OK()

  #############################################################################
  def execute(self):
    """The PilotAgent execution method.

    :return: S_OK(), or the S_ERROR of the failed DB connection request
    """

    self.pilotStalledDays = self.am_getOption('PilotStalledDays', 3)
    self.gridEnv = self.am_getOption('GridEnv')
    if not self.gridEnv:
      # No specific option found, try a general one
      setup = gConfig.getValue('/DIRAC/Setup', '')
      if setup:
        instance = gConfig.getValue('/DIRAC/Setups/%s/WorkloadManagement' % setup, '')
        if instance:
          self.gridEnv = gConfig.getValue('/Systems/WorkloadManagement/%s/GridEnv' % instance, '')

    result = self.pilotDB._getConnection()
    if not result['OK']:
      return result
    connection = result['Value']

    # Now handle pilots not updated in the last N days (most likely the Broker is no
    # longer available) and declare them Deleted.
    # The connection is released even if the handling raises, and a failure is
    # reported instead of being silently dropped.
    try:
      result = self.handleOldPilots(connection)
      if not result['OK']:
        self.log.error('Failed to handle old pilots', result['Message'])
    finally:
      connection.close()

    result = self.WMSAdministrator.clearPilots(self.clearPilotsDelay, self.clearAbortedDelay)
    if not result['OK']:
      self.log.warn('Failed to clear old pilots in the PilotAgentsDB')

    return S_OK()

  def clearWaitingPilots(self, condDict):
    """ Clear pilots in the faulty Waiting state

    Pilots matching the owner / grid type / broker of condDict that stayed in
    'Waiting' for more than MAX_WAITING_STATE_LENGTH hours are set to 'Stalled'.

    :param dict condDict: must provide 'OwnerDN', 'OwnerGroup', 'GridType' and 'Broker'
    :return: S_OK(), or the S_ERROR of the failed pilot selection
    """

    last_update = Time.dateTime() - MAX_WAITING_STATE_LENGTH * Time.hour
    clearDict = {'Status': 'Waiting',
                 'OwnerDN': condDict['OwnerDN'],
                 'OwnerGroup': condDict['OwnerGroup'],
                 'GridType': condDict['GridType'],
                 'Broker': condDict['Broker']}
    result = self.pilotDB.selectPilots(clearDict, older=last_update)
    if not result['OK']:
      self.log.warn('Failed to get the Pilot Agents for Waiting state')
      return result
    if not result['Value']:
      return S_OK()
    refList = result['Value']

    for pilotRef in refList:
      self.log.info('Setting Waiting pilot to Stalled: %s' % pilotRef)
      result = self.pilotDB.setPilotStatus(pilotRef, 'Stalled', statusReason='Exceeded max waiting time')
      if not result['OK']:
        # Best effort: report the failure and carry on with the other pilots
        self.log.warn('Failed to set Waiting pilot to Stalled: %s' % pilotRef)

    return S_OK()

  def clearParentJob(self, pRef, pDict, connection):
    """ Clear the parameteric parent job from the PilotAgentsDB

    If at least one child is already known to the DB the parent is simply
    deleted. Otherwise the children are first registered with the parent's
    task queue / owner information and their statuses are set; the parent is
    deleted only when all of that succeeded.

    :param pRef: reference of the parent pilot
    :param dict pDict: must provide 'ChildRefs' (list of child references) and
                       'ChildDicts' (child reference -> dict with 'Status' and 'DestinationSite')
    :param connection: DB connection handed over to the PilotAgentsDB calls
    :return: S_OK / S_ERROR
    """

    childList = pDict['ChildRefs']

    # Check that at least one child is in the database
    children_ok = False
    for child in childList:
      result = self.pilotDB.getPilotInfo(child, conn=connection)
      if result['OK'] and result['Value']:
        children_ok = True
        break

    if children_ok:
      return self.pilotDB.deletePilot(pRef, conn=connection)

    self.log.verbose('Adding children for parent %s' % pRef)
    result = self.pilotDB.getPilotInfo(pRef)
    # Do not index into the reply before knowing that the query worked
    if not result['OK']:
      return result
    if pRef not in result['Value']:
      return S_ERROR('No information found for parent pilot %s' % pRef)
    parentInfo = result['Value'][pRef]
    tqID = parentInfo['TaskQueueID']
    ownerDN = parentInfo['OwnerDN']
    ownerGroup = parentInfo['OwnerGroup']
    broker = parentInfo['Broker']
    gridType = parentInfo['GridType']
    result = self.pilotDB.addPilotTQReference(childList, tqID, ownerDN, ownerGroup,
                                              broker=broker, gridType=gridType)
    if not result['OK']:
      return result

    children_added = True
    for chRef, chDict in pDict['ChildDicts'].items():
      result = self.pilotDB.setPilotStatus(chRef, chDict['Status'],
                                           destination=chDict['DestinationSite'],
                                           conn=connection)
      if not result['OK']:
        children_added = False
    if not children_added:
      return S_ERROR('Failed to add children')

    # Same as in the "children already known" case: report the deletion outcome
    return self.pilotDB.deletePilot(pRef, conn=connection)

  def handleOldPilots(self, connection):
    """
      select all pilots that have not been updated in the last N days and declared them
      Deleted, accounting for them.

      Pilots with at least one recently updated job are left alone. The others
      are accounted and killed in batches.

      :param connection: DB connection handed over to accountPilots()
      :return: S_OK(), or the S_ERROR of a failed PilotAgentsDB query
    """
    pilotsToAccount = {}
    timeLimitToConsider = Time.toString(Time.dateTime() - Time.day * self.pilotStalledDays)
    result = self.pilotDB.selectPilots({'Status': self.queryStateList},
                                       older=timeLimitToConsider,
                                       timeStamp='LastUpdateTime')
    if not result['OK']:
      self.log.error('Failed to get the Pilot Agents')
      return result
    if not result['Value']:
      return S_OK()

    refList = result['Value']
    result = self.pilotDB.getPilotInfo(refList)
    if not result['OK']:
      self.log.error('Failed to get Info for Pilot Agents')
      return result

    pilotsDict = result['Value']

    for pRef in pilotsDict:
      pilotJobs = pilotsDict[pRef].get('Jobs')
      if pilotJobs and self._checkJobLastUpdateTime(pilotJobs, self.pilotStalledDays):
        self.log.debug('%s should not be deleted since one job of %s is running.' %
                       (str(pRef), str(pilotJobs)))
        continue
      deletedJobDict = pilotsDict[pRef]
      deletedJobDict['Status'] = 'Deleted'
      deletedJobDict['StatusDate'] = Time.dateTime()
      pilotsToAccount[pRef] = deletedJobDict
      # Flush in batches so that a single cycle never handles a huge dictionary
      if len(pilotsToAccount) > 100:
        self.accountPilots(pilotsToAccount, connection)
        self._killPilots(pilotsToAccount)
        pilotsToAccount = {}

    # Whatever is left over from the last, incomplete batch
    self.accountPilots(pilotsToAccount, connection)
    self._killPilots(pilotsToAccount)

    return S_OK()

  def accountPilots(self, pilotsToAccount, connection):
    """ account for pilots

    When the PilotAccountingEnabled option is "yes", accounting records are
    built from the DB information and committed; the new status is written to
    the PilotAgentsDB only if accounting is disabled or was sent successfully.

    :param dict pilotsToAccount: pilot reference -> dict with at least
                                 'Status', 'DestinationSite' and 'StatusDate'
    :param connection: DB connection handed over to the PilotAgentsDB calls
    :return: S_OK(), or the S_ERROR of the failed step
    """
    pae = self.am_getOption('PilotAccountingEnabled', 'yes')
    accountingFlag = pae.lower() == "yes"

    if not pilotsToAccount:
      self.log.info('No pilots to Account')
      return S_OK()

    accountingSent = False
    if accountingFlag:
      retVal = self.pilotDB.getPilotInfo(list(pilotsToAccount), conn=connection)
      if not retVal['OK']:
        self.log.error('Fail to retrieve Info for pilots', retVal['Message'])
        return retVal
      dbData = retVal['Value']
      for pref in dbData:
        if pref in pilotsToAccount:
          # Pilots that reached a final state in the meantime keep their DB values
          if dbData[pref]['Status'] not in self.finalStateList:
            dbData[pref]['Status'] = pilotsToAccount[pref]['Status']
            dbData[pref]['DestinationSite'] = pilotsToAccount[pref]['DestinationSite']
            dbData[pref]['LastUpdateTime'] = pilotsToAccount[pref]['StatusDate']

      retVal = self.__addPilotsAccountingReport(dbData)
      if not retVal['OK']:
        # This step fills the records; it is not the DB query reported above
        self.log.error('Failed to add the pilots accounting records', retVal['Message'])
        return retVal

      self.log.info("Sending accounting records...")
      retVal = gDataStoreClient.commit()
      if not retVal['OK']:
        self.log.error("Can't send accounting reports", retVal['Message'])
      else:
        self.log.info("Accounting sent for %s pilots" % len(pilotsToAccount))
        accountingSent = True

    if not accountingFlag or accountingSent:
      for pRef in pilotsToAccount:
        pDict = pilotsToAccount[pRef]
        self.log.verbose('Setting Status for %s to %s' % (pRef, pDict['Status']))
        self.pilotDB.setPilotStatus(pRef,
                                    pDict['Status'],
                                    pDict['DestinationSite'],
                                    pDict['StatusDate'],
                                    conn=connection)

    return S_OK()

  def __addPilotsAccountingReport(self, pilotsData):
    """ fill accounting data

    Creates one PilotAccounting record per pilot and registers it with
    gDataStoreClient (the records are committed by the caller).

    :param dict pilotsData: pilot reference -> pilot information dictionary
    :return: S_OK(), or the S_ERROR of the first failed addRegister() call
    """
    for pRef in pilotsData:
      pData = pilotsData[pRef]
      pA = PilotAccounting()
      pA.setEndTime(pData['LastUpdateTime'])
      pA.setStartTime(pData['SubmissionTime'])
      retVal = CS.getUsernameForDN(pData['OwnerDN'])
      if not retVal['OK']:
        userName = '******'
        self.log.error("Can't determine username for dn:", pData['OwnerDN'])
      else:
        userName = retVal['Value']
      pA.setValueByKey('User', userName)
      pA.setValueByKey('UserGroup', pData['OwnerGroup'])
      result = getSiteForCE(pData['DestinationSite'])
      if result['OK'] and result['Value'].strip():
        pA.setValueByKey('Site', result['Value'].strip())
      else:
        pA.setValueByKey('Site', 'Unknown')
      pA.setValueByKey('GridCE', pData['DestinationSite'])
      pA.setValueByKey('GridMiddleware', pData['GridType'])
      pA.setValueByKey('GridResourceBroker', pData['Broker'])
      pA.setValueByKey('GridStatus', pData['Status'])
      if 'Jobs' not in pData:
        pA.setValueByKey('Jobs', 0)
      else:
        pA.setValueByKey('Jobs', len(pData['Jobs']))
      self.log.verbose("Added accounting record for pilot %s" % pData['PilotID'])
      retVal = gDataStoreClient.addRegister(pA)
      if not retVal['OK']:
        return retVal
    return S_OK()

  def _killPilots(self, acc):
    """ Try to kill every pilot of acc, in sorted reference order

    Failures are only logged; nothing is returned.

    :param dict acc: dictionary keyed by pilot reference (values are not used)
    """
    for i in sorted(acc.keys()):
      result = self.diracadmin.getPilotInfo(i)
      if result['OK'] and i in result['Value'] and 'Status' in result['Value'][i]:
        ret = self.diracadmin.killPilot(str(i))
        if ret['OK']:
          self.log.info("Successfully deleted: %s (Status : %s)" % (i, result['Value'][i]['Status']))
        else:
          self.log.error("Failed to delete pilot: ", "%s : %s" % (i, ret['Message']))
      else:
        self.log.error("Failed to get pilot info", "%s : %s" % (i, str(result)))

  def _checkJobLastUpdateTime(self, joblist, StalledDays):
    """ Tell whether any job of joblist was updated within the last StalledDays days

    Jobs whose attributes cannot be read are logged and skipped.

    :param joblist: iterable of job IDs (each converted with int())
    :param StalledDays: number of days defining the time limit
    :return: True as soon as one recently updated job is found, False otherwise
    """
    timeLimitToConsider = Time.dateTime() - Time.day * StalledDays
    ret = False
    for jobID in joblist:
      result = self.jobDB.getJobAttributes(int(jobID))
      if result['OK']:
        if 'LastUpdateTime' in result['Value']:
          lastUpdateTime = result['Value']['LastUpdateTime']
          if Time.fromString(lastUpdateTime) > timeLimitToConsider:
            ret = True
            self.log.debug(
                'Since %s updates LastUpdateTime on %s this does not to need to be deleted.' %
                (str(jobID), str(lastUpdateTime)))
            break
      else:
        self.log.error("Error taking job info from DB", result['Message'])
    return ret
コード例 #28
0
ファイル: JobCommand.py プロジェクト: DIRACGrid/DIRAC
class JobCommand(Command):
    """
    Job "master" Command: fetches the per-site job summary from the
    WMSAdministrator and keeps it in the JobCache of the ResourceManagement
    client.
    """
    def __init__(self, args=None, clients=None):
        """
        Picks the WMSAdministrator / ResourceManagementClient objects out of
        self.apis when present there, otherwise instantiates fresh clients.
        """

        super(JobCommand, self).__init__(args, clients)

        self.wmsAdmin = (self.apis["WMSAdministrator"]
                         if "WMSAdministrator" in self.apis
                         else WMSAdministratorClient())

        self.rmClient = (self.apis["ResourceManagementClient"]
                         if "ResourceManagementClient" in self.apis
                         else ResourceManagementClient())

    def _storeCommand(self, result):
        """
        Writes every dictionary produced by doNew into the job cache.
        Stops at, and returns, the first failing query; S_OK() otherwise.
        """

        cacheColumns = ("Site", "MaskStatus", "Efficiency", "Status")
        for siteEntry in result:
            cacheArgs = [siteEntry[column] for column in cacheColumns]
            queryResult = self.rmClient.addOrModifyJobCache(*cacheArgs)
            if not queryResult["OK"]:
                return queryResult

        return S_OK()

    def _prepareCommand(self):
        """
        Extracts the single argument the command needs from self.args:
        - name : <str>

        Returns S_OK( name ), or S_ERROR if the argument is missing.
        """

        if "name" in self.args:
            return S_OK(self.args["name"])
        return S_ERROR('"name" not found in self.args')

    def doNew(self, masterParams=None):
        """
        Queries the WMSAdministrator site summary for a site name (or list of
        site names), taken from masterParams when given and from the command
        arguments otherwise.

        The records found are turned into dictionaries, stored in the cache
        and returned inside S_OK.
        """

        if masterParams is None:
            prepared = self._prepareCommand()
            if not prepared["OK"]:
                return prepared
            name = prepared["Value"]
        else:
            name = masterParams

        # Arguments are: selectDict, sortList, startItem, maxItems
        # The service returns statistics of Last day !
        summary = self.wmsAdmin.getSiteSummaryWeb({"Site": name}, [], 0, 0)
        if not summary["OK"]:
            return summary
        summary = summary["Value"]

        if "ParameterNames" not in summary:
            return S_ERROR('Wrong result dictionary, missing "ParameterNames"')
        columnNames = summary["ParameterNames"]

        if "Records" not in summary:
            return S_ERROR('Wrong formed result dictionary, missing "Records"')

        uniformResult = []
        for row in summary["Records"]:
            # Pair the column names with the row values. The keys are
            # 'Site', 'GridType', 'Country', 'Tier', 'MaskStatus', 'Received',
            # 'Checking', 'Staging', 'Waiting', 'Matched', 'Running', 'Stalled',
            # 'Done', 'Completed', 'Failed', 'Efficiency', 'Status'
            siteEntry = dict(zip(columnNames, row))
            # Efficiency is stored as a float
            siteEntry["Efficiency"] = float(siteEntry["Efficiency"])
            uniformResult.append(siteEntry)

        stored = self._storeCommand(uniformResult)
        if not stored["OK"]:
            return stored

        return S_OK(uniformResult)

    def doCache(self):
        """
        Reads the entries of the job cache selected by the "name" argument and
        returns them as a list of dictionaries (column name -> value).
        """

        prepared = self._prepareCommand()
        if not prepared["OK"]:
            return prepared

        cached = self.rmClient.selectJobCache(prepared["Value"])
        if not cached["OK"]:
            return cached

        return S_OK([dict(zip(cached["Columns"], row)) for row in cached["Value"]])

    def doMaster(self):
        """
        Master method: runs doNew on all the sites returned by getSites and
        records a failure message, if any, in self.metrics.
        """

        sites = getSites()
        if not sites["OK"]:
            return sites

        outcome = self.doNew(sites["Value"])
        if not outcome["OK"]:
            self.metrics["failed"].append(outcome["Message"])

        return S_OK(self.metrics)
コード例 #29
0
ファイル: PilotCommand.py プロジェクト: DIRACGrid/DIRAC
class PilotCommand(Command):
  """
    Pilot "master" Command: fetches the pilot summary from the WMSAdministrator
    and keeps it in the PilotCache of the ResourceManagement client.
  """

  def __init__(self, args=None, clients=None):
    """
      Picks the WMSAdministrator / ResourceManagementClient objects out of
      self.apis when present there, otherwise instantiates fresh clients.
    """

    super(PilotCommand, self).__init__(args, clients)

    self.wmsAdmin = (self.apis['WMSAdministrator']
                     if 'WMSAdministrator' in self.apis
                     else WMSAdministratorClient())

    self.rmClient = (self.apis['ResourceManagementClient']
                     if 'ResourceManagementClient' in self.apis
                     else ResourceManagementClient())

  def _storeCommand(self, result):
    """
      Writes every dictionary produced by doNew into the pilot cache.
      Stops at, and returns, the first failing query; S_OK() otherwise.
    """

    cacheColumns = ('Site', 'CE', 'PilotsPerJob', 'PilotJobEff', 'Status')
    for pilotEntry in result:
      cacheArgs = [pilotEntry[column] for column in cacheColumns]
      queryResult = self.rmClient.addOrModifyPilotCache(*cacheArgs)
      if not queryResult['OK']:
        return queryResult

    return S_OK()

  def _prepareCommand(self):
    """
      PilotCommand requires two arguments in self.args:
      - name : value used to select the pilots
      - element : either 'Site' or 'Resource'

      Returns S_OK( ( element, name ) ), or S_ERROR on a missing / wrong argument.
    """

    if 'name' not in self.args:
      return S_ERROR('"name" not found in self.args')
    if 'element' not in self.args:
      return S_ERROR('element is missing')

    name = self.args['name']
    element = self.args['element']
    if element not in ['Site', 'Resource']:
      return S_ERROR('"%s" is not Site nor Resource' % element)

    return S_OK((element, name))

  def doNew(self, masterParams=None):
    """
      Queries the WMSAdministrator pilot summary for the given ( element, name )
      pair, taken from masterParams when given and from the command arguments
      otherwise.

      The records found are turned into dictionaries, stored in the cache and
      returned inside S_OK.
    """

    if masterParams is None:
      prepared = self._prepareCommand()
      if not prepared['OK']:
        return prepared
      element, name = prepared['Value']
    else:
      element, name = masterParams

    # Sites are selected through 'GridSite', resources through 'ExpandSite'
    if element == 'Resource':
      wmsDict = {'ExpandSite': name}
    elif element == 'Site':
      wmsDict = {'GridSite': name}
    else:
      # You should never see this error
      return S_ERROR('"%s" is not  Site nor Resource' % element)

    summary = self.wmsAdmin.getPilotSummaryWeb(wmsDict, [], 0, 0)
    if not summary['OK']:
      return summary
    summary = summary['Value']

    if 'ParameterNames' not in summary:
      return S_ERROR('Wrong result dictionary, missing "ParameterNames"')
    columnNames = summary['ParameterNames']

    if 'Records' not in summary:
      return S_ERROR('Wrong formed result dictionary, missing "Records"')

    uniformResult = []
    for row in summary['Records']:
      # Pair the column names with the row values. The keys are
      # 'Site', 'CE', 'Submitted', 'Ready', 'Scheduled', 'Waiting', 'Running',
      # 'Done', 'Aborted', 'Done_Empty', 'Aborted_Hour', 'Total', 'PilotsPerJob',
      # 'PilotJobEff', 'Status', 'InMask'
      pilotEntry = dict(zip(columnNames, row))
      # Both ratios are stored as floats
      pilotEntry['PilotsPerJob'] = float(pilotEntry['PilotsPerJob'])
      pilotEntry['PilotJobEff'] = float(pilotEntry['PilotJobEff'])
      uniformResult.append(pilotEntry)

    stored = self._storeCommand(uniformResult)
    if not stored['OK']:
      return stored

    return S_OK(uniformResult)

  def doCache(self):
    """
      Reads the entries of the pilot cache selected by the command arguments
      and returns them as a list of dictionaries (column name -> value).
    """

    prepared = self._prepareCommand()
    if not prepared['OK']:
      return prepared
    element, name = prepared['Value']

    if element == 'Resource':
      site, ce = None, name
    elif element == 'Site':
      # WMS returns Site entries with CE = 'Multiple'
      site, ce = name, 'Multiple'
    else:
      # You should never see this error
      return S_ERROR('"%s" is not  Site nor Resource' % element)

    cached = self.rmClient.selectPilotCache(site, ce)
    if not cached['OK']:
      return cached

    return S_OK([dict(zip(cached['Columns'], row)) for row in cached['Value']])

  def doMaster(self):
    """
      Master method: runs doNew once for all the sites and once for all the
      computing elements, recording failure messages in self.metrics.
    """

    sites = getSites()
    if not sites['OK']:
      return sites
    sites = sites['Value']

    ces = CSHelpers.getComputingElements()
    if not ces['OK']:
      return ces
    ces = ces['Value']

    for masterParams in (('Site', sites), ('Resource', ces)):
      outcome = self.doNew(masterParams)
      if not outcome['OK']:
        self.metrics['failed'].append(outcome['Message'])

    return S_OK(self.metrics)
コード例 #30
0
    labels = [
        'pilotUUID', 'timestamp', 'source', 'phase', 'status', 'messageContent'
    ]
    for log in logs:
        content.append([log[label] for label in labels])
    printTable(labels, content, numbering=False, columnSeparator=' | ')


from DIRAC.WorkloadManagementSystem.Client.PilotsLoggingClient import PilotsLoggingClient
from DIRAC.WorkloadManagementSystem.Client.WMSAdministratorClient import WMSAdministratorClient

# NOTE(review): uuid, jobid and printPilotsLogging are defined earlier in the
# script, outside this excerpt.
if uuid:
    # A pilot UUID was given: print the logging records of that pilot only.
    result = PilotsLoggingClient().getPilotsLogging(uuid)
    if not result['OK']:
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print('ERROR: %s' % result['Message'])
        DIRAC.exit(1)
    printPilotsLogging(result['Value'])
    DIRAC.exit(0)
else:
    # No UUID: look up the pilots of the job and print the logging of each one.
    info = WMSAdministratorClient().getPilots(jobid)
    if not info['OK']:
        print(info['Message'])
        DIRAC.exit(1)
    for pilot in info['Value']:
        pilotLogs = PilotsLoggingClient().getPilotsLogging(
            pilot['PilotJobReference'])
        if not pilotLogs['OK']:
            # Nothing to print for this pilot; report and go on with the next one
            print(pilotLogs['Message'])
            continue
        # Hand over the records themselves, as done in the UUID branch,
        # not the whole S_OK structure
        printPilotsLogging(pilotLogs['Value'])
    DIRAC.exit(0)
コード例 #31
0
    def test_JobDBWMSAdmin(self):
        """Run the site-mask related calls of WMSAdministratorClient in sequence.

        Sets a mask of two sites, bans and re-allows one of them, reads the
        mask logging and summaries, and finally clears the mask, checking the
        reply of every call.
        """

        client = WMSAdministratorClient()
        sites = ['My.Site.org', 'Your.Site.org']

        def checked(reply):
            # Every call of this test is expected to succeed
            self.assertTrue(reply['OK'])
            return reply

        # Set the mask and read it back
        checked(client.setSiteMask(sites))
        mask = checked(client.getSiteMask())
        self.assertEqual(sorted(mask['Value']), sorted(sites))

        # Banning one site leaves only the other one in the mask
        checked(client.banSite('My.Site.org', 'This is a comment'))
        mask = checked(client.getSiteMask())
        self.assertEqual(sorted(mask['Value']), ['Your.Site.org'])

        # Allowing it again restores the full mask
        checked(client.allowSite('My.Site.org', 'This is a comment'))
        mask = checked(client.getSiteMask())
        self.assertEqual(sorted(mask['Value']), sorted(sites))

        # Logging and summary information
        maskLogging = checked(client.getSiteMaskLogging(sites))
        self.assertEqual(maskLogging['Value']['My.Site.org'][0][3], 'No comment')
        maskSummary = checked(client.getSiteMaskSummary())
        self.assertEqual(maskSummary['Value']['My.Site.org'], 'Active')

        webSummary = checked(client.getSiteSummaryWeb({}, [], 0, 100))
        self.assertIn(webSummary['Value']['TotalRecords'], [0, 1, 2, 34])
        checked(client.getSiteSummarySelectors())

        # Clean up: the mask must be empty afterwards
        checked(client.clearMask())
        mask = checked(client.getSiteMask())
        self.assertEqual(mask['Value'], [])
コード例 #32
0
class CloudDirector(AgentModule):
    """
      The specific agents must provide the following methods:
      - initialize() for initial settings
      - beginExecution()
      - execute() - the main method called in the agent cycle
      - endExecution()
      - finalize() - the graceful exit of the method, this one is usually used
                 for the agent restart
  """
    def __init__(self, *args, **kwargs):
        """ Constructor: initializes the base agent, then sets every attribute
        of the director to its empty / default value.
        """
        AgentModule.__init__(self, *args, **kwargs)

        # VM type bookkeeping
        self.vmTypeDict = {}
        self.vmTypeCECache = {}
        self.vmTypeSlots = {}
        self.failedVMTypes = defaultdict(int)
        self.platforms = []
        self.sites = []
        self.firstPass = True

        # Community and credentials
        self.vo = ''
        self.group = ''
        # List of all the user groups eligible for the clouds submitted by this director
        self.voGroups = []
        self.cloudDN = ''
        self.cloudGroup = ''
        self.proxy = None

        self.wmsClient = WMSAdministratorClient()

        # Behaviour switches
        self.updateStatus = True
        self.getOutput = False
        self.sendAccounting = True

    def initialize(self):
        """ Agent initialization hook: there is nothing to set up at this
        stage, so it simply returns S_OK()
        """
        return S_OK()

    def beginExecution(self):
        """ Read the agent options and rebuild the VM type description.

        Determines the VO and the eligible user groups, looks up the generic
        cloud credentials, reads the submission options, gets the VM types of
        the selected sites and builds the endpoints out of them.

        :return: S_OK(), or the failing result of one of the lookups
        """

        # The Director is for a particular user community
        self.vo = self.am_getOption("VO", '') or CSGlobals.getVO()
        # The SiteDirector is for a particular user group
        self.group = self.am_getOption("Group", '')

        # Choose the group for which clouds will be submitted. This is a hack until
        # we will be able to match clouds to VOs.
        if self.group:
            self.voGroups = [self.group]
        elif self.vo:
            result = Registry.getGroupsForVO(self.vo)
            if not result['OK']:
                return result
            # Only the groups holding the NormalUser property are eligible
            self.voGroups = [
                voGroup for voGroup in result['Value']
                if 'NormalUser' in Registry.getPropertiesForGroup(voGroup)
            ]

        result = findGenericCloudCredentials(vo=self.vo)
        if not result['OK']:
            return result
        self.cloudDN, self.cloudGroup = result['Value']
        self.maxVMsToSubmit = self.am_getOption('MaxVMsToSubmit', 1)
        self.runningPod = self.am_getOption('RunningPod', self.vo)

        # Get the site description dictionary.
        # "Any" (in whatever case) or an empty value means no restriction, i.e. None
        siteNames = None
        if self.am_getOption('Site', 'Any').lower() != "any":
            siteNames = self.am_getOption('Site', []) or None
        ces = None
        if self.am_getOption('CEs', 'Any').lower() != "any":
            ces = self.am_getOption('CEs', []) or None

        result = getVMTypes(vo=self.vo, siteList=siteNames)
        if not result['OK']:
            return result
        result = self.getEndpoints(result['Value'])
        if not result['OK']:
            return result

        for label, value in (('Sites:', siteNames),
                             ('CEs:', ces),
                             ('CloudDN:', self.cloudDN),
                             ('CloudGroup:', self.cloudGroup)):
            self.log.always(label, value)

        self.localhost = socket.getfqdn()
        self.proxy = ''

        # The list of served VM types is announced during the first pass only
        if self.firstPass and self.vmTypeDict:
            self.log.always("Agent will serve VM types:")
            for vmType in self.vmTypeDict:
                description = self.vmTypeDict[vmType]
                self.log.always(
                    "Site: %s, CE: %s, VMType: %s" %
                    (description['Site'], description['CEName'], vmType))
        self.firstPass = False
        return S_OK()

    def __generateVMTypeHash(self, vmTypeDict):
        """ Generate a hash of the queue description
    """
        myMD5 = hashlib.md5()
        myMD5.update(str(vmTypeDict))
        hexstring = myMD5.hexdigest()
        return hexstring

    def getEndpoints(self, resourceDict):
        """ Get the list of relevant CEs and their descriptions

        Rebuilds self.vmTypeDict with one entry per "<ce>_<vmType>", merging
        the CE level settings (tags, MaxRAM, WholeNode, platform) into the
        parameters of each VM type, and attaches to every entry a CE object
        that is taken from self.vmTypeCECache when the description did not
        change.

        :param dict resourceDict: site -> CE -> CE description; every CE
                                  description must provide 'VMTypes', 'CEType'
                                  and 'MaxInstances'
        :return: S_OK(), or the S_ERROR of the failed bootstrap parameters
                 lookup / CE object creation
        """

        self.vmTypeDict = {}
        ceFactory = EndpointFactory()

        result = getPilotBootstrapParameters(vo=self.vo,
                                             runningPod=self.runningPod)
        if not result['OK']:
            return result
        opParameters = result['Value']

        for site in resourceDict:
            for ce in resourceDict[site]:
                ceDict = resourceDict[site][ce]
                ceTags = ceDict.get('Tag', [])
                if isinstance(ceTags, basestring):
                    ceTags = fromChar(ceTags)
                ceMaxRAM = ceDict.get('MaxRAM', None)
                qDict = ceDict.pop('VMTypes')
                for vmType in qDict:
                    vmTypeName = '%s_%s' % (ce, vmType)
                    # params is the very dictionary stored as 'ParametersDict'
                    params = qDict[vmType]
                    vmTypeEntry = {'ParametersDict': params}
                    self.vmTypeDict[vmTypeName] = vmTypeEntry

                    params['VMType'] = vmType
                    params['Site'] = site
                    params['Setup'] = gConfig.getValue('/DIRAC/Setup', 'unknown')
                    params['CPUTime'] = 99999999

                    vmTypeTags = params.get('Tag')
                    if vmTypeTags and isinstance(vmTypeTags, basestring):
                        vmTypeTags = fromChar(vmTypeTags)
                        params['Tag'] = vmTypeTags
                    if ceTags:
                        if vmTypeTags:
                            params['Tag'] = list(set(ceTags + vmTypeTags))
                        else:
                            # Store a copy: ceTags is shared by all the VM types
                            # of this CE and must not be modified through one of them
                            params['Tag'] = list(ceTags)

                    maxRAM = params.get('MaxRAM')
                    maxRAM = ceMaxRAM if not maxRAM else maxRAM
                    if maxRAM:
                        params['MaxRAM'] = maxRAM

                    ceWholeNode = ceDict.get('WholeNode', 'true')
                    wholeNode = params.get('WholeNode', ceWholeNode)
                    if wholeNode.lower() in ('yes', 'true'):
                        # Build a new list instead of appending in place, so that
                        # neither the CE level tags nor an empty 'Tag' value get in the way
                        allTags = list(params.get('Tag') or [])
                        allTags.append('WholeNode')
                        params['Tag'] = allTags

                    # The VM type level platform takes precedence over the CE level one
                    platform = ''
                    if "Platform" in params:
                        platform = params['Platform']
                    elif "Platform" in ceDict:
                        platform = ceDict['Platform']
                    if platform and platform not in self.platforms:
                        self.platforms.append(platform)

                    if "Platform" not in params and platform:
                        result = Resources.getDIRACPlatform(platform)
                        if result['OK']:
                            params['Platform'] = result['Value'][0]

                    # Full description used to create the CE object:
                    # CE settings first, overridden by the VM type parameters
                    ceVMTypeDict = dict(ceDict)
                    ceVMTypeDict['CEName'] = ce
                    ceVMTypeDict['VO'] = self.vo
                    ceVMTypeDict['VMType'] = vmType
                    ceVMTypeDict['RunningPod'] = self.runningPod
                    ceVMTypeDict['CSServers'] = gConfig.getValue(
                        "/DIRAC/Configuration/Servers", [])
                    ceVMTypeDict.update(params)

                    # Allow a resource-specifc CAPath to be set (as some clouds have their own CAs)
                    # Otherwise fall back to the system-wide default(s)
                    if 'CAPath' not in ceVMTypeDict:
                        ceVMTypeDict['CAPath'] = gConfig.getValue(
                            '/DIRAC/Security/CAPath',
                            "/opt/dirac/etc/grid-security/certificates/cas.pem"
                        )

                    # Generate the CE object for the vmType or pick the already existing one
                    # if the vmType definition did not change
                    vmTypeHash = self.__generateVMTypeHash(ceVMTypeDict)
                    cached = self.vmTypeCECache.get(vmTypeName)
                    if cached is not None and cached['Hash'] == vmTypeHash:
                        vmTypeCE = cached['CE']
                    else:
                        result = ceFactory.getCEObject(parameters=ceVMTypeDict)
                        if not result['OK']:
                            return result
                        cached = self.vmTypeCECache.setdefault(vmTypeName, {})
                        cached['Hash'] = vmTypeHash
                        cached['CE'] = result['Value']
                        vmTypeCE = cached['CE']
                        vmTypeCE.setBootstrapParameters(opParameters)

                    vmTypeEntry['CE'] = vmTypeCE
                    vmTypeEntry['CEName'] = ce
                    vmTypeEntry['CEType'] = ceDict['CEType']
                    vmTypeEntry['Site'] = site
                    vmTypeEntry['VMType'] = vmType
                    vmTypeEntry['Platform'] = platform
                    vmTypeEntry['MaxInstances'] = ceDict['MaxInstances']
                    if not vmTypeCE.isValid():
                        # NOTE(review): the entry of an invalid endpoint stays in
                        # self.vmTypeDict, only its site is not registered - confirm
                        # that this is intended
                        self.log.error(
                            'Failed to instantiate CloudEndpoint for %s' %
                            vmTypeName)
                        continue

                    if site not in self.sites:
                        self.sites.append(site)

        return S_OK()

    def execute(self):
        """ Run one agent cycle: create VMs for the configured VM types.

        A failure of the VM creation step is only logged, it does not make
        the cycle fail.

        :return: S_OK() in all cases
        """
        if self.vmTypeDict:
            submission = self.createVMs()
            if not submission['OK']:
                self.log.error('Errors in the job submission: ',
                               submission['Message'])
        else:
            self.log.warn('No site defined, exiting the cycle')

        # NOTE: the periodic cloud status update (updatePilotStatus) that used
        # to follow the submission is currently disabled.

        return S_OK()

    def createVMs(self):
        """ Go through the defined VM types and create VM instances if necessary

        The Matcher is first asked whether there is any waiting workload
        compatible with the platforms, sites and tags of all the VM types.
        Then, for each VM type taken in random order, the number of eligible
        waiting jobs is compared with the VMs already waiting on the endpoint
        and with the free slots; the missing instances are created, registered
        in the VirtualMachineDB, and one pilot reference per VM processor is
        attached to one of the matching task queues, drawn randomly with a
        weight given by the task queue priority.

        :return: S_OK(), or S_ERROR() if the compatible platforms, the matching
                 task queues or the site mask can not be obtained
        """

        # A real list is needed: it is shuffled below, which a dictionary view
        # does not support
        vmTypeList = list(self.vmTypeDict)

        # Check that there is some work at all
        setup = CSGlobals.getSetup()
        tqDict = {'Setup': setup, 'CPUTime': 9999999}
        if self.vo:
            tqDict['VO'] = self.vo
        if self.voGroups:
            tqDict['OwnerGroup'] = self.voGroups

        result = Resources.getCompatiblePlatforms(self.platforms)
        if not result['OK']:
            return result
        tqDict['Platform'] = result['Value']
        tqDict['Site'] = self.sites
        tags = []
        for vmType in vmTypeList:
            if 'Tag' in self.vmTypeDict[vmType]['ParametersDict']:
                tags += self.vmTypeDict[vmType]['ParametersDict']['Tag']
        tqDict['Tag'] = list(set(tags))
        tqDict['SubmitPool'] = "wenmrPool"

        self.log.verbose('Checking overall TQ availability with requirements')
        self.log.verbose(tqDict)

        rpcMatcher = RPCClient("WorkloadManagement/Matcher")
        result = rpcMatcher.getMatchingTaskQueues(tqDict)
        if not result['OK']:
            return result
        if not result['Value']:
            self.log.verbose('No Waiting jobs suitable for the director')
            return S_OK()

        # jobSites: sites explicitly requested by the waiting task queues
        # testSites: sites requested by task queues that declare job types;
        #            those sites are served even if they are not in the mask
        jobSites = set()
        anySite = False
        testSites = set()
        totalWaitingJobs = 0
        for tqID in result['Value']:
            if "Sites" in result['Value'][tqID]:
                for site in result['Value'][tqID]['Sites']:
                    if site.lower() != 'any':
                        jobSites.add(site)
                    else:
                        anySite = True
            else:
                anySite = True
            if "JobTypes" in result['Value'][tqID]:
                if "Sites" in result['Value'][tqID]:
                    for site in result['Value'][tqID]['Sites']:
                        if site.lower() != 'any':
                            testSites.add(site)
            totalWaitingJobs += result['Value'][tqID]['Jobs']

        totalTaskQueues = len(result['Value'])

        result = virtualMachineDB.getInstanceCounters('Status', {})
        totalVMs = 0
        if result['OK']:
            for status in result['Value']:
                if status in ['New', 'Submitted', 'Running']:
                    totalVMs += result['Value'][status]
        self.log.info('Total %d jobs in %d task queues with %d VMs' %
                      (totalWaitingJobs, totalTaskQueues, totalVMs))

        # Check if the site is allowed in the mask
        result = self.wmsClient.getSiteMask()
        if not result['OK']:
            return S_ERROR('Can not get the site mask')
        siteMaskList = result['Value']

        random.shuffle(vmTypeList)
        totalSubmittedPilots = 0
        matchedQueues = 0
        for vmType in vmTypeList:
            ce = self.vmTypeDict[vmType]['CE']
            ceName = self.vmTypeDict[vmType]['CEName']
            vmTypeName = self.vmTypeDict[vmType]['VMType']
            siteName = self.vmTypeDict[vmType]['Site']
            platform = self.vmTypeDict[vmType]['Platform']
            vmTypeTags = self.vmTypeDict[vmType]['ParametersDict'].get(
                'Tag', [])
            siteMask = siteName in siteMaskList
            endpoint = "%s::%s" % (siteName, ceName)
            maxInstances = int(self.vmTypeDict[vmType]['MaxInstances'])

            # vms support WholeNode naturally
            processorTags = ['WholeNode']

            if not anySite and siteName not in jobSites:
                self.log.verbose(
                    "Skipping queue %s at %s: no workload expected" %
                    (vmTypeName, siteName))
                continue
            if not siteMask and siteName not in testSites:
                self.log.verbose("Skipping queue %s: site %s not in the mask" %
                                 (vmTypeName, siteName))
                continue

            if 'CPUTime' in self.vmTypeDict[vmType]['ParametersDict']:
                # Only the presence and the integer validity of the CPU time
                # limit is checked here, the value itself is not used below
                int(self.vmTypeDict[vmType]['ParametersDict']['CPUTime'])
            else:
                self.log.warn(
                    'CPU time limit is not specified for queue %s, skipping...'
                    % vmType)
                continue

            # Prepare the queue description to look for eligible jobs
            ceDict = ce.getParameterDict()

            if not siteMask:
                ceDict['JobType'] = "Test"
            if self.vo:
                ceDict['VO'] = self.vo
            if self.voGroups:
                ceDict['OwnerGroup'] = self.voGroups

            result = Resources.getCompatiblePlatforms(platform)
            if not result['OK']:
                continue
            ceDict['Platform'] = result['Value']

            ceDict['Tag'] = list(set(processorTags + vmTypeTags))

            # Get the number of eligible jobs for the target site/queue

            result = rpcMatcher.getMatchingTaskQueues(ceDict)
            if not result['OK']:
                self.log.error(
                    'Could not retrieve TaskQueues from TaskQueueDB',
                    result['Message'])
                return result
            taskQueueDict = result['Value']
            if not taskQueueDict:
                self.log.verbose('No matching TQs found for %s' % vmType)
                continue

            matchedQueues += 1
            totalTQJobs = 0
            for tq in taskQueueDict:
                totalTQJobs += taskQueueDict[tq]['Jobs']

            self.log.verbose(
                '%d job(s) from %d task queue(s) are eligible for %s queue' %
                (totalTQJobs, len(taskQueueDict), vmType))

            # Get the number of already instantiated VMs for these task queues
            totalWaitingVMs = 0
            result = virtualMachineDB.getInstanceCounters(
                'Status', {'Endpoint': endpoint})
            if result['OK']:
                for status in result['Value']:
                    if status in ['New', 'Submitted']:
                        totalWaitingVMs += result['Value'][status]
            if totalWaitingVMs >= totalTQJobs:
                self.log.verbose("%d VMs already for all the available jobs" %
                                 totalWaitingVMs)

            self.log.verbose(
                "%d VMs for the total of %d eligible jobs for %s" %
                (totalWaitingVMs, totalTQJobs, vmType))

            # Get proxy to be used to connect to the cloud endpoint
            authType = ce.parameters.get('Auth')
            if authType and authType.lower() in ['x509', 'voms']:
                self.log.verbose("Getting cloud proxy for %s/%s" %
                                 (siteName, ceName))
                result = getProxyFileForCE(ce)
                if not result['OK']:
                    continue
                ce.setProxy(result['Value'])

            # Get the number of available slots on the target site/endpoint
            totalSlots = self.getVMInstances(endpoint, maxInstances)
            if totalSlots == 0:
                self.log.debug('%s: No slots available' % vmType)
                continue

            vmsToSubmit = max(0, min(totalSlots,
                                     totalTQJobs - totalWaitingVMs))
            self.log.info('%s: Slots=%d, TQ jobs=%d, VMs: %d, to submit=%d' %
                          (vmType, totalSlots, totalTQJobs, totalWaitingVMs,
                           vmsToSubmit))

            # Limit the number of VM instances to create to vmsToSubmit
            vmsToSubmit = min(self.maxVMsToSubmit, vmsToSubmit)
            if vmsToSubmit == 0:
                continue

            self.log.info('Going to submit %d VMs to %s queue' %
                          (vmsToSubmit, vmType))
            result = ce.createInstances(vmsToSubmit)

            if not result['OK']:
                self.log.error('Failed submission to queue %s:\n' % vmType,
                               result['Message'])
                self.failedVMTypes.setdefault(vmType, 0)
                self.failedVMTypes[vmType] += 1
                continue

            # Add VMs to the VirtualMachineDB
            vmDict = result['Value']
            totalSubmittedPilots += len(vmDict)
            self.log.info('Submitted %d VMs to %s@%s' %
                          (len(vmDict), vmTypeName, ceName))

            # One pilot reference per processor of each registered VM
            pilotList = []
            for uuID in vmDict:
                diracUUID = vmDict[uuID]['InstanceID']
                result = virtualMachineDB.insertInstance(
                    uuID, vmTypeName, diracUUID, endpoint, self.vo)
                if not result['OK']:
                    continue
                for ncpu in range(vmDict[uuID]['NumberOfProcessors']):
                    pRef = 'vm://' + ceName + '/' + diracUUID + ':' + str(
                        ncpu).zfill(2)
                    pilotList.append(pRef)

            # Cumulative priorities, used for a weighted random choice of the
            # task queue each pilot reference is attached to
            stampDict = {}
            tqPriorityList = []
            sumPriority = 0.
            for tq in taskQueueDict:
                sumPriority += taskQueueDict[tq]['Priority']
                tqPriorityList.append((tq, sumPriority))

            pilotsPerTQ = {}
            for pilotID in pilotList:
                rndm = random.random() * sumPriority
                # Default to the last task queue: if no cumulative priority
                # exceeds the drawn value (e.g. all priorities are zero), a
                # task queue ID left over from an unrelated loop must not be used
                tqID = tqPriorityList[-1][0]
                for tq, prio in tqPriorityList:
                    if rndm < prio:
                        tqID = tq
                        break
                pilotsPerTQ.setdefault(tqID, []).append(pilotID)

            for tqID, tqPilots in pilotsPerTQ.items():
                result = pilotAgentsDB.addPilotTQReference(
                    tqPilots, tqID, '', '', self.localhost, 'Cloud',
                    stampDict)
                if not result['OK']:
                    self.log.error(
                        'Failed to insert pilots into the PilotAgentsDB: %s' %
                        result['Message'])

        self.log.info(
            "%d VMs submitted in total in this cycle, %d matched queues" %
            (totalSubmittedPilots, matchedQueues))
        return S_OK()

    def getVMInstances(self, endpoint, maxInstances):
        """ Get the number of VM instances that can still be created on an endpoint

        Instances in the New, Submitted and Running states count as occupied
        slots.

        :param str endpoint: endpoint used to select the instances in the
                             VirtualMachineDB
        :param int maxInstances: maximum number of instances allowed on the endpoint

        :return: number of free slots, never negative; 0 if the instance
                 counters can not be obtained
        """

        result = virtualMachineDB.getInstanceCounters('Status',
                                                      {'Endpoint': endpoint})
        if not result['OK']:
            # The caller does arithmetic on the returned value, so the error
            # structure must not be handed back: report no free slots instead
            self.log.error(
                'Failed to get VM instance counters for %s' % endpoint,
                result['Message'])
            return 0

        occupied = sum(
            int(count) for status, count in result['Value'].items()
            if status in ('New', 'Submitted', 'Running'))

        return max(0, maxInstances - occupied)
コード例 #33
0
class PilotCommand(Command):
    """
    Pilot "master" Command.

    Gets the pilot summary of sites or resources from the WMS administrator
    and stores it in the pilot cache through the resource management client.
    """
    def __init__(self, args=None, clients=None):
        """ c'tor: clients found in self.apis are used, otherwise default
        ones are created
        """

        super(PilotCommand, self).__init__(args, clients)

        if 'WMSAdministrator' in self.apis:
            self.wmsAdmin = self.apis['WMSAdministrator']
        else:
            self.wmsAdmin = WMSAdministratorClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        """
        Stores the results of doNew method on the database.

        :param result: list of pilot dictionaries with at least the keys
                       'Site', 'CE', 'PilotsPerJob', 'PilotJobEff' and 'Status'
        :return: S_OK(), or the first failed result of addOrModifyPilotCache
        """

        for pilotDict in result:

            resQuery = self.rmClient.addOrModifyPilotCache(
                pilotDict['Site'], pilotDict['CE'], pilotDict['PilotsPerJob'],
                pilotDict['PilotJobEff'], pilotDict['Status'])
            if not resQuery['OK']:
                # Stop at the first failure, the remaining entries are not stored
                return resQuery

        return S_OK()

    def _prepareCommand(self):
        """
        PilotCommand requires two arguments in self.args:
        - name : <str>
        - element : 'Site' or 'Resource'

        :return: S_OK( ( element, name ) ) or S_ERROR() if an argument is
                 missing or the element is not valid
        """

        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        name = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('element is missing')
        element = self.args['element']

        if element not in ['Site', 'Resource']:
            return S_ERROR('"%s" is not Site nor Resource' % element)

        return S_OK((element, name))

    def doNew(self, masterParams=None):
        """
        Gets the pilot summary from the WMS administrator, converts the
        'PilotsPerJob' and 'PilotJobEff' values to float and stores the
        records in the pilot cache.

        :param masterParams: optional ( element, name ) tuple; if not given,
                             the values are taken from self.args
        :return: S_OK( list of pilot dictionaries ) or S_ERROR()
        """

        if masterParams is not None:
            element, name = masterParams
        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, name = params['Value']

        wmsDict = {}

        if element == 'Site':
            wmsDict = {'GridSite': name}
        elif element == 'Resource':
            wmsDict = {'ExpandSite': name}
        else:
            # masterParams are not validated by _prepareCommand, so a wrong
            # element can only come from there
            return S_ERROR('"%s" is not  Site nor Resource' % element)

        wmsResults = self.wmsAdmin.getPilotSummaryWeb(wmsDict, [], 0, 0)

        if not wmsResults['OK']:
            return wmsResults
        wmsResults = wmsResults['Value']

        if 'ParameterNames' not in wmsResults:
            return S_ERROR('Wrong result dictionary, missing "ParameterNames"')
        params = wmsResults['ParameterNames']

        if 'Records' not in wmsResults:
            return S_ERROR('Wrong formed result dictionary, missing "Records"')
        records = wmsResults['Records']

        uniformResult = []

        for record in records:

            # This returns a dictionary with the following keys:
            # 'Site', 'CE', 'Submitted', 'Ready', 'Scheduled', 'Waiting', 'Running',
            # 'Done', 'Aborted', 'Done_Empty', 'Aborted_Hour', 'Total', 'PilotsPerJob',
            # 'PilotJobEff', 'Status', 'InMask'
            pilotDict = dict(zip(params, record))

            # A malformed record is reported as an error structure instead of
            # an exception, like JobsWMSCommand does for its records
            try:
                pilotDict['PilotsPerJob'] = float(pilotDict['PilotsPerJob'])
                pilotDict['PilotJobEff'] = float(pilotDict['PilotJobEff'])
            except (KeyError, TypeError, ValueError) as e:
                return S_ERROR('Malformed pilot record: %s' % repr(e))

            uniformResult.append(pilotDict)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        return S_OK(uniformResult)

    def doCache(self):
        """
        Gets the pilot entries of the element given in self.args from the
        pilot cache.

        :return: S_OK( list of dictionaries keyed by the column names of the
                 query result ), or the failed result
        """

        params = self._prepareCommand()
        if not params['OK']:
            return params
        element, name = params['Value']

        if element == 'Site':
            # WMS returns Site entries with CE = 'Multiple'
            site, ce = name, 'Multiple'
        elif element == 'Resource':
            site, ce = None, name
        else:
            # You should never see this error
            return S_ERROR('"%s" is not  Site nor Resource' % element)

        result = self.rmClient.selectPilotCache(site, ce)
        if result['OK']:
            result = S_OK(
                [dict(zip(result['Columns'], res)) for res in result['Value']])

        return result

    def doMaster(self):
        """
        Runs doNew for all the sites and then for all the computing elements.
        Failures of doNew are collected in self.metrics[ 'failed' ].

        :return: S_OK( self.metrics ), or S_ERROR() if the sites or the
                 computing elements can not be obtained
        """

        siteNames = getSites()
        if not siteNames['OK']:
            return siteNames
        siteNames = siteNames['Value']

        ces = CSHelpers.getComputingElements()
        if not ces['OK']:
            return ces
        ces = ces['Value']

        pilotResults = self.doNew(('Site', siteNames))
        if not pilotResults['OK']:
            self.metrics['failed'].append(pilotResults['Message'])

        pilotResults = self.doNew(('Resource', ces))
        if not pilotResults['OK']:
            self.metrics['failed'].append(pilotResults['Message'])

        return S_OK(self.metrics)
コード例 #34
0
ファイル: JobCommand.py プロジェクト: DIRACGrid/DIRAC
class JobsWMSCommand(Command):
    """ Command getting the job summary of sites from the WMS administrator """
    def __init__(self, args=None, clients=None):

        super(JobsWMSCommand, self).__init__(args, clients)

        # A client provided through self.apis is preferred to a new one
        if "WMSAdministrator" not in self.apis:
            self.wmsAdmin = WMSAdministratorClient()
        else:
            self.wmsAdmin = self.apis["WMSAdministrator"]

    def doCommand(self):
        """
        Get the job summary records of one or several sites.

        The "siteName" entry of self.args selects the site(s); if it is None,
        all the sites returned by getSites() are taken.

        :returns: S_OK( list of dictionaries, one per summary record, with the
                  "Efficiency" value converted to float ), or the output of
                  self.returnERROR() in case of failure
        """

        if "siteName" not in self.args:
            return self.returnERROR(S_ERROR("siteName is missing"))

        sites = self.args["siteName"]
        if sites is None:
            # No site specified: take them all
            allSites = getSites()
            if not allSites["OK"]:
                return self.returnERROR(allSites)
            sites = allSites["Value"]

        summary = self.wmsAdmin.getSiteSummaryWeb({"Site": sites}, [], 0, 500)
        if not summary["OK"]:
            return self.returnERROR(summary)
        summary = summary["Value"]

        # Both the column names and the records must be present
        for mandatoryKey in ("ParameterNames", "Records"):
            if mandatoryKey not in summary:
                return self.returnERROR(S_ERROR("Malformed result dictionary"))
        columns = summary["ParameterNames"]

        jobResults = []
        for row in summary["Records"]:
            jobDict = dict(zip(columns, row))
            try:
                jobDict["Efficiency"] = float(jobDict["Efficiency"])
            except (KeyError, ValueError) as err:
                return self.returnERROR(S_ERROR(err))
            jobResults.append(jobDict)

        return S_OK(jobResults)
コード例 #35
0
ファイル: SiteStatus.py プロジェクト: rob-c/DIRAC
    def setSiteStatus(self, site, status, comment='No comment'):
        """
        Set the status of a site.

        If self.rssFlag is set, the status is written to the 'SiteStatus'
        table of RSS and the RSS cache is refreshed. Otherwise the site is
        allowed ( Active, Degraded ) or banned ( any other valid status )
        through the WMS administrator.

        examples
          >>> siteStatus.setSiteStatus( 'site1.test.test', 'Banned' )
              S_OK()
          >>> siteStatus.setSiteStatus( 'site1.test.test', None )
              S_ERROR( ... )

        :Parameters:
          **site** - `String`
            the site whose status is going to be changed
          **status** - `String`
            the new status, in any letter case: Active, Banned, Degraded,
            Probing, Error or Unknown
          **comment** - `String`
            reason for the status change

        :return: S_OK() || S_ERROR()
        """

        if not status:
            return S_ERROR(DErrno.ERESUNK, 'status parameter is empty')

        # fix case sensitive string
        status = status.capitalize()
        allowedStateList = [
            'Active', 'Banned', 'Degraded', 'Probing', 'Error', 'Unknown'
        ]

        if status not in allowedStateList:
            return S_ERROR(errno.EINVAL,
                           'Not a valid status, parameter rejected')

        if self.rssFlag:
            # The user of the current proxy becomes the owner of the status token
            result = getProxyInfo()
            if result['OK']:
                tokenOwner = result['Value']['username']
            else:
                return S_ERROR("Unable to get user proxy info %s " %
                               result['Message'])

            # The token is valid for one day
            tokenExpiration = datetime.utcnow() + timedelta(days=1)

            self.rssCache.acquireLock()
            try:
                result = self.rsClient.modifyStatusElement(
                    'Site',
                    'Status',
                    status=status,
                    name=site,
                    tokenExpiration=tokenExpiration,
                    reason=comment,
                    tokenOwner=tokenOwner)
                if result['OK']:
                    self.rssCache.refreshCache()
                else:
                    _msg = 'Error updating status of site %s to %s' % (site,
                                                                       status)
                    gLogger.warn('RSS: %s' % _msg)

            # Release lock, no matter what.
            finally:
                self.rssCache.releaseLock()

        else:
            # The site and the reason have to be passed on, otherwise the WMS
            # administrator can not know which site to act upon
            if status in ['Active', 'Degraded']:
                result = WMSAdministratorClient().allowSite(site, comment)
            else:
                result = WMSAdministratorClient().banSite(site, comment)

        return result