Esempio n. 1
0
class DowntimeCommand(Command):
    '''
    Downtime "master" Command.

    Reads downtimes from GOCDB, stores them in the DowntimeCache and removes
    cached downtimes that have expired or are no longer listed by GOCDB.
    '''

    def __init__(self, args=None, clients=None):
        '''
        Initialise the command and the two clients it relies on.

        A client found in self.apis under 'GOCDBClient' or
        'ResourceManagementClient' is reused; otherwise a new one is created.

        :param args: forwarded unchanged to Command.__init__
        :param clients: forwarded unchanged to Command.__init__
        '''

        super(DowntimeCommand, self).__init__(args, clients)

        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        '''
        Stores the results of doNew method on the database.

        :param result: list of downtime dictionaries with the keys DowntimeID,
                       Element, Name, StartDate, EndDate, Severity,
                       Description, Link and gOCDBServiceType
        :return: the first failing client response; otherwise the response of
                 the last insertion, or S_OK() when there was nothing to store
        '''

        # Start from a success so that an empty list does not leave the
        # variable unbound.
        resQuery = S_OK()

        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt['DowntimeID'],
                element=dt['Element'],
                name=dt['Name'],
                startDate=dt['StartDate'],
                endDate=dt['EndDate'],
                severity=dt['Severity'],
                description=dt['Description'],
                link=dt['Link'],
                gOCDBServiceType=dt['gOCDBServiceType'])
            # Stop at the first failure instead of letting a later success
            # hide it.
            if not resQuery['OK']:
                return resQuery

        return resQuery

    def _cleanCommand(self, element, elementNames):
        '''
        Clear Cache from expired DT.

        A cached downtime is deleted when its EndDate is in the past (UTC) or
        when its Link is not in the list returned by
        GOCDBClient.getCurrentDTLinkList.

        :param element: element kind used to select cache rows
        :param elementNames: iterable of element names to inspect
        :return: S_OK( list of deleteDowntimeCache responses ), or the failing
                 response of the cache selection / GOCDB query
        '''

        resQuery = []
        currentDate = datetime.utcnow()
        # The list of current downtimes does not depend on the element name:
        # fetch it at most once, and only if some cached row needs checking.
        currentLinks = None

        for elementName in elementNames:
            # get the list of all DTs stored in the cache
            result = self.rmClient.selectDowntimeCache(element=element,
                                                       name=elementName)
            if not result['OK']:
                return result

            if not result['Value']:
                continue

            if currentLinks is None:
                # get the list of all ongoing DTs from GocDB
                gDTLinkList = self.gClient.getCurrentDTLinkList()
                if not gDTLinkList['OK']:
                    return gDTLinkList
                currentLinks = gDTLinkList['Value']

            for res in result['Value']:
                dt = dict(zip(result['Columns'], res))
                # if DT expired or DT not in the list of current DTs, then we
                # remove it from the cache
                if dt['EndDate'] < currentDate or dt['Link'] not in currentLinks:
                    resQuery.append(
                        self.rmClient.deleteDowntimeCache(
                            downtimeID=dt['DowntimeID']))

        return S_OK(resQuery)

    def _prepareCommand(self):
        '''
        DowntimeCommand requires three arguments and accepts an optional one:
        - name : <str>
        - element : Site / Resource
        - elementType: <str>
        - hours : optional, passed on as returned value (None when absent)

        If the elements are Site(s), we need to get their GOCDB names. They may
        not have one, in which case the name is left unchanged.

        :return: S_OK( ( element, elementName, hours, gOCDBServiceType ) ) or
                 S_ERROR / the failing helper response
        '''

        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args['element']

        if 'elementType' not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is neither Site nor Resource')

        hours = None
        if 'hours' in self.args:
            hours = self.args['hours']

        gOCDBServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == 'Site':

            gocSite = getGOCSiteName(elementName)
            # On failure the site is most probably not a grid site - not an
            # issue, elementName simply remains unchanged.
            if gocSite['OK']:
                elementName = gocSite['Value']

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == 'StorageElement':
            # We need to distinguish if it's tape or disk
            seOptions = getStorageElementOptions(elementName)
            if not seOptions['OK']:
                return seOptions
            if seOptions['Value'].get('TapeSE'):
                gOCDBServiceType = "srm.nearline"
            elif seOptions['Value'].get('DiskSE'):
                gOCDBServiceType = "srm"

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost['OK']:
                return seHost
            seHost = seHost['Value']

            if not seHost:
                return S_ERROR('No seHost for %s' % elementName)
            elementName = seHost

        elif elementType in ['FTS', 'FTS3']:
            gOCDBServiceType = 'FTS'
            try:
                # WARNING: this method presupposes that the server is an FTS3 type
                elementName = getGOCFTSName(elementName)
            except Exception:
                # Deliberately broad (the helper's failure mode is not visible
                # here) but no longer swallows KeyboardInterrupt / SystemExit.
                return S_ERROR(
                    'No FTS3 server specified in dirac.cfg (see Resources/FTSEndpoints)'
                )

        return S_OK((element, elementName, hours, gOCDBServiceType))

    def doNew(self, masterParams=None):
        '''
        Gets the parameters to run, either from the master method or from its
        own arguments.

        GOCDB is queried for the downtimes of the given element names that are
        ongoing or starting within <hours>. The server is not very stable, so
        in case of urllib2.URLError the query is tried a second time.

        The cache is then cleaned and every downtime found is stored.

        :param masterParams: optional ( element, elementNames ) pair; when given
                             a 120 hours look-ahead is used
        :return: S_OK() on success, S_OK( None ) when GOCDB reports no
                 downtimes, otherwise S_ERROR / the failing response
        '''

        if masterParams is not None:
            element, elementNames = masterParams
            hours = 120
            gOCDBServiceType = None

        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours, gOCDBServiceType = params['Value']
            elementNames = [elementName]

        # WARNING: checking all the DT that are ongoing or starting in given
        # <hours> from now. Two attempts at most.
        lastError = None
        for _attempt in range(2):
            try:
                results = self.gClient.getStatus(element,
                                                 name=elementNames,
                                                 startingInHours=hours)
                break
            except urllib2.URLError as e:
                lastError = e
        else:
            return S_ERROR(lastError)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:  # no downtimes found
            return S_OK(None)

        # cleaning the Cache
        cleanRes = self._cleanCommand(element, elementNames)
        if not cleanRes['OK']:
            return cleanRes

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():

            dt = {}

            if 'HOSTNAME' in downDic:
                dt['Name'] = downDic['HOSTNAME']
            elif 'SITENAME' in downDic:
                dt['Name'] = downDic['SITENAME']
            else:
                return S_ERROR("SITENAME or HOSTNAME are missing")

            if 'SERVICE_TYPE' in downDic:
                dt['gOCDBServiceType'] = downDic['SERVICE_TYPE']
                if gOCDBServiceType:
                    # gOCDBServiceType was derived by _prepareCommand from the
                    # configuration, downDic comes from the GOCDB answer; the
                    # names below make the message label each value correctly.
                    csST = gOCDBServiceType.lower()
                    gocdbST = downDic['SERVICE_TYPE'].lower()
                    if gocdbST != csST:
                        return S_ERROR(
                            "SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s"
                            % (gocdbST, csST, dt['Name']))
            else:
                # WARNING: do we want None as default value?
                dt['gOCDBServiceType'] = None

            dt['DowntimeID'] = downtime
            dt['Element'] = element
            dt['StartDate'] = downDic['FORMATED_START_DATE']
            dt['EndDate'] = downDic['FORMATED_END_DATE']
            dt['Severity'] = downDic['SEVERITY']
            # Single quotes are stripped from the description before storing.
            dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downDic['GOCDB_PORTAL_URL']

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        return S_OK()
Esempio n. 2
0
class DowntimeCommand( Command ):
  '''
    Downtime "master" Command.

    Queries GOCDB for downtimes of a Site / Resource, stores them in the
    DowntimeCache and returns the downtime ongoing at the requested time.
  '''

  def __init__( self, args = None, clients = None ):
    '''
      Initialise the command and the two clients it relies on.

      A client found in self.apis under 'GOCDBClient' or
      'ResourceManagementClient' is reused; otherwise a new one is created.

      :param args: forwarded unchanged to Command.__init__
      :param clients: forwarded unchanged to Command.__init__
    '''

    super( DowntimeCommand, self ).__init__( args, clients )

    if 'GOCDBClient' in self.apis:
      self.gClient = self.apis[ 'GOCDBClient' ]
    else:
      self.gClient = GOCDBClient()

    if 'ResourceManagementClient' in self.apis:
      self.rmClient = self.apis[ 'ResourceManagementClient' ]
    else:
      self.rmClient = ResourceManagementClient()

  def _storeCommand( self, result ):
    '''
      Stores the results of doNew method on the database.

      :param result: list of downtime dictionaries with the keys DowntimeID,
                     Element, Name, StartDate, EndDate, Severity, Description,
                     Link and, optionally, GOCDBServiceType
      :return: S_OK() when everything was stored, otherwise the first failing
               client response
    '''

    for dt in result:
      resQuery = self.rmClient.addOrModifyDowntimeCache(
                               downtimeID = dt[ 'DowntimeID' ],
                               element = dt[ 'Element' ],
                               name = dt[ 'Name' ],
                               startDate = dt[ 'StartDate' ],
                               endDate = dt[ 'EndDate' ],
                               severity = dt[ 'Severity' ],
                               description = dt[ 'Description' ],
                               link = dt[ 'Link' ],
                               # tolerate records built without a service type
                               gocdbServiceType = dt.get( 'GOCDBServiceType' ) )
      if not resQuery[ 'OK' ]:
        return resQuery
    return S_OK()

  def _prepareCommand( self ):
    '''
      DowntimeCommand requires three arguments and accepts an optional one:
      - name : <str>
      - element : Site / Resource
      - elementType: <str>
      - hours : optional, passed on as returned value (None when absent)

      If the elements are Site(s), we need to get their GOCDB names; a failure
      of that lookup is returned to the caller.

      :return: S_OK( ( element, elementName, hours, gocdbServiceType ) ) or
               S_ERROR / the failing helper response
    '''

    if 'name' not in self.args:
      return S_ERROR( '"name" not found in self.args' )
    elementName = self.args[ 'name' ]

    if 'element' not in self.args:
      return S_ERROR( '"element" not found in self.args' )
    element = self.args[ 'element' ]

    if 'elementType' not in self.args:
      return S_ERROR( '"elementType" not found in self.args' )
    elementType = self.args[ 'elementType' ]

    if element not in [ 'Site', 'Resource' ]:
      return S_ERROR( 'element is not Site nor Resource' )

    hours = None
    if 'hours' in self.args:
      hours = self.args[ 'hours' ]

    gocdbServiceType = None

    # Transform DIRAC site names into GOCDB topics
    if element == 'Site':

      gocSite = getGOCSiteName( elementName )
      if not gocSite[ 'OK' ]:
        return gocSite
      elementName = gocSite[ 'Value' ]

    # The DIRAC se names mean nothing on the grid, but their hosts do mean.
    elif elementType == 'StorageElement':
      # We need to distinguish if it's tape or disk. The options are read once
      # and the lookup result is checked before its value is used.
      seOptions = getStorageElementOptions( elementName )
      if not seOptions[ 'OK' ]:
        return seOptions
      # Tape storage is the "nearline" flavour, disk the plain one.
      if seOptions[ 'Value' ].get( 'TapeSE' ):
        gocdbServiceType = "srm.nearline"
      elif seOptions[ 'Value' ].get( 'DiskSE' ):
        gocdbServiceType = "srm"

      seHost = CSHelpers.getSEHost( elementName )
      # NOTE(review): this treats the helper's return as the host itself; if it
      # returns an S_OK / S_ERROR structure the check never fires - confirm.
      if not seHost:
        return S_ERROR( 'No seHost for %s' % elementName )
      elementName = seHost

    return S_OK( ( element, elementName, hours, gocdbServiceType ) )

  def doNew( self, masterParams = None ):
    '''
      Gets the parameters to run, either from the master method or from its
      own arguments.

      GOCDB is queried for downtimes starting from 14 days ago. The server is
      not very stable, so in case of urllib2.URLError the query is tried a
      second time. Downtimes matching the requested names (and service type,
      when known) are stored in the cache.

      :param masterParams: optional ( element, elementNames ) pair
      :return: S_OK( dt ) with the selected downtime dictionary ongoing now (or
               <hours> from now), S_OK( None ) when there is none, otherwise
               S_ERROR / the failing response
    '''

    if masterParams is not None:
      element, elementNames = masterParams
      hours = None
      # In master mode getStatus receives None as name; its answer is filtered
      # against elementNames below.
      elementName = None
      gocdbServiceType = None
    else:
      params = self._prepareCommand()
      if not params[ 'OK' ]:
        return params
      element, elementName, hours, gocdbServiceType = params[ 'Value' ]
      elementNames = [ elementName ]

    startDate = datetime.utcnow() - timedelta( days = 14 )

    # Two attempts at most.
    lastError = None
    for _attempt in range( 2 ):
      try:
        results = self.gClient.getStatus( element, elementName, startDate, 120 )
        break
      except urllib2.URLError as e:
        lastError = e
    else:
      return S_ERROR( lastError )

    if not results[ 'OK' ]:
      return results
    results = results[ 'Value' ]

    if results is None:
      return S_OK( None )

    uniformResult = []

    # Humanize the results into a dictionary, not the most optimal, but readable
    for downtime, downDic in results.items():

      # SERVICE_TYPE is not always defined
      serviceType = downDic.get( 'SERVICE_TYPE' )
      if gocdbServiceType and serviceType:
        if gocdbServiceType.lower() != serviceType.lower():
          continue

      dt = {}
      if element == 'Resource':
        dt[ 'Name' ] = downDic[ 'HOSTNAME' ]
      else:
        dt[ 'Name' ] = downDic[ 'SITENAME' ]

      if dt[ 'Name' ] not in elementNames:
        continue

      dt[ 'DowntimeID' ] = downtime
      dt[ 'Element' ] = element
      dt[ 'StartDate' ] = downDic[ 'FORMATED_START_DATE' ]
      dt[ 'EndDate' ] = downDic[ 'FORMATED_END_DATE' ]
      dt[ 'Severity' ] = downDic[ 'SEVERITY' ]
      # Single quotes are stripped from the description before storing.
      dt[ 'Description' ] = downDic[ 'DESCRIPTION' ].replace( '\'', '' )
      dt[ 'Link' ] = downDic[ 'GOCDB_PORTAL_URL' ]
      # Always present (None when GOCDB gave no SERVICE_TYPE) so that storing
      # the record cannot fail on a missing key.
      dt[ 'GOCDBServiceType' ] = serviceType

      uniformResult.append( dt )

    storeRes = self._storeCommand( uniformResult )
    if not storeRes[ 'OK' ]:
      return storeRes

    # We return only one downtime, if it is ongoing at the target date
    targetDate = datetime.utcnow()
    if hours:
      targetDate = targetDate + timedelta( hours = hours )
    # Dates in the records are compared as strings, as in the stored format.
    targetStr = str( targetDate )

    result = None
    dtOutages = []
    dtWarnings = []

    for dt in uniformResult:
      if ( dt[ 'StartDate' ] < targetStr ) and ( dt[ 'EndDate' ] > targetStr ):
        if dt[ 'Severity' ] == 'Outage':
          dtOutages.append( dt )
        else:
          dtWarnings.append( dt )

    # In case many overlapping downtimes have been declared, outages take
    # precedence over the others and, within a group, the last one collected
    # is returned.
    if dtOutages:
      result = dtOutages[ -1 ]
    elif dtWarnings:
      result = dtWarnings[ -1 ]

    return S_OK( result )
Esempio n. 3
0
class GOCDBSyncCommand(Command):
  """
  Keeps the start / end dates of the downtimes stored in the local
  DowntimeCache aligned with the ones published by GOCDB.
  """

  def __init__(self, args=None, clients=None):
    """
    Initialise the command and the two clients it relies on.

    A client found in self.apis under 'GOCDBClient' or
    'ResourceManagementClient' is reused; otherwise a new one is created.

    :param: `args` - forwarded unchanged to Command.__init__
    :param: `clients` - forwarded unchanged to Command.__init__
    """

    super(GOCDBSyncCommand, self).__init__(args, clients)

    if 'GOCDBClient' in self.apis:
      self.gClient = self.apis['GOCDBClient']
    else:
      self.gClient = GOCDBClient()

    if 'ResourceManagementClient' in self.apis:
      self.rmClient = self.apis['ResourceManagementClient']
    else:
      self.rmClient = ResourceManagementClient()

    # Hostnames already handled by doMaster; kept for the lifetime of this
    # instance.
    self.seenHostnames = set()

  def doNew(self, masterParams=None):
    """
    Gets the downtime IDs and dates of a given hostname from the local database and compares the results
    with the remote database of GOCDB. If the downtime dates have been changed it updates the local database.

    :param: `masterParams` - string, the hostname to check
    :return: S_OK / S_ERROR
    """

    if not masterParams:
      return S_ERROR(errno.EINVAL, 'masterParams is not provided')
    hostname = masterParams

    result = self.rmClient.selectDowntimeCache(name=hostname)
    if not result['OK']:
      return result

    localDowntimes = result['Value']
    if not localDowntimes:
      # Nothing cached for this host: no need to contact GOCDB.
      return S_OK()

    # The GOCDB answer depends only on the hostname, so it is requested and
    # parsed once rather than once per cached downtime.
    response = self.gClient.getHostnameDowntime(hostname, ongoing=True)
    if not response['OK']:
      return response

    doc = minidom.parseString(response['Value'])
    remoteDowntimes = [_parseSingleElement(dtElement, ['PRIMARY_KEY', 'ENDPOINT',
                                                       'FORMATED_START_DATE', 'FORMATED_END_DATE'])
                       for dtElement in doc.getElementsByTagName("DOWNTIME")]

    for downtimes in localDowntimes:

      # Row fields are read by position: 3 is used as downtime ID, 6 and 7 as
      # start and end dates.
      localDowntimeID = downtimes[3]
      localStartDate = downtimes[6].strftime('%Y-%m-%d %H:%M')
      localEndDate = downtimes[7].strftime('%Y-%m-%d %H:%M')

      for GOCDBdict in remoteDowntimes:

        GOCDBDowntimeID = GOCDBdict['PRIMARY_KEY'] + ' ' + GOCDBdict['ENDPOINT']
        if localDowntimeID != GOCDBDowntimeID:
          continue

        if localStartDate != GOCDBdict['FORMATED_START_DATE']:
          updateRes = self.rmClient.addOrModifyDowntimeCache(downtimeID=localDowntimeID,
                                                             startDate=GOCDBdict['FORMATED_START_DATE'])
          if not updateRes['OK']:
            return updateRes
          # Only report the change once it has actually been stored.
          gLogger.verbose("The start date of %s has been changed!" % localDowntimeID)

        if localEndDate != GOCDBdict['FORMATED_END_DATE']:
          updateRes = self.rmClient.addOrModifyDowntimeCache(downtimeID=localDowntimeID,
                                                             endDate=GOCDBdict['FORMATED_END_DATE'])
          if not updateRes['OK']:
            return updateRes
          gLogger.verbose("The end date of %s has been changed!" % localDowntimeID)

    return S_OK()

  def doCache(self):
    """
    Nothing is read from the cache for this command.

    :return: S_OK
    """
    return S_OK()

  def doMaster(self):
    """
    This method calls the doNew method for each hostname that exists
    in the DowntimeCache table of the local database.

    :return: S_OK / S_ERROR
    """

    # Query DB for all downtimes
    result = self.rmClient.selectDowntimeCache()
    if not result['OK']:
      return result

    for data in result['Value']:

      # data[0] contains the hostname
      hostname = data[0]

      # If already processed don't do it again
      if hostname in self.seenHostnames:
        continue

      gLogger.verbose("Checking if the downtime of %s has been changed" % hostname)
      syncRes = self.doNew(hostname)
      if not syncRes['OK']:
        return syncRes

      self.seenHostnames.add(hostname)

    return S_OK()
Esempio n. 4
0
class DowntimeCommand( Command ):
  '''
    Downtime "master" Command.

    Queries GOCDB for downtimes of a Site / Resource, stores them in the
    DowntimeCache and returns one downtime ongoing at the requested time.
  '''

  def __init__( self, args = None, clients = None ):
    '''
      Initialise the command and the two clients it relies on.

      A client found in self.apis under 'GOCDBClient' or
      'ResourceManagementClient' is reused; otherwise a new one is created.

      :param args: forwarded unchanged to Command.__init__
      :param clients: forwarded unchanged to Command.__init__
    '''

    super( DowntimeCommand, self ).__init__( args, clients )

    if 'GOCDBClient' in self.apis:
      self.gClient = self.apis[ 'GOCDBClient' ]
    else:
      self.gClient = GOCDBClient()

    if 'ResourceManagementClient' in self.apis:
      self.rmClient = self.apis[ 'ResourceManagementClient' ]
    else:
      self.rmClient = ResourceManagementClient()

  def _storeCommand( self, result ):
    '''
      Stores the results of doNew method on the database.

      :param result: list of downtime dictionaries with the keys DowntimeID,
                     Element, Name, StartDate, EndDate, Severity, Description
                     and Link
      :return: S_OK() when everything was stored, otherwise the first failing
               client response
    '''

    for dt in result:

      resQuery = self.rmClient.addOrModifyDowntimeCache( dt[ 'DowntimeID' ],
                                                         dt[ 'Element' ],
                                                         dt[ 'Name' ],
                                                         dt[ 'StartDate' ],
                                                         dt[ 'EndDate' ],
                                                         dt[ 'Severity' ],
                                                         dt[ 'Description' ],
                                                         dt[ 'Link' ] )
      if not resQuery[ 'OK' ]:
        return resQuery
    return S_OK()

  def _prepareCommand( self ):
    '''
      DowntimeCommand requires three arguments and accepts an optional one:
      - name : <str>
      - element : Site / Resource
      - elementType: <str>
      - hours : optional, passed on as returned value (None when absent)

      If the elements are Site(s), we need to get their GOCDB names; a failure
      of that lookup is returned to the caller.

      :return: S_OK( ( element, elementName, hours ) ) or S_ERROR / the failing
               helper response
    '''

    if 'name' not in self.args:
      return S_ERROR( '"name" not found in self.args' )
    elementName = self.args[ 'name' ]

    if 'element' not in self.args:
      return S_ERROR( '"element" not found in self.args' )
    element = self.args[ 'element' ]

    if 'elementType' not in self.args:
      return S_ERROR( '"elementType" not found in self.args' )
    elementType = self.args[ 'elementType' ]

    if element not in [ 'Site', 'Resource' ]:
      return S_ERROR( 'element is not Site nor Resource' )

    hours = None
    if 'hours' in self.args:
      hours = self.args[ 'hours' ]

    # Transform DIRAC site names into GOCDB topics
    if element == 'Site':

      gocSite = getGOCSiteName( elementName )
      if not gocSite[ 'OK' ]:
        return gocSite
      elementName = gocSite[ 'Value' ]

    # The DIRAC se names mean nothing on the grid, but their hosts do mean.
    elif elementType == 'StorageElement':

      seHost = CSHelpers.getSEHost( elementName )
      # NOTE(review): this treats the helper's return as the host itself; if it
      # returns an S_OK / S_ERROR structure the check never fires - confirm.
      if not seHost:
        return S_ERROR( 'No seHost for %s' % elementName )
      elementName = seHost

    return S_OK( ( element, elementName, hours ) )

  def doNew( self, masterParams = None ):
    '''
      Gets the parameters to run, either from the master method or from its
      own arguments.

      GOCDB is queried for downtimes starting from 2 days ago. The server is
      not very stable, so in case of urllib2.URLError the query is tried a
      second time. Downtimes matching the requested names are stored in the
      cache.

      :param masterParams: optional ( element, elementNames ) pair
      :return: S_OK( dt ) with the first downtime dictionary ongoing now (or
               <hours> from now), S_OK( None ) when there is none, otherwise
               S_ERROR / the failing response
    '''

    if masterParams is not None:
      element, elementNames = masterParams
      hours = None
      # In master mode getStatus receives None as name; its answer is filtered
      # against elementNames below.
      elementName = None
    else:
      params = self._prepareCommand()
      if not params[ 'OK' ]:
        return params
      element, elementName, hours = params[ 'Value' ]
      elementNames = [ elementName ]

    startDate = datetime.utcnow() - timedelta( days = 2 )

    # Two attempts at most.
    lastError = None
    for _attempt in range( 2 ):
      try:
        results = self.gClient.getStatus( element, elementName, startDate, 120 )
        break
      except urllib2.URLError as e:
        lastError = e
    else:
      return S_ERROR( lastError )

    if not results[ 'OK' ]:
      return results
    results = results[ 'Value' ]

    if results is None:
      return S_OK( None )

    uniformResult = []

    # Humanize the results into a dictionary, not the most optimal, but readable
    for downtime, downDic in results.items():

      dt = {}
      if element == 'Resource':
        dt[ 'Name' ] = downDic[ 'HOSTNAME' ]
      else:
        dt[ 'Name' ] = downDic[ 'SITENAME' ]

      if dt[ 'Name' ] not in elementNames:
        continue

      dt[ 'DowntimeID' ] = downtime
      dt[ 'Element' ] = element
      dt[ 'StartDate' ] = downDic[ 'FORMATED_START_DATE' ]
      dt[ 'EndDate' ] = downDic[ 'FORMATED_END_DATE' ]
      dt[ 'Severity' ] = downDic[ 'SEVERITY' ]
      # Single quotes are stripped from the description before storing.
      dt[ 'Description' ] = downDic[ 'DESCRIPTION' ].replace( '\'', '' )
      dt[ 'Link' ] = downDic[ 'GOCDB_PORTAL_URL' ]

      uniformResult.append( dt )

    storeRes = self._storeCommand( uniformResult )
    if not storeRes[ 'OK' ]:
      return storeRes

    # We return only one downtime, if it is ongoing at dtDate. The same UTC
    # clock as the query window above is used, so the result does not depend
    # on the local timezone of the machine.
    # NOTE(review): assumes the FORMATED_* dates are expressed in UTC - confirm.
    dtDate = datetime.utcnow()
    if hours:
      dtDate = dtDate + timedelta( hours = hours )
    # Dates in the records are compared as strings.
    dtDateStr = str( dtDate )

    result = None
    for dt in uniformResult:

      if ( dt[ 'StartDate' ] < dtDateStr ) and ( dt[ 'EndDate' ] > dtDateStr ):
        result = dt
        break

    return S_OK( result )
Esempio n. 5
0
class DowntimeCommand(Command):
    '''
    Downtime "master" Command or removed DTs.
  '''
    def __init__(self, args=None, clients=None):
        '''
        Set up the command together with the two clients it works with.

        Each client is taken from self.apis when registered there under its
        class name, and instantiated otherwise.

        :param args: forwarded unchanged to Command.__init__
        :param clients: forwarded unchanged to Command.__init__
        '''

        super(DowntimeCommand, self).__init__(args, clients)

        # GOCDB access
        self.gClient = (self.apis['GOCDBClient']
                        if 'GOCDBClient' in self.apis
                        else GOCDBClient())

        # DowntimeCache access
        self.rmClient = (self.apis['ResourceManagementClient']
                         if 'ResourceManagementClient' in self.apis
                         else ResourceManagementClient())

    def _storeCommand(self, result):
        '''
        Stores the results of doNew method on the database.

        :param result: list of downtime dictionaries with the keys DowntimeID,
                       Element, Name, StartDate, EndDate, Severity,
                       Description, Link and gOCDBServiceType
        :return: the first failing client response; otherwise the response of
                 the last insertion, or S_OK() when there was nothing to store
        '''

        # Start from a success so that an empty list does not leave the
        # variable unbound.
        resQuery = S_OK()

        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt['DowntimeID'],
                element=dt['Element'],
                name=dt['Name'],
                startDate=dt['StartDate'],
                endDate=dt['EndDate'],
                severity=dt['Severity'],
                description=dt['Description'],
                link=dt['Link'],
                gOCDBServiceType=dt['gOCDBServiceType'])
            # Stop at the first failure instead of letting a later success
            # hide it.
            if not resQuery['OK']:
                return resQuery

        return resQuery

    def _cleanCommand(self, element, elementNames):
        '''
        Clear Cache from expired DT.

        A cached downtime is deleted when its EndDate is in the past (UTC) or
        when its Link is not in the list returned by
        GOCDBClient.getCurrentDTLinkList.

        :param element: element kind used to select cache rows
        :param elementNames: iterable of element names to inspect
        :return: S_OK( list of deleteDowntimeCache responses ), or the failing
                 response of the cache selection / GOCDB query
        '''

        resQuery = []
        currentDate = datetime.utcnow()
        # The list of current downtimes does not depend on the element name:
        # fetch it at most once, and only if some cached row needs checking.
        currentLinks = None

        for elementName in elementNames:
            # get the list of all DTs stored in the cache
            result = self.rmClient.selectDowntimeCache(element=element,
                                                       name=elementName)
            if not result['OK']:
                return result

            if not result['Value']:
                continue

            if currentLinks is None:
                # get the list of all ongoing DTs from GocDB
                gDTLinkList = self.gClient.getCurrentDTLinkList()
                if not gDTLinkList['OK']:
                    return gDTLinkList
                currentLinks = gDTLinkList['Value']

            for res in result['Value']:
                dt = dict(zip(result['Columns'], res))
                # if DT expired or DT not in the list of current DTs, then we
                # remove it from the cache
                if dt['EndDate'] < currentDate or dt['Link'] not in currentLinks:
                    resQuery.append(
                        self.rmClient.deleteDowntimeCache(
                            downtimeID=dt['DowntimeID']))

        return S_OK(resQuery)

    def _prepareCommand(self):
        '''
        Validate self.args and translate the DIRAC name into what GOCDB knows.

        Mandatory entries of self.args:
        - name : <str>
        - element : Site / Resource
        - elementType: <str>
        Optional entry:
        - hours : returned as is (None when absent)

        Sites are mapped to their GOCDB name when they have one, storage
        elements to their host, FTS servers to their GOCDB FTS name.

        :return: S_OK( ( element, name, hours, gOCDBServiceType ) ) or
                 S_ERROR / the failing helper response
        '''

        args = self.args

        if 'name' not in args:
            return S_ERROR('"name" not found in self.args')
        name = args['name']

        if 'element' not in args:
            return S_ERROR('"element" not found in self.args')
        element = args['element']

        if 'elementType' not in args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is neither Site nor Resource')

        hours = args['hours'] if 'hours' in args else None
        serviceType = None

        if element == 'Site':
            # DIRAC site name -> GOCDB site name. A failed lookup most probably
            # means the site is not a grid site: the name is kept untouched.
            gocSite = getGOCSiteName(name)
            if gocSite['OK']:
                name = gocSite['Value']

        elif elementType == 'StorageElement':
            # SE names are meaningless on the grid, their hosts are not.
            try:
                seOptions = StorageElement(name).options
            except AttributeError:
                # Sometimes the SE can't be instantiated properly
                self.log.error(
                    "Failure instantiating StorageElement object for %s" %
                    name)
                return S_ERROR("Failure instantiating StorageElement")

            if 'SEType' in seOptions:
                # Type should follow the convention TXDY; tape wins over disk
                seType = seOptions['SEType']
                if re.search('T[1-9]', seType) is not None:
                    serviceType = "srm.nearline"
                elif re.search('D[1-9]', seType) is not None:
                    serviceType = "srm"

            hostRes = CSHelpers.getSEHost(name)
            if not hostRes['OK']:
                return hostRes

            host = hostRes['Value']
            if not host:
                return S_ERROR('No seHost for %s' % name)
            name = host

        elif elementType in ['FTS', 'FTS3']:
            serviceType = 'FTS'
            # WARNING: this method presupposes that the server is an FTS3 type
            ftsRes = getGOCFTSName(name)
            if ftsRes['OK']:
                name = ftsRes['Value']
            else:
                self.log.warn("%s not in Resources/FTSEndpoints/FTS3 ?" %
                              name)

        return S_OK((element, name, hours, serviceType))

    def doNew(self, masterParams=None):
        '''
        Gets the parameters to run, either from the master method or from its
        own arguments.

        GOCDB is queried for the downtimes of the given element names that are
        ongoing or starting within <hours>. The server is not very stable, so
        in case of urllib2.URLError the query is tried a second time.

        The cache is then cleaned and every downtime found is stored.

        :param masterParams: optional ( element, elementNames ) pair; when given
                             a 120 hours look-ahead is used
        :return: S_OK() on success, S_OK( None ) when GOCDB reports no
                 downtimes, otherwise S_ERROR / the failing response
        '''

        if masterParams is not None:
            element, elementNames = masterParams
            hours = 120
            gOCDBServiceType = None

        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours, gOCDBServiceType = params['Value']
            elementNames = [elementName]

        # WARNING: checking all the DT that are ongoing or starting in given
        # <hours> from now. Two attempts at most.
        lastError = None
        for _attempt in range(2):
            try:
                results = self.gClient.getStatus(element,
                                                 name=elementNames,
                                                 startingInHours=hours)
                break
            except urllib2.URLError as e:
                lastError = e
        else:
            return S_ERROR(lastError)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:  # no downtimes found
            return S_OK(None)

        # cleaning the Cache
        cleanRes = self._cleanCommand(element, elementNames)
        if not cleanRes['OK']:
            return cleanRes

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():

            dt = {}

            if 'HOSTNAME' in downDic:
                dt['Name'] = downDic['HOSTNAME']
            elif 'SITENAME' in downDic:
                dt['Name'] = downDic['SITENAME']
            else:
                return S_ERROR("SITENAME or HOSTNAME are missing")

            if 'SERVICE_TYPE' in downDic:
                dt['gOCDBServiceType'] = downDic['SERVICE_TYPE']
                if gOCDBServiceType:
                    # gOCDBServiceType was derived by _prepareCommand from the
                    # configuration, downDic comes from the GOCDB answer; the
                    # names below make the message label each value correctly.
                    csST = gOCDBServiceType.lower()
                    gocdbST = downDic['SERVICE_TYPE'].lower()
                    if gocdbST != csST:
                        return S_ERROR(
                            "SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s"
                            % (gocdbST, csST, dt['Name']))
            else:
                # WARNING: do we want None as default value?
                dt['gOCDBServiceType'] = None

            dt['DowntimeID'] = downtime
            dt['Element'] = element
            dt['StartDate'] = downDic['FORMATED_START_DATE']
            dt['EndDate'] = downDic['FORMATED_END_DATE']
            dt['Severity'] = downDic['SEVERITY']
            # Single quotes are stripped from the description before storing.
            dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downDic['GOCDB_PORTAL_URL']

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        return S_OK()

    def doCache(self):
        '''
      Reads the downtime cache table for the element described by the command
      arguments. Returns S_OK with the single cached downtime dictionary that
      covers the target date, or S_OK( None ) when no cached downtime does.
      Errors from _prepareCommand or from the cache query are returned as is.
    '''

        prepared = self._prepareCommand()
        if not prepared['OK']:
            return prepared
        element, elementName, hours, gOCDBServiceType = prepared['Value']

        cached = self.rmClient.selectDowntimeCache(
            element=element,
            name=elementName,
            gOCDBServiceType=gOCDBServiceType)
        if not cached['OK']:
            return cached

        # one dictionary ( column name -> value ) per cached row
        downtimes = [
            dict(zip(cached['Columns'], row)) for row in cached['Value']
        ]

        # the target date is now, or <hours> later when 'hours' is given
        targetDate = datetime.utcnow()
        if hours is not None:
            targetDate = targetDate + timedelta(hours=hours)

        def coversTarget(downtime):
            # strictly started before, and strictly ending after, the target date
            return (downtime['StartDate'] < targetDate) and (
                downtime['EndDate'] > targetDate)

        # Buffer used to hand back a single downtime when outages and warnings
        # overlap for the same element: outages are pushed to the front,
        # warnings are queued at the back.
        overlapping = []

        if hours is None:
            # IN THE PRESENT: sorted by 'EndDate', so the downtime handled last
            # within each severity is the one ending latest
            downtimes.sort(key=itemgetter('Name', 'Severity', 'EndDate'))

            for downtime in downtimes:
                if not coversTarget(downtime):
                    continue
                severity = downtime['Severity'].upper()
                if severity == 'OUTAGE':
                    # latest ending outage ends up at the front
                    overlapping.insert(0, downtime)
                elif severity == 'WARNING':
                    # latest ending warning ends up at the back
                    overlapping.append(downtime)
        else:
            # IN THE FUTURE: sorted by 'StartDate', the first downtime covering
            # the target date is the one we want, so stop at the first hit
            downtimes.sort(key=itemgetter('Name', 'Severity', 'StartDate'))

            for downtime in downtimes:
                if coversTarget(downtime):
                    overlapping = [downtime]
                    break

        chosen = None
        if overlapping:
            front = overlapping[0]
            # an outage at the front wins; otherwise take the warning at the back
            if front['Severity'].upper() == 'OUTAGE':
                chosen = front
            else:
                chosen = overlapping[-1]

        return S_OK(chosen)

    def doMaster(self):
        ''' Master method: gathers the names to check and runs doNew on them.
        - Sites are the GOC sites given by CSHelpers.getGOCSites.
        - Resources are the storage element hosts, extended with the FTS3
          servers and the computing elements when those lookups succeed.
        - Error messages from doNew are appended to self.metrics[ 'failed' ];
          self.metrics is what gets returned inside S_OK.
    '''

        sitesRes = CSHelpers.getGOCSites()
        if not sitesRes['OK']:
            return sitesRes
        gocSites = sitesRes['Value']

        hostsRes = CSHelpers.getStorageElementsHosts()
        if not hostsRes['OK']:
            return hostsRes
        # the returned host list is extended in place below
        resources = hostsRes['Value']

        # TODO: file catalogs need also to use their hosts
        # ( CSHelpers.getFileCatalogs is not queried yet )

        # optional sources: a failed lookup is skipped, not treated as an error
        for lookup in (getFTS3Servers, CSHelpers.getComputingElements):
            extra = lookup()
            if extra['OK']:
                resources.extend(extra['Value'])

        self.log.verbose('Processing Sites: %s' % ', '.join(gocSites))

        outcome = self.doNew(('Site', gocSites))
        if not outcome['OK']:
            self.metrics['failed'].append(outcome['Message'])

        self.log.verbose('Processing Resources: %s' % ', '.join(resources))

        outcome = self.doNew(('Resource', resources))
        if not outcome['OK']:
            self.metrics['failed'].append(outcome['Message'])

        return S_OK(self.metrics)
Esempio n. 6
0
class DowntimeCommand(Command):
    '''
    Downtime "master" Command.

    Asks the GOCDB client for the downtimes of a Site or Resource, stores
    them through the ResourceManagementClient downtime cache and reports the
    first one that matches the requested time window.
  '''
    def __init__(self, args=None, clients=None):
        '''
      Forwards args and clients to the Command constructor, then binds the
      GOCDB and ResourceManagement clients: the ones found in self.apis are
      preferred, otherwise default instances are created.
    '''

        super(DowntimeCommand, self).__init__(args, clients)

        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        '''
      Stores the results of doNew method on the database.

      :param result: list of downtime dictionaries as built by doNew
      :return: the first failing answer of addOrModifyDowntimeCache, S_OK()
               when every downtime was stored ( or there was none )
    '''

        for dt in result:

            resQuery = self.rmClient.addOrModifyDowntimeCache(
                dt['DowntimeID'], dt['Element'], dt['Name'], dt['StartDate'],
                dt['EndDate'], dt['Severity'], dt['Description'], dt['Link'])
            if not resQuery['OK']:
                return resQuery
        return S_OK()

    def _prepareCommand(self):
        '''
      DowntimeCommand requires three arguments:
      - name : <str>
      - element : Site / Resource
      - elementType: <str>

      An optional 'hours' argument is read as well ( None when absent ).

      If the elements are Site(s), we need to get their GOCDB names. They may
      not have, so we ignore them if they do not have.

      :return: S_OK( ( element, elementName, hours ) ) or S_ERROR
    '''

        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args['element']

        if 'elementType' not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is not Site nor Resource')

        hours = None
        if 'hours' in self.args:
            hours = self.args['hours']

        # Transform DIRAC site names into GOCDB topics
        if element == 'Site':

            gocSite = getGOCSiteName(elementName)
            if not gocSite['OK']:
                return gocSite
            elementName = gocSite['Value']

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == 'StorageElement':

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost:
                return S_ERROR('No seHost for %s' % elementName)
            elementName = seHost

        return S_OK((element, elementName, hours))

    def doNew(self, masterParams=None):
        '''
      Gets the parameters to run, either from the master method or from its
      own arguments.

      The gocdb client is contacted once; the server is not very stable, so
      in case of a URLError it is tried a second time.

      If there are downtimes, they are recorded and the first one matching
      the requested window is returned.

      :param masterParams: optional ( element, elementNames ) tuple; when it is
                           None the parameters come from _prepareCommand
      :return: S_OK( downtime dictionary or None ) or S_ERROR
    '''

        if masterParams is not None:
            element, elementNames = masterParams
            hours = None
            # In master mode None is passed to getStatus as the name and the
            # answer is filtered against elementNames further down.
            elementName = None
        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours = params['Value']
            elementNames = [elementName]

        startDate = datetime.utcnow() - timedelta(days=2)

        # NOTE(review): 120 is the fourth getStatus argument, presumably a
        # look-ahead in hours -- confirm against GOCDBClient.getStatus
        try:
            results = self.gClient.getStatus(element, elementName, startDate,
                                             120)
        except urllib2.URLError:
            try:
                #Let's give it a second chance..
                results = self.gClient.getStatus(element, elementName,
                                                 startDate, 120)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:
            return S_OK(None)

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():

            dt = {}
            if element == 'Resource':
                dt['Name'] = downDic['HOSTNAME']
            else:
                dt['Name'] = downDic['SITENAME']

            # downtimes of elements we were not asked about are dropped
            if dt['Name'] not in elementNames:
                continue

            dt['DowntimeID'] = downtime
            dt['Element'] = element
            dt['StartDate'] = downDic['FORMATED_START_DATE']
            dt['EndDate'] = downDic['FORMATED_END_DATE']
            dt['Severity'] = downDic['SEVERITY']
            dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downDic['GOCDB_PORTAL_URL']

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        # We return only one downtime, if it is ongoing at dtDate.
        # UTC is used here as well, to agree with the query window above.
        startDate = datetime.utcnow()
        endDate = startDate
        if hours:
            startDate = startDate + timedelta(hours=hours)

        result = None
        for dt in uniformResult:

            # dates are compared as strings
            if (dt['StartDate'] < str(startDate)) and (dt['EndDate'] >
                                                       str(endDate)):
                result = dt
                break

        return S_OK(result)
Esempio n. 7
0
class DowntimeCommand( Command ):
  '''
    Downtime "master" Command.

    Asks the GOCDB client for downtimes of Sites / Resources, removes expired
    entries from the downtime cache and stores the downtimes found.
  '''

  def __init__( self, args = None, clients = None ):
    '''
      Forwards args and clients to the Command constructor, then binds the
      GOCDB and ResourceManagement clients: the ones found in self.apis are
      preferred, otherwise default instances are created.
    '''

    super( DowntimeCommand, self ).__init__( args, clients )

    if 'GOCDBClient' in self.apis:
      self.gClient = self.apis[ 'GOCDBClient' ]
    else:
      self.gClient = GOCDBClient()

    if 'ResourceManagementClient' in self.apis:
      self.rmClient = self.apis[ 'ResourceManagementClient' ]
    else:
      self.rmClient = ResourceManagementClient()

  def _storeCommand( self, result ):
    '''
      Stores the results of doNew method on the database.

      :param result: list of downtime dictionaries as built by doNew
      :return: the first failing answer of addOrModifyDowntimeCache; otherwise
               the answer for the last downtime, or S_OK() when the list is empty
    '''

    # Default answer, so an empty list does not end up on an unbound name.
    resQuery = S_OK()

    for dt in result:
      resQuery = self.rmClient.addOrModifyDowntimeCache(
                               downtimeID = dt[ 'DowntimeID' ],
                               element = dt[ 'Element' ],
                               name = dt[ 'Name' ],
                               startDate = dt[ 'StartDate' ],
                               endDate = dt[ 'EndDate' ],
                               severity = dt[ 'Severity' ],
                               description = dt[ 'Description' ],
                               link = dt[ 'Link' ],
                               gocdbServiceType = dt[ 'GOCDBServiceType' ] )
      # Stop at the first failure, later writes must not hide it.
      if not resQuery[ 'OK' ]:
        return resQuery
    return resQuery


  def _cleanCommand( self, element, elementNames ):
    '''
      Clear Cache from expired DT.

      :param element: element kind used to select cache rows
      :param elementNames: names whose cached downtimes are inspected
      :return: S_OK( list of deleteDowntimeCache answers ), or the failing
               answer of selectDowntimeCache
    '''

    resQuery = []

    currentDate = datetime.utcnow()

    for elementName in elementNames:
      #reading all the cache entries
      result = self.rmClient.selectDowntimeCache(
                               element = element,
                               name = elementName
                               )

      if not result[ 'OK' ]:
        return result

      uniformResult = [ dict( zip( result[ 'Columns' ], res ) ) for res in result[ 'Value' ] ]

      # Nothing cached for this name: move on to the next one instead of
      # leaving the remaining names unchecked.
      if not uniformResult:
        continue

      for dt in uniformResult:
        if dt[ 'EndDate' ] < currentDate:
          deleteRes = self.rmClient.deleteDowntimeCache(
                               downtimeID = dt[ 'DowntimeID' ]
                               )
          resQuery.append( deleteRes )

    return S_OK( resQuery )


  def _prepareCommand( self ):
    '''
      DowntimeCommand requires three arguments:
      - name : <str>
      - element : Site / Resource
      - elementType: <str>

      An optional 'hours' argument is read as well ( None when absent ).

      If the elements are Site(s), we need to get their GOCDB names. They may
      not have, so we ignore them if they do not have.

      :return: S_OK( ( element, elementName, hours, gocdbServiceType ) ) or S_ERROR
    '''

    if 'name' not in self.args:
      return S_ERROR( '"name" not found in self.args' )
    elementName = self.args[ 'name' ]

    if 'element' not in self.args:
      return S_ERROR( '"element" not found in self.args' )
    element = self.args[ 'element' ]

    if 'elementType' not in self.args:
      return S_ERROR( '"elementType" not found in self.args' )
    elementType = self.args[ 'elementType' ]

    if element not in [ 'Site', 'Resource' ]:
      return S_ERROR( 'element is neither Site nor Resource' )

    hours = None
    if 'hours' in self.args:
      hours = self.args[ 'hours' ]

    gocdbServiceType = None

    # Transform DIRAC site names into GOCDB topics
    if element == 'Site':

      gocSite = getGOCSiteName( elementName )
      if not gocSite[ 'OK' ]:
        return gocSite
      elementName = gocSite[ 'Value' ]

    # The DIRAC se names mean nothing on the grid, but their hosts do mean.
    elif elementType == 'StorageElement':
      # We need to distinguish if it's tape or disk. The options are fetched
      # once and a failed lookup is reported instead of raising on 'Value'.
      seOptions = getStorageElementOptions( elementName )
      if not seOptions[ 'OK' ]:
        return seOptions
      seOptions = seOptions[ 'Value' ]

      if seOptions[ 'TapeSE' ]:
        gocdbServiceType = "srm.nearline"
      elif seOptions[ 'DiskSE' ]:
        gocdbServiceType = "srm"

      seHost = CSHelpers.getSEHost( elementName )
      if not seHost:
        return S_ERROR( 'No seHost for %s' % elementName )
      elementName = seHost

    elif elementType == 'FTS' or elementType == 'FTS3':
      gocdbServiceType = 'FTS'
      try:
        #WARNING: this method presupposes that the server is an FTS3 type
        elementName  = getGOCFTSName(elementName)
      except Exception:
        # still best effort, but no longer swallows SystemExit / KeyboardInterrupt
        return S_ERROR( 'No FTS3 server specified in dirac.cfg (see Resources/FTSEndpoints)' )

    return S_OK( ( element, elementName, hours, gocdbServiceType ) )


  def doNew( self, masterParams = None ):
    '''
      Gets the parameters to run, either from the master method or from its
      own arguments.

      The gocdb client is contacted once for all the element names; the server
      is not very stable, so in case of a URLError it is tried a second time.

      If there are downtimes, expired cache entries are removed and the
      downtimes are recorded.

      :param masterParams: optional ( element, elementNames ) tuple; when it is
                           None the parameters come from _prepareCommand
      :return: S_OK() once stored, S_OK( None ) when GOCDB reports nothing, or S_ERROR
    '''

    if masterParams is not None:
      element, elementNames = masterParams
      #translate DIRAC CS elementNames into GOCDB elementNames
      # NOTE(review): getSEHost is applied whatever the element is -- confirm
      # this is intended when the master passes Sites
      elementNames = [ CSHelpers.getSEHost( e ) for e in elementNames ]
      hours = None
      gocdbServiceType = None

    else:
      params = self._prepareCommand()
      if not params[ 'OK' ]:
        return params
      element, elementName, hours, gocdbServiceType = params[ 'Value' ]
      elementNames = [ elementName ]

    #WARNING: checking all the DT that are ongoing or starting in given <hours> from now
    startDate = None
    if hours is not None:
      startDate = datetime.utcnow() + timedelta( hours = hours )

    try:
      results = self.gClient.getStatus( element, elementNames, startDate )
    except urllib2.URLError:
      try:
        #Let's give it a second chance..
        results = self.gClient.getStatus( element, elementNames, startDate )
      except urllib2.URLError as e:
        return S_ERROR( e )

    if not results[ 'OK' ]:
      return results
    results = results[ 'Value' ]

    if results is None:
      return S_OK( None )

    #cleaning the Cache
    cleanRes = self._cleanCommand( element, elementNames )
    if not cleanRes[ 'OK' ]:
      return cleanRes

    uniformResult = []

    # Humanize the results into a dictionary, not the most optimal, but readable
    for downtime, downDic in results.items():

      dt = {}

      if 'HOSTNAME' in downDic:
        dt[ 'Name' ] = downDic[ 'HOSTNAME' ]
      elif 'SITENAME' in downDic:
        dt[ 'Name' ] = downDic[ 'SITENAME' ]
      else:
        return S_ERROR( "SITENAME or HOSTNAME are missing" )

      if 'SERVICE_TYPE' in downDic:
        dt[ 'GOCDBServiceType' ] = downDic[ 'SERVICE_TYPE' ]
        if gocdbServiceType:
          gocdbST = gocdbServiceType.lower()
          csST = downDic[ 'SERVICE_TYPE' ].lower()
          if gocdbST != csST:
            return S_ERROR( "SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s" % (gocdbST, csST, dt[ 'Name' ]) )
      else:
        #WARNING: do we want None as default value?
        dt[ 'GOCDBServiceType' ] = None

      dt[ 'DowntimeID' ] = downtime
      dt[ 'Element' ] = element
      dt[ 'StartDate' ] = downDic[ 'FORMATED_START_DATE' ]
      dt[ 'EndDate' ] = downDic[ 'FORMATED_END_DATE' ]
      dt[ 'Severity' ] = downDic[ 'SEVERITY' ]
      dt[ 'Description' ] = downDic[ 'DESCRIPTION' ].replace( '\'', '' )
      dt[ 'Link' ] = downDic[ 'GOCDB_PORTAL_URL' ]

      uniformResult.append( dt )

    storeRes = self._storeCommand( uniformResult )
    if not storeRes[ 'OK' ]:
      return storeRes

    return S_OK()
Esempio n. 8
0
class DowntimeCommand(Command):
    """
    Downtime "master" Command.

    Asks the GOCDB client for the downtimes of a Site or Resource, stores
    them through the ResourceManagementClient downtime cache and reports the
    last one that matches the requested time window.
  """

    def __init__(self, args=None, clients=None):
        """
      Forwards args and clients to the Command constructor, then binds the
      GOCDB and ResourceManagement clients: the ones found in self.apis are
      preferred, otherwise default instances are created.
    """

        super(DowntimeCommand, self).__init__(args, clients)

        if "GOCDBClient" in self.apis:
            self.gClient = self.apis["GOCDBClient"]
        else:
            self.gClient = GOCDBClient()

        if "ResourceManagementClient" in self.apis:
            self.rmClient = self.apis["ResourceManagementClient"]
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        """
      Stores the results of doNew method on the database.

      :param result: list of downtime dictionaries as built by doNew
      :return: the first failing answer of addOrModifyDowntimeCache, S_OK()
               when every downtime was stored ( or there was none )
    """

        for dt in result:

            resQuery = self.rmClient.addOrModifyDowntimeCache(
                dt["DowntimeID"],
                dt["Element"],
                dt["Name"],
                dt["StartDate"],
                dt["EndDate"],
                dt["Severity"],
                dt["Description"],
                dt["Link"],
            )
            if not resQuery["OK"]:
                return resQuery
        return S_OK()

    def _prepareCommand(self):
        """
      DowntimeCommand requires three arguments:
      - name : <str>
      - element : Site / Resource
      - elementType: <str>

      An optional 'hours' argument is read as well ( None when absent ).

      If the elements are Site(s), we need to get their GOCDB names. They may
      not have, so we ignore them if they do not have.

      :return: S_OK( ( element, elementName, hours ) ) or S_ERROR
    """

        if "name" not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args["name"]

        if "element" not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args["element"]

        if "elementType" not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args["elementType"]

        if element not in ["Site", "Resource"]:
            return S_ERROR("element is not Site nor Resource")

        hours = None
        if "hours" in self.args:
            hours = self.args["hours"]

        # Transform DIRAC site names into GOCDB topics
        if element == "Site":

            gocSite = getGOCSiteName(elementName)
            if not gocSite["OK"]:
                return gocSite
            elementName = gocSite["Value"]

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == "StorageElement":

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost:
                return S_ERROR("No seHost for %s" % elementName)
            elementName = seHost

        return S_OK((element, elementName, hours))

    def doNew(self, masterParams=None):
        """
      Gets the parameters to run, either from the master method or from its
      own arguments.

      The gocdb client is contacted once; the server is not very stable, so
      in case of a URLError it is tried a second time.

      If there are downtimes, they are recorded and the last one matching the
      requested window is returned.

      :param masterParams: optional ( element, elementNames ) tuple; when it is
                           None the parameters come from _prepareCommand
      :return: S_OK( downtime dictionary or None ) or S_ERROR
    """

        if masterParams is not None:
            element, elementNames = masterParams
            hours = None
            # In master mode None is passed to getStatus as the name and the
            # answer is filtered against elementNames further down.
            elementName = None
        else:
            params = self._prepareCommand()
            if not params["OK"]:
                return params
            element, elementName, hours = params["Value"]
            elementNames = [elementName]

        startDate = datetime.utcnow() - timedelta(days=14)

        # NOTE(review): 120 is the fourth getStatus argument, presumably a
        # look-ahead in hours -- confirm against GOCDBClient.getStatus
        try:
            results = self.gClient.getStatus(element, elementName, startDate, 120)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, elementName, startDate, 120)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results["OK"]:
            return results
        results = results["Value"]

        if results is None:
            return S_OK(None)

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():

            dt = {}
            if element == "Resource":
                dt["Name"] = downDic["HOSTNAME"]
            else:
                dt["Name"] = downDic["SITENAME"]

            # downtimes of elements we were not asked about are dropped
            if dt["Name"] not in elementNames:
                continue

            dt["DowntimeID"] = downtime
            dt["Element"] = element
            dt["StartDate"] = downDic["FORMATED_START_DATE"]
            dt["EndDate"] = downDic["FORMATED_END_DATE"]
            dt["Severity"] = downDic["SEVERITY"]
            dt["Description"] = downDic["DESCRIPTION"].replace("'", "")
            dt["Link"] = downDic["GOCDB_PORTAL_URL"]

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes["OK"]:
            return storeRes

        # We return only one downtime, if it is ongoing at dtDate
        startDate = datetime.utcnow()
        endDate = startDate
        if hours:
            startDate = startDate + timedelta(hours=hours)

        result = None
        for dt in uniformResult:

            # dates are compared as strings
            if (dt["StartDate"] < str(startDate)) and (dt["EndDate"] > str(endDate)):
                # No break on purpose: we want to take the latest one
                # ( they are sorted by insertion time )
                result = dt

        return S_OK(result)
Esempio n. 9
0
class DowntimeCommand( Command ):
  '''
    Downtime "master" Command.

    Asks the GOCDB client for the downtimes of a Site or Resource, stores
    them through the ResourceManagementClient downtime cache and reports one
    downtime matching the requested date, outages first.
  '''

  def __init__( self, args = None, clients = None ):
    '''
      Forwards args and clients to the Command constructor, then binds the
      GOCDB and ResourceManagement clients: the ones found in self.apis are
      preferred, otherwise default instances are created.
    '''

    super( DowntimeCommand, self ).__init__( args, clients )

    if 'GOCDBClient' in self.apis:
      self.gClient = self.apis[ 'GOCDBClient' ]
    else:
      self.gClient = GOCDBClient()

    if 'ResourceManagementClient' in self.apis:
      self.rmClient = self.apis[ 'ResourceManagementClient' ]
    else:
      self.rmClient = ResourceManagementClient()

  def _storeCommand( self, result ):
    '''
      Stores the results of doNew method on the database.

      :param result: list of downtime dictionaries as built by doNew
      :return: the first failing answer of addOrModifyDowntimeCache, S_OK()
               when every downtime was stored ( or there was none )
    '''

    for dt in result:
      resQuery = self.rmClient.addOrModifyDowntimeCache(
                               downtimeID = dt[ 'DowntimeID' ],
                               element = dt[ 'Element' ],
                               name = dt[ 'Name' ],
                               startDate = dt[ 'StartDate' ],
                               endDate = dt[ 'EndDate' ],
                               severity = dt[ 'Severity' ],
                               description = dt[ 'Description' ],
                               link = dt[ 'Link' ],
                               gocdbServiceType = dt[ 'GOCDBServiceType' ] )
      if not resQuery[ 'OK' ]:
        return resQuery
    return S_OK()

  def _prepareCommand( self ):
    '''
      DowntimeCommand requires three arguments:
      - name : <str>
      - element : Site / Resource
      - elementType: <str>

      An optional 'hours' argument is read as well ( None when absent ).

      If the elements are Site(s), we need to get their GOCDB names. They may
      not have, so we ignore them if they do not have.

      :return: S_OK( ( element, elementName, hours, gocdbServiceType ) ) or S_ERROR
    '''

    if 'name' not in self.args:
      return S_ERROR( '"name" not found in self.args' )
    elementName = self.args[ 'name' ]

    if 'element' not in self.args:
      return S_ERROR( '"element" not found in self.args' )
    element = self.args[ 'element' ]

    if 'elementType' not in self.args:
      return S_ERROR( '"elementType" not found in self.args' )
    elementType = self.args[ 'elementType' ]

    if element not in [ 'Site', 'Resource' ]:
      return S_ERROR( 'element is not Site nor Resource' )

    hours = None
    if 'hours' in self.args:
      hours = self.args[ 'hours' ]

    gocdbServiceType = None

    # Transform DIRAC site names into GOCDB topics
    if element == 'Site':

      gocSite = getGOCSiteName( elementName )
      if not gocSite[ 'OK' ]:
        return gocSite
      elementName = gocSite[ 'Value' ]

    # The DIRAC se names mean nothing on the grid, but their hosts do mean.
    elif elementType == 'StorageElement':
      # We need to distinguish if it's tape or disk. The options are fetched
      # once and a failed lookup is reported instead of raising on 'Value'.
      seOptions = getStorageElementOptions( elementName )
      if not seOptions[ 'OK' ]:
        return seOptions
      seOptions = seOptions[ 'Value' ]

      if seOptions[ 'TapeSE' ]:
        gocdbServiceType = "srm.nearline"
      elif seOptions[ 'DiskSE' ]:
        gocdbServiceType = "srm"

      seHost = CSHelpers.getSEHost( elementName )
      if not seHost:
        return S_ERROR( 'No seHost for %s' % elementName )
      elementName = seHost

    return S_OK( ( element, elementName, hours, gocdbServiceType ) )

  def doNew( self, masterParams = None ):
    '''
      Gets the parameters to run, either from the master method or from its
      own arguments.

      The gocdb client is contacted once; the server is not very stable, so
      in case of a URLError it is tried a second time.

      If there are downtimes, they are recorded and one of those matching the
      requested date is returned: the last outage, else the last warning.

      :param masterParams: optional ( element, elementNames ) tuple; when it is
                           None the parameters come from _prepareCommand
      :return: S_OK( downtime dictionary or None ) or S_ERROR
    '''

    if masterParams is not None:
      element, elementNames = masterParams
      hours = None
      # In master mode None is passed to getStatus as the name and the
      # answer is filtered against elementNames further down.
      elementName = None
      gocdbServiceType = None
    else:
      params = self._prepareCommand()
      if not params[ 'OK' ]:
        return params
      element, elementName, hours, gocdbServiceType = params[ 'Value' ]
      elementNames = [ elementName ]

    startDate = datetime.utcnow() - timedelta( days = 14 )

    # NOTE(review): 120 is the fourth getStatus argument, presumably a
    # look-ahead in hours -- confirm against GOCDBClient.getStatus
    try:
      results = self.gClient.getStatus( element, elementName, startDate, 120 )
    except urllib2.URLError:
      try:
        #Let's give it a second chance..
        results = self.gClient.getStatus( element, elementName, startDate, 120 )
      except urllib2.URLError as e:
        return S_ERROR( e )

    if not results[ 'OK' ]:
      return results
    results = results[ 'Value' ]

    if results is None:
      return S_OK( None )

    uniformResult = []

    # Humanize the results into a dictionary, not the most optimal, but readable
    for downtime, downDic in results.items():

      dt = {}

      # SERVICE_TYPE is not always defined: read it once, None when missing
      serviceType = downDic.get( 'SERVICE_TYPE' )

      # skip downtimes declared for another service type of the same host
      if gocdbServiceType and serviceType:
        if gocdbServiceType.lower() != serviceType.lower():
          continue

      if element == 'Resource':
        dt[ 'Name' ] = downDic[ 'HOSTNAME' ]
      else:
        dt[ 'Name' ] = downDic[ 'SITENAME' ]

      # downtimes of elements we were not asked about are dropped
      if dt[ 'Name' ] not in elementNames:
        continue

      dt[ 'DowntimeID' ] = downtime
      dt[ 'Element' ] = element
      dt[ 'StartDate' ] = downDic[ 'FORMATED_START_DATE' ]
      dt[ 'EndDate' ] = downDic[ 'FORMATED_END_DATE' ]
      dt[ 'Severity' ] = downDic[ 'SEVERITY' ]
      dt[ 'Description' ] = downDic[ 'DESCRIPTION' ].replace( '\'', '' )
      dt[ 'Link' ] = downDic[ 'GOCDB_PORTAL_URL' ]
      # always set, _storeCommand reads this key for every downtime
      dt[ 'GOCDBServiceType' ] = serviceType

      uniformResult.append( dt )

    storeRes = self._storeCommand( uniformResult )
    if not storeRes[ 'OK' ]:
      return storeRes

    # We return only one downtime, if its ongoing at dtDate
    startDate = datetime.utcnow()
    if hours:
      startDate = startDate + timedelta( hours = hours )
    endDate = startDate

    result = None
    dtOutages = []
    dtWarnings = []

    for dt in uniformResult:
      # dates are compared as strings
      if ( dt[ 'StartDate' ] < str( startDate ) ) and ( dt[ 'EndDate' ] > str( endDate ) ):
        # case-insensitive, so both 'Outage' and 'OUTAGE' count as outages
        if str( dt[ 'Severity' ] ).upper() == 'OUTAGE':
          dtOutages.append( dt )
        else:
          dtWarnings.append( dt )

    #In case many overlapping downtimes have been declared, outages are
    #preferred over warnings, and within each group we want to get the latest
    #one ( they are sorted by insertion time )
    if dtOutages:
      result = dtOutages[-1]
    elif dtWarnings:
      result = dtWarnings[-1]

    return S_OK( result )
Esempio n. 10
0
class DowntimeCommand(Command):
  '''
    Downtime "master" Command or removed DTs.
  '''

  def __init__(self, args=None, clients=None):
    '''
      Initialise the base Command and resolve the two clients used by this one.

      A client found in self.apis is reused; a default GOCDBClient /
      ResourceManagementClient is only instantiated when its key is missing.
    '''

    super(DowntimeCommand, self).__init__(args, clients)

    apis = self.apis

    self.gClient = apis['GOCDBClient'] if 'GOCDBClient' in apis else GOCDBClient()
    self.rmClient = (apis['ResourceManagementClient'] if 'ResourceManagementClient' in apis
                     else ResourceManagementClient())

  def _storeCommand(self, result):
    '''
      Stores the results of doNew method on the database.

      :param result: iterable of downtime dictionaries carrying the keys
                     DowntimeID, Element, Name, StartDate, EndDate, Severity,
                     Description, Link and gOCDBServiceType
      :return: S_OK() when there is nothing to store; otherwise the first failed
               answer of addOrModifyDowntimeCache, or the answer of the last
               call when every call succeeded
    '''

    # Defined up-front so that an empty input does not end in an UnboundLocalError
    resQuery = S_OK()
    firstFailure = None

    for dt in result:
      resQuery = self.rmClient.addOrModifyDowntimeCache(downtimeID=dt['DowntimeID'],
                                                        element=dt['Element'],
                                                        name=dt['Name'],
                                                        startDate=dt['StartDate'],
                                                        endDate=dt['EndDate'],
                                                        severity=dt['Severity'],
                                                        description=dt['Description'],
                                                        link=dt['Link'],
                                                        gOCDBServiceType=dt['gOCDBServiceType'])
      # The remaining downtimes are still stored, but the first failure is
      # remembered so that a later successful call does not mask it
      if firstFailure is None and not resQuery['OK']:
        firstFailure = resQuery

    if firstFailure is not None:
      return firstFailure
    return resQuery

  def _cleanCommand(self, element, elementNames):
    '''
      Clear Cache from expired DT.

      A cached downtime is deleted when its EndDate is in the past or when its
      Link is not among the ongoing downtimes reported by the GOCDB client.

      :param element: element kind used to select the cache rows
      :param elementNames: iterable of element names whose cached DTs are checked
      :return: S_OK( list with the answer of every deleteDowntimeCache call ), or
               the failed answer of selectDowntimeCache / getCurrentDTLinkList
    '''

    resQuery = []

    # Answer of the GOCDB client with the ongoing DTs. It does not depend on the
    # element name, so it is requested at most once, and only if the cache
    # holds something to check.
    gDTLinkList = None

    for elementName in elementNames:
      # get the list of all DTs stored in the cache
      result = self.rmClient.selectDowntimeCache(element=element,
                                                 name=elementName)

      if not result['OK']:
        return result

      uniformResult = [dict(zip(result['Columns'], res)) for res in result['Value']]

      if not uniformResult:
        continue

      if gDTLinkList is None:
        # get the list of all ongoing DTs from GocDB
        gDTLinkList = self.gClient.getCurrentDTLinkList()
        if not gDTLinkList['OK']:
          return gDTLinkList

      currentDate = datetime.utcnow()

      for dt in uniformResult:
        # if DT expired or DT not in the list of current DTs, then we remove it from the cache
        if dt['EndDate'] < currentDate or dt['Link'] not in gDTLinkList['Value']:
          resQuery.append(self.rmClient.deleteDowntimeCache(downtimeID=dt['DowntimeID']))

    return S_OK(resQuery)

  def _prepareCommand(self):
    '''
      Validates self.args and translates the DIRAC element name into the name
      used to query GOCDB.

      DowntimeCommand requires three arguments:
      - name : <str>
      - element : Site / Resource
      - elementType: <str>
      and accepts an optional one:
      - hours : handed back untouched ( None when absent )

      If the elements are Site(s), we need to get their GOCDB names. They may
      not have one, in which case the DIRAC name is kept.

      :return: S_OK( ( element, elementName, hours, gOCDBServiceType ) ), or an
               S_ERROR / the failed answer of a helper
    '''

    for argName in ('name', 'element', 'elementType'):
      if argName not in self.args:
        return S_ERROR('"%s" not found in self.args' % argName)

    elementName = self.args['name']
    element = self.args['element']
    elementType = self.args['elementType']

    if element not in ['Site', 'Resource']:
      return S_ERROR('element is neither Site nor Resource')

    hours = None
    if 'hours' in self.args:
      hours = self.args['hours']

    gOCDBServiceType = None

    # Transform DIRAC site names into GOCDB topics
    if element == 'Site':

      gocSite = getGOCSiteName(elementName)
      # On failure the site is most probably not a grid site - not an issue,
      # of course: elementName simply remains unchanged
      if gocSite['OK']:
        elementName = gocSite['Value']

    # The DIRAC se names mean nothing on the grid, but their hosts do mean.
    elif elementType == 'StorageElement':
      # We need to distinguish if it's tape or disk
      try:
        seOptions = StorageElement(elementName).options
      except AttributeError:  # Sometimes the SE can't be instantiated properly
        self.log.error(
            "Failure instantiating StorageElement object for %s" % elementName)
        return S_ERROR("Failure instantiating StorageElement")
      if 'SEType' in seOptions:
        # Type should follow the convention TXDY
        seType = seOptions['SEType']
        diskSE = re.search('D[1-9]', seType) is not None
        tapeSE = re.search('T[1-9]', seType) is not None
        # Tape takes precedence when the type declares both
        if tapeSE:
          gOCDBServiceType = "srm.nearline"
        elif diskSE:
          gOCDBServiceType = "srm"

      seHost = CSHelpers.getSEHost(elementName)
      if not seHost['OK']:
        return seHost
      seHost = seHost['Value']

      if not seHost:
        return S_ERROR('No seHost for %s' % elementName)
      elementName = seHost

    elif elementType in ['FTS', 'FTS3']:
      gOCDBServiceType = 'FTS'
      # WARNING: this method presupposes that the server is an FTS3 type
      gocSite = getGOCFTSName(elementName)
      if not gocSite['OK']:
        # Not fatal: the DIRAC name is used as it is
        self.log.warn("%s not in Resources/FTSEndpoints/FTS3 ?" % elementName)
      else:
        elementName = gocSite['Value']

    return S_OK((element, elementName, hours, gOCDBServiceType))

  def doNew(self, masterParams=None):
    '''
      Gets the parameters to run, either from the master method or from its
      own arguments.

      The GOCDB client is asked for the downtimes of the element names that are
      ongoing or starting within <hours>. The server is not very stable, so in
      case of an URLError it tries a second time.

      The cache is then cleaned and the downtimes found are stored in it; they
      are not returned.

      :param masterParams: ( element, elementNames ) when called by doMaster, in
                           which case hours is fixed to 120 and no service type
                           check is done; None to use self.args instead
      :return: S_OK() once the downtimes are stored, S_OK( None ) when GOCDB
               reports no downtimes, or an S_ERROR / failed intermediate answer
    '''

    if masterParams is not None:
      element, elementNames = masterParams
      hours = 120
      gOCDBServiceType = None

    else:
      params = self._prepareCommand()
      if not params['OK']:
        return params
      element, elementName, hours, gOCDBServiceType = params['Value']
      elementNames = [elementName]

    # WARNING: checking all the DT that are ongoing or starting in given <hours> from now
    try:
      results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
    except urllib2.URLError:
      try:
        # Let's give it a second chance..
        results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
      except urllib2.URLError as e:
        return S_ERROR(e)

    if not results['OK']:
      return results
    results = results['Value']

    if results is None:  # no downtimes found
      return S_OK(None)

    # cleaning the Cache
    cleanRes = self._cleanCommand(element, elementNames)
    if not cleanRes['OK']:
      return cleanRes

    uniformResult = []

    # Humanize the results into a dictionary, not the most optimal, but readable
    for downtime, downDic in results.iteritems():

      dt = {}

      # HOSTNAME is preferred, SITENAME is the fallback
      dt['Name'] = downDic.get('HOSTNAME', downDic.get('SITENAME'))
      if not dt['Name']:
        return S_ERROR("SITENAME and HOSTNAME are missing from downtime dictionary")

      dt['gOCDBServiceType'] = downDic.get('SERVICE_TYPE')

      # Only comparable when both GOCDB and the CS declare a service type
      if dt['gOCDBServiceType'] and gOCDBServiceType:
        if gOCDBServiceType.lower() != downDic['SERVICE_TYPE'].lower():
          # The GOCDB value is downDic[ 'SERVICE_TYPE' ], the CS one is gOCDBServiceType
          return S_ERROR("SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s" % (downDic['SERVICE_TYPE'],
                                                                                          gOCDBServiceType,
                                                                                          dt['Name']))

      dt['DowntimeID'] = downtime
      dt['Element'] = element
      dt['StartDate'] = downDic['FORMATED_START_DATE']
      dt['EndDate'] = downDic['FORMATED_END_DATE']
      dt['Severity'] = downDic['SEVERITY']
      # Single quotes are stripped from the description before it is stored
      dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
      dt['Link'] = downDic['GOCDB_PORTAL_URL']

      uniformResult.append(dt)

    storeRes = self._storeCommand(uniformResult)
    if not storeRes['OK']:
      return storeRes

    return S_OK()

  def doCache(self):
    '''
      Reads the downtime cache table and picks the one downtime that covers the
      target date, i.e. now or, when 'hours' is given, that many hours from now.

      :return: S_OK( <downtime dictionary> ), S_OK( None ) when no cached
               downtime covers the target date, or the failed answer of
               _prepareCommand / selectDowntimeCache
    '''

    prepared = self._prepareCommand()
    if not prepared['OK']:
      return prepared
    element, elementName, hours, gOCDBServiceType = prepared['Value']

    cached = self.rmClient.selectDowntimeCache(element=element, name=elementName,
                                               gOCDBServiceType=gOCDBServiceType)
    if not cached['OK']:
      return cached

    downtimes = [dict(zip(cached['Columns'], row)) for row in cached['Value']]

    # 'targetDate' can be either now or some 'hours' later in the future
    targetDate = datetime.utcnow()

    if hours is not None:
      # IN THE FUTURE
      targetDate = targetDate + timedelta(hours=hours)
      # ordering by 'StartDate' b/c if we look for DTs in the future
      # then we are interested in the earliest DTs
      ordered = sorted(downtimes, key=itemgetter('Name', 'Severity', 'StartDate'))

      # whatever covers the target date first in that order is the one we want
      firstHit = next((dt for dt in ordered
                       if (dt['StartDate'] < targetDate) and (dt['EndDate'] > targetDate)),
                      None)
      dtOverlapping = [] if firstHit is None else [firstHit]
    else:
      # IN THE PRESENT
      # ordering by 'EndDate' b/c if we look for DTs in the present
      # then we are interested in those DTs that last longer
      ordered = sorted(downtimes, key=itemgetter('Name', 'Severity', 'EndDate'))

      outages = []
      warnings = []
      for dt in ordered:
        if not ((dt['StartDate'] < targetDate) and (dt['EndDate'] > targetDate)):
          continue
        severity = dt['Severity'].upper()
        if severity == 'OUTAGE':
          outages.append(dt)
        elif severity == 'WARNING':
          warnings.append(dt)

      # buffer layout: the outage met last on top, the warning met last at the bottom
      dtOverlapping = outages[::-1] + warnings

    if not dtOverlapping:
      return S_OK(None)

    # an outage on top of the buffer wins, otherwise the bottom entry is taken
    if dtOverlapping[0]['Severity'].upper() == 'OUTAGE':
      return S_OK(dtOverlapping[0])
    return S_OK(dtOverlapping[-1])

  def doMaster(self):
    ''' Master method: runs doNew for all the sites and for all the resources.
        - It gets all the gocSites.
        - It gets all the storage elements hosts, to which the FTS3 servers and
          the CEs are added when they can be retrieved ( file catalogs will come ).
        - The message of a failed doNew run is appended to self.metrics[ 'failed' ].

        Returns S_OK( self.metrics ), or the failed answer of the sites / storage
        elements hosts lookup.
    '''

    sitesRes = CSHelpers.getGOCSites()
    if not sitesRes['OK']:
      return sitesRes
    gocSites = sitesRes['Value']

    hostsRes = CSHelpers.getStorageElementsHosts()
    if not hostsRes['OK']:
      return hostsRes
    # NOTE: this is the very list returned by the helper, it is extended in place
    resources = hostsRes['Value']

    ftsRes = getFTS3Servers()
    if ftsRes['OK']:
      resources.extend(ftsRes['Value'])

    # TODO: file catalogs need also to use their hosts

    ceRes = CSHelpers.getComputingElements()
    if ceRes['OK']:
      resources.extend(ceRes['Value'])

    # Sites first, then resources; a failure is recorded and does not stop the run
    for logFormat, element, names in (('Processing Sites: %s', 'Site', gocSites),
                                      ('Processing Resources: %s', 'Resource', resources)):
      self.log.verbose(logFormat % ', '.join(names))

      outcome = self.doNew((element, names))
      if not outcome['OK']:
        self.metrics['failed'].append(outcome['Message'])

    return S_OK(self.metrics)
Esempio n. 11
0
class DowntimeCommand(Command):
    """
    Downtime "master" Command or removed DTs.
    """
    def __init__(self, args=None, clients=None):
        """
        Initialise the base Command and resolve the two clients used by this one.

        A client found in self.apis is reused; a default GOCDBClient /
        ResourceManagementClient is only instantiated when its key is missing.
        """

        super(DowntimeCommand, self).__init__(args, clients)

        apis = self.apis

        self.gClient = apis["GOCDBClient"] if "GOCDBClient" in apis else GOCDBClient()
        self.rmClient = (
            apis["ResourceManagementClient"]
            if "ResourceManagementClient" in apis
            else ResourceManagementClient()
        )

    def _storeCommand(self, result):
        """
        Stores the results of doNew method on the database.

        :param result: iterable of downtime dictionaries carrying the keys
                       DowntimeID, Element, Name, StartDate, EndDate, Severity,
                       Description, Link and gOCDBServiceType
        :return: S_OK() when there is nothing to store; otherwise the first
                 failed answer of addOrModifyDowntimeCache, or the answer of the
                 last call when every call succeeded
        """

        # Defined up-front so that an empty input does not end in an UnboundLocalError
        resQuery = S_OK()
        firstFailure = None

        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt["DowntimeID"],
                element=dt["Element"],
                name=dt["Name"],
                startDate=dt["StartDate"],
                endDate=dt["EndDate"],
                severity=dt["Severity"],
                description=dt["Description"],
                link=dt["Link"],
                gOCDBServiceType=dt["gOCDBServiceType"],
            )
            # The remaining downtimes are still stored, but the first failure is
            # remembered so that a later successful call does not mask it
            if firstFailure is None and not resQuery["OK"]:
                firstFailure = resQuery

        if firstFailure is not None:
            return firstFailure
        return resQuery

    def _cleanCommand(self, element, elementNames):
        """
        Clear Cache from expired DT.

        A cached downtime is deleted when its EndDate is in the past or when its
        Link is not among the ongoing downtimes reported by the GOCDB client.

        :param element: element kind used to select the cache rows
        :param elementNames: iterable of element names whose cached DTs are checked
        :return: S_OK( list with the answer of every deleteDowntimeCache call ),
                 or the failed answer of selectDowntimeCache / getCurrentDTLinkList
        """

        resQuery = []

        # Answer of the GOCDB client with the ongoing DTs. It does not depend on
        # the element name, so it is requested at most once, and only if the
        # cache holds something to check.
        gDTLinkList = None

        for elementName in elementNames:
            # get the list of all DTs stored in the cache
            result = self.rmClient.selectDowntimeCache(element=element,
                                                       name=elementName)

            if not result["OK"]:
                return result

            uniformResult = [
                dict(zip(result["Columns"], res)) for res in result["Value"]
            ]

            if not uniformResult:
                continue

            if gDTLinkList is None:
                # get the list of all ongoing DTs from GocDB
                gDTLinkList = self.gClient.getCurrentDTLinkList()
                if not gDTLinkList["OK"]:
                    return gDTLinkList

            currentDate = datetime.utcnow()

            for dt in uniformResult:
                # if DT expired or DT not in the list of current DTs, then we remove it from the cache
                if dt["EndDate"] < currentDate or dt["Link"] not in gDTLinkList["Value"]:
                    resQuery.append(
                        self.rmClient.deleteDowntimeCache(downtimeID=dt["DowntimeID"]))

        return S_OK(resQuery)

    def _prepareCommand(self):
        """
        Validates self.args and translates the DIRAC element name into the
        name(s) used to query GOCDB.

        DowntimeCommand requires three arguments:
        - name : <str>
        - element : Site / Resource
        - elementType: <str>
        and accepts an optional one:
        - hours : handed back untouched ( None when absent )

        If the elements are Site(s), we need to get their GOCDB names. They may
        not have one, in which case the DIRAC name is kept.

        :return: S_OK( ( element, elementName, hours, gOCDBServiceType ) ), where
                 elementName is a list of hosts for a StorageElement; or an
                 S_ERROR / the failed answer of a helper
        """

        for argName in ("name", "element", "elementType"):
            if argName not in self.args:
                return S_ERROR('"%s" not found in self.args' % argName)

        elementName = self.args["name"]
        element = self.args["element"]
        elementType = self.args["elementType"]

        if element not in ["Site", "Resource"]:
            return S_ERROR("element is neither Site nor Resource")

        hours = None
        if "hours" in self.args:
            hours = self.args["hours"]

        gOCDBServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == "Site":

            gocSite = getGOCSiteName(elementName)
            # On failure the site is most probably not a grid site - not an
            # issue, of course: elementName simply remains unchanged
            if gocSite["OK"]:
                elementName = gocSite["Value"]

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == "StorageElement":
            # for SRM and SRM only, we need to distinguish if it's tape or disk
            # if it's not SRM, then gOCDBServiceType will be None (and we'll use them all)
            try:
                se = StorageElement(elementName)
                seOptions = se.options
                seProtocols = set(se.localAccessProtocolList) | set(
                    se.localWriteProtocolList)
            except AttributeError:  # Sometimes the SE can't be instantiated properly
                self.log.error("Failure instantiating StorageElement object",
                               elementName)
                return S_ERROR("Failure instantiating StorageElement")
            if "SEType" in seOptions and "srm" in seProtocols:
                # Type should follow the convention TXDY
                seType = seOptions["SEType"]
                diskSE = re.search("D[1-9]", seType) is not None
                tapeSE = re.search("T[1-9]", seType) is not None
                # Tape takes precedence when the type declares both
                if tapeSE:
                    gOCDBServiceType = "srm.nearline"
                elif diskSE:
                    gOCDBServiceType = "srm"

            res = getSEHosts(elementName)
            if not res["OK"]:
                return res
            seHosts = res["Value"]

            if not seHosts:
                return S_ERROR("No seHost(s) for %s" % elementName)
            # in this case it will return a list, because there might be more than one host only
            elementName = seHosts

        elif elementType in ["FTS", "FTS3"]:
            gOCDBServiceType = "FTS"
            # WARNING: this method presupposes that the server is an FTS3 type
            gocSite = getGOCFTSName(elementName)
            if not gocSite["OK"]:
                # Not fatal: the DIRAC name is used as it is
                self.log.warn("FTS not in Resources/FTSEndpoints/FTS3 ?",
                              elementName)
            else:
                elementName = gocSite["Value"]

        elif elementType == "ComputingElement":
            res = getCESiteMapping(elementName)
            if not res["OK"]:
                return res
            # A CE missing from the mapping is reported as an error answer
            # rather than left to raise a KeyError
            if elementName not in res["Value"]:
                return S_ERROR("No site found for CE %s" % elementName)
            siteName = res["Value"][elementName]
            # The CE type is read from Resources/Sites/<grid>/<site>/CEs/<ce>/CEType
            ceType = gConfig.getValue(
                cfgPath("Resources", "Sites",
                        siteName.split(".")[0], siteName, "CEs", elementName,
                        "CEType"))
            if ceType == "HTCondorCE":
                gOCDBServiceType = "org.opensciencegrid.htcondorce"
            elif ceType == "ARC":
                gOCDBServiceType = "ARC-CE"

        return S_OK((element, elementName, hours, gOCDBServiceType))

    def doNew(self, masterParams=None):
        """
        Queries GOCDB for downtimes and records them in the downtime cache.

        The parameters come either from the master method ( masterParams, an
        ( element, elementNames ) pair, with hours fixed to 120 ) or from the
        command's own arguments through _prepareCommand.

        The GOCDB server is not very stable, so a query failing with an
        URLError is tried a second time before giving up.

        Returns S_OK() once the downtimes are stored, S_OK( None ) when GOCDB
        reports no downtimes, or an S_ERROR / failed intermediate answer.
        """

        if masterParams is None:
            prepared = self._prepareCommand()
            if not prepared["OK"]:
                return prepared
            element, elementName, hours, gOCDBServiceType = prepared["Value"]
            # a StorageElement is already translated into a list of hosts
            elementNames = elementName if isinstance(elementName, list) else [elementName]
        else:
            element, elementNames = masterParams
            hours = 120
            gOCDBServiceType = None

        # WARNING: checking all the DT that are ongoing or starting in given <hours> from now
        for attemptsLeft in (1, 0):
            try:
                status = self.gClient.getStatus(element,
                                                name=elementNames,
                                                startingInHours=hours)
            except URLError as err:
                # Let's give it a second chance.. but not a third one
                if not attemptsLeft:
                    return S_ERROR(err)
            else:
                break

        if not status["OK"]:
            return status
        found = status["Value"]

        if found is None:  # no downtimes found
            return S_OK(None)

        # cleaning the Cache
        if elementNames:
            cleaned = self._cleanCommand(element, elementNames)
            if not cleaned["OK"]:
                return cleaned

        toStore = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtimeID, raw in found.items():

            # URL is preferred, then HOSTNAME, then SITENAME
            fallbackName = raw.get("HOSTNAME", raw.get("SITENAME"))
            name = raw.get("URL", fallbackName)
            if not name:
                return S_ERROR(
                    "URL, SITENAME and HOSTNAME are missing from downtime dictionary"
                )

            serviceType = raw.get("SERVICE_TYPE")

            # a mismatch is only reported, the downtime is stored anyway
            if serviceType and gOCDBServiceType and gOCDBServiceType.lower() != serviceType.lower():
                self.log.warn(
                    "SERVICE_TYPE mismatch",
                    "between GOCDB (%s) and CS (%s) for %s" %
                    (serviceType, gOCDBServiceType, name),
                )

            toStore.append({
                "Name": name,
                "gOCDBServiceType": serviceType,
                "DowntimeID": downtimeID,
                "Element": element,
                "StartDate": raw["FORMATED_START_DATE"],
                "EndDate": raw["FORMATED_END_DATE"],
                "Severity": raw["SEVERITY"],
                "Description": raw["DESCRIPTION"].replace("'", ""),
                "Link": raw["GOCDB_PORTAL_URL"],
            })

        stored = self._storeCommand(toStore)
        if not stored["OK"]:
            return stored

        return S_OK()

    def doCache(self):
        """
        Reads the downtime cache table and picks the one downtime that covers
        the target date, i.e. now or, when 'hours' is given, that many hours
        from now.

        Returns S_OK( <downtime dictionary> ), S_OK() when the cache holds
        nothing for the element or nothing covers the target date, or the
        failed answer of _prepareCommand / selectDowntimeCache.
        """

        prepared = self._prepareCommand()
        if not prepared["OK"]:
            return prepared
        element, elementName, hours, gOCDBServiceType = prepared["Value"]

        cached = self.rmClient.selectDowntimeCache(
            element=element,
            name=elementName,
            gOCDBServiceType=gOCDBServiceType)
        if not cached["OK"]:
            return cached
        if not cached["Value"]:
            return S_OK()

        downtimes = [dict(zip(cached["Columns"], row)) for row in cached["Value"]]

        # 'targetDate' can be either now or in some 'hours' from now
        targetDate = datetime.utcnow()

        if hours is not None:
            # IN THE FUTURE
            targetDate = targetDate + timedelta(hours=hours)
            # ordering by 'StartDate' b/c if we look for DTs in the future
            # then we are interested in the earliest DTs
            ordered = sorted(downtimes, key=itemgetter("Name", "Severity", "StartDate"))

            # whatever covers the target date first in that order is the one we want
            firstHit = next(
                (dt for dt in ordered
                 if (dt["StartDate"] < targetDate) and (dt["EndDate"] > targetDate)),
                None,
            )
            dtOverlapping = [] if firstHit is None else [firstHit]
        else:
            # IN THE PRESENT
            # ordering by 'EndDate' b/c if we look for DTs in the present
            # then we are interested in those DTs that last longer
            ordered = sorted(downtimes, key=itemgetter("Name", "Severity", "EndDate"))

            outages = []
            warnings = []
            for dt in ordered:
                if not ((dt["StartDate"] < targetDate) and (dt["EndDate"] > targetDate)):
                    continue
                severity = dt["Severity"].upper()
                if severity == "OUTAGE":
                    outages.append(dt)
                elif severity == "WARNING":
                    warnings.append(dt)

            # buffer layout: the outage met last on top, the warning met last at the bottom
            dtOverlapping = outages[::-1] + warnings

        if not dtOverlapping:
            return S_OK()

        # an outage on top of the buffer wins, otherwise the bottom entry is taken
        if dtOverlapping[0]["Severity"].upper() == "OUTAGE":
            return S_OK(dtOverlapping[0])
        return S_OK(dtOverlapping[-1])

    def doMaster(self):
        """Master method: runs doNew for all the sites and for all the resources.
        - It gets all the gocSites.
        - It gets all the storage elements hosts, to which the FTS3 server hosts
          and the CEs of the CE / site mapping are added when they can be
          retrieved ( file catalogs will come ).
        - The message of a failed doNew run is appended to self.metrics["failed"].

        Returns S_OK( self.metrics ), or the failed answer of the sites / storage
        elements hosts lookup.
        """

        sitesRes = getGOCSites()
        if not sitesRes["OK"]:
            return sitesRes
        gocSites = sitesRes["Value"]

        hostsRes = getStorageElementsHosts()
        if not hostsRes["OK"]:
            return hostsRes
        # NOTE: a non-empty list returned by the helper is extended in place
        resources = hostsRes["Value"] or []

        ftsRes = getFTS3Servers(hostOnly=True)
        if ftsRes["OK"] and ftsRes["Value"]:
            resources.extend(ftsRes["Value"])

        # TODO: file catalogs need also to use their hosts

        ceRes = getCESiteMapping()
        if ceRes["OK"] and ceRes["Value"]:
            resources.extend(list(ceRes["Value"]))

        # Sites first, then resources; a failure is recorded and does not stop the run
        for logHeader, element, names in (
            ("Processing Sites", "Site", gocSites),
            ("Processing Resources", "Resource", resources),
        ):
            self.log.verbose(logHeader, ", ".join(names or ["NONE"]))

            outcome = self.doNew((element, names))
            if not outcome["OK"]:
                self.metrics["failed"].append(outcome["Message"])

        return S_OK(self.metrics)
Esempio n. 12
0
class DowntimeCommand(Command):
    """
    Downtime "master" Command or removed DTs.
  """

    def __init__(self, args=None, clients=None):
        """Initialise the command and the two clients it relies on.

        After the base-class initialisation, the GOCDB client and the
        ResourceManagement client are taken from self.apis when they are
        present there; otherwise a new instance of each is created.

        :param args: forwarded untouched to the base class
        :param clients: forwarded untouched to the base class
        """

        super(DowntimeCommand, self).__init__(args, clients)

        available = self.apis

        # The default clients are only instantiated when none was supplied
        self.gClient = available["GOCDBClient"] if "GOCDBClient" in available else GOCDBClient()
        self.rmClient = (
            available["ResourceManagementClient"]
            if "ResourceManagementClient" in available
            else ResourceManagementClient()
        )

    def _storeCommand(self, result):
        """
      Stores the results of doNew method on the database.
    """

        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt["DowntimeID"],
                element=dt["Element"],
                name=dt["Name"],
                startDate=dt["StartDate"],
                endDate=dt["EndDate"],
                severity=dt["Severity"],
                description=dt["Description"],
                link=dt["Link"],
                gocdbServiceType=dt["GOCDBServiceType"],
            )
        return resQuery

    def _cleanCommand(self, element, elementNames):
        """
      Clear Cache from expired DT.
    """

        resQuery = []

        for elementName in elementNames:
            # get the list of all DTs stored in the cache
            result = self.rmClient.selectDowntimeCache(element=element, name=elementName)

            if not result["OK"]:
                return result

            uniformResult = [dict(zip(result["Columns"], res)) for res in result["Value"]]

            currentDate = datetime.utcnow()

            if len(uniformResult) == 0:
                continue

            # get the list of all ongoing DTs from GocDB
            gDTLinkList = self.gClient.getCurrentDTLinkList()
            if not gDTLinkList["OK"]:
                return gDTLinkList

            for dt in uniformResult:
                # if DT expired or DT not in the list of current DTs, then we remove it from the cache
                if dt["EndDate"] < currentDate or dt["Link"] not in gDTLinkList["Value"]:
                    result = self.rmClient.deleteDowntimeCache(downtimeID=dt["DowntimeID"])
                    resQuery.append(result)

        return S_OK(resQuery)

    def _prepareCommand(self):
        """Validate self.args and translate the element name for GOCDB.

        DowntimeCommand requires three arguments in self.args:
        - name : <str>
        - element : Site / Resource
        - elementType: <str>

        and accepts an optional one:
        - hours : returned as given, None when absent

        Name translation:
        - Site: the name is replaced by the result of getGOCSiteName; a failed
          lookup is returned as is.
        - StorageElement: the name is replaced by the host of the storage
          element, since the DIRAC SE names mean nothing on the grid. The
          service type is "srm.nearline" for a TapeSE and "srm" for a DiskSE.
        - FTS / FTS3: the name is replaced by the result of getGOCFTSName and
          the service type is "FTS".
        - any other resource: the name is left untouched.

        :return: S_ERROR (or the failed lookup result) if something is missing
                 or wrong, otherwise
                 S_OK((element, elementName, hours, gocdbServiceType))
        """

        for required in ("name", "element", "elementType"):
            if required not in self.args:
                return S_ERROR('"%s" not found in self.args' % required)

        elementName = self.args["name"]
        element = self.args["element"]
        elementType = self.args["elementType"]

        if element not in ("Site", "Resource"):
            return S_ERROR("element is neither Site nor Resource")

        hours = self.args["hours"] if "hours" in self.args else None

        gocdbServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == "Site":

            gocSite = getGOCSiteName(elementName)
            if not gocSite["OK"]:
                return gocSite
            elementName = gocSite["Value"]

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == "StorageElement":
            # We need to distinguish if it's tape or disk
            seOptions = getStorageElementOptions(elementName)
            if not seOptions["OK"]:
                return seOptions
            if seOptions["Value"].get("TapeSE"):
                gocdbServiceType = "srm.nearline"
            elif seOptions["Value"].get("DiskSE"):
                gocdbServiceType = "srm"

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost["OK"]:
                return seHost
            seHost = seHost["Value"]

            if not seHost:
                return S_ERROR("No seHost for %s" % elementName)
            elementName = seHost

        elif elementType in ("FTS", "FTS3"):
            gocdbServiceType = "FTS"
            try:
                # WARNING: this method presupposes that the server is an FTS3 type
                elementName = getGOCFTSName(elementName)
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must keep propagating
                return S_ERROR("No FTS3 server specified in dirac.cfg (see Resources/FTSEndpoints)")

        return S_OK((element, elementName, hours, gocdbServiceType))

    def doNew(self, masterParams=None):
        """Query GOCDB for downtimes and record them in the downtime cache.

        The parameters come either from the master method or from the
        command's own arguments:
        - masterParams given: it is an (element, elementNames) pair, and the
          downtimes ongoing or starting within the next 120 hours are requested;
        - masterParams not given: element, name, hours and the expected GOCDB
          service type come from _prepareCommand.

        The GOCDB server is not very stable, so the query is tried a second
        time if the first attempt raises urllib2.URLError.

        Before the new downtimes are stored, the cache is cleaned for the
        requested elements.

        :param masterParams: None, or a tuple (element, list of element names)
        :return: S_ERROR / the failed intermediate result on any failure,
                 S_OK(None) if GOCDB answered with None, S_OK() otherwise
        """

        if masterParams is not None:
            element, elementNames = masterParams
            hours = 120
            gocdbServiceType = None

        else:
            params = self._prepareCommand()
            if not params["OK"]:
                return params
            element, elementName, hours, gocdbServiceType = params["Value"]
            elementNames = [elementName]

        # WARNING: checking all the DT that are ongoing or starting in given <hours> from now
        try:
            results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results["OK"]:
            return results
        results = results["Value"]

        if results is None:
            return S_OK(None)

        # cleaning the Cache
        cleanRes = self._cleanCommand(element, elementNames)
        if not cleanRes["OK"]:
            return cleanRes

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():

            dt = {}

            if "HOSTNAME" in downDic:
                dt["Name"] = downDic["HOSTNAME"]
            elif "SITENAME" in downDic:
                dt["Name"] = downDic["SITENAME"]
            else:
                return S_ERROR("SITENAME or HOSTNAME are missing")

            if "SERVICE_TYPE" in downDic:
                dt["GOCDBServiceType"] = downDic["SERVICE_TYPE"]
                if gocdbServiceType:
                    # gocdbServiceType is the type expected from the configuration,
                    # downDic holds what GOCDB actually reports
                    csST = gocdbServiceType.lower()
                    gocdbST = downDic["SERVICE_TYPE"].lower()
                    if gocdbST != csST:
                        return S_ERROR(
                            "SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s" % (gocdbST, csST, dt["Name"])
                        )
            else:
                # WARNING: do we want None as default value?
                dt["GOCDBServiceType"] = None

            dt["DowntimeID"] = downtime
            dt["Element"] = element
            dt["StartDate"] = downDic["FORMATED_START_DATE"]
            dt["EndDate"] = downDic["FORMATED_END_DATE"]
            dt["Severity"] = downDic["SEVERITY"]
            # Single quotes are stripped from the description before it is stored
            dt["Description"] = downDic["DESCRIPTION"].replace("'", "")
            dt["Link"] = downDic["GOCDB_PORTAL_URL"]

            uniformResult.append(dt)

        if not uniformResult:
            # GOCDB reported no downtime at all: there is nothing to store
            return S_OK()

        storeRes = self._storeCommand(uniformResult)
        if not storeRes["OK"]:
            return storeRes

        return S_OK()