Ejemplo n.º 1
0
  def __discoverURL(self):
    """ Resolve the service URL and cache its pieces on the instance.

        It is called at initialization and in connect in case of issue.

        On success it sets:
          * self.serviceURL: the url (dips) chosen by __findServiceURL
          * self.__URLTuple: the split of serviceURL produced by Network.splitURL
          * self._serviceName: the last element of the URL tuple (typically System/Component)

        Options found in the /DIRAC/ConnConf section named after elements 1 and 2
        of the URL tuple are copied into self.kwargs, without replacing keys
        that are already present.

        :return: S_OK() on success, otherwise the error structure of the failing step
    """
    try:
      lookup = self.__findServiceURL()
    except Exception as exc:
      # Any exception raised by the lookup is reported as an error structure
      return S_ERROR(repr(exc))
    if not lookup['OK']:
      return lookup
    self.serviceURL = lookup['Value']

    split = Network.splitURL(self.serviceURL)
    if not split['OK']:
      return split
    self.__URLTuple = split['Value']
    self._serviceName = self.__URLTuple[-1]

    # Per-endpoint connection options never override explicitly given kwargs
    connConf = gConfig.getOptionsDict("/DIRAC/ConnConf/%s:%s" % self.__URLTuple[1:3])
    if connConf['OK']:
      for optName, optValue in connConf['Value'].items():
        self.kwargs.setdefault(optName, optValue)
    return S_OK()
Ejemplo n.º 2
0
 def __reduceComponentList( self, componentList ):
   """
   Only keep the most restrictive components.

   A component is discarded when another entry still present in the list
   contains every one of its keys with a matching value. 'Host' values are
   compared through Network.checkHostsMatch, all other values with plain
   equality. Discarded entries are overwritten with False inside
   componentList itself; the returned list is a filtered copy.
   """
   def isCoveredBy( base, candidate ):
     # True when candidate carries every key of base with a matching value
     for key in base:
       if key not in candidate:
         return False
       if key == 'Host':
         result = Network.checkHostsMatch( base[key], candidate[key] )
         if not result[ 'OK' ] or not result[ 'Value' ]:
           return False
       elif base[key] != candidate[key]:
         return False
     return True

   for i, component in enumerate( componentList ):
     for j, other in enumerate( componentList ):
       # Never compare an entry with itself or with an already discarded one
       if i == j or other == False:
         continue
       if isCoveredBy( component, other ):
         componentList[i] = False
         break
   return [ comp for comp in componentList if comp != False ]
Ejemplo n.º 3
0
 def getSocket( self, hostAddress, **kwargs ):
   """
   Connect to hostAddress and perform the client handshake.

   hostAddress is indexed as ( hostName, port ). The IPs the host name resolves
   to are tried in the order given by List.randomize; connecting plus
   handshaking is attempted up to three times. When kwargs carries a true
   'enableSessions' entry the session of the connected socket is stored in
   gSessionManager under hash( hostAddress ).

   Returns S_OK( socketInfo ) on success, otherwise an error structure.
   """
   hostName = hostAddress[0]
   infoResult = self.generateClientInfo( hostName, kwargs )
   if not infoResult[ 'OK' ]:
     return infoResult
   socketInfo = infoResult[ 'Value' ]

   resolved = Network.getIPsForHostName( hostName )
   if not resolved[ 'OK' ]:
     return S_ERROR( "Could not resolve %s: %s" % ( hostName, resolved[ 'Message' ] ) )
   candidateIPs = List.randomize( resolved[ 'Value' ] )

   for _attempt in range( 3 ):
     failures = []
     for ip in candidateIPs:
       target = ( ip, hostAddress[1] )
       attempt = self.__connect( socketInfo, target )
       if attempt[ 'OK' ]:
         sslSocket = attempt[ 'Value' ]
         break
       failures.append( "%s: %s" % ( target, attempt[ 'Message' ] ) )
     else:
       # No IP accepted the connection during this attempt
       return S_ERROR( "Could not connect to %s: %s" % ( hostAddress, ",".join( failures ) ) )
     handshake = socketInfo.doClientHandshake()
     if handshake[ 'OK' ]:
       #Everything went ok. Don't need to retry
       break

   #Did the auth or the connection fail?
   if not handshake[ 'OK' ]:
     return handshake
   if kwargs.get( 'enableSessions' ):
     gSessionManager.set( hash( hostAddress ), sslSocket.get_session() )
   return S_OK( socketInfo )
Ejemplo n.º 4
0
 def initialize( self ):
   """
   Set up the monitoring client.

   Creates the "Monitoring" sub-logger, stores the setup and the site in
   sourceDict, picks cfgSection (plus component location/name where the
   branch sets them) from the component type and registers the client with
   gMonitoringFlusher.

   Raises Exception when the component type matches none of the
   COMPONENT_* constants checked here.
   """
   self.logger = gLogger.getSubLogger( "Monitoring" )
   self.logger.debug( "Initializing Monitoring Client" )
   self.sourceDict[ 'setup' ] = gConfig.getValue( "/DIRAC/Setup" )
   self.sourceDict[ 'site' ] = DIRAC.siteName()

   componentType = self.sourceDict[ 'componentType' ]
   if componentType == self.COMPONENT_SERVICE:
     self.cfgSection = PathFinder.getSystemSection( self.sourceDict[ 'componentName' ] )
   elif componentType == self.COMPONENT_AGENT:
     self.cfgSection = PathFinder.getAgentSection( self.sourceDict[ 'componentName' ] )
     self.setComponentLocation( Network.getFQDN() )
   elif componentType == self.COMPONENT_WEB:
     self.cfgSection = "/WebApp"
     self.setComponentLocation( 'http://%s' % Network.getFQDN() )
     self.setComponentName( 'WebApp' )
   elif componentType == self.COMPONENT_SCRIPT:
     self.cfgSection = "/Script"
   else:
     raise Exception( "Component type has not been defined" )

   gMonitoringFlusher.registerMonitoringClient( self )
   # ExitCallback.registerExitCallback( self.forceFlush )
   self.__initialized = True
    def initialize(self):
        """Prepare the agent state and declare this instance as running.

        Reads the running pod and the VM id from the local configuration,
        resets the tunable attributes, stores the IP of the first "eth*"
        interface (in sorted name order), reads the VM stop policy, declares
        the instance running and finally loads the CS configuration.

        :return: S_OK() on success; S_ERROR("Halting!") when the instance could
                 not be declared running (the instance is halted first); or the
                 error structure returned by __getCSConfig.
        """
        self.am_disableMonitoring()
        # Init vars
        self.runningPod = gConfig.getValue('/LocalSite/RunningPod')
        self.log.info("Running pod name of the image is %s" % self.runningPod)
        self.vmID = gConfig.getValue('/LocalSite/VMID')

        self.__loadHistory = []
        self.vmMinWorkingLoad = None
        self.vmLoadAvgTimespan = None
        self.vmJobWrappersLocation = None
        self.haltPeriod = None
        self.haltBeforeMargin = None
        self.heartBeatPeriod = None
        self.am_setOption("MaxCycles", 0)
        self.am_setOption("PollingTime", 60)

        # Discover net address. Start from None so the attribute always exists:
        # without it the log call below raised AttributeError whenever no
        # "eth*" interface was reported.
        self.ipAddress = None
        netData = Network.discoverInterfaces()
        for iface in sorted(netData):
            if iface.startswith("eth"):
                self.ipAddress = netData[iface]['ip']
                break
        self.log.info("IP Address is %s" % self.ipAddress)

        # Getting the stop policy
        # NOTE(review): the "%s" in the option path is never interpolated, so
        # the literal path "Cloud/%s/VMStopPolicy" is looked up - confirm which
        # value was meant to be substituted.
        self.op = Operations.Operations()
        self.vmStopPolicy = self.op.getValue("Cloud/%s/VMStopPolicy",
                                             'elastic')
        self.log.info("vmStopPolicy is %s" % self.vmStopPolicy)

        # Declare instance running
        self.uniqueID = ''
        result = virtualMachineDB.getUniqueIDByName(self.vmID)
        if result['OK']:
            self.uniqueID = result['Value']
        result = self.__declareInstanceRunning()
        if not result['OK']:
            self.log.error("Could not declare instance running",
                           result['Message'])
            self.__haltInstance()
            return S_ERROR("Halting!")

        self.__instanceInfo = result['Value']

        # Get the cs config
        result = self.__getCSConfig()
        if not result['OK']:
            return result

        return S_OK()
Ejemplo n.º 6
0
 def initialize(self):
     """Set up the monitoring client.

     Creates the "Monitoring" sub-logger, stores the setup and the site in
     sourceDict, picks cfgSection (plus component location/name where the
     branch sets them) from the component type and registers the client
     with gMonitoringFlusher.

     Raises Exception when the component type matches none of the
     COMPONENT_* constants checked here.
     """
     self.logger = gLogger.getSubLogger("Monitoring")
     self.logger.debug("Initializing Monitoring Client")
     self.sourceDict['setup'] = gConfig.getValue("/DIRAC/Setup")
     self.sourceDict['site'] = DIRAC.siteName()

     componentType = self.sourceDict['componentType']
     if componentType == self.COMPONENT_SERVICE:
         componentName = self.sourceDict['componentName']
         self.cfgSection = PathFinder.getSystemSection(componentName)
     elif componentType == self.COMPONENT_AGENT:
         componentName = self.sourceDict['componentName']
         self.cfgSection = PathFinder.getAgentSection(componentName)
         self.setComponentLocation(Network.getFQDN())
     elif componentType == self.COMPONENT_WEB:
         self.cfgSection = "/Website"
         self.setComponentLocation('http://%s' % Network.getFQDN())
         self.setComponentName('Web')
     elif componentType == self.COMPONENT_SCRIPT:
         self.cfgSection = "/Script"
     else:
         raise Exception("Component type has not been defined")

     gMonitoringFlusher.registerMonitoringClient(self)
     # ExitCallback.registerExitCallback( self.forceFlush )
     self.__initialized = True
Ejemplo n.º 7
0
 def __addFoundDefinedComponent(self, compDictList):
     """Combine the components found in the DB set with the defined ones.

     Every DB component without a status is marked "OK" and then flagged
     with "Error" when it is a service with a missing or unexpected port, or
     when its last heartbeat is older than the configured maximum. All DB
     components are appended to the required-set list. Defined components
     that no DB component matches on every field (ignoring "Status" and
     "Message") are flagged with "Error" and appended as well.

     :param compDictList: list of defined component dictionaries; the first
                          one is used to walk both sets
     """
     requiredComps = self.walkSet(self.__requiredSet, compDictList[0])
     foundComps = self.walkSet(self.__dbSet, compDictList[0])
     now = Time.dateTime()
     pending = compDictList
     for dbComp in foundComps:
         if "Status" not in dbComp:
             self.__setStatus(dbComp, "OK")
             if dbComp["Type"] == "service":
                 if "Port" not in dbComp:
                     self.__setStatus(dbComp, "Error", "Port is not defined")
                 else:
                     dbPort = dbComp["Port"]
                     definedPorts = [compDict["Port"] for compDict in compDictList if "Port" in compDict]
                     if dbPort not in definedPorts:
                         portMessage = "Port (%s) is different that specified in the CS" % dbComp["Port"]
                         self.__setStatus(compDictList[-1], "Error", portMessage)
             sinceHeartbeat = now - dbComp["LastHeartbeat"]
             secsSinceHeartbeat = sinceHeartbeat.days * 86400 + sinceHeartbeat.seconds
             if secsSinceHeartbeat > self.__maxSecsSinceHeartbeat:
                 heartbeatMessage = "Last heartbeat was received at %s (%s secs ago)" % (
                     dbComp["LastHeartbeat"],
                     secsSinceHeartbeat,
                 )
                 self.__setStatus(dbComp, "Error", heartbeatMessage)
         requiredComps.append(dbComp)

         # Keep only the defined components this DB component does not match perfectly
         stillPending = []
         for candidate in pending:
             isPerfect = True
             # Every field is inspected, even after a mismatch has been found
             for field in candidate:
                 if field in ("Status", "Message"):
                     continue
                 if field not in dbComp:
                     isPerfect = False
                 elif field == "Host":
                     hostCheck = Network.checkHostsMatch(candidate[field], dbComp[field])
                     if not (hostCheck["OK"] and hostCheck["Value"]):
                         isPerfect = False
                 elif candidate[field] != dbComp[field]:
                     isPerfect = False
             if not isPerfect:
                 stillPending.append(candidate)
         pending = stillPending

     for orphan in pending:
         self.__setStatus(orphan, "Error", "There is no component up with this properties")
         requiredComps.append(orphan)
Ejemplo n.º 8
0
 def __init__( self, optionsDictionary ):
   """
   Initialise the thread state from optionsDictionary and start the thread.

   optionsDictionary must provide the 'Interactive', 'SleepTime' and 'Site'
   keys. The thread is flagged as a daemon and started before returning.
   """
   threading.Thread.__init__( self )

   # Behaviour requested by the caller
   self.__interactive = optionsDictionary[ 'Interactive' ]
   self.__sleep = optionsDictionary[ 'SleepTime' ]

   # Message bookkeeping
   self._messageQueue = Queue.Queue()
   self._Transactions = []
   self._alive = True

   # Where the messages come from
   self._site = optionsDictionary[ 'Site' ]
   self._hostname = Network.getFQDN()

   # Level values looked up by name
   levels = LogLevels()
   self._logLevels = levels
   self._negativeLevel = levels.getLevelValue( 'ERROR' )
   self._positiveLevel = levels.getLevelValue( 'ALWAYS' )
   self._maxBundledMessages = 20

   self.setDaemon(1)
   self.start()
Ejemplo n.º 9
0
 def initialize(self):
     """Set up the monitoring client.

     Creates the "Monitoring" sub-logger, stores the setup and the site in
     sourceDict, picks cfgSection (plus component location/name where the
     branch sets them) from the component type and registers the client
     with gMonitoringFlusher.

     Raises Exception when the component type matches none of the
     COMPONENT_* constants checked here.
     """
     self.logger = gLogger.getSubLogger("Monitoring")
     self.logger.debug("Initializing Monitoring Client")
     self.sourceDict["setup"] = gConfig.getValue("/DIRAC/Setup")
     self.sourceDict["site"] = DIRAC.siteName()

     componentType = self.sourceDict["componentType"]
     if componentType == self.COMPONENT_SERVICE:
         componentName = self.sourceDict["componentName"]
         self.cfgSection = PathFinder.getSystemSection(componentName)
     elif componentType == self.COMPONENT_AGENT:
         componentName = self.sourceDict["componentName"]
         self.cfgSection = PathFinder.getAgentSection(componentName)
         self.setComponentLocation(Network.getFQDN())
     elif componentType == self.COMPONENT_WEB:
         self.cfgSection = "/WebApp"
         self.setComponentLocation("http://%s" % Network.getFQDN())
         self.setComponentName("WebApp")
     elif componentType == self.COMPONENT_SCRIPT:
         self.cfgSection = "/Script"
     elif componentType == self.COMPONENT_TORNADO:
         self.cfgSection = "/Tornado"
     else:
         raise Exception("Component type has not been defined")

     gMonitoringFlusher.registerMonitoringClient(self)
     self.__initialized = True
Ejemplo n.º 10
0
    def __selectUrl(self, notselect, urls):
        """Return the first URL in urls whose host differs from the one in notselect.

        Used when multiple services run on the same host and a new URL has to
        point to a different host.

        :param notselect: split URL that must NOT be selected; its element [1]
                          is compared with element [1] of every split candidate
        :param urls: iterable of candidate URLs
        :return: the selected URL, or None when no candidate is on a different
                 host (presumably the caller then keeps the already selected URL)
        """
        for candidate in urls:
            retVal = Network.splitURL(candidate)
            if not retVal['OK']:
                # Only a failed split is expected to carry a 'Message'. The log
                # call used to sit on the "same host" branch of a successful
                # split, where reading 'Message' raised KeyError.
                gLogger.error(retVal['Message'])
                continue
            if retVal['Value'][1] != notselect[1]:  # the hosts are different
                return candidate
        return None
Ejemplo n.º 11
0
  def __selectUrl( self, notselect, urls ):
    """Return the first URL in urls whose host differs from the one in notselect.

    Used when multiple services run on the same host and a new URL has to
    point to a different host.

    :param notselect: split URL that must NOT be selected; its element [1]
                      is compared with element [1] of every split candidate
    :param urls: iterable of candidate URLs
    :return: the selected URL, or None when no candidate is on a different
             host (presumably the caller then keeps the already selected URL)
    """
    for candidate in urls:
      retVal = Network.splitURL( candidate )
      if not retVal['OK']:
        # Only a failed split is expected to carry a 'Message'. The log call
        # used to sit on the "same host" branch of a successful split, where
        # reading 'Message' raised KeyError.
        gLogger.error( retVal['Message'] )
        continue
      if retVal['Value'][1] != notselect[1]:  # the hosts are different
        return candidate
    return None
Ejemplo n.º 12
0
 def __init__( self, optionsDictionary ):
   """
   Configure the thread from optionsDictionary and launch it as a daemon.

   The 'Interactive', 'SleepTime' and 'Site' entries of optionsDictionary
   are required. The host name comes from Network.getFQDN and the two
   level values from a fresh LogLevels instance.
   """
   threading.Thread.__init__( self )

   options = optionsDictionary
   self.__interactive = options[ 'Interactive' ]
   self.__sleep = options[ 'SleepTime' ]

   self._messageQueue = Queue.Queue()
   self._Transactions = []
   self._alive = True

   self._site = options[ 'Site' ]
   self._hostname = Network.getFQDN()

   self._logLevels = LogLevels()
   levelValue = self._logLevels.getLevelValue
   self._negativeLevel = levelValue( 'ERROR' )
   self._positiveLevel = levelValue( 'ALWAYS' )
   self._maxBundledMessages = 20

   # Daemon flag has to be set before the thread is started
   self.setDaemon(1)
   self.start()
Ejemplo n.º 13
0
    def initialize(self):
        """Prepare the agent state and declare this instance as running.

        Reads the running pod and the VM id from the local configuration,
        resets the tunable attributes, stores the IP of the first interface
        (in sorted name order) whose name contains "eth" or "ens", declares
        the instance running and loads the CS configuration.

        :return: S_OK() on success; S_ERROR("Halting!") when the instance could
                 not be declared running (the instance is halted first); or the
                 error structure returned by __getCSConfig.
        """
        self.am_disableMonitoring()
        self.op = Operations.Operations()

        # Local configuration
        self.runningPod = gConfig.getValue("/LocalSite/RunningPod")
        self.log.info("Running pod name of the image is %s" % self.runningPod)
        self.vmID = gConfig.getValue("/LocalSite/VMID")

        # Attributes left unset (None) by this method
        self.__loadHistory = []
        self.vmLoadAvgTimespan = None
        self.vmJobWrappersLocation = None
        self.haltPeriod = None
        self.haltBeforeMargin = None
        self.heartBeatPeriod = None
        self.am_setOption("MaxCycles", 0)
        self.am_setOption("PollingTime", 60)

        # Network address: stays None when no suitable interface is reported
        self.ipAddress = None
        interfaces = Network.discoverInterfaces()
        for ifaceName in sorted(interfaces):
            # Warning! On different clouds interface name may be different(eth, ens, ...)
            if not ("eth" in ifaceName or "ens" in ifaceName):
                continue
            self.ipAddress = interfaces[ifaceName]["ip"]
            self.log.info("IP Address is %s" % self.ipAddress)
            break

        # Declare instance running
        self.uniqueID = ""
        idLookup = virtualMachineDB.getUniqueIDByName(self.vmID)
        if idLookup["OK"]:
            self.uniqueID = idLookup["Value"]
        declared = self.__declareInstanceRunning()
        if not declared["OK"]:
            self.log.error("Could not declare instance running",
                           declared["Message"])
            self.__haltInstance()
            return S_ERROR("Halting!")
        self.__instanceInfo = declared["Value"]

        # Get the cs config
        csConfig = self.__getCSConfig()
        if not csConfig["OK"]:
            return csConfig
        return S_OK()
Ejemplo n.º 14
0
 def __addFoundDefinedComponent( self, compDictList ):
   """
   Combine the components found in the DB set with the defined ones.

   Every DB component without a status is marked 'OK' and then flagged with
   'Error' when it is a service with a missing or unexpected port, or when its
   last heartbeat is older than the configured maximum. All DB components are
   appended to the required-set list. Defined components that no DB component
   matches on every field (ignoring 'Status' and 'Message') are flagged with
   'Error' and appended as well.

   :param compDictList: list of defined component dictionaries; the first one
                        is used to walk both sets
   """
   cD = self.walkSet( self.__requiredSet, compDictList[0] )
   dbD = self.walkSet( self.__dbSet, compDictList[0] )
   now = Time.dateTime()
   unmatched = compDictList
   for dbComp in dbD:
     if 'Status' not in dbComp:
       self.__setStatus( dbComp, 'OK' )
       if dbComp[ 'Type' ] == "service":
         if 'Port' not in dbComp:
           self.__setStatus( dbComp, 'Error', "Port is not defined" )
         elif dbComp[ 'Port' ] not in [ compDict[ 'Port' ] for compDict in compDictList if 'Port' in compDict ]:
           # The comprehension variable is not visible here on Python 3 (it
           # leaked as the last list element on Python 2), so the last
           # defined component is addressed explicitly.
           self.__setStatus( compDictList[-1], 'Error',
                             "Port (%s) is different that specified in the CS" % dbComp[ 'Port' ] )
       elapsed = now - dbComp[ 'LastHeartbeat' ]
       elapsed = elapsed.days * 86400 + elapsed.seconds
       if elapsed > self.__maxSecsSinceHeartbeat:
         self.__setStatus( dbComp, "Error",
                           "Last heartbeat was received at %s (%s secs ago)" % ( dbComp[ 'LastHeartbeat' ],
                                                                                 elapsed ) )
     cD.append( dbComp )
     #See if we have a perfect match
     newUnmatched = []
     for unmatchedComp in unmatched:
       perfectMatch = True
       for field in unmatchedComp:
         if field in ( 'Status', 'Message' ):
           continue
         if field not in dbComp:
           perfectMatch = False
           continue
         if field == 'Host':
           result = Network.checkHostsMatch( unmatchedComp[ field ], dbComp[ field ] )
           if not result[ 'OK' ] or not result[ 'Value' ]:
             perfectMatch = False
         else:
           if unmatchedComp[ field ] != dbComp[ field ]:
             perfectMatch = False
       if not perfectMatch:
         newUnmatched.append( unmatchedComp )
     unmatched = newUnmatched
   # Whatever is still unmatched has no running counterpart
   for unmatchedComp in unmatched:
     self.__setStatus( unmatchedComp, "Error", "There is no component up with this properties" )
     cD.append( unmatchedComp )
Ejemplo n.º 15
0
def getGenericVMId():
  """
  Build an identifier for this machine from its boot time and MAC addresses.

  The "btime" value read from /proc/stat and the 'mac' entry of every
  interface reported by Network.discoverInterfaces (skipping "lo", in sorted
  interface-name order) are fed into an MD5 hash.

  :return: S_OK( hex digest ), or S_ERROR when /proc/stat has no btime entry
  """
  btime = None
  # The context manager closes the file even when reading raises
  with open( "/proc/stat" ) as fd:
    for line in fd:
      fields = List.fromChar( line, " " )
      # Guard against lines yielding no fields before looking at fields[0]
      if fields and fields[0] == "btime":
        btime = fields[1]
        break
  if not btime:
    return S_ERROR( "Could not find btime in /proc/stat" )
  md5Hash = md5()
  # NOTE(review): update() is fed text here; on Python 3 hashlib requires
  # bytes - confirm the target interpreter before porting.
  md5Hash.update( btime )
  netData = Network.discoverInterfaces()
  for iface in sorted( netData ):
    if iface == "lo":
      continue
    md5Hash.update( netData[ iface ][ 'mac' ] )
  return S_OK( md5Hash.hexdigest() )
Ejemplo n.º 16
0
    def __selectUrl(self, notselect, urls):
        """Return the first URL in urls whose host differs from the one in notselect.

        Used when multiple services run on the same host and a new URL has to
        point to a different host.

        :param notselect: split URL that should NOT be selected; its element [1]
                          is compared with element [1] of every split candidate
        :param list urls: list of potential URLs

        :return: str -- selected URL, or None when no candidate is on a
                 different host (presumably the caller then keeps the already
                 selected URL)
        """
        for candidate in urls:
            retVal = Network.splitURL(candidate)
            if not retVal["OK"]:
                # Only a failed split is expected to carry a "Message". The log
                # call used to sit on the "same host" branch of a successful
                # split, where reading "Message" raised KeyError.
                gLogger.error(retVal["Message"])
                continue
            if retVal["Value"][1] != notselect[1]:  # the hosts are different
                return candidate
        return None
Ejemplo n.º 17
0
    def __selectUrl(self, notselect, urls):
        """Return the first URL in urls whose host differs from the one in notselect.

        Used when multiple services run on the same host and a new URL has to
        point to a different host.

        :param notselect: split URL that should NOT be selected; its element [1]
                          is compared with element [1] of every split candidate
        :param urls: list of potential URLs

        :return: selected URL, or None when no candidate is on a different host
                 (presumably the caller then keeps the already selected URL)

        WARNING: COPY/PASTE FROM Core/Diset/private/BaseClient
        """
        for candidate in urls:
            retVal = Network.splitURL(candidate)
            if not retVal['OK']:
                # Only a failed split is expected to carry a 'Message'. The log
                # call used to sit on the "same host" branch of a successful
                # split, where reading 'Message' raised KeyError.
                gLogger.error(retVal['Message'])
                continue
            if retVal['Value'][1] != notselect[1]:  # the hosts are different
                return candidate
        return None
Ejemplo n.º 18
0
 def __discoverURL( self ):
   """
   Resolve the service URL and cache its pieces on the instance.

   Sets self.serviceURL (value returned by __findServiceURL), self.__URLTuple
   (its split as returned by Network.splitURL) and self._serviceName (last
   element of that tuple). Options of the /DIRAC/ConnConf section named after
   elements 1 and 2 of the tuple are copied into self.kwargs without
   replacing keys that are already present.

   Returns S_OK() on success, otherwise the error structure of the failing step.
   """
   try:
     lookup = self.__findServiceURL()
   except Exception as exc:
     # Any exception raised by the lookup is reported as an error structure
     return S_ERROR( repr( exc ) )
   if not lookup[ 'OK' ]:
     return lookup
   self.serviceURL = lookup[ 'Value' ]

   split = Network.splitURL( self.serviceURL )
   if not split[ 'OK' ]:
     return split
   self.__URLTuple = split[ 'Value' ]
   self._serviceName = self.__URLTuple[-1]

   # Per-endpoint connection options never override explicitly given kwargs
   connConf = gConfig.getOptionsDict( "/DIRAC/ConnConf/%s:%s" % self.__URLTuple[1:3] )
   if connConf[ 'OK' ]:
     for optName, optValue in connConf[ 'Value' ].items():
       self.kwargs.setdefault( optName, optValue )
   return S_OK()
Ejemplo n.º 19
0
 def __discoverURL(self):
     """Resolve the service URL and cache its pieces on the instance.

     Sets self.serviceURL (value returned by __findServiceURL),
     self.__URLTuple (its split as returned by Network.splitURL) and
     self._serviceName (last element of that tuple). Options of the
     /DIRAC/ConnConf section named after elements 1 and 2 of the tuple are
     copied into self.kwargs without replacing keys that are already present.

     :return: S_OK() on success, otherwise the error structure of the
              failing step
     """
     try:
         lookup = self.__findServiceURL()
     except Exception as exc:
         # Any exception raised by the lookup is reported as an error structure
         return S_ERROR(repr(exc))
     if not lookup['OK']:
         return lookup
     self.serviceURL = lookup['Value']

     split = Network.splitURL(self.serviceURL)
     if not split['OK']:
         return split
     self.__URLTuple = split['Value']
     self._serviceName = self.__URLTuple[-1]

     # Per-endpoint connection options never override explicitly given kwargs
     sectionPath = "/DIRAC/ConnConf/%s:%s" % self.__URLTuple[1:3]
     connConf = gConfig.getOptionsDict(sectionPath)
     if connConf['OK']:
         for optName, optValue in connConf['Value'].items():
             self.kwargs.setdefault(optName, optValue)
     return S_OK()
Ejemplo n.º 20
0
    def createRMSRecord(self, status, nbObject):
        """
        Build the monitoring record describing the outcome of a file operation.

        The record is meant to be sent to the ES backend; it is used inside
        DMS/Agent/RequestOperations and is designed for file type of objects.

        :param status: one of Attempted, Failed, or Successful.
        :param nbObject: number of objects in question.

        :returns: a dictionary with the timestamp, host, object type,
                  operation type, status, object count and parent operation ID.
        """
        # Filled in key by key, in the order the fields appear in the record
        record = {}
        record["timestamp"] = int(TimeUtilities.toEpoch())
        record["host"] = Network.getFQDN()
        record["objectType"] = "File"
        record["operationType"] = self.operation.Type
        record["status"] = status
        record["nbObject"] = nbObject
        record["parentID"] = self.operation.OperationID
        return record
Ejemplo n.º 21
0
 def _executeAction(self, trid, proposalTuple, handlerObj):
     """Run the handler action and optionally record its response time.

     :param trid: transport id (not used by this method)
     :param proposalTuple: forwarded to handlerObj._rh_executeAction
     :param handlerObj: handler exposing _rh_executeAction

     :return: element 0 of the handler's "Value" on success; the handler's
              own error structure when it reports a failure; an S_ERROR
              describing the exception when anything raises.
     """
     try:
         response = handlerObj._rh_executeAction(proposalTuple)
         if not response["OK"]:
             # Hand the handler's error back untouched. Indexing "Value" on
             # a failed response used to raise KeyError, which was then
             # reported as a generic server error hiding the real message.
             return response
         if self.activityMonitoring:
             self.activityMonitoringReporter.addRecord({
                 'timestamp':
                 int(Time.toEpoch()),
                 'host':
                 Network.getFQDN(),
                 'componentType':
                 'service',
                 'component':
                 "_".join(self._name.split("/")),
                 'componentLocation':
                 self._cfg.getURL(),
                 'ServiceResponseTime':
                 response["Value"][1]
             })
         return response["Value"][0]
     except Exception as e:
         gLogger.exception("Exception while executing handler action")
         return S_ERROR("Server error while executing action: %s" % str(e))
Ejemplo n.º 22
0
def siteName():
  """
  Determine and return DIRAC name for current site.

  The name is computed once and kept in the module-level __siteName. It is
  read from /LocalSite/Site; the fallback is 'DIRAC.Client.<last FQDN label>'
  when the FQDN has more than two dot-separated labels and
  'DIRAC.Client.local' otherwise.
  """
  global __siteName

  if __siteName:
    return __siteName

  #FIXME: does this ever happen that we have to use the defaultValue if getValue ???
  from DIRAC.Core.Utilities import Network
  # Default used when the configuration has no site entry
  fqdn = Network.getFQDN()
  labels = fqdn.split( '.' )
  # The last label acts as country code, 'local' when the name is too short
  countryCode = fqdn.split( '.' )[-1] if len( labels ) > 2 else 'local'
  fallbackName = 'DIRAC.Client.%s' % countryCode

  __siteName = gConfig.getValue( '/LocalSite/Site', fallbackName )
  return __siteName
Ejemplo n.º 23
0
 def _executeAction(self, trid, proposalTuple, handlerObj):
     """Run the handler action and optionally record its response time.

     :param trid: transport id (not used by this method)
     :param proposalTuple: forwarded to handlerObj._rh_executeAction
     :param handlerObj: handler exposing _rh_executeAction

     :return: element 0 of the handler's "Value" on success; the handler's
              own error structure when it reports a failure; an S_ERROR
              describing the exception when anything raises.
     """
     try:
         outcome = handlerObj._rh_executeAction(proposalTuple)
         if not outcome["OK"]:
             return outcome
         if self.activityMonitoring:
             # Fields are evaluated in the order they appear in the record
             activityRecord = {
                 "timestamp": int(TimeUtilities.toEpoch()),
                 "Host": Network.getFQDN(),
                 "ServiceName": "_".join(self._name.split("/")),
                 "Location": self._cfg.getURL(),
                 "ResponseTime": outcome["Value"][1],
             }
             self.activityMonitoringReporter.addRecord(activityRecord)
         return outcome["Value"][0]
     except Exception as exc:
         gLogger.exception("Exception while executing handler action")
         return S_ERROR("Server error while executing action: %s" % str(exc))
Ejemplo n.º 24
0
    def getSocket(self, hostAddress, **kwargs):
        """Connect to hostAddress and perform the client handshake.

        :param hostAddress: sequence indexed as (hostName, port); it is also
                            hashed to build the session id
        :param kwargs: forwarded to generateClientInfo; a true 'enableSessions'
                       entry stores the session of the connected socket in
                       gSessionManager
        :return: S_OK(socketInfo) on success, otherwise an error structure
        """
        hostName = hostAddress[0]
        retVal = self.generateClientInfo(hostName, kwargs)
        if not retVal['OK']:
            return retVal
        socketInfo = retVal['Value']
        retVal = Network.getIPsForHostName(hostName)
        if not retVal['OK']:
            return S_ERROR("Could not resolve %s: %s" %
                           (hostName, retVal['Message']))
        # In that case the first ip always  the correct one.
        ipList = retVal['Value']

        # range() behaves like xrange() for this single pass and, unlike
        # xrange(), is also defined on Python 3.
        for _ in range(1):  # TODO: this retry can be reduced.
            connected = False
            errorsList = []
            for ip in ipList:
                ipAddress = (ip, hostAddress[1])
                retVal = self.__connect(socketInfo, ipAddress)
                if retVal['OK']:
                    sslSocket = retVal['Value']
                    connected = True
                    break
                errorsList.append("%s: %s" % (ipAddress, retVal['Message']))
            if not connected:
                return S_ERROR("Could not connect to %s: %s" %
                               (hostAddress, ",".join(errorsList)))
            retVal = socketInfo.doClientHandshake()
            if retVal['OK']:
                # Everything went ok. Don't need to retry
                break
        # Did the auth or the connection fail?
        if not retVal['OK']:
            return retVal
        if kwargs.get('enableSessions'):
            sessionId = hash(hostAddress)
            gSessionManager.set(sessionId, sslSocket.get_session())
        return S_OK(socketInfo)
Ejemplo n.º 25
0
    def __init__(self, sleepTime, interactive, site):
        """
        Set up the ServerHandler and start its background thread.

        Both parent initializers are run, the log queue and the bookkeeping
        attributes are created, the hostname is taken from Network.getFQDN
        and the thread is started as a daemon.

        :params sleepTime: integer, representing time in seconds where the handler can send messages.
        :params interactive: not used at the moment.
        :params site: the site where the log messages come from.
        """
        super(ServerHandler, self).__init__()
        threading.Thread.__init__(self)

        # Settings supplied by the caller
        self.__sleepTime = sleepTime
        self.__interactive = interactive
        self.__site = site

        # Internal state of the handler
        self.__logQueue = Queue.Queue()
        self.__transactions = []
        self.__hostname = Network.getFQDN()
        self.__alive = True
        self.__maxBundledLogs = 20

        # Daemon flag has to be set before the thread is started
        self.setDaemon(True)
        self.start()
Ejemplo n.º 26
0
    def __reduceComponentList(self, componentList):
        """
        Only keep the most restrictive components.

        A component is discarded when another entry still present in the list
        contains every one of its keys with a matching value ('Host' values
        are compared through Network.checkHostsMatch). Discarded entries are
        overwritten with False inside componentList itself; the returned list
        is a filtered copy.

        :type componentList: list
        :param componentList: A list of components (modified in place).
        :return: A list of reduced components.
        """
        for i in range(len(componentList)):
            component = componentList[i]
            for j in range(len(componentList)):
                if i == j or componentList[j] is False:
                    continue
                potentiallyMoreRestrictiveComponent = componentList[j]
                match = True
                for key in component:
                    if key not in potentiallyMoreRestrictiveComponent:
                        match = False
                        break
                    if key == 'Host':
                        result = Network.checkHostsMatch(
                            component[key],
                            potentiallyMoreRestrictiveComponent[key])
                        if not result['OK'] or not result['Value']:
                            match = False
                            break
                    elif component[key] != potentiallyMoreRestrictiveComponent[key]:
                        match = False
                        break
                if match:
                    componentList[i] = False
                    break
        # Filter on the sentinel itself, as the loop above does: a plain
        # truthiness test would also drop a surviving empty component.
        return [comp for comp in componentList if comp is not False]
Ejemplo n.º 27
0
class BaseClient:
    """
    Base class of the service clients.

    At construction time it resolves the setup, the VO and the URL of the
    destination service (plus further settings handled by the other discovery
    methods listed in ``__init__``). The first failure met while doing so is
    kept in the private init status instead of being raised.
    """

    VAL_EXTRA_CREDENTIALS_HOST = "hosts"

    # Names of the keyword arguments understood by the client
    KW_USE_CERTIFICATES = "useCertificates"
    KW_EXTRA_CREDENTIALS = "extraCredentials"
    KW_TIMEOUT = "timeout"
    KW_SETUP = "setup"
    KW_VO = "VO"
    KW_DELEGATED_DN = "delegatedDN"
    KW_DELEGATED_GROUP = "delegatedGroup"
    KW_IGNORE_GATEWAYS = "ignoreGateways"
    KW_PROXY_LOCATION = "proxyLocation"
    KW_PROXY_STRING = "proxyString"
    KW_PROXY_CHAIN = "proxyChain"
    KW_SKIP_CA_CHECK = "skipCACheck"
    KW_KEEP_ALIVE_LAPSE = "keepAliveLapse"

    def __init__(self, serviceName, **kwargs):
        """
        Store the destination and run every discovery step.

        :param serviceName: name or URL of the destination service, must be a string
        :param kwargs: client options, see the ``KW_*`` class attributes
        :raises TypeError: if ``serviceName`` is not a string
        """
        if not isinstance(serviceName, str):
            raise TypeError(
                "Service name expected to be a string. Received %s type %s" %
                (str(serviceName), type(serviceName)))
        self._destinationSrv = serviceName
        self.kwargs = kwargs
        self.__initStatus = S_OK()
        self.__idDict = {}
        self.__enableThreadCheck = False
        # Every step is executed even after a failure; only the first error is kept
        for initFunc in (self.__discoverSetup, self.__discoverVO,
                         self.__discoverTimeout, self.__discoverURL,
                         self.__discoverCredentialsToUse,
                         self.__discoverExtraCredentials,
                         self.__checkTransportSanity,
                         self.__setKeepAliveLapse):
            result = initFunc()
            if not result['OK'] and self.__initStatus['OK']:
                self.__initStatus = result
        self._initialize()
        # HACK for thread-safety:
        self.__allowedThreadID = False

    def _initialize(self):
        """Hook called at the end of ``__init__``; does nothing here."""
        pass

    def getDestinationService(self):
        """Return the service name (or URL) given to the constructor."""
        return self._destinationSrv

    def __discoverSetup(self):
        """
        Set ``self.setup`` from the ``setup`` keyword argument when it is given
        and not empty, otherwise from the /DIRAC/Setup option (default "Test").

        :return: S_OK()
        """
        # Which setup to use?
        if self.KW_SETUP in self.kwargs and self.kwargs[self.KW_SETUP]:
            self.setup = str(self.kwargs[self.KW_SETUP])
        else:
            self.setup = gConfig.getValue("/DIRAC/Setup", "Test")
        return S_OK()

    def __discoverVO(self):
        """
        Set ``self.vo`` from the ``VO`` keyword argument when it is given and not
        empty, otherwise from the /DIRAC/VirtualOrganization option
        (default "unknown").

        :return: S_OK()
        """
        # Which VO to use?
        if self.KW_VO in self.kwargs and self.kwargs[self.KW_VO]:
            self.vo = str(self.kwargs[self.KW_VO])
        else:
            self.vo = gConfig.getValue("/DIRAC/VirtualOrganization", "unknown")
        return S_OK()

    def __discoverURL(self):
        """
        Calculate the final URL of the service.

        It sets:
          * self.serviceURL: the URL returned by __findServiceURL
          * self.__URLTuple: the split of serviceURL obtained by Network.splitURL
          * self._serviceName: the last element of __URLTuple

        :return: S_OK(), or S_ERROR if the URL cannot be found or split
        """
        # Calculate final URL
        try:
            result = self.__findServiceURL()
        except Exception as e:
            return S_ERROR(str(e))
        if not result['OK']:
            return result
        self.serviceURL = result['Value']
        retVal = Network.splitURL(self.serviceURL)
        if not retVal['OK']:
            return S_ERROR("URL is malformed: %s" % retVal['Message'])
        self.__URLTuple = retVal['Value']
        self._serviceName = self.__URLTuple[-1]
        return S_OK()
Ejemplo n.º 28
0
    def __call__(self):
        """
        Process the request by executing its waiting operations one after the other.

        The proxy of the request owner is set up first. When that fails, the
        request gets its Error set (and is possibly failed or delayed) and is
        returned wrapped in S_OK. Otherwise operations are executed for as long
        as the request status is "Waiting"; each attempt, failure and success is
        reported either to the RMS monitoring reporter or to gMonitor, depending
        on ``self.rmsMonitoring``. A request ending up "Done" is updated and,
        when attached to a job, finalized (up to 10 attempts, 10 seconds apart).

        :return: S_OK( request ) on normal exit and on proxy setup failure;
                 the failed result of getWaiting / updateRequest when one of
                 them fails; S_ERROR when an operation handler raised or when
                 the request could not be finalized
        """

        self.log.debug("about to execute request")
        if not self.rmsMonitoring:
            gMonitor.addMark("RequestAtt", 1)

        # # setup proxy for request owner
        setupProxy = self.setupProxy()
        if not setupProxy["OK"]:
            userSuspended = "User is currently suspended"
            self.request.Error = setupProxy["Message"]
            # In case the user does not have proxy
            if DErrno.cmpError(setupProxy, DErrno.EPROXYFIND):
                self.log.error("Error setting proxy. Request set to Failed:",
                               setupProxy["Message"])
                # If user is no longer registered, fail the request
                for operation in self.request:
                    for opFile in operation:
                        opFile.Status = "Failed"
                    operation.Status = "Failed"
            elif userSuspended in setupProxy["Message"]:
                # If user is suspended, wait for a long time
                self.request.delayNextExecution(6 * 60)
                self.request.Error = userSuspended
                self.log.error("Error setting proxy: " + userSuspended,
                               self.request.OwnerDN)
            else:
                self.log.error("Error setting proxy", setupProxy["Message"])
            return S_OK(self.request)
        shifter = setupProxy["Value"]["Shifter"]

        error = None

        while self.request.Status == "Waiting":

            # # get waiting operation
            operation = self.request.getWaiting()
            if not operation["OK"]:
                self.log.error("Cannot get waiting operation",
                               operation["Message"])
                return operation
            operation = operation["Value"]
            self.log.info("executing operation", "%s" % operation.Type)

            # # and handler for it
            handler = self.getHandler(operation)
            if not handler["OK"]:
                self.log.error("Unable to process operation",
                               "%s: %s" % (operation.Type, handler["Message"]))
                operation.Error = handler["Message"]
                break

            handler = handler["Value"]
            # # set shifters list in the handler
            handler.shifter = shifter
            # set rmsMonitoring flag for the RequestOperation
            handler.rmsMonitoring = self.rmsMonitoring
            # # and execute
            pluginName = self.getPluginName(
                self.handlersDict.get(operation.Type))
            if self.standalone:
                useServerCertificate = gConfig.useServerCertificate()
            else:
                # Always use server certificates if executed within an agent
                useServerCertificate = True
            try:
                if pluginName:
                    self._reportOperationStatus(pluginName, operation,
                                                "Attempted", "Att")
                try:
                    # Always use request owner proxy
                    if useServerCertificate:
                        gConfigurationData.setOptionInCFG(
                            "/DIRAC/Security/UseServerCertificate", "false")
                    exe = handler()
                finally:
                    # The option is global: put it back whatever the handler
                    # did, including exceptions not derived from Exception
                    if useServerCertificate:
                        gConfigurationData.setOptionInCFG(
                            "/DIRAC/Security/UseServerCertificate", "true")
                if not exe["OK"]:
                    self.log.error("unable to process operation",
                                   "%s: %s" % (operation.Type, exe["Message"]))
                    if pluginName:
                        self._reportOperationStatus(pluginName, operation,
                                                    "Failed", "Fail")
                    self._reportRequestStatus(operation.RequestID, "Failed",
                                              "RequestFail")

                    if self.request.JobID:
                        # Check if the job exists
                        monitorServer = JobMonitoringClient(
                            useCertificates=True)
                        res = monitorServer.getJobSummary(
                            int(self.request.JobID))
                        if not res["OK"]:
                            self.log.error(
                                "RequestTask: Failed to get job status",
                                "%d" % self.request.JobID)
                        elif not res["Value"]:
                            self.log.warn(
                                "RequestTask: job does not exist (anymore): failed request",
                                "JobID: %d" % self.request.JobID,
                            )
                            for opFile in operation:
                                opFile.Status = "Failed"
                            if operation.Status != "Failed":
                                operation.Status = "Failed"
                            self.request.Error = "Job no longer exists"
            except Exception as e:
                error = str(e)
                self.log.exception("hit by exception:", "%s" % error)
                if pluginName:
                    self._reportOperationStatus(pluginName, operation,
                                                "Failed", "Fail")
                self._reportRequestStatus(operation.RequestID, "Failed",
                                          "RequestFail")

                # Also covers exceptions raised outside of the handler call
                if useServerCertificate:
                    gConfigurationData.setOptionInCFG(
                        "/DIRAC/Security/UseServerCertificate", "true")
                break

            # # operation status check
            if operation.Status == "Done" and pluginName:
                self._reportOperationStatus(pluginName, operation,
                                            "Successful", "OK")
            elif operation.Status == "Failed" and pluginName:
                self._reportOperationStatus(pluginName, operation,
                                            "Failed", "Fail")
            elif operation.Status in ("Waiting", "Scheduled"):
                # # no update for waiting or all files scheduled
                break

        if not self.rmsMonitoring:
            gMonitor.flush()

        if error:
            return S_ERROR(error)

        # # request done?
        if self.request.Status == "Done":
            # # update request to the RequestDB
            self.log.info("Updating request status:",
                          "%s" % self.request.Status)
            update = self.updateRequest()
            if not update["OK"]:
                self.log.error("Cannot update request status",
                               update["Message"])
                return update
            self.log.info("request is done", "%s" % self.request.RequestName)
            self._reportRequestStatus(getattr(self.request, "RequestID", 0),
                                      "Successful", "RequestOK")
            # # and there is a job waiting for it? finalize!
            if self.request.JobID:
                attempts = 0
                while True:
                    finalizeRequest = self.requestClient.finalizeRequest(
                        self.request.RequestID,
                        self.request.JobID  # pylint: disable=no-member
                    )
                    if not finalizeRequest["OK"]:
                        # Only the first failure is logged as an error
                        if not attempts:
                            self.log.error(
                                "unable to finalize request, will retry",
                                "ReqName %s:%s" % (self.request.RequestName,
                                                   finalizeRequest["Message"]),
                            )
                        self.log.debug("Waiting 10 seconds")
                        attempts += 1
                        if attempts == 10:
                            self.log.error("Giving up finalize request")
                            return S_ERROR("Could not finalize request")

                        time.sleep(10)

                    else:
                        self.log.info(
                            "request is finalized",
                            "ReqName %s %s" % (self.request.RequestName,
                                               (" after %d attempts" %
                                                attempts) if attempts else ""),
                        )
                        break

        # Commit all the data to the ES Backend
        if self.rmsMonitoring:
            self.rmsMonitoringReporter.commit()
        # Request will be updated by the callBack method
        self.log.verbose("RequestTasks exiting",
                         "request %s" % self.request.Status)
        return S_OK(self.request)

    def _reportOperationStatus(self, pluginName, operation, status, markSuffix):
        """
        Report the status of an operation to the monitoring in use.

        With ``self.rmsMonitoring`` a record is added to the RMS monitoring
        reporter, otherwise the gMonitor mark ``<pluginName><markSuffix>`` is
        incremented.

        :param pluginName: name of the plugin handling the operation
        :param operation: the operation, read for OperationID and RequestID
        :param str status: value of the "status" field of the record
        :param str markSuffix: suffix of the gMonitor mark ("Att", "OK" or "Fail")
        """
        if self.rmsMonitoring:
            self.rmsMonitoringReporter.addRecord({
                "timestamp": int(Time.toEpoch()),
                "host": Network.getFQDN(),
                "objectType": "Operation",
                "operationType": pluginName,
                "objectID": operation.OperationID,
                "parentID": operation.RequestID,
                "status": status,
                "nbObject": 1,
            })
        else:
            gMonitor.addMark("%s%s" % (pluginName, markSuffix), 1)

    def _reportRequestStatus(self, requestID, status, markName):
        """
        Report the status of a request to the monitoring in use.

        With ``self.rmsMonitoring`` a record is added to the RMS monitoring
        reporter, otherwise the gMonitor mark ``markName`` is incremented.

        :param requestID: value of the "objectID" field of the record
        :param str status: value of the "status" field of the record
        :param str markName: name of the gMonitor mark
        """
        if self.rmsMonitoring:
            self.rmsMonitoringReporter.addRecord({
                "timestamp": int(Time.toEpoch()),
                "host": Network.getFQDN(),
                "objectType": "Request",
                "objectID": requestID,
                "status": status,
                "nbObject": 1,
            })
        else:
            gMonitor.addMark(markName, 1)
Ejemplo n.º 29
0
class BaseClient:
  """
  Base class of the service clients.

  At construction time it resolves the setup, the VO and the URL of the
  destination service (plus further settings handled by the other discovery
  methods listed in ``__init__``). The first failure met while doing so is
  kept in the private init status instead of being raised.
  """

  VAL_EXTRA_CREDENTIALS_HOST = "hosts"

  # Names of the keyword arguments understood by the client
  KW_USE_CERTIFICATES = "useCertificates"
  KW_EXTRA_CREDENTIALS = "extraCredentials"
  KW_TIMEOUT = "timeout"
  KW_SETUP = "setup"
  KW_VO = "VO"
  KW_DELEGATED_DN = "delegatedDN"
  KW_DELEGATED_GROUP = "delegatedGroup"
  KW_IGNORE_GATEWAYS = "ignoreGateways"
  KW_PROXY_LOCATION = "proxyLocation"
  KW_PROXY_STRING = "proxyString"
  KW_PROXY_CHAIN = "proxyChain"
  KW_SKIP_CA_CHECK = "skipCACheck"
  KW_KEEP_ALIVE_LAPSE = "keepAliveLapse"

  # Shared by all the instances; asked for the setup when none is given in kwargs
  __threadConfig = ThreadConfig()

  def __init__( self, serviceName, **kwargs ):
    """
    Store the destination and run every discovery step.

    :param serviceName: name or URL of the destination service, must be a string
    :param kwargs: client options, see the ``KW_*`` class attributes
    :raises TypeError: if ``serviceName`` is not a string
    """
    if not isinstance( serviceName, types.StringTypes ):
      raise TypeError( "Service name expected to be a string. Received %s type %s" %
                       ( str( serviceName ), type( serviceName ) ) )
    self._destinationSrv = serviceName
    # Overwritten by __discoverURL once the URL is known
    self._serviceName = serviceName
    self.kwargs = kwargs
    self.__initStatus = S_OK()
    self.__idDict = {}
    self.__extraCredentials = ""
    self.__enableThreadCheck = False
    self.__retry = 0
    self.__retryDelay = 0
    self.__nbOfUrls = 1 #by default we always have 1 url for example: RPCClient('dips://volhcb38.cern.ch:9162/Framework/SystemAdministrator')
    self.__nbOfRetry = 3 # by default we try 3 times
    self.__bannedUrls = []
    # Every step is executed even after a failure; only the first error is kept
    for initFunc in ( self.__discoverSetup, self.__discoverVO, self.__discoverTimeout,
                      self.__discoverURL, self.__discoverCredentialsToUse,
                      self.__checkTransportSanity,
                      self.__setKeepAliveLapse ):
      result = initFunc()
      if not result[ 'OK' ] and self.__initStatus[ 'OK' ]:
        self.__initStatus = result
    self._initialize()
    #HACK for thread-safety:
    self.__allowedThreadID = False


  def _initialize( self ):
    """ Hook called at the end of ``__init__``; does nothing here. """
    pass

  def getDestinationService( self ):
    """ Return the service name (or URL) given to the constructor. """
    return self._destinationSrv

  def getServiceName( self ):
    """ Return the service name: the constructor argument until __discoverURL
        replaces it with the last element of the split URL.
    """
    return self._serviceName

  def __discoverSetup( self ):
    """ Set ``self.setup``, looking in this order at: the ``setup`` keyword
        argument, the setup of the thread configuration, and the /DIRAC/Setup
        option (default "Test").

        :return: S_OK()
    """
    #Which setup to use?
    if self.KW_SETUP in self.kwargs and self.kwargs[ self.KW_SETUP ]:
      self.setup = str( self.kwargs[ self.KW_SETUP ] )
    else:
      self.setup = self.__threadConfig.getSetup()
      if not self.setup:
        self.setup = gConfig.getValue( "/DIRAC/Setup", "Test" )
    return S_OK()

  def __discoverVO( self ):
    """ Set ``self.vo`` from the ``VO`` keyword argument when it is given and
        not empty, otherwise from the /DIRAC/VirtualOrganization option
        (default "unknown").

        :return: S_OK()
    """
    #Which VO to use?
    if self.KW_VO in self.kwargs and self.kwargs[ self.KW_VO ]:
      self.vo = str( self.kwargs[ self.KW_VO ] )
    else:
      self.vo = gConfig.getValue( "/DIRAC/VirtualOrganization", "unknown" )
    return S_OK()

  def __discoverURL( self ):
    """ Calculate the final URL of the service.

        It sets:
          * self.serviceURL: the URL returned by __findServiceURL
          * self.__URLTuple: the split of serviceURL obtained by Network.splitURL
          * self._serviceName: the last element of __URLTuple

        The options found under /DIRAC/ConnConf/<x>:<y>, where <x> and <y> are
        the elements 1 and 2 of __URLTuple, are added to self.kwargs unless
        the key is already present there.

        :return: S_OK(), or S_ERROR if the URL cannot be found or split
    """
    #Calculate final URL
    try:
      result = self.__findServiceURL()
    except Exception as e:
      return S_ERROR( str( e ) )
    if not result[ 'OK' ]:
      return result
    self.serviceURL = result[ 'Value' ]
    retVal = Network.splitURL( self.serviceURL )
    if not retVal[ 'OK' ]:
      return S_ERROR( "URL is malformed: %s" % retVal[ 'Message' ] )
    self.__URLTuple = retVal[ 'Value' ]
    self._serviceName = self.__URLTuple[-1]
    res = gConfig.getOptionsDict( "/DIRAC/ConnConf/%s:%s" % self.__URLTuple[1:3] )
    if res[ 'OK' ]:
      opts = res[ 'Value' ]
      # Explicit keyword arguments win over the configured connection options
      for k in opts:
        if k not in self.kwargs:
          self.kwargs[k] = opts[k]
    return S_OK()
Ejemplo n.º 30
0
    def __findServiceURL(self):
        """
        Discovers the URL of a service, taking into account gateways, multiple URLs, banned URLs


        If the site on which we run is configured to use gateways (/DIRAC/Gateways/<siteName>),
        these URLs will be used. To ignore the gateway, it is possible to set KW_IGNORE_GATEWAYS
        to False in kwargs.

        If self._destinationSrv (given as constructor attribute) is a properly formed URL,
        we just return this one. If we have to use a gateway, we just replace the server name in the url.

        The list of URLs defined in the CS (<System>/URLs/<Component>) is randomized

        This method also sets some attributes:
          * self.__nbOfUrls = number of URLs
          * self.__nbOfRetry removed in HTTPS (Managed by requests)
          * self.__bannedUrls is reinitialized if all the URLs are banned

        :return: the selected URL

        WARNING (Mostly) COPY PASTE FROM BaseClient (protocols list is changed to https)

        """
        if not self.__initStatus["OK"]:
            return self.__initStatus

        # Load the Gateways URLs for the current site Name
        gatewayURL = False
        if not self.kwargs.get(self.KW_IGNORE_GATEWAYS):
            gatewayURLs = getGatewayURLs()
            if gatewayURLs:
                gatewayURL = "/".join(gatewayURLs[0].split("/")[:3])

        # If what was given as constructor attribute is a properly formed URL,
        # we just return this one.
        # If we have to use a gateway, we just replace the server name in it
        if self._destinationSrv.startswith("https://"):
            gLogger.debug("Already given a valid url", self._destinationSrv)
            if not gatewayURL:
                return S_OK(self._destinationSrv)
            gLogger.debug("Reconstructing given URL to pass through gateway")
            path = "/".join(self._destinationSrv.split("/")[3:])
            finalURL = "%s/%s" % (gatewayURL, path)
            gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
            return S_OK(finalURL)

        if gatewayURL:
            gLogger.debug("Using gateway", gatewayURL)
            return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

        # If nor url is given as constructor, we extract the list of URLs from the CS (System/URLs/Component)
        try:
            # We randomize the list, and add at the end the failover URLs (System/FailoverURLs/Component)
            urlsList = getServiceURLs(self._destinationSrv, setup=self.setup, failover=True)
        except Exception as e:
            return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, repr(e)))
        if not urlsList:
            return S_ERROR("URL for service %s not found" % self._destinationSrv)

        self.__nbOfUrls = len(urlsList)
        # __nbOfRetry removed in HTTPS (managed by requests)
        if self.__nbOfUrls == len(self.__bannedUrls):
            self.__bannedUrls = []  # retry all urls
            gLogger.debug("Retrying again all URLs")

        if self.__bannedUrls and len(urlsList) > 1:
            # we have host which is not accessible. We remove that host from the list.
            # We only remove if we have more than one instance
            for i in self.__bannedUrls:
                gLogger.debug("Removing banned URL", "%s" % i)
                urlsList.remove(i)

        sURL = urlsList[0]

        # If we have banned URLs, and several URLs at disposals, we make sure that the selected sURL
        # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl

        if self.__bannedUrls and self.__nbOfUrls > 2:  # when we have multiple services then we can
            # have a situation when two services are running on the same machine with different ports...
            retVal = Network.splitURL(sURL)
            nexturl = None
            if retVal["OK"]:
                nexturl = retVal["Value"]

                found = False
                for i in self.__bannedUrls:
                    retVal = Network.splitURL(i)
                    if retVal["OK"]:
                        bannedurl = retVal["Value"]
                    else:
                        break
                    # We found a banned URL on the same host as the one we are running on
                    if nexturl[1] == bannedurl[1]:
                        found = True
                        break
                if found:
                    nexturl = self.__selectUrl(nexturl, urlsList[1:])
                    if nexturl:  # an url found which is in different host
                        sURL = nexturl
        gLogger.debug("Discovering URL for service", "%s -> %s" % (self._destinationSrv, sURL))
        return S_OK(sURL)
Ejemplo n.º 31
0
 def __generateUniqueClientName(self):
     """
     Build a client name that differs from one call to the next.

     The current UTC time, a random number, the value of Network.getFQDN()
     and the gLogger name are joined with ":" and hashed with MD5.

     :return: the hexadecimal MD5 digest of the joined string
     """
     nameParts = [
         str(datetime.datetime.utcnow()),
         str(random.random()),
         Network.getFQDN(),
         gLogger.getName(),
     ]
     return md5(":".join(nameParts).encode()).hexdigest()
Ejemplo n.º 32
0
  def __findServiceURL( self ):
    if not self.__initStatus[ 'OK' ]:
      return self.__initStatus
    gatewayURL = False
    if self.KW_IGNORE_GATEWAYS not in self.kwargs or not self.kwargs[ self.KW_IGNORE_GATEWAYS ]:
      dRetVal = gConfig.getOption( "/DIRAC/Gateways/%s" % DIRAC.siteName() )
      if dRetVal[ 'OK' ]:
        rawGatewayURL = List.randomize( List.fromChar( dRetVal[ 'Value'], "," ) )[0]
        gatewayURL = "/".join( rawGatewayURL.split( "/" )[:3] )

    for protocol in gProtocolDict.keys():
      if self._destinationSrv.find( "%s://" % protocol ) == 0:
        gLogger.debug( "Already given a valid url", self._destinationSrv )
        if not gatewayURL:
          return S_OK( self._destinationSrv )
        gLogger.debug( "Reconstructing given URL to pass through gateway" )
        path = "/".join( self._destinationSrv.split( "/" )[3:] )
        finalURL = "%s/%s" % ( gatewayURL, path )
        gLogger.debug( "Gateway URL conversion:\n %s -> %s" % ( self._destinationSrv, finalURL ) )
        return S_OK( finalURL )

    if gatewayURL:
      gLogger.debug( "Using gateway", gatewayURL )
      return S_OK( "%s/%s" % ( gatewayURL, self._destinationSrv ) )

    try:
      urls = getServiceURL( self._destinationSrv, setup = self.setup )
    except Exception as e:
      return S_ERROR( "Cannot get URL for %s in setup %s: %s" % ( self._destinationSrv, self.setup, repr( e ) ) )
    if not urls:
      return S_ERROR( "URL for service %s not found" % self._destinationSrv )

    urlsList = List.fromChar( urls, "," )
    self.__nbOfUrls = len( urlsList )
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3 # we retry 2 times all services, if we run more than 2 services
    if len( urlsList ) == len( self.__bannedUrls ):
      self.__bannedUrls = []  # retry all urls
      gLogger.debug( "Retrying again all URLs" )

    if len( self.__bannedUrls ) > 0 and len( urlsList ) > 1 :
      # we have host which is not accessible. We remove that host from the list.
      # We only remove if we have more than one instance
      for i in self.__bannedUrls:
        gLogger.debug( "Removing banned URL", "%s" % i )
        urlsList.remove( i )

    randUrls = List.randomize( urlsList )
    sURL = randUrls[0]

    if len( self.__bannedUrls ) > 0 and self.__nbOfUrls > 2:  # when we have multiple services then we can have a situation
      # when two service are running on the same machine with different port...

      retVal = Network.splitURL( sURL )
      nexturl = None
      if retVal['OK']:
        nexturl = retVal['Value']

        found = False
        for i in self.__bannedUrls:
          retVal = Network.splitURL( i )
          if retVal['OK']:
            bannedurl = retVal['Value']
          else:
            break

          if nexturl[1] == bannedurl[1]:
            found = True
            break
        if found:
          nexturl = self.__selectUrl( nexturl, randUrls[1:] )
          if nexturl:  # an url found which is in different host
            sURL = nexturl
    gLogger.debug( "Discovering URL for service", "%s -> %s" % ( self._destinationSrv, sURL ) )
    return S_OK( sURL )
Ejemplo n.º 33
0
 def am_go(self):
     """
     Run one cycle of the agent module and log a summary of it.

     The shifter proxy is set first; a failure there is returned right away.
     When the watchdog time is positive, a SIGALRM alarm with the default
     handler is armed before the cycle and cancelled at the end of this
     method. The cycle counters and elapsed time kept in the module
     properties are updated, and the cycle duration is recorded through the
     activity monitoring reporter when activity monitoring is on and the
     cycle succeeded.

     :return: the failed result of ``_setShifterProxy``, otherwise the result
              of the executed module cycle
     """
     # Set the shifter proxy if required
     proxyResult = self._setShifterProxy()
     if not proxyResult['OK']:
         return proxyResult

     separator = "-" * 40
     self.log.notice(separator)
     self.log.notice("Starting cycle for module %s" % self.__moduleProperties['fullName'])
     maxCycles = self.am_getMaxCycles()
     if maxCycles > 0:
         cyclesSoFar = self.__moduleProperties['cyclesDone']
         self.log.notice("Remaining %s of %s cycles" % (maxCycles - cyclesSoFar, maxCycles))
     self.log.notice(separator)

     # use SIGALARM as a watchdog interrupt if enabled
     watchdogSeconds = self.am_getWatchdogTime()
     watchdogEnabled = watchdogSeconds > 0
     if watchdogEnabled:
         signal.signal(signal.SIGALRM, signal.SIG_DFL)
         signal.alarm(watchdogSeconds)

     cycleStart = time.time()
     cpuStats = self._startReportToMonitoring()
     cycleResult = self.__executeModuleCycle()
     if cpuStats:
         self._endReportToMonitoring(*cpuStats)

     # Increment counters
     self.__moduleProperties['cyclesDone'] += 1
     # Show status
     cycleDuration = time.time() - cycleStart
     self.__moduleProperties['totalElapsedTime'] += cycleDuration
     self.log.notice(separator)
     self.log.notice("Agent module %s run summary" % self.__moduleProperties['fullName'])
     self.log.notice(" Executed %s times previously" % self.__moduleProperties['cyclesDone'])
     self.log.notice(" Cycle took %.2f seconds" % cycleDuration)
     totalDuration = self.__moduleProperties['totalElapsedTime']
     averageDuration = totalDuration / self.__moduleProperties['cyclesDone']
     self.log.notice(" Average execution time: %.2f seconds" % (averageDuration))
     # Share of the polling period spent executing, in percent
     pollingLoad = averageDuration * 100 / self.am_getOption('PollingTime')
     self.log.notice(" Polling time: %s seconds" % self.am_getOption('PollingTime'))
     self.log.notice(" Average execution/polling time: %.2f%%" % pollingLoad)

     if not cycleResult['OK']:
         self.log.warn(" Cycle had an error:", cycleResult['Message'])
     else:
         self.log.notice(" Cycle was successful")
         if self.activityMonitoring:
             # The cycle duration is recorded together with basic details of the
             # component; the record is only added here, not committed.
             self.activityMonitoringReporter.addRecord({
                 'timestamp': int(Time.toEpoch()),
                 'host': Network.getFQDN(),
                 'componentType': "agent",
                 'component': "_".join(self.__moduleProperties['fullName'].split("/")),
                 'cycleDuration': cycleDuration,
                 'cycles': 1
             })
     self.log.notice(separator)

     # Update number of cycles
     if not self.activityMonitoring:
         self.monitor.setComponentExtraParam('cycles', self.__moduleProperties['cyclesDone'])
     # cycle finished successfully, cancel watchdog
     if watchdogEnabled:
         signal.alarm(0)
     return cycleResult
Ejemplo n.º 34
0
 def getHostname(self):
     """
     Return the host name to use.

     :return: the value of the /DIRAC/Hostname option when it is set and not
              empty, otherwise the value of Network.getFQDN()
     """
     configured = self.getOption("/DIRAC/Hostname")
     return configured if configured else Network.getFQDN()
Ejemplo n.º 35
0
  def __findServiceURL(self):
    """
        Discover the URL of a service, taking into account gateways, multiple URLs and banned URLs.

        If the site on which we run is configured to use gateways (/DIRAC/Gateways/<siteName>),
        one of these URLs (picked at random) will be used. To ignore the gateways, set
        KW_IGNORE_GATEWAYS to a true value in kwargs.

        If self._destinationSrv (given as constructor attribute) is a properly formed URL,
        we just return this one. If we have to use a gateway, we just replace the server name in the url.

        The list of URLs defined in the CS (<System>/URLs/<Component>) is randomized, and the
        failover URLs (<System>/FailoverURLs/<Component>), if any, are appended at the end.

        This method also sets some attributes:
          * self.__nbOfUrls = number of URLs
          * self.__nbOfRetry = 2 if we have more than 2 urls, otherwise 3
          * self.__bannedUrls is reinitialized if all the URLs are banned

        :return: S_OK(str)/S_ERROR() -- the selected URL

    """
    if not self.__initStatus['OK']:
      return self.__initStatus

    # Load the Gateways URLs for the current site Name
    gatewayURL = False
    if not self.kwargs.get(self.KW_IGNORE_GATEWAYS):
      dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
      if dRetVal['OK']:
        gatewayList = List.fromChar(dRetVal['Value'], ",")
        # An option without any usable entry would make the [0] below raise IndexError
        if gatewayList:
          rawGatewayURL = List.randomize(gatewayList)[0]
          # Keep only "<protocol>://<host>:<port>"
          gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # If what was given as constructor attribute is a properly formed URL,
    # we just return this one.
    # If we have to use a gateway, we just replace the server name in it
    for protocol in gProtocolDict:
      if self._destinationSrv.startswith("%s://" % protocol):
        gLogger.debug("Already given a valid url", self._destinationSrv)
        if not gatewayURL:
          return S_OK(self._destinationSrv)
        gLogger.debug("Reconstructing given URL to pass through gateway")
        path = "/".join(self._destinationSrv.split("/")[3:])
        finalURL = "%s/%s" % (gatewayURL, path)
        gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
        return S_OK(finalURL)

    if gatewayURL:
      gLogger.debug("Using gateway", gatewayURL)
      return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    # We extract the list of URLs from the CS (System/URLs/Component)
    try:
      urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
      return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, repr(e)))
    if not urls:
      return S_ERROR("URL for service %s not found" % self._destinationSrv)

    failoverUrls = []
    # Try if there are some failover URLs to use as last resort
    try:
      failoverUrlsStr = getServiceFailoverURL(self._destinationSrv, setup=self.setup)
      if failoverUrlsStr:
        failoverUrls = failoverUrlsStr.split(',')
    except Exception as e:
      # Failover URLs are optional: carry on without them, but leave a trace
      gLogger.debug("Could not get failover URLs for %s" % self._destinationSrv, repr(e))

    # We randomize the list, and add at the end the failover URLs (System/FailoverURLs/Component)
    urlsList = List.randomize(List.fromChar(urls, ",")) + failoverUrls
    self.__nbOfUrls = len(urlsList)
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3  # we retry 2 times all services, if we run more than 2 services
    if not urlsList:
      # Nothing usable came out of the CS value: report it instead of failing on urlsList[0]
      return S_ERROR("URL for service %s not found" % self._destinationSrv)

    if self.__nbOfUrls == len(self.__bannedUrls):
      self.__bannedUrls = []  # retry all urls
      gLogger.debug("Retrying again all URLs")

    if self.__bannedUrls and len(urlsList) > 1:
      # Some URLs are flagged as not accessible: drop them from the candidates.
      # We only do so if we have more than one instance.
      # Filtering (rather than list.remove) tolerates banned URLs that are not in the list.
      remainingUrls = []
      for url in urlsList:
        if url in self.__bannedUrls:
          gLogger.debug("Removing banned URL", "%s" % url)
        else:
          remainingUrls.append(url)
      if remainingUrls:
        urlsList = remainingUrls
      else:
        # Every candidate is banned (the banned list may hold extra entries): retry all of them
        self.__bannedUrls = []
        gLogger.debug("Retrying again all URLs")

    # Take the first URL from the list
    sURL = urlsList[0]

    # If we have banned URLs, and several URLs at disposals, we make sure that the selected sURL
    # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl
    if self.__bannedUrls and self.__nbOfUrls > 2:  # when we have multiple services then we can
      # have a situation when two services are running on the same machine with different ports...
      retVal = Network.splitURL(sURL)
      if retVal['OK']:
        nexturl = retVal['Value']

        found = False
        for bannedURL in self.__bannedUrls:
          retVal = Network.splitURL(bannedURL)
          if not retVal['OK']:
            # Skip an unparsable banned URL instead of abandoning the whole check
            continue
          # We found a banned URL on the same host as the selected URL
          if nexturl[1] == retVal['Value'][1]:
            found = True
            break
        if found:
          nexturl = self.__selectUrl(nexturl, urlsList[1:])
          if nexturl:  # an url found which is in different host
            sURL = nexturl
    gLogger.debug("Discovering URL for service", "%s -> %s" % (self._destinationSrv, sURL))
    return S_OK(sURL)
Ejemplo n.º 36
0
 def getHostname( self ):
   """Return the host name to be used for this installation.

   The value of the ``/DIRAC/Hostname`` option (read with ``self.getOption``) wins
   when it is truthy; otherwise ``Network.getFQDN()`` provides the answer.
   """
   return self.getOption( "/DIRAC/Hostname" ) or Network.getFQDN()
Ejemplo n.º 37
0
  def __findServiceURL(self):
    """ Discover the URL of a service, taking into account gateways, multiple URLs and banned URLs.

        If the site on which we run is configured to use gateways (/DIRAC/Gateways/<siteName>),
        one of these URLs (picked at random) will be used. To ignore the gateways, set
        KW_IGNORE_GATEWAYS to a true value in kwargs.

        If self._destinationSrv (given as constructor attribute) is a properly formed URL,
        we just return this one. If we have to use a gateway, we just replace the server name in the url.

        The list of URLs defined in the CS (<System>/URLs/<Component>) is randomized, and the
        failover URLs (<System>/FailoverURLs/<Component>), if any, are appended at the end.

        This method also sets some attributes:
          * self.__nbOfUrls = number of URLs
          * self.__nbOfRetry = 2 if we have more than 2 urls, otherwise 3
          * self.__bannedUrls is reinitialized if all the URLs are banned

        :return: S_OK(str)/S_ERROR() -- the selected URL
    """
    if not self.__initStatus['OK']:
      return self.__initStatus

    # Load the Gateways URLs for the current site Name
    gatewayURL = False
    if not self.kwargs.get(self.KW_IGNORE_GATEWAYS):
      dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
      if dRetVal['OK']:
        gatewayCandidates = List.fromChar(dRetVal['Value'], ",")
        # Without any usable entry the [0] below would raise IndexError
        if gatewayCandidates:
          rawGatewayURL = List.randomize(gatewayCandidates)[0]
          # Keep only "<protocol>://<host>:<port>"
          gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # If what was given as constructor attribute is a properly formed URL,
    # we just return this one.
    # If we have to use a gateway, we just replace the server name in it
    for protocol in gProtocolDict:
      if self._destinationSrv.startswith("%s://" % protocol):
        gLogger.debug("Already given a valid url", self._destinationSrv)
        if not gatewayURL:
          return S_OK(self._destinationSrv)
        gLogger.debug("Reconstructing given URL to pass through gateway")
        path = "/".join(self._destinationSrv.split("/")[3:])
        finalURL = "%s/%s" % (gatewayURL, path)
        gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
        return S_OK(finalURL)

    if gatewayURL:
      gLogger.debug("Using gateway", gatewayURL)
      return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    # We extract the list of URLs from the CS (System/URLs/Component)
    try:
      urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
      return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, repr(e)))
    if not urls:
      return S_ERROR("URL for service %s not found" % self._destinationSrv)

    failoverUrls = []
    # Try if there are some failover URLs to use as last resort
    try:
      failoverUrlsStr = getServiceFailoverURL(self._destinationSrv, setup=self.setup)
      if failoverUrlsStr:
        failoverUrls = failoverUrlsStr.split(',')
    except Exception as e:
      # Failover URLs are only a last resort: continue without them, but do not hide the reason
      gLogger.debug("Could not get failover URLs for %s" % self._destinationSrv, repr(e))

    # We randomize the list, and add at the end the failover URLs (System/FailoverURLs/Component)
    urlsList = List.randomize(List.fromChar(urls, ",")) + failoverUrls
    self.__nbOfUrls = len(urlsList)
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3  # we retry 2 times all services, if we run more than 2 services
    if not urlsList:
      # The CS value did not contain any usable entry: fail cleanly rather than on urlsList[0]
      return S_ERROR("URL for service %s not found" % self._destinationSrv)

    if self.__nbOfUrls == len(self.__bannedUrls):
      self.__bannedUrls = []  # retry all urls
      gLogger.debug("Retrying again all URLs")

    if self.__bannedUrls and len(urlsList) > 1:
      # Some URLs are flagged as not accessible: keep only the ones which are not banned.
      # We only do so if we have more than one instance.
      # A banned URL which is absent from the list is simply ignored (list.remove would raise).
      usableUrls = []
      for candidate in urlsList:
        if candidate in self.__bannedUrls:
          gLogger.debug("Removing banned URL", "%s" % candidate)
        else:
          usableUrls.append(candidate)
      if usableUrls:
        urlsList = usableUrls
      else:
        # All the candidates are banned (the banned list may hold extra entries): retry everything
        self.__bannedUrls = []
        gLogger.debug("Retrying again all URLs")

    # Take the first URL from the list
    sURL = urlsList[0]

    # If we have banned URLs, and several URLs at disposals, we make sure that the selected sURL
    # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl
    if self.__bannedUrls and self.__nbOfUrls > 2:  # when we have multiple services then we can
      # have a situation when two services are running on the same machine with different ports...
      retVal = Network.splitURL(sURL)
      if retVal['OK']:
        nexturl = retVal['Value']

        found = False
        for bannedURL in self.__bannedUrls:
          retVal = Network.splitURL(bannedURL)
          if not retVal['OK']:
            # One malformed banned URL must not stop the comparison with the other ones
            continue
          # We found a banned URL on the same host as the selected URL
          if nexturl[1] == retVal['Value'][1]:
            found = True
            break
        if found:
          nexturl = self.__selectUrl(nexturl, urlsList[1:])
          if nexturl:  # an url found which is in different host
            sURL = nexturl
    gLogger.debug("Discovering URL for service", "%s -> %s" % (self._destinationSrv, sURL))
    return S_OK(sURL)
Ejemplo n.º 38
0
 def getVMIP(self):
     """Return the IP address recorded for the ``eth0`` interface.

     The value is the ``'ip'`` entry of the ``'eth0'`` record in the mapping
     returned by ``Network.discoverInterfaces()``.
     """
     return Network.discoverInterfaces()['eth0']['ip']
Ejemplo n.º 39
0
    def execute(self):
        """Read requests from RequestClient and enqueue them into ProcessPool.

        Requests are fetched one at a time, or in bulk when ``self.__bulkRequest`` is set,
        until ``self.__requestsPerCycle`` tasks have been enqueued or no request is returned.
        Each request is cached with ``self.cacheRequest``, serialized to JSON and queued as a
        ``RequestTask``; the method waits ``self.__poolSleep`` seconds at a time while the
        pool reports no free slot.  Finally the pool callbacks are flushed.

        :return: S_OK() on a clean cycle, or the error returned by ``self.cacheRequest`` when
                 a request could not be cached for a reason other than ``errno.EALREADY``.
                 The process exits with status 1 if ``processResults`` returns a negative value.
        """
        if not self.__rmsMonitoring:
            gMonitor.addMark("Iteration", 1)
        # # requests (and so tasks) counter
        taskCounter = 0
        while taskCounter < self.__requestsPerCycle:
            self.log.debug("execute: executing %d request in this cycle" % taskCounter)

            requestsToExecute = []

            if not self.__bulkRequest:
                self.log.info("execute: ask for a single request")
                getRequest = self.requestClient().getRequest()
                if not getRequest["OK"]:
                    self.log.error("execute:", "%s" % getRequest["Message"])
                    break
                if not getRequest["Value"]:
                    self.log.info("execute: no more 'Waiting' requests to process")
                    break
                requestsToExecute = [getRequest["Value"]]
            else:
                # never ask for more requests than what is left to do in this cycle
                numberOfRequest = min(self.__bulkRequest, self.__requestsPerCycle - taskCounter)
                self.log.info("execute: ask for requests", "%s" % numberOfRequest)
                getRequests = self.requestClient().getBulkRequests(numberOfRequest)
                if not getRequests["OK"]:
                    self.log.error("execute:", "%s" % getRequests["Message"])
                    break
                if not getRequests["Value"]:
                    self.log.info("execute: no more 'Waiting' requests to process")
                    break
                for rId in getRequests["Value"]["Failed"]:
                    self.log.error("execute:", "%s" % getRequests["Value"]["Failed"][rId])

                requestsToExecute = list(getRequests["Value"]["Successful"].values())

            self.log.info("execute: will execute requests ", "%s" % len(requestsToExecute))

            for request in requestsToExecute:
                # # set task id
                taskID = request.RequestID

                self.log.info(
                    "processPool status",
                    "tasks idle = %s working = %s"
                    % (self.processPool().getNumIdleProcesses(), self.processPool().getNumWorkingProcesses()),
                )

                # number of consecutive waits for a free slot
                looping = 0
                while True:
                    if not self.processPool().getFreeSlots():
                        if not looping:
                            # only announce the wait once per waiting period
                            self.log.info(
                                "No free slots available in processPool",
                                "will wait %d seconds to proceed" % self.__poolSleep,
                            )
                        time.sleep(self.__poolSleep)
                        looping += 1
                    else:
                        if looping:
                            # The product must be parenthesized: "%" and "*" have the same precedence
                            # and associate left to right, which would repeat the formatted string.
                            self.log.info("Free slot found", "after %d seconds" % (looping * self.__poolSleep))
                        looping = 0
                        # # save current request in cache
                        res = self.cacheRequest(request)
                        if not res["OK"]:
                            if cmpError(res, errno.EALREADY):
                                # The request is already in the cache, skip it. break out of the while loop to get next request
                                break
                            # There are too many requests in the cache, commit suicide
                            self.log.error(
                                "Too many requests in cache",
                                "(%d requests): put back all requests and exit cycle. Error %s"
                                % (len(self.__requestCache), res["Message"]),
                            )
                            self.putAllRequests()
                            return res
                        # # serialize to JSON
                        result = request.toJSON()
                        if not result["OK"]:
                            # Leave a trace of the failure; the control flow (retry) is unchanged.
                            # NOTE(review): the request was cached just above, so the retry presumably
                            # ends through the EALREADY branch - confirm against cacheRequest.
                            self.log.error("Could not serialize request to JSON", "%s" % result.get("Message"))
                            continue
                        requestJSON = result["Value"]
                        self.log.info("spawning task for request", "'%s/%s'" % (request.RequestID, request.RequestName))
                        timeOut = self.getTimeout(request)
                        enqueue = self.processPool().createAndQueueTask(
                            RequestTask,
                            kwargs={
                                "requestJSON": requestJSON,
                                "handlersDict": self.handlersDict,
                                "csPath": self.__configPath,
                                "agentName": self.agentName,
                                "rmsMonitoring": self.__rmsMonitoring,
                            },
                            taskID=taskID,
                            blocking=True,
                            usePoolCallbacks=True,
                            timeOut=timeOut,
                        )
                        if not enqueue["OK"]:
                            # not enqueued: stay in the while loop and try again
                            self.log.error("Could not enqueue task", enqueue["Message"])
                        else:
                            self.log.debug("successfully enqueued task", "'%s'" % taskID)
                            # # update monitor
                            if self.__rmsMonitoring:
                                self.rmsMonitoringReporter.addRecord(
                                    {
                                        "timestamp": int(Time.toEpoch()),
                                        "host": Network.getFQDN(),
                                        "objectType": "Request",
                                        "status": "Attempted",
                                        "objectID": request.RequestID,
                                        "nbObject": 1,
                                    }
                                )
                            else:
                                gMonitor.addMark("Processed", 1)

                            # # update request counter
                            taskCounter += 1
                            # # task created, a little time kick to proceed
                            time.sleep(0.1)
                            break

        self.log.info("Flushing callbacks", "(%d requests still in cache)" % len(self.__requestCache))
        processed = self.processPool().processResults()
        # This happens when the result queue is screwed up.
        # Returning S_ERROR proved not to be sufficient,
        # and when in this situation, there is nothing we can do.
        # So we just exit. runit will restart from scratch.
        if processed < 0:
            self.log.fatal("Results queue is screwed up")
            sys.exit(1)
        # # clean return
        return S_OK()
Ejemplo n.º 40
0
 def __generateUniqueClientName(self):
   """Build a hexadecimal identifier for this client.

   The identifier is the MD5 hex digest of the current time string, a random number,
   the local FQDN and the logger name, joined with ':'.

   :return: the hex digest, as a string
   """
   hashStr = ":".join((Time.toString(), str(random.random()), Network.getFQDN(), gLogger.getName()))
   # hashlib only accepts bytes on Python 3: encode text first (byte strings are left untouched)
   if not isinstance(hashStr, bytes):
     hashStr = hashStr.encode("utf-8")
   return md5(hashStr).hexdigest()
Ejemplo n.º 41
0
    def __findServiceURL(self):
        """Discover the URL of a service, taking into account gateways, multiple URLs and banned URLs.

        If a gateway is configured for the current site (/DIRAC/Gateways/<siteName>) and
        KW_IGNORE_GATEWAYS is not set to a true value in kwargs, one gateway (picked at random)
        replaces the server part of the URL.

        If self._destinationSrv already starts with a known protocol it is returned as is
        (or rewritten to go through the gateway). Otherwise the URLs returned by
        getServiceURL are used: banned URLs are dropped and one of the rest is picked at random.

        This method also sets some attributes:
          * self.__nbOfUrls = number of URLs
          * self.__nbOfRetry = 2 if we have more than 2 urls, otherwise 3
          * self.__bannedUrls is reinitialized if all the URLs are banned

        :return: S_OK(str)/S_ERROR() -- the selected URL
        """
        if not self.__initStatus['OK']:
            return self.__initStatus

        # Load the gateway URLs for the current site name
        gatewayURL = False
        if not self.kwargs.get(self.KW_IGNORE_GATEWAYS):
            dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" %
                                        DIRAC.siteName())
            if dRetVal['OK']:
                gatewayList = List.fromChar(dRetVal['Value'], ",")
                # Without any usable entry the [0] below would raise IndexError
                if gatewayList:
                    rawGatewayURL = List.randomize(gatewayList)[0]
                    # Keep only "<protocol>://<host>:<port>"
                    gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

        # If what was given is already a properly formed URL, return it,
        # after replacing the server part when a gateway has to be used
        for protocol in gProtocolDict:
            if self._destinationSrv.startswith("%s://" % protocol):
                gLogger.debug("Already given a valid url",
                              self._destinationSrv)
                if not gatewayURL:
                    return S_OK(self._destinationSrv)
                gLogger.debug(
                    "Reconstructing given URL to pass through gateway")
                path = "/".join(self._destinationSrv.split("/")[3:])
                finalURL = "%s/%s" % (gatewayURL, path)
                gLogger.debug("Gateway URL conversion:\n %s -> %s" %
                              (self._destinationSrv, finalURL))
                return S_OK(finalURL)

        if gatewayURL:
            gLogger.debug("Using gateway", gatewayURL)
            return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

        # Get the comma separated list of URLs of the service
        try:
            urls = getServiceURL(self._destinationSrv, setup=self.setup)
        except Exception as e:
            return S_ERROR("Cannot get URL for %s in setup %s: %s" %
                           (self._destinationSrv, self.setup, repr(e)))
        if not urls:
            return S_ERROR("URL for service %s not found" %
                           self._destinationSrv)

        urlsList = List.fromChar(urls, ",")
        self.__nbOfUrls = len(urlsList)
        self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3  # we retry 2 times all services, if we run more than 2 services
        if not urlsList:
            # The value did not contain any usable entry: fail cleanly rather than on randUrls[0]
            return S_ERROR("URL for service %s not found" %
                           self._destinationSrv)

        if len(urlsList) == len(self.__bannedUrls):
            self.__bannedUrls = []  # retry all urls
            gLogger.debug("Retrying again all URLs")

        if self.__bannedUrls and len(urlsList) > 1:
            # Some URLs are flagged as not accessible: keep only the ones which are not banned.
            # We only do so if we have more than one instance.
            # A banned URL which is absent from the list is simply ignored (list.remove would raise).
            usableUrls = []
            for candidate in urlsList:
                if candidate in self.__bannedUrls:
                    gLogger.debug("Removing banned URL", "%s" % candidate)
                else:
                    usableUrls.append(candidate)
            if usableUrls:
                urlsList = usableUrls
            else:
                # All the candidates are banned (the banned list may hold extra entries): retry everything
                self.__bannedUrls = []
                gLogger.debug("Retrying again all URLs")

        randUrls = List.randomize(urlsList)
        sURL = randUrls[0]

        # when we have multiple services then we can have a situation
        # when two service are running on the same machine with different port...
        if self.__bannedUrls and self.__nbOfUrls > 2:
            retVal = Network.splitURL(sURL)
            if retVal['OK']:
                nexturl = retVal['Value']

                found = False
                for bannedURL in self.__bannedUrls:
                    retVal = Network.splitURL(bannedURL)
                    if not retVal['OK']:
                        # One malformed banned URL must not stop the comparison with the other ones
                        continue
                    # A banned URL shares element [1] of the split URL with the selected one
                    if nexturl[1] == retVal['Value'][1]:
                        found = True
                        break
                if found:
                    nexturl = self.__selectUrl(nexturl, randUrls[1:])
                    if nexturl:  # an url found which is in different host
                        sURL = nexturl
        gLogger.debug("Discovering URL for service",
                      "%s -> %s" % (self._destinationSrv, sURL))
        return S_OK(sURL)