Example #1
0
def __configurePilot(basepath, vo):
    """
    Run dirac-pilot.py in debug mode and install its configuration for the user.

    The pilot script is launched through the shell with the current setup, the VO
    and the master configuration server. Afterwards ``~/.dirac.cfg`` is renamed to
    ``~/.dirac.cfg.old`` (best effort) and ``~/pilot.cfg`` is copied over
    ``~/.dirac.cfg``.

    This was written specifically for LHCb pilots; more information about other
    VOs is needed to make it more general.

    :param str basepath: prefix prepended verbatim to "dirac-pilot.py"
                         (no path separator is added here)
    :param vo: ignored, the value is replaced by the result of getVO()
    """

    from DIRAC.ConfigurationSystem.Client.Helpers.CSGlobals import getVO, getSetup
    from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData

    # NOTE(review): the caller-supplied vo is discarded here
    vo = getVO()
    setupName = getSetup()
    masterURL = gConfigurationData.getMasterServer()

    pilotCommand = (
        "dirac-pilot.py -S %s -l %s -C %s -N ce.debug.ch -Q default -n DIRAC.JobDebugger.ch --pythonVersion=3 -dd"
        % (setupName, vo, masterURL)
    )
    # The exit status of the pilot is not inspected
    os.system("python " + basepath + pilotCommand)

    homeDir = os.path.expanduser("~") + os.path.sep
    userCfg = homeDir + ".dirac.cfg"
    try:
        # Keep a backup of the previous configuration when there is one
        os.rename(userCfg, userCfg + ".old")
    except OSError:
        pass
    shutil.copyfile(homeDir + "pilot.cfg", userCfg)
Example #2
0
 def __refreshAndPublish(self):
     """
     Refresh the configuration from the master server and, when auto-publish
     is enabled, publish this server's URL to the master.

     When no master server is configured the refresh is delegated to
     ``self.__refresh()``.

     :return: False if the update from the master failed, True otherwise
              (a failed publication is only logged); without a master server,
              the 'OK' field of the ``__refresh`` result
     """
     self.__lastUpdateTime = time.time()
     gLogger.info("Refreshing from master server")
     from DIRAC.Core.DISET.RPCClient import RPCClient
     masterURL = gConfigurationData.getMasterServer()
     if not masterURL:
         gLogger.warn(
             "No master server is specified in the configuration, trying to get data from other slaves"
         )
         return self.__refresh()['OK']

     masterClient = RPCClient(
         masterURL,
         timeout=self.__timeout,
         useCertificates=gConfigurationData.useServerCertificate(),
         skipCACheck=gConfigurationData.skipCACheck())
     updateResult = _updateFromRemoteLocation(masterClient)
     if not updateResult['OK']:
         gLogger.error("Can't update from master server",
                       updateResult['Message'])
         return False

     if not gConfigurationData.getAutoPublish():
         return True

     gLogger.info("Publishing to master server...")
     publishResult = masterClient.publishSlaveServer(self.__url)
     if not publishResult['OK']:
         # A failed publication does not turn the refresh into a failure
         gLogger.error("Can't publish to master server",
                       publishResult['Message'])
     return True
  def do_connect(self, line):
    """connect
    Connect to the CS
    Usage: connect <URL> (Connect to the CS at the specified URL)
           connect       (Connect to the default CS URL of your config)
    """
    # NOTE(review): the docstring above is presumably shown as the help text of the
    # "connect" command (cmd-style do_* handler) -- confirm before rewording it.
    # NOTE(review): Python 2 syntax (print statements) is used throughout this method.

    # No argument: take the master server URL and the CS name from the local configuration
    if line == "":
      self.serverURL  = gConfigurationData.getMasterServer()
      self.serverName = gConfigurationData.getName()
    else:
      # The given text is used both as URL and as display name
      self.serverURL  = self.serverName = line

    # Only reachable through the default branch: no master server could be obtained
    if self.serverURL == None:
      print "Unable to connect to the default server. Maybe you don't have a proxy ?"
      return self.do_disconnect("")

    # The trailing comma keeps the cursor on the same line for the outcome message
    print "Trying to connect to " + self.serverURL + "...",

    # Both loads are always attempted, even when the first one fails
    self.modificator = Modificator(RPCClient(self.serverURL))
    rv               = self.modificator.loadFromRemote()
    rv2              = self.modificator.loadCredentials()

    if rv['OK'] == False or rv2['OK'] == False:
      print "failed: ",
      # Report the first failure only
      if rv['OK'] == False: print rv['Message']
      else:                 print rv2['Message']
      self.connected = False
      self.update_prompt()
    # NOTE(review): no success branch is visible in this block; self.connected is only
    # ever set to False here.
Example #4
0
    def do_connect(self, line):
        """connect
        Connect to the CS
        Usage: connect <URL> (Connect to the CS at the specified URL)
               connect       (Connect to the default CS URL of your config)
        """
        # NOTE(review): the docstring above is presumably shown as the help text of the
        # "connect" command (cmd-style do_* handler), so it is kept verbatim.

        # No argument: take the master server URL and the CS name from the local configuration
        if line == "":
            self.serverURL = gConfigurationData.getMasterServer()
            self.serverName = gConfigurationData.getName()
        else:
            # The given text is used both as URL and as display name
            self.serverURL = self.serverName = line

        # Only the default branch can yield a missing URL. An empty value is treated
        # like None so that no connection to an empty URL is attempted.
        if not self.serverURL:
            print("Unable to connect to the default server. Maybe you don't have a proxy ?")
            return self.do_disconnect("")

        print("Trying to connect to " + self.serverURL + "...", end=" ")

        # Both loads are always attempted, even when the first one fails
        self.modificator = Modificator(ConfigurationClient(url=self.serverURL))
        rv = self.modificator.loadFromRemote()
        rv2 = self.modificator.loadCredentials()

        # The same falsiness test is used to detect the failure and to pick the message,
        # so the 'Message' that gets printed always belongs to a failed result.
        if not rv["OK"] or not rv2["OK"]:
            print("failed: ", end=" ")
            if not rv["OK"]:
                print(rv["Message"])
            else:
                print(rv2["Message"])
            self.connected = False
            self.update_prompt()
        else:
            self.connected = True
            self.update_prompt()
            print("done.")
Example #5
0
    def __refreshAndPublish(self):
        """
        Refresh the configuration from the master server and, when auto-publish
        is enabled, publish this server's URL to the master.

        When no master server is configured the refresh is delegated to
        ``self.__refresh()``.

        :return: False if the update from the master failed, True otherwise
                 (a failed publication is only logged); without a master server,
                 the "OK" field of the ``__refresh`` result
        """
        self.__lastUpdateTime = time.time()
        gLogger.info("Refreshing from master server")
        from DIRAC.Core.DISET.RPCClient import RPCClient

        masterURL = gConfigurationData.getMasterServer()
        if not masterURL:
            gLogger.warn("No master server is specified in the configuration, trying to get data from other slaves")
            return self.__refresh()["OK"]

        clientOptions = {
            "timeout": self.__timeout,
            "useCertificates": gConfigurationData.useServerCertificate(),
            "skipCACheck": gConfigurationData.skipCACheck(),
        }
        masterClient = RPCClient(masterURL, **clientOptions)

        updateResult = _updateFromRemoteLocation(masterClient)
        if not updateResult["OK"]:
            gLogger.error("Can't update from master server", updateResult["Message"])
            return False

        if gConfigurationData.getAutoPublish():
            gLogger.info("Publishing to master server...")
            publishResult = masterClient.publishSlaveServer(self.__url)
            if not publishResult["OK"]:
                # A failed publication does not turn the refresh into a failure
                gLogger.error("Can't publish to master server", publishResult["Message"])
        return True
    def do_connect(self, line):
        """connect
    Connect to the CS
    Usage: connect <URL> (Connect to the CS at the specified URL)
           connect       (Connect to the default CS URL of your config)
    """
        # NOTE(review): the docstring above is presumably shown as the help text of the
        # "connect" command (cmd-style do_* handler) -- confirm before rewording it.
        # NOTE(review): Python 2 syntax (print statements) is used throughout this method.

        # No argument: take the master server URL and the CS name from the local configuration
        if line == "":
            self.serverURL = gConfigurationData.getMasterServer()
            self.serverName = gConfigurationData.getName()
        else:
            # The given text is used both as URL and as display name
            self.serverURL = self.serverName = line

        # Only reachable through the default branch: no master server could be obtained
        if self.serverURL == None:
            print "Unable to connect to the default server. Maybe you don't have a proxy ?"
            return self.do_disconnect("")

        # The trailing comma keeps the cursor on the same line for the outcome message
        print "Trying to connect to " + self.serverURL + "...",

        # Both loads are always attempted, even when the first one fails
        self.modificator = Modificator(RPCClient(self.serverURL))
        rv = self.modificator.loadFromRemote()
        rv2 = self.modificator.loadCredentials()

        if rv['OK'] == False or rv2['OK'] == False:
            print "failed: ",
            # Report the first failure only
            if rv['OK'] == False: print rv['Message']
            else: print rv2['Message']
            self.connected = False
            self.update_prompt()
        # NOTE(review): no success branch is visible in this block; self.connected is
        # only ever set to False here.
Example #7
0
    def _refreshAndPublish(self):
        """
        Refresh the configuration from the master server and, when auto-publish
        is enabled, publish this server's URL to the master.

        When no master server is configured the refresh is delegated to
        ``self._refresh()``.

        :return: False if the update from the master failed, True otherwise
                 (a failed publication is only logged); without a master server,
                 the "OK" field of the ``_refresh`` result
        """
        self._lastUpdateTime = time.time()
        gLogger.info("Refreshing from master server")
        masterURL = gConfigurationData.getMasterServer()
        if not masterURL:
            gLogger.warn(
                "No master server is specified in the configuration, trying to get data from other slaves"
            )
            return self._refresh()["OK"]

        # Imported only once a master server is known to be configured
        from DIRAC.ConfigurationSystem.Client.ConfigurationClient import ConfigurationClient

        masterClient = ConfigurationClient(
            url=masterURL,
            timeout=self._timeout,
            useCertificates=gConfigurationData.useServerCertificate(),
            skipCACheck=gConfigurationData.skipCACheck(),
        )
        updateResult = _updateFromRemoteLocation(masterClient)
        if not updateResult["OK"]:
            gLogger.error("Can't update from master server", updateResult["Message"])
            return False

        if not gConfigurationData.getAutoPublish():
            return True

        gLogger.info("Publishing to master server...")
        publishResult = masterClient.publishSlaveServer(self._url)
        if not publishResult["OK"]:
            # A failed publication does not turn the refresh into a failure
            gLogger.error("Can't publish to master server", publishResult["Message"])
        return True
Example #8
0
    def _refresh(self, fromMaster=False):
        """
        Refresh the configuration from the first remote server that can be updated from.

        The candidate servers are the configuration gateways when there are any;
        otherwise the master server if ``fromMaster`` is true, else the servers
        listed by gConfigurationData. Candidates are tried in the order returned by
        ``List.randomize``.

        :param bool fromMaster: use only the master server (ignored when gateways exist)
        :return: S_OK() when there is no candidate server (local configuration only),
                 the result of the first successful update, or S_ERROR listing the
                 unique failure messages
        """
        self._lastUpdateTime = time.time()
        gLogger.debug("Refreshing configuration...")
        failureMessages = []
        gateways = getGatewayURLs("Configuration/Server")
        if gateways:
            candidateServers = gateways
            gLogger.debug("Using configuration gateway", str(candidateServers[0]))
        elif fromMaster:
            masterURL = gConfigurationData.getMasterServer()
            candidateServers = [masterURL]
            gLogger.debug("Refreshing from master %s" % masterURL)
        else:
            candidateServers = gConfigurationData.getServers()
            gLogger.debug("Refreshing from list %s" % str(candidateServers))

        # If no servers in the initial list, we are supposed to use the local configuration only
        if not candidateServers:
            return S_OK()

        shuffledServers = List.randomize(candidateServers)
        gLogger.debug("Randomized server list is %s" % ", ".join(shuffledServers))

        for serverURL in shuffledServers:
            from DIRAC.ConfigurationSystem.Client.ConfigurationClient import ConfigurationClient

            client = ConfigurationClient(
                url=serverURL,
                useCertificates=gConfigurationData.useServerCertificate(),
                skipCACheck=gConfigurationData.skipCACheck(),
            )
            result = _updateFromRemoteLocation(client)
            if result["OK"]:
                self._refreshTime = gConfigurationData.getRefreshTime()
                return result

            errorMessage = result["Message"]
            failureMessages.append(errorMessage)
            gLogger.warn("Can't update from server", "Error while updating from %s: %s" % (serverURL, errorMessage))
            # This error stops the loop: the remaining servers are not tried
            if "Insane environment" in errorMessage:
                break

        return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(failureMessages)))
Example #9
0
 def do_connect( self, args = '' ):
   """
   Connects to configuration master server (in specified url if provided).
   
   Usage: connect <url>
   """
   # NOTE(review): Python 2 only (types.StringTypes, print statement).
   # Empty or non-string argument: use the master server from the local configuration
   if not args or type( args ) not in types.StringTypes:
     self.masterURL = gConfigurationData.getMasterServer()
     # A missing value or the literal "unknown" means there is nothing to connect to
     if self.masterURL != "unknown" and self.masterURL:
       self._tryConnection()
     else:
       self._setStatus( False )
   else:
     splitted = args.split()
     # args is non-empty here, so this is only reachable with a whitespace-only argument
     if len( splitted ) == 0:
       print "Must specify witch url to connect"
       self._setStatus( False )
     else:
       # Only the first whitespace-separated token is used as URL
       self.masterURL = splitted[0].strip()
       self._tryConnection()
Example #10
0
    def do_connect(self, args=''):
        """
    Connects to configuration master server (in specified url if provided).

    Usage: connect <url>
    """
        # NOTE(review): the docstring above is presumably shown as the help text of the
        # "connect" command (cmd-style do_* handler), so it is kept verbatim.
        if args and isinstance(args, six.string_types):
            tokens = args.split()
            if tokens:
                # Only the first whitespace-separated token is used as URL
                self.masterURL = tokens[0].strip()
                self._tryConnection()
            else:
                # Whitespace-only argument
                print("Must specify witch url to connect")
                self._setStatus(False)
            return

        # Empty or non-string argument: use the master server from the local configuration
        self.masterURL = gConfigurationData.getMasterServer()
        if self.masterURL and self.masterURL != "unknown":
            self._tryConnection()
        else:
            self._setStatus(False)
Example #11
0
  def __refresh(self, fromMaster=False):
    """
    Refresh the configuration from the first remote server that can be updated from.

    The candidate servers are the configuration gateways when there are any;
    otherwise the master server if ``fromMaster`` is true, else the servers
    listed by gConfigurationData. Candidates are tried in the order returned by
    ``List.randomize``.

    :param bool fromMaster: use only the master server (ignored when gateways exist)
    :return: S_OK() when there is no candidate server (local configuration only),
             the result of the first successful update, or S_ERROR listing the
             unique failure messages
    """
    self.__lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    failureMessages = []
    gateways = getGatewayURLs("Configuration/Server")
    if gateways:
      candidateServers = gateways
      gLogger.debug("Using configuration gateway", str(candidateServers[0]))
    elif fromMaster:
      masterURL = gConfigurationData.getMasterServer()
      candidateServers = [masterURL]
      gLogger.debug("Refreshing from master %s" % masterURL)
    else:
      candidateServers = gConfigurationData.getServers()
      gLogger.debug("Refreshing from list %s" % str(candidateServers))

    # If no servers in the initial list, we are supposed to use the local configuration only
    if not candidateServers:
      return S_OK()

    shuffledServers = List.randomize(candidateServers)
    gLogger.debug("Randomized server list is %s" % ", ".join(shuffledServers))

    for serverURL in shuffledServers:
      from DIRAC.Core.DISET.RPCClient import RPCClient
      client = RPCClient(serverURL,
                         useCertificates=gConfigurationData.useServerCertificate(),
                         skipCACheck=gConfigurationData.skipCACheck())
      result = _updateFromRemoteLocation(client)
      if result['OK']:
        return result

      errorMessage = result['Message']
      failureMessages.append(errorMessage)
      gLogger.warn("Can't update from server", "Error while updating from %s: %s" % (serverURL, errorMessage))
      # This error stops the loop: the remaining servers are not tried
      if "Insane environment" in errorMessage:
        break

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(failureMessages)))
def __configurePilot(basepath, vo):
  """
  Run dirac-pilot.py in debug mode and install its configuration for the user.

  The pilot script is launched through the shell with the current setup, the VO
  and the master configuration server. Afterwards ``~/.dirac.cfg`` is renamed to
  ``~/.dirac.cfg.old`` (best effort) and ``~/pilot.cfg`` is copied over
  ``~/.dirac.cfg``.

  This was written specifically for LHCb pilots; more information about other
  VOs is needed to make it more general.

  :param str basepath: prefix prepended verbatim to "dirac-pilot.py"
                       (no path separator is added here)
  :param vo: ignored, the value is replaced by the result of getVO()
  """

  from DIRAC.ConfigurationSystem.Client.Helpers.CSGlobals    import getVO, getSetup
  from DIRAC.ConfigurationSystem.Client.ConfigurationData    import gConfigurationData

  # NOTE(review): the caller-supplied vo is discarded here
  vo = getVO()
  setupName = getSetup()
  masterURL = gConfigurationData.getMasterServer()

  pilotCommand = (
      "dirac-pilot.py -S %s -l %s -C %s -N ce.debug.ch -Q default -n DIRAC.JobDebugger.ch -dd"
      % (setupName, vo, masterURL)
  )
  # The exit status of the pilot is not inspected
  os.system("python " + basepath + pilotCommand)

  homeDir = os.path.expanduser('~') + os.path.sep
  userCfg = homeDir + '.dirac.cfg'
  try:
    # Keep a backup of the previous configuration when there is one
    os.rename(userCfg, userCfg + '.old')
  except OSError:
    pass
  shutil.copyfile(homeDir + 'pilot.cfg', userCfg)
    def getCSDict(self, includeMasterCS=True):
        """Collect from the CS the minimal information needed to run a pilot

        The result holds a timestamp, the per-setup pilot options (filled in by
        ``_getPilotOptionsPerSetup``), the CEs found under /Resources/Sites, the
        default setup when one is defined and the list of configuration servers.

        :param bool includeMasterCS: when False, the master CS is removed from the
                                     "ConfigurationServers" list
        :returns: pilotDict (containing pilots run info)
        :rtype: S_OK, S_ERROR, value is pilotDict
        """

        pilotDict = {
            "timestamp": datetime.datetime.utcnow().isoformat(),
            "Setups": {},
            "CEs": {},
            "GenericPilotDNs": [],
        }

        self.log.info("-- Getting the content of the CS --")

        # These are in fact not only setups: they may be "Defaults" sections, or VOs, in multi-VOs installations
        operationsRes = gConfig.getSections("/Operations/")
        if not operationsRes["OK"]:
            self.log.error("Can't get sections from Operations", operationsRes["Message"])
            return operationsRes
        setupsInOperations = operationsRes["Value"]

        # getting the setup(s) in this CS, and comparing with what we found in Operations
        diracSetupsRes = gConfig.getSections("DIRAC/Setups")
        if not diracSetupsRes["OK"]:
            self.log.error("Can't get sections from DIRAC/Setups", diracSetupsRes["Message"])
            return diracSetupsRes
        setupsInDIRAC = diracSetupsRes["Value"]

        # Handling the case of multi-VO CS: nothing in common means that the
        # sections found under /Operations are VOs rather than setups
        if set(setupsInDIRAC).isdisjoint(setupsInOperations):
            for vo in list(setupsInOperations):
                voSectionsRes = gConfig.getSections("/Operations/%s" % vo)
                if voSectionsRes["OK"]:
                    # NOTE(review): each successful lookup replaces the previous one,
                    # so only the sections of the last readable VO are kept
                    setupsInOperations = voSectionsRes["Value"]

        self.log.verbose("From Operations/[Setup]/Pilot")

        for setup in setupsInOperations:
            self._getPilotOptionsPerSetup(setup, pilotDict)

        self.log.verbose("From Resources/Sites")
        gridsRes = gConfig.getSections("/Resources/Sites/")
        if not gridsRes["OK"]:
            self.log.error("Can't get sections from Resources", gridsRes["Message"])
            return gridsRes

        for grid in gridsRes["Value"]:
            sitesRes = gConfig.getSections("/Resources/Sites/" + grid)
            if not sitesRes["OK"]:
                self.log.error("Can't get sections from Resources", sitesRes["Message"])
                return sitesRes

            for site in sitesRes["Value"]:
                cesRes = gConfig.getSections(cfgPath("/Resources", "Sites", grid, site, "CEs"))
                if not cesRes["OK"]:
                    # Skip but log it
                    self.log.error("Site has no CEs! - skipping", site)
                    continue

                for ce in cesRes["Value"]:
                    cePath = ("/Resources", "Sites", grid, site, "CEs", ce)

                    # This CEType is like 'HTCondor' or 'ARC' etc.
                    ceInfo = {"Site": site}
                    ceType = gConfig.getValue(cfgPath(*(cePath + ("CEType",))))
                    if ceType is None:
                        # Logged, but the CE is still recorded (without "GridCEType")
                        self.log.error("CE has no option CEType!", ce + " at " + site)
                    else:
                        ceInfo["GridCEType"] = ceType
                    pilotDict["CEs"][ce] = ceInfo

                    # This LocalCEType is like 'InProcess' or 'Pool' or 'Pool/Singularity' etc.
                    # It can be in the queue and/or the CE level
                    ceLocalCEType = gConfig.getValue(cfgPath(*(cePath + ("LocalCEType",))))
                    if ceLocalCEType is not None:
                        ceInfo.setdefault("LocalCEType", ceLocalCEType)

                    queuesRes = gConfig.getSections(cfgPath(*(cePath + ("Queues",))))
                    if not queuesRes["OK"]:
                        # Skip but log it
                        self.log.error("No queues found for CE", ce + ": " + queuesRes["Message"])
                        continue

                    for queue in queuesRes["Value"]:
                        queueLocalCEType = gConfig.getValue(cfgPath(*(cePath + ("Queues", queue, "LocalCEType"))))
                        if queueLocalCEType is not None:
                            # setdefault: an entry already stored under this key is not overwritten
                            ceInfo.setdefault(queue, {"LocalCEType": queueLocalCEType})

        defaultSetup = gConfig.getValue("/DIRAC/DefaultSetup")
        if defaultSetup:
            pilotDict["DefaultSetup"] = defaultSetup

        self.log.debug("From DIRAC/Configuration")
        configurationServers = gConfig.getServersList()
        if not includeMasterCS:
            masterCS = gConfigurationData.getMasterServer()
            # Going through sets also removes duplicates and loses the original order
            configurationServers = list(set(configurationServers) - {masterCS})
        pilotDict["ConfigurationServers"] = configurationServers

        self.log.debug("Got pilotDict", str(pilotDict))

        return S_OK(pilotDict)
Example #14
0
    def _getPilotOptions(self, taskQueueDict, pilotsToSubmit):
        """
        Build the command line options of the pilots to submit for a TaskQueue.

        Private pilots are submitted when the TaskQueue asks for them ("private" in
        its "PilotTypes") or, randomly, with probability ``self.privatePilotFraction``,
        unless the TaskQueue contains "ForceGeneric". Otherwise generic pilots are
        submitted with the generic pilot credentials and a proxy token.

        :param dict taskQueueDict: TaskQueue description. Keys read here: "TaskQueueID",
            "OwnerGroup", "Setup", "CPUTime", "OwnerDN" (private pilots only) and,
            optionally, "PilotTypes" and "ForceGeneric"
        :param int pilotsToSubmit: requested number of pilots
        :return: S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ)),
            or S_ERROR when the generic pilot credentials or the proxy token cannot be obtained
        """

        # Need to limit the maximum number of pilots to submit at once
        # For generic pilots this is limited by the number of use of the tokens and the
        # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
        pilotsToSubmit = max(min(pilotsToSubmit, int(50 / self.maxJobsInFillMode)), 1)
        pilotOptions = []
        privateIfGenericTQ = self.privatePilotFraction > random.random()
        privateTQ = "PilotTypes" in taskQueueDict and "private" in [t.lower() for t in taskQueueDict["PilotTypes"]]
        forceGeneric = "ForceGeneric" in taskQueueDict
        submitPrivatePilot = (privateIfGenericTQ or privateTQ) and not forceGeneric
        if submitPrivatePilot:
            self.log.verbose("Submitting private pilots for TaskQueue %s" % taskQueueDict["TaskQueueID"])
            ownerDN = taskQueueDict["OwnerDN"]
            ownerGroup = taskQueueDict["OwnerGroup"]
            # User Group requirement
            pilotOptions.append("-G %s" % taskQueueDict["OwnerGroup"])
            # check if group allows jobsharing
            ownerGroupProperties = getPropertiesForGroup(ownerGroup)
            if "JobSharing" not in ownerGroupProperties:
                # Add Owner requirement to pilot
                pilotOptions.append("-O '%s'" % ownerDN)
            if privateTQ:
                pilotOptions.append("-o /Resources/Computing/CEDefaults/PilotType=private")
            maxJobsInFillMode = self.maxJobsInFillMode
        else:
            # For generic jobs we'll submit mixture of generic and private pilots
            self.log.verbose("Submitting generic pilots for TaskQueue %s" % taskQueueDict["TaskQueueID"])
            # ADRI: Find the generic group
            result = findGenericPilotCredentials(group=taskQueueDict["OwnerGroup"])
            if not result["OK"]:
                self.log.error(ERROR_GENERIC_CREDENTIALS, result["Message"])
                return S_ERROR(ERROR_GENERIC_CREDENTIALS)
            ownerDN, ownerGroup = result["Value"]

            result = gProxyManager.requestToken(ownerDN, ownerGroup, max(pilotsToSubmit, self.maxJobsInFillMode))
            if not result["OK"]:
                self.log.error(ERROR_TOKEN, result["Message"])
                return S_ERROR(ERROR_TOKEN)
            (token, numberOfUses) = result["Value"]
            pilotsToSubmit = min(numberOfUses, pilotsToSubmit)

            pilotOptions.append("-o /Security/ProxyToken=%s" % token)

            # Number of pilots needed when each one runs up to maxJobsInFillMode jobs.
            # Floor division keeps the pilot count an integer under Python 3 as well.
            pilotsToSubmit = max(1, (pilotsToSubmit - 1) // self.maxJobsInFillMode + 1)

            # Share the uses granted for the token between the pilots
            maxJobsInFillMode = int(numberOfUses / pilotsToSubmit)
        # Use Filling mode
        pilotOptions.append("-M %s" % maxJobsInFillMode)

        # Debug
        pilotOptions.append("-d")
        # Setup.
        pilotOptions.append("-S %s" % taskQueueDict["Setup"])
        # CS Servers
        csServers = gConfig.getServersList()
        if len(csServers) > 3:
            # Remove the master
            master = gConfigurationData.getMasterServer()
            if master in csServers:
                csServers.remove(master)
        pilotOptions.append("-C %s" % ",".join(csServers))
        # DIRAC Extensions
        extensionsList = getCSExtensions()
        if extensionsList:
            pilotOptions.append("-e %s" % ",".join(extensionsList))
        # Get DIRAC version and project, There might be global Setup defaults and per VO/Setup defaults (from configure)
        opsHelper = Operations(group=taskQueueDict["OwnerGroup"], setup=taskQueueDict["Setup"])
        # Requested version of DIRAC (it can be a list, so we take the first one)
        version = opsHelper.getValue(cfgPath("Pilot", "Version"), [self.installVersion])[0]
        pilotOptions.append("-r %s" % version)
        # Requested Project to install
        installProject = opsHelper.getValue(cfgPath("Pilot", "Project"), self.installProject)
        if installProject:
            pilotOptions.append("-l %s" % installProject)
        installation = opsHelper.getValue(cfgPath("Pilot", "Installation"), self.installation)
        if installation:
            pilotOptions.append("-V %s" % installation)
        # Requested CPU time
        pilotOptions.append("-T %s" % taskQueueDict["CPUTime"])

        # Avoid passing the submit pool option twice
        if self.submitPoolOption not in self.extraPilotOptions:
            pilotOptions.append(self.submitPoolOption)

        if self.extraPilotOptions:
            pilotOptions.extend(self.extraPilotOptions)

        return S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ))
Example #15
0
  def _getPilotOptions( self, taskQueueDict, pilotsToSubmit ):
    """
    Build the command line options of the pilots to submit for a TaskQueue.

    Private pilots are submitted when the TaskQueue asks for them ('private' in
    its 'PilotTypes') or, randomly, with probability ``self.privatePilotFraction``,
    unless the TaskQueue contains 'ForceGeneric'. Otherwise generic pilots are
    submitted with the generic pilot credentials and a proxy token.

    :param dict taskQueueDict: TaskQueue description. Keys read here: 'TaskQueueID',
        'OwnerGroup', 'Setup', 'CPUTime', 'OwnerDN' (private pilots only) and,
        optionally, 'PilotTypes' and 'ForceGeneric'
    :param int pilotsToSubmit: requested number of pilots
    :return: S_OK( ( pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ ) ),
        or S_ERROR when the generic pilot credentials or the proxy token cannot be obtained
    """

    # Need to limit the maximum number of pilots to submit at once
    # For generic pilots this is limited by the number of use of the tokens and the
    # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
    pilotsToSubmit = max( min( pilotsToSubmit, int( 50 / self.maxJobsInFillMode ) ), 1 )
    pilotOptions = []
    privateIfGenericTQ = self.privatePilotFraction > random.random()
    privateTQ = ( 'PilotTypes' in taskQueueDict and 'private' in [ t.lower() for t in taskQueueDict['PilotTypes'] ] )
    forceGeneric = 'ForceGeneric' in taskQueueDict
    submitPrivatePilot = ( privateIfGenericTQ or privateTQ ) and not forceGeneric
    if submitPrivatePilot:
      self.log.verbose( 'Submitting private pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'] )
      ownerDN = taskQueueDict['OwnerDN']
      ownerGroup = taskQueueDict['OwnerGroup']
      # User Group requirement
      pilotOptions.append( '-G %s' % taskQueueDict['OwnerGroup'] )
      # check if group allows jobsharing
      ownerGroupProperties = getPropertiesForGroup( ownerGroup )
      if 'JobSharing' not in ownerGroupProperties:
        # Add Owner requirement to pilot
        pilotOptions.append( "-O '%s'" % ownerDN )
      if privateTQ:
        pilotOptions.append( '-o /Resources/Computing/CEDefaults/PilotType=private' )
      maxJobsInFillMode = self.maxJobsInFillMode
    else:
      #For generic jobs we'll submit mixture of generic and private pilots
      self.log.verbose( 'Submitting generic pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'] )
      #ADRI: Find the generic group
      result = findGenericPilotCredentials( group = taskQueueDict[ 'OwnerGroup' ] )
      if not result[ 'OK' ]:
        self.log.error( ERROR_GENERIC_CREDENTIALS, result[ 'Message' ] )
        return S_ERROR( ERROR_GENERIC_CREDENTIALS )
      ownerDN, ownerGroup = result[ 'Value' ]

      result = gProxyManager.requestToken( ownerDN, ownerGroup, max( pilotsToSubmit, self.maxJobsInFillMode ) )
      if not result[ 'OK' ]:
        self.log.error( ERROR_TOKEN, result['Message'] )
        return S_ERROR( ERROR_TOKEN )
      ( token, numberOfUses ) = result[ 'Value' ]
      pilotsToSubmit = min( numberOfUses, pilotsToSubmit )

      pilotOptions.append( '-o /Security/ProxyToken=%s' % token )

      # Number of pilots needed when each one runs up to maxJobsInFillMode jobs.
      # Floor division gives the same result as "/" on Python 2 integers and keeps
      # the pilot count an integer under Python 3 as well.
      pilotsToSubmit = max( 1, ( pilotsToSubmit - 1 ) // self.maxJobsInFillMode + 1 )

      # Share the uses granted for the token between the pilots
      maxJobsInFillMode = int( numberOfUses / pilotsToSubmit )
    # Use Filling mode
    pilotOptions.append( '-M %s' % maxJobsInFillMode )

    # Debug
    pilotOptions.append( '-d' )
    # Setup.
    pilotOptions.append( '-S %s' % taskQueueDict['Setup'] )
    # CS Servers
    csServers = gConfig.getServersList()
    if len( csServers ) > 3:
      # Remove the master
      master = gConfigurationData.getMasterServer()
      if master in csServers:
        csServers.remove( master )
    pilotOptions.append( '-C %s' % ",".join( csServers ) )
    # DIRAC Extensions to be used in pilots
    # ubeda: I'm not entirely sure if we can use here the same opsHelper as the one
    # created further down for the version and project
    pilotExtensionsList = Operations().getValue( "Pilot/Extensions", [] )
    extensionsList = []
    if pilotExtensionsList:
      # A first element equal to the string 'None' leaves the extensions list empty
      if pilotExtensionsList[0] != 'None':
        extensionsList = pilotExtensionsList
    else:
      # Nothing defined under Pilot/Extensions: fall back to getCSExtensions()
      extensionsList = getCSExtensions()
    if extensionsList:
      pilotOptions.append( '-e %s' % ",".join( extensionsList ) )

    #Get DIRAC version and project, There might be global Setup defaults and per VO/Setup defaults (from configure)
    opsHelper = Operations( group = taskQueueDict['OwnerGroup'], setup = taskQueueDict['Setup'] )
    # Requested version of DIRAC (it can be a list, so we take the first one)
    version = opsHelper.getValue( cfgPath( 'Pilot', 'Version' ) , [ self.installVersion ] )[0]
    pilotOptions.append( '-r %s' % version )
    # Requested Project to install
    installProject = opsHelper.getValue( cfgPath( 'Pilot', 'Project' ) , self.installProject )
    if installProject:
      pilotOptions.append( '-l %s' % installProject )
    installation = opsHelper.getValue( cfgPath( 'Pilot', 'Installation' ), self.installation )
    if installation:
      pilotOptions.append( "-V %s" % installation )
    # Requested CPU time
    pilotOptions.append( '-T %s' % taskQueueDict['CPUTime'] )

    # Avoid passing the submit pool option twice
    if self.submitPoolOption not in self.extraPilotOptions:
      pilotOptions.append( self.submitPoolOption )

    if self.extraPilotOptions:
      pilotOptions.extend( self.extraPilotOptions )

    return S_OK( ( pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ ) )
Example #16
0
    def _getPilotOptions(self, taskQueueDict, pilotsToSubmit):

        # Need to limit the maximum number of pilots to submit at once
        # For generic pilots this is limited by the number of use of the tokens and the
        # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
        pilotsToSubmit = max(
            min(pilotsToSubmit, int(50 / self.maxJobsInFillMode)), 1)
        pilotOptions = []
        privateIfGenericTQ = self.privatePilotFraction > random.random()
        privateTQ = ('PilotTypes' in taskQueueDict and 'private'
                     in [t.lower() for t in taskQueueDict['PilotTypes']])
        forceGeneric = 'ForceGeneric' in taskQueueDict
        submitPrivatePilot = (privateIfGenericTQ
                              or privateTQ) and not forceGeneric
        if submitPrivatePilot:
            self.log.verbose('Submitting private pilots for TaskQueue %s' %
                             taskQueueDict['TaskQueueID'])
            ownerDN = taskQueueDict['OwnerDN']
            ownerGroup = taskQueueDict['OwnerGroup']
            # User Group requirement
            pilotOptions.append('-G %s' % taskQueueDict['OwnerGroup'])
            # check if group allows jobsharing
            ownerGroupProperties = getPropertiesForGroup(ownerGroup)
            if not 'JobSharing' in ownerGroupProperties:
                # Add Owner requirement to pilot
                pilotOptions.append("-O '%s'" % ownerDN)
            if privateTQ:
                pilotOptions.append(
                    '-o /Resources/Computing/CEDefaults/PilotType=private')
            maxJobsInFillMode = self.maxJobsInFillMode
        else:
            #For generic jobs we'll submit mixture of generic and private pilots
            self.log.verbose('Submitting generic pilots for TaskQueue %s' %
                             taskQueueDict['TaskQueueID'])
            #ADRI: Find the generic group
            result = findGenericPilotCredentials(
                group=taskQueueDict['OwnerGroup'])
            if not result['OK']:
                self.log.error(ERROR_GENERIC_CREDENTIALS, result['Message'])
                return S_ERROR(ERROR_GENERIC_CREDENTIALS)
            ownerDN, ownerGroup = result['Value']

            result = gProxyManager.requestToken(
                ownerDN, ownerGroup, max(pilotsToSubmit,
                                         self.maxJobsInFillMode))
            if not result['OK']:
                self.log.error(ERROR_TOKEN, result['Message'])
                return S_ERROR(ERROR_TOKEN)
            (token, numberOfUses) = result['Value']
            pilotsToSubmit = min(numberOfUses, pilotsToSubmit)

            pilotOptions.append('-o /Security/ProxyToken=%s' % token)

            pilotsToSubmit = max(
                1, (pilotsToSubmit - 1) / self.maxJobsInFillMode + 1)

            maxJobsInFillMode = int(numberOfUses / pilotsToSubmit)
        # Use Filling mode
        pilotOptions.append('-M %s' % maxJobsInFillMode)

        # Debug
        pilotOptions.append('-d')
        # Setup.
        pilotOptions.append('-S %s' % taskQueueDict['Setup'])
        # CS Servers
        csServers = gConfig.getServersList()
        if len(csServers) > 3:
            # Remove the master
            master = gConfigurationData.getMasterServer()
            if master in csServers:
                csServers.remove(master)
        pilotOptions.append('-C %s' % ",".join(csServers))
        # DIRAC Extensions to be used in pilots
        # ubeda: I'm not entirely sure if we can use here the same opsHelper as in line
        # line +352
        pilotExtensionsList = Operations().getValue("Pilot/Extensions", [])
        extensionsList = []
        if pilotExtensionsList:
            if pilotExtensionsList[0] != 'None':
                extensionsList = pilotExtensionsList
        else:
            extensionsList = getCSExtensions()
        if extensionsList:
            pilotOptions.append('-e %s' % ",".join(extensionsList))

        #Get DIRAC version and project, There might be global Setup defaults and per VO/Setup defaults (from configure)
        opsHelper = Operations(group=taskQueueDict['OwnerGroup'],
                               setup=taskQueueDict['Setup'])
        # Requested version of DIRAC (it can be a list, so we take the fist one)
        version = opsHelper.getValue(cfgPath('Pilot', 'Version'),
                                     [self.installVersion])[0]
        pilotOptions.append('-r %s' % version)
        # Requested Project to install
        installProject = opsHelper.getValue(cfgPath('Pilot', 'Project'),
                                            self.installProject)
        if installProject:
            pilotOptions.append('-l %s' % installProject)
        installation = opsHelper.getValue(cfgPath('Pilot', 'Installation'),
                                          self.installation)
        if installation:
            pilotOptions.append("-V %s" % installation)
        # Requested CPU time
        pilotOptions.append('-T %s' % taskQueueDict['CPUTime'])

        if self.submitPoolOption not in self.extraPilotOptions:
            pilotOptions.append(self.submitPoolOption)

        if self.extraPilotOptions:
            pilotOptions.extend(self.extraPilotOptions)

        return S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup,
                     submitPrivatePilot, privateTQ))
Example #17
0
    def getCSDict(self, includeMasterCS=True):
        """ Gets minimal info for running a pilot, from the CS

        The returned dictionary always carries the keys 'timestamp' (naive UTC,
        ISO format), 'Setups', 'CEs', 'GenericPilotDNs' and
        'ConfigurationServers'; 'DefaultSetup' is added only when
        /DIRAC/DefaultSetup has a value. 'CEs' is filled here from
        /Resources/Sites; the per-setup content is delegated to
        ``_getPilotOptionsPerSetup``.

        A failure to list /Operations/, DIRAC/Setups, /Resources/Sites/ or any
        grid section below it aborts the call and the failing result is
        returned unchanged. Sites without a CEs section and CEs without a
        Queues section are logged and skipped over.

        :param bool includeMasterCS: if False, the master configuration server
                                     is left out of 'ConfigurationServers'
        :returns: pilotDict (containing pilots run info)
        :rtype: S_OK, S_ERROR, value is pilotDict
        """

        pilotDict = {
            'timestamp': datetime.datetime.utcnow().isoformat(),
            'Setups': {},
            'CEs': {},
            'GenericPilotDNs': []
        }

        self.log.info('-- Getting the content of the CS --')

        # These are in fact not only setups: they may be "Defaults" sections, or VOs, in multi-VOs installations
        setupsRes = gConfig.getSections('/Operations/')
        if not setupsRes['OK']:
            self.log.error("Can't get sections from Operations",
                           setupsRes['Message'])
            return setupsRes
        setupsInOperations = setupsRes['Value']

        # getting the setup(s) in this CS, and comparing with what we found in Operations
        setupsInDIRACRes = gConfig.getSections('DIRAC/Setups')
        if not setupsInDIRACRes['OK']:
            self.log.error("Can't get sections from DIRAC/Setups",
                           setupsInDIRACRes['Message'])
            return setupsInDIRACRes
        setupsInDIRAC = setupsInDIRACRes['Value']

        # Handling the case of multi-VO CS: none of the sections found in
        # /Operations is a known setup, so treat them as VOs and look one
        # level deeper.
        if not set(setupsInDIRAC).intersection(set(setupsInOperations)):
            # Iterate over a copy, the name is rebound inside the loop
            for vo in list(setupsInOperations):
                setupsFromVOs = gConfig.getSections('/Operations/%s' % vo)
                if setupsFromVOs['OK']:
                    # NOTE(review): every successful VO replaces the list, so
                    # only the last one is kept - confirm this is intended.
                    setupsInOperations = setupsFromVOs['Value']

        self.log.verbose('From Operations/[Setup]/Pilot')

        for setup in setupsInOperations:
            self._getPilotOptionsPerSetup(setup, pilotDict)

        self.log.verbose('From Resources/Sites')
        sitesSection = gConfig.getSections('/Resources/Sites/')
        if not sitesSection['OK']:
            self.log.error("Can't get sections from Resources",
                           sitesSection['Message'])
            return sitesSection

        for grid in sitesSection['Value']:
            gridPath = '/Resources/Sites/' + grid
            gridSection = gConfig.getSections(gridPath)
            if not gridSection['OK']:
                self.log.error("Can't get sections from Resources",
                               gridSection['Message'])
                return gridSection

            for site in gridSection['Value']:
                cesPath = gridPath + '/' + site + '/CEs/'
                ceList = gConfig.getSections(cesPath)
                if not ceList['OK']:
                    # Skip but log it
                    self.log.error('Site has no CEs! - skipping', site)
                    continue

                for ce in ceList['Value']:
                    cePath = cesPath + ce
                    ceInfo = {'Site': site}

                    # This CEType is like 'HTCondor' or 'ARC' etc.
                    ceType = gConfig.getValue(cePath + '/CEType')
                    if ceType is None:
                        # Log it, but still record the CE (with its site only)
                        self.log.error('CE has no option CEType!',
                                       ce + ' at ' + site)
                    else:
                        ceInfo['GridCEType'] = ceType
                    pilotDict['CEs'][ce] = ceInfo

                    # This LocalCEType is like 'InProcess' or 'Pool' or 'Pool/Singularity' etc.
                    # It can be in the queue and/or the CE level
                    localCEType = gConfig.getValue(cePath + '/LocalCEType')
                    if localCEType is not None:
                        ceInfo.setdefault('LocalCEType', localCEType)

                    queuesPath = cePath + '/Queues/'
                    res = gConfig.getSections(queuesPath)
                    if not res['OK']:
                        # Skip the queues but log it, the CE entry is kept
                        self.log.error("No queues found for CE",
                                       ce + ': ' + res['Message'])
                        continue
                    for queue in res['Value']:
                        localCEType = gConfig.getValue(queuesPath + queue +
                                                       '/LocalCEType')
                        if localCEType is not None:
                            # Queue-level value is stored under the queue name
                            ceInfo.setdefault(
                                queue, {'LocalCEType': localCEType})

        defaultSetup = gConfig.getValue('/DIRAC/DefaultSetup')
        if defaultSetup:
            pilotDict['DefaultSetup'] = defaultSetup

        self.log.debug('From DIRAC/Configuration')
        configurationServers = gConfig.getServersList()
        if not includeMasterCS:
            masterCS = gConfigurationData.getMasterServer()
            # Drop the master (and duplicates) while keeping the configured
            # order: a plain set difference would yield an arbitrary order
            # that can change from one run to the next.
            seen = {masterCS}
            serversWithoutMaster = []
            for server in configurationServers:
                if server not in seen:
                    seen.add(server)
                    serversWithoutMaster.append(server)
            configurationServers = serversWithoutMaster
        pilotDict['ConfigurationServers'] = configurationServers

        self.log.debug("Got pilotDict", str(pilotDict))

        return S_OK(pilotDict)