コード例 #1
0
    def __init__(self):
        """Initialise the TimeLeft utility.

        Reads the CPU scaling/normalisation factors and the CPU/wall-clock
        margins from the local configuration, then loads the batch system
        plugin (batchPlugin is None and batchError is set on failure).
        """
        self.log = gLogger.getSubLogger('TimeLeft')

        # Ratio of the SpecInt published by the site over 250 (the reference
        # used for Matching)
        self.scaleFactor = gConfig.getValue('/LocalSite/CPUScalingFactor', 0.0)
        if not self.scaleFactor:
            self.log.warn('/LocalSite/CPUScalingFactor not defined for site %s' % DIRAC.siteName())

        self.normFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)
        if not self.normFactor:
            self.log.warn('/LocalSite/CPUNormalizationFactor not defined for site %s' % DIRAC.siteName())

        # CPU and wall clock margins in percent (don't seem to be set anywhere)
        self.cpuMargin = gConfig.getValue('/LocalSite/CPUMargin', 2)
        self.wallClockMargin = gConfig.getValue('/LocalSite/wallClockMargin', 8)

        result = self.__getBatchSystemPlugin()
        if not result['OK']:
            self.batchPlugin = None
            self.batchError = result['Message']
        else:
            self.batchPlugin = result['Value']
コード例 #2
0
ファイル: TimeLeft.py プロジェクト: bmb/DIRAC
  def getTimeLeft( self, cpuConsumed ):
    """Returns the CPU Time Left for supported batch systems.

       The time left is expressed in the same (normalized) units used by the
       Matching: remainingCPUFraction * CPULimit * scaleFactor.

       :param cpuConsumed: current raw total CPU (currently not used in the
                           calculation; kept for interface compatibility)
       :return: S_OK( timeLeft ) or S_ERROR( message )
    """
    # Quit if no scale factor available
    if not self.scaleFactor:
      return S_ERROR( '/LocalSite/CPUScalingFactor not defined for site %s' % DIRAC.siteName() )

    if not self.batchPlugin:
      return S_ERROR( self.batchError )

    resourceDict = self.batchPlugin.getResourceUsage()
    if not resourceDict['OK']:
      self.log.warn( 'Could not determine timeleft for batch system at site %s' % DIRAC.siteName() )
      return resourceDict

    resources = resourceDict['Value']
    self.log.verbose( resources )
    if not resources['CPULimit'] or not resources['WallClockLimit']:
      return S_ERROR( 'No CPU / WallClock limits obtained' )

    # Percentages of the CPU / wall clock limits already used and remaining
    cpuFactor = 100 * float( resources['CPU'] ) / float( resources['CPULimit'] )
    cpuRemaining = 100 - cpuFactor
    cpuLimit = float( resources['CPULimit'] )
    wcFactor = 100 * float( resources['WallClock'] ) / float( resources['WallClockLimit'] )
    wcRemaining = 100 - wcFactor
    self.log.verbose( 'Used CPU is %.02f, Used WallClock is %.02f.' % ( cpuFactor, wcFactor ) )
    self.log.verbose( 'Remaining WallClock %.02f, Remaining CPU %.02f, margin %s' %
                      ( wcRemaining, cpuRemaining, self.cpuMargin ) )

    timeLeft = None
    if wcRemaining > cpuRemaining and ( wcRemaining - cpuRemaining ) > self.cpuMargin:
      # In some cases cpuFactor might be 0
      # timeLeft = float(cpuConsumed*self.scaleFactor*cpuRemaining/cpuFactor)
      # We need time left in the same units used by the Matching
      timeLeft = float( cpuRemaining * cpuLimit / 100 * self.scaleFactor )
      self.log.verbose( 'Remaining WallClock %.02f > Remaining CPU %.02f and difference > margin %s' %
                        ( wcRemaining, cpuRemaining, self.cpuMargin ) )
    else:
      if cpuRemaining > self.cpuMargin and wcRemaining > self.cpuMargin:
        self.log.verbose( 'Remaining WallClock %.02f and Remaining CPU %.02f both > margin %s' %
                          ( wcRemaining, cpuRemaining, self.cpuMargin ) )
        # In some cases cpuFactor might be 0
        # timeLeft = float(cpuConsumed*self.scaleFactor*(wcRemaining-self.cpuMargin)/cpuFactor)
        timeLeft = float( cpuRemaining * cpuLimit / 100 * self.scaleFactor )
      else:
        self.log.verbose( 'Remaining CPU %.02f < margin %s and WallClock %.02f < margin %s so no time left' %
                          ( cpuRemaining, self.cpuMargin, wcRemaining, self.cpuMargin ) )

    if timeLeft:
      self.log.verbose( 'Remaining CPU in normalized units is: %.02f' % timeLeft )
      return S_OK( timeLeft )
    else:
      return S_ERROR( 'No time left for slot' )
コード例 #3
0
  def getTimeLeft( self, cpuConsumed ):
    """Returns the CPU Time Left for supported batch systems.

       The time left is expressed in the same (normalized) units used by the
       Matching: remainingCPUFraction * CPULimit * scaleFactor.

       :param cpuConsumed: current raw total CPU (currently not used in the
                           calculation; kept for interface compatibility)
       :return: S_OK( timeLeft ) or S_ERROR( message )
    """
    # Quit if no scale factor available
    if not self.scaleFactor:
      return S_ERROR( '/LocalSite/CPUScalingFactor not defined for site %s' % DIRAC.siteName() )

    if not self.batchPlugin:
      return S_ERROR( self.batchError )

    resourceDict = self.batchPlugin.getResourceUsage()
    if not resourceDict['OK']:
      self.log.warn( 'Could not determine timeleft for batch system at site %s' % DIRAC.siteName() )
      return resourceDict

    resources = resourceDict['Value']
    self.log.verbose( resources )
    if not resources['CPULimit'] or not resources['WallClockLimit']:
      return S_ERROR( 'No CPU / WallClock limits obtained' )

    # Percentages of the CPU / wall clock limits already used and remaining
    cpuFactor = 100 * float( resources['CPU'] ) / float( resources['CPULimit'] )
    cpuRemaining = 100 - cpuFactor
    cpuLimit = float( resources['CPULimit'] )
    wcFactor = 100 * float( resources['WallClock'] ) / float( resources['WallClockLimit'] )
    wcRemaining = 100 - wcFactor
    self.log.verbose( 'Used CPU is %.02f, Used WallClock is %.02f.' % ( cpuFactor, wcFactor ) )
    self.log.verbose( 'Remaining WallClock %.02f, Remaining CPU %.02f, margin %s' %
                      ( wcRemaining, cpuRemaining, self.cpuMargin ) )

    timeLeft = None
    if wcRemaining > cpuRemaining and ( wcRemaining - cpuRemaining ) > self.cpuMargin:
      # In some cases cpuFactor might be 0
      # timeLeft = float(cpuConsumed*self.scaleFactor*cpuRemaining/cpuFactor)
      # We need time left in the same units used by the Matching
      timeLeft = float( cpuRemaining * cpuLimit / 100 * self.scaleFactor )
      self.log.verbose( 'Remaining WallClock %.02f > Remaining CPU %.02f and difference > margin %s' %
                        ( wcRemaining, cpuRemaining, self.cpuMargin ) )
    else:
      if cpuRemaining > self.cpuMargin and wcRemaining > self.cpuMargin:
        self.log.verbose( 'Remaining WallClock %.02f and Remaining CPU %.02f both > margin %s' %
                          ( wcRemaining, cpuRemaining, self.cpuMargin ) )
        # In some cases cpuFactor might be 0
        # timeLeft = float(cpuConsumed*self.scaleFactor*(wcRemaining-self.cpuMargin)/cpuFactor)
        timeLeft = float( cpuRemaining * cpuLimit / 100 * self.scaleFactor )
      else:
        self.log.verbose( 'Remaining CPU %.02f < margin %s and WallClock %.02f < margin %s so no time left' %
                          ( cpuRemaining, self.cpuMargin, wcRemaining, self.cpuMargin ) )

    if timeLeft:
      self.log.verbose( 'Remaining CPU in normalized units is: %.02f' % timeLeft )
      return S_OK( timeLeft )
    else:
      return S_ERROR( 'No time left for slot' )
コード例 #4
0
    def __init__(self):
        """Define the Job accounting record.

        Declares the key fields, the accounting (value) fields and the
        bucketing granularity, registers the type and presets the Site key.
        """
        BaseAccountingType.__init__(self)
        day = 86400
        hour = 3600
        week = 604800
        self.definitionKeyFields = [
            ('User', 'VARCHAR(32)'),
            ('UserGroup', 'VARCHAR(32)'),
            ('JobGroup', "VARCHAR(64)"),
            ('JobType', 'VARCHAR(32)'),
            ('JobClass', 'VARCHAR(32)'),
            ('ProcessingType', 'VARCHAR(256)'),
            ('Site', 'VARCHAR(32)'),
            ('FinalMajorStatus', 'VARCHAR(32)'),
            ('FinalMinorStatus', 'VARCHAR(256)'),
        ]
        self.definitionAccountingFields = [
            ('CPUTime', "INT UNSIGNED"),
            ('NormCPUTime', "INT UNSIGNED"),
            ('ExecTime', "INT UNSIGNED"),
            ('InputDataSize', 'BIGINT UNSIGNED'),
            ('OutputDataSize', 'BIGINT UNSIGNED'),
            ('InputDataFiles', 'INT UNSIGNED'),
            ('OutputDataFiles', 'INT UNSIGNED'),
            ('DiskSpace', 'BIGINT UNSIGNED'),
            ('InputSandBoxSize', 'BIGINT UNSIGNED'),
            ('OutputSandBoxSize', 'BIGINT UNSIGNED'),
            ('ProcessedEvents', 'INT UNSIGNED'),
        ]
        # Bucket granularity: finer for recent data, coarser with age
        self.bucketsLength = [
            (day * 8, hour),        # <1w+1d = 1h
            (day * 35, hour * 4),   # <35d = 4h
            (day * 30 * 6, day),    # <6m = 1d
            (day * 365, day * 2),   # <1y = 2d
            (day * 600, week),      # >1y = 1w
        ]

        self.checkType()
        # Fill the site
        self.setValueByKey("Site", DIRAC.siteName())
コード例 #5
0
  def __findServiceURL( self ):
    """Resolve the URL to use for the destination service.

    If a gateway is configured for this site under /DIRAC/Gateways/<site>
    (and not explicitly ignored via KW_IGNORE_GATEWAYS), URLs are rewritten
    to pass through it; otherwise the service URL is looked up from the
    configuration via getServiceURL.
    """
    if not self.__initStatus[ 'OK' ]:
      return self.__initStatus
    gatewayURL = False
    # Honour the configured gateway unless the caller asked to ignore gateways
    if self.KW_IGNORE_GATEWAYS not in self.kwargs or not self.kwargs[ self.KW_IGNORE_GATEWAYS ]:
      dRetVal = gConfig.getOption( "/DIRAC/Gateways/%s" % DIRAC.siteName() )
      if dRetVal[ 'OK' ]:
        # Pick one gateway at random and keep only the scheme://host:port part
        rawGatewayURL = List.randomize( List.fromChar( dRetVal[ 'Value'], "," ) )[0]
        gatewayURL = "/".join( rawGatewayURL.split( "/" )[:3] )

    # If the destination is already a full URL for a known protocol, use it
    # as-is, or rewrite it to go through the gateway when one is configured
    for protocol in gProtocolDict.keys():
      if self._destinationSrv.find( "%s://" % protocol ) == 0:
        gLogger.debug( "Already given a valid url", self._destinationSrv )
        if not gatewayURL:
          return S_OK( self._destinationSrv )
        gLogger.debug( "Reconstructing given URL to pass through gateway" )
        path = "/".join( self._destinationSrv.split( "/" )[3:] )
        finalURL = "%s/%s" % ( gatewayURL, path )
        gLogger.debug( "Gateway URL conversion:\n %s -> %s" % ( self._destinationSrv, finalURL ) )
        return S_OK( finalURL )

    # Destination given as a service name: route it through the gateway...
    if gatewayURL:
      gLogger.debug( "Using gateway", gatewayURL )
      return S_OK( "%s/%s" % ( gatewayURL, self._destinationSrv ) )

    # ...or resolve it through the configuration for the current setup
    try:
      urls = getServiceURL( self._destinationSrv, setup = self.setup )
    except Exception, e:
      return S_ERROR( "Cannot get URL for %s in setup %s: %s" % ( self._destinationSrv, self.setup, str( e ) ) )
コード例 #6
0
ファイル: OverlayInput.py プロジェクト: hamzazafar/ILCDIRAC
 def __init__(self):
   """Initialise OverlayInput: application identity, background-overlay
   parameters and the data-access helpers, all set to their defaults.
   """
   super(OverlayInput, self).__init__()
   self.enable = True
   self.STEP_NUMBER = ''
   self.log = gLogger.getSubLogger("OverlayInput")

   # Application identity and bookkeeping
   self.applicationName = 'OverlayInput'
   self.applicationLog = ''
   self.printoutflag = ''
   self.curdir = os.getcwd()
   self.prodid = 0

   # Detector / energy configuration ('detector' kept for backward compatibility)
   self.detector = ''
   self.detectormodel = ""
   self.energytouse = ''
   self.energy = 0
   self.useEnergyForFileLookup = True

   # Background overlay parameters
   self.nbofeventsperfile = 100
   self.lfns = []
   self.nbfilestoget = 0
   self.BkgEvtType = 'gghad'
   self.metaEventType = self.BkgEvtType
   self.BXOverlay = 0
   self.ggtohadint = 3.2
   self.nbsigeventsperfile = 0
   self.nbinputsigfile = 1
   self.NbSigEvtsPerJob = 0

   # Data-access helpers and site context
   self.datMan = DataManager()
   self.fcc = FileCatalogClient()
   self.site = DIRAC.siteName()
   self.machine = 'clic_cdr'
   self.pathToOverlayFiles = ''
   self.processorName = ''
コード例 #7
0
  def _getSEList( self, SEType = 'ProductionOutputs', DataType = 'SimtelProd' ):
    """Get from the CS the list of available SEs for data upload.

    The configured list is randomized, then any SE local to the current
    site is moved to the front so it is tried first.
    """
    opsHelper = Operations()
    optionName = os.path.join( SEType, DataType )
    SEList = List.randomize( opsHelper.getValue( optionName , [] ) )
    DIRAC.gLogger.notice( 'List of %s SE: %s ' % ( SEType, SEList ) )

    # Check if the local SE is in the list. If yes try it first by reversing list order
    res = getSEsForSite( DIRAC.siteName() )
    localSEList = res['Value'] if res['OK'] else []

    preferred = []
    for candidate in localSEList:
      if candidate in SEList:
        DIRAC.gLogger.notice( 'The local Storage Element is an available SE: ', candidate )
        preferred.append( candidate )
        SEList.remove( candidate )

    SEList = preferred + SEList
    if not SEList:
      return DIRAC.S_ERROR( 'Error in building SEList' )

    return DIRAC.S_OK( SEList )
コード例 #8
0
ファイル: dirac-jobexec.py プロジェクト: DIRACGrid/DIRAC
def jobexec(jobxml, wfParameters):
  """Load the workflow XML, attach the standard tools, propagate the
  command-line parameters to the workflow (and to every step's module
  instances), then execute it.
  """
  jobfile = os.path.abspath(jobxml)
  if not os.path.exists(jobfile):
    gLogger.warn('Path to specified workflow %s does not exist' % (jobfile))
    sys.exit(1)

  workflow = fromXMLFile(jobfile)
  gLogger.debug(workflow)
  gLogger.debug(workflow.createCode())

  jobID = 0
  if 'JOBID' in os.environ:
    jobID = os.environ['JOBID']
    gLogger.info('DIRAC JobID %s is running at site %s' % (jobID, DIRAC.siteName()))

  # Standard workflow tools
  for toolName, tool in (('JobReport', JobReport(jobID)),
                         ('AccountingReport', DataStoreClient()),
                         ('Request', Request())):
    workflow.addTool(toolName, tool)

  # Propagate the command line parameters to the workflow if any
  for name, value in wfParameters.items():
    workflow.setValue(name, value)

  # ... and to the workflow module instances of each step
  for step in workflow.step_definitions.itervalues():
    for module in step.module_instances:
      for name, value in wfParameters.iteritems():
        if module.parameters.find(name):
          module.parameters.setValue(name, value)

  return workflow.execute()
コード例 #9
0
ファイル: dirac_jobexec.py プロジェクト: pmusset/DIRAC
    def jobexec(jobxml, wfParameters):
        """Load the workflow XML, attach the standard tools, propagate the
        command-line parameters to the workflow (and to every step's module
        instances), then execute it.
        """
        jobfile = os.path.abspath(jobxml)
        if not os.path.exists(jobfile):
            gLogger.warn('Path to specified workflow %s does not exist' %
                         (jobfile))
            sys.exit(1)

        workflow = fromXMLFile(jobfile)
        gLogger.debug(workflow)
        gLogger.debug(workflow.createCode())

        jobID = 0
        if 'JOBID' in os.environ:
            jobID = os.environ['JOBID']
            gLogger.info('DIRAC JobID %s is running at site %s' %
                         (jobID, DIRAC.siteName()))

        # Standard workflow tools
        for toolName, tool in (('JobReport', JobReport(jobID)),
                               ('AccountingReport', DataStoreClient()),
                               ('Request', Request())):
            workflow.addTool(toolName, tool)

        # Propagate the command line parameters to the workflow if any
        for name, value in wfParameters.items():
            workflow.setValue(name, value)

        # ... and to the workflow module instances of each step
        for step in workflow.step_definitions.values():
            for module in step.module_instances:
                for name, value in wfParameters.items():
                    if module.parameters.find(name):
                        module.parameters.setValue(name, value)

        return workflow.execute()
コード例 #10
0
def jobexec(jobxml, wfParameters=None):
    """Execute the workflow described by *jobxml*.

    :param jobxml: path to the workflow XML file
    :param wfParameters: optional dict of parameters to set on the workflow
    :return: result structure of workflow.execute()
    """
    # None sentinel instead of a mutable default argument, which would be
    # shared between calls; backward-compatible for all existing callers.
    if wfParameters is None:
        wfParameters = {}
    jobfile = os.path.abspath(jobxml)
    if not os.path.exists(jobfile):
        gLogger.warn('Path to specified workflow %s does not exist' %
                     (jobfile))
        sys.exit(1)
    workflow = fromXMLFile(jobfile)
    gLogger.debug(workflow)
    code = workflow.createCode()
    gLogger.debug(code)
    jobID = 0
    # 'in' test instead of the deprecated dict.has_key()
    if 'JOBID' in os.environ:
        jobID = os.environ['JOBID']
        gLogger.info('DIRAC JobID %s is running at site %s' %
                     (jobID, DIRAC.siteName()))

    workflow.addTool('JobReport', JobReport(jobID))
    workflow.addTool('AccountingReport', DataStoreClient())
    workflow.addTool('Request', RequestContainer())

    # Propagate the command line parameters to the workflow if any
    for name, value in wfParameters.items():
        workflow.setValue(name, value)

    result = workflow.execute()
    return result
コード例 #11
0
ファイル: DataOperation.py プロジェクト: vfalbor/DIRAC
 def __init__(self):
     """Define the DataOperation accounting record: key fields, accounting
     fields and bucket lengths, then register the type and preset the
     execution site.
     """
     BaseAccountingType.__init__(self)
     keyType = 'VARCHAR(32)'
     self.definitionKeyFields = [
         ('OperationType', keyType),
         ('User', keyType),
         ('ExecutionSite', keyType),
         ('Source', keyType),
         ('Destination', keyType),
         ('Protocol', keyType),
         ('FinalStatus', keyType),
     ]
     counter = 'INT UNSIGNED'
     self.definitionAccountingFields = [
         ('TransferSize', 'BIGINT UNSIGNED'),
         ('TransferTime', 'FLOAT'),
         ('RegistrationTime', 'FLOAT'),
         ('TransferOK', counter),
         ('TransferTotal', counter),
         ('RegistrationOK', counter),
         ('RegistrationTotal', counter),
     ]
     # Bucket lengths: 15m under 2d, 1h under 1w, 1d up to 6m, 1w beyond
     self.bucketsLength = [
         (172800, 900),
         (604800, 3600),
         (15552000, 86400),
         (31104000, 604800),
     ]
     self.checkType()
     self.setValueByKey('ExecutionSite', DIRAC.siteName())
コード例 #12
0
  def __getBatchSystemPlugin( self ):
    """Using the name of the batch system plugin, will return an instance
       of the plugin class.

       The batch system is detected from characteristic environment
       variables; returns S_ERROR when none is recognised or when the
       corresponding plugin module cannot be imported.
    """
    batchSystems = {'LSF':'LSB_JOBID', 'PBS':'PBS_JOBID', 'BQS':'QSUB_REQNAME'} #more to be added later
    name = None
    for batchSystem, envVar in batchSystems.items():
      if os.environ.has_key( envVar ):
        name = batchSystem
        break

    if name == None:
      self.log.warn( 'Batch system type for site %s is not currently supported' % DIRAC.siteName() )
      return S_ERROR( 'Current batch system is not supported' )

    self.log.debug( 'Creating plugin for %s batch system' % ( name ) )
    try:
      # Dynamically import DIRAC.Core.Utilities.TimeLeft.<Name>TimeLeft
      batchSystemName = "%sTimeLeft" % ( name )
      batchPlugin = __import__( 'DIRAC.Core.Utilities.TimeLeft.%s' %
                                batchSystemName, globals(), locals(), [batchSystemName] )
    except Exception, x:
      msg = 'Could not import DIRAC.Core.Utilities.TimeLeft.%s' % ( batchSystemName )
      self.log.warn( x )
      self.log.warn( msg )
      return S_ERROR( msg )
コード例 #13
0
def getLocationOrderedCatalogs( siteName = '' ):
  """Return the active catalog URLs, with the catalog of the Tier1 closest
     to the given (or current) site first and the rest in random order.

     :param siteName: site to resolve the closest Tier1 for; defaults to the
                      current DIRAC site
     :return: S_OK( list of catalog URLs ) or S_ERROR
  """
  # First get a list of the active catalogs and their location
  res = getActiveCatalogs()
  if not res['OK']:
    gLogger.error( "Failed to get list of active catalogs", res['Message'] )
    return res
  catalogDict = res['Value']
  # Get the tier1 associated to the current location
  if not siteName:
    import DIRAC
    siteName = DIRAC.siteName()
  countryCode = siteName.split( '.' )[-1]
  res = getCountryMappingTier1( countryCode )
  if not res['OK']:
    gLogger.error( "Failed to resolve closest Tier1", res['Message'] )
    return res
  tier1 = res['Value']
  # Create a sorted list of the active readonly catalogs: closest Tier1
  # first, then the others randomized.  'in' replaces the deprecated
  # dict.has_key(), and pop() returns the removed value directly.
  catalogList = []
  if tier1 in catalogDict:
    catalogList.append( catalogDict.pop( tier1 ) )
  catalogList.extend( randomize( catalogDict.values() ) )
  return S_OK( catalogList )
コード例 #14
0
ファイル: ResolveCatalog.py プロジェクト: zhangxiaomei/DIRAC
def getLocationOrderedCatalogs(siteName=''):
    """Return the active catalog URLs, with the catalog of the Tier1 closest
    to the given (or current) site first and the rest in random order.

    :param siteName: site to resolve the closest Tier1 for; defaults to the
                     current DIRAC site
    :return: S_OK( list of catalog URLs ) or S_ERROR
    """
    # First get a list of the active catalogs and their location
    res = getActiveCatalogs()
    if not res['OK']:
        gLogger.error("Failed to get list of active catalogs", res['Message'])
        return res
    catalogDict = res['Value']
    # Get the tier1 associated to the current location
    if not siteName:
        import DIRAC
        siteName = DIRAC.siteName()
    countryCode = siteName.split('.')[-1]
    res = getCountryMappingTier1(countryCode)
    if not res['OK']:
        gLogger.error("Failed to resolve closest Tier1", res['Message'])
        return res
    tier1 = res['Value']
    # Create a sorted list of the active readonly catalogs: closest Tier1
    # first, then the others randomized.  'in' replaces the deprecated
    # dict.has_key(), and pop() returns the removed value directly.
    catalogList = []
    if tier1 in catalogDict:
        catalogList.append(catalogDict.pop(tier1))
    catalogList.extend(randomize(catalogDict.values()))
    return S_OK(catalogList)
コード例 #15
0
ファイル: DataOperation.py プロジェクト: sbel/bes3-jinr
 def __init__( self ):
   """Define the DataOperation accounting record: key fields, accounting
   fields and bucket lengths, then register the type and preset the
   execution site.
   """
   BaseAccountingType.__init__( self )
   keyType = 'VARCHAR(32)'
   self.definitionKeyFields = [ ( 'OperationType' , "VARCHAR(32)" ),
                                ( 'User', "VARCHAR(32)" ),
                                ( 'ExecutionSite', keyType ),
                                ( 'Source', keyType ),
                                ( 'Destination', keyType ),
                                ( 'Protocol', keyType ),
                                ( 'FinalStatus', keyType ) ]
   counter = 'INT UNSIGNED'
   self.definitionAccountingFields = [ ( 'TransferSize', 'BIGINT UNSIGNED' ),
                                       ( 'TransferTime', 'FLOAT' ),
                                       ( 'RegistrationTime', 'FLOAT' ),
                                       ( 'TransferOK', counter ),
                                       ( 'TransferTotal', counter ),
                                       ( 'RegistrationOK', counter ),
                                       ( 'RegistrationTotal', counter ) ]
   # Bucket lengths: 15m under 3d, 1h under 1w+1d, 1d up to 6m, 1w beyond
   day = 86400
   self.bucketsLength = [ ( day * 3, 900 ),
                          ( day * 8, 3600 ),
                          ( 15552000, day ),
                          ( 31104000, 604800 ) ]
   self.checkType()
   self.setValueByKey( 'ExecutionSite', DIRAC.siteName() )
コード例 #16
0
    def initialize(self, systemName, cfgPath):
        """Configure the logger from the given configuration path: backend
        options, outputs, verbosity and call-frame display.

        Idempotent: only the first call has any effect.

        :param systemName: name of the system this logger belongs to
        :param cfgPath: configuration path holding the logging options
        """
        if self.__initialized:
            return
        self.__initialized = True

        from DIRAC.ConfigurationSystem.Client.Config import gConfig
        from os import getpid

        # Get the options for the different output backends
        retDict = gConfig.getOptionsDict("%s/BackendsOptions" % cfgPath)

        if not retDict['OK']:
            # Fall back to sane defaults when nothing is configured
            cfgBackOptsDict = {
                'FileName': 'Dirac-log_%s.log' % getpid(),
                'Interactive': True,
                'SleepTime': 150
            }
        else:
            cfgBackOptsDict = retDict['Value']

        self.__backendOptions.update(cfgBackOptsDict)

        if 'FileName' not in self.__backendOptions:
            self.__backendOptions['FileName'] = 'Dirac-log_%s.log' % getpid()

        # Normalise SleepTime to an int, keeping the default on a missing or
        # malformed value.  Narrowed from a bare 'except', which could have
        # hidden unrelated errors (e.g. KeyboardInterrupt).
        sleepTime = 150
        try:
            sleepTime = int(self.__backendOptions['SleepTime'])
        except (KeyError, TypeError, ValueError):
            pass
        self.__backendOptions['SleepTime'] = sleepTime

        self.__backendOptions['Interactive'] = gConfig.getValue(
            "%s/BackendsOptions/Interactive" % cfgPath, True)

        self.__backendOptions['Site'] = DIRAC.siteName()

        self.__backendOptions['Color'] = gConfig.getValue(
            "%s/LogColor" % cfgPath, False)

        # Configure outputs
        desiredBackends = gConfig.getValue("%s/LogBackends" % cfgPath,
                                           'stdout')
        self.registerBackends(List.fromChar(desiredBackends))
        # Configure verbosity
        defaultLevel = Logger.defaultLogLevel
        if "Scripts" in cfgPath:
            defaultLevel = gConfig.getValue('/Systems/Scripts/LogLevel',
                                            Logger.defaultLogLevel)
        self.setLevel(gConfig.getValue("%s/LogLevel" % cfgPath, defaultLevel))
        # Configure framing
        self._showCallingFrame = gConfig.getValue("%s/LogShowLine" % cfgPath,
                                                  self._showCallingFrame)
        # Get system name
        self._systemName = str(systemName)

        # Non-interactive backends buffer messages; flush them at exit
        if not self.__backendOptions['Interactive']:
            ExitCallback.registerExitCallback(self.flushAllMessages)
コード例 #17
0
ファイル: TimeLeft.py プロジェクト: bmb/DIRAC
  def __getBatchSystemPlugin( self ):
    """Using the name of the batch system plugin, will return an instance
       of the plugin class.

       The batch system is detected from characteristic environment
       variables; returns S_ERROR when none is recognised or when the
       corresponding plugin module cannot be imported.
    """
    batchSystems = {'LSF':'LSB_JOBID', 'PBS':'PBS_JOBID', 'BQS':'QSUB_REQNAME', 'SGE':'SGE_TASK_ID'} #more to be added later
    name = None
    for batchSystem, envVar in batchSystems.items():
      if os.environ.has_key( envVar ):
        name = batchSystem
        break

    if name == None:
      self.log.warn( 'Batch system type for site %s is not currently supported' % DIRAC.siteName() )
      return S_ERROR( 'Current batch system is not supported' )

    self.log.debug( 'Creating plugin for %s batch system' % ( name ) )
    try:
      # Dynamically import DIRAC.Core.Utilities.TimeLeft.<Name>TimeLeft
      batchSystemName = "%sTimeLeft" % ( name )
      batchPlugin = __import__( 'DIRAC.Core.Utilities.TimeLeft.%s' %
                                batchSystemName, globals(), locals(), [batchSystemName] )
    except Exception, x:
      msg = 'Could not import DIRAC.Core.Utilities.TimeLeft.%s' % ( batchSystemName )
      self.log.warn( x )
      self.log.warn( msg )
      return S_ERROR( msg )
コード例 #18
0
ファイル: OverlayInput.py プロジェクト: LCDgit/ILCDIRAC
  def __init__(self):
    """Initialise OverlayInput: application identity, background-overlay
    parameters and the data-access helpers, all set to their defaults.
    """
    super(OverlayInput, self).__init__()
    self.enable = True
    self.STEP_NUMBER = ''
    self.log = gLogger.getSubLogger("OverlayInput")

    # Application identity and bookkeeping
    self.applicationName = 'OverlayInput'
    self.applicationLog = ''
    self.printoutflag = ''
    self.curdir = os.getcwd()
    self.prodid = 0

    # Detector / energy configuration ('detector' kept for backward compatibility)
    self.detector = ''
    self.detectormodel = ""
    self.energytouse = ''
    self.energy = 0

    # Background overlay parameters
    self.nbofeventsperfile = 100
    self.lfns = []
    self.nbfilestoget = 0
    self.BkgEvtType = 'gghad'
    self.BXOverlay = 0
    self.ggtohadint = 3.2
    self.nbsigeventsperfile = 0
    self.nbinputsigfile = 1
    self.NbSigEvtsPerJob = 0

    # Data-access helpers and site context
    self.rm = ReplicaManager()
    self.fc = FileCatalogClient()
    self.site = DIRAC.siteName()
    self.machine = 'clic_cdr'
コード例 #19
0
ファイル: ModuleBase.py プロジェクト: sposs/Documents
  def setReplicaProblematic(self,lfn,se,pfn='',reason='Access failure'):
    """ Set replica status to Problematic in the File Catalog
    @param lfn: lfn of the problematic file
    @param se: storage element
    @param pfn: physical file name
    @param reason: as name suggests...
    @return: S_OK()
    """

    rm = ReplicaManager()
    source = "Job %d at %s" % (self.jobID,DIRAC.siteName())
    result = rm.setReplicaProblematic((lfn,pfn,se,reason),source)
    if not result['OK'] or result['Value']['Failed']:
      # We have failed the report, let's attempt the Integrity DB failover
      integrityDB = RPCClient('DataManagement/DataIntegrity',timeout=120)
      fileMetadata = {'Prognosis':reason,'LFN':lfn,'PFN':pfn,'StorageElement':se}
      result = integrityDB.insertProblematic(source,fileMetadata)
      if not result['OK']:
        # Add it to the request ('in' test replaces the deprecated has_key)
        if 'Request' in self.workflow_commons:
          request  = self.workflow_commons['Request']
          subrequest = DISETSubRequest(result['rpcStub']).getDictionary()
          request.addSubRequest(subrequest,'integrity')

    return S_OK()
コード例 #20
0
ファイル: Job.py プロジェクト: DIRACGrid/DIRAC
  def __init__(self):
    """Define the Job accounting record.

    Declares the key fields, the accounting (value) fields and the
    bucketing granularity, registers the type and presets the Site key.
    """
    BaseAccountingType.__init__(self)
    day = 86400
    hour = 3600
    week = 604800
    self.definitionKeyFields = [
        ('User', 'VARCHAR(32)'),
        ('UserGroup', 'VARCHAR(32)'),
        ('JobGroup', "VARCHAR(64)"),
        ('JobType', 'VARCHAR(32)'),
        ('JobClass', 'VARCHAR(32)'),
        ('ProcessingType', 'VARCHAR(256)'),
        ('Site', 'VARCHAR(32)'),
        ('FinalMajorStatus', 'VARCHAR(32)'),
        ('FinalMinorStatus', 'VARCHAR(256)'),
    ]
    self.definitionAccountingFields = [
        ('CPUTime', "INT UNSIGNED"),
        ('NormCPUTime', "INT UNSIGNED"),
        ('ExecTime', "INT UNSIGNED"),
        ('InputDataSize', 'BIGINT UNSIGNED'),
        ('OutputDataSize', 'BIGINT UNSIGNED'),
        ('InputDataFiles', 'INT UNSIGNED'),
        ('OutputDataFiles', 'INT UNSIGNED'),
        ('DiskSpace', 'BIGINT UNSIGNED'),
        ('InputSandBoxSize', 'BIGINT UNSIGNED'),
        ('OutputSandBoxSize', 'BIGINT UNSIGNED'),
        ('ProcessedEvents', 'INT UNSIGNED'),
    ]
    # Bucket granularity: finer for recent data, coarser with age
    self.bucketsLength = [
        (day * 8, hour),        # <1w+1d = 1h
        (day * 35, hour * 4),   # <35d = 4h
        (day * 30 * 6, day),    # <6m = 1d
        (day * 365, day * 2),   # <1y = 2d
        (day * 600, week),      # >1y = 1w
    ]

    self.checkType()
    # Fill the site
    self.setValueByKey("Site", DIRAC.siteName())
コード例 #21
0
 def __init__(self):
     """Define the DataOperation accounting record: key fields, accounting
     fields and bucket lengths, then register the type and preset the
     execution site.
     """
     super(DataOperation, self).__init__()
     keyType = "VARCHAR(32)"
     self.definitionKeyFields = [
         ("OperationType", keyType),
         ("User", "VARCHAR(64)"),
         ("ExecutionSite", "VARCHAR(256)"),
         ("Source", keyType),
         ("Destination", keyType),
         ("Protocol", keyType),
         ("FinalStatus", keyType),
     ]
     counter = "INT UNSIGNED"
     self.definitionAccountingFields = [
         ("TransferSize", "BIGINT UNSIGNED"),
         ("TransferTime", "FLOAT"),
         ("RegistrationTime", "FLOAT"),
         ("TransferOK", counter),
         ("TransferTotal", counter),
         ("RegistrationOK", counter),
         ("RegistrationTotal", counter),
     ]
     # Bucket lengths: 15m under 3d, 1h under 1w+1d, 1d up to 6m, 1w beyond
     day = 86400
     self.bucketsLength = [
         (day * 3, 900),
         (day * 8, 3600),
         (15552000, day),
         (31104000, 604800),
     ]
     self.checkType()
     self.setValueByKey("ExecutionSite", DIRAC.siteName())
コード例 #22
0
ファイル: InputDataResolution.py プロジェクト: sparsh35/DIRAC
    def __resolveInputData(self):
        """This method controls the execution of the DIRAC input data modules
        according to the VO policy defined in the configuration service.

        The policy can come from the job itself ('InputDataPolicy') or,
        failing that, from the Operations 'InputDataPolicy' section for the
        site (with a 'Default' fallback).  Each policy module is run in turn;
        files that one module fails to resolve are retried by the next.
        """
        # Site may be overridden by the job configuration
        site = self.arguments['Configuration'].get('SiteName',
                                                   DIRAC.siteName())

        self.arguments.setdefault('Job', {})

        policy = self.arguments['Job'].get('InputDataPolicy', [])
        if policy:
            # In principle this can be a list of modules with the first taking precedence
            if isinstance(policy, six.string_types):
                policy = [policy]
            self.log.info('Job has a specific policy setting: %s' %
                          (', '.join(policy)))
        else:
            self.log.debug(
                'Attempting to resolve input data policy for site %s' % site)
            inputDataPolicy = Operations().getOptionsDict('InputDataPolicy')
            if not inputDataPolicy['OK']:
                return S_ERROR(
                    'Could not resolve InputDataPolicy from Operations InputDataPolicy'
                )

            options = inputDataPolicy['Value']
            # Site-specific policy wins over the 'Default' entry
            policy = options.get(site, options.get('Default', []))
            if policy:
                policy = [x.strip() for x in policy.split(',')]
                if site in options:
                    prStr = 'Found specific'
                else:
                    prStr = 'Applying default'
                self.log.info('%s input data policy for site %s:\n%s' %
                              (prStr, site, '\n'.join(policy)))

        dataToResolve = []  # if none, all supplied input data is resolved
        successful = {}
        # Run each policy module in turn; any files left in 'Failed' are
        # passed to the next module as the data still to resolve
        for modulePath in policy:
            result = self.__runModule(modulePath, dataToResolve)
            if not result['OK']:
                self.log.warn('Problem during %s execution' % modulePath)
                return result

            result = result['Value']
            successful.update(result.get('Successful', {}))
            dataToResolve = result.get('Failed', [])
            if dataToResolve:
                self.log.info('%s failed for the following files:\n%s' %
                              (modulePath, '\n'.join(dataToResolve)))
            else:
                self.log.info('All replicas resolved after %s execution' %
                              (modulePath))
                break

        if successful:
            self.log.verbose('Successfully resolved:', str(successful))

        return S_OK({'Successful': successful, 'Failed': dataToResolve})
コード例 #23
0
    def __resolveInputData(self):
        """This method controls the execution of the DIRAC input data modules
        according to the VO policy defined in the configuration service.

        The policy can come from the job itself ('InputDataPolicy') or,
        failing that, from the Operations 'InputDataPolicy' section for the
        site (with a 'Default' fallback).  Each policy module is run in turn;
        files that one module fails to resolve are retried by the next.
        """
        # Site may be overridden by the job configuration
        site = self.arguments["Configuration"].get("SiteName",
                                                   DIRAC.siteName())

        self.arguments.setdefault("Job", {})

        policy = self.arguments["Job"].get("InputDataPolicy", [])
        if policy:
            # In principle this can be a list of modules with the first taking precedence
            if isinstance(policy, six.string_types):
                policy = [policy]
            self.log.info("Job has a specific policy setting: %s" %
                          (", ".join(policy)))
        else:
            self.log.debug(
                "Attempting to resolve input data policy for site %s" % site)
            inputDataPolicy = Operations().getOptionsDict("InputDataPolicy")
            if not inputDataPolicy["OK"]:
                return S_ERROR(
                    "Could not resolve InputDataPolicy from Operations InputDataPolicy"
                )

            options = inputDataPolicy["Value"]
            # Site-specific policy wins over the 'Default' entry
            policy = options.get(site, options.get("Default", []))
            if policy:
                policy = [x.strip() for x in policy.split(",")]
                if site in options:
                    prStr = "Found specific"
                else:
                    prStr = "Applying default"
                self.log.info("%s input data policy for site %s:\n%s" %
                              (prStr, site, "\n".join(policy)))

        dataToResolve = []  # if none, all supplied input data is resolved
        successful = {}
        # Run each policy module in turn; any files left in 'Failed' are
        # passed to the next module as the data still to resolve
        for modulePath in policy:
            result = self.__runModule(modulePath, dataToResolve)
            if not result["OK"]:
                self.log.warn("Problem during %s execution" % modulePath)
                return result

            result = result["Value"]
            successful.update(result.get("Successful", {}))
            dataToResolve = result.get("Failed", [])
            if dataToResolve:
                self.log.info("%s failed for the following files:\n%s" %
                              (modulePath, "\n".join(dataToResolve)))
            else:
                self.log.info("All replicas resolved after %s execution" %
                              (modulePath))
                break

        if successful:
            self.log.verbose("Successfully resolved:", str(successful))

        return S_OK({"Successful": successful, "Failed": dataToResolve})
コード例 #24
0
def determineSeFromSite():
    """Return the SE name associated with the current site.

    The static SeSiteMap mapping takes precedence; otherwise the first SE
    registered for the site in the configuration is used.  An empty string
    is returned when nothing can be determined.
    """
    site = DIRAC.siteName()
    seName = SeSiteMap.get(site, "")
    if seName:
        return seName
    result = getSEsForSite(site)
    if result["OK"] and result["Value"]:
        return result["Value"][0]
    return seName
コード例 #25
0
def determineSeFromSite():
    """Pick a Storage Element name for the current site ('' when unknown)."""
    site = DIRAC.siteName()
    se = SeSiteMap.get(site, '')
    if se:
        return se
    res = getSEsForSite(site)
    return res['Value'][0] if res['OK'] and res['Value'] else ''
コード例 #26
0
    def am_initialize(self, *initArgs):
        """ Common initialization for all the agents.

        This is executed every time an agent (re)starts.
        This is called by the AgentReactor, should not be overridden.
    """
        agentName = self.am_getModuleParam('fullName')
        # The subclass initialize() must return an S_OK/S_ERROR structure
        result = self.initialize(*initArgs)
        if not isReturnStructure(result):
            return S_ERROR("initialize must return S_OK/S_ERROR")
        if not result['OK']:
            return S_ERROR("Error while initializing %s: %s" %
                           (agentName, result['Message']))
        # Make sure the control and work directories exist before the agent runs
        mkDir(self.am_getControlDirectory())
        workDirectory = self.am_getWorkDirectory()
        mkDir(workDirectory)
        # Set the work directory in an environment variable available to subprocesses if needed
        os.environ['AGENT_WORKDIRECTORY'] = workDirectory

        self.__moduleProperties['shifterProxy'] = self.am_getOption(
            'shifterProxy')
        # The legacy monitor is only enabled when the new activity monitoring is off
        if self.am_monitoringEnabled() and not self.activityMonitoring:
            self.monitor.enable()
        if len(self.__moduleProperties['executors']) < 1:
            return S_ERROR("At least one executor method has to be defined")
        if not self.am_Enabled():
            return S_ERROR("Agent is disabled via the configuration")
        # Banner summarising the agent's run-time configuration
        self.log.notice("=" * 40)
        self.log.notice("Loaded agent module %s" %
                        self.__moduleProperties['fullName'])
        self.log.notice(" Site: %s" % DIRAC.siteName())
        self.log.notice(" Setup: %s" % gConfig.getValue("/DIRAC/Setup"))
        self.log.notice(" Base Module version: %s " % __RCSID__)
        self.log.notice(" Agent version: %s" %
                        self.__codeProperties['version'])
        self.log.notice(" DIRAC version: %s" % DIRAC.version)
        self.log.notice(" DIRAC platform: %s" % DIRAC.getPlatform())
        pollingTime = int(self.am_getOption('PollingTime'))
        if pollingTime > 3600:
            self.log.notice(" Polling time: %s hours" % (pollingTime / 3600.))
        else:
            self.log.notice(" Polling time: %s seconds" %
                            self.am_getOption('PollingTime'))
        self.log.notice(" Control dir: %s" % self.am_getControlDirectory())
        self.log.notice(" Work dir: %s" % self.am_getWorkDirectory())
        if self.am_getOption('MaxCycles') > 0:
            self.log.notice(" Cycles: %s" % self.am_getMaxCycles())
        else:
            self.log.notice(" Cycles: unlimited")
        if self.am_getWatchdogTime() > 0:
            self.log.notice(" Watchdog interval: %s" %
                            self.am_getWatchdogTime())
        else:
            self.log.notice(" Watchdog interval: disabled ")
        self.log.notice("=" * 40)
        self.__initialized = True
        return S_OK()
コード例 #27
0
ファイル: StorageElement.py プロジェクト: vfalbor/DIRAC
 def isLocalSE( self ):
   """ Test if the Storage Element is local in the current context.

       :returns: S_OK( bool ) - True when this SE is attached to the current site
   """
   import DIRAC
   gLogger.verbose( "StorageElement.isLocalSE: Determining whether %s is a local SE." % self.name )
   # getSEsForSite may return an error structure (e.g. unknown site); guard the
   # 'Value' access instead of raising KeyError, and treat failure as "not local".
   result = getSEsForSite( DIRAC.siteName() )
   localSEs = result['Value'] if result['OK'] else []
   return S_OK( self.name in localSEs )
コード例 #28
0
ファイル: StorageElement.py プロジェクト: kanwx/DIRAC
 def isLocalSE( self ):
   """ Check whether this Storage Element belongs to the site we are running at.
   """
   import DIRAC
   self.log.verbose( "isLocalSE: Determining whether %s is a local SE." % self.name )
   siteSEs = getSEsForSite( DIRAC.siteName() )['Value']
   return S_OK( self.name in siteSEs )
コード例 #29
0
def getSiteSE(SEname):
    """Return a Storage Element name for the current site.

    If the site has at least one SE registered, the first one is returned;
    otherwise the *SEname* passed in is returned unchanged as a fallback.
    """
    sitename = DIRAC.siteName()
    # NOTE(review): logging a plain site name at 'error' level looks wrong;
    # kept as-is to preserve behaviour -- consider gLogger.notice instead.
    DIRAC.gLogger.error('Sitename: %s' % (sitename))
    # print() call instead of the Python-2-only print statement
    print("sitename", sitename)
    res = getSEsForSite(sitename)
    if not res['OK']:
        DIRAC.gLogger.error(res['Message'])
        return SEname
    if res['Value']:
        SEname = res['Value'][0]
    return SEname
コード例 #30
0
ファイル: StorageElement.py プロジェクト: mesmith75/DIRAC
  def __isLocalSE( self ):
    """ Check whether this Storage Element is local to the site we are running at.
    """
    self.log.getSubLogger( 'LocalSE' ).verbose( "Determining whether %s is a local SE." % self.name )

    import DIRAC
    siteSEs = getSEsForSite( DIRAC.siteName() )['Value']
    return S_OK( self.name in siteSEs )
コード例 #31
0
ファイル: TimeLeft.py プロジェクト: JanEbbing/DIRAC
  def __init__( self ):
    """ Standard constructor
    """
    self.log = gLogger.getSubLogger( 'TimeLeft' )
    # Ratio of the SpecInt published by the site to the reference value of 250
    # (the reference used for Matching)
    self.scaleFactor = gConfig.getValue( '/LocalSite/CPUScalingFactor', 0.0 )
    if not self.scaleFactor:
      self.log.warn( '/LocalSite/CPUScalingFactor not defined for site %s' % DIRAC.siteName() )

    self.normFactor = gConfig.getValue( '/LocalSite/CPUNormalizationFactor', 0.0 )
    if not self.normFactor:
      self.log.warn( '/LocalSite/CPUNormalizationFactor not defined for site %s' % DIRAC.siteName() )

    self.cpuMargin = gConfig.getValue( '/LocalSite/CPUMargin', 10 )  # percent

    # Load the batch-system plugin; on failure keep the error for getTimeLeft()
    result = self.__getBatchSystemPlugin()
    if not result['OK']:
      self.batchPlugin = None
      self.batchError = result['Message']
    else:
      self.batchPlugin = result['Value']
コード例 #32
0
ファイル: StorageElement.py プロジェクト: kfox1111/DIRAC
    def __isLocalSE(self):
        """Check whether this Storage Element is local in the current context."""
        self.log.getSubLogger("LocalSE").verbose("Determining whether %s is a local SE." % self.name)

        import DIRAC

        siteSEs = getSEsForSite(DIRAC.siteName())["Value"]
        return S_OK(self.name in siteSEs)
コード例 #33
0
    def __getBatchSystemPlugin(self):
        """Using the name of the batch system plugin, will return an instance of the plugin class.

        The batch system is detected from characteristic environment variables;
        MJF (Machine/Job Features) is used only as a fallback.

        :returns: S_OK(plugin instance) or S_ERROR
        """
        batchSystems = {
            "LSF": "LSB_JOBID",
            "PBS": "PBS_JOBID",
            "BQS": "QSUB_REQNAME",
            "SGE": "SGE_TASK_ID",
            "SLURM": "SLURM_JOB_ID",
            "HTCondor": "_CONDOR_JOB_AD",
        }  # more to be added later
        name = None
        for batchSystem, envVar in batchSystems.items():
            if envVar in os.environ:
                name = batchSystem
                break

        if name is None and "MACHINEFEATURES" in os.environ and "JOBFEATURES" in os.environ:
            # Only use MJF if legacy batch system information not available for now
            name = "MJF"

        if name is None:
            self.log.warn(
                "Batch system type for site %s is not currently supported" %
                DIRAC.siteName())
            return S_ERROR("Current batch system is not supported")

        self.log.debug("Creating plugin for %s batch system" % (name))
        batchSystemName = "%sResourceUsage" % (name)
        try:
            batchPlugin = __import__(
                "DIRAC.Resources.Computing.BatchSystems.TimeLeft.%s" %
                batchSystemName,  # pylint: disable=unused-variable
                globals(),
                locals(),
                [batchSystemName],
            )
        except ImportError as x:
            msg = "Could not import DIRAC.Resources.Computing.BatchSystems.TimeLeft.%s" % (
                batchSystemName)
            self.log.warn(x)
            self.log.warn(msg)
            return S_ERROR(msg)

        try:
            # getattr() instead of eval(): same attribute lookup without
            # evaluating a dynamically-built code string
            batchInstance = getattr(batchPlugin, batchSystemName)()
        except Exception as x:  # pylint: disable=broad-except
            msg = "Could not instantiate %s()" % (batchSystemName)
            self.log.warn(x)
            self.log.warn(msg)
            return S_ERROR(msg)

        return S_OK(batchInstance)
コード例 #34
0
    def __getBatchSystemPlugin(self):
        """ Using the name of the batch system plugin, will return an instance of the plugin class.

        The batch system is detected from characteristic environment variables;
        MJF (Machine/Job Features) is used only as a fallback.

        :returns: S_OK(plugin instance) or S_ERROR
    """
        batchSystems = {
            'LSF': 'LSB_JOBID',
            'PBS': 'PBS_JOBID',
            'BQS': 'QSUB_REQNAME',
            'SGE': 'SGE_TASK_ID',
            'SLURM': 'SLURM_JOB_ID',
            'HTCondor': '_CONDOR_JOB_AD'
        }  # more to be added later
        name = None
        for batchSystem, envVar in batchSystems.items():
            if envVar in os.environ:
                name = batchSystem
                break

        if name is None and 'MACHINEFEATURES' in os.environ and 'JOBFEATURES' in os.environ:
            # Only use MJF if legacy batch system information not available for now
            name = 'MJF'

        if name is None:
            self.log.warn(
                'Batch system type for site %s is not currently supported' %
                DIRAC.siteName())
            return S_ERROR('Current batch system is not supported')

        self.log.debug('Creating plugin for %s batch system' % (name))
        batchSystemName = "%sResourceUsage" % (name)
        try:
            batchPlugin = __import__(
                'DIRAC.Resources.Computing.BatchSystems.TimeLeft.%s' %  # pylint: disable=unused-variable
                batchSystemName,
                globals(),
                locals(),
                [batchSystemName])
        except ImportError as x:
            msg = 'Could not import DIRAC.Resources.Computing.BatchSystems.TimeLeft.%s' % (
                batchSystemName)
            self.log.warn(x)
            self.log.warn(msg)
            return S_ERROR(msg)

        try:
            # getattr() instead of eval(): same attribute lookup without
            # evaluating a dynamically-built code string
            batchInstance = getattr(batchPlugin, batchSystemName)()
        except Exception as x:  # pylint: disable=broad-except
            msg = 'Could not instantiate %s()' % (batchSystemName)
            self.log.warn(x)
            self.log.warn(msg)
            return S_ERROR(msg)

        return S_OK(batchInstance)
コード例 #35
0
 def __getConfigFlags():
     """ Build the dirac-configure command-line flags used inside the container.

     Returns a single string with all flags joined by spaces.
     """
     flags = []
     setup = gConfig.getValue("/DIRAC/Setup", "unknown")
     if setup:
         flags.append("-S '%s'" % setup)
     servers = gConfig.getValue("/DIRAC/Configuration/Servers", [])
     flags.append("-C '%s'" % ','.join(servers))
     flags.append("-n '%s'" % DIRAC.siteName())
     return ' '.join(flags)
コード例 #36
0
 def __getConfigFlags():
   """ Assemble the dirac-configure flags for use inside the container.
       Returns one space-separated string of command line flags.
   """
   opts = []
   setup = gConfig.getValue("/DIRAC/Setup", "unknown")
   if setup:
     opts.append("-S '%s'" % setup)
   opts.append("-C '%s'" % ','.join(gConfig.getValue("/DIRAC/Configuration/Servers", [])))
   opts.append("-n '%s'" % DIRAC.siteName())
   return ' '.join(opts)
コード例 #37
0
  def __resolveInputData(self):
    """This method controls the execution of the DIRAC input data modules according
       to the VO policy defined in the configuration service.

       :returns: S_OK({'Successful': dict, 'Failed': list}) or S_ERROR
    """
    site = self.arguments['Configuration'].get('SiteName', DIRAC.siteName())

    self.arguments.setdefault('Job', {})

    policy = self.arguments['Job'].get('InputDataPolicy', [])
    if policy:
      # In principle this can be a list of modules with the first taking precedence
      # NOTE: basestring is Python 2 only
      if isinstance(policy, basestring):
        policy = [policy]
      self.log.info('Job has a specific policy setting: %s' % (', '.join(policy)))
    else:
      self.log.debug('Attempting to resolve input data policy for site %s' % site)
      inputDataPolicy = Operations().getOptionsDict('InputDataPolicy')
      if not inputDataPolicy['OK']:
        return S_ERROR('Could not resolve InputDataPolicy from Operations InputDataPolicy')

      options = inputDataPolicy['Value']
      # A site-specific policy wins over the 'Default' entry
      policy = options.get(site, options.get('Default', []))
      if policy:
        policy = [x.strip() for x in policy.split(',')]
        if site in options:
          prStr = 'Found specific'
        else:
          prStr = 'Applying default'
        self.log.info('%s input data policy for site %s:\n%s' % (prStr, site, '\n'.join(policy)))

    dataToResolve = []  # if none, all supplied input data is resolved
    successful = {}
    # Each module handles whatever the previous one failed to resolve
    for modulePath in policy:
      result = self.__runModule(modulePath, dataToResolve)
      if not result['OK']:
        self.log.warn('Problem during %s execution' % modulePath)
        return result

      result = result['Value']
      successful.update(result.get('Successful', {}))
      dataToResolve = result.get('Failed', [])
      if dataToResolve:
        self.log.info('%s failed for the following files:\n%s'
                      % (modulePath, '\n'.join(dataToResolve)))
      else:
        self.log.info('All replicas resolved after %s execution' % (modulePath))
        break

    if successful:
      self.log.verbose('Successfully resolved:', str(successful))

    return S_OK({'Successful': successful, 'Failed': dataToResolve})
コード例 #38
0
ファイル: DataOperationSender.py プロジェクト: TaykYoku/DIRAC
 def sendMonitoring(self):
     baseDict["ExecutionSite"] = DIRAC.siteName()
     baseDict["Channel"] = baseDict["Source"] + "->" + baseDict[
         "Destination"]
     self.dataOperationReporter.addRecord(baseDict)
     if commitFlag:
         result = self.dataOperationReporter.commit()
         sLog.debug("Committing data operation to monitoring")
         if not result["OK"]:
             sLog.error("Could not commit data operation to monitoring",
                        result["Message"])
         else:
             sLog.debug("Done committing to monitoring")
コード例 #39
0
  def initialize( self, systemName, cfgPath ):
    """ Configure this logger from the CS options found under cfgPath.

        Idempotent: a second call returns immediately.
    """
    if self.__initialized:
      return
    self.__initialized = True

    from DIRAC.ConfigurationSystem.Client.Config import gConfig
    from os import getpid

    # Get the options for the different output backends
    retDict = gConfig.getOptionsDict( "%s/BackendsOptions" % cfgPath )

    if not retDict[ 'OK' ]:
      cfgBackOptsDict = { 'FileName': 'Dirac-log_%s.log' % getpid(), 'Interactive': True, 'SleepTime': 150 }
    else:
      cfgBackOptsDict = retDict[ 'Value' ]

    self.__backendOptions.update( cfgBackOptsDict )

    if 'FileName' not in self.__backendOptions:
      self.__backendOptions[ 'FileName' ] = 'Dirac-log_%s.log' % getpid()

    sleepTime = 150
    try:
      sleepTime = int ( self.__backendOptions[ 'SleepTime' ] )
    except ( KeyError, ValueError, TypeError ):
      # Option absent or malformed: keep the default (narrowed from a bare except)
      pass
    self.__backendOptions[ 'SleepTime' ] = sleepTime

    self.__backendOptions[ 'Interactive' ] = gConfig.getValue( "%s/BackendsOptions/Interactive" % cfgPath, True )

    self.__backendOptions[ 'Site' ] = DIRAC.siteName()

    self.__backendOptions[ 'Color' ] = gConfig.getValue( "%s/LogColor" % cfgPath, False )

    # Configure outputs
    desiredBackends = gConfig.getValue( "%s/LogBackends" % cfgPath, 'stdout' )
    self.registerBackends( List.fromChar( desiredBackends ) )
    # Configure verbosity
    defaultLevel = Logger.defaultLogLevel
    if "Scripts" in cfgPath:
      defaultLevel = gConfig.getValue( '/Systems/Scripts/LogLevel', Logger.defaultLogLevel )
    self.setLevel( gConfig.getValue( "%s/LogLevel" % cfgPath, defaultLevel ) )
    # Configure framing
    self._showCallingFrame = gConfig.getValue( "%s/LogShowLine" % cfgPath, self._showCallingFrame )
    # Get system name
    self._systemName = str( systemName )

    if not self.__backendOptions['Interactive']:
      ExitCallback.registerExitCallback( self.flushAllMessages )
コード例 #40
0
    def __resolveInputData(self):
        """This method controls the execution of the DIRAC input data modules according
       to the VO policy defined in the configuration service.

       :returns: S_OK({'Successful': dict, 'Failed': list}) or S_ERROR
    """
        site = self.arguments["Configuration"].get("SiteName", DIRAC.siteName())

        self.arguments.setdefault("Job", {})

        policy = self.arguments["Job"].get("InputDataPolicy", [])
        if policy:
            # In principle this can be a list of modules with the first taking precedence
            # NOTE: types.StringTypes is Python 2 only
            if type(policy) in types.StringTypes:
                policy = [policy]
            self.log.info("Job has a specific policy setting: %s" % (", ".join(policy)))
        else:
            self.log.debug("Attempting to resolve input data policy for site %s" % site)
            inputDataPolicy = Operations().getOptionsDict("InputDataPolicy")
            if not inputDataPolicy["OK"]:
                return S_ERROR("Could not resolve InputDataPolicy from Operations InputDataPolicy")

            options = inputDataPolicy["Value"]
            # A site-specific policy wins over the 'Default' entry
            policy = options.get(site, options.get("Default", []))
            if policy:
                policy = [x.strip() for x in policy.split(",")]
                if site in options:
                    prStr = "Found specific"
                else:
                    prStr = "Applying default"
                self.log.info("%s input data policy for site %s:\n%s" % (prStr, site, "\n".join(policy)))

        dataToResolve = []  # if none, all supplied input data is resolved
        successful = {}
        # Each module handles whatever the previous one failed to resolve
        for modulePath in policy:
            result = self.__runModule(modulePath, dataToResolve)
            if not result["OK"]:
                self.log.warn("Problem during %s execution" % modulePath)
                return result

            result = result["Value"]
            successful.update(result.get("Successful", {}))
            dataToResolve = result.get("Failed", [])
            if dataToResolve:
                self.log.info("%s failed for the following files:\n%s" % (modulePath, "\n".join(dataToResolve)))
            else:
                self.log.info("All replicas resolved after %s execution" % (modulePath))
                break

        if successful:
            self.log.verbose("Successfully resolved:", str(successful))

        return S_OK({"Successful": successful, "Failed": dataToResolve})
コード例 #41
0
def upload_to_seList(FileLFN, FileName):
    """Upload *FileName* as *FileLFN*, preferring a local SE from seList.

    First tries the site's local SEs that are also in the global seList;
    if none succeeds, tries every SE in seList until one upload passes the
    catalog-coherence check.  Returns DIRAC.S_OK or DIRAC.S_ERROR.
    """
    DIRAC.gLogger.notice('Put and register in LFC and DFC:', FileLFN)
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.Core.Utilities.SiteSEMapping import getSEsForSite
    # BUGFIX: localSEs was previously only assigned when the call succeeded,
    # causing a NameError in the loop below when getSEsForSite failed.
    localSEs = []
    result = getSEsForSite(DIRAC.siteName())
    if result['OK']:
        localSEs = result['Value']

    dirac = Dirac()
    upload_result = 'NOTOK'
    failing_se = []

    for se in localSEs:
        if se in seList:
            DIRAC.gLogger.notice('Local SE is in the list:', se)
            ret = dirac.addFile(FileLFN, FileName, se)
            res = CheckCatalogCoherence(FileLFN)
            if res != DIRAC.S_OK:
                DIRAC.gLogger.error(
                    'Job failed: Catalog Coherence problem found')
                DIRAC.gLogger.notice('Failing SE:', se)
                failing_se.append(se)
                continue
            upload_result = 'OK'

    # Fall back to any SE in the configured list
    if upload_result != 'OK':
        for se in seList:
            DIRAC.gLogger.notice('Try upload to:', se)
            ret = dirac.addFile(FileLFN, FileName, se)

            res = CheckCatalogCoherence(FileLFN)
            if res != DIRAC.S_OK:
                DIRAC.gLogger.error(
                    'Job failed: Catalog Coherence problem found')
                failing_se.append(se)
                DIRAC.gLogger.notice('Failing SE:', se)
                continue
            upload_result = 'OK'
            break

    DIRAC.gLogger.notice('Failing SE list:', failing_se)

    #for se in failing_se:
    #  seList.remove(se)

    #  DIRAC.gLogger.notice('Failing SE list:',failing_se)
    if upload_result != 'OK':
        return DIRAC.S_ERROR

    return DIRAC.S_OK
コード例 #42
0
ファイル: AgentModule.py プロジェクト: DIRACGrid/DIRAC
  def am_initialize(self, *initArgs):
    """ Common initialization for all the agents.

        This is executed every time an agent (re)starts.
        This is called by the AgentReactor, should not be overridden.
    """
    agentName = self.am_getModuleParam('fullName')
    # The subclass initialize() must return an S_OK/S_ERROR structure
    result = self.initialize(*initArgs)
    if not isReturnStructure(result):
      return S_ERROR("initialize must return S_OK/S_ERROR")
    if not result['OK']:
      return S_ERROR("Error while initializing %s: %s" % (agentName, result['Message']))
    # Make sure the control and work directories exist before the agent runs
    mkDir(self.am_getControlDirectory())
    workDirectory = self.am_getWorkDirectory()
    mkDir(workDirectory)
    # Set the work directory in an environment variable available to subprocesses if needed
    os.environ['AGENT_WORKDIRECTORY'] = workDirectory

    self.__moduleProperties['shifterProxy'] = self.am_getOption('shifterProxy')
    if self.am_monitoringEnabled():
      self.monitor.enable()
    if len(self.__moduleProperties['executors']) < 1:
      return S_ERROR("At least one executor method has to be defined")
    if not self.am_Enabled():
      return S_ERROR("Agent is disabled via the configuration")
    # Banner summarising the agent's run-time configuration
    self.log.notice("=" * 40)
    self.log.notice("Loaded agent module %s" % self.__moduleProperties['fullName'])
    self.log.notice(" Site: %s" % DIRAC.siteName())
    self.log.notice(" Setup: %s" % gConfig.getValue("/DIRAC/Setup"))
    self.log.notice(" Base Module version: %s " % __RCSID__)
    self.log.notice(" Agent version: %s" % self.__codeProperties['version'])
    self.log.notice(" DIRAC version: %s" % DIRAC.version)
    self.log.notice(" DIRAC platform: %s" % DIRAC.getPlatform())
    pollingTime = int(self.am_getOption('PollingTime'))
    if pollingTime > 3600:
      self.log.notice(" Polling time: %s hours" % (pollingTime / 3600.))
    else:
      self.log.notice(" Polling time: %s seconds" % self.am_getOption('PollingTime'))
    self.log.notice(" Control dir: %s" % self.am_getControlDirectory())
    self.log.notice(" Work dir: %s" % self.am_getWorkDirectory())
    if self.am_getOption('MaxCycles') > 0:
      self.log.notice(" Cycles: %s" % self.am_getMaxCycles())
    else:
      self.log.notice(" Cycles: unlimited")
    if self.am_getWatchdogTime() > 0:
      self.log.notice(" Watchdog interval: %s" % self.am_getWatchdogTime())
    else:
      self.log.notice(" Watchdog interval: disabled ")
    self.log.notice("=" * 40)
    self.__initialized = True
    return S_OK()
コード例 #43
0
ファイル: JobWrapperTemplate.py プロジェクト: vfalbor/DIRAC
def rescheduleFailedJob(jobID, message):
    """Record the failure, flush stored job status/parameters, ask the
    JobManager to reschedule *jobID*, and notify by mail.

    Best-effort: any exception is logged and swallowed so the wrapper
    can continue shutting down.
    """
    try:
        import DIRAC
        global jobReport

        gLogger.warn('Failure during %s' % (message))

        #Setting a job parameter does not help since the job will be rescheduled,
        #instead set the status with the cause and then another status showing the
        #reschedule operation.

        if not jobReport:
            gLogger.info('Creating a new JobReport Object')
            jobReport = JobReport(int(jobID), 'JobWrapperTemplate')

        jobReport.setApplicationStatus('Failed %s ' % message, sendFlag=False)
        jobReport.setJobStatus('Rescheduled', message, sendFlag=False)

        # We must send Job States and Parameters before it gets reschedule
        jobReport.sendStoredStatusInfo()
        jobReport.sendStoredJobParameters()

        gLogger.info(
            'Job will be rescheduled after exception during execution of the JobWrapper'
        )

        jobManager = RPCClient('WorkloadManagement/JobManager')
        result = jobManager.rescheduleJob(int(jobID))
        if not result['OK']:
            gLogger.warn(result)

        # Send mail to debug errors
        mailAddress = DIRAC.alarmMail
        site = DIRAC.siteName()
        subject = 'Job rescheduled at %s' % site
        ret = systemCall(0, 'hostname')
        wn = ret['Value'][1]
        msg = 'Job %s rescheduled at %s, wn=%s\n' % (jobID, site, wn)
        msg += message

        NotificationClient().sendMail(mailAddress,
                                      subject,
                                      msg,
                                      fromAddress="*****@*****.**",
                                      localAttempt=False)

        return
    # Python-3-compatible except clause (was 'except Exception, x' with x unused)
    except Exception:
        gLogger.exception('JobWrapperTemplate failed to reschedule Job')
        return
コード例 #44
0
    def initialize(self, systemName, cfgPath):
        """Configure this logger from the CS options found under *cfgPath*.

        Idempotent: a second call returns immediately.
        """
        if self.__initialized:
            return
        self.__initialized = True

        from DIRAC.ConfigurationSystem.Client.Config import gConfig
        from os import getpid

        # Get the options for the different output backends
        retDict = gConfig.getOptionsDict("%s/BackendsOptions" % cfgPath)

        if not retDict["OK"]:
            cfgBackOptsDict = {"FileName": "Dirac-log_%s.log" % getpid(), "Interactive": True, "SleepTime": 150}
        else:
            cfgBackOptsDict = retDict["Value"]

        self.__backendOptions.update(cfgBackOptsDict)

        # BUGFIX: the original tested has_key("Filename") (wrong case and
        # Python 2 only), so a configured 'FileName' was always overwritten.
        if "FileName" not in self.__backendOptions:
            self.__backendOptions["FileName"] = "Dirac-log_%s.log" % getpid()

        sleepTime = 150
        try:
            sleepTime = int(self.__backendOptions["SleepTime"])
        except (KeyError, ValueError, TypeError):
            # Option absent or malformed: keep the default (narrowed from a bare except)
            pass
        self.__backendOptions["SleepTime"] = sleepTime

        self.__backendOptions["Interactive"] = gConfig.getValue("%s/BackendsOptions/Interactive" % cfgPath, True)

        self.__backendOptions["Site"] = DIRAC.siteName()

        # Configure outputs
        desiredBackends = gConfig.getValue("%s/LogBackends" % cfgPath, "stdout")
        self.registerBackends(List.fromChar(desiredBackends))
        # Configure verbosity
        defaultLevel = Logger.defaultLogLevel
        if "Scripts" in cfgPath:
            defaultLevel = gConfig.getValue("/Systems/Scripts/LogLevel", Logger.defaultLogLevel)
        self.setLevel(gConfig.getValue("%s/LogLevel" % cfgPath, defaultLevel))
        # Configure framing
        self._showCallingFrame = gConfig.getValue("%s/LogShowLine" % cfgPath, self._showCallingFrame)
        # Get system name
        self._systemName = str(systemName)

        if not self.__backendOptions["Interactive"]:
            ExitCallback.registerExitCallback(self.flushAllMessages)
コード例 #45
0
ファイル: ServerBackend.py プロジェクト: sparsh35/DIRAC
    def createHandler(self, parameters=None):
        """
    Each backend can initialize its attributes and create its handler with them.

    :params parameters: dictionary of parameters. ex: {'FileName': file.log}
    """
        if parameters is not None:
            # Override defaults only with the keys actually provided
            self.__interactive = parameters.get('Interactive', self.__interactive)
            self.__sleepTime = parameters.get('SleepTime', self.__sleepTime)
            self.__site = DIRAC.siteName()

        self._handler = ServerHandler(self.__sleepTime, self.__interactive, self.__site)
        self._handler.setLevel(LogLevels.ERROR)
コード例 #46
0
    def __init__(self):
        """Standard constructor"""
        self.log = gLogger.getSubLogger("TimeLeft")

        # Processing power of this node, normalised to the reference CPU
        self.cpuPower = gConfig.getValue("/LocalSite/CPUNormalizationFactor", 0.0)
        if not self.cpuPower:
            self.log.warn(
                "/LocalSite/CPUNormalizationFactor not defined for site %s" %
                DIRAC.siteName())

        # Load the batch-system plugin; keep the error message for later use
        result = self.__getBatchSystemPlugin()
        if not result["OK"]:
            self.batchPlugin = None
            self.batchError = result["Message"]
        else:
            self.batchPlugin = result["Value"]
コード例 #47
0
    def am_initialize(self, *initArgs):
        """Common initialization for all the agents; called by the AgentReactor.

        Delegates to the subclass initialize(), prepares control/work
        directories, and logs a configuration banner.
        """
        agentName = self.am_getModuleParam('fullName')
        # The subclass initialize() must return an S_OK/S_ERROR structure
        result = self.initialize(*initArgs)
        if result == None:
            return S_ERROR(
                "Error while initializing %s module: initialize must return S_OK/S_ERROR"
                % agentName)
        if not result['OK']:
            return S_ERROR("Error while initializing %s: %s" %
                           (agentName, result['Message']))
        # Make sure the control and work directories exist before the agent runs
        _checkDir(self.am_getControlDirectory())
        _checkDir(self.am_getWorkDirectory())

        self.__moduleProperties['shifterProxy'] = self.am_getOption(
            'shifterProxy')
        if self.am_monitoringEnabled():
            self.monitor.enable()
        if len(self.__moduleProperties['executors']) < 1:
            return S_ERROR("At least one executor method has to be defined")
        if not self.am_Enabled():
            return S_ERROR("Agent is disabled via the configuration")
        # Banner summarising the agent's run-time configuration
        self.log.notice("=" * 40)
        self.log.notice("Loaded agent module %s" %
                        self.__moduleProperties['fullName'])
        self.log.notice(" Site: %s" % DIRAC.siteName())
        self.log.notice(" Setup: %s" % gConfig.getValue("/DIRAC/Setup"))
        self.log.notice(" Base Module version: %s " % __RCSID__)
        self.log.notice(" Agent version: %s" %
                        self.__codeProperties['version'])
        self.log.notice(" DIRAC version: %s" % DIRAC.version)
        self.log.notice(" DIRAC platform: %s" % DIRAC.platform)
        pollingTime = int(self.am_getOption('PollingTime'))
        if pollingTime > 3600:
            self.log.notice(" Polling time: %s hours" % (pollingTime / 3600.))
        else:
            self.log.notice(" Polling time: %s seconds" %
                            self.am_getOption('PollingTime'))
        self.log.notice(" Control dir: %s" % self.am_getControlDirectory())
        self.log.notice(" Work dir: %s" % self.am_getWorkDirectory())
        if self.am_getOption('MaxCycles') > 0:
            self.log.notice(" Cycles: %s" % self.am_getMaxCycles())
        else:
            self.log.notice(" Cycles: unlimited")
        self.log.notice("=" * 40)
        self.__initialized = True
        return S_OK()
コード例 #48
0
def rescheduleFailedJob(jobID,message):
  """ Ask the WMS JobManager to reschedule a failed job and notify operators.

      Best-effort: any exception raised while rescheduling is logged via
      gLogger.exception and swallowed so the JobWrapper can still terminate.

      :param jobID: job identifier (anything castable to int)
      :param message: short description of the failure; becomes the job's
                      application status and the mail body prefix
      :return: None in all cases
  """
  try:
    import DIRAC
    # jobReport is a module-level cache: reuse an existing JobReport if the
    # wrapper already created one, otherwise build a fresh one below.
    global jobReport

    gLogger.warn('Failure during %s' %(message))

    #Setting a job parameter does not help since the job will be rescheduled,
    #instead set the status with the cause and then another status showing the
    #reschedule operation.

    if not jobReport:
      gLogger.info('Creating a new JobReport Object')
      jobReport = JobReport(int(jobID),'JobWrapperTemplate')

    # sendFlag=False: accumulate both status updates, then flush them together
    jobReport.setApplicationStatus( 'Failed %s ' % message, sendFlag = False )
    jobReport.setJobStatus( 'Rescheduled', message, sendFlag = False )

    # We must send Job States and Parameters before it gets reschedule
    jobReport.sendStoredStatusInfo()
    jobReport.sendStoredJobParameters()

    gLogger.info('Job will be rescheduled after exception during execution of the JobWrapper')

    jobManager  = RPCClient('WorkloadManagement/JobManager')
    result = jobManager.rescheduleJob(int(jobID))
    if not result['OK']:
      # Reschedule failure is only logged; there is no recovery path here
      gLogger.warn(result)

    # Send mail to debug errors
    mailAddress = DIRAC.alarmMail
    site        = DIRAC.siteName()
    subject     = 'Job rescheduled at %s' % site
    # NOTE(review): systemCall result is not checked for OK before indexing
    # ret['Value'][1]; presumably 'hostname' cannot fail here — confirm.
    ret         = systemCall(0,'hostname')
    wn          = ret['Value'][1]
    msg         = 'Job %s rescheduled at %s, wn=%s\n' % ( jobID, site, wn )
    msg        += message

    NotificationClient().sendMail(mailAddress,subject,msg,fromAddress="*****@*****.**",localAttempt=False)

    return
  except Exception,x:
    gLogger.exception('JobWrapperTemplate failed to reschedule Job')
    return
コード例 #49
0
ファイル: TimeLeft.py プロジェクト: DIRACGrid/DIRAC
  def __getBatchSystemPlugin(self):
    """ Using the name of the batch system plugin, will return an instance of the plugin class.

        The batch system is detected from characteristic environment variables;
        Machine/Job Features (MJF) is used only as a fallback when both
        MACHINEFEATURES and JOBFEATURES are present and no legacy batch system
        variable was found.

        :return: S_OK( plugin instance ) or S_ERROR( message )
    """
    batchSystems = {
        'LSF': 'LSB_JOBID',
        'PBS': 'PBS_JOBID',
        'BQS': 'QSUB_REQNAME',
        'SGE': 'SGE_TASK_ID'}  # more to be added later
    name = None
    for batchSystem, envVar in batchSystems.items():
      if envVar in os.environ:
        name = batchSystem
        break

    if name is None and 'MACHINEFEATURES' in os.environ and 'JOBFEATURES' in os.environ:
      # Only use MJF if legacy batch system information not available for now
      name = 'MJF'

    if name is None:
      self.log.warn('Batch system type for site %s is not currently supported' % DIRAC.siteName())
      return S_ERROR('Current batch system is not supported')

    self.log.debug('Creating plugin for %s batch system' % (name))
    try:
      batchSystemName = "%sTimeLeft" % (name)
      batchPlugin = __import__('DIRAC.Core.Utilities.TimeLeft.%s' %  # pylint: disable=unused-variable
                               batchSystemName, globals(), locals(), [batchSystemName])
    except ImportError as x:
      msg = 'Could not import DIRAC.Core.Utilities.TimeLeft.%s' % (batchSystemName)
      self.log.warn(x)
      self.log.warn(msg)
      return S_ERROR(msg)

    try:
      # getattr + call instead of eval(): same effect, without evaluating a
      # dynamically-built code string
      batchInstance = getattr(batchPlugin, batchSystemName)()
    except Exception as x:  # pylint: disable=broad-except
      msg = 'Could not instantiate %s()' % (batchSystemName)
      self.log.warn(x)
      self.log.warn(msg)
      return S_ERROR(msg)

    return S_OK(batchInstance)
コード例 #50
0
    def __getConfigFlags(infoDict=None):
        """Build the dirac-configure command line flags for the container.

        Produces the setup (-S), configuration servers (-C) and site name (-n)
        options, falling back to the local configuration when *infoDict* does
        not supply a value.

        Returns a single string of space-separated flags.
        """
        infoDict = infoDict or {}

        flags = []

        setupName = infoDict.get("DefaultSetup") or gConfig.getValue("/DIRAC/Setup", "unknown")
        flags.append("-S '%s'" % setupName)

        servers = infoDict.get("ConfigurationServers") or gConfig.getValue("/DIRAC/Configuration/Servers", [])
        flags.append("-C '%s'" % ",".join(str(srv) for srv in servers))

        flags.append("-n '%s'" % DIRAC.siteName())
        return " ".join(flags)
コード例 #51
0
ファイル: MonitoringClient.py プロジェクト: DIRACGrid/DIRAC
 def initialize( self ):
   """ Set up the monitoring source description for this component.

       Fills in setup and site, resolves the CS section and component
       location/name according to the component type, and registers this
       client with the global monitoring flusher.

       Raises Exception when the component type is unknown.
   """
   self.logger = gLogger.getSubLogger( "Monitoring" )
   self.logger.debug( "Initializing Monitoring Client" )
   self.sourceDict[ 'setup' ] = gConfig.getValue( "/DIRAC/Setup" )
   self.sourceDict[ 'site' ] = DIRAC.siteName()
   compType = self.sourceDict[ 'componentType' ]
   if compType == self.COMPONENT_SERVICE:
     self.cfgSection = PathFinder.getSystemSection( self.sourceDict[ 'componentName' ] )
   elif compType == self.COMPONENT_AGENT:
     self.cfgSection = PathFinder.getAgentSection( self.sourceDict[ 'componentName' ] )
     self.setComponentLocation( Network.getFQDN() )
   elif compType == self.COMPONENT_WEB:
     self.cfgSection = "/WebApp"
     self.setComponentLocation( 'http://%s' % Network.getFQDN() )
     self.setComponentName( 'WebApp' )
   elif compType == self.COMPONENT_SCRIPT:
     self.cfgSection = "/Script"
   else:
     raise Exception( "Component type has not been defined" )
   gMonitoringFlusher.registerMonitoringClient( self )
   # ExitCallback.registerExitCallback( self.forceFlush )
   self.__initialized = True
コード例 #52
0
ファイル: AgentModule.py プロジェクト: KrzysztofCiba/DIRAC
  def am_initialize(self, *initArgs):
    """ Run the agent's initialize() hook and prepare the module for execution.

        Validates the initialize() result, checks control/work directories,
        verifies that executors are defined and that the agent is enabled,
        then logs a summary banner.

        :param initArgs: forwarded verbatim to self.initialize()
        :return: S_OK() on success, S_ERROR( message ) otherwise
    """
    agentName = self.am_getModuleParam('fullName')
    result = self.initialize(*initArgs)
    if result is None:
      return S_ERROR("Error while initializing %s module: initialize must return S_OK/S_ERROR" % agentName)
    if not result['OK']:
      return S_ERROR("Error while initializing %s: %s" % (agentName, result['Message']))
    _checkDir(self.am_getControlDirectory())
    _checkDir(self.am_getWorkDirectory())

    self.__moduleProperties['shifterProxy'] = self.am_getOption('shifterProxy')
    if self.am_monitoringEnabled():
      self.monitor.enable()
    if not self.__moduleProperties['executors']:
      return S_ERROR("At least one executor method has to be defined")
    if not self.am_Enabled():
      return S_ERROR("Agent is disabled via the configuration")
    # Summary banner
    self.log.notice("=" * 40)
    self.log.notice("Loaded agent module %s" % self.__moduleProperties['fullName'])
    self.log.notice(" Site: %s" % DIRAC.siteName())
    self.log.notice(" Setup: %s" % gConfig.getValue("/DIRAC/Setup"))
    self.log.notice(" Base Module version: %s " % __RCSID__)
    self.log.notice(" Agent version: %s" % self.__codeProperties['version'])
    self.log.notice(" DIRAC version: %s" % DIRAC.version)
    self.log.notice(" DIRAC platform: %s" % DIRAC.platform)
    polling = int(self.am_getOption('PollingTime'))
    if polling > 3600:
      self.log.notice(" Polling time: %s hours" % (polling / 3600.))
    else:
      self.log.notice(" Polling time: %s seconds" % self.am_getOption('PollingTime'))
    self.log.notice(" Control dir: %s" % self.am_getControlDirectory())
    self.log.notice(" Work dir: %s" % self.am_getWorkDirectory())
    if self.am_getOption('MaxCycles') > 0:
      self.log.notice(" Cycles: %s" % self.am_getMaxCycles())
    else:
      self.log.notice(" Cycles: unlimited")
    self.log.notice("=" * 40)
    self.__initialized = True
    return S_OK()
コード例 #53
0
ファイル: dirac-jobexec.py プロジェクト: marianne013/DIRAC
def jobexec( jobxml, wfParameters ):
  """ Load a job workflow from XML, wire in the standard tools and execute it.

      Exits the process with code 1 if the workflow file does not exist.

      :param jobxml: path to the workflow XML file
      :param wfParameters: dict of parameter name/value pairs propagated to
                           the workflow before execution
      :return: result of workflow.execute()
  """
  jobfile = os.path.abspath( jobxml )
  if not os.path.exists( jobfile ):
    gLogger.warn( 'Path to specified workflow %s does not exist' % ( jobfile ) )
    sys.exit( 1 )
  workflow = fromXMLFile( jobfile )
  gLogger.debug( workflow )
  code = workflow.createCode()
  gLogger.debug( code )
  jobID = 0
  # dict.has_key() is deprecated (and removed in Python 3): use 'in' instead
  if 'JOBID' in os.environ:
    jobID = os.environ['JOBID']
    gLogger.info( 'DIRAC JobID %s is running at site %s' % ( jobID, DIRAC.siteName() ) )

  workflow.addTool( 'JobReport', JobReport( jobID ) )
  workflow.addTool( 'AccountingReport', DataStoreClient() )
  workflow.addTool( 'Request', Request() )

  # Propagate the command line parameters to the workflow if any
  for pName, pValue in wfParameters.items():
    workflow.setValue( pName, pValue )

  return workflow.execute()
コード例 #54
0
  def __init__( self, infosys = None, master_host = None, mirrors = None ):
    """ Default constructor

        :param infosys: LCG GFAL infosys endpoint; read from the CS when not given
        :param master_host: master LFC host; read from the CS when not given
        :param mirrors: optional list of mirror LFC hosts; when empty, the
                        location-ordered catalogs for the current site are used
    """
    # None sentinel instead of a mutable default argument ([] would be shared
    # across all instances); behavior for callers is unchanged.
    if mirrors is None:
      mirrors = []
    if not infosys:
      configPath = '/Resources/FileCatalogs/LcgFileCatalogCombined/LcgGfalInfosys'
      infosys = gConfig.getValue( configPath )

    self.valid = False
    if not master_host:
      configPath = '/Resources/FileCatalogs/LcgFileCatalogCombined/MasterHost'
      master_host = gConfig.getValue( configPath )
    if master_host:
      # Create the master LFC client first
      self.lfc = LcgFileCatalogClient( infosys, master_host )
      if self.lfc.isOK():
        self.valid = True

      if not mirrors:
        siteName = DIRAC.siteName()
        res = getLocationOrderedCatalogs( siteName = siteName )
        if not res['OK']:
          mirrors = []
        else:
          mirrors = res['Value']
      # Create the mirror LFC instances
      self.mirrors = []
      for mirror in mirrors:
        lfc = LcgFileCatalogClient( infosys, mirror )
        self.mirrors.append( lfc )
      self.nmirrors = len( self.mirrors )

      # Keep the environment for the master instance
      self.master_host = self.lfc.host
      os.environ['LFC_HOST'] = self.master_host
      os.environ['LCG_GFAL_INFOSYS'] = infosys
      self.name = 'LFC'
      self.timeout = 3000
0
      Script.localCfg.addDefaultEntry( '/LocalSite/Site', siteName )
      DIRAC.__siteName = False
      if ceName:
        DIRAC.gLogger.notice( 'Setting /LocalSite/GridCE = %s' % ceName )
        Script.localCfg.addDefaultEntry( '/LocalSite/GridCE', ceName )

      if not localSE and siteName in sites:
        localSE = getSEsForSite( siteName )
        if localSE['OK'] and localSE['Value']:
          localSE = ','.join( localSE['Value'] )
          DIRAC.gLogger.notice( 'Setting /LocalSite/LocalSE =', localSE )
          Script.localCfg.addDefaultEntry( '/LocalSite/LocalSE', localSE )
        break

# Register the gateway for this site (if one was requested) so that clients
# at this site route service calls through it.
if gatewayServer:
  DIRAC.gLogger.verbose( '/DIRAC/Gateways/%s =' % DIRAC.siteName(), gatewayServer )
  Script.localCfg.addDefaultEntry( '/DIRAC/Gateways/%s' % DIRAC.siteName(), gatewayServer )

# Create the local cfg if it is not yet there
if not outputFile:
  outputFile = DIRAC.gConfig.diracConfigFilePath
outputFile = os.path.abspath( outputFile )
if not os.path.exists( outputFile ):
  configDir = os.path.dirname( outputFile )
  mkDir(configDir)
  # Mark that the dumped configuration will need a refresh later on
  update = True
  DIRAC.gConfig.dumpLocalCFGToFile( outputFile )

# We need user proxy or server certificate to continue
if not useServerCert:
  Script.enableCS()
コード例 #56
0
ファイル: TimeLeft.py プロジェクト: JanEbbing/DIRAC
  def getTimeLeft( self, cpuConsumed = 0.0 ):
    """Returns the CPU Time Left for supported batch systems.  The CPUConsumed
       is the current raw total CPU.

       :param cpuConsumed: CPU already consumed in real seconds; non-zero when
                           called by the JobAgent, 0 when called by the watchdog
       :return: S_OK( timeLeft in normalized units ) or S_ERROR( message )
    """
    # Quit if no scale factor available
    if not self.scaleFactor:
      return S_ERROR( '/LocalSite/CPUScalingFactor not defined for site %s' % DIRAC.siteName() )

    if not self.batchPlugin:
      return S_ERROR( self.batchError )

    resourceDict = self.batchPlugin.getResourceUsage()
    if not resourceDict['OK']:
      self.log.warn( 'Could not determine timeleft for batch system at site %s' % DIRAC.siteName() )
      return resourceDict

    resources = resourceDict['Value']
    if not resources['CPULimit'] or not resources['WallClockLimit']:
      # This should never happen
      return S_ERROR( 'No CPU or WallClock limit obtained' )

    # Remaining fractions of the CPU and wall-clock allocations; note that
    # self.cpuMargin is used as the margin for BOTH quantities here.
    timeLeft = 0.
    cpu = float( resources['CPU'] )
    cpuLimit = float( resources['CPULimit'] )
    cpuUsedFraction = cpu / cpuLimit
    cpuRemainingFraction = 1. - cpuUsedFraction
    wc = float( resources['WallClock'] )
    wcLimit = float( resources['WallClockLimit'] )
    wcUsedFraction = wc / wcLimit
    wcRemainingFraction = 1. - wcUsedFraction
    marginFraction = self.cpuMargin / 100.
    fractionTuple = ( 100. * cpuRemainingFraction, 100. * wcRemainingFraction, self.cpuMargin )
    self.log.verbose( 'Used CPU is %.1f s out of %.1f, Used WallClock is %.1f s out of %.1f.' % ( cpu, cpuLimit, wc, wcLimit ) )
    self.log.verbose( 'Remaining CPU %.02f%%, Remaining WallClock %.02f%%, margin %s%%' % fractionTuple )

    validTimeLeft = False
    if wcRemainingFraction > cpuRemainingFraction and ( wcRemainingFraction - cpuRemainingFraction ) > marginFraction:
      # FIXME: I have no idea why this test is done (PhC)
      self.log.verbose( 'Remaining CPU %.02f%% < Remaining WallClock  %.02f%% and difference > margin %s%%' % fractionTuple )
      validTimeLeft = True
    else:
      if cpuRemainingFraction > marginFraction and wcRemainingFraction > marginFraction:
        self.log.verbose( 'Remaining CPU %.02f%% and Remaining WallClock %.02f%% both > margin %s%%' % fractionTuple )
        validTimeLeft = True
      else:
        self.log.verbose( 'Remaining CPU %.02f%% or WallClock %.02f%% < margin %s%% so no time left' % fractionTuple )
    if validTimeLeft:
      if cpu and cpuConsumed > 3600. and self.normFactor:
        # If there has been more than 1 hour of consumed CPU and
        # there is a Normalization set for the current CPU
        # use that value to renormalize the values returned by the batch system
        # NOTE: cpuConsumed is non-zero for call by the JobAgent and 0 for call by the watchdog
        # cpuLimit and cpu may be in the units of the batch system, not real seconds... (in this case the other case won't work)
        # therefore renormalise it using cpuConsumed (which is in real seconds)
        timeLeft = ( cpuLimit - cpu ) * self.normFactor * cpuConsumed / cpu
      elif self.normFactor:
        # FIXME: this is always used by the watchdog... Also used by the JobAgent
        #        if consumed less than 1 hour of CPU
        # It was using self.scaleFactor but this is inconsistent: use the same as above
        # In case the returned cpu and cpuLimit are not in real seconds, this is however rubbish
        timeLeft = ( cpuLimit - cpu ) * self.normFactor
      else:
        # Last resort recovery...
        timeLeft = ( cpuLimit - cpu ) * self.scaleFactor

      self.log.verbose( 'Remaining CPU in normalized units is: %.02f' % timeLeft )
      return S_OK( timeLeft )
    else:
      return S_ERROR( 'No time left for slot' )
コード例 #57
0
ファイル: InputDataResolution.py プロジェクト: sposs/ILCDIRAC
    def __resolveInputData(self):
        """This method controls the execution of the DIRAC input data modules according
       to the ILC VO policy defined in the configuration service.

       The policy is taken from the job definition when present, otherwise
       from /InputDataPolicy for the current site (or its Default entry).
       Each policy module is run in turn until all replicas are resolved.

       :return: S_OK with 'Successful'/'Failed' entries, or S_ERROR
    """
        # dict.has_key() and the string module functions are deprecated;
        # the 'in' operator and str methods are equivalent and work on
        # Python 2 and 3 alike.
        if 'SiteName' in self.arguments['Configuration']:
            site = self.arguments['Configuration']['SiteName']
        else:
            site = DIRAC.siteName()

        policy = []
        if 'Job' not in self.arguments:
            self.arguments['Job'] = {}

        if 'InputDataPolicy' in self.arguments['Job']:
            policy = self.arguments['Job']['InputDataPolicy']
            #In principle this can be a list of modules with the first taking precedence
            if type(policy) in types.StringTypes:
                policy = [policy]
            self.log.info('Job has a specific policy setting: %s' %
                          (', '.join(policy)))
        else:
            self.log.verbose(
                'Attempting to resolve input data policy for site %s' % site)
            inputDataPolicy = self.ops.getOptionsDict('/InputDataPolicy')
            if not inputDataPolicy:
                return S_ERROR(
                    'Could not resolve InputDataPolicy from /InputDataPolicy')

            options = inputDataPolicy['Value']
            if site in options:
                policy = options[site]
                policy = [x.strip() for x in policy.split(',')]
                self.log.info(
                    'Found specific input data policy for site %s:\n%s' %
                    (site, ',\n'.join(policy)))
            elif 'Default' in options:
                policy = options['Default']
                policy = [x.strip() for x in policy.split(',')]
                self.log.info(
                    'Applying default input data policy for site %s:\n%s' %
                    (site, ',\n'.join(policy)))

        dataToResolve = None  #if none, all supplied input data is resolved
        allDataResolved = False
        successful = {}
        failedReplicas = []
        for modulePath in policy:
            if not allDataResolved:
                result = self.__runModule(modulePath, dataToResolve)
                if not result['OK']:
                    self.log.warn('Problem during %s execution' % modulePath)
                    return result

                if 'Failed' in result:
                    failedReplicas = result['Failed']

                if failedReplicas:
                    self.log.info(
                        '%s failed for the following files:\n%s' %
                        (modulePath, '\n'.join(failedReplicas)))
                    # Retry only the failed replicas with the next policy module
                    dataToResolve = failedReplicas
                else:
                    self.log.info('All replicas resolved after %s execution' %
                                  (modulePath))
                    allDataResolved = True

                successful.update(result['Successful'])
                self.log.verbose(successful)

        result = S_OK()
        result['Successful'] = successful
        result['Failed'] = failedReplicas
        return result
コード例 #58
0
ファイル: InputDataResolution.py プロジェクト: sposs/ILCDIRAC
    def __resolveInputData(self):
        """This method controls the execution of the DIRAC input data modules according
       to the ILC VO policy defined in the configuration service.

       The policy is taken from the job definition when present, otherwise
       from /InputDataPolicy for the current site (or its Default entry).
       Each policy module is run in turn until all replicas are resolved.

       :return: S_OK with 'Successful'/'Failed' entries, or S_ERROR
    """
        # dict.has_key() and the string module functions are deprecated;
        # the 'in' operator and str methods are equivalent and work on
        # Python 2 and 3 alike.
        if "SiteName" in self.arguments["Configuration"]:
            site = self.arguments["Configuration"]["SiteName"]
        else:
            site = DIRAC.siteName()

        policy = []
        if "Job" not in self.arguments:
            self.arguments["Job"] = {}

        if "InputDataPolicy" in self.arguments["Job"]:
            policy = self.arguments["Job"]["InputDataPolicy"]
            # In principle this can be a list of modules with the first taking precedence
            if type(policy) in types.StringTypes:
                policy = [policy]
            self.log.info("Job has a specific policy setting: %s" % (", ".join(policy)))
        else:
            self.log.verbose("Attempting to resolve input data policy for site %s" % site)
            inputDataPolicy = self.ops.getOptionsDict("/InputDataPolicy")
            if not inputDataPolicy:
                return S_ERROR("Could not resolve InputDataPolicy from /InputDataPolicy")

            options = inputDataPolicy["Value"]
            if site in options:
                policy = options[site]
                policy = [x.strip() for x in policy.split(",")]
                self.log.info("Found specific input data policy for site %s:\n%s" % (site, ",\n".join(policy)))
            elif "Default" in options:
                policy = options["Default"]
                policy = [x.strip() for x in policy.split(",")]
                self.log.info(
                    "Applying default input data policy for site %s:\n%s" % (site, ",\n".join(policy))
                )

        dataToResolve = None  # if none, all supplied input data is resolved
        allDataResolved = False
        successful = {}
        failedReplicas = []
        for modulePath in policy:
            if not allDataResolved:
                result = self.__runModule(modulePath, dataToResolve)
                if not result["OK"]:
                    self.log.warn("Problem during %s execution" % modulePath)
                    return result

                if "Failed" in result:
                    failedReplicas = result["Failed"]

                if failedReplicas:
                    self.log.info(
                        "%s failed for the following files:\n%s" % (modulePath, "\n".join(failedReplicas))
                    )
                    # Retry only the failed replicas with the next policy module
                    dataToResolve = failedReplicas
                else:
                    self.log.info("All replicas resolved after %s execution" % (modulePath))
                    allDataResolved = True

                successful.update(result["Successful"])
                self.log.verbose(successful)

        result = S_OK()
        result["Successful"] = successful
        result["Failed"] = failedReplicas
        return result
コード例 #59
0
ファイル: BaseClient.py プロジェクト: marianne013/DIRAC
  def __findServiceURL(self):
    """
        Discovers the URL of a service, taking into account gateways, multiple URLs, banned URLs


        If the site on which we run is configured to use gateways (/DIRAC/Gateways/<siteName>),
        these URLs will be used. To ignore the gateway, it is possible to set KW_IGNORE_GATEWAYS
        to False in kwargs.

        If self._destinationSrv (given as constructor attribute) is a properly formed URL,
        we just return this one. If we have to use a gateway, we just replace the server name in the url.

        The list of URLs defined in the CS (<System>/URLs/<Component>) is randomized

        This method also sets some attributes:
          * self.__nbOfUrls = number of URLs
          * self.__nbOfRetry = 2 if we have more than 2 urls, otherwise 3
          * self.__bannedUrls is reinitialized if all the URLs are banned

        :return: S_OK( the selected URL ) or S_ERROR

    """
    if not self.__initStatus['OK']:
      return self.__initStatus

    # Load the Gateways URLs for the current site Name
    gatewayURL = False
    if self.KW_IGNORE_GATEWAYS not in self.kwargs or not self.kwargs[self.KW_IGNORE_GATEWAYS]:
      dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
      if dRetVal['OK']:
        # Pick one gateway at random and keep only its scheme://host:port part
        rawGatewayURL = List.randomize(List.fromChar(dRetVal['Value'], ","))[0]
        gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # If what was given as constructor attribute is a properly formed URL,
    # we just return this one.
    # If we have to use a gateway, we just replace the server name in it
    for protocol in gProtocolDict:
      if self._destinationSrv.find("%s://" % protocol) == 0:
        gLogger.debug("Already given a valid url", self._destinationSrv)
        if not gatewayURL:
          return S_OK(self._destinationSrv)
        gLogger.debug("Reconstructing given URL to pass through gateway")
        path = "/".join(self._destinationSrv.split("/")[3:])
        finalURL = "%s/%s" % (gatewayURL, path)
        gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
        return S_OK(finalURL)

    # Not a full URL: treat _destinationSrv as a service name routed via the gateway
    if gatewayURL:
      gLogger.debug("Using gateway", gatewayURL)
      return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    # We extract the list of URLs from the CS (System/URLs/Component)
    try:
      urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
      return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, repr(e)))
    if not urls:
      return S_ERROR("URL for service %s not found" % self._destinationSrv)

    failoverUrls = []
    # Try if there are some failover URLs to use as last resort
    # (best-effort: any error while reading them is ignored)
    try:
      failoverUrlsStr = getServiceFailoverURL(self._destinationSrv, setup=self.setup)
      if failoverUrlsStr:
        failoverUrls = failoverUrlsStr.split(',')
    except Exception as e:
      pass

    # We randomize the list, and add at the end the failover URLs (System/FailoverURLs/Component)
    urlsList = List.randomize(List.fromChar(urls, ",")) + failoverUrls
    self.__nbOfUrls = len(urlsList)
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3  # we retry 2 times all services, if we run more than 2 services
    if self.__nbOfUrls == len(self.__bannedUrls):
      self.__bannedUrls = []  # retry all urls
      gLogger.debug("Retrying again all URLs")

    if len(self.__bannedUrls) > 0 and len(urlsList) > 1:
      # we have host which is not accessible. We remove that host from the list.
      # We only remove if we have more than one instance
      for i in self.__bannedUrls:
        gLogger.debug("Removing banned URL", "%s" % i)
        urlsList.remove(i)

    # Take the first URL from the list
    sURL = urlsList[0]

    # If we have banned URLs, and several URLs at disposal, we make sure that the selected sURL
    # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl

    if len(self.__bannedUrls) > 0 and self.__nbOfUrls > 2:  # when we have multiple services then we can
      # have a situation when two services are running on the same machine with different ports...
      retVal = Network.splitURL(sURL)
      nexturl = None
      if retVal['OK']:
        nexturl = retVal['Value']

        found = False
        for i in self.__bannedUrls:
          retVal = Network.splitURL(i)
          if retVal['OK']:
            bannedurl = retVal['Value']
          else:
            break
          # We found a banned URL on the same host as the one we are running on
          if nexturl[1] == bannedurl[1]:
            found = True
            break
        if found:
          nexturl = self.__selectUrl(nexturl, urlsList[1:])
          if nexturl:  # an url found which is in different host
            sURL = nexturl
    gLogger.debug("Discovering URL for service", "%s -> %s" % (self._destinationSrv, sURL))
    return S_OK(sURL)