Esempio n. 1
0
def checkDatatype(prodID, datatype):
  """Check if the datatype makes sense for given production.

  The type of the production is obtained from the TransformationClient and is looked
  for in the entry stored under ``datatype`` in the ``Production/TransformationDatatypes``
  options dictionary of Operations.

  :param prodID: ID of the production (transformation) to look up
  :param datatype: data type to validate, used as key into the options dictionary
  :returns: S_OK() if the check passes or is skipped via the SKIP_CHECK environment
            variable; otherwise S_ERROR, or the failed result of the underlying call
  """
  # skip data type check when creating replications in development for prod productions this check doesn't work
  if os.environ.get('SKIP_CHECK', False):
    LOG.warn("Skipping Datatype check!")
    return S_OK()

  trafo = TransformationClient().getTransformations(dict(TransformationID=prodID))
  if not trafo['OK']:
    return trafo
  if len(trafo['Value']) != 1:
    return S_ERROR("Did not get unique production for this prodID")

  # only the part of the production type before the first underscore is compared
  trafoType = trafo['Value'][0]['Type'].split("_")[0]

  dataTypes = Operations().getOptionsDict('Production/TransformationDatatypes')
  if not dataTypes['OK']:
    return dataTypes

  # A datatype without an entry in the configuration cannot fit any production:
  # report it through the usual S_ERROR channel instead of raising KeyError.
  allowedTypes = dataTypes['Value'].get(datatype)
  if allowedTypes is None:
    return S_ERROR("Datatype %r is not defined in Production/TransformationDatatypes" % (datatype,))
  if trafoType not in allowedTypes:
    return S_ERROR("Datatype %r doesn't fit production type %r for prodID %s" % (datatype, trafoType, prodID))

  return S_OK()
  def __workflowxml(self, transid):
    """Return the ``Body`` of the transformation with the given ID.

    :param transid: ID of the transformation to look up
    :returns: dict with ``success`` set to ``"true"`` and ``result`` holding the
              ``Body`` field of the first transformation returned by the query
    :raises WErr: if the query fails or does not return any transformation
    """
    tsClient = TransformationClient()
    retVal = tsClient.getTransformations({'TransformationID': transid})
    if not retVal['OK']:
      raise WErr.fromSERROR(retVal)
    if not retVal['Value']:
      # An empty result would otherwise surface as a bare IndexError below;
      # report it the same way as a failed query.
      raise WErr.fromSERROR({'OK': False, 'Message': "Transformation %s not found" % transid})
    return {"success": "true", "result": retVal['Value'][0]['Body']}
Esempio n. 3
0
    def __workflowxml(self, transid):
        """Return the ``Body`` of the transformation with the given ID.

        :param transid: ID of the transformation to look up
        :returns: dict with ``success`` set to ``"true"`` and ``result`` holding the
                  ``Body`` field of the first transformation returned by the query
        :raises WErr: if the query fails or does not return any transformation
        """
        tsClient = TransformationClient()
        retVal = tsClient.getTransformations({'TransformationID': transid})
        if not retVal['OK']:
            raise WErr.fromSERROR(retVal)
        if not retVal['Value']:
            # An empty result would otherwise surface as a bare IndexError below;
            # report it the same way as a failed query.
            raise WErr.fromSERROR({'OK': False, 'Message': "Transformation %s not found" % transid})
        return {"success": "true", "result": retVal['Value'][0]['Body']}
Esempio n. 4
0
  def checkDatatype( self, prodID, datatype ):
    """ Verify that :datatype: is a data type produced by the type of production :prodID:

    The production is looked up through the TransformationClient; the part of its type
    before the first underscore selects the list of accepted data types from a fixed table.

    :param prodID: ID of the production (transformation)
    :param datatype: data type to validate
    :return: S_OK() if the data type is accepted, S_ERROR otherwise, or the failed
             result of the transformation query
    """
    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    result = TransformationClient().getTransformations( { 'TransformationID': prodID } )
    if not result['OK']:
      return result
    productions = result['Value']
    if len( productions ) != 1:
      return S_ERROR( "Did not get unique production for this prodID" )

    trafoType = productions[0]['Type'].split( "_" )[0]

    # data types accepted for each known production type; unknown types accept nothing
    datatypesPerProduction = { 'MCGeneration': ['GEN'],
                               'Split': ['GEN'],
                               'MCSimulation': ['SIM'],
                               'MCReconstruction': ['REC', 'DST'],
                             }

    if trafoType in datatypesPerProduction and datatype in datatypesPerProduction[trafoType]:
      return S_OK()
    return S_ERROR( "Datatype %s doesn't fit production type %s" %( datatype, trafoType ) )
class TransformationCleaningAgent( AgentModule ):
  '''
  .. class:: TransformationCleaningAgent

  :param ReplicaManger replicaManager: ReplicaManager instance
  :param TransfromationClient transClient: TransfromationClient instance
  :param RequestClient requestClient: RequestClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  '''

  def __init__( self, *args, **kwargs ):
    ''' Declare the configuration attributes and create the clients used by the agent

    All arguments are forwarded unchanged to AgentModule.__init__. The configuration
    attributes are only declared here (set to None).
    '''
    AgentModule.__init__( self, *args, **kwargs )

    # # placeholders for CS options
    self.transformationTypes = None  # transformation types
    self.directoryLocations = None  # directory locations
    self.transfidmeta = None  # transformation metadata
    self.archiveAfter = None  # archive period in days
    self.activeStorages = None  # active SEs
    self.logSE = None  # transformation log SEs
    self.enableFlag = None  # enable/disable execution

    # # clients
    self.replicaManager = ReplicaManager()  # replica manager
    self.transClient = TransformationClient()  # transformation client
    self.wmsClient = WMSClient()  # wms client
    self.requestClient = RequestClient()  # request client
    self.metadataClient = FileCatalogClient()  # file catalog client

  def initialize( self ):
    ''' Read the agent options and store them on the instance

    Sets the 'shifterProxy' option to 'DataManager' and then reads, with their defaults,
    the options TransformationTypes, DirectoryLocations, TransfIDMeta, ArchiveAfter,
    ActiveSEs, TransformationLogSE and EnableFlag.

    :param self: self reference
    :return: S_OK()
    '''
    # # shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )

    # # transformation types: the agent option wins, otherwise both Operations lists are merged
    configuredTypes = self.am_getOption( 'TransformationTypes', [] )
    if not configuredTypes:
      processingTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      manipulationTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      configuredTypes = processingTypes + manipulationTypes
    self.transformationTypes = sortList( configuredTypes )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )

    # # directory locations
    locations = self.am_getOption( 'DirectoryLocations', [ 'TransformationDB', 'MetadataCatalog' ] )
    self.directoryLocations = sortList( locations )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )

    # # transformation metadata
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )

    # # archive period in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )

    # # active SEs
    storages = self.am_getOption( 'ActiveSEs', [] )
    self.activeStorages = sortList( storages )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )

    # # transformation log SEs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )

    # # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    return S_OK()

  #############################################################################
  def execute( self ):
    ''' execution in one agent's cycle

    Unless the EnableFlag option is different from 'True', three groups of transformations
    of the configured types are processed:

    * status 'Cleaning': archived if of type 'Replication' or 'Removal', cleaned otherwise
    * status 'RemovingFiles': their output is removed
    * status 'Completed' and not updated for :archiveAfter: days: archived

    A failure for one transformation, or a failed query, is logged and does not stop the cycle.

    :param self: self reference
    :return: S_OK()
    '''

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if self.enableFlag != 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in [ 'Replication', 'Removal' ]:
          opRes = self.archiveTransformation( transID )
          if not opRes['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transID, opRes['Message'] ) )
        else:
          opRes = self.cleanTransformation( transID )
          if not opRes['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transID, opRes['Message'] ) )
    else:
      self.log.error( "Could not get the transformations in Cleaning status", res['Message'] )

    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        opRes = self.removeTransformationOutput( transID )
        if not opRes['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transID, opRes['Message'] ) )
    else:
      self.log.error( "Could not get the transformations in RemovingFiles status", res['Message'] )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        opRes = self.archiveTransformation( transID )
        if not opRes['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transID, opRes['Message'] ) )
    else:
      self.log.error( "Could not get the transformations", res['Message'] )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    ''' Collect the directories that belong to the supplied transformation

    Depending on :directoryLocations:, the 'OutputDirectories' parameter of the
    transformation and/or the directories found in the metadata catalog for the
    transformation ID are used. Candidates are filtered through :_addDirs:.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK( sorted list of directories ) or the failed result of a lookup
    '''
    found = []

    if 'TransformationDB' in self.directoryLocations:
      paramRes = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not paramRes['OK']:
        self.log.error( "Failed to obtain transformation directories", paramRes['Message'] )
        return paramRes
      found = self._addDirs( transID, paramRes['Value'].splitlines(), found )

    if 'MetadataCatalog' in self.directoryLocations:
      catalogRes = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not catalogRes['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", catalogRes['Message'] )
        return catalogRes
      found = self._addDirs( transID, catalogRes['Value'], found )

    if not found:
      self.log.info( "No output directories found" )
    return S_OK( sortList( found ) )

  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    ''' append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    '''
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    ''' Remove :directory: from every active storage element

    Stops at the first storage element for which the removal fails.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() or the failed result of the first removal that went wrong
    '''
    for seName in self.activeStorages:
      removal = self.__removeStorageDirectory( directory, seName )
      if removal['OK']:
        continue
      return removal
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    ''' wipe out all contents from :directory: at :storageElement:

    The PFN of the directory is resolved first; if the directory does not exist at the
    storage element nothing is removed, otherwise it is removed recursively.

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    :return: S_OK() when the directory is absent or was removed, otherwise S_ERROR or the
             failed result of the underlying call
    '''
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )
    res = self.replicaManager.getPfnForLfn( [directory], storageElement )
    if not res['OK']:
      self.log.error( "Failed to get PFN for directory", res['Message'] )
      return res
    # dedicated loop names: the :directory: argument is still needed for the messages below
    for failedDir, error in res['Value']['Failed'].items():
      self.log.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( failedDir, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    successful = res['Value']['Successful']
    if not successful:
      # neither failed nor successful: there is no PFN to work with
      self.log.error( 'Failed to obtain directory PFN from LFN', 'no PFN returned for %s' % directory )
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    # list() keeps this working when dict.values() is a view and not a list
    storageDirectory = list( successful.values() )[0]
    res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True )
    if not res['OK']:
      self.log.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = self.replicaManager.removeStorageDirectory( storageDirectory,
                                                      storageElement,
                                                      recursive = True,
                                                      singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove storage directory", res['Message'] )
      return res
    self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'],
                                                                     directory,
                                                                     storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    ''' Remove every file registered in the catalog under :directory:

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() if nothing was found or everything was removed, S_ERROR if some
             files could not be removed, or the failed result of an underlying call
    '''
    listing = self.__getCatalogDirectoryContents( [directory] )
    if not listing['OK']:
      return listing
    remnants = listing['Value']
    if not remnants:
      return S_OK()

    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( remnants ) )
    removal = self.replicaManager.removeFile( remnants )
    if not removal['OK']:
      return removal

    notRemoved = removal['Value']['Failed']
    for lfn, reason in notRemoved.items():
      self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
    if notRemoved:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    ''' get catalog contents under paths :directories:

    The directories are listed one after the other; sub-directories found on the way are
    queued and listed as well. A directory that does not exist or cannot be listed is
    logged and skipped.

    :param self: self reference
    :param list directories: list of paths in catalog
    :return: S_OK( list of the files found )
    '''
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    # work on a copy: the queue is consumed and extended below, the caller's list must stay untouched
    activeDirs = list( directories )
    allFiles = {}
    while activeDirs:
      currentDir = activeDirs.pop( 0 )
      res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        self.log.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    self.log.info( "Found %d files" % len( allFiles ) )
    # always hand back a real list, also when dict.keys() is a view
    return S_OK( list( allFiles ) )

  def cleanTransformationLogFiles( self, directory ):
    ''' Remove the log directory :directory: from the log storage element :logSE:

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() or the failed result of the removal
    '''
    self.log.info( "Removing log files found in the directory %s" % directory )
    removal = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True )
    if removal['OK']:
      self.log.info( "Successfully removed transformation log directory" )
      return S_OK()
    self.log.error( "Failed to remove log files", removal['Message'] )
    return removal

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    ''' This just removes any mention of the output data from the catalog and storage

    Every directory of the transformation whose path does not contain '/LOG/' is cleaned in
    the catalog and on the active storage elements, then the files still found in the
    metadata catalog are removed and the transformation status is set to 'RemovedFiles'.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() on success and also when the directories could not be obtained,
             otherwise the failed result of the step that went wrong
    '''
    self.log.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    # (the files are selected by transformation ID only, the directories are not needed)
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully removed output of transformation %s" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    ''' Archive a transformation

    The tasks of the transformation are cleaned, then the transformation itself is cleaned
    through the transformation client and finally its status is set to 'Archived'.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() or the failed result of the step that went wrong
    '''
    self.log.info( "Archiving transformation %s" % transID )

    # jobs in the WMS and any failover requests found go first
    tasksRes = self.cleanTransformationTasks( transID )
    if not tasksRes['OK']:
      return tasksRes

    # then the files and job information kept in the transformation DB
    dbRes = self.transClient.cleanTransformation( transID )
    if not dbRes['OK']:
      return dbRes
    self.log.info( "Successfully archived transformation %d" % transID )

    # finally flag the transformation as archived
    statusRes = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if statusRes['OK']:
      self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), statusRes['Message'] )
    return statusRes

  def cleanTransformation( self, transID ):
    ''' This removes any mention of the supplied transformation

    The tasks of the transformation are cleaned, every directory of the transformation is
    cleaned in the catalog and on the active storage elements (directories whose path
    contains '/LOG/' are also removed from the log SE first), the files still found in the
    metadata catalog are removed, the transformation is cleaned through the transformation
    client and its status is set to 'Deleted'.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() on success and also when the directories could not be obtained,
             otherwise the failed result of the step that went wrong
    '''
    self.log.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the metadata catalog
    # (the files are selected by transformation ID only, the directories are not needed)
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully cleaned transformation %s" % transID )
    # Change the status of the transformation to deleted
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Deleted" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID, directories = None ):
    ''' wipe out files from catalog

    The files are selected in the metadata catalog by the transformation ID stored under
    the :transfidmeta: tag and removed through the replica manager.

    :param self: self reference
    :param int transID: transformation ID
    :param list directories: not used; accepted because callers in this class pass the
                             transformation directories as a second argument
    :return: S_OK() if nothing was found or everything was removed, S_ERROR if some files
             could not be removed, or the failed result of an underlying call
    '''
    res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()
    res = self.replicaManager.removeFile( fileToRemove )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    ''' Remove the tasks of a transformation

    The external IDs of the tasks are collected; if there are any, they are handed to the
    request removal for transformations of type 'Replication' or 'Removal' and to the WMS
    task removal for every other type.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() or the failed result of the step that went wrong
    '''
    idsRes = self.__getTransformationExternalIDs( transID )
    if not idsRes['OK']:
      return idsRes
    externalIDs = idsRes['Value']
    if not externalIDs:
      # no task, nothing to remove
      return S_OK()

    typeRes = self.transClient.getTransformationParameters( transID, ['Type'] )
    if not typeRes['OK']:
      self.log.error( "Failed to determine transformation type" )
      return typeRes

    if typeRes['Value'] in [ 'Replication', 'Removal' ]:
      removal = self.__removeRequests( externalIDs )
    else:
      removal = self.__removeWMSTasks( externalIDs )
    if not removal['OK']:
      return removal
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    ''' Gather the 'ExternalID' of every task of transformation :transID:

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK( list of external IDs ) or the failed result of the task query
    '''
    tasksRes = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if tasksRes['OK']:
      externalIDs = []
      for task in tasksRes["Value"]:
        externalIDs.append( task['ExternalID'] )
      self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
      return S_OK( externalIDs )
    self.log.error( "Failed to get externalIDs for transformation %d" % transID, tasksRes['Message'] )
    return tasksRes

  def __removeRequests( self, requestIDs ):
    ''' Placeholder for the removal of requests: nothing is removed yet

    Only logs an error stating that the requests are not removed.

    :param self: self reference
    :param requestIDs: IDs of the requests that should be removed (currently unused)
    :return: S_OK()
    '''
    self.log.error( "Not removing requests but should do" )
    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    ''' wipe out jobs and their requests from the system

    The jobs are killed and then deleted in the WMS in chunks of 500; if that worked for
    every chunk, the requests associated to the jobs are deleted as well.

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list transJobIDs: job IDs
    :return: S_OK() if everything was removed, S_ERROR if some jobs or requests could not
             be removed, or the failed result of the request lookup
    '''
    # Prevent 0 job IDs
    jobIDs = [ jobID for jobID in ( int( j ) for j in transJobIDs ) if jobID ]
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):
      # both operations are always attempted for a chunk, whatever the outcome of the first
      killed = self.__applyWMSOperation( self.wmsClient.killJob, jobList, 'kill', 'killed' )
      deleted = self.__applyWMSOperation( self.wmsClient.deleteJob, jobList, 'remove', 'removed' )
      allRemove = allRemove and killed and deleted

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    self.log.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()
    failed = 0
    for jobID, requestName in failoverRequests.items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        # %s and not %d: the check above allows for job IDs given as strings
        self.log.verbose( "Removed request %s associated to job %s." % ( requestName, jobID ) )
    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()

  def __applyWMSOperation( self, wmsCall, jobList, verb, verbPast ):
    ''' run one bulk WMS operation on :jobList: and log its outcome

    :param self: self reference
    :param wmsCall: bound WMS client method taking the list of job IDs
    :param list jobList: job IDs
    :param str verb: infinitive used in the failure messages
    :param str verbPast: past participle used in the success message
    :return: False if the operation failed for some jobs or failed as a whole, True
             otherwise; jobs that do not exist in the WMS are not counted as a failure
    '''
    res = wmsCall( jobList )
    if res['OK']:
      self.log.info( "Successfully %s %d jobs from WMS" % ( verbPast, len( jobList ) ) )
      return True
    if "NonauthorizedJobIDs" in res:
      self.log.error( "Failed to %s %s jobs because not authorized" % ( verb, len( res['NonauthorizedJobIDs'] ) ) )
      return False
    if "FailedJobIDs" in res:
      self.log.error( "Failed to %s %s jobs" % ( verb, len( res['FailedJobIDs'] ) ) )
      return False
    if "InvalidJobIDs" in res:
      self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      return True
    # a failure without per-job details must not be mistaken for a success
    self.log.error( "Failed to %s jobs" % verb, res.get( 'Message', '' ) )
    return False
Esempio n. 6
0
class Transformation( API ):

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ Build the parameter tables and optionally attach to an existing transformation.

    :param int transID: when non-zero, the parameters of this transformation
                        are fetched through getTransformation()
    :param transClient: client used for all service calls; a new
                        TransformationClient is created when this is false
    :raises AttributeError: when the lookup of :transID: fails with the
                            message 'Transformation does not exist'
    """
    super( Transformation, self ).__init__()

    # python types accepted by __setParam for each core parameter
    self.paramTypes = {
      'TransformationID'    : [types.IntType, types.LongType],
      'TransformationName'  : types.StringTypes,
      'Status'              : types.StringTypes,
      'Description'         : types.StringTypes,
      'LongDescription'     : types.StringTypes,
      'Type'                : types.StringTypes,
      'Plugin'              : types.StringTypes,
      'AgentType'           : types.StringTypes,
      'FileMask'            : types.StringTypes,
      'TransformationGroup' : types.StringTypes,
      'GroupSize'           : [types.IntType, types.LongType, types.FloatType],
      'InheritedFrom'       : [types.IntType, types.LongType],
      'Body'                : types.StringTypes,
      'MaxNumberOfTasks'    : [types.IntType, types.LongType],
      'EventsPerTask'       : [types.IntType, types.LongType],
    }
    # initial value of every core parameter
    self.paramValues = {
      'TransformationID'    : 0,
      'TransformationName'  : '',
      'Status'              : 'New',
      'Description'         : '',
      'LongDescription'     : '',
      'Type'                : '',
      'Plugin'              : 'Standard',
      'AgentType'           : 'Manual',
      'FileMask'            : '',
      'TransformationGroup' : 'General',
      'GroupSize'           : 1,
      'InheritedFrom'       : 0,
      'Body'                : '',
      'MaxNumberOfTasks'    : 0,
      'EventsPerTask'       : 0,
    }

    self.supportedPlugins = ['Broadcast', 'Standard', 'BySize', 'ByShare']
    self.transClient = transClient if transClient else TransformationClient()
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if not transID:
      # new definition, nothing to load
      return

    self.paramValues['TransformationID'] = transID
    lookup = self.getTransformation()
    if lookup['OK']:
      self.exists = True
      return
    if lookup['Message'] == 'Transformation does not exist':
      raise AttributeError( 'TransformationID %d does not exist' % transID )
    # any other failure: forget the ID and carry on as a new definition
    self.paramValues['TransformationID'] = 0
    gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID, self.transClient.serverURL ) )

  def setServer( self, server ):
    self.serverURL = server
    self.transClient.setServer( self.serverURL )

  def getServer( self ):
    return self.serverURL

  def reset( self, transID = 0 ):
    """ Re-initialise this object, optionally binding it to transformation :transID:.

    __init__ replaces both the client and self.serverURL, so the URL in use
    before the reset is saved first and re-applied afterwards; otherwise a
    server chosen through setServer() would be silently dropped.

    :param int transID: transformation to load after the reset, 0 for none
    :return: S_OK()
    """
    previousURL = self.serverURL
    # NOTE(review): when transID is non-zero, __init__ performs its lookup
    # with the freshly created client, i.e. before the URL is restored below.
    self.__init__( transID )
    self.setServer( previousURL )
    return S_OK()

  def setTargetSE( self, seList ):
    """ Validate :seList: and store it as the 'TargetSE' parameter.

    :param seList: list of SE names, or a string accepted by __setSE
    """
    result = self.__setSE( 'TargetSE', seList )
    return result

  def setSourceSE( self, seList ):
    """ Validate :seList: and store it as the 'SourceSE' parameter.

    :param seList: list of SE names, or a string accepted by __setSE
    """
    result = self.__setSE( 'SourceSE', seList )
    return result

  def __setSE( self, se, seList ):
    """ Check :seList: against the configured storage elements and store it as parameter :se:.

    :param str se: name of the parameter to set
    :param seList: list of SE names, or a string holding either a python
                   literal ( list, tuple or single quoted name ) or a comma /
                   whitespace separated list of names
    :return: the failed result of __checkSEs, otherwise the result of __setParam
    """
    if isinstance( seList, types.StringTypes ):
      # Parse literals only: the string comes from the caller and must never
      # be evaluated as an arbitrary expression.
      import ast
      try:
        parsed = ast.literal_eval( seList )
      except ( ValueError, SyntaxError ):
        parsed = None
      if isinstance( parsed, ( list, tuple ) ):
        seList = list( parsed )
      elif isinstance( parsed, types.StringTypes ):
        seList = [parsed]
      else:
        # not a usable literal: treat as "SE1,SE2 SE3"
        seList = seList.replace( ',', ' ' ).split()
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    # __setParam acts on whatever name is stored in item_called
    self.item_called = se
    return self.__setParam( seList )

  def __getattr__( self, name ):
    if name.find( 'get' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__getParam
    if name.find( 'set' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__setParam
    raise AttributeError, name

  def __getParam( self ):
    if self.item_called == 'Available':
      return S_OK( self.paramTypes.keys() )
    if self.item_called == 'Parameters':
      return S_OK( self.paramValues )
    if self.item_called in self.paramValues:
      return S_OK( self.paramValues[self.item_called] )
    raise AttributeError, "Unknown parameter for transformation: %s" % self.item_called

  def __setParam( self, value ):
    change = False
    if self.item_called in self.paramTypes:
      oldValue = self.paramValues[self.item_called]
      if oldValue != value:
        if type( value ) in self.paramTypes[self.item_called]:
          change = True
        else:
          raise TypeError, "%s %s %s expected one of %s" % ( self.item_called, value, type( value ), self.paramTypes[self.item_called] )
    if not self.item_called in self.paramTypes.keys():
      if not self.paramValues.has_key( self.item_called ):
        change = True
      else:
        oldValue = self.paramValues[self.item_called]
        if oldValue != value:
          change = True
    if not change:
      gLogger.verbose( "No change of parameter %s required" % self.item_called )
    else:
      gLogger.verbose( "Parameter %s to be changed" % self.item_called )
      transID = self.paramValues['TransformationID']
      if self.exists and transID:
        res = self.transClient.setTransformationParameter( transID, self.item_called, value )
        if not res['OK']:
          return res
      self.paramValues[self.item_called] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    """ Fetch the parameters of the current transformation and load them into this object.

    Every parameter returned by the client is passed to the matching
    set<Param> accessor.

    :param bool printOutput: pretty-print a failed client result
    :return: S_OK( parameter dictionary ), or S_ERROR when no TransformationID
             is set or the client call fails
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR( "No TransformationID known" )
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setterName = "set%s" % paramName
      setter = getattr( self, setterName, None )
      # the check must be on the resolved setter: the name is always non-empty
      if not callable( setter ):
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    """ Fetch the logging entries of the current transformation from the client.

    :param bool printOutput: print the entries ( or the failed result )
    :return: S_OK( list of logging entries ) or a failed result
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformationLogging( transID )
    if res['OK']:
      entries = res['Value']
      if printOutput:
        self._printFormattedDictList( entries, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' )
      return S_OK( entries )
    if printOutput:
      self._prettyPrint( res )
    return res

  def extendTransformation( self, nTasks, printOutput = False ):
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation( self, lfns, printOutput = False ):
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ):
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    printOutput = kwds.pop( 'printOutput' )
    fcn = None
    if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ):
      fcn = getattr( self.transClient, operation )
    if not fcn:
      return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate'], orderBy = 'FileID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % string.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime'], orderBy = 'TaskID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % string.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate'], orderBy = 'TransformationID', printOutput = False ):
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % string.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def addTransformation( self, addFiles = True, printOutput = False ):
    """ Create the transformation described by this object through the client.

    After _checkCreation() succeeds the core parameters are sent with
    addTransformation(); every parameter not listed in self.paramTypes is
    then sent individually with setTransformationParameter(). A failure to
    send one of those is logged and does not fail the call.

    :param bool addFiles: passed through to the client
    :param bool printOutput: print the parameters and a failed client result
    :return: S_OK( transID ) or a failed result
    """
    res = self._checkCreation()
    if not res['OK']:
      return self._errorReport( res, 'Failed transformation sanity check' )
    if printOutput:
      gLogger.info( "Will attempt to create transformation with the following parameters" )
      self._prettyPrint( self.paramValues )

    values = self.paramValues
    res = self.transClient.addTransformation( values['TransformationName'],
                                              values['Description'],
                                              values['LongDescription'],
                                              values['Type'],
                                              values['Plugin'],
                                              values['AgentType'],
                                              values['FileMask'],
                                              transformationGroup = values['TransformationGroup'],
                                              groupSize = values['GroupSize'],
                                              inheritedFrom = values['InheritedFrom'],
                                              body = values['Body'],
                                              maxTasks = values['MaxNumberOfTasks'],
                                              eventsPerTask = values['EventsPerTask'],
                                              addFiles = addFiles )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res

    transID = res['Value']
    self.exists = True
    self.setTransformationID( transID )
    gLogger.info( "Created transformation %d" % transID )
    for paramName, paramValue in self.paramValues.items():
      if paramName in self.paramTypes:
        # core parameter, already sent with addTransformation above
        continue
      res = self.transClient.setTransformationParameter( transID, paramName, paramValue )
      if res['OK']:
        continue
      gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, res['Message'] ) )
      gLogger.info( "To add this parameter later please execute the following." )
      gLogger.info( "oTransformation = Transformation(%d)" % transID )
      gLogger.info( "oTransformation.set%s(...)" % paramName )
    return S_OK( transID )

  def _checkCreation( self ):
    """ Check that this object describes a transformation that can be created.

    Refuses when a TransformationID is already set, prompts for any empty
    required parameter and for an unsupported plugin, then runs the
    _check<Plugin>Plugin method of the selected plugin.

    :return: result of the plugin check, or a failed result
    """
    if self.paramValues['TransformationID']:
      gLogger.info( "You are currently working with an active transformation definition." )
      gLogger.info( "If you wish to create a new transformation reset the TransformationID." )
      gLogger.info( "oTransformation.reset()" )
      # give the caller's error report something to show
      return S_ERROR( "TransformationID is already set, reset() the object to create a new transformation" )

    for parameter in ['TransformationName', 'Description', 'LongDescription', 'Type']:
      if not self.paramValues[parameter]:
        gLogger.info( "%s is not defined for this transformation. This is required..." % parameter )
        res = self.__promptForParameter( parameter )
        if not res['OK']:
          return res

    plugin = self.paramValues['Plugin']
    if plugin not in self.supportedPlugins:
      gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin )
      res = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' )
      if not res['OK']:
        return res
      # the prompt stored the new choice
      plugin = self.paramValues['Plugin']

    checkPlugin = "_check%sPlugin" % plugin
    fcn = getattr( self, checkPlugin, None )
    if not callable( fcn ):
      return S_ERROR( "Unable to invoke %s, it isn't a member function" % checkPlugin )
    return fcn()

  def _checkBySizePlugin( self ):
    """ Plugin check for 'BySize': same check as the 'Standard' plugin.
    """
    result = self._checkStandardPlugin()
    return result

  def _checkBySharePlugin( self ):
    """ Plugin check for 'ByShare': same check as the 'Standard' plugin.
    """
    result = self._checkStandardPlugin()
    return result

  def _checkStandardPlugin( self ):
    """ Plugin check for 'Standard': a GroupSize of zero or less is replaced by 1.

    :return: S_OK(), or the failed result of setGroupSize
    """
    if self.paramValues['GroupSize'] > 0:
      return S_OK()
    gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." )
    res = self.setGroupSize( 1 )
    if not res['OK']:
      return res
    return S_OK()

  def _checkBroadcastPlugin( self ):
    """ Plugin check for 'Broadcast': prompt for SourceSE / TargetSE when unset.

    The prompted text is split on commas and whitespace and handed to the
    matching set<Param> method.

    :return: S_OK() or the first failed result
    """
    requiredParams = ['SourceSE', 'TargetSE']
    gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ', '.join( requiredParams ) )
    for requiredParam in requiredParams:
      if self.paramValues.get( requiredParam ):
        # already set to something non-empty
        continue
      res = self.__promptForParameter( requiredParam, insert = False )
      if not res['OK']:
        return res
      answer = res['Value']
      setterName = "set%s" % requiredParam
      setter = None
      if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
        setter = getattr( self, setterName )
      if not setter:
        return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." % setterName )
      res = setter( answer.replace( ',', ' ' ).split() )
      if not res['OK']:
        return res
    return S_OK()

  def __checkSEs( self, seList ):
    """ Check every name in :seList: against the sections of /Resources/StorageElements.

    :param seList: iterable of storage element names
    :return: S_OK() when all are known, otherwise a failed result
    """
    res = gConfig.getSections( '/Resources/StorageElements' )
    if not res['OK']:
      return self._errorReport( res, 'Failed to get possible StorageElements' )
    knownSEs = res['Value']
    missing = [se for se in seList if se not in knownSEs]
    for unknownSE in missing:
      gLogger.error( "StorageElement %s is not known" % unknownSE )
    if not missing:
      return S_OK()
    return S_ERROR( "%d StorageElements not known" % len( missing ) )

  def __promptForParameter( self, parameter, choices = [], default = '', insert = True ):
    """ Ask the user for the value of :parameter: and optionally store it.

    :param str parameter: parameter name, also used to build the setter name
    :param list choices: passed through to promptUser ( only read here )
    :param str default: passed through to promptUser
    :param bool insert: when true the answer is stored through set<parameter>
    :return: S_OK( answer ) or a failed result
    """
    res = promptUser( "Please enter %s" % parameter, choices = choices, default = default )
    if not res['OK']:
      return self._errorReport( res )
    gLogger.info( "%s will be set to '%s'" % ( parameter, res['Value'] ) )
    paramValue = res['Value']
    if insert:
      setterName = "set%s" % parameter
      setter = getattr( self, setterName, None )
      if not callable( setter ):
        # name the setter in the message instead of returning a raw '%s'
        return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" % setterName )
      res = setter( paramValue )
      if not res['OK']:
        return res
    return S_OK( paramValue )
Esempio n. 7
0
class TransformationCleaningAgent( AgentModule ):
  '''
  .. class:: TransformationCleaningAgent

  :param ReplicaManager replicaManager: ReplicaManager instance
  :param TransformationClient transClient: TransformationClient instance
  :param RequestClient requestClient: RequestClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  '''

  def __init__( self, *args, **kwargs ):
    ''' Create the service clients and declare the option placeholders.

    All arguments are handed to AgentModule.__init__ unchanged. The
    placeholders start as None and are assigned in initialize().
    '''
    AgentModule.__init__( self, *args, **kwargs )

    # clients used by the cleaning methods
    self.replicaManager = ReplicaManager()
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.requestClient = RequestClient()
    self.metadataClient = FileCatalogClient()

    # option placeholders
    self.transformationTypes = None  # transformation types to consider
    self.directoryLocations = None  # where to look for output directories
    self.transfidmeta = None  # metadata tag holding the transformation ID
    self.archiveAfter = None  # archive period in days
    self.activeStorages = None  # active SEs
    self.logSE = None  # transformation log SE
    self.enableFlag = None  # enable/disable execution

  def initialize( self ):
    ''' Read the agent options and cache them on the instance.

    :param self: self reference
    :return: S_OK()
    '''
    self.am_setOption( 'shifterProxy', 'DataManager' )

    # transformation types: the agent option wins, otherwise the Operations values
    selectedTypes = self.am_getOption( 'TransformationTypes', [] )
    if not selectedTypes:
      dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      selectedTypes = dataProc + dataManip
    self.transformationTypes = sortList( selectedTypes )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )

    # places searched for output directories
    locations = self.am_getOption( 'DirectoryLocations', [ 'TransformationDB', 'MetadataCatalog' ] )
    self.directoryLocations = sortList( locations )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )

    # metadata tag holding the transformation ID
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )

    # archive period, in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )

    # storage elements to clean
    self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )

    # storage element holding the transformation logs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )

    # execution switch, compared with the string 'True' in execute()
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    return S_OK()

  #############################################################################
  def execute( self ):
    ''' One agent cycle.

    Unless disabled through the EnableFlag option, three selections are
    processed in turn, each restricted to self.transformationTypes:

    * status 'Cleaning': 'Replication' and 'Removal' transformations are
      archived, all others cleaned;
    * status 'RemovingFiles': the transformation output is removed;
    * status 'Completed' with LastUpdate older than self.archiveAfter days:
      the transformation is archived.

    A failure for one transformation, or of one of the selections, is
    logged and does not stop the cycle.

    :param self: self reference
    :return: S_OK()
    '''
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if self.enableFlag != 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # transformations in Cleaning status: remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        # # for `Replication` or `Removal` there is nothing to clean, just archive
        if transDict[ 'Type' ] in [ 'Replication', 'Removal' ]:
          opRes = self.archiveTransformation( transID )
          if not opRes['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transID, opRes['Message'] ) )
        else:
          opRes = self.cleanTransformation( transID )
          if not opRes['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transID, opRes['Message'] ) )
    else:
      # a failed selection used to be ignored silently
      self.log.error( "Could not get the transformations in Cleaning status", res['Message'] )

    # # transformations in RemovingFiles status: remove the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        opRes = self.removeTransformationOutput( transID )
        if not opRes['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transID, opRes['Message'] ) )
    else:
      self.log.error( "Could not get the transformations in RemovingFiles status", res['Message'] )

    # # transformations in Completed status: archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                               older = olderThanTime,
                                               timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        opRes = self.archiveTransformation( transID )
        if not opRes['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transID, opRes['Message'] ) )
    else:
      self.log.error( "Could not get the transformations in Completed status", res['Message'] )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    ''' Collect the directories of transformation :transID: from every configured location.

    'TransformationDB' reads the 'OutputDirectories' transformation
    parameter ( one path per line ), 'MetadataCatalog' searches the catalog
    for directories tagged with the transformation ID. Paths are kept only
    if they pass _addDirs.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK( sorted list of directories ) or a failed result
    '''
    found = []
    locations = self.directoryLocations

    if 'TransformationDB' in locations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        self.log.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      found = self._addDirs( transID, res['Value'].splitlines(), found )

    if 'MetadataCatalog' in locations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      found = self._addDirs( transID, res['Value'], found )

    if not found:
      self.log.info( "No output directories found" )
    return S_OK( sortList( found ) )

  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    ''' append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    '''
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    ''' Remove :directory: from every SE in self.activeStorages, stopping at the first failure.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() or the first failed result
    '''
    for seName in self.activeStorages:
      removal = self.__removeStorageDirectory( directory, seName )
      if not removal['OK']:
        return removal
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    ''' Recursively remove :directory: at :storageElement: if it exists there.

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    :return: S_OK() or a failed result
    '''
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )

    # resolve the storage path of the directory
    res = self.replicaManager.getPfnForLfn( [directory], storageElement )
    if not res['OK']:
      self.log.error( "Failed to get PFN for directory", res['Message'] )
      return res
    failedLfns = res['Value']['Failed']
    if failedLfns:
      for lfn, error in failedLfns.items():
        self.log.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( lfn, error ) )
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = res['Value']['Successful'].values()[0]

    # nothing to do when it is not there
    res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True )
    if not res['OK']:
      self.log.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    if not res['Value']:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()

    res = self.replicaManager.removeStorageDirectory( storageDirectory,
                                                      storageElement,
                                                      recursive = True,
                                                      singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove storage directory", res['Message'] )
      return res
    removedCount = res['Value']['FilesRemoved']
    self.log.info( "Successfully removed %d files from %s at %s" % ( removedCount, directory, storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    ''' Remove every file the catalog lists under :directory:.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK(), or a failed result if the listing or any removal fails
    '''
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    remnants = res['Value']
    if not remnants:
      return S_OK()

    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( remnants ) )
    res = self.replicaManager.removeFile( remnants, force = True )
    if not res['OK']:
      return res
    failures = res['Value']['Failed']
    if not failures:
      return S_OK()
    for lfn, reason in failures.items():
      self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
    return S_ERROR( "Failed to remove all files found in the catalog" )

  def __getCatalogDirectoryContents( self, directories ):
    ''' List all files the catalog holds under :directories:, sub-directories included.

    A directory that cannot be listed is logged and skipped, so the result
    may be partial.

    :param self: self reference
    :param list directories: list of paths in catalog ( left unmodified )
    :return: S_OK( list of file names )
    '''
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    # work on a copy: the queue is consumed and extended below and the
    # caller's list must not be emptied as a side effect
    pendingDirs = list( directories )
    allFiles = {}
    while pendingDirs:
      currentDir = pendingDirs.pop( 0 )
      res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True )
      if res['OK']:
        dirContents = res['Value']
        pendingDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
      elif res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      else:
        self.log.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) )
    self.log.info( "Found %d files" % len( allFiles ) )
    return S_OK( allFiles.keys() )

  def cleanTransformationLogFiles( self, directory ):
    ''' Remove :directory: from the log storage element self.logSE.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() or the failed removal result
    '''
    self.log.info( "Removing log files found in the directory %s" % directory )
    removal = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True )
    if removal['OK']:
      self.log.info( "Successfully removed transformation log directory" )
      return S_OK()
    self.log.error( "Failed to remove log files", removal['Message'] )
    return removal

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    ''' Remove the output data of transformation :transID: from catalog and storage,
    then set the transformation status to RemovedFiles.

    Directories whose path contains '/LOG/' are not touched. If the directories
    cannot be obtained the problem is logged and S_OK() is returned.

    :param self: self reference
    :param transID: transformation ID
    :return: S_OK(), or the first failed result of a cleaning step
    '''
    self.log.info( "Removing output data for transformation %s" % transID )
    dirsResult = self.getTransformationDirectories( transID )
    if not dirsResult['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, dirsResult ) )
      return S_OK()
    for outputDir in dirsResult['Value']:
      if re.search( '/LOG/', outputDir ):
        # log directories are skipped here
        continue
      catalogCleaned = self.cleanCatalogContents( outputDir )
      if not catalogCleaned['OK']:
        return catalogCleaned
      storageCleaned = self.cleanStorageContents( outputDir )
      if not storageCleaned['OK']:
        return storageCleaned
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # whatever is still registered in the metadata catalog for this transformation goes too
    metaCleaned = self.cleanMetadataCatalogFiles( transID )
    if not metaCleaned['OK']:
      return metaCleaned
    self.log.info( "Successfully removed output of transformation %d" % transID )
    # final step: record the new status
    statusUpdate = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if statusUpdate['OK']:
      self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), statusUpdate['Message'] )
    return statusUpdate

  def archiveTransformation( self, transID ):
    ''' Clean the tasks of transformation :transID:, clean its entries in the
    transformation DB and set its status to Archived.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK(), or the first failed result encountered
    '''
    self.log.info( "Archiving transformation %s" % transID )
    # tasks (and whatever requests go with them) first
    tasksCleaned = self.cleanTransformationTasks( transID )
    if not tasksCleaned['OK']:
      return tasksCleaned
    # then the file and job information kept by the transformation DB
    dbCleaned = self.transClient.cleanTransformation( transID )
    if not dbCleaned['OK']:
      return dbCleaned
    self.log.info( "Successfully archived transformation %d" % transID )
    # finally flag the transformation as archived
    statusUpdate = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if statusUpdate['OK']:
      self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), statusUpdate['Message'] )
    return statusUpdate

  def cleanTransformation( self, transID ):
    ''' Remove everything related to transformation :transID: and set its status to Deleted.

    The steps run in order: tasks, log directories, catalog and storage contents
    of every directory, metadata catalog remnants, transformation DB entries.
    The first failing step aborts the sequence and its result is returned. If
    the directories cannot be obtained the problem is logged and S_OK() is returned.

    :param self: self reference
    :param transID: transformation ID
    '''
    self.log.info( "Cleaning transformation %s" % transID )
    dirsResult = self.getTransformationDirectories( transID )
    if not dirsResult['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, dirsResult ) )
      return S_OK()
    # tasks and their requests
    tasksCleaned = self.cleanTransformationTasks( transID )
    if not tasksCleaned['OK']:
      return tasksCleaned
    for transDir in dirsResult['Value']:
      # log directories get an extra removal from the log SE
      if re.search( '/LOG/', transDir ):
        logsCleaned = self.cleanTransformationLogFiles( transDir )
        if not logsCleaned['OK']:
          return logsCleaned
      catalogCleaned = self.cleanCatalogContents( transDir )
      if not catalogCleaned['OK']:
        return catalogCleaned
      storageCleaned = self.cleanStorageContents( transDir )
      if not storageCleaned['OK']:
        return storageCleaned
    # remnants still known to the metadata catalog
    metaCleaned = self.cleanMetadataCatalogFiles( transID )
    if not metaCleaned['OK']:
      return metaCleaned
    # file and job information in the transformation DB
    dbCleaned = self.transClient.cleanTransformation( transID )
    if not dbCleaned['OK']:
      return dbCleaned
    self.log.info( "Successfully cleaned transformation %d" % transID )
    # record the final status
    statusUpdate = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' )
    if statusUpdate['OK']:
      self.log.info( "Updated status of transformation %s to Deleted" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to Deleted" % ( transID ), statusUpdate['Message'] )
    return statusUpdate

  def cleanMetadataCatalogFiles( self, transID ):
    ''' Remove the files the metadata catalog associates with transformation :transID:.

    The files are looked up with the metadata tag self.transfidmeta and removed
    through the replica manager with force = True.

    :param self: self reference
    :param transID: transformation ID
    :return: S_OK() when nothing was found or everything was removed, S_ERROR otherwise
    '''
    lookup = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not lookup['OK']:
      return lookup
    lfns = lookup['Value']
    if not lfns:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()
    removal = self.replicaManager.removeFile( lfns, force = True )
    if not removal['OK']:
      return removal
    notRemoved = removal['Value']['Failed']
    for lfn, reason in notRemoved.items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if notRemoved:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    ''' Remove the tasks of transformation :transID: from the external system.

    Nothing is done when the transformation has no external IDs. For the types
    'Replication' and 'Removal' the request removal is used, for every other
    type the WMS task removal.

    :param self: self reference
    :param transID: transformation ID
    :return: S_OK(), or the failed result of the step that went wrong
    '''
    idsResult = self.__getTransformationExternalIDs( transID )
    if not idsResult['OK']:
      return idsResult
    taskIDs = idsResult['Value']
    if not taskIDs:
      return S_OK()
    typeResult = self.transClient.getTransformationParameters( transID, ['Type'] )
    if not typeResult['OK']:
      self.log.error( "Failed to determine transformation type" )
      return typeResult
    if typeResult['Value'] in ( 'Replication', 'Removal' ):
      removal = self.__removeRequests( taskIDs )
    else:
      removal = self.__removeWMSTasks( taskIDs )
    if removal['OK']:
      return S_OK()
    return removal

  def __getTransformationExternalIDs( self, transID ):
    ''' Return the 'ExternalID' of every task of transformation :transID:.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK( list of external IDs ) or the failed result of the task query
    '''
    tasks = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not tasks['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, tasks['Message'] )
      return tasks
    externalIDs = []
    for task in tasks["Value"]:
      externalIDs.append( task['ExternalID'] )
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    ''' Placeholder: no request is removed, the omission is only logged.

    :param self: self reference
    :param list requestIDs: request IDs (currently unused)
    :return: S_OK()
    '''
    notice = "Not removing requests but should do"
    self.log.error( notice )
    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    ''' wipe out jobs and their requests from the system

    The jobs are killed and then deleted in the WMS, in chunks of 500. When the
    WMS part succeeded, the failover requests associated with the jobs are
    looked up and deleted one by one.

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list transJobIDs: job IDs (anything int() accepts); IDs equal to 0 are ignored
    :return: S_OK() when everything could be removed, S_ERROR (or the failed
             request lookup result) otherwise
    '''
    # Prevent 0 job IDs
    jobIDs = [ jobID for jobID in ( int( j ) for j in transJobIDs ) if jobID ]
    allRemove = True
    # Kill and delete report their problems the same way, so both share one handler.
    # Each entry: ( client call, verb for failure messages, participle for success messages )
    wmsActions = ( ( self.wmsClient.killJob, 'kill', 'killed' ),
                   ( self.wmsClient.deleteJob, 'remove', 'removed' ) )
    for jobList in breakListIntoChunks( jobIDs, 500 ):
      for action, verb, done in wmsActions:
        res = action( jobList )
        if res['OK']:
          self.log.info( "Successfully %s %d jobs from WMS" % ( done, len( jobList ) ) )
        elif "NonauthorizedJobIDs" in res:
          self.log.error( "Failed to %s %s jobs because not authorized" % ( verb, len( res['NonauthorizedJobIDs'] ) ) )
          allRemove = False
        elif "FailedJobIDs" in res:
          self.log.error( "Failed to %s %s jobs" % ( verb, len( res['FailedJobIDs'] ) ) )
          allRemove = False
        elif "InvalidJobIDs" in res:
          # only unknown jobs were reported: nothing is left to remove for them
          self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
        else:
          # A failure without any job ID breakdown used to be ignored, which
          # made the whole removal look successful although nothing was done.
          self.log.error( "Failed to %s jobs" % verb, res.get( 'Message', '' ) )
          allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    self.log.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()
    failed = 0
    for jobID, requestName in failoverRequests.items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        # %s, not %d: the check above shows that job IDs may arrive as strings
        self.log.verbose( "Removed request %s associated to job %s." % ( requestName, jobID ) )
    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()
  prod = clip.prod
  full_detail = clip.full_det
  fc = FileCatalogClient()
  
  processlist = gConfig.getValue('/LocalSite/ProcessListPath')
  prl = ProcessList(processlist)
  processesdict = prl.getProcessesDict()
  
  trc = TransformationClient()
  prodids = []
  if not prod:
   conddict = {}
   conddict['Status'] = clip.statuses
   if clip.ptypes:
     conddict['Type'] = clip.ptypes
   res = trc.getTransformations( conddict )
   if res['OK']:
     for transfs in res['Value']:
       prodids.append(transfs['TransformationID'])
  else:
    prodids.extend(prod)

  metadata = []
  
  gLogger.info("Will run on prods %s" % str(prodids))
  
  for prodID in prodids:
    if prodID<clip.minprod:
      continue
    meta = {}
    meta['ProdID']=prodID
class TransformationCleaningAgent( AgentModule ):
  """
  .. class:: TransformationCleaningAgent

  :param DataManger dm: DataManager instance
  :param TransfromationClient transClient: TransfromationClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  """

  def __init__( self, *args, **kwargs ):
    """ Initialise the base agent and declare the attributes used by this agent.

    Every attribute declared here starts as None; initialize() assigns the
    configuration values and most of the clients.
    """
    AgentModule.__init__( self, *args, **kwargs )

    # clients: data manager, transformation, WMS, request and file catalog
    self.dm = self.transClient = self.wmsClient = None
    self.reqClient = self.metadataClient = None

    # transformation types to handle and places to look for their directories
    self.transformationTypes = self.directoryLocations = None
    # metadata tag holding the transformation ID, archive period in days
    self.transfidmeta = self.archiveAfter = None
    # active SEs, SE holding the transformation logs, enable/disable switch
    self.activeStorages = self.logSE = self.enableFlag = None

  def initialize( self ):
    """ Read the configuration options and create the service clients.

    Options come from the agent section (am_getOption) or from Operations,
    with the defaults given in the calls below; most effective values are logged.

    :param self: self reference
    :return: S_OK()
    """
    # run with the DataManager shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )

    # transformation types: an explicit agent option wins over the Operations lists
    self.dataProcTTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
    self.dataManipTTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
    configuredTypes = self.am_getOption( 'TransformationTypes', [] )
    self.transformationTypes = sorted( configuredTypes or ( self.dataProcTTypes + self.dataManipTTypes ) )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )

    # where to look for the directories of a transformation
    defaultLocations = [ 'TransformationDB', 'MetadataCatalog' ]
    self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', defaultLocations ) )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )

    # metadata tag carrying the transformation ID
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )

    # archive period, in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )

    # storage elements to clean
    self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )

    # storage element holding the transformation logs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )

    # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )

    # clients (self.dm is not instantiated here)
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.reqClient = ReqClient()
    self.metadataClient = FileCatalogClient()

    return S_OK()

  #############################################################################
  def execute( self ):
    """ execution in one agent's cycle

    Three groups of transformations of the configured types are processed:

    * status 'Cleaning': archived when the type is one of self.dataManipTTypes,
      fully cleaned otherwise
    * status 'RemovingFiles': their output is removed
    * status 'Completed' and not updated for self.archiveAfter days: archived

    Problems with a single transformation or with one of the queries are
    logged and do not stop the cycle.

    :param self: self reference
    :return: S_OK(), also when the agent is disabled through the EnableFlag option
    """

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if self.enableFlag != 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in self.dataManipTTypes:
          opRes = self.archiveTransformation( transID )
          if not opRes['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transID, opRes['Message'] ) )
        else:
          opRes = self.cleanTransformation( transID )
          if not opRes['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transID, opRes['Message'] ) )
    else:
      # a failed query used to be skipped without any trace
      self.log.error( "Could not get the transformations in Cleaning status", res['Message'] )

    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        opRes = self.removeTransformationOutput( transID )
        if not opRes['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transID, opRes['Message'] ) )
    else:
      # a failed query used to be skipped without any trace
      self.log.error( "Could not get the transformations in RemovingFiles status", res['Message'] )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        opRes = self.archiveTransformation( transID )
        if not opRes['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transID, opRes['Message'] ) )
    else:
      self.log.error( "Could not get the transformations", res['Message'] )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ get the directories for the supplied transformation from the transformation system

    Depending on self.directoryLocations the directories are taken from the
    'OutputDirectories' parameter of the transformation and/or from the
    metadata catalog (directories tagged with self.transfidmeta). Only paths
    accepted by _addDirs are kept.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK( sorted list of directories ) or the failed result of a lookup
    """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        self.log.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      rawDirs = res['Value']
      transDirectories = []
      if isinstance( rawDirs, list ):
        transDirectories = rawDirs
      elif rawDirs:
        # The parameter may come back as the string form of a list. An empty
        # value means "no directories" and must not reach literal_eval.
        try:
          transDirectories = ast.literal_eval( rawDirs )
        except ( ValueError, SyntaxError ):
          # not a Python literal, e.g. a bare path '/a/b/c' instead of '["/a/b/c"]'
          transDirectories = [ rawDirs ]
        if not isinstance( transDirectories, ( list, tuple ) ):
          # a single literal would otherwise be iterated element by element
          transDirectories = [ transDirectories ]
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )

    if not directories:
      self.log.info( "No output directories found" )
    directories = sorted( directories )
    return S_OK( directories )
  # FIXME If a classmethod, should it not have cls instead of self?
  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    """ append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    """
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    """ Remove :directory: from every storage element in self.activeStorages.

    Stops at the first storage element for which the removal fails.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK(), or the failed result of the first unsuccessful removal
    """
    for seName in self.activeStorages:
      outcome = self.__removeStorageDirectory( directory, seName )
      if not outcome['OK']:
        return outcome
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    """ Recursively remove :directory: from :storageElement:.

    The LFN is first resolved to a PFN on the storage element; a directory that
    does not exist there counts as success.

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    :return: S_OK() on success or when there is nothing to remove, a failed result otherwise
    """
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )

    se = StorageElement( storageElement )

    # resolve the directory LFN to its PFN on this SE
    pfnLookup = se.getPfnForLfn( [directory] )
    if not pfnLookup['OK']:
      self.log.error( "Failed to get PFN for directory", pfnLookup['Message'] )
      return pfnLookup
    resolution = pfnLookup['Value']
    if directory in resolution['Failed']:
      self.log.verbose( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, resolution['Failed'][directory] ) )
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = resolution['Successful'][directory]

    # nothing to do when the directory is not on the SE
    existence = returnSingleResult( se.exists( storageDirectory ) )
    if not existence['OK']:
      self.log.error( "Failed to obtain existance of directory", existence['Message'] )
      return existence
    if not existence['Value']:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()

    removal = returnSingleResult( se.removeDirectory( storageDirectory, recursive = True ) )
    if not removal['OK']:
      self.log.error( "Failed to remove storage directory", removal['Message'] )
      return removal
    removedCount = removal['Value']['FilesRemoved']
    self.log.info( "Successfully removed %d files from %s at %s" % ( removedCount, directory, storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    """ wipe out everything from catalog under folder :directory:

    The files found below :directory: are removed with DataManager.removeFile
    ( force = True ). Files reported as "File does not exist" only produce a
    warning; any other failure makes the call return S_ERROR.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() when there was nothing to remove or only tolerated failures
             occurred, a failed result otherwise
    """
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      self.log.info( "No files are registered in the catalog directory %s" % directory )
      return S_OK()
    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )
    try:
      res = DataManager().removeFile( filesFound, force = True )
    finally:
      # always switch the option back, also when the removal raises
      gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )

    if not res['OK']:
      return res
    realFailure = False
    for lfn, reason in res['Value']['Failed'].items():
      if "File does not exist" in str( reason ):
        # already gone, which is what the cleaning is after anyway
        self.log.warn( "File %s not found in some catalog: " % ( lfn ) )
      else:
        self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
        realFailure = True
    if realFailure:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    """ get catalog contents under paths :directories:

    Each directory is listed through a FileCatalog instance; the sub-directories
    reported by a listing are queued and listed in turn. A directory that cannot
    be listed is logged and skipped, so the walk is best-effort.

    :param self: self reference
    :param list directories: list of paths in catalog (not modified)
    :return: S_OK( list ) with the keys gathered from the 'Files' entry of every listing
    """
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    # Work on a copy: the queue is consumed and extended during the walk and
    # the caller's list must not be emptied as a side effect.
    activeDirs = list( directories )
    allFiles = {}
    fc = FileCatalog()
    while activeDirs:
      currentDir = activeDirs.pop( 0 )
      res = returnSingleResult( fc.listDirectory( currentDir ) )
      if res['OK']:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
      elif res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif "No such file or directory" in res['Message']:
        # a missing directory is not an error either, just differently worded
        self.log.info( "%s: %s" % ( currentDir, res['Message'] ) )
      else:
        self.log.error( "Failed to get directory %s content: %s" % ( currentDir, res['Message'] ) )
    self.log.info( "Found %d files" % len( allFiles ) )
    return S_OK( list( allFiles ) )

  def cleanTransformationLogFiles( self, directory ):
    """ Remove the transformation log directory :directory: from the log SE (self.logSE).

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() on success, otherwise the failed result of the removal
    """
    self.log.info( "Removing log files found in the directory %s" % directory )
    logStorage = StorageElement( self.logSE )
    removal = returnSingleResult( logStorage.removeDirectory( directory ) )
    if removal['OK']:
      self.log.info( "Successfully removed transformation log directory" )
      return S_OK()
    self.log.error( "Failed to remove log files", removal['Message'] )
    return removal

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    """ Remove the output data of transformation :transID: from catalog and storage,
    then set the transformation status to RemovedFiles.

    Directories whose path contains '/LOG/' are not touched. If the directories
    cannot be obtained the problem is logged and S_OK() is returned.

    :param self: self reference
    :param transID: transformation ID
    :return: S_OK(), or the first failed result of a cleaning step
    """
    self.log.info( "Removing output data for transformation %s" % transID )
    dirsResult = self.getTransformationDirectories( transID )
    if not dirsResult['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, dirsResult ) )
      return S_OK()
    for outputDir in dirsResult['Value']:
      if re.search( '/LOG/', outputDir ):
        # log directories are skipped here
        continue
      catalogCleaned = self.cleanCatalogContents( outputDir )
      if not catalogCleaned['OK']:
        return catalogCleaned
      storageCleaned = self.cleanStorageContents( outputDir )
      if not storageCleaned['OK']:
        return storageCleaned
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # whatever is still registered in the metadata catalog for this transformation goes too
    metaCleaned = self.cleanMetadataCatalogFiles( transID )
    if not metaCleaned['OK']:
      return metaCleaned
    self.log.info( "Successfully removed output of transformation %d" % transID )
    # final step: record the new status
    statusUpdate = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if statusUpdate['OK']:
      self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), statusUpdate['Message'] )
    return statusUpdate

  def archiveTransformation( self, transID ):
    """ Clean the tasks of transformation :transID:, clean its entries in the
    transformation DB and set its status to Archived.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK(), or the first failed result encountered
    """
    self.log.info( "Archiving transformation %s" % transID )
    # tasks (and whatever requests go with them) first
    tasksCleaned = self.cleanTransformationTasks( transID )
    if not tasksCleaned['OK']:
      return tasksCleaned
    # then the file and job information kept by the transformation DB
    dbCleaned = self.transClient.cleanTransformation( transID )
    if not dbCleaned['OK']:
      return dbCleaned
    self.log.info( "Successfully archived transformation %d" % transID )
    # finally flag the transformation as archived
    statusUpdate = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if statusUpdate['OK']:
      self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), statusUpdate['Message'] )
    return statusUpdate

  def cleanTransformation( self, transID ):
    """ Remove what was produced by transformation :transID: and set its status to Cleaned.

    The steps run in order: tasks, log directories, catalog and storage contents
    of every directory, metadata catalog remnants, transformation DB entries.
    The first failing step aborts the sequence and its result is returned. If
    the directories cannot be obtained the problem is logged and S_OK() is returned.

    :param self: self reference
    :param transID: transformation ID
    """
    self.log.info( "Cleaning transformation %s" % transID )
    dirsResult = self.getTransformationDirectories( transID )
    if not dirsResult['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, dirsResult ) )
      return S_OK()
    # tasks and their requests
    tasksCleaned = self.cleanTransformationTasks( transID )
    if not tasksCleaned['OK']:
      return tasksCleaned
    for transDir in dirsResult['Value']:
      # log directories get an extra removal from the log SE
      if re.search( '/LOG/', transDir ):
        logsCleaned = self.cleanTransformationLogFiles( transDir )
        if not logsCleaned['OK']:
          return logsCleaned
      catalogCleaned = self.cleanCatalogContents( transDir )
      if not catalogCleaned['OK']:
        return catalogCleaned
      storageCleaned = self.cleanStorageContents( transDir )
      if not storageCleaned['OK']:
        return storageCleaned
    # remnants still known to the metadata catalog
    metaCleaned = self.cleanMetadataCatalogFiles( transID )
    if not metaCleaned['OK']:
      return metaCleaned
    # file and job information in the transformation DB
    dbCleaned = self.transClient.cleanTransformation( transID )
    if not dbCleaned['OK']:
      return dbCleaned
    self.log.info( "Successfully cleaned transformation %d" % transID )
    # record the final status
    statusUpdate = self.transClient.setTransformationParameter( transID, 'Status', 'Cleaned' )
    if statusUpdate['OK']:
      self.log.info( "Updated status of transformation %s to Cleaned" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to Cleaned" % ( transID ), statusUpdate['Message'] )
    return statusUpdate

  def cleanMetadataCatalogFiles( self, transID ):
    """ wipe out files from catalog

    The files tagged with :transID: (metadata key self.transfidmeta) are looked
    up in the metadata catalog and removed with DataManager.removeFile
    ( force = True ).

    :param self: self reference
    :param transID: transformation ID
    :return: S_OK() when nothing was found or everything was removed, a failed
             result otherwise
    """
    res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )
    try:
      res = DataManager().removeFile( fileToRemove, force = True )
    finally:
      # always switch the option back, also when the removal raises
      gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )

    if not res['OK']:
      return res
    failedFiles = res['Value']['Failed']
    for lfn, reason in failedFiles.items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if failedFiles:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    """ Remove the tasks of transformation :transID: from the external system.

    Nothing is done when the transformation has no external IDs. Types listed
    in self.dataProcTTypes go through the WMS task removal, every other type
    through the request removal.

    :param self: self reference
    :param transID: transformation ID
    :return: S_OK(), or the failed result of the step that went wrong
    """
    idsResult = self.__getTransformationExternalIDs( transID )
    if not idsResult['OK']:
      return idsResult
    taskIDs = idsResult['Value']
    if not taskIDs:
      return S_OK()
    typeResult = self.transClient.getTransformationParameters( transID, ['Type'] )
    if not typeResult['OK']:
      self.log.error( "Failed to determine transformation type" )
      return typeResult
    if typeResult['Value'] in self.dataProcTTypes:
      removal = self.__removeWMSTasks( taskIDs )
    else:
      removal = self.__removeRequests( taskIDs )
    if removal['OK']:
      return S_OK()
    return removal

  def __getTransformationExternalIDs( self, transID ):
    """ Return the 'ExternalID' of every task of transformation :transID:.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK( list of external IDs ) or the failed result of the task query
    """
    tasks = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not tasks['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, tasks['Message'] )
      return tasks
    externalIDs = []
    for task in tasks["Value"]:
      externalIDs.append( task['ExternalID'] )
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    """ Remove requests from the (new) RMS system.

    #FIXME: if the old system is still installed, it won't remove anything!!!
    (we don't want to risk removing from the new RMS what is instead in the old)

    IDs evaluating to 0 are skipped.  The removal is best effort: a request that
    cannot be deleted is reported in the log but does not change the returned result.

    :param self: self reference
    :param list requestIDs: request IDs, each one convertible with int()
    :return: S_OK()
    """
    # FIXME: checking if the old system is still installed!
    from DIRAC.ConfigurationSystem.Client import PathFinder
    if PathFinder.getServiceURL( "RequestManagement/RequestManager" ):
      self.log.warn( "NOT removing requests!!" )
      return S_OK()

    # convert everything up front so that a malformed ID aborts before any deletion
    rIDs = [ rID for rID in ( int( j ) for j in requestIDs ) if rID ]
    for requestID in rIDs:
      res = self.reqClient.deleteRequest( requestID )
      if not res['OK']:
        # do not swallow the failure silently, but keep going with the other requests
        self.log.error( "Failed to remove request %s" % requestID, res['Message'] )

    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    """ Wipe out jobs and their requests from the system.

    The jobs are killed and then deleted in the WMS in chunks of 500.  When that
    worked, the requests associated to the jobs are deleted: first from the old
    RMS (a RuntimeError raised while using the old client is ignored), then from
    the new RMS.

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list transJobIDs: job IDs; entries evaluating to 0 are ignored
    :return: S_OK() when everything was removed, S_ERROR or the failing result otherwise
    """
    # Prevent 0 job IDs
    jobIDs = [ jobID for jobID in ( int( j ) for j in transJobIDs ) if jobID ]

    def _jobsHandled( res, verb, participle, nJobs ):
      """ Log the outcome of one bulk WMS call, return False if some jobs could not be handled. """
      if res['OK']:
        self.log.info( "Successfully %s %d jobs from WMS" % ( participle, nJobs ) )
        return True
      if "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to %s %s jobs because not authorized" % ( verb, len( res['NonauthorizedJobIDs'] ) ) )
        return False
      if "FailedJobIDs" in res:
        self.log.error( "Failed to %s %s jobs" % ( verb, len( res['FailedJobIDs'] ) ) )
        return False
      if "InvalidJobIDs" in res:
        # jobs unknown to the WMS are already gone, this is not a failure
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
        return True
      # any other failure (e.g. the call itself did not go through) must not be taken for a success
      self.log.error( "Failed to %s jobs" % verb, res.get( 'Message', '' ) )
      return False

    def _deleteRequests( client, requests ):
      """ Delete the requests of the { jobID : requestName } dict, return the number of failures. """
      nFailed = 0
      for jobID, requestName in requests.items():
        # Put this check just in case, tasks must have associated jobs
        if jobID == 0 or jobID == '0':
          continue
        res = client.deleteRequest( requestName )
        if not res['OK']:
          self.log.error( "Failed to remove request from RequestDB", res['Message'] )
          nFailed += 1
        else:
          # %s and not %d: the job ID may come back as a string
          self.log.verbose( "Removed request %s associated to job %s." % ( requestName, jobID ) )
      return nFailed

    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):
      if not _jobsHandled( self.wmsClient.killJob( jobList ), "kill", "killed", len( jobList ) ):
        allRemove = False
      if not _jobsHandled( self.wmsClient.deleteJob( jobList ), "remove", "removed", len( jobList ) ):
        allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    failed = 0
    failoverRequests = {}
    # FIXME: double request client: old/new -> only the new will survive sooner or later
    # this is the old
    try:
      oldReqClient = RequestClient()
      res = oldReqClient.getRequestForJobs( jobIDs )
      if not res['OK']:
        self.log.error( "Failed to get requestID for jobs.", res['Message'] )
        return res
      failoverRequests = dict( res['Value'] )
      self.log.info( "Found %d jobs with associated failover requests (in the old RMS)" % len( failoverRequests ) )
      # an empty answer from the old RMS must not prevent the clean up of the new one below
      failed += _deleteRequests( oldReqClient, failoverRequests )
    except RuntimeError:
      failoverRequests = {}

    # FIXME: and this is the new
    res = self.reqClient.getRequestNamesForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    newRequests = res['Value']['Successful']
    failoverRequests.update( newRequests )
    if not failoverRequests:
      return S_OK()
    failed += _deleteRequests( self.reqClient, newRequests )

    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Esempio n. 10
0
class DataRecoveryAgent( AgentModule ):
  """ Agent resetting the input files of failed production jobs.

  For the transformations it handles, the agent selects the input files in
  'Assigned' or 'MaxReset' status, finds the 'Failed' WMS jobs they were given
  to, discards the jobs that still have requests, and then sets each file to
  'Processed' when all the expected output of its task is found in the catalog,
  or back to 'Unused' otherwise.
  """
  def __init__(self, *args, **kwargs):
    """ Set the agent name and logger. """
    AgentModule.__init__( self, *args, **kwargs )
    self.name = 'DataRecoveryAgent'
    self.log = gLogger
  #############################################################################
  def initialize(self):
    """ Create the clients used by the agent and set its default options.
    """
    self.enableFlag = '' #defined below
    self.replicaManager = ReplicaManager()
    self.prodDB = TransformationClient()
    self.requestClient = RequestClient()
    # names of the task dictionary keys used throughout the agent
    self.taskIDName = 'TaskID'
    self.externalStatus = 'ExternalStatus'
    self.externalID = 'ExternalID'
    self.am_setOption('PollingTime',2*60*60) #no stalled jobs are considered so can be frequent
    # while the flag is False updateFileStatus only logs what it would do
    self.enableFlag = self.am_getOption('EnableFlag', False)
    self.am_setModuleParam("shifterProxy", "ProductionManager")
    self.ops = Operations()
    return S_OK()
  #############################################################################
  def execute(self):
    """ The main execution method.

    Loops over the 'Active' and 'Completing' transformations of the handled
    types.  A problem with one transformation is logged and the agent moves on
    to the next one; only a failure to list the transformations ends the cycle
    with S_ERROR.
    """
    self.log.info('Enable flag is %s' % self.enableFlag)
    self.removalOKFlag = True

    transformationTypes = ['MCReconstruction', 'MCSimulation', 'MCReconstruction_Overlay', 'Merge']
    transformationStatus = ['Active', 'Completing']
    fileSelectionStatus = ['Assigned', 'MaxReset']
    updateStatus = 'Unused'
    wmsStatusList = ['Failed']
    #only worry about files whose task was not updated during the last "Delay" hours
    selectDelay = self.am_getOption("Delay", 2) #hours
    typesString = ', '.join(transformationTypes)

    transformationDict = {}
    for transStatus in transformationStatus:
      result = self.getEligibleTransformations(transStatus, transformationTypes)
      if not result['OK']:
        self.log.error(result)
        return S_ERROR('Could not obtain eligible transformations for status "%s"' % (transStatus))

      if not result['Value']:
        self.log.info('No "%s" transformations of types %s to process.' % (transStatus, typesString))
        continue

      transformationDict.update(result['Value'])

    self.log.info('Selected %s transformations of types %s' % (len(transformationDict), typesString))
    self.log.verbose('The following transformations were selected out of %s:\n%s' % (typesString, ', '.join(transformationDict)))

    # manual switch: transformation IDs listed here are the only ones looked at
    trans = []
    #initially this was useful for restricting the considered list
    #now we use the DataRecoveryAgent in setups where IDs are low
    ignoreLessThan = self.ops.getValue("Transformations/IgnoreLessThan", '724')

    if trans:
      self.log.info('Skipping all transformations except %s' % (', '.join(trans)))

    for transformation, typeName in transformationDict.items():
      if trans and transformation not in trans:
        continue
      if ignoreLessThan and int(ignoreLessThan) > int(transformation):
        self.log.verbose('Ignoring transformation %s ( is less than specified limit %s )' % (transformation, ignoreLessThan))
        continue

      self.log.info('='*len('Looking at transformation %s type %s:' % (transformation, typeName)))
      self.log.info('Looking at transformation %s:' % (transformation))

      result = self.selectTransformationFiles(transformation, fileSelectionStatus)
      if not result['OK']:
        self.log.error(result)
        self.log.error('Could not select files for transformation %s' % transformation)
        continue

      if not result['Value']:
        self.log.info('No files in status %s selected for transformation %s' % (', '.join(fileSelectionStatus), transformation))
        continue

      fileDict = result['Value']
      result = self.obtainWMSJobIDs(transformation, fileDict, selectDelay, wmsStatusList)
      if not result['OK']:
        self.log.error(result)
        self.log.error('Could not obtain WMS jobIDs for files of transformation %s' % (transformation))
        continue
      if not result['Value']:
        # fileDict is not empty here, show its first LFN as an example
        self.log.info('No eligible WMS jobIDs found for %s files in list:\n%s ...' % (len(fileDict), next(iter(fileDict))))
        continue

      jobFileDict = result['Value']
      fileCount = sum(len(lfnList) for lfnList in jobFileDict.values())

      if not fileCount:
        self.log.info('No files were selected for transformation %s after examining WMS jobs.' % transformation)
        continue

      self.log.info('%s files are selected after examining related WMS jobs' % (fileCount))
      result = self.checkOutstandingRequests(jobFileDict)
      if not result['OK']:
        self.log.error(result)
        continue

      if not result['Value']:
        self.log.info('No WMS jobs without pending requests to process.')
        continue

      jobFileNoRequestsDict = result['Value']
      fileCount = sum(len(lfnList) for lfnList in jobFileNoRequestsDict.values())

      self.log.info('%s files are selected after removing any relating to jobs with pending requests' % (fileCount))
      result = self.checkDescendents(transformation, fileDict, jobFileNoRequestsDict)
      if not result['OK']:
        self.log.error(result)
        continue

      jobsWithFilesOKToUpdate = result['Value']['filesToMarkUnused']
      jobsWithFilesProcessed = result['Value']['filesprocessed']
      self.log.info('====> Transformation %s total files that can be updated now: %s' % (transformation, len(jobsWithFilesOKToUpdate)))

      filesToUpdateUnused = list(jobsWithFilesOKToUpdate)
      if filesToUpdateUnused:
        result = self.updateFileStatus(transformation, filesToUpdateUnused, updateStatus)
        if not result['OK']:
          self.log.error('Recoverable files were not updated with result:\n%s' % (result['Message']))
          continue
      else:
        self.log.info('There are no files with failed jobs to update for production %s in this cycle' % transformation)

      filesToUpdateProcessed = list(jobsWithFilesProcessed)
      if filesToUpdateProcessed:
        result = self.updateFileStatus(transformation, filesToUpdateProcessed, 'Processed')
        if not result['OK']:
          self.log.error('Recoverable files were not updated with result:\n%s' % (result['Message']))
          continue
      else:
        self.log.info('There are no files processed to update for production %s in this cycle' % transformation)

    return S_OK()

  #############################################################################
  def getEligibleTransformations(self, status, typeList):
    """ Select transformations of given status and type.

    :param str status: transformation status to select
    :param list typeList: transformation types to select
    :return: S_OK( { str(TransformationID) : Type } ) or the client error
    """
    res = self.prodDB.getTransformations(condDict = {'Status' : status, 'Type' : typeList})
    self.log.debug(res)
    if not res['OK']:
      return res
    transformations = {}
    for prod in res['Value']:
      transformations[str(prod['TransformationID'])] = prod['Type']
    return S_OK(transformations)

  #############################################################################
  def selectTransformationFiles(self, transformation, statusList):
    """ Select files, production jobIDs in specified file status for a given transformation.

    :param transformation: transformation ID
    :param list statusList: file statuses to select
    :return: S_OK( { LFN : task ID } ) or the client error
    """
    #Until a query for files with timestamp can be obtained must rely on the
    #WMS job last update
    res = self.prodDB.getTransformationFiles(condDict = {'TransformationID' : transformation, 'Status' : statusList})
    self.log.debug(res)
    if not res['OK']:
      return res
    resDict = {}
    for fileDict in res['Value']:
      if 'LFN' not in fileDict or self.taskIDName not in fileDict or 'LastUpdate' not in fileDict:
        self.log.info('LFN, %s and LastUpdate are mandatory, >=1 are missing for:\n%s' % (self.taskIDName, fileDict))
        continue
      resDict[fileDict['LFN']] = fileDict[self.taskIDName]
    if resDict:
      self.log.info('Selected %s files overall for transformation %s' % (len(resDict), transformation))
    return S_OK(resDict)

  #############################################################################
  def obtainWMSJobIDs(self, transformation, fileDict, selectDelay, wmsStatusList):
    """ Group files by the corresponding WMS jobIDs, check the corresponding
        jobs have not been updated for the delay time.  Can't get into any
        mess because we start from files only in MaxReset / Assigned and check
        corresponding jobs.  Mixtures of files for jobs in MaxReset and Assigned
        statuses only possibly include some files in Unused status (not Processed
        for example) that will not be touched.

    :param transformation: transformation ID
    :param dict fileDict: { LFN : task ID } of the selected files
    :param selectDelay: number of hours a task must not have been updated for
    :param list wmsStatusList: external statuses a task must be in to be kept
    :return: S_OK( { WMS job ID : [ LFNs ] } ) or the client error
    """
    prodJobIDs = uniqueElements(fileDict.values())
    self.log.info('The following %s production jobIDs apply to the selected files:\n%s' % (len(prodJobIDs), prodJobIDs))

    jobFileDict = {}
    condDict = {'TransformationID' : transformation, self.taskIDName : prodJobIDs}
    olderThan = dateTime() - datetime.timedelta( hours = selectDelay )

    res = self.prodDB.getTransformationTasks(condDict = condDict, older = olderThan,
                                             timeStamp = 'LastUpdateTime', inputVector = True)
    self.log.debug(res)
    if not res['OK']:
      self.log.error('getTransformationTasks returned an error:\n%s' % res['Message'])
      return res

    requiredKeys = [self.taskIDName, self.externalID, 'LastUpdateTime', self.externalStatus, 'InputVector']
    for jobDict in res['Value']:
      missingKeys = [key for key in requiredKeys if key not in jobDict]
      if missingKeys:
        for key in missingKeys:
          self.log.info('Missing key %s for job dictionary, the following is available:\n%s' % (key, jobDict))
        continue

      job = jobDict[self.taskIDName]
      wmsID = jobDict[self.externalID]
      lastUpdate = jobDict['LastUpdateTime']
      wmsStatus = jobDict[self.externalStatus]

      if not int(wmsID):
        self.log.info('Prod job %s status is %s (ID = %s) so will not recheck with WMS' %(job, wmsStatus, wmsID))
        continue

      self.log.info('Job %s, prod job %s last update %s, production management system status %s' % (wmsID, job, lastUpdate, wmsStatus))
      #Exclude jobs not having appropriate WMS status - have to trust that production management status is correct
      if wmsStatus not in wmsStatusList:
        self.log.info('Job %s is in status %s, not %s so will be ignored' % (wmsID, wmsStatus, ', '.join(wmsStatusList)))
        continue

      #Must map unique files -> jobs in expected state
      finalJobData = [lfn for lfn, prodID in fileDict.items() if int(prodID) == int(job)]

      self.log.info('Found %s files for job %s' % (len(finalJobData), job))
      jobFileDict[wmsID] = finalJobData

    return S_OK(jobFileDict)

  #############################################################################
  def checkOutstandingRequests(self, jobFileDict):
    """ Before doing anything check that no outstanding requests are pending
        for the set of WMS jobIDs.

    :param dict jobFileDict: { WMS job ID : [ LFNs ] }, modified in place
    :return: S_OK( jobFileDict without the jobs having requests ) or the client error
    """
    jobs = jobFileDict.keys()
    result = self.requestClient.getRequestForJobs(jobs)
    if not result['OK']:
      return result

    if not result['Value']:
      self.log.info('None of the jobs have pending requests')
      return S_OK(jobFileDict)

    for jobID in result['Value'].keys():
      # the job may be keyed by its number or by its string form, and must not
      # bring the agent down when it is not (or no longer) in the dictionary
      jobFileDict.pop(jobID, None)
      jobFileDict.pop(str(jobID), None)
      self.log.info('Removing jobID %s from consideration until requests are completed' % (jobID))

    return S_OK(jobFileDict)

  ############################################################################
  def checkDescendents(self, transformation, filedict, jobFileDict):
    """ Check, for every selected input file, whether the output expected from
        its task is in the catalog.

    The expected output LFNs are built from the 'listoutput' parameters of the
    workflow steps.  An input file counts as processed when the metadata of all
    its expected output files is found.  Files for which the expected LFNs
    cannot be built or the catalog cannot be queried are left out of both lists,
    so they are not updated in this cycle.

    :param transformation: transformation ID
    :param dict filedict: { LFN : task ID } of the selected files
    :param dict jobFileDict: { WMS job ID : [ LFNs ] }; it is not modified
    :return: S_OK( { 'filesprocessed' : [ LFNs ], 'filesToMarkUnused' : [ LFNs ] } )
             or S_ERROR when the workflow cannot be obtained or defines no JobType
    """
    res = self.prodDB.getTransformationParameters(transformation, ['Body'])
    if not res['OK']:
      self.log.error('Could not get Body from TransformationDB')
      return res
    workflow = fromXMLString(res['Value'])
    workflow.resolveGlobalVars()

    jtype = workflow.findParameter('JobType')
    if not jtype:
      # %s and not %d: the transformation ID is handled as a string by this agent
      message = 'Type for transformation %s was not defined' % transformation
      self.log.error(message)
      return S_ERROR(message)

    olist = []
    for step in workflow.step_instances:
      param = step.findParameter('listoutput')
      if not param:
        continue
      olist.extend(param.value)

    #get the tasks that need to be checked
    tasks_to_be_checked = {}
    for lfnList in jobFileDict.values():
      for lfn in lfnList:
        if lfn in filedict:
          tasks_to_be_checked[lfn] = filedict[lfn]

    checkedFiles = []
    fileprocessed = []
    for filep, task in tasks_to_be_checked.items():
      commons = {'outputList' : olist,
                 'PRODUCTION_ID' : transformation,
                 'JOB_ID' : task,
                 'JobType' : jtype}
      out = constructProductionLFNs(commons)
      if not out['OK']:
        self.log.error('Could not construct the expected output LFNs for %s' % filep, out['Message'])
        continue
      expectedlfns = out['Value']['ProductionOutputData']
      res = self.replicaManager.getCatalogFileMetadata(expectedlfns)
      if not res['OK']:
        self.log.error('Getting metadata failed')
        continue
      checkedFiles.append(filep)
      # processed only when every expected output file was found
      if res['Value']['Successful'] and not res['Value']['Failed']:
        fileprocessed.append(filep)

    processedSet = set(fileprocessed)
    final_list_unused = [lfn for lfn in checkedFiles if lfn not in processedSet]

    result = {'filesprocessed' : fileprocessed, 'filesToMarkUnused' : final_list_unused}
    return S_OK(result)

  #############################################################################
  def updateFileStatus(self, transformation, fileList, fileStatus):
    """ Update file list to specified status.

    Nothing is changed, only logged, while the EnableFlag option is False.

    :param transformation: transformation ID, convertible with int()
    :param list fileList: LFNs to update
    :param str fileStatus: new status of the files
    :return: S_OK(), or the result of setFileStatusForTransformation when it
             failed or reported failed files
    """
    if not self.enableFlag:
      self.log.info('Enable flag is False, would update  %s files to "%s" status for %s' % (len(fileList), fileStatus, transformation))
      return S_OK()

    self.log.info('Updating %s files to "%s" status for %s' % (len(fileList), fileStatus, transformation))
    result = self.prodDB.setFileStatusForTransformation(int(transformation), fileStatus, fileList, force = True)
    self.log.debug(result)
    if not result['OK']:
      self.log.error(result)
      return result
    if result['Value']['Failed']:
      self.log.error(result['Value']['Failed'])
      # NOTE(review): this result still has 'OK' set, so callers testing only
      # 'OK' see a success -- confirm this is intended
      return result

    for lfn, message in result['Value']['Successful'].items():
      self.log.info('%s => %s' % (lfn, message))

    return S_OK()
class FileStatusTransformationAgent(AgentModule):
  """ FileStatusTransformationAgent """

  def __init__(self, *args, **kwargs):
    """ Set the default configuration and create the clients used by the agent. """
    AgentModule.__init__(self, *args, **kwargs)
    self.name = 'FileStatusTransformationAgent'

    # defaults of the configurable options
    self.enabled = False
    self.shifterProxy = 'DataManager'
    self.transformationTypes = ["Replication"]
    self.transformationStatuses = ["Active"]
    self.transformationFileStatuses = ["Assigned", "Problematic", "Processed", "Unused"]

    # e-mail notification settings
    self.emailSubject = "FileStatusTransformationAgent"
    self.addressFrom = "*****@*****.**"
    self.addressTo = ["*****@*****.**"]

    # what is reported in the notification: files grouped by action, and error messages
    self.errors = []
    self.accounting = defaultdict(list)

    # service clients
    self.fcClient = FileCatalogClient()
    self.tClient = TransformationClient()
    self.reqClient = ReqClient()
    self.nClient = NotificationClient()

  def checkFileStatusFuncExists(self, status):
    """ Tell whether the agent has a callable "check_<status>_files" attribute.

    :param str status: transformation file status, used in lower case in the name
    :return: True if such a callable exists, False (after logging a warning) otherwise
    """
    handlerName = "check_%s_files" % (status.lower())
    handler = getattr(self, handlerName, None)
    if callable(handler):
      return True

    self.log.warn("Unable to process transformation files with status ", status)
    return False

  def beginExecution(self):
    """ Reload the configuration before every cycle.

    The file statuses for which the agent has no "check_<status>_files" method
    are dropped, and the accounting of the previous cycle is cleared.

    :return: S_OK()
    """
    readOption = self.am_getOption

    self.enabled = readOption('EnableFlag', False)
    # NOTE(review): this stores whatever am_setOption returns, not the proxy
    # name itself -- confirm this is intended
    self.shifterProxy = self.am_setOption('shifterProxy', 'DataManager')
    self.transformationTypes = readOption('TransformationTypes', ["Replication"])
    self.transformationStatuses = readOption('TransformationStatuses', ["Active"])
    configuredFileStatuses = readOption('TransformationFileStatuses',
                                        ["Assigned", "Problematic", "Processed", "Unused"])
    self.transformationFileStatuses = configuredFileStatuses

    self.addressTo = readOption('MailTo', ["*****@*****.**"])
    self.addressFrom = readOption('MailFrom', "*****@*****.**")

    # keep only the statuses the agent knows how to treat
    self.transformationFileStatuses = [fileStatus for fileStatus in self.transformationFileStatuses
                                       if self.checkFileStatusFuncExists(fileStatus)]
    self.accounting.clear()

    return S_OK()

  def sendNotification(self, transID, transType=None, sourceSEs=None, targetSEs=None):
    """ Send the accounting information and the errors of a transformation by e-mail.

    Nothing is sent when there is neither accounting information nor an error.
    The message is also written to the log, and both the errors and the
    accounting are reset once the mails have been handed over.

    :param transID: transformation ID
    :param transType: transformation type, mentioned in the mail when given
    :param sourceSEs: source storage elements, mentioned in the mail when given
    :param targetSEs: target storage elements, mentioned in the mail when given
    :return: S_OK()
    """
    if not self.errors and not self.accounting:
      return S_OK()

    bodyParts = ["Transformation ID: %s\n" % transID]
    if transType:
      bodyParts.append("Transformation Type: %s\n" % transType)
    if sourceSEs:
      bodyParts.append("Source SE: %s\n" % (" ".join([str(source) for source in sourceSEs])))
    if targetSEs:
      bodyParts.append("Target SE: %s\n\n" % (" ".join([str(target) for target in targetSEs])))

    # one summary line per action, and one table row per file
    tableRows = []
    for action, transFiles in self.accounting.items():
      bodyParts.append("Total number of files with action %s: %s\n" % (action, len(transFiles)))
      for transFile in transFiles:
        tableRows.append([[transFile['LFN']],
                          [str(transFile['AvailableOnSource'])],
                          [str(transFile['AvailableOnTarget'])],
                          [transFile['Status']],
                          [action]])

    if tableRows:
      header = ["LFN", "Source", "Target", "Old Status", "Action"]
      bodyParts.append(printTable(header, tableRows, printOut=False, numbering=False, columnSeparator=' | '))

    if self.errors:
      bodyParts.append("\n\nErrors:")
      bodyParts.append("\n".join(self.errors))

    emailBody = "".join(bodyParts)
    self.log.notice(emailBody)

    subject = "%s: %s" % (self.emailSubject, transID)
    for address in self.addressTo:
      sent = self.nClient.sendMail(address, subject, emailBody, self.addressFrom, localAttempt=False)
      if not sent['OK']:
        self.log.error("Failure to send Email notification to ", address)

    self.errors = []
    self.accounting.clear()
    return S_OK()

  def logError(self, errStr, varMsg=''):
    """ Log an error and remember it for the next e-mail notification.

    :param str errStr: error message
    :param str varMsg: variable part of the message, appended to errStr in the remembered text
    """
    self.log.error(errStr, varMsg)
    fullMessage = errStr + varMsg
    self.errors.append(fullMessage)

  def execute(self):
    """ Main execution loop of the agent.

    Every selected transformation needs its source SEs, target SEs and data
    transformation type to be known; when one is missing the error is logged, a
    notification is sent and the transformation is skipped.

    :return: S_OK(), or S_ERROR when the transformations cannot be obtained
    """
    res = self.getTransformations()
    if not res['OK']:
      self.log.error('Failure to get transformations', res['Message'])
      return S_ERROR("Failure to get transformations")

    transformations = res['Value']
    if not transformations:
      self.log.notice('No transformations found with Status %s and Type %s ' %
                      (self.transformationStatuses, self.transformationTypes))
      return S_OK()

    self.log.notice('Will treat %d transformations' % len(transformations))
    allIDs = [str(transformation['TransformationID']) for transformation in transformations]
    self.log.notice('Transformations: %s' % ",".join(allIDs))

    for trans in transformations:
      transID = trans['TransformationID']

      sourceSEs = trans.get('SourceSE')
      if not sourceSEs:
        self.logError("SourceSE not set for transformation, skip processing, transID: ", "%s" % transID)
        self.sendNotification(transID)
        continue

      targetSEs = trans.get('TargetSE')
      if not targetSEs:
        self.logError("TargetSE not set for transformation, skip processing, transID: ", "%s" % transID)
        self.sendNotification(transID, sourceSEs=sourceSEs)
        continue

      if 'DataTransType' not in trans:
        self.logError("Transformation Type not set for transformation, skip processing, transID: ", "%s" % transID)
        self.sendNotification(transID, sourceSEs=sourceSEs, targetSEs=targetSEs)
        continue

      res = self.processTransformation(transID, sourceSEs, targetSEs, trans['DataTransType'])
      if not res['OK']:
        self.log.error('Failure to process transformation with ID:', transID)

    return S_OK()

  def getTransformations(self, transID=None):
    """ Return the transformations having one of the configured types and statuses.

    Each transformation dictionary is completed with 'SourceSE', 'TargetSE' and
    'DataTransType'.  When one of them cannot be obtained the error is logged and
    the transformation is returned without the keys that could not be set.

    :param transID: when given, only this transformation is looked up
    :return: S_OK( list of transformation dictionaries ) or the client error
    """
    selection = {'Status': self.transformationStatuses, 'Type': self.transformationTypes}
    if transID:
      selection['TransformationID'] = transID

    res = self.tClient.getTransformations(condDict=selection)
    if not res['OK']:
      return res

    foundTransformations = res['Value']
    for trans in foundTransformations:
      currentID = trans['TransformationID']

      seParams = self.tClient.getTransformationParameters(currentID, ['SourceSE', 'TargetSE'])
      if not seParams['OK']:
        self.log.error('Failure to get SourceSE and TargetSE parameters for Transformation ID:',
                       currentID)
        continue

      # NOTE(review): eval() executes whatever is stored in these parameters;
      # consider a literal-only parser if the values can come from untrusted users
      trans['SourceSE'] = eval(seParams['Value']['SourceSE'])
      trans['TargetSE'] = eval(seParams['Value']['TargetSE'])

      dataType = self.getDataTransformationType(currentID)
      if not dataType['OK']:
        self.log.error('Failure to determine Data Transformation Type', "%s: %s"
                       % (currentID, dataType['Message']))
        continue

      trans['DataTransType'] = dataType['Value']

    return S_OK(foundTransformations)

  def getRequestStatus(self, transID, taskIDs):
    """ Return the request status and request ID of the given tasks.

    :param transID: transformation ID
    :param taskIDs: task ID(s) used to select the transformation tasks
    :return: S_OK( { TaskID : { 'RequestStatus' : ExternalStatus, 'RequestID' : ExternalID as a number } } )
             or the client error
    """
    selection = {'TransformationID': transID, 'TaskID': taskIDs}
    res = self.tClient.getTransformationTasks(condDict=selection)
    if not res['OK']:
      self.log.error('Failure to get Transformation Tasks for Transformation ID:', transID)
      return res

    statusPerTask = {}
    for taskInfo in res['Value']:
      statusPerTask[taskInfo['TaskID']] = {'RequestStatus': taskInfo['ExternalStatus'],
                                           'RequestID': long(taskInfo['ExternalID'])}
    return S_OK(statusPerTask)

  def getDataTransformationType(self, transID):
    """ Work out from its Body whether a transformation replicates or moves data.

    An empty body is taken for a replication.  Otherwise the body is searched
    for the 'ReplicateAndRegister' and 'RemoveReplica' operations, operation by
    operation when it is valid JSON and in the raw text when it is not.

    :param transID: transformation ID
    :return: S_OK( MOVING_TRANS ) when both operations are found,
             S_OK( REPLICATION_TRANS ) when only the replication is,
             S_ERROR otherwise or when the body cannot be obtained
    """
    res = self.tClient.getTransformationParameters(transID, 'Body')
    if not res['OK']:
      return res

    body = res['Value']
    # if body is empty then we assume that it is a replication transformation
    if not body:
      return S_OK(REPLICATION_TRANS)

    replication = False
    rmReplica = False
    try:
      for operation in json.loads(body):
        if 'ReplicateAndRegister' in operation:
          replication = True
        if 'RemoveReplica' in operation:
          rmReplica = True
    except ValueError:
      # not JSON: the removal only matters together with the replication
      if 'ReplicateAndRegister' in body:
        replication = True
        rmReplica = rmReplica or 'RemoveReplica' in body

    if replication:
      if rmReplica:
        return S_OK(MOVING_TRANS)
      return S_OK(REPLICATION_TRANS)

    return S_ERROR("Unknown Transformation Type '%r'" % body)

  def setFileStatus(self, transID, transFiles, status):
    """ Set the status of transformation files and record them in the accounting.

    The status is changed in the transformation system only when the agent is
    enabled; the files are recorded under :status: in the accounting either way,
    unless the change was attempted and failed.

    :param transID: transformation ID
    :param list transFiles: file dictionaries with the keys 'LFN', 'Status',
                            'AvailableOnSource' and 'AvailableOnTarget'
    :param status: status to set
    :return: S_OK(), or the client error when the status could not be set
    """
    newStatuses = dict((transFile['LFN'], status) for transFile in transFiles)
    if not newStatuses:
      return S_OK()

    if self.enabled:
      res = self.tClient.setFileStatusForTransformation(transID, newLFNsStatus=newStatuses, force=True)
      if not res['OK']:
        self.logError('Failed to set statuses for LFNs ', "%s" % res['Message'])
        return res

    recordedKeys = ('LFN', 'Status', 'AvailableOnSource', 'AvailableOnTarget')
    for transFile in transFiles:
      self.accounting[status].append(dict((key, transFile[key]) for key in recordedKeys))
    return S_OK()

  def selectFailedRequests(self, transFile):
    """ Tell whether the request of a transformation file is in 'Failed' status.

    :param dict transFile: file dictionary with the keys 'TransformationID' and 'TaskID'
    :return: True if the request status of the task is 'Failed'; False otherwise,
             including when the status cannot be obtained or the task is unknown
    """
    taskID = transFile['TaskID']
    res = self.getRequestStatus(transFile['TransformationID'], taskID)
    if not res['OK']:
      self.log.error('Failure to get Request Status for Assigned File')
      return False

    # a task missing from the answer (e.g. a file without task) has no failed request
    taskStatus = res['Value'].get(taskID, {})
    return taskStatus.get('RequestStatus') == 'Failed'

  def retryStrategyForFiles(self, transID, transFiles):
    """ Return the retry strategy of every task the given files belong to.

    The strategy currently is SET_UNUSED in all cases (resetting the request is
    disabled); the 'RequestID' is added when the request of the task is found in
    the RMS.  Files without task (TaskID None) are collected under the key None.

    :param transID: transformation ID
    :param list transFiles: file dictionaries with the key 'TaskID'
    :return: S_OK( { taskID : { 'Strategy' : ..., 'RequestID' : ... } } ) or the
             error of the request status lookup
    """
    taskIDs = [transFile['TaskID'] for transFile in transFiles]
    res = self.getRequestStatus(transID, taskIDs)
    if not res['OK']:
      return res
    requestStatuses = res['Value']

    retryStrategy = defaultdict(dict)
    for taskID in taskIDs:
      if taskID is None:
        self.log.error("Task ID is None", "Transformation: %s\n Files: %r " % (transID, transFiles))
        retryStrategy[None]['Strategy'] = SET_UNUSED
        continue

      if taskID not in requestStatuses:
        # no task information came back, so there is no request that could be looked up
        self.log.notice('No request information found for task %s setting file status to unused' % taskID)
        retryStrategy[taskID]['Strategy'] = SET_UNUSED
        continue

      requestID = requestStatuses[taskID]['RequestID']
      res = self.reqClient.getRequest(requestID=requestID)
      if not res['OK']:
        self.log.notice('Request %s does not exist setting file status to unused' % requestID)
        retryStrategy[taskID]['Strategy'] = SET_UNUSED
      else:
        retryStrategy[taskID]['Strategy'] = SET_UNUSED  # RESET_REQUEST
        retryStrategy[taskID]['RequestID'] = requestID

    return S_OK(retryStrategy)

  def check_assigned_files(self, actions, transFiles, transType):
    """ Choose the action for transformation files in assigned status.

    :param actions: mapping of action -> list of files, filled by this method
    :param list transFiles: file dictionaries with 'AvailableOnSource' and 'AvailableOnTarget'
    :param transType: data transformation type (replication or moving)
    """
    for transFile in transFiles:
      onSource = transFile['AvailableOnSource']
      onTarget = transFile['AvailableOnTarget']

      if not onSource:
        # gone from the source: done if it reached the target, lost otherwise
        actions[SET_PROCESSED if onTarget else SET_DELETED].append(transFile)
      elif not onTarget:
        actions[RETRY].append(transFile)
      else:
        # on both sides: complete for a replication, source still to be removed for a move
        if transType == REPLICATION_TRANS:
          actions[SET_PROCESSED].append(transFile)
        if transType == MOVING_TRANS:
          actions[RETRY].append(transFile)

  def check_unused_files(self, actions, transFiles, transType):
    """ sort transformation files with unused status into the action lists

    :param dict actions: lists of transformation files keyed by action, extended in place
    :param list transFiles: file dictionaries providing 'AvailableOnSource' and 'AvailableOnTarget'
    :param transType: transformation type, not used by this check
    """
    for transFile in transFiles:
      if transFile['AvailableOnSource']:
        # nothing to do while the file is still available on the source
        continue

      if transFile['AvailableOnTarget']:
        actions[SET_PROCESSED].append(transFile)
      else:
        actions[SET_DELETED].append(transFile)

  def check_processed_files(self, actions, transFiles, transType):
    """ sort transformation files with processed status into the action lists

    :param dict actions: lists of transformation files keyed by action, extended in place
    :param list transFiles: file dictionaries providing 'AvailableOnSource' and 'AvailableOnTarget'
    :param transType: transformation type, compared with MOVING_TRANS
    """
    for transFile in transFiles:
      onSource = transFile['AvailableOnSource']
      onTarget = transFile['AvailableOnTarget']

      if onSource:
        # retry when the target copy is missing, or when a moved file is still on the source
        if not onTarget or transType == MOVING_TRANS:
          actions[RETRY].append(transFile)
      elif not onTarget:
        actions[SET_DELETED].append(transFile)

  def check_problematic_files(self, actions, transFiles, transType):
    """ sort transformation files with problematic status into the action lists

    :param dict actions: lists of transformation files keyed by action, extended in place
    :param list transFiles: file dictionaries providing 'AvailableOnSource' and 'AvailableOnTarget'
    :param transType: transformation type, compared with REPLICATION_TRANS and MOVING_TRANS
    """
    for transFile in transFiles:
      # (on source, on target)
      availability = (bool(transFile['AvailableOnSource']), bool(transFile['AvailableOnTarget']))

      if availability == (True, True):
        if transType == REPLICATION_TRANS:
          actions[SET_PROCESSED].append(transFile)
        if transType == MOVING_TRANS:
          actions[RETRY].append(transFile)
      elif availability == (True, False):
        actions[RETRY].append(transFile)
      elif availability == (False, True):
        actions[SET_PROCESSED].append(transFile)
      else:
        # available neither on source nor on target
        actions[SET_DELETED].append(transFile)

  def retryFiles(self, transID, transFiles):
    """ reset the request of a file or set the file to unused, following its retry strategy

    :param transID: transformation ID
    :param list transFiles: transformation file dictionaries to retry
    :returns: S_OK, or the error result of retryStrategyForFiles
    """
    res = self.retryStrategyForFiles(transID, transFiles)
    if not res['OK']:
      self.logError('Failure to determine retry strategy (unused / reset request) for files ', "%s" % res['Message'])
      return res
    retryStrategy = res['Value']

    filesToUnused = []
    filesToAssigned = []
    for transFile in transFiles:
      taskID = transFile['TaskID']
      if retryStrategy[taskID]['Strategy'] != RESET_REQUEST:
        filesToUnused.append(transFile)
        continue

      requestID = retryStrategy[taskID]['RequestID']
      if self.enabled:
        resetRes = self.reqClient.resetFailedRequest(requestID, allR=True)
        if not resetRes['OK']:
          self.logError('Failed to reset request ', 'ReqID: %s Error: %s' % (requestID, resetRes['Message']))
          continue

        if resetRes['Value'] == "Not reset":
          self.logError('Failed to reset request ', 'ReqID: %s is non-recoverable' % requestID)
          continue

        filesToAssigned.append(transFile)

        taskRes = self.tClient.setTaskStatus(transID, taskID, 'Waiting')
        if not taskRes['OK']:
          self.logError('Failure to set Waiting status for Task ID: ', "%s %s" % (taskID, taskRes['Message']))
          continue

      # reached when the agent is not enabled or when the request and the task were both updated
      accountingKeys = ('LFN', 'Status', 'AvailableOnSource', 'AvailableOnTarget')
      self.accounting[RESET_REQUEST].append({key: transFile[key] for key in accountingKeys})

    if filesToUnused:
      self.setFileStatus(transID, filesToUnused, 'Unused')

    if filesToAssigned:
      self.setFileStatus(transID, filesToAssigned, 'Assigned')

    return S_OK()

  def applyActions(self, transID, actions):
    """ apply the collected actions: set the new file statuses and retry files

    :param transID: transformation ID
    :param dict actions: lists of transformation files keyed by action
    """
    for action, transFiles in actions.iteritems():
      if not transFiles:
        # nothing was collected for this action
        continue

      if action == SET_PROCESSED:
        self.setFileStatus(transID, transFiles, 'Processed')

      if action == SET_DELETED:
        self.setFileStatus(transID, transFiles, 'Deleted')

      if action == RETRY:
        # retryFiles decides between resetting the request and setting the file status to unused
        self.retryFiles(transID, transFiles)

  def existsInFC(self, storageElements, lfns):
    """ check whether the files have replicas registered in the File Catalog at all given storage elements

    :param storageElements: storage element names that all have to hold a replica
    :param lfns: logical file names to look up
    :returns: S_OK with 'Successful' (lfn -> bool) and 'Failed' (lfn -> message) dictionaries,
              or the error result of getReplicas
    """
    res = self.fcClient.getReplicas(lfns)
    if not res['OK']:
      return res
    replicaInfo = res['Value']

    requiredSEs = set(storageElements)
    found = {}
    failed = {}

    for lfn, msg in replicaInfo['Failed'].iteritems():
      if msg == 'No such file or directory':
        # a file unknown to the catalog is a valid answer, not a failure
        found[lfn] = False
      else:
        failed[lfn] = msg

    # a file counts as existing only when every requested SE holds a replica
    for lfn, replicas in replicaInfo['Successful'].iteritems():
      found[lfn] = requiredSEs.issubset(replicas.keys())

    return S_OK({'Successful': found, 'Failed': failed})

  def existsOnSE(self, storageElements, lfns):
    """ check whether the given files exist physically on all listed storage elements

    :param storageElements: storage element names to query
    :param list lfns: logical file names; the VO name is taken from the first path component of lfns[0]
    :returns: S_OK with 'Successful' (lfn -> status) and 'Failed' (storage element -> failures)
              dictionaries, or the error result of the first storage element query that fails
    """
    existing = {}
    failed = {}
    result = {'Failed': failed, 'Successful': existing}

    if not lfns:
      return S_OK(result)

    voName = lfns[0].split('/')[1]
    for se in storageElements:
      res = StorageElement(se, vo=voName).exists(lfns)
      if not res['OK']:
        return res

      for lfn, status in res['Value']['Successful'].iteritems():
        if not status:
          # missing on one storage element is enough to flag the file as not existing
          existing[lfn] = False
        elif lfn not in existing:
          existing[lfn] = status

      failed[se] = res['Value']['Failed']

    return S_OK(result)

  def exists(self, storageElements, lfns):
    """ checks if files exists on both file catalog and storage elements

    :param storageElements: storage element names every file has to be present on
    :param lfns: logical file names to check
    :returns: the result of existsInFC in which files registered in the catalog but not found on
              storage are flagged False, or an error result when the catalog or storage lookup fails
    """

    fcRes = self.existsInFC(storageElements, lfns)
    if not fcRes['OK']:
      self.logError('Failure to determine if files exists in File Catalog ', "%s" % fcRes['Message'])
      return fcRes

    if fcRes['Value']['Failed']:
      self.logError("Failed FileCatalog Response ", "%s" % fcRes['Value']['Failed'])

    # check if files found in file catalog also exist on SE
    checkLFNsOnStorage = [lfn for lfn in fcRes['Value']['Successful'] if fcRes['Value']['Successful'][lfn]]

    # no files were found in FC, return the result instead of verifying them on SE
    if not checkLFNsOnStorage:
      return fcRes

    seRes = self.existsOnSE(storageElements, checkLFNsOnStorage)
    if not seRes['OK']:
      self.logError('Failure to determine if files exist on SE ', "%s" % seRes['Message'])
      return seRes

    for se in storageElements:
      if seRes['Value']['Failed'][se]:
        self.logError('Failed to determine if files exist on SE ', "%s %s" % (se, seRes['Value']['Failed'][se]))
        # return a message with the error so that callers have something to report
        return S_ERROR("Failed to determine if files exist on SE %s" % se)

    # a file registered in the catalog only counts as existing when it was also found on storage
    fcResult = fcRes['Value']['Successful']
    seResult = seRes['Value']['Successful']
    for lfn in fcResult:
      if fcResult[lfn] and not seResult[lfn]:
        fcRes['Value']['Successful'][lfn] = False

    return fcRes

  def processTransformation(self, transID, sourceSE, targetSEs, transType):
    """ process transformation for a given transformation ID

    For every status in self.transformationFileStatuses the transformation files are fetched, their
    availability on the source and target storage elements is determined and the matching
    check_<status>_files method fills the action lists. The collected actions are applied at the
    end and a notification is sent.

    :param transID: transformation ID
    :param sourceSE: source storage elements, passed to exists
    :param targetSEs: target storage elements, passed to exists
    :param transType: transformation type, handed to the check_<status>_files methods
    :returns: S_OK
    """

    actions = {}
    actions[SET_PROCESSED] = []
    actions[RETRY] = []
    actions[SET_DELETED] = []

    for status in self.transformationFileStatuses:
      res = self.tClient.getTransformationFiles(condDict={'TransformationID': transID, 'Status': status})
      if not res['OK']:
        errStr = 'Failure to get Transformation Files, Status: %s Transformation ID: %s Message: %s' % (status,
                                                                                                        transID,
                                                                                                        res['Message'])
        self.logError(errStr)
        continue

      transFiles = res['Value']
      if not transFiles:
        self.log.notice("No Transformation Files found with status %s for Transformation ID %d" % (status, transID))
        continue

      self.log.notice("Processing Transformation Files with status %s for TransformationID %d " % (status, transID))

      if status == 'Assigned':
        # transFiles is iterated twice below, so build a real list: filter() would hand back a
        # one-shot iterator on Python 3 and the second loop would silently see no files
        transFiles = [transFile for transFile in transFiles if self.selectFailedRequests(transFile)]

      lfns = [transFile['LFN'] for transFile in transFiles]

      if not lfns:
        continue

      # exists() logs its own failures, the status is simply skipped for this cycle
      res = self.exists(sourceSE, lfns)
      if not res['OK']:
        continue

      resultSourceSe = res['Value']['Successful']

      res = self.exists(targetSEs, lfns)
      if not res['OK']:
        continue
      resultTargetSEs = res['Value']['Successful']

      for transFile in transFiles:
        lfn = transFile['LFN']
        transFile['AvailableOnSource'] = resultSourceSe[lfn]
        transFile['AvailableOnTarget'] = resultTargetSEs[lfn]

      # dispatch to check_assigned_files, check_unused_files, ... depending on the status name
      checkFilesFuncName = "check_%s_files" % status.lower()
      checkFiles = getattr(self, checkFilesFuncName)
      checkFiles(actions, transFiles, transType)

    self.applyActions(transID, actions)
    self.sendNotification(transID, transType, sourceSE, targetSEs)

    return S_OK()
Esempio n. 12
0
class TransformationCleaningAgent(AgentModule):
    """
  .. class:: TransformationCleaningAgent

  :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance
  :param ~TransformationClient.TransformationClient transClient: TransformationClient instance
  :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance

  """
    def __init__(self, *args, **kwargs):
        """ c'tor: initialise the base agent and set the defaults of all members

        :param self: self reference
        """
        AgentModule.__init__(self, *args, **kwargs)

        # transformation, wms, request and file catalog clients
        self.transClient = self.wmsClient = self.reqClient = self.metadataClient = None

        self.shifterProxy = None

        # transformation types handled by the agent
        self.transformationTypes = None
        self.dataProcTTypes = ['MCSimulation', 'Merge']
        self.dataManipTTypes = ['Replication', 'Removal']

        # places searched for the directories of a transformation
        self.directoryLocations = ['TransformationDB', 'MetadataCatalog']
        # name of the metadata tag holding the transformation ID
        self.transfidmeta = 'TransformationID'
        # archive period in days
        self.archiveAfter = 7
        # storage element holding the transformation logs
        self.logSE = 'LogSE'
        # execution switch, kept as a string
        self.enableFlag = 'True'

    def initialize(self):
        """ agent initialisation: read the configuration options and create the clients

        :param self: self reference
        :returns: S_OK
        """
        # Shifter proxy. See cleanContent: this proxy is ALSO used when the file catalog is the
        # DIRAC File Catalog (DFC), which is possible because "UseServerCertificate" gets unset there.
        self.shifterProxy = self.am_getOption('shifterProxy', self.shifterProxy)

        # Transformation types: the agent option, when set, replaces the Operations based default
        self.dataProcTTypes = Operations().getValue('Transformations/DataProcessing', self.dataProcTTypes)
        self.dataManipTTypes = Operations().getValue('Transformations/DataManipulation', self.dataManipTTypes)
        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if not agentTSTypes:
            agentTSTypes = self.dataProcTTypes + self.dataManipTTypes
        self.transformationTypes = sorted(agentTSTypes)
        self.log.info("Will consider the following transformation types: %s" % str(self.transformationTypes))

        # Directory locations
        locations = self.am_getOption('DirectoryLocations', self.directoryLocations)
        self.directoryLocations = sorted(locations)
        self.log.info("Will search for directories in the following locations: %s" % str(self.directoryLocations))

        # Transformation metadata tag
        self.transfidmeta = self.am_getOption('TransfIDMeta', self.transfidmeta)
        self.log.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta)

        # Archive period in days
        self.archiveAfter = self.am_getOption('ArchiveAfter', self.archiveAfter)
        self.log.info("Will archive Completed transformations after %d days" % self.archiveAfter)

        # Storage element holding the transformation logs
        self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE)
        self.log.info("Will remove logs found on storage element: %s" % self.logSE)

        # Clients: transformation, wms, request and file catalog
        self.transClient = TransformationClient()
        self.wmsClient = WMSClient()
        self.reqClient = ReqClient()
        self.metadataClient = FileCatalogClient()

        return S_OK()

    #############################################################################
    def execute(self):
        """ execution in one agent's cycle

        Cleans the transformations in 'Cleaning' status, removes the output of those in
        'RemovingFiles' status and archives the 'Completed' ones whose 'LastUpdate' is older than
        ``archiveAfter`` days. Nothing is done when the EnableFlag option is not 'True'.

        :param self: self reference
        :returns: S_OK
        """

        self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
        if self.enableFlag != 'True':
            self.log.info('TransformationCleaningAgent is disabled by configuration option EnableFlag')
            return S_OK('Disabled via CS flag')

        # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
        res = self.transClient.getTransformations({'Status': 'Cleaning',
                                                   'Type': self.transformationTypes})
        self._applyToTransformations(
            res, self._executeClean,
            "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
            "Failed to get transformations")

        # Obtain the transformations in RemovingFiles status and removes the output files
        res = self.transClient.getTransformations({'Status': 'RemovingFiles',
                                                   'Type': self.transformationTypes})
        self._applyToTransformations(
            res, self._executeRemoval,
            "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
            "Could not get the transformations")

        # Obtain the transformations in Completed status and archive if inactive for X days
        olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
        res = self.transClient.getTransformations({'Status': 'Completed',
                                                   'Type': self.transformationTypes},
                                                  older=olderThanTime,
                                                  timeStamp='LastUpdate')
        self._applyToTransformations(
            res, self._executeArchive,
            "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
            "Could not get the transformations")
        return S_OK()

    def _applyToTransformations(self, res, action, infoTemplate, errorMsg):
        """ run :action: on every transformation of a getTransformations result

        :param dict res: result of TransformationClient.getTransformations
        :param callable action: method taking one transformation dictionary
        :param str infoTemplate: log message, %-formatted with the transformation dictionary
        :param str errorMsg: message logged when :res: is not OK
        """
        if not res['OK']:
            self.log.error(errorMsg, res['Message'])
            return

        for transDict in res['Value']:
            if self.shifterProxy:
                # a shifter proxy is configured, the action is called directly
                action(transDict)
            else:
                # no shifter proxy: act with the credentials of the transformation author
                self.log.info(infoTemplate % transDict)
                executeWithUserProxy(action)(transDict,
                                             proxyUserDN=transDict['AuthorDN'],
                                             proxyUserGroup=transDict['AuthorGroup'])

    def _executeClean(self, transDict):
        """Clean the given transformation, or only archive it for data manipulation types.

        :param dict transDict: transformation dictionary providing 'Type' and 'TransformationID'
        """
        # Transformations of a data manipulation type (`Replication` or `Removal` by default)
        # have nothing to clean, they are archived right away.
        archiveOnly = transDict['Type'] in self.dataManipTTypes
        transID = transDict['TransformationID']

        if archiveOnly:
            res = self.archiveTransformation(transID)
            if not res['OK']:
                self.log.error("Problems archiving transformation %s: %s" % (transID, res['Message']))
            return

        res = self.cleanTransformation(transID)
        if not res['OK']:
            self.log.error("Problems cleaning transformation %s: %s" % (transID, res['Message']))

    def _executeRemoval(self, transDict):
        """Remove the output files of the given transformation; failures are only logged.

        :param dict transDict: transformation dictionary providing 'TransformationID'
        """
        transID = transDict['TransformationID']
        res = self.removeTransformationOutput(transID)
        if res['OK']:
            return
        self.log.error("Problems removing transformation %s: %s" % (transID, res['Message']))

    def _executeArchive(self, transDict):
        """Archive the given transformation; failures are only logged.

        :param dict transDict: transformation dictionary providing 'TransformationID'
        :returns: S_OK, also when the archiving failed
        """
        transID = transDict['TransformationID']
        outcome = self.archiveTransformation(transID)
        if not outcome['OK']:
            self.log.error("Problems archiving transformation %s: %s" % (transID, outcome['Message']))
        return S_OK()

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """ get the directories for the supplied transformation from the transformation system.
        These directories are used by removeTransformationOutput and cleanTransformation for removing output.

        The 'OutputDirectories' transformation parameter and the metadata catalog are consulted,
        depending on the configured directory locations.

        :param self: self reference
        :param int transID: transformation ID
        :returns: S_OK with the sorted list of directories, or the error result of the failing lookup
        """
        self.log.verbose("Cleaning Transformation directories of transformation %d" % transID)
        directories = []
        if 'TransformationDB' in self.directoryLocations:
            res = self.transClient.getTransformationParameters(transID, ['OutputDirectories'])
            if not res['OK']:
                self.log.error("Failed to obtain transformation directories", res['Message'])
                return res
            transDirectories = []
            if res['Value']:
                if not isinstance(res['Value'], list):
                    try:
                        transDirectories = ast.literal_eval(res['Value'])
                    except Exception:
                        # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]'.
                        # Only Exception is caught so that KeyboardInterrupt and SystemExit get through.
                        transDirectories.append(res['Value'])
                else:
                    transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if 'MetadataCatalog' in self.directoryLocations:
            res = self.metadataClient.findDirectoriesByMetadata({self.transfidmeta: transID})
            if not res['OK']:
                self.log.error("Failed to obtain metadata catalog directories", res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if not directories:
            self.log.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)

    @classmethod
    def _addDirs(cls, transID, newDirs, existingDirs):
        """ append unique :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    """
        for folder in newDirs:
            transStr = str(transID).zfill(8)
            if re.search(transStr, str(folder)):
                if folder not in existingDirs:
                    existingDirs.append(os.path.normpath(folder))
        return existingDirs

    #############################################################################
    #
    # These are the methods for performing the cleaning of catalogs and storage
    #

    def cleanContent(self, directory):
        """ wipe out everything from catalog under folder :directory:

        :param self: self reference
        :param str directory: folder name
        :returns: S_OK when nothing was found or everything could be removed (files reported as
                  not existing are tolerated), an error result otherwise
        """
        self.log.verbose("Cleaning Catalog contents")
        res = self.__getCatalogDirectoryContents([directory])
        if not res['OK']:
            return res
        filesFound = res['Value']
        if not filesFound:
            self.log.info("No files are registered in the catalog directory %s" % directory)
            return S_OK()
        self.log.info("Attempting to remove %d possible remnants from the catalog and storage" % len(filesFound))

        # Executing with shifter proxy
        gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')
        try:
            res = DataManager().removeFile(filesFound, force=True)
        finally:
            # switch the global option back even when the removal raises
            gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')

        if not res['OK']:
            return res
        realFailure = False
        for lfn, reason in res['Value']['Failed'].items():
            if "File does not exist" in str(reason):
                self.log.warn("File %s not found in some catalog: " % (lfn))
            else:
                self.log.error("Failed to remove file found in the catalog", "%s %s" % (lfn, reason))
                realFailure = True
        if realFailure:
            return S_ERROR("Failed to remove all files found in the catalog")
        return S_OK()

    def __getCatalogDirectoryContents(self, directories):
        """ get catalog contents under paths :directories:

        The directories are listed one by one and the sub-directories found are queued as well.
        Directories that do not exist or cannot be listed are logged and skipped.

        :param self: self reference
        :param list directories: list of paths in catalog, left untouched
        :returns: S_OK with the list of files found
        """
        self.log.info('Obtaining the catalog contents for %d directories:' % len(directories))
        for directory in directories:
            self.log.info(directory)
        # work on a copy so that the caller's list is not emptied and extended
        activeDirs = list(directories)
        allFiles = {}
        fc = FileCatalog()
        while activeDirs:
            currentDir = activeDirs.pop(0)
            res = returnSingleResult(fc.listDirectory(currentDir))
            if not res['OK'] and 'Directory does not exist' in res['Message']:  # FIXME: DFC should return errno
                self.log.info("The supplied directory %s does not exist" % currentDir)
            elif not res['OK']:
                if "No such file or directory" in res['Message']:
                    self.log.info("%s: %s" % (currentDir, res['Message']))
                else:
                    self.log.error("Failed to get directory %s content: %s" % (currentDir, res['Message']))
            else:
                dirContents = res['Value']
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])
        self.log.info("Found %d files" % len(allFiles))
        return S_OK(list(allFiles))

    def cleanTransformationLogFiles(self, directory):
        """ remove the transformation log directory :directory: from the log storage element

        :param self: self reference
        :param str directory: folder name
        :returns: S_OK when the directory was removed or did not exist, the removal error otherwise
        """
        self.log.verbose("Removing log files found in the directory %s" % directory)
        logStorage = StorageElement(self.logSE)
        res = returnSingleResult(logStorage.removeDirectory(directory, recursive=True))
        if res['OK']:
            self.log.info("Successfully removed transformation log directory")
            return S_OK()

        # a missing directory (ENOENT) is not treated as a failure
        if cmpError(res, errno.ENOENT):
            self.log.warn("Transformation log directory does not exist", directory)
            return S_OK()

        self.log.error("Failed to remove log files", res['Message'])
        return res

    #############################################################################
    #
    # These are the functional methods for archiving and cleaning transformations
    #

    def removeTransformationOutput(self, transID):
        """ This just removes any mention of the output data from the catalog and storage

        Directories containing '/LOG/' are left alone. On success the transformation status is
        set to 'RemovedFiles'.

        :param self: self reference
        :param int transID: transformation ID
        :returns: S_OK on success and also when the directories could not be obtained (the problem
                  is only logged), the error result of the failing step otherwise
        """
        self.log.info("Removing output data for transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, res))
            return S_OK()
        directories = res['Value']
        for directory in directories:
            if not re.search('/LOG/', directory):
                res = self.cleanContent(directory)
                if not res['OK']:
                    return res

        # adjacent literals instead of a backslash continuation, which used to pull the
        # indentation of the following source line into the message
        self.log.info("Removed %d directories from the catalog "
                      "and its files from the storage for transformation %s" % (len(directories), transID))
        # Clean ALL the possible remnants found in the metadata catalog
        res = self.cleanMetadataCatalogFiles(transID)
        if not res['OK']:
            return res
        self.log.info("Successfully removed output of transformation %d" % transID)
        # Change the status of the transformation to RemovedFiles
        res = self.transClient.setTransformationParameter(transID, 'Status', 'RemovedFiles')
        if not res['OK']:
            self.log.error("Failed to update status of transformation %s to RemovedFiles" % (transID), res['Message'])
            return res
        self.log.info("Updated status of transformation %s to RemovedFiles" % (transID))
        return S_OK()

    def archiveTransformation(self, transID):
        """ clean the tasks and the transformation DB content of a transformation and flag it 'Archived'

        :param self: self reference
        :param int transID: transformation ID
        :returns: S_OK on success, the error result of the failing step otherwise
        """
        self.log.info("Archiving transformation %s" % transID)

        # Jobs in the WMS and failover requests go first
        tasksRes = self.cleanTransformationTasks(transID)
        if not tasksRes['OK']:
            return tasksRes

        # Then the files and job information kept in the transformation DB
        dbRes = self.transClient.cleanTransformation(transID)
        if not dbRes['OK']:
            return dbRes
        self.log.info("Successfully archived transformation %d" % transID)

        # Finally flag the transformation as archived
        statusRes = self.transClient.setTransformationParameter(transID, 'Status', 'Archived')
        if statusRes['OK']:
            self.log.info("Updated status of transformation %s to Archived" % (transID))
            return S_OK()

        self.log.error("Failed to update status of transformation %s to Archived" % (transID), statusRes['Message'])
        return statusRes

    def cleanTransformation(self, transID):
        """ remove what was produced by the supplied transformation and flag it 'Cleaned',
        leaving only some info and log in the transformation DB.

        :param self: self reference
        :param int transID: transformation ID
        :returns: S_OK on success and also when the directories could not be obtained (the problem
                  is only logged), the error result of the failing step otherwise
        """
        self.log.info("Cleaning transformation %s" % transID)
        dirRes = self.getTransformationDirectories(transID)
        if not dirRes['OK']:
            self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, dirRes))
            return S_OK()
        directories = dirRes['Value']

        # Jobs in the WMS and failover requests
        res = self.cleanTransformationTasks(transID)
        if not res['OK']:
            return res

        # Log directories get their log files removed first, the catalog content is cleaned for all
        for directory in directories:
            if '/LOG/' in directory:
                res = self.cleanTransformationLogFiles(directory)
                if not res['OK']:
                    return res
            res = self.cleanContent(directory)
            if not res['OK']:
                return res

        # ALL the possible remnants found in the metadata catalog
        res = self.cleanMetadataCatalogFiles(transID)
        if not res['OK']:
            return res

        # Files and job information kept in the transformation DB
        res = self.transClient.cleanTransformation(transID)
        if not res['OK']:
            return res
        self.log.info("Successfully cleaned transformation %d" % transID)

        statusRes = self.transClient.setTransformationParameter(transID, 'Status', 'Cleaned')
        if statusRes['OK']:
            self.log.info("Updated status of transformation %s to Cleaned" % (transID))
            return S_OK()

        self.log.error("Failed to update status of transformation %s to Cleaned" % (transID), statusRes['Message'])
        return statusRes

    def cleanMetadataCatalogFiles(self, transID):
        """ wipe out files from catalog

        Removes all files found in the metadata catalog by the transformation ID metadata tag.

        :param self: self reference
        :param int transID: transformation ID
        :returns: S_OK when nothing was found or all files were removed, an error result otherwise
        """
        res = self.metadataClient.findFilesByMetadata({self.transfidmeta: transID})
        if not res['OK']:
            return res
        fileToRemove = res['Value']
        if not fileToRemove:
            self.log.info('No files found for transID %s' % transID)
            return S_OK()

        # Executing with shifter proxy
        gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')
        try:
            res = DataManager().removeFile(fileToRemove, force=True)
        finally:
            # switch the global option back even when the removal raises
            gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')

        if not res['OK']:
            return res
        for lfn, reason in res['Value']['Failed'].items():
            self.log.error("Failed to remove file found in metadata catalog", "%s %s" % (lfn, reason))
        if res['Value']['Failed']:
            return S_ERROR("Failed to remove all files found in the metadata catalog")
        self.log.info("Successfully removed all files found in the BK")
        return S_OK()

    #############################################################################
    #
    # These are the methods for removing the jobs from the WMS and transformation DB
    #

    def cleanTransformationTasks(self, transID):
        """ remove the tasks of a transformation: WMS jobs for data processing types, requests otherwise

        :param self: self reference
        :param int transID: transformation ID
        :returns: S_OK on success or when there are no tasks, the error result of the failing step otherwise
        """
        self.log.verbose("Cleaning Transformation tasks of transformation %d" % transID)
        idsRes = self.__getTransformationExternalIDs(transID)
        if not idsRes['OK']:
            return idsRes
        externalIDs = idsRes['Value']
        if not externalIDs:
            return S_OK()

        typeRes = self.transClient.getTransformationParameters(transID, ['Type'])
        if not typeRes['OK']:
            self.log.error("Failed to determine transformation type")
            return typeRes

        if typeRes['Value'] in self.dataProcTTypes:
            removal = self.__removeWMSTasks(externalIDs)
        else:
            removal = self.__removeRequests(externalIDs)
        if not removal['OK']:
            return removal
        return S_OK()

    def __getTransformationExternalIDs(self, transID):
        """ Fetch the ExternalID of every task of transformation :transID:

        :param self: self reference
        :param int transID: transformation ID
        :return: S_OK( list of ExternalIDs ), or the failed result of the task query
        """
        query = self.transClient.getTransformationTasks(
            condDict={'TransformationID': transID})
        if query['OK']:
            taskIDs = []
            for task in query["Value"]:
                taskIDs.append(task['ExternalID'])
            self.log.info("Found %d tasks for transformation" % len(taskIDs))
            return S_OK(taskIDs)

        # the query failed: report it and hand the error back to the caller
        self.log.error(
            "Failed to get externalIDs for transformation %d" % transID,
            query['Message'])
        return query

    def __removeRequests(self, requestIDs):
        """ Cancel the given requests in the RMS.

        Null (zero) IDs are skipped. The cancellation is best effort: a failed
        cancellation is logged, but the method still returns S_OK().

        :param self: self reference
        :param list requestIDs: request IDs, anything ``int()`` can convert
        :return: S_OK()
        """
        # int() alone is enough: each ID is converted once and int() already
        # promotes to long when the value requires it
        rIDs = [rID for rID in (int(j) for j in requestIDs) if rID]
        for reqID in rIDs:
            res = self.reqClient.cancelRequest(reqID)
            if not res['OK']:
                # report the problem instead of dropping it silently
                self.log.error("Failed to cancel request %s" % reqID,
                               res['Message'])

        return S_OK()

    def __removeWMSTasks(self, transJobIDs):
        """ Wipe out jobs and their requests from the system.

        The jobs are killed and then deleted in the WMS in chunks of 500. When
        every chunk went fine, the requests associated to the jobs are
        cancelled through ``self.reqClient``.

        :param self: self reference
        :param list transJobIDs: job IDs, anything ``int()`` can convert; null IDs are skipped
        :return: S_OK() on success, S_ERROR when jobs or requests could not be
                 removed, or the failed result of the request ID lookup
        """
        # Prevent 0 job IDs
        jobIDs = [int(j) for j in transJobIDs if int(j)]
        allRemove = True
        for jobList in breakListIntoChunks(jobIDs, 500):
            # the deletion is attempted even when the kill reported problems
            res = self.wmsClient.killJob(jobList)
            if not self.__wmsOperationDone(res, len(jobList), "kill", "killed"):
                allRemove = False

            res = self.wmsClient.deleteJob(jobList)
            if not self.__wmsOperationDone(res, len(jobList), "remove", "removed"):
                allRemove = False

        if not allRemove:
            return S_ERROR("Failed to remove all remnants from WMS")
        self.log.info("Successfully removed all tasks from the WMS")

        if not jobIDs:
            self.log.info(
                "JobIDs not present, unable to remove asociated requests.")
            return S_OK()

        res = self.reqClient.getRequestIDsForJobs(jobIDs)
        if not res['OK']:
            self.log.error("Failed to get requestID for jobs.", res['Message'])
            return res
        failoverRequests = dict(res['Value']['Successful'])
        if not failoverRequests:
            return S_OK()

        failed = 0
        for jobID, requestID in failoverRequests.items():
            # Put this check just in case, tasks must have associated jobs
            if jobID == 0 or jobID == '0':
                continue
            res = self.reqClient.cancelRequest(requestID)
            if not res['OK']:
                self.log.error("Failed to remove request from RequestDB",
                               res['Message'])
                failed += 1
            else:
                self.log.verbose("Removed request %s associated to job %d." %
                                 (requestID, jobID))

        if failed:
            self.log.info("Successfully removed %s requests" %
                          (len(failoverRequests) - failed))
            self.log.info("Failed to remove %s requests" % failed)
            return S_ERROR("Failed to remove all the request from RequestDB")
        self.log.info(
            "Successfully removed all the associated failover requests")
        return S_OK()

    def __wmsOperationDone(self, res, nJobs, verb, doneVerb):
        """ Log the result of a WMS kill/delete call and tell whether it is acceptable.

        :param self: self reference
        :param dict res: result returned by the WMS client call
        :param int nJobs: number of jobs the call was issued for
        :param str verb: operation name used in the failure messages ("kill", "remove")
        :param str doneVerb: operation name used in the success message ("killed", "removed")
        :return: True when the call succeeded or only reported jobs unknown to
                 the WMS, False otherwise
        """
        if res['OK']:
            self.log.info("Successfully %s %d jobs from WMS" % (doneVerb, nJobs))
            return True
        if "NonauthorizedJobIDs" in res:
            self.log.error("Failed to %s %s jobs because not authorized" %
                           (verb, len(res['NonauthorizedJobIDs'])))
            return False
        if "FailedJobIDs" in res:
            self.log.error("Failed to %s %s jobs" %
                           (verb, len(res['FailedJobIDs'])))
            return False
        if "InvalidJobIDs" in res:
            # jobs which are not in the WMS anymore are as good as removed
            self.log.info("Found %s jobs which did not exist in the WMS" %
                          len(res['InvalidJobIDs']))
            return True
        # a failure without any job ID detail must not be taken for a success
        self.log.error("Failed to %s jobs" % verb, res.get('Message', ''))
        return False
Esempio n. 13
0
class Transformation( API ):
  """ Client-side handle on a single transformation.

  The transformation parameters are held in ``self.paramValues``. They are read
  and written through ``get<Name>()`` / ``set<Name>( value )`` calls, which are
  resolved dynamically by ``__getattr__``. Most of the other methods forward to
  the transformation client held in ``self.transClient``.
  """

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ c'tor

    :param int transID: when non-zero, the transformation with this ID is loaded
    :param transClient: client to use; a TransformationClient is created when not given
    :raises AttributeError: when the transformation :transID: does not exist
    """
    super( Transformation, self ).__init__()

    self.paramTypes = { 'TransformationID'      : [types.IntType, types.LongType],
                          'TransformationName'    : types.StringTypes,
                          'Status'                : types.StringTypes,
                          'Description'           : types.StringTypes,
                          'LongDescription'       : types.StringTypes,
                          'Type'                  : types.StringTypes,
                          'Plugin'                : types.StringTypes,
                          'AgentType'             : types.StringTypes,
                          'FileMask'              : types.StringTypes,
                          'TransformationGroup'   : types.StringTypes,
                          'GroupSize'             : [types.IntType, types.LongType, types.FloatType],
                          'InheritedFrom'         : [types.IntType, types.LongType],
                          'Body'                  : types.StringTypes,
                          'MaxNumberOfTasks'      : [types.IntType, types.LongType],
                          'EventsPerTask'         : [types.IntType, types.LongType]}
    self.paramValues = { 'TransformationID'      : 0,
                          'TransformationName'    : '',
                          'Status'                : 'New',
                          'Description'           : '',
                          'LongDescription'       : '',
                          'Type'                  : '',
                          'Plugin'                : 'Standard',
                          'AgentType'             : 'Manual',
                          'FileMask'              : '',
                          'TransformationGroup'   : 'General',
                          'GroupSize'             : 1,
                          'InheritedFrom'         : 0,
                          'Body'                  : '',
                          'MaxNumberOfTasks'       : 0,
                          'EventsPerTask'          : 0}
    self.ops = Operations()
    self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins',
                                              ['Broadcast', 'Standard', 'BySize', 'ByShare'] )
    if not transClient:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if transID:
      self.paramValues['TransformationID'] = transID
      res = self.getTransformation()
      if res['OK']:
        self.exists = True
      elif res['Message'] == 'Transformation does not exist':
        raise AttributeError( 'TransformationID %d does not exist' % transID )
      else:
        # the ID could not be checked: forget it and report the problem
        self.paramValues['TransformationID'] = 0
        gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID,
                                                                                   self.transClient.serverURL ) )

  def setServer( self, server ):
    """ Remember :server: as the server URL and pass it to the transformation client """
    self.serverURL = server
    self.transClient.setServer( self.serverURL )

  def getServer( self ):
    """ Return the server URL currently remembered by this object """
    return self.serverURL

  def reset( self, transID = 0 ):
    """ Re-run the constructor for :transID: and return S_OK()

    NOTE(review): __init__ is called without the transClient argument, so a
    client given at construction time is replaced by a new TransformationClient
    and serverURL is re-read from it - confirm this is intended.
    """
    self.__init__( transID )
    self.transClient.setServer( self.serverURL )
    return S_OK()

  def setTargetSE( self, seList ):
    """ Set the 'TargetSE' parameter, see __setSE for the accepted values """
    return self.__setSE( 'TargetSE', seList )

  def setSourceSE( self, seList ):
    """ Set the 'SourceSE' parameter, see __setSE for the accepted values """
    return self.__setSE( 'SourceSE', seList )

  def __setSE( self, seParam, seList ):
    """ Check a list of storage elements and store it as parameter :seParam:

    :param str seParam: name of the parameter to set
    :param seList: list, tuple or dict of SE names, or a string holding either a
                   python literal or a comma separated list of names
    :return: S_OK(), S_ERROR for an unsupported argument type, or the failed
             result of the SE check / parameter update
    """
    if isinstance( seList, basestring ):
      # The string is parsed with ast.literal_eval instead of eval: literals such
      # as "['SE1','SE2']" give the same result, but the text can no longer run
      # arbitrary code. Anything that is not a literal is a comma separated list.
      import ast
      try:
        seList = ast.literal_eval( seList )
      except ( ValueError, SyntaxError, TypeError ):
        seList = seList.split( ',' )
    elif isinstance( seList, ( list, dict, tuple ) ):
      seList = list( seList )
    else:
      return S_ERROR( "Bad argument type" )
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    self.item_called = seParam
    return self.__setParam( seList )

  def __getattr__( self, name ):
    """ Resolve get<Name> / set<Name> to the generic parameter getter / setter.

    The parameter name is remembered in self.item_called, which is where
    __getParam and __setParam read it from.

    :raises AttributeError: for any other unknown attribute
    """
    if name.startswith( 'get' ):
      self.item_called = name[3:]
      return self.__getParam
    if name.startswith( 'set' ):
      self.item_called = name[3:]
      return self.__setParam
    raise AttributeError( name )

  def __getParam( self ):
    """ Return S_OK( value ) for the parameter named by self.item_called.

    'Available' gives the names of the typed parameters and 'Parameters' the
    whole dictionary of values.

    :raises AttributeError: when the parameter is not known
    """
    if self.item_called == 'Available':
      return S_OK( self.paramTypes.keys() )
    if self.item_called == 'Parameters':
      return S_OK( self.paramValues )
    if self.item_called in self.paramValues:
      return S_OK( self.paramValues[self.item_called] )
    raise AttributeError( "Unknown parameter for transformation: %s" % self.item_called )

  def __setParam( self, value ):
    """ Set the parameter named by self.item_called to :value:

    The new value is also sent through the transformation client when
    self.exists is set and a TransformationID is known.

    :return: S_OK(), or the failed result of the client call
    :raises TypeError: when a typed parameter gets a value of another type
    """
    change = False
    if self.item_called in self.paramTypes:
      if self.paramValues[self.item_called] != value:
        if type( value ) in self.paramTypes[self.item_called]:
          change = True
        else:
          raise TypeError( "%s %s %s expected one of %s" % ( self.item_called, value, type( value ),
                                                             self.paramTypes[self.item_called] ) )
    elif self.item_called not in self.paramValues:
      change = True
    elif self.paramValues[self.item_called] != value:
      change = True

    if not change:
      gLogger.verbose( "No change of parameter %s required" % self.item_called )
      return S_OK()

    gLogger.verbose( "Parameter %s to be changed" % self.item_called )
    transID = self.paramValues['TransformationID']
    if self.exists and transID:
      res = self.transClient.setTransformationParameter( transID, self.item_called, value )
      if not res['OK']:
        return res
    self.paramValues[self.item_called] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    """ Get the transformation parameters from the client and set them locally.

    :param bool printOutput: print the error result when the query fails
    :return: S_OK( dict of parameters ), or the failed result
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR( "No TransformationID known" )
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setter = None
      setterName = "set%s" % paramName
      if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
        setter = getattr( self, setterName )
      # the check is on the setter itself: the setter name is never empty
      if not setter:
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    """ Get the logging records of the transformation.

    :param bool printOutput: print the records (or the error result)
    :return: S_OK( list of logging records ), or the failed result
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR( "No TransformationID known" )
    res = self.transClient.getTransformationLogging( transID )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    loggingList = res['Value']
    if printOutput:
      self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' )
    return S_OK( loggingList )

  def extendTransformation( self, nTasks, printOutput = False ):
    """ Forward to the client's extendTransformation for this transformation """
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    """ Forward to the client's cleanTransformation; on success the local Status becomes 'Cleaned' """
    res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation( self, printOutput = False ):
    """ Forward to the client's deleteTransformation; on success this object is reset """
    res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation( self, lfns, printOutput = False ):
    """ Forward to the client's addFilesToTransformation for this transformation """
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    """ Forward to the client's setFileStatusForTransformation for this transformation """
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    """ Forward to the client's getTransformationTaskStats for this transformation """
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    """ Forward to the client's getTransformationStats for this transformation """
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    """ Forward to the client's deleteTasks for this transformation """
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ):
    """ Forward to the client's addTaskForTransformation for this transformation """
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    """ Forward to the client's setTaskStatus for this transformation """
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    """ Call method :operation: of the transformation client for this transformation.

    The TransformationID is passed as first argument, followed by :parms: and
    :kwds:. The 'printOutput' keyword is consumed here and not forwarded.

    :param str operation: name of the client method
    :return: the result of the client call, or S_ERROR when there is no
             TransformationID or no such client method
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR( "No TransformationID known" )
    # printing is optional, a missing keyword means no printing
    printOutput = kwds.pop( 'printOutput', False )
    fcn = None
    if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ):
      fcn = getattr( self.transClient, operation )
    if not fcn:
      return S_ERROR( "Unable to invoke %s, it isn't a member function of TransformationClient" % operation )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def __printSelection( self, res, outputFields, uniqueField, orderBy ):
    """ Print the successful result :res: of one of the selection methods.

    Without :outputFields: the available field names are listed instead.
    """
    if not outputFields:
      gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
    elif not res['Value']:
      gLogger.info( "No tasks found for selection" )
    else:
      self._printFormattedDictList( res['Value'], outputFields, uniqueField, orderBy )

  def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID',
                                                                                'TargetSE', 'UsedSE', 'ErrorCount',
                                                                                'InsertedTime', 'LastUpdate'],
                             orderBy = 'FileID', printOutput = False ):
    """ Get the files of this transformation, optionally selected by status and LFN.

    :return: the result of the client's getTransformationFiles
    """
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      self.__printSelection( res, outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID',
                                                                                   'ExternalStatus', 'ExternalID',
                                                                                   'TargetSE', 'CreationTime',
                                                                                   'LastUpdateTime'],
                             orderBy = 'TaskID', printOutput = False ):
    """ Get the tasks of this transformation, optionally selected by external status and task ID.

    :return: the result of the client's getTransformationTasks
    """
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      self.__printSelection( res, outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status',
                                                                                'AgentType', 'TransformationName',
                                                                                'CreationDate'],
                         orderBy = 'TransformationID', printOutput = False ):
    """ Get transformations, optionally selected by ID and status.

    :return: the result of the client's getTransformations
    """
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      self.__printSelection( res, outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getAuthorDNfromProxy( self ):
    """ gets the AuthorDN and username of the transformation from the uploaded proxy

    :return: S_OK( {'username': ..., 'authorDN': ...} ), or S_ERROR when the
             proxy information is not available
    """
    res = getProxyInfo()
    if not res['OK']:
      gLogger.error( "Unable to get uploaded proxy Info %s " %res['Message'] )
      return S_ERROR( res['Message'] )

    proxyInfo = res['Value']
    return S_OK( {'username' : proxyInfo['username'], 'authorDN' : proxyInfo['identity'] } )

  #############################################################################
  def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [], outputFields = ['TransformationID', 'Status',
                                                                                                     'AgentType', 'TransformationName',
                                                                                                     'CreationDate', 'AuthorDN'],
                                orderBy = 'TransformationID', printOutput = False ):
    """ Get the transformations of one author, optionally selected by ID and status.

    Without :authorDN: the DN is taken from the proxy; :userName:, when given,
    must then be the user name found in that proxy.

    :return: the result of the client's getTransformations, or S_ERROR when the
             author cannot be resolved
    """
    condDict = {}
    if authorDN == "":
      res = self.getAuthorDNfromProxy()
      if not res['OK']:
        gLogger.error( res['Message'] )
        return S_ERROR( res['Message'] )

      foundUserName = res['Value']['username']
      foundAuthor   = res['Value']['authorDN']
      # If the username whom created the uploaded proxy is different than the provided username report error and exit
      if not ( userName == ""  or userName == foundUserName ):
        message = "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" %(userName, foundUserName)
        gLogger.error( message )
        return S_ERROR( message )

      userName = foundUserName
      authorDN = foundAuthor
      gLogger.info("Will list transformations created by user '%s' with status '%s'" %(userName, ', '.join( transStatus )))
    else:
      gLogger.info("Will list transformations created by '%s' with status '%s'" %(authorDN, ', '.join( transStatus )))

    condDict['AuthorDN'] = authorDN
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res

    if printOutput:
      self.__printSelection( res, outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getSummaryTransformations( self , transID = []):
    """Show the summary for a list of Transformations

       Fields starting with 'F' ('J')  refers to files (jobs).
       Proc. stand for processed.

       :return: S_OK( list of dictionaries, one per transformation ), the failed
                result of the query, or S_ERROR when nothing was found
    """
    condDict = { 'TransformationID' : transID }
    orderby = []
    start = 0
    maxitems = len(transID)
    paramShowNames = ['TransformationID','Type','Status','Files_Total','Files_PercentProcessed',
                      'Files_Processed','Files_Unused','Jobs_TotalCreated','Jobs_Waiting',
                      'Jobs_Running','Jobs_Done','Jobs_Failed','Jobs_Stalled']
    # Below, the header used for each field in the printing: short to fit in one line
    paramShowNamesShort = ['TransID','Type','Status','F_Total','F_Proc.(%)','F_Proc.',
                           'F_Unused','J_Created','J_Wait','J_Run','J_Done','J_Fail','J_Stalled']
    dictList = []

    result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems )
    if not result['OK']:
      self._prettyPrint( result )
      return result

    if result['Value']['TotalRecords'] > 0:
      try:
        paramNames = result['Value']['ParameterNames']
        for paramValues in result['Value']['Records']:
          paramShowValues = [paramValues[ paramNames.index( pname ) ] for pname in paramShowNames]
          dictList.append( dict( zip( paramShowNamesShort, paramShowValues ) ) )
      except Exception as x:
        # best effort: records which cannot be decoded are reported, not fatal
        gLogger.error( 'Exception %s ' % str( x ) )

    if not dictList:
      gLogger.error( 'No found transformations satisfying input condition')
      return S_ERROR( 'No found transformations satisfying input condition')

    # called without print, like every other _printFormattedDictList call in this class
    self._printFormattedDictList( dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0] )

    return S_OK( dictList )
Esempio n. 14
0
class TransformationCleaningAgent( AgentModule ):
  """
  .. class:: TransformationCleaningAgent

  :param DataManager dm: DataManager instance
  :param TransformationClient transClient: TransformationClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  """

  def __init__( self, *args, **kwargs ):
    """ c'tor

    Forwards all arguments to AgentModule and declares every attribute of the
    agent with a None placeholder.
    """
    AgentModule.__init__( self, *args, **kwargs )

    # # clients: data manager, transformation, WMS, request and file catalog
    self.dm = self.transClient = self.wmsClient = self.reqClient = self.metadataClient = None

    # # transformation types, directory locations and transformation metadata tag
    self.transformationTypes = self.directoryLocations = self.transfidmeta = None

    # # archive period in days, active SEs, transformation log SE
    # # and the enable/disable execution flag
    self.archiveAfter = self.activeStorages = self.logSE = self.enableFlag = None

  def initialize( self ):
    """ agent initialisation

    Reads the agent and Operations options, logs the resulting settings and
    creates the transformation, WMS, request and file catalog clients.

    :param self: self reference
    :return: S_OK()
    """
    getOption = self.am_getOption
    info = self.log.info

    # # shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )

    # # transformations types: the agent option, when set, replaces the
    # # union of the data processing and data manipulation types
    self.dataProcTTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
    self.dataManipTTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
    configuredTypes = getOption( 'TransformationTypes', [] )
    if not configuredTypes:
      configuredTypes = self.dataProcTTypes + self.dataManipTTypes
    self.transformationTypes = sorted( configuredTypes )
    info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )

    # # directory locations
    defaultLocations = [ 'TransformationDB', 'MetadataCatalog' ]
    self.directoryLocations = sorted( getOption( 'DirectoryLocations', defaultLocations ) )
    info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )

    # # transformation metadata
    self.transfidmeta = getOption( 'TransfIDMeta', "TransformationID" )
    info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )

    # # archive period in days
    self.archiveAfter = getOption( 'ArchiveAfter', 7 )
    info( "Will archive Completed transformations after %d days" % self.archiveAfter )

    # # active SEs
    self.activeStorages = sorted( getOption( 'ActiveSEs', [] ) )
    info( "Will check the following storage elements: %s" % str( self.activeStorages ) )

    # # transformation log SEs
    self.logSE = getOption( 'TransformationLogSE', 'LogSE' )
    info( "Will remove logs found on storage element: %s" % self.logSE )

    # # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = getOption( 'EnableFlag', 'True' )

    # # clients; the data manager is not created here, self.dm is left untouched
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.reqClient = ReqClient()
    self.metadataClient = FileCatalogClient()

    return S_OK()

  #############################################################################
  def execute( self ):
    """ execution in one agent's cycle

    Unless the agent is disabled through the EnableFlag option, the
    transformations of the configured types are processed by status:

    * 'Cleaning': archived when the type is a data manipulation one, cleaned otherwise
    * 'RemovingFiles': their output is removed
    * 'Completed' and not updated for self.archiveAfter days: archived

    A failure on one transformation or on one of the queries is logged and
    does not stop the cycle.

    :param self: self reference
    :return: S_OK()
    """

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if self.enableFlag != 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in self.dataManipTTypes:
          result = self.archiveTransformation( transID )
          if not result['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transID, result['Message'] ) )
        else:
          result = self.cleanTransformation( transID )
          if not result['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transID, result['Message'] ) )
    else:
      # a failed query is reported like the one on Completed transformations below
      self.log.error( "Could not get the transformations", "Cleaning: %s" % res['Message'] )

    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        result = self.removeTransformationOutput( transID )
        if not result['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transID, result['Message'] ) )
    else:
      self.log.error( "Could not get the transformations", "RemovingFiles: %s" % res['Message'] )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        transID = transDict['TransformationID']
        result = self.archiveTransformation( transID )
        if not result['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transID, result['Message'] ) )
    else:
      self.log.error( "Could not get the transformations", "Completed: %s" % res['Message'] )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ Collect the directories of a transformation from the configured locations.

    The 'OutputDirectories' parameter of the transformation is used when
    'TransformationDB' is one of self.directoryLocations, the directories
    tagged with the transformation ID when 'MetadataCatalog' is one of them.
    The candidates are filtered through _addDirs.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK( sorted list of directories ), or the failed result of a query
    """
    found = []
    locations = self.directoryLocations

    if 'TransformationDB' in locations:
      dbResult = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not dbResult['OK']:
        self.log.error( "Failed to obtain transformation directories", dbResult['Message'] )
        return dbResult
      # the parameter holds one directory per line
      found = self._addDirs( transID, dbResult['Value'].splitlines(), found )

    if 'MetadataCatalog' in locations:
      fcResult = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not fcResult['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", fcResult['Message'] )
        return fcResult
      found = self._addDirs( transID, fcResult['Value'], found )

    if not found:
      self.log.info( "No output directories found" )
    return S_OK( sorted( found ) )
  # FIXME If a classmethod, should it not have cls instead of self?
  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    """ append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    """
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    """ Remove :directory: from every active storage element.

    The SEs listed in self.activeStorages are processed in order; the first
    failure stops the loop.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() when every removal succeeded, otherwise the first failed result
    """
    for seName in self.activeStorages:
      outcome = self.__removeStorageDirectory( directory, seName )
      if outcome['OK']:
        continue
      return outcome
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    """ Recursively remove :directory: from the storage element :storageElement:.

    The LFN is first resolved to a PFN; nothing is removed when the resulting
    path does not exist on the storage.

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    :return: S_OK() on success or when there is nothing to remove, otherwise an error result
    """
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )

    targetSE = StorageElement( storageElement )

    pfnRes = targetSE.getPfnForLfn( [directory] )
    if not pfnRes['OK']:
      self.log.error( "Failed to get PFN for directory", pfnRes['Message'] )
      return pfnRes
    failedLfns = pfnRes['Value']['Failed']
    if directory in failedLfns:
      self.log.verbose( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, failedLfns[directory] ) )
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = pfnRes['Value']['Successful'][directory]

    existsRes = returnSingleResult( targetSE.exists( storageDirectory ) )
    if not existsRes['OK']:
      self.log.error( "Failed to obtain existance of directory", existsRes['Message'] )
      return existsRes
    if not existsRes['Value']:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()

    removalRes = returnSingleResult( targetSE.removeDirectory( storageDirectory, recursive = True ) )
    if not removalRes['OK']:
      self.log.error( "Failed to remove storage directory", removalRes['Message'] )
      return removalRes
    removedCount = removalRes['Value']['FilesRemoved']
    self.log.info( "Successfully removed %d files from %s at %s" % ( removedCount, directory, storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    """ Remove every file registered in the catalog under :directory:.

    Files reported as "File does not exist" are only warned about; any other
    removal failure makes the method return an error.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() on success or when no file is registered, otherwise an error result
    """
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      self.log.info( "No files are registered in the catalog directory %s" % directory )
      return S_OK()
    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )
    try:
      res = DataManager().removeFile( filesFound, force = True )
    finally:
      # switch the option back even when the removal raises, otherwise the
      # process would carry on with the flag left at 'false'
      gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )

    if not res['OK']:
      return res
    realFailure = False
    for lfn, reason in res['Value']['Failed'].items():
      if "File does not exist" in str( reason ):
        # already gone: not counted as a failure
        self.log.warn( "File %s not found in some catalog: " % ( lfn ) )
      else:
        self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
        realFailure = True
    if realFailure:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    """ List all the files found in the catalog under the paths :directories:.

    Sub-directories are visited as well. A directory that cannot be listed is
    logged and skipped, it does not abort the walk.

    :param self: self reference
    :param list directories: list of paths in catalog (left unmodified)
    :return: S_OK( list of the file names found )
    """
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    # work on a copy: the queue is consumed below and the caller's list must stay intact
    pendingDirs = list( directories )
    allFiles = {}
    fc = FileCatalog()
    while pendingDirs:
      currentDir = pendingDirs.pop( 0 )
      res = returnSingleResult( fc.listDirectory( currentDir ) )
      if res['OK']:
        dirContents = res['Value']
        pendingDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
      elif res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif "No such file or directory" in res['Message']:
        self.log.info( "%s: %s" % ( currentDir, res['Message'] ) )
      else:
        self.log.error( "Failed to get directory %s content: %s" % ( currentDir, res['Message'] ) )
    self.log.info( "Found %d files" % len( allFiles ) )
    return S_OK( list( allFiles ) )

  def cleanTransformationLogFiles( self, directory ):
    """ Remove the log directory :directory: from the log storage element (self.logSE).

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() on success, otherwise the failed removal result
    """
    self.log.info( "Removing log files found in the directory %s" % directory )
    logStorage = StorageElement( self.logSE )
    outcome = returnSingleResult( logStorage.removeDirectory( directory ) )
    if outcome['OK']:
      self.log.info( "Successfully removed transformation log directory" )
      return S_OK()
    self.log.error( "Failed to remove log files", outcome['Message'] )
    return outcome

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    """ Remove the output data of a transformation from the catalog and the storage.

    Directories containing '/LOG/' are skipped. Once everything is removed the
    transformation status is set to 'RemovedFiles'.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() on success (and also when the directories cannot be obtained),
             otherwise the failed result of the step that went wrong
    """
    self.log.info( "Removing output data for transformation %s" % transID )
    dirsRes = self.getTransformationDirectories( transID )
    if not dirsRes['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, dirsRes ) )
      return S_OK()

    for directory in dirsRes['Value']:
      if re.search( '/LOG/', directory ):
        continue
      catalogRes = self.cleanCatalogContents( directory )
      if not catalogRes['OK']:
        return catalogRes
      storageRes = self.cleanStorageContents( directory )
      if not storageRes['OK']:
        return storageRes
    self.log.info( "Removed directories in the catalog and storage for transformation" )

    # Clean ALL the possible remnants found in the metadata catalog
    metaRes = self.cleanMetadataCatalogFiles( transID )
    if not metaRes['OK']:
      return metaRes
    self.log.info( "Successfully removed output of transformation %d" % transID )

    # Change the status of the transformation to RemovedFiles
    statusRes = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if statusRes['OK']:
      self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), statusRes['Message'] )
    return statusRes

  def archiveTransformation( self, transID ):
    """ Archive a transformation: remove its tasks, clean its entries in the
        transformation DB and set its status to 'Archived'.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() on success, otherwise the failed result of the step that went wrong
    """
    self.log.info( "Archiving transformation %s" % transID )

    # Clean the jobs in the WMS and any failover requests found
    tasksRes = self.cleanTransformationTasks( transID )
    if not tasksRes['OK']:
      return tasksRes

    # Clean the transformation DB of the files and job information
    dbRes = self.transClient.cleanTransformation( transID )
    if not dbRes['OK']:
      return dbRes
    self.log.info( "Successfully archived transformation %d" % transID )

    # Change the status of the transformation to archived
    statusRes = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if statusRes['OK']:
      self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), statusRes['Message'] )
    return statusRes

  def cleanTransformation( self, transID ):
    """ Remove what was produced by the supplied transformation,
        leaving only some info and log in the transformation DB.

    The status of the transformation is set to 'Cleaned' at the end.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() on success (and also when the directories cannot be obtained),
             otherwise the failed result of the step that went wrong
    """
    self.log.info( "Cleaning transformation %s" % transID )
    dirsRes = self.getTransformationDirectories( transID )
    if not dirsRes['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, dirsRes ) )
      return S_OK()
    directories = dirsRes['Value']

    # Clean the jobs in the WMS and any failover requests found
    tasksRes = self.cleanTransformationTasks( transID )
    if not tasksRes['OK']:
      return tasksRes

    for directory in directories:
      # Log directories get the dedicated log cleaning first; the catalog and
      # storage cleaning below is applied to every directory
      if re.search( '/LOG/', directory ):
        logRes = self.cleanTransformationLogFiles( directory )
        if not logRes['OK']:
          return logRes
      catalogRes = self.cleanCatalogContents( directory )
      if not catalogRes['OK']:
        return catalogRes
      storageRes = self.cleanStorageContents( directory )
      if not storageRes['OK']:
        return storageRes

    # Clean ALL the possible remnants found in the BK
    metaRes = self.cleanMetadataCatalogFiles( transID )
    if not metaRes['OK']:
      return metaRes

    # Clean the transformation DB of the files and job information
    dbRes = self.transClient.cleanTransformation( transID )
    if not dbRes['OK']:
      return dbRes
    self.log.info( "Successfully cleaned transformation %d" % transID )

    statusRes = self.transClient.setTransformationParameter( transID, 'Status', 'Cleaned' )
    if statusRes['OK']:
      self.log.info( "Updated status of transformation %s to Cleaned" % ( transID ) )
      return S_OK()
    self.log.error( "Failed to update status of transformation %s to Cleaned" % ( transID ), statusRes['Message'] )
    return statusRes

  def cleanMetadataCatalogFiles( self, transID ):
    """ Remove the files that the metadata catalog associates with the transformation.

    The files are looked up through the metadata key self.transfidmeta.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() on success or when no file is found, otherwise an error result
    """
    res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )
    try:
      res = DataManager().removeFile( fileToRemove, force = True )
    finally:
      # switch the option back even when the removal raises, otherwise the
      # process would carry on with the flag left at 'false'
      gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )

    if not res['OK']:
      return res
    failedFiles = res['Value']['Failed']
    for lfn, reason in failedFiles.items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if failedFiles:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    """ Remove the tasks of a transformation.

    Tasks of a transformation whose type is in self.dataProcTTypes are removed
    from the WMS, the tasks of any other type are removed as requests.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() on success or when there is no task, otherwise the failed result
    """
    idsRes = self.__getTransformationExternalIDs( transID )
    if not idsRes['OK']:
      return idsRes
    externalIDs = idsRes['Value']
    if not externalIDs:
      return S_OK()

    typeRes = self.transClient.getTransformationParameters( transID, ['Type'] )
    if not typeRes['OK']:
      self.log.error( "Failed to determine transformation type" )
      return typeRes

    if typeRes['Value'] in self.dataProcTTypes:
      removal = self.__removeWMSTasks( externalIDs )
    else:
      removal = self.__removeRequests( externalIDs )
    if not removal['OK']:
      return removal
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    """ Gather the ExternalID of every task of the transformation :transID:.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK( list of ExternalIDs ), or the failed result of the task query
    """
    tasksRes = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not tasksRes['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, tasksRes['Message'] )
      return tasksRes
    externalIDs = []
    for taskDict in tasksRes["Value"]:
      externalIDs.append( taskDict['ExternalID'] )
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    """ This will remove requests from the (new) RMS system -

        #FIXME: if the old system is still installed, it won't remove anything!!!
        (we don't want to risk removing from the new RMS what is instead in the old)

    A request that cannot be deleted is reported in the log; the method still
    returns S_OK() in that case.

    :param self: self reference
    :param list requestIDs: request IDs, null IDs are skipped
    :return: S_OK()
    """
    # FIXME: checking if the old system is still installed!
    from DIRAC.ConfigurationSystem.Client import PathFinder
    if PathFinder.getServiceURL( "RequestManagement/RequestManager" ):
      self.log.warn( "NOT removing requests!!" )
      return S_OK()

    # convert every ID once, dropping the null ones
    rIDs = [ rID for rID in ( int( long( j ) ) for j in requestIDs ) if rID ]
    for requestName in rIDs:
      res = self.reqClient.deleteRequest( requestName )
      if not res['OK']:
        # best effort: keep going with the remaining requests, but leave a trace
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )

    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    """ Kill and delete the jobs in the WMS, then delete the requests associated with them.

    The requests are looked for in the old RMS first and then in the new one;
    both are consulted even when the old one reports nothing.

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list transJobIDs: job IDs
    :return: S_OK() on success, an error result when some jobs or requests could not be removed
    """
    # Prevent 0 job IDs
    jobIDs = [ int( j ) for j in transJobIDs if int( j ) ]
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):
      killed = self.__reportWMSOperation( self.wmsClient.killJob( jobList ), jobList, 'killed', 'kill' )
      deleted = self.__reportWMSOperation( self.wmsClient.deleteJob( jobList ), jobList, 'removed', 'remove' )
      allRemove = allRemove and killed and deleted

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    failed = 0
    failoverRequests = {}
    # FIXME: double request client: old/new -> only the new will survive sooner or later
    # this is the old
    try:
      oldReqClient = RequestClient()
      res = oldReqClient.getRequestForJobs( jobIDs )
      if not res['OK']:
        self.log.error( "Failed to get requestID for jobs.", res['Message'] )
        return res
      failoverRequests = res['Value']
      self.log.info( "Found %d jobs with associated failover requests (in the old RMS)" % len( failoverRequests ) )
      # no early return when nothing is found: the new RMS below must still be consulted
      for jobID, requestName in failoverRequests.items():
        # Put this check just in case, tasks must have associated jobs
        if jobID == 0 or jobID == '0':
          continue
        res = oldReqClient.deleteRequest( requestName )
        if not res['OK']:
          self.log.error( "Failed to remove request from RequestDB", res['Message'] )
          failed += 1
        else:
          # %s: the check above shows that the job ID may be a string
          self.log.verbose( "Removed request %s associated to job %s." % ( requestName, jobID ) )
    except RuntimeError:
      # presumably raised when the old RMS client cannot be used - TODO confirm;
      # only the new RMS is considered in that case
      failoverRequests = {}

    # FIXME: and this is the new
    newRes = self.reqClient.getRequestNamesForJobs( jobIDs )
    if not newRes['OK']:
      self.log.error( "Failed to get requestID for jobs.", newRes['Message'] )
      return newRes
    newRequests = newRes['Value']['Successful']
    failoverRequests.update( newRequests )
    if not failoverRequests:
      return S_OK()
    for jobID, requestName in newRequests.items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.reqClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        self.log.verbose( "Removed request %s associated to job %s." % ( requestName, jobID ) )

    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()

  def __reportWMSOperation( self, res, jobList, doneVerb, verb ):
    """ Log the outcome of a bulk WMS operation (kill or delete).

    :param self: self reference
    :param dict res: result returned by the WMS client
    :param list jobList: the job IDs the operation was applied to
    :param str doneVerb: past form used in the success message ('killed', 'removed')
    :param str verb: infinitive used in the failure messages ('kill', 'remove')
    :return: False when jobs were not authorized or failed, True otherwise
    """
    if res['OK']:
      self.log.info( "Successfully %s %d jobs from WMS" % ( doneVerb, len( jobList ) ) )
    elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
      # jobs unknown to the WMS are not considered a failure
      self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
    elif "NonauthorizedJobIDs" in res:
      self.log.error( "Failed to %s %s jobs because not authorized" % ( verb, len( res['NonauthorizedJobIDs'] ) ) )
      return False
    elif "FailedJobIDs" in res:
      self.log.error( "Failed to %s %s jobs" % ( verb, len( res['FailedJobIDs'] ) ) )
      return False
    # NOTE(review): a failure carrying none of the keys above is still treated as success - confirm this is intended
    return True
Esempio n. 15
0
class UpdateTransformationCounters( AgentModule ):
  """ This agent is doing what getTransformationSummaryWeb does, but can take the time it needs

  For every transformation in one of the configured statuses it collects the
  per-state task and file counts and stores them with
  TransformationClient.updateTransformationCounters.
  """
  def __init__( self, *args, **kwargs ):
    """ c'tor

    All the arguments are forwarded untouched to AgentModule.__init__.

    :param self: self reference
    """
    AgentModule.__init__( self, *args, **kwargs )

    self.transClient = TransformationClient()
    # statuses of the transformations whose counters are refreshed
    self.transfStatuses = self.am_getOption( 'TransformationStatuses', ['Active', 'Stopped'] )

  def initialize( self ):
    ''' Make the necessary initializations: register the "Iteration" monitoring activity

    :return: S_OK()
    '''
    gMonitor.registerActivity( "Iteration", "Agent Loops", AGENT_NAME, "Loops/min", gMonitor.OP_SUM )
    return S_OK()

  def execute( self ):
    ''' Main execution method

    :return: S_OK(), also when a query fails (the failure is logged)
    '''

    gMonitor.addMark( 'Iteration', 1 )
    # Get all the transformations
    result = self.transClient.getTransformations( condDict = {'Status': self.transfStatuses }, timeout = 320 )
    if not result['OK']:
      gLogger.error( "UpdateTransformationCounters.execute: Failed to get transformations.", result['Message'] )
      return S_OK()

    # The names of the states to report; a failed call carries no 'Value',
    # so the results are checked before being used
    jobsRes = self.transClient.getTransformationCountersStatuses( 'Tasks' )
    if not jobsRes['OK']:
      gLogger.error( "UpdateTransformationCounters.execute: Failed to get tasks counters statuses.",
                     jobsRes['Message'] )
      return S_OK()
    jobsStates = jobsRes['Value']
    filesRes = self.transClient.getTransformationCountersStatuses( 'Files' )
    if not filesRes['OK']:
      gLogger.error( "UpdateTransformationCounters.execute: Failed to get files counters statuses.",
                     filesRes['Message'] )
      return S_OK()
    filesStates = filesRes['Value']

    # Process each transformation
    # NOTE(review): every `break` below abandons ALL the remaining transformations
    # of this cycle, not just the current one - confirm `continue` was not intended
    for transDict in result['Value']:
      transID = long( transDict['TransformationID'] )
      gLogger.debug( "Looking at transformationID %d" % transID )
      counterDict = { 'TransformationID' : transID }

      # Take care of the Tasks' states
      gLogger.verbose( "Getting the tasks stats for Transformation %s" % transID )
      res = self.transClient.getTransformationTaskStats( transID )
      if not res['OK']:
        gLogger.warn( "Could not get Transformation Task Stats for transformation %s : %s" % ( transID,
                                                                                               res['Message'] ) )
        break
      taskDict = res['Value']
      if not taskDict:
        gLogger.warn( "No Task Statuses found" )
        break
      gLogger.verbose( "Got %s tasks dict for transformation %s" % ( str( taskDict ), transID ) )
      for state in jobsStates:
        # states without any task are reported as 0
        counterDict[state] = taskDict.get( state, 0 )

      # Now look for the files' states
      gLogger.verbose( "Getting the files stats for Transformation %s" % transID )
      res = self.transClient.getTransformationStats( transID )
      if not res['OK']:
        gLogger.warn( "Could not get Transformation Stats for transformation %s : %s" % ( transID,
                                                                                          res['Message'] ) )
        break
      fileDict = res['Value']
      if not fileDict:
        gLogger.warn( "No File Statuses found" )
        break
      gLogger.debug( "Got %s file dict for transformation %s" % ( str( fileDict ), transID ) )
      for state in filesStates:
        counterDict[state] = fileDict.get( state, 0 )

      gLogger.verbose( "Updating the counters for transformation %s" % transID )
      res = self.transClient.updateTransformationCounters( counterDict )
      if not res['OK']:
        gLogger.error( "Failed updating counters for transformation %s: %s" % ( transID, res['Message'] ) )
      else:
        gLogger.verbose( "Updated the counters of transformation %s" % transID )

    return S_OK()
Esempio n. 16
0
class TransformationCleaningAgent(AgentModule):

    #############################################################################
    def initialize(self):
        """Create the service clients and load the agent options.

        Every option read with am_getOption has a built-in default; the value
        in use is written to the log.

        :return: S_OK()
        """
        self.replicaManager = ReplicaManager()
        self.transClient = TransformationClient()
        self.wmsClient = WMSClient()
        self.requestClient = RequestClient()
        self.metadataClient = FileCatalogClient()
        self.storageUsageClient = StorageUsageClient()

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        defaultTypes = [
            'MCSimulation', 'DataReconstruction', 'DataStripping',
            'MCStripping', 'Merge', 'Replication'
        ]
        configuredTypes = self.am_getOption('TransformationTypes', defaultTypes)
        self.transformationTypes = sortList(configuredTypes)
        gLogger.info("Will consider the following transformation types: %s" %
                     str(self.transformationTypes))

        defaultLocations = ['TransformationDB', 'StorageUsage', 'MetadataCatalog']
        configuredLocations = self.am_getOption('DirectoryLocations', defaultLocations)
        self.directoryLocations = sortList(configuredLocations)
        gLogger.info("Will search for directories in the following locations: %s" %
                     str(self.directoryLocations))

        self.transfidmeta = self.am_getOption('TransfIDMeta', "TransformationID")
        gLogger.info("Will use %s as metadata tag name for TransformationID" %
                     self.transfidmeta)

        # expressed in days
        self.archiveAfter = self.am_getOption('ArchiveAfter', 7)
        gLogger.info("Will archive Completed transformations after %d days" %
                     self.archiveAfter)

        configuredSEs = self.am_getOption('ActiveSEs', [])
        self.activeStorages = sortList(configuredSEs)
        gLogger.info("Will check the following storage elements: %s" %
                     str(self.activeStorages))

        self.logSE = self.am_getOption('TransformationLogSE', 'LogSE')
        gLogger.info("Will remove logs found on storage element: %s" % self.logSE)
        return S_OK()

    #############################################################################
    def execute(self):
        """Run one cycle of the TransformationCleaningAgent.

        Unless disabled with the EnableFlag option, the cycle cleans the
        transformations in 'Cleaning' status, removes the output of those in
        'RemovingFiles' status and archives the 'Completed' ones selected with
        a cut-off of self.archiveAfter days. The result of the individual
        operations is not inspected here.

        :return: S_OK()
        """
        self.enableFlag = self.am_getOption('EnableFlag', 'True')
        if self.enableFlag != 'True':
            self.log.info(
                'TransformationCleaningAgent is disabled by configuration option %s/EnableFlag'
                % (self.section))
            return S_OK('Disabled via CS flag')

        # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
        cleaningCond = {'Status': 'Cleaning', 'Type': self.transformationTypes}
        cleaning = self.transClient.getTransformations(cleaningCond)
        if cleaning['OK']:
            for entry in cleaning['Value']:
                self.cleanTransformation(entry['TransformationID'])

        # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
        removingCond = {'Status': 'RemovingFiles', 'Type': self.transformationTypes}
        removing = self.transClient.getTransformations(removingCond)
        if removing['OK']:
            for entry in removing['Value']:
                self.removeTransformationOutput(entry['TransformationID'])

        # Obtain the transformations in Completed status and archive if inactive for X days
        olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
        completedCond = {'Status': 'Completed', 'Type': self.transformationTypes}
        completed = self.transClient.getTransformations(completedCond,
                                                        older=olderThanTime)
        if completed['OK']:
            for entry in completed['Value']:
                self.archiveTransformation(entry['TransformationID'])

        return S_OK()

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """Collect the directories registered for the supplied transformation.

        The sources enabled in self.directoryLocations are queried in this
        order: transformation parameters, storage usage, metadata catalog.

        :param transID: transformation ID
        :return: S_OK(sorted list of directories), or the failed result of
                 the first query that went wrong
        """
        found = []

        if 'TransformationDB' in self.directoryLocations:
            paramRes = self.transClient.getTransformationParameters(transID, ['OutputDirectories'])
            if not paramRes['OK']:
                gLogger.error("Failed to obtain transformation directories", paramRes['Message'])
                return paramRes
            # the parameter value is split into one path per line
            found = self.__addDirs(transID, paramRes['Value'].splitlines(), found)

        if 'StorageUsage' in self.directoryLocations:
            usageRes = self.storageUsageClient.getStorageDirectories('', '', transID, [])
            if not usageRes['OK']:
                gLogger.error("Failed to obtain storage usage directories", usageRes['Message'])
                return usageRes
            found = self.__addDirs(transID, usageRes['Value'], found)

        if 'MetadataCatalog' in self.directoryLocations:
            metaRes = self.metadataClient.findDirectoriesByMetadata({self.transfidmeta: transID})
            if not metaRes['OK']:
                gLogger.error("Failed to obtain metadata catalog directories", metaRes['Message'])
                return metaRes
            found = self.__addDirs(transID, metaRes['Value'], found)

        if not found:
            gLogger.info("No output directories found")
        return S_OK(sortList(found))

    def __addDirs(self, transID, newDirs, existingDirs):
        """Append to existingDirs the paths of newDirs that belong to the transformation.

        A path is accepted when it contains the transformation ID zero-padded
        to 8 characters and is not already present in existingDirs.

        :param transID: transformation ID
        :param list newDirs: candidate paths (strings)
        :param list existingDirs: destination list, extended in place
        :return: existingDirs (the same list object)
        """
        # loop invariant: the zero-padded ID searched for in every path
        transStr = str(transID).zfill(8)
        for folder in newDirs:
            if transStr in folder and folder not in existingDirs:
                existingDirs.append(folder)
        return existingDirs

    #############################################################################
    #
    # These are the methods for performing the cleaning of catalogs and storage
    #

    def cleanStorageContents(self, directory):
        """Remove directory from every storage element of self.activeStorages.

        :param directory: folder name
        :return: S_OK() when every removal succeeded, otherwise the first
                 failed result (the remaining SEs are not processed)
        """
        for seName in self.activeStorages:
            outcome = self.__removeStorageDirectory(directory, seName)
            if outcome['OK']:
                continue
            return outcome
        return S_OK()

    def __removeStorageDirectory(self, directory, storageElement):
        """Recursively remove directory from the storage element storageElement.

        The LFN is first resolved to a PFN; nothing is removed when the
        resulting path does not exist on the storage.

        :param directory: path (LFN)
        :param storageElement: SE name
        :return: S_OK() on success or when there is nothing to remove,
                 otherwise an error result
        """
        gLogger.info('Removing the contents of %s at %s' % (directory, storageElement))

        pfnRes = self.replicaManager.getPfnForLfn([directory], storageElement)
        if not pfnRes['OK']:
            gLogger.error("Failed to get PFN for directory", pfnRes['Message'])
            return pfnRes
        failedLfns = pfnRes['Value']['Failed']
        for failedLfn, error in failedLfns.items():
            gLogger.error('Failed to obtain directory PFN from LFN',
                          '%s %s' % (failedLfn, error))
        if failedLfns:
            return S_ERROR('Failed to obtain directory PFN from LFNs')
        storageDirectory = pfnRes['Value']['Successful'].values()[0]

        existsRes = self.replicaManager.getStorageFileExists(storageDirectory,
                                                             storageElement,
                                                             singleFile=True)
        if not existsRes['OK']:
            gLogger.error("Failed to obtain existance of directory", existsRes['Message'])
            return existsRes
        if not existsRes['Value']:
            gLogger.info("The directory %s does not exist at %s " % (directory, storageElement))
            return S_OK()

        removalRes = self.replicaManager.removeStorageDirectory(storageDirectory,
                                                                storageElement,
                                                                recursive=True,
                                                                singleDirectory=True)
        if not removalRes['OK']:
            gLogger.error("Failed to remove storage directory", removalRes['Message'])
            return removalRes
        removedCount = removalRes['Value']['FilesRemoved']
        gLogger.info("Successfully removed %d files from %s at %s" %
                     (removedCount, directory, storageElement))
        return S_OK()

    def cleanCatalogContents(self, directory):
        """Remove every file registered in the catalog under directory.

        :param directory: folder name
        :return: S_OK() on success or when no file is found, otherwise an
                 error result
        """
        listing = self.__getCatalogDirectoryContents([directory])
        if not listing['OK']:
            return listing
        filesFound = listing['Value']
        if not filesFound:
            return S_OK()
        gLogger.info(
            "Attempting to remove %d possible remnants from the catalog and storage"
            % len(filesFound))

        removal = self.replicaManager.removeFile(filesFound)
        if not removal['OK']:
            return removal
        failures = removal['Value']['Failed']
        for lfn, reason in failures.items():
            gLogger.error("Failed to remove file found in the catalog",
                          "%s %s" % (lfn, reason))
        if failures:
            return S_ERROR("Failed to remove all files found in the catalog")
        return S_OK()

    def __getCatalogDirectoryContents(self, directories):
        """List all the files found in the catalog under the given directories.

        Sub-directories are visited as well. A directory that cannot be listed
        is logged and skipped, it does not abort the walk.

        :param list directories: catalog paths to start from (left unmodified)
        :return: S_OK(list of the file names found)
        """
        gLogger.info('Obtaining the catalog contents for %d directories:' %
                     len(directories))
        for directory in directories:
            gLogger.info(directory)
        # work on a copy: the queue is consumed below and the caller's list must stay intact
        pendingDirs = list(directories)
        allFiles = {}
        while pendingDirs:
            currentDir = pendingDirs.pop(0)
            res = self.replicaManager.getCatalogListDirectory(currentDir,
                                                              singleFile=True)
            if res['OK']:
                dirContents = res['Value']
                pendingDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])
            elif res['Message'].endswith('The supplied path does not exist'):
                gLogger.info("The supplied directory %s does not exist" %
                             currentDir)
            else:
                gLogger.error('Failed to get directory contents',
                              '%s %s' % (currentDir, res['Message']))
        gLogger.info("Found %d files" % len(allFiles))
        return S_OK(list(allFiles))

    def cleanTransformationLogFiles(self, directory):
        """Remove the log directory from the log storage element (self.logSE).

        :param directory: folder name
        :return: S_OK() on success, otherwise the failed removal result
        """
        gLogger.info("Removing log files found in the directory %s" % directory)
        outcome = self.replicaManager.removeStorageDirectory(directory,
                                                             self.logSE,
                                                             singleDirectory=True)
        if outcome['OK']:
            gLogger.info("Successfully removed transformation log directory")
            return S_OK()
        gLogger.error("Failed to remove log files", outcome['Message'])
        return outcome

    #############################################################################
    #
    # These are the functional methods for archiving and cleaning transformations
    #

    def removeTransformationOutput(self, transID):
        """ Remove the output data of a transformation from the catalog and storage.

        All the transformation directories except those containing '/LOG/' are
        cleaned in the catalog and on storage, then the files still found in the
        metadata catalog for this transformation are removed, and finally the
        transformation status is set to 'RemovedFiles'.

        :param transID: transformation ID
        :return: S_OK() on success, or the first failed result encountered.
                 S_OK() is also returned (after logging an error) when the
                 transformation directories cannot be obtained.
        """
        gLogger.info("Removing output data for transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            gLogger.error(
                'Problem obtaining directories for transformation %s with result "%s"'
                % (transID, res))
            return S_OK()
        directories = res['Value']
        for directory in directories:
            # Log directories are not touched here
            if not re.search('/LOG/', directory):
                res = self.cleanCatalogContents(directory)
                if not res['OK']:
                    return res
                res = self.cleanStorageContents(directory)
                if not res['OK']:
                    return res
        gLogger.info(
            "Removed directories in the catalog and storage for transformation"
        )
        # Clean ALL the possible remnants found in the metadata catalog
        res = self.cleanMetadataCatalogFiles(transID, directories)
        if not res['OK']:
            return res
        # %s (as in every other message of this method) rather than %d: a
        # non-integer ID must not raise here, once the data are already removed
        gLogger.info("Successfully removed output of transformation %s" %
                     transID)
        # Change the status of the transformation to RemovedFiles
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'RemovedFiles')
        if not res['OK']:
            gLogger.error(
                "Failed to update status of transformation %s to RemovedFiles"
                % (transID), res['Message'])
            return res
        gLogger.info("Updated status of transformation %s to RemovedFiles" %
                     (transID))
        return S_OK()

    def archiveTransformation(self, transID):
        """ Archive a transformation.

        The tasks of the transformation are cleaned (cleanTransformationTasks),
        the transformation is cleaned through the transformation client, and its
        status is set to 'Archived'.

        :param transID: transformation ID
        :return: S_OK() on success, or the failed result of the step that failed
        """
        gLogger.info("Archiving transformation %s" % transID)
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res['OK']:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res['OK']:
            return res
        # %s (as in every other message of this method) rather than %d: a
        # non-integer ID must not raise here and skip the status update below
        gLogger.info("Successfully archived transformation %s" % transID)
        # Change the status of the transformation to archived
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'Archived')
        if not res['OK']:
            gLogger.error(
                "Failed to update status of transformation %s to Archived" %
                (transID), res['Message'])
            return res
        gLogger.info("Updated status of transformation %s to Archived" %
                     (transID))
        return S_OK()

    def cleanTransformation(self, transID):
        """ Remove any mention of the supplied transformation.

        The tasks are cleaned first, then every transformation directory is cleaned
        in the catalog and on storage (directories containing '/LOG/' additionally
        get their log files removed beforehand), then the remaining files found in
        the metadata catalog are removed, the transformation is cleaned through the
        transformation client and its status is set to 'Deleted'.

        :param transID: transformation ID
        :return: S_OK() on success, or the first failed result encountered.
                 S_OK() is also returned (after logging an error) when the
                 transformation directories cannot be obtained.
        """
        gLogger.info("Cleaning transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            gLogger.error(
                'Problem obtaining directories for transformation %s with result "%s"'
                % (transID, res))
            return S_OK()
        directories = res['Value']
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res['OK']:
            return res
        # Clean the log files for the jobs
        for directory in directories:
            if re.search('/LOG/', directory):
                res = self.cleanTransformationLogFiles(directory)
                if not res['OK']:
                    return res
            # Catalog and storage are cleaned for every directory, log ones included
            res = self.cleanCatalogContents(directory)
            if not res['OK']:
                return res
            res = self.cleanStorageContents(directory)
            if not res['OK']:
                return res
        # Clean ALL the possible remnants found in the metadata catalog
        res = self.cleanMetadataCatalogFiles(transID, directories)
        if not res['OK']:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res['OK']:
            return res
        # %s (as in every other message of this method) rather than %d: a
        # non-integer ID must not raise here and skip the status update below
        gLogger.info("Successfully cleaned transformation %s" % transID)
        # Change the status of the transformation to deleted
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'Deleted')
        if not res['OK']:
            gLogger.error(
                "Failed to update status of transformation %s to Deleted" %
                (transID), res['Message'])
            return res
        gLogger.info("Updated status of transformation %s to Deleted" %
                     (transID))
        return S_OK()

    def cleanMetadataCatalogFiles(self, transID, directories):
        """ Remove the files that the metadata catalog associates to a transformation.

        The files are looked up with the metadata query {self.transfidmeta: transID}
        and removed through the replica manager.

        :param transID: transformation ID used in the metadata query
        :param directories: not used by this method
        :return: S_OK() if nothing was found or everything was removed,
                 S_ERROR if some removals failed, or the failed result of a call
        """
        query = {self.transfidmeta: transID}
        found = self.metadataClient.findFilesByMetadata(query)
        if not found['OK']:
            return found
        lfnsToRemove = found['Value']
        if len(lfnsToRemove) == 0:
            gLogger.info('No files found for transID %s' % transID)
            return S_OK()

        removal = self.replicaManager.removeFile(lfnsToRemove)
        if not removal['OK']:
            return removal
        failures = removal['Value']['Failed']
        # Report every file that could not be removed before giving up
        for lfn, reason in failures.items():
            gLogger.error("Failed to remove file found in metadata catalog",
                          "%s %s" % (lfn, reason))
        if failures:
            return S_ERROR(
                "Failed to remove all files found in the metadata catalog")
        gLogger.info("Successfully removed all files found in the BK")
        return S_OK()

    #############################################################################
    #
    # These are the methods for removing the jobs from the WMS and transformation DB
    #

    def cleanTransformationTasks(self, transID):
        """ Remove the tasks of a transformation from the external systems.

        The external IDs of the tasks are looked up; if there are any, they are
        handed to the request removal for transformations of type 'Replication'
        and to the WMS task removal for every other type.

        :param transID: transformation ID
        :return: S_OK() on success (also when there is no task),
                 otherwise the failed result of the step that failed
        """
        idsResult = self.__getTransformationExternalIDs(transID)
        if not idsResult['OK']:
            return idsResult
        externalIDs = idsResult['Value']
        if not externalIDs:
            # Nothing to remove
            return S_OK()

        typeResult = self.transClient.getTransformationParameters(
            transID, ['Type'])
        if not typeResult['OK']:
            gLogger.error("Failed to determine transformation type")
            return typeResult

        # Pick the removal routine matching the transformation type
        if typeResult['Value'] == 'Replication':
            remover = self.__removeRequests
        else:
            remover = self.__removeWMSTasks
        removal = remover(externalIDs)
        if not removal['OK']:
            return removal
        return S_OK()

    def __getTransformationExternalIDs(self, transID):
        """ Get the external IDs of all the tasks of a transformation.

        :param transID: transformation ID
        :return: S_OK( list of the 'ExternalID' of every task ),
                 or the failed result of the task query
        """
        res = self.transClient.getTransformationTasks(
            condDict={'TransformationID': transID})
        if not res['OK']:
            # %s rather than %d: formatting a non-integer ID must not raise and
            # hide the actual failure being reported
            gLogger.error(
                "Failed to get externalIDs for transformation %s" % transID,
                res['Message'])
            return res
        externalIDs = [taskDict['ExternalID'] for taskDict in res['Value']]
        gLogger.info("Found %d tasks for transformation" % len(externalIDs))
        return S_OK(externalIDs)

    def __removeRequests(self, requestIDs):
        """ Placeholder for the removal of requests: nothing is removed.

        An error stating that the requests are not removed is logged and success
        is reported. The requestIDs argument is not used.

        :return: S_OK()
        """
        notice = "Not removing requests but should do"
        gLogger.error(notice)
        return S_OK()

    def __removeWMSTasks(self, jobIDs):
        """ Delete jobs from the WMS, then the failover requests associated to them.

        The jobs are deleted in chunks of 500. A chunk for which the WMS only
        reports 'InvalidJobIDs' (jobs that do not exist) is not considered a
        failure; any other unsuccessful deletion is. If all chunks went fine, the
        requests returned by ``self.requestClient.getRequestForJobs`` are deleted
        one by one.

        :param jobIDs: list of WMS job IDs
        :return: S_OK() if everything was removed, S_ERROR if some jobs or
                 requests could not be removed, or the failed result of the
                 request lookup
        """
        allRemove = True
        for jobList in breakListIntoChunks(jobIDs, 500):
            res = self.wmsClient.deleteJob(jobList)
            if res['OK']:
                gLogger.info("Successfully removed %d jobs from WMS" %
                             len(jobList))
            elif ('InvalidJobIDs' in res and
                  'NonauthorizedJobIDs' not in res and
                  'FailedJobIDs' not in res):
                gLogger.info("Found %s jobs which did not exist in the WMS" %
                             len(res['InvalidJobIDs']))
            elif 'NonauthorizedJobIDs' in res:
                gLogger.error(
                    "Failed to remove %s jobs because not authorized" %
                    len(res['NonauthorizedJobIDs']))
                allRemove = False
            elif 'FailedJobIDs' in res:
                gLogger.error("Failed to remove %s jobs" %
                              len(res['FailedJobIDs']))
                allRemove = False
            else:
                # Failure without any per-job detail (none of the three keys is
                # present): it must not be silently counted as a success
                gLogger.error("Failed to remove %s jobs" % len(jobList),
                              res.get('Message', ''))
                allRemove = False
        if not allRemove:
            return S_ERROR("Failed to remove all remnants from WMS")
        gLogger.info("Successfully removed all tasks from the WMS")
        res = self.requestClient.getRequestForJobs(jobIDs)
        if not res['OK']:
            gLogger.error("Failed to get requestID for jobs.", res['Message'])
            return res
        failoverRequests = res['Value']
        gLogger.info("Found %d jobs with associated failover requests" %
                     len(failoverRequests))
        if not failoverRequests:
            return S_OK()
        failed = 0
        for jobID, requestName in failoverRequests.items():
            res = self.requestClient.deleteRequest(requestName)
            if not res['OK']:
                gLogger.error("Failed to remove request from RequestDB",
                              res['Message'])
                failed += 1
            else:
                # %s for the job ID so that a non-integer key cannot raise here
                gLogger.verbose("Removed request %s associated to job %s." %
                                (requestName, jobID))
        if failed:
            gLogger.info("Successfully removed %s requests" %
                         (len(failoverRequests) - failed))
            gLogger.info("Failed to remove %s requests" % failed)
            return S_ERROR("Failed to remove all the request from RequestDB")
        gLogger.info(
            "Successfully removed all the associated failover requests")
        return S_OK()
Esempio n. 17
0
class TransformationAgent(AgentModule, TransformationAgentsUtilities):
  """ Usually subclass of AgentModule
  """

  def __init__(self, *args, **kwargs):
    """ Constructor: initialise both base classes, then give every attribute of the agent a default value.
        Most of these defaults are replaced by configured values in initialize().
    """
    AgentModule.__init__(self, *args, **kwargs)
    TransformationAgentsUtilities.__init__(self)

    # Bookkeeping of what the threads are doing
    self.debug = False
    self.transInThread = {}
    self.pluginTimeout = {}

    # Queue feeding the threads, and the IDs of the transformations that were put in it
    self.transInQueue = []
    self.transQueue = Queue.Queue()

    # Selection of the transformations and of their files
    self.transformationTypes = []
    self.transformationStatus = []
    self.maxFiles = 0
    self.pluginLocation = ''

    # Client used outside the threads
    self.transfClient = None

    # Locations on disk used for the cache file and the control files
    self.controlDirectory = ''
    self.workDirectory = ''
    self.cacheFile = ''

    # Replica cache and its validity
    self.replicaCache = None
    self.replicaCacheValidity = None
    self.removedFromCache = 0
    self.writingCache = False
    self.lastFileOffset = {}

    # Follow-up of the unused files per transformation
    self.unusedTimeStamp = {}
    self.unusedFiles = {}
    self.noUnusedDelay = 0

  def initialize(self):
    """ Read the agent options, create the transformation client, set up the cache locations
        and start the worker threads (one _execute job per thread).

        :return: S_OK()
    """
    getOption = self.am_getOption

    # Plug-in module, statuses to consider and maximum number of files
    self.pluginLocation = getOption('PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin')
    self.transformationStatus = getOption('transformationStatus', ['Active', 'Completing', 'Flush'])
    # 'MaxFiles' being ambiguous, 'MaxFilesToProcess' takes over; the old name only provides the default
    legacyMaxFiles = getOption('MaxFiles', 5000)
    self.maxFiles = getOption('MaxFilesToProcess', legacyMaxFiles)

    # Transformation types: those set for the agent, otherwise those defined in the Operations section
    selectedTypes = getOption('TransformationTypes', [])
    if not selectedTypes:
      processingTypes = Operations().getValue('Transformations/DataProcessing', ['MCSimulation', 'Merge'])
      manipulationTypes = Operations().getValue('Transformations/DataManipulation', ['Replication', 'Removal'])
      selectedTypes = processingTypes + manipulationTypes
    self.transformationTypes = sorted(selectedTypes)

    # Client used by the agent itself
    self.transfClient = TransformationClient()

    # The replica cache is kept in a pickle file inside the work directory
    self.workDirectory = self.am_getWorkDirectory()
    self.cacheFile = os.path.join(self.workDirectory, 'ReplicaCache.pkl')
    self.controlDirectory = self.am_getControlDirectory()

    # File offsets per transformation, replica cache and its validity
    self.lastFileOffset = {}
    self.replicaCache = {}
    self.replicaCacheValidity = getOption('ReplicaCacheValidity', 2)

    self.noUnusedDelay = getOption('NoUnusedDelay', 6)

    # Start as many _execute jobs as there are threads in the pool
    nbThreads = getOption('maxThreadsInPool', 1)
    pool = ThreadPool(nbThreads, nbThreads)
    self.log.info("Multithreaded with %d threads" % nbThreads)
    for threadID in xrange(nbThreads):
      pool.generateJobAndQueueIt(self._execute, [threadID])

    self.log.info("Will treat the following transformation types: %s" % str(self.transformationTypes))

    return S_OK()

  def finalize(self):
    """ Graceful finalization: if transformations are queued, empty the list of queued IDs and
        wait until no transformation is being processed by a thread any more; then write the cache.

        :return: S_OK()
    """
    method = 'finalize'
    if self.transInQueue:
      self.transInQueue = []
      nbRunning = len(self.transInThread)
      self._logInfo("Wait for threads to get empty before terminating the agent (%d tasks)" % nbRunning,
                    method=method)
      remaining = ','.join(str(transID) for transID in self.transInThread)
      self._logInfo('Remaining transformations:', remaining, method=method)
      # Poll every 2 seconds until the threads have dropped their transformations
      while self.transInThread:
        time.sleep(2)
      self._logInfo("Threads are empty, terminating the agent...", method=method)
    self.__writeCache()
    return S_OK()

  def execute(self):
    """ Put the transformations to be processed in the queue read by the threads.

        For a transformation with a non-empty 'InheritedFrom' entry, the files are first moved from
        the parent through the transformation client. A transformation is queued only if its ID is
        not already in self.transInQueue.

        :return: S_OK(), also when the transformations cannot be obtained (an error is then logged)
    """
    # Get the transformations to process
    res = self.getTransformations()
    if not res['OK']:
      self._logError("Failed to obtain transformations:", res['Message'])
      return S_OK()
    # Keep the list in its own variable: results of calls made inside the loop
    # must not replace what the final summary reports on
    transformations = res['Value']
    # Process the transformations
    count = 0
    for transDict in transformations:
      transID = long(transDict['TransformationID'])
      if transDict.get('InheritedFrom'):
        # Try and move datasets from the ancestor production
        moved = self.transfClient.moveFilesToDerivedTransformation(transDict)
        if not moved['OK']:
          self._logError("Error moving files from an inherited transformation", moved['Message'], transID=transID)
        else:
          parentProd, movedFiles = moved['Value']
          if movedFiles:
            self._logInfo("Successfully moved files from %d to %d:" % (parentProd, transID), transID=transID)
            for status, val in movedFiles.iteritems():
              self._logInfo("\t%d files to status %s" % (val, status), transID=transID)
      if transID not in self.transInQueue:
        count += 1
        self.transInQueue.append(transID)
        self.transQueue.put(transDict)
    self._logInfo("Out of %d transformations, %d put in thread queue" % (len(transformations), count))
    return S_OK()

  def getTransformations(self):
    """ Obtain the transformations to be executed; this is called at the start of every execute().

        With the agent option 'Transformation' left at 'All', the transformations are selected by
        status (and by type when types are defined); otherwise the single named transformation is taken.

        :return: S_OK( list of transformation dictionaries ) or the failed result of the client call
    """
    method = 'getTransformations'
    transName = self.am_getOption('Transformation', 'All')

    if transName != 'All':
      # A single transformation was requested
      self._logInfo("Getting transformation %s." % transName, method=method)
      single = self.transfClient.getTransformation(transName, extraParams=True)
      if not single['OK']:
        self._logError("Failed to get transformation:", single['Message'], method=method)
        return single
      return S_OK([single['Value']])

    typeClause = ''
    if self.transformationTypes:
      typeClause = ' of type %s' % str(self.transformationTypes)
    self._logInfo("Getting all transformations%s, status %s." % (typeClause, str(self.transformationStatus)),
                  method=method)
    selection = {'Status': self.transformationStatus}
    if self.transformationTypes:
      selection['Type'] = self.transformationTypes
    selected = self.transfClient.getTransformations(selection, extraParams=True)
    if not selected['OK']:
      return selected
    transformations = selected['Value']
    self._logInfo("Obtained %d transformations to process" % len(transformations), method=method)
    return S_OK(transformations)

  def _getClients(self):
    """ Create the clients to be used inside a thread.

        :return: dictionary with a new TransformationClient under 'TransformationClient'
                 and a new DataManager under 'DataManager'
    """
    clients = {}
    clients['TransformationClient'] = TransformationClient()
    clients['DataManager'] = DataManager()
    return clients

  def _execute(self, threadID):
    """ Thread body - does the real job: processes the transformations taken from the queue.

        The loop ends when a transformation is taken from the queue whose ID is no longer in
        self.transInQueue (finalize() empties that list). Whatever happens while processing, the
        transformation is removed from self.transInQueue and self.transInThread afterwards.

        :param threadID: number of the thread, only used in the logging prefix
        :return: S_OK() when the loop ends
    """

    # Each thread will have its own clients
    clients = self._getClients()

    while True:
      transDict = self.transQueue.get()
      # Reset the per-iteration state: the except and finally clauses below use these names even
      # when the failure (or the break) happens before they are assigned inside the try, and must
      # see neither an unbound name nor the values of the previous transformation
      transID = None
      startTime = time.time()
      try:
        transID = long(transDict['TransformationID'])
        if transID not in self.transInQueue:
          break
        self.transInThread[transID] = ' [Thread%d] [%s] ' % (threadID, str(transID))
        self._logInfo("Processing transformation %s." % transID, transID=transID)
        startTime = time.time()
        res = self.processTransformation(transDict, clients)
        if not res['OK']:
          self._logInfo("Failed to process transformation:", res['Message'], transID=transID)
      except Exception as x:  # pylint: disable=broad-except
        self._logException('Exception in plugin', lException=x, transID=transID)
      finally:
        if not transID:
          transID = 'None'
        self._logInfo("Processed transformation in %.1f seconds" % (time.time() - startTime), transID=transID)
        if transID in self.transInQueue:
          self.transInQueue.remove(transID)
        self.transInThread.pop(transID, None)
        self._logVerbose("%d transformations still in queue" % len(self.transInQueue))
    return S_OK()

  def processTransformation(self, transDict, clients):
    """ Process a single transformation: select its files, run the plug-in and create the tasks.

        Steps:
          - get the files to consider (_getTransformationFiles); nothing else is done if there are none
          - for 'replication' and 'removal' types, apply the plug-in specific 'MaxFilesToProcess' limit
          - get the replicas of the selected files
          - run the plug-in given by transDict['Plugin'] (default 'Standard')
          - add one task per (SE, LFNs) pair returned by the plug-in
          - update the count of unused files and the file offset, remove the used files from the cache
          - set a 'Flush' transformation back to 'Active' if all the tasks could be created

        :param transDict: transformation dictionary; 'TransformationID', 'Type' and 'Status' are read,
                          'Plugin' is optional
        :param clients: dictionary of clients of the thread; the 'TransformationClient' entry is used here,
                        the whole dictionary is passed on to the helper methods
        :return: S_OK(), or the failed result of the step that failed
    """
    method = 'processTransformation'
    transID = transDict['TransformationID']
    forJobs = transDict['Type'].lower() not in ('replication', 'removal')

    # First get the LFNs associated to the transformation
    transFiles = self._getTransformationFiles(transDict, clients, replicateOrRemove=not forJobs)
    if not transFiles['OK']:
      return transFiles
    if not transFiles['Value']:
      return S_OK()

    if transID not in self.replicaCache:
      self.__readCache(transID)
    transFiles = transFiles['Value']
    unusedLfns = [f['LFN'] for f in transFiles]
    unusedFiles = len(unusedLfns)

    plugin = transDict.get('Plugin', 'Standard')
    # Limit the number of LFNs to be considered for replication or removal as they are treated individually
    if not forJobs:
      # Get plugin-specific limit in number of files (0 means no limit)
      maxFiles = Operations().getValue('TransformationPlugins/%s/MaxFilesToProcess' % plugin, 0)
      totLfns = len(unusedLfns)
      lfnsToProcess = self.__applyReduction(unusedLfns, maxFiles=maxFiles)
    else:
      lfnsToProcess = unusedLfns
    # Set view of the selected LFNs: membership is tested once per file below, which would be
    # quadratic against the list
    selectedLfns = set(lfnsToProcess)
    if not forJobs and len(lfnsToProcess) != totLfns:
      self._logInfo("Reduced number of files from %d to %d" % (totLfns, len(lfnsToProcess)),
                    method=method, transID=transID)
      transFiles = [f for f in transFiles if f['LFN'] in selectedLfns]

    # Check the data is available with replicas
    res = self.__getDataReplicas(transDict, lfnsToProcess, clients, forJobs=forJobs)
    if not res['OK']:
      self._logError("Failed to get data replicas:", res['Message'],
                     method=method, transID=transID)
      return res
    dataReplicas = res['Value']

    # Get the plug-in type and create the plug-in object
    self._logInfo("Processing transformation with '%s' plug-in." % plugin,
                  method=method, transID=transID)
    res = self.__generatePluginObject(plugin, clients)
    if not res['OK']:
      return res
    oPlugin = res['Value']

    # Get the plug-in and set the required params
    oPlugin.setParameters(transDict)
    oPlugin.setInputData(dataReplicas)
    oPlugin.setTransformationFiles(transFiles)
    res = oPlugin.run()
    if not res['OK']:
      self._logError("Failed to generate tasks for transformation:", res['Message'],
                     method=method, transID=transID)
      return res
    tasks = res['Value']
    # Remember whether the plug-in reported a timeout: _getTransformationFiles reads this flag
    self.pluginTimeout[transID] = res.get('Timeout', False)
    # Create the tasks
    allCreated = True
    created = 0
    lfnsInTasks = []
    for se, lfns in tasks:
      res = clients['TransformationClient'].addTaskForTransformation(transID, lfns, se)
      if not res['OK']:
        self._logError("Failed to add task generated by plug-in:", res['Message'],
                       method=method, transID=transID)
        allCreated = False
      else:
        created += 1
        # Only the files that were selected above count as used
        lfnsInTasks += [lfn for lfn in lfns if lfn in selectedLfns]
    if created:
      self._logInfo("Successfully created %d tasks for transformation." % created,
                    method=method, transID=transID)
    else:
      self._logInfo("No new tasks created for transformation.",
                    method=method, transID=transID)
    self.unusedFiles[transID] = unusedFiles - len(lfnsInTasks)
    # If not all files were obtained, move the offset
    lastOffset = self.lastFileOffset.get(transID)
    if lastOffset:
      self.lastFileOffset[transID] = max(0, lastOffset - len(lfnsInTasks))
    self.__removeFilesFromCache(transID, lfnsInTasks)

    # If this production is to Flush
    if transDict['Status'] == 'Flush' and allCreated:
      res = clients['TransformationClient'].setTransformationParameter(transID, 'Status', 'Active')
      if not res['OK']:
        self._logError("Failed to update transformation status to 'Active':", res['Message'],
                       method=method, transID=transID)
      else:
        self._logInfo("Updated transformation status to 'Active'.",
                      method=method, transID=transID)
    return S_OK()

  ######################################################################
  #
  # Internal methods used by the agent
  #

  def _getTransformationFiles(self, transDict, clients, statusList=None, replicateOrRemove=False):
    """ Get the files of a transformation that are to be considered by the plug-in.

        Files are selected by status (by default 'Unused' and 'ProbInFC', plus 'MissingInFC' for
        transformations of type 'Removal'), optionally sorted and limited in number according to the
        plug-in options 'SortedBy' and 'MaxFilesToProcess'. Selected files that are not 'Unused' are
        set 'Unused'.

        Nothing is returned when no file is found (a 'Flush' transformation is then set 'Active'), or
        when the number of files is the same as what was left unused last time and the "no unused
        delay" has not elapsed yet. The latter check is skipped if the transformation was kicked
        (control file 'KickTransformation_<transID>', which is removed when found), if it is in
        'Flush' status, or for replication/removal transformations without an explicit
        plug-in 'NoUnusedDelay' option.

        :param transDict: transformation dictionary; 'TransformationID', 'Type' and 'Status' are read,
                          'Plugin' is optional
        :param clients: dictionary of clients of the thread; the 'TransformationClient' entry is used
        :param statusList: statuses of the files to select (the list is not modified)
        :param replicateOrRemove: True for replication and removal transformations
        :return: S_OK( list of transformation file dictionaries ), S_OK() when there is nothing to do,
                 or the failed result of the file query
    """
    # By default, don't skip if no new Unused for DM transformations
    skipIfNoNewUnused = not replicateOrRemove
    transID = transDict['TransformationID']
    plugin = transDict.get('Plugin', 'Standard')
    # Check if files should be sorted and limited in number
    operations = Operations()
    sortedBy = operations.getValue('TransformationPlugins/%s/SortedBy' % plugin, None)
    maxFiles = operations.getValue('TransformationPlugins/%s/MaxFilesToProcess' % plugin, 0)
    # If the NoUnused delay is explicitly set, we want to take it into account, and skip if no new Unused
    if operations.getValue('TransformationPlugins/%s/NoUnusedDelay' % plugin, 0):
      skipIfNoNewUnused = True
    # No delay at all if the plug-in timed out last time for this transformation
    noUnusedDelay = 0 if self.pluginTimeout.get(transID, False) else \
        operations.getValue('TransformationPlugins/%s/NoUnusedDelay' % plugin, self.noUnusedDelay)
    method = '_getTransformationFiles'
    lastOffset = self.lastFileOffset.setdefault(transID, 0)

    # Files that were problematic (either explicit or because SE was banned) may be recovered,
    # and always removing the missing ones.
    # A new list is built so that the list passed by the caller is never extended in place
    statusList = list(statusList) if statusList else ['Unused', 'ProbInFC']
    if transDict['Type'] == 'Removal':
      statusList.append('MissingInFC')
    transClient = clients['TransformationClient']
    res = transClient.getTransformationFiles(condDict={'TransformationID': transID,
                                                       'Status': statusList},
                                             orderAttribute=sortedBy,
                                             offset=lastOffset, maxfiles=maxFiles)
    if not res['OK']:
      self._logError("Failed to obtain input data:", res['Message'],
                     method=method, transID=transID)
      return res
    transFiles = res['Value']
    # A full page was obtained: start after it next time; otherwise forget the offset
    if maxFiles and len(transFiles) == maxFiles:
      self.lastFileOffset[transID] += maxFiles
    else:
      del self.lastFileOffset[transID]

    if not transFiles:
      self._logInfo("No '%s' files found for transformation." % ','.join(statusList),
                    method=method, transID=transID)
      if transDict['Status'] == 'Flush':
        res = transClient.setTransformationParameter(transID, 'Status', 'Active')
        if not res['OK']:
          self._logError("Failed to update transformation status to 'Active':", res['Message'],
                         method=method, transID=transID)
        else:
          self._logInfo("Updated transformation status to 'Active'.",
                        method=method, transID=transID)
      return S_OK()
    # Check if transformation is kicked
    kickFile = os.path.join(self.controlDirectory, 'KickTransformation_%s' % str(transID))
    kickTrans = False
    try:
      kickTrans = os.path.exists(kickFile)
      if kickTrans:
        os.remove(kickFile)
    except OSError:
      # Best effort: failing to remove the control file must not stop the processing
      pass

    # Check if something new happened
    now = datetime.datetime.utcnow()
    if not kickTrans and skipIfNoNewUnused and noUnusedDelay:
      nextStamp = self.unusedTimeStamp.setdefault(transID, now) + datetime.timedelta(hours=noUnusedDelay)
      skip = now < nextStamp
      if len(transFiles) == self.unusedFiles.get(transID, 0) and transDict['Status'] != 'Flush' and skip:
        self._logInfo("No new '%s' files found for transformation." % ','.join(statusList),
                      method=method, transID=transID)
        return S_OK()

    self.unusedTimeStamp[transID] = now
    # If files are not Unused, set them Unused
    notUnused = [trFile['LFN'] for trFile in transFiles if trFile['Status'] != 'Unused']
    otherStatuses = sorted(set([trFile['Status'] for trFile in transFiles]) - set(['Unused']))
    if notUnused:
      res = transClient.setFileStatusForTransformation(transID, 'Unused', notUnused, force=True)
      if not res['OK']:
        self._logError("Error setting %d files Unused:" % len(notUnused), res['Message'],
                       method=method, transID=transID)
      else:
        self._logInfo("Set %d files from %s to Unused" % (len(notUnused), ','.join(otherStatuses)))
        self.__removeFilesFromCache(transID, notUnused)
    return S_OK(transFiles)

  def __applyReduction(self, lfns, maxFiles=None):
    """ eventually remove the number of files to be considered
    """
    if maxFiles is None:
      maxFiles = self.maxFiles
    if not maxFiles or len(lfns) <= maxFiles:
      return lfns
    return randomize(lfns)[:maxFiles]

  def __getDataReplicas(self, transDict, lfns, clients, forJobs=True):
    """ Get the replicas for the LFNs. It first looks within the cache, then asks for the missing ones.

        When transDict['Body'] contains 'RemoveFile' no replica is looked up at all. Otherwise the cache
        of the transformation is cleared if the control file 'ClearCache_<transID>' exists (it is then
        removed) or if the transformation is in 'Flush' status, and cleaned in the other cases. The
        LFNs not found in the cache are obtained in chunks of 10000 with _getDataReplicasDM, added to
        the cache, and the cache is written.

        :param transDict: transformation dictionary; 'TransformationID', 'Body' and 'Status' are read
        :param lfns: list of LFNs
        :param clients: dictionary of clients of the thread, passed on to _getDataReplicasDM
        :param forJobs: passed on to _getDataReplicasDM
        :return: S_OK( dictionary {LFN: replicas} ); LFNs for which no replica was obtained are absent
    """
    method = '__getDataReplicas'
    transID = transDict['TransformationID']
    if 'RemoveFile' in transDict['Body']:
      # When removing files, we don't care about their replicas
      # NOTE: all the LFNs share one and the same ['None'] list object
      return S_OK(dict.fromkeys(lfns, ['None']))
    clearCacheFile = os.path.join(self.controlDirectory, 'ClearCache_%s' % str(transID))
    clearCache = False
    try:
      clearCache = os.path.exists(clearCacheFile)
      if clearCache:
        os.remove(clearCacheFile)
    except OSError:
      # Best effort: failing to remove the control file must not stop the processing.
      # Only file-system errors are ignored, anything else is a real problem to be reported
      pass
    if clearCache or transDict['Status'] == 'Flush':
      self._logInfo("Replica cache cleared", method=method, transID=transID)
      # We may need to get new replicas
      self.__clearCacheForTrans(transID)
    else:
      # If the cache needs to be cleaned
      self.__cleanCache(transID)
    startTime = time.time()
    dataReplicas = {}
    nLfns = len(lfns)
    self._logVerbose("Getting replicas for %d files" % nLfns, method=method, transID=transID)
    cachedReplicaSets = self.replicaCache.get(transID, {})
    cachedReplicas = {}
    # Merge all sets of replicas
    for replicas in cachedReplicaSets.itervalues():
      cachedReplicas.update(replicas)
    self._logInfo("Number of cached replicas: %d" % len(cachedReplicas), method=method, transID=transID)
    setCached = set(cachedReplicas)
    setLfns = set(lfns)
    for lfn in setLfns & setCached:
      dataReplicas[lfn] = cachedReplicas[lfn]
    newLFNs = setLfns - setCached
    self._logInfo("ReplicaCache hit for %d out of %d LFNs" % (len(dataReplicas), nLfns),
                  method=method, transID=transID)
    if newLFNs:
      startTime = time.time()
      self._logInfo("Getting replicas for %d files from catalog" % len(newLFNs),
                    method=method, transID=transID)
      newReplicas = {}
      for chunk in breakListIntoChunks(newLFNs, 10000):
        res = self._getDataReplicasDM(transID, chunk, clients, forJobs=forJobs)
        if res['OK']:
          # Only keep (and cache) the files that do have replicas
          reps = dict((lfn, ses) for lfn, ses in res['Value'].iteritems() if ses)
          newReplicas.update(reps)
          self.__updateCache(transID, reps)
        else:
          # A failing chunk is reported but does not prevent the others from being used
          self._logWarn("Failed to get replicas for %d files" % len(chunk), res['Message'],
                        method=method, transID=transID)

      self._logInfo("Obtained %d replicas from catalog in %.1f seconds"
                    % (len(newReplicas), time.time() - startTime),
                    method=method, transID=transID)
      dataReplicas.update(newReplicas)
      noReplicas = newLFNs - set(dataReplicas)
      self.__writeCache(transID)
      if noReplicas:
        self._logWarn("Found %d files without replicas (or only in Failover)" % len(noReplicas),
                      method=method, transID=transID)
    return S_OK(dataReplicas)

  def _getDataReplicasDM(self, transID, lfns, clients, forJobs=True, ignoreMissing=False):
    """ Get the replicas for the LFNs using the DataManager, and flag the problematic files.

        Files that the DataManager reports neither as 'Successful' nor as 'Failed' are set 'ProbInFC'
        in the transformation. Files that failed with "No such file or directory" are, depending on
        ignoreMissing, either returned with an empty list of SEs or set 'MissingInFC' in the
        transformation.

        :param transID: transformation ID
        :param lfns: list of LFNs
        :param clients: dictionary of clients; the 'DataManager' and 'TransformationClient' entries are used
        :param forJobs: if True only the replicas eligible for jobs are requested, and replicas at SEs
                        whose name contains 'failover' are ignored
        :param ignoreMissing: if True, files missing from the catalog are returned without replicas
                              instead of being flagged in the transformation
        :return: S_OK( dictionary {LFN: list of SEs} ) or the failed result of the DataManager call
    """
    method = '_getDataReplicasDM'

    startTime = time.time()
    self._logVerbose("Getting replicas%s from catalog for %d files" % (' for jobs' if forJobs else '', len(lfns)),
                     method=method, transID=transID)
    if forJobs:
      # Get only replicas eligible for jobs
      res = clients['DataManager'].getReplicasForJobs(lfns, getUrl=False)
    else:
      # Get all replicas
      res = clients['DataManager'].getReplicas(lfns, getUrl=False)
    if not res['OK']:
      return res
    replicas = res['Value']
    # Prepare a dictionary for all LFNs
    dataReplicas = {}
    self._logVerbose("Replica results for %d files obtained in %.2f seconds" %
                     (len(lfns), time.time() - startTime),
                     method=method, transID=transID)
    # If files are neither Successful nor Failed, they are set problematic in the FC
    problematicLfns = [lfn for lfn in lfns if lfn not in replicas['Successful'] and lfn not in replicas['Failed']]
    if problematicLfns:
      self._logInfo("%d files found problematic in the catalog, set ProbInFC" % len(problematicLfns))
      res = clients['TransformationClient'].setFileStatusForTransformation(transID, 'ProbInFC', problematicLfns)
      if not res['OK']:
        self._logError("Failed to update status of problematic files:", res['Message'],
                       method=method, transID=transID)
    # Create a dictionary containing all the file replicas
    failoverLfns = []
    for lfn, replicaDict in replicas['Successful'].iteritems():
      for se in replicaDict:
        # This remains here for backward compatibility in case VOs have not defined SEs not to be used for jobs
        if forJobs and 'failover' in se.lower():
          self._logVerbose("Ignoring failover replica for %s." % lfn, method=method, transID=transID)
        else:
          dataReplicas.setdefault(lfn, []).append(se)
      if not dataReplicas.get(lfn):
        failoverLfns.append(lfn)
    if failoverLfns:
      self._logVerbose("%d files have no replica but possibly in Failover SE" % len(failoverLfns))
    # Make sure that file missing from the catalog are marked in the transformation DB.
    missingLfns = []
    for lfn, reason in replicas['Failed'].iteritems():
      if "No such file or directory" in reason:
        self._logVerbose("%s not found in the catalog." % lfn, method=method, transID=transID)
        missingLfns.append(lfn)
    if missingLfns:
      self._logInfo("%d files not found in the catalog" % len(missingLfns))
      if ignoreMissing:
        # One distinct empty list per file: with dict.fromkeys( missingLfns, [] ) all the files
        # would share a single list object, and appending to one would change them all
        dataReplicas.update(dict((lfn, []) for lfn in missingLfns))
      else:
        res = clients['TransformationClient'].setFileStatusForTransformation(transID, 'MissingInFC', missingLfns)
        if not res['OK']:
          self._logError("Failed to update status of missing files:", res['Message'],
                         method=method, transID=transID)
    return S_OK(dataReplicas)

  def __updateCache(self, transID, newReplicas):
    """ Add replicas to the cache
    """
    self.replicaCache.setdefault(transID, {})[datetime.datetime.utcnow()] = newReplicas
#    if len( newReplicas ) > 5000:
#      self.__writeCache( transID )

  def __clearCacheForTrans(self, transID):
    """ Remove all replicas for a transformation
    """
    self.replicaCache.pop(transID, None)

  def __cleanReplicas(self, transID, lfns):
    """ Remove cached replicas that are not in a list
    """
    cachedReplicas = set()
    for replicas in self.replicaCache.get(transID, {}).itervalues():
      cachedReplicas.update(replicas)
    toRemove = cachedReplicas - set(lfns)
    if toRemove:
      self._logInfo("Remove %d files from cache" % len(toRemove), method='__cleanReplicas', transID=transID)
      self.__removeFromCache(transID, toRemove)

  def __cleanCache(self, transID):
    """ Cleans the cache
    """
    try:
      if transID in self.replicaCache:
        timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=self.replicaCacheValidity)
        for updateTime in set(self.replicaCache[transID]):
          nCache = len(self.replicaCache[transID][updateTime])
          if updateTime < timeLimit or not nCache:
            self._logInfo("Clear %s replicas for transformation %s, time %s" %
                          ('%d cached' % nCache if nCache else 'empty cache', str(transID), str(updateTime)),
                          transID=transID, method='__cleanCache')
            del self.replicaCache[transID][updateTime]
        # Remove empty transformations
        if not self.replicaCache[transID]:
          del self.replicaCache[transID]
    except Exception as x:
      self._logException("Exception when cleaning replica cache:", lException=x)

  def __removeFilesFromCache(self, transID, lfns):
    removed = self.__removeFromCache(transID, lfns)
    if removed:
      self._logInfo("Removed %d replicas from cache" % removed, method='__removeFilesFromCache', transID=transID)
      self.__writeCache(transID)

  def __removeFromCache(self, transID, lfns):
    if transID not in self.replicaCache:
      return
    removed = 0
    if self.replicaCache[transID] and lfns:
      for lfn in lfns:
        for timeKey in self.replicaCache[transID]:
          if self.replicaCache[transID][timeKey].pop(lfn, None):
            removed += 1
    return removed

  def __cacheFile(self, transID):
    return self.cacheFile.replace('.pkl', '_%s.pkl' % str(transID))

  @gSynchro
  def __readCache(self, transID):
    """ Load the replica cache of a transformation from its pickle file, unless it is already in memory

    If the file does not exist or cannot be loaded, an empty cache is set for the transformation.

    :param transID: transformation ID
    """
    if transID in self.replicaCache:
      return
    method = '__readCache'
    # Defined before the try block such that the exception handler can always refer to it
    fileName = None
    try:
      fileName = self.__cacheFile(transID)
      if not os.path.exists(fileName):
        self.replicaCache[transID] = {}
      else:
        # Pickle data are binary, hence the binary mode
        # NOTE(review): pickle.load can execute arbitrary code, the cache file must not be
        # writable by anybody but the agent
        with open(fileName, 'rb') as cacheFile:
          self.replicaCache[transID] = pickle.load(cacheFile)
        self._logInfo("Successfully loaded replica cache from file %s (%d files)" %
                      (fileName, self.__filesInCache(transID)),
                      method=method, transID=transID)
    except Exception as x:
      self._logException("Failed to load replica cache from file %s" % fileName, lException=x,
                         method=method, transID=transID)
      self.replicaCache[transID] = {}

  def __filesInCache(self, transID):
    cache = self.replicaCache.get(transID, {})
    return sum(len(lfns) for lfns in cache.itervalues())

  @gSynchro
  def __writeCache(self, transID=None):
    """ Write the replica cache of one transformation, or of all cached transformations, to file

    Each cache is pickled to a temporary file which is then renamed, in order to avoid
    corrupted cache files. Failures are logged, not raised.

    :param transID: transformation to write; if not set, all the transformations in the cache
    """
    method = '__writeCache'
    # Defined before the try block such that the exception handler can always refer to them
    cacheFile = None
    t_id = transID
    try:
      startTime = time.time()
      transList = [transID] if transID else set(self.replicaCache)
      filesInCache = 0
      nCache = 0
      for t_id in transList:
        # Protect the copy of the cache
        filesInCache += self.__filesInCache(t_id)
        # write to a temporary file in order to avoid corrupted files
        cacheFile = self.__cacheFile(t_id)
        tmpFile = cacheFile + '.tmp'
        # Pickle data are binary, hence the binary mode
        with open(tmpFile, 'wb') as fd:
          pickle.dump(self.replicaCache.get(t_id, {}), fd)
        # Now rename the file as it should
        os.rename(tmpFile, cacheFile)
        nCache += 1
      self._logInfo("Successfully wrote %d replica cache file(s) (%d files) in %.1f seconds"
                    % (nCache, filesInCache, time.time() - startTime),
                    method=method, transID=transID if transID else None)
    except Exception as x:
      self._logException("Could not write replica cache file %s" % cacheFile, lException=x,
                         method=method, transID=t_id)

  def __generatePluginObject(self, plugin, clients):
    """ Instantiate the TransformationPlugin class with the relevant plugin name and configure it

    :param str plugin: name of the plugin
    :param dict clients: must provide the 'TransformationClient' and 'DataManager' entries
    :return: S_OK( plugin object ), or S_ERROR if the module cannot be imported or the object created
    """
    method = "__generatePluginObject"
    try:
      plugModule = __import__(self.pluginLocation, globals(), locals(), ['TransformationPlugin'])
    except ImportError as e:
      self._logException("Failed to import 'TransformationPlugin' %s" % plugin, lException=e,
                         method=method)
      return S_ERROR("Failed to import 'TransformationPlugin' %s" % plugin)
    try:
      plugin_o = getattr(plugModule, 'TransformationPlugin')('%s' % plugin,
                                                             transClient=clients['TransformationClient'],
                                                             dataManager=clients['DataManager'])
    except AttributeError as e:
      self._logException("Failed to create %s()" % plugin, lException=e, method=method)
      return S_ERROR("Failed to create %s()" % plugin)
    # These two calls used to sit after a return statement and were never executed.
    # The plugin class comes from a configurable location, so they are only made
    # when the plugin actually provides the method.
    setDirectory = getattr(plugin_o, 'setDirectory', None)
    if callable(setDirectory):
      setDirectory(self.workDirectory)
    setCallback = getattr(plugin_o, 'setCallback', None)
    if callable(setCallback):
      setCallback(self.pluginCallback)
    return S_OK(plugin_o)

  def pluginCallback(self, transID, invalidateCache=False):
    """ Standard plugin callback

    :param transID: transformation ID
    :param bool invalidateCache: if True, the cached replicas of the transformation are dropped
                                 and its cache file is rewritten
    """
    if not invalidateCache:
      return
    try:
      if transID in self.replicaCache:
        self._logInfo("Removed cached replicas for transformation", method='pluginCallBack', transID=transID)
        self.replicaCache.pop(transID)
        self.__writeCache(transID)
    except Exception as x:
      # Best effort: a cache problem must not propagate to the plugin, but it must leave a trace
      self._logException("Failed to invalidate replica cache", lException=x,
                         method='pluginCallBack', transID=transID)
Esempio n. 18
0
class TransformationAgent(AgentModule):
    """ Agent creating tasks for the transformations out of their 'Unused' files.

    The unused files of each transformation and their replicas are given to a
    TransformationPlugin, and one task is added for each (SE, LFNs) pair it generates.
    """

    def initialize(self):
        """ Read the agent options and create the clients. """
        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/ProductionManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'ProductionManager')

        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        return S_OK()

    def execute(self):
        """ Process all the transformations returned by getTransformations.

        Failures are logged but S_OK is always returned.
        """
        # Get the transformations to process
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("%s.execute: Failed to obtain transformations: %s" %
                         (AGENT_NAME, res['Message']))
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            transID = long(transDict['TransformationID'])
            gLogger.info("%s.execute: Processing transformation %s." %
                         (AGENT_NAME, transID))
            startTime = time.time()
            res = self.processTransformation(transDict)
            if not res['OK']:
                gLogger.info(
                    "%s.execute: Failed to process transformation: %s" %
                    (AGENT_NAME, res['Message']))
            else:
                gLogger.info(
                    "%s.execute: Processed transformation in %.1f seconds" %
                    (AGENT_NAME, time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        """ Obtain the transformations to be executed.

        With the 'Transformation' option set to 'All' (the default), all the transformations
        in status Active, Completing or Flush are returned, otherwise only the one
        named by the option.

        :return: S_OK( list of transformation dictionaries ) or the S_ERROR of the client
        """
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info(
                "%s.getTransformations: Initializing general purpose agent." %
                AGENT_NAME)
            res = self.transDB.getTransformations(
                {'Status': ['Active', 'Completing', 'Flush']},
                extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "%s.getTransformations: Failed to get transformations." %
                    AGENT_NAME, res['Message'])
                return res
            transformations = res['Value']
            gLogger.info(
                "%s.getTransformations: Obtained %d transformations to process"
                % (AGENT_NAME, len(transformations)))
        else:
            gLogger.info(
                "%s.getTransformations: Initializing for transformation %s." %
                (AGENT_NAME, transName))
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "%s.getTransformations: Failed to get transformation." %
                    AGENT_NAME, res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def processTransformation(self, transDict):
        """ Create the tasks of one transformation from its 'Unused' files.

        :param dict transDict: transformation parameters; 'TransformationID', 'Status'
                               and 'Type' are read, 'Plugin' is optional
        :return: S_OK() or the S_ERROR of the failing step
        """
        transID = transDict['TransformationID']
        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(condDict={
            'TransformationID': transID,
            'Status': 'Unused'
        })
        if not res['OK']:
            gLogger.error(
                "%s.processTransformation: Failed to obtain input data." %
                AGENT_NAME, res['Message'])
            return res
        transFiles = res['Value']
        lfns = res['LFNs']
        if not lfns:
            gLogger.info(
                "%s.processTransformation: No 'Unused' files found for transformation."
                % AGENT_NAME)
            if transDict['Status'] == 'Flush':
                self.__setTransformationActive(transID)
            return S_OK()

        # Check the data is available with replicas
        # NOTE(review): __getDataReplicas is not defined in the part of the class
        # visible here - confirm it exists in the complete class
        res = self.__getDataReplicas(transID,
                                     lfns,
                                     active=(transDict['Type'].lower()
                                             not in ["replication",
                                                     "removal"]))
        if not res['OK']:
            gLogger.error(
                "%s.processTransformation: Failed to get data replicas" %
                AGENT_NAME, res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        plugin = transDict.get('Plugin') or 'Standard'
        gLogger.info(
            "%s.processTransformation: Processing transformation with '%s' plug-in."
            % (AGENT_NAME, plugin))
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error(
                "%s.processTransformation: Failed to generate tasks for transformation."
                % AGENT_NAME, res['Message'])
            return res
        tasks = res['Value']
        # Create the tasks
        allCreated = True
        created = 0
        for se, taskLfns in tasks:
            res = self.transDB.addTaskForTransformation(transID, taskLfns, se)
            if not res['OK']:
                gLogger.error(
                    "%s.processTransformation: Failed to add task generated by plug-in."
                    % AGENT_NAME, res['Message'])
                allCreated = False
            else:
                created += 1
        if created:
            gLogger.info(
                "%s.processTransformation: Successfully created %d tasks for transformation."
                % (AGENT_NAME, created))

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            self.__setTransformationActive(transID)
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def __setTransformationActive(self, transID):
        """ Set the status of a transformation back to 'Active' and log the outcome. """
        res = self.transDB.setTransformationParameter(
            transID, 'Status', 'Active')
        if not res['OK']:
            gLogger.error(
                "%s.execute: Failed to update transformation status to 'Active'."
                % AGENT_NAME, res['Message'])
        else:
            gLogger.info(
                "%s.execute: Updated transformation status to 'Active'." %
                AGENT_NAME)

    def __generatePluginObject(self, plugin):
        """ This simply instantiates the TransformationPlugin class with the relevant plugin name

        :return: S_OK( plugin object ) or S_ERROR
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except Exception as x:
            gLogger.exception(
                "%s.__generatePluginObject: Failed to import 'TransformationPlugin'"
                % AGENT_NAME, '', x)
            return S_ERROR()
        try:
            # Direct call instead of eval() on a string built from the plugin name,
            # which would execute whatever a crafted name contains
            return S_OK(plugModule.TransformationPlugin(str(plugin)))
        except Exception as x:
            gLogger.exception(
                "%s.__generatePluginObject: Failed to create %s()." %
                (AGENT_NAME, plugin), '', x)
            return S_ERROR()
Esempio n. 19
0
class MCExtensionAgent( AgentModule ):
  """ Agent extending the Active transformations of the configured types with new tasks,
      up to their MaxNumberOfTasks
  """

  #############################################################################
  def initialize( self ):
    """Sets defaults """
    self.transClient = TransformationClient()

    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    self.transformationTypes = sortList( self.am_getOption( 'TransformationTypes', ['MCSimulation', 'Simulation'] ) )
    gLogger.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    self.maxIterationTasks = self.am_getOption( 'TasksPerIteration', 50 )
    gLogger.info( "Will create a maximum of %s tasks per iteration" % self.maxIterationTasks )
    self.maxFailRate = self.am_getOption( 'MaxFailureRate', 30 )
    gLogger.info( "Will not submit tasks for transformations with failure rate greater than %s%s" % ( self.maxFailRate, '%' ) )
    self.maxWaitingJobs = self.am_getOption( 'MaxWaitingJobs', 1000 )
    gLogger.info( "Will not submit tasks for transformations with more than %d waiting jobs" % self.maxWaitingJobs )
    return S_OK()

  #############################################################################
  def execute( self ):
    """ The MCExtensionAgent execution method."""

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if self.enableFlag != 'True':
      self.log.info( 'MCExtensionAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # Obtain the Active transformations of the considered types and extend them if required
    res = self.transClient.getTransformations( {'Status':'Active', 'Type':self.transformationTypes} )
    if not res['OK']:
      # Not fatal for the cycle, but it must not go unnoticed
      gLogger.error( "Failed to get transformations", res['Message'] )
      return S_OK()
    for transDict in res['Value']:
      transID = transDict['TransformationID']
      maxTasks = transDict['MaxNumberOfTasks']
      self.extendTransformation( transID, maxTasks )
    return S_OK()

  def extendTransformation( self, transID, maxTasks ):
    """ Extend a transformation by the number of tasks given by calculateTaskNumber

    :param transID: transformation ID
    :param maxTasks: maximum number of tasks of the transformation
    :return: S_OK() or the S_ERROR of the TransformationClient
    """
    gLogger.info( "Considering extension of transformation %d" % transID )
    # Get the current count of tasks submitted for this transformation
    res = self.transClient.getTransformationTaskStats( transID )
    if not res['OK']:
      if res['Message'] != 'No records found':
        gLogger.error( "Failed to get task statistics", "%s %s" % ( transID, res['Message'] ) )
        return res
      else:
        statusDict = {}
    else:
      statusDict = res['Value']
    gLogger.verbose( "Current task count for transformation %d" % transID )
    for status in sortList( statusDict.keys() ):
      statusCount = statusDict[status]
      gLogger.verbose( "%s : %s" % ( status.ljust( 20 ), str( statusCount ).rjust( 8 ) ) )
    # Determine the number of tasks to be created
    numberOfTasks = self.calculateTaskNumber( maxTasks, statusDict )
    if not numberOfTasks:
      gLogger.info( "No tasks required for transformation %d" % transID )
      return S_OK()
    # Extend the transformation by the determined number of tasks
    res = self.transClient.extendTransformation( transID, numberOfTasks )
    if not res['OK']:
      gLogger.error( "Failed to extend transformation", "%s %s" % ( transID, res['Message'] ) )
      return res
    gLogger.info( "Successfully extended transformation %d by %d tasks" % ( transID, numberOfTasks ) )
    return S_OK()

  def calculateTaskNumber( self, maxTasks, statusDict ):
    """ Determine the number of tasks to create for a transformation

    :param maxTasks: maximum number of tasks of the transformation
    :param dict statusDict: task counters; 'Done', 'Failed', 'Waiting' and 'Created' are read,
                            'Created' being used as the total
    :return: number of tasks to create, between 0 and self.maxIterationTasks
    """
    done = statusDict.get( 'Done', 0 )
    failed = statusDict.get( 'Failed', 0 )
    waiting = statusDict.get( 'Waiting', 0 )
    total = statusDict.get( 'Created', 0 )
    # If the failure rate is higher than acceptable
    if ( total != 0 ) and ( ( 100.0 * float( failed ) / float( total ) ) > self.maxFailRate ):
      return 0
    # If we already have enough completed jobs
    if done >= maxTasks:
      return 0
    if waiting > self.maxWaitingJobs:
      return 0
    # Never return a negative number: it is not zero, so the caller would try to
    # extend the transformation by a negative number of tasks
    numberOfTasks = max( 0, maxTasks - ( total - failed ) )
    return min( numberOfTasks, self.maxIterationTasks )
Esempio n. 20
0
class Transformation( API ):

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ c'tor

    :param transID: if set, the parameters of this transformation are loaded with getTransformation
    :param transClient: client to use, a TransformationClient is created if not given
    :raises AttributeError: if transID is set and the transformation does not exist
    """
    super( Transformation, self ).__init__()

    # Allowed types of the known parameters
    self.paramTypes = { 'TransformationID': [types.IntType, types.LongType],
                        'TransformationName': types.StringTypes,
                        'Status': types.StringTypes,
                        'Description': types.StringTypes,
                        'LongDescription': types.StringTypes,
                        'Type': types.StringTypes,
                        'Plugin': types.StringTypes,
                        'AgentType': types.StringTypes,
                        'FileMask': types.StringTypes,
                        'TransformationGroup': types.StringTypes,
                        'GroupSize': [types.IntType, types.LongType, types.FloatType],
                        'InheritedFrom': [types.IntType, types.LongType],
                        'Body': types.StringTypes,
                        'MaxNumberOfTasks': [types.IntType, types.LongType],
                        'EventsPerTask': [types.IntType, types.LongType] }
    # Default values of the known parameters
    self.paramValues = { 'TransformationID': 0,
                         'TransformationName': '',
                         'Status': 'New',
                         'Description': '',
                         'LongDescription': '',
                         'Type': '',
                         'Plugin': 'Standard',
                         'AgentType': 'Manual',
                         'FileMask': '',
                         'TransformationGroup': 'General',
                         'GroupSize': 1,
                         'InheritedFrom': 0,
                         'Body': '',
                         'MaxNumberOfTasks': 0,
                         'EventsPerTask': 0 }
    self.ops = Operations()
    self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins',
                                               ['Broadcast', 'Standard', 'BySize', 'ByShare'] )
    self.transClient = transClient if transClient else TransformationClient()
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if not transID:
      return
    self.paramValues['TransformationID'] = transID
    res = self.getTransformation()
    if res['OK']:
      self.exists = True
      return
    if res['Message'] == 'Transformation does not exist':
      raise AttributeError( 'TransformationID %d does not exist' % transID )
    # Any other failure: forget the ID and report
    self.paramValues['TransformationID'] = 0
    gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID,
                                                                               self.transClient.serverURL ) )

  def setServer( self, server ):
    """ Remember the server and pass it on to the transformation client
    """
    self.serverURL = server
    self.transClient.setServer( server )

  def getServer( self ):
    """ Return the server stored in self.serverURL
    """
    return self.serverURL

  def reset( self, transID = 0 ):
    """ Re-initialise this object by calling __init__ again with the given transID

    :param transID: passed to __init__
    :return: S_OK()
    """
    # NOTE(review): __init__ is called without transClient, so it creates a new client and
    # re-reads serverURL from it; a server previously given to setServer looks lost
    # at this point - confirm this is intended
    self.__init__( transID )
    self.transClient.setServer( self.serverURL )
    return S_OK()

  def setTargetSE( self, seList ):
    """ Set the 'TargetSE' parameter through __setSE

    :param seList: string, list, tuple or dict, see __setSE
    :return: result of __setSE
    """
    return self.__setSE( 'TargetSE', seList )

  def setSourceSE( self, seList ):
    """ Set the 'SourceSE' parameter through __setSE

    :param seList: string, list, tuple or dict, see __setSE
    :return: result of __setSE
    """
    return self.__setSE( 'SourceSE', seList )

  def setBody( self, body ):
    """ check that the body is a string, or using the proper syntax for multiple operations

    A string is stored as is. A list is stored JSON encoded, once ALL its operations
    have been validated.

    :param body: transformation body, for example

      .. code :: python

        body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                 ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
               ]

    :type body: string or list of tuples (or lists) of string and dictionaries
    :raises TypeError: If the structure is not as expected
    :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used
    :returns: S_OK, S_ERROR
    """
    self.item_called = "Body"
    if isinstance( body, basestring ):
      return self.__setParam( body )
    if not isinstance( body, ( list, tuple ) ):
      raise TypeError( "Expected list or string, but %r is %s" % ( body, type( body ) ) )

    for tup in body:
      if not isinstance( tup, ( tuple, list ) ):
        raise TypeError( "Expected tuple or list, but %r is %s" % ( tup, type( tup ) ) )
      if len( tup ) != 2:
        raise TypeError( "Expected 2-tuple, but %r is length %d" % ( tup, len( tup ) ) )
      if not isinstance( tup[0], basestring ):
        raise TypeError( "Expected string, but first entry in tuple %r is %s" % ( tup, type( tup[0] ) ) )
      if not isinstance( tup[1], dict ):
        # Report the type of the offending (second) entry
        raise TypeError( "Expected dictionary, but second entry in tuple %r is %s" % ( tup, type( tup[1] ) ) )
      for par, val in tup[1].items():
        if not isinstance( par, basestring ):
          raise TypeError( "Expected string, but key in dictionary %r is %s" % ( par, type( par ) ) )
        if par not in Operation.ATTRIBUTE_NAMES:
          raise ValueError( "Unknown attribute for Operation: %s" % par )
        if not isinstance( val, ( basestring, int, long, float, list, tuple, dict ) ):
          raise TypeError( "Cannot encode %r, in json" % ( val ) )
    # Outside of the loop: returning from within it validated the first operation only
    # and returned None for an empty list
    return self.__setParam( json.dumps( body ) )

  def __setSE( self, seParam, seList ):
    """ Set a storage element parameter after checking the SEs with __checkSEs

    :param str seParam: name of the parameter to set
    :param seList: list, tuple or dict (its keys are used) of SEs, or a string being either
                   the representation of a Python literal or a comma separated list of SEs
    :return: S_ERROR for a bad argument type, the result of __checkSEs if not OK,
             otherwise the result of __setParam
    """
    if isinstance( seList, basestring ):
      # Local import: only needed here
      import ast
      try:
        # literal_eval only accepts Python literals: contrary to eval() it cannot
        # execute code contained in the string
        seList = ast.literal_eval( seList )
      except Exception:
        # Not a Python literal: take it as a comma separated list
        seList = seList.split( ',' )
    elif isinstance( seList, ( list, dict, tuple ) ):
      seList = list( seList )
    else:
      return S_ERROR( "Bad argument type" )
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    self.item_called = seParam
    return self.__setParam( seList )

  def __getattr__( self, name ):
    """ Provide get<Item>() and set<Item>() for any item: the item name is kept in
        self.item_called and the generic getter or setter is returned

    :raises AttributeError: if the name starts neither with 'get' nor with 'set'
    """
    prefix, item = name[:3], name[3:]
    if prefix == 'get':
      self.item_called = item
      return self.__getParam
    if prefix == 'set':
      self.item_called = item
      return self.__setParam
    raise AttributeError( name )

  def __getParam( self ):
    """ Generic getter for the item in self.item_called

    'Available' gives the names of the typed parameters, 'Parameters' all the values,
    any other item its own value.

    :raises AttributeError: if the item is not a known parameter
    """
    item = self.item_called
    if item == 'Available':
      return S_OK( self.paramTypes.keys() )
    if item == 'Parameters':
      return S_OK( self.paramValues )
    if item not in self.paramValues:
      raise AttributeError( "Unknown parameter for transformation: %s" % item )
    return S_OK( self.paramValues[item] )

  def __setParam( self, value ):
    """ Generic setter for the item in self.item_called

    If the value differs from the current one, it is stored locally and, when the
    transformation exists, first sent to the transformation client.

    :raises TypeError: if a new value of a typed parameter is not of an allowed type
    :return: S_OK(), or the S_ERROR of the transformation client
    """
    item = self.item_called
    if item in self.paramTypes:
      # Typed parameter: a new value must be of one of the declared types
      change = self.paramValues[item] != value
      if change and type( value ) not in self.paramTypes[item]:
        raise TypeError( "%s %s %s expected one of %s" % ( item, value, type( value ),
                                                           self.paramTypes[item] ) )
    else:
      # Free parameter: new, or with a different value
      change = item not in self.paramValues or self.paramValues[item] != value
    if not change:
      gLogger.verbose( "No change of parameter %s required" % item )
      return S_OK()
    gLogger.verbose( "Parameter %s to be changed" % item )
    transID = self.paramValues['TransformationID']
    if self.exists and transID:
      res = self.transClient.setTransformationParameter( transID, item, value )
      if not res['OK']:
        return res
    self.paramValues[item] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    """ Get the parameters of the transformation from the transformation client
        and set each of them on this object through its set<Parameter> method

    :param bool printOutput: if True, print a failed result
    :return: S_OK( dictionary of parameters ) or S_ERROR
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR( "No TransformationID known" )
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setterName = "set%s" % paramName
      setter = getattr( self, setterName, None )
      # The test must be on the setter itself: the setter name is never empty
      if not callable( setter ):
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    """ Get the logging information of the transformation from the transformation client

    :param bool printOutput: if True, print the logging records, or the result if it failed
    :return: S_OK( list of logging records ) or S_ERROR
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformationLogging( transID )
    if res['OK']:
      records = res['Value']
      if printOutput:
        self._printFormattedDictList( records, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' )
      return S_OK( records )
    if printOutput:
      self._prettyPrint( res )
    return res

  def extendTransformation( self, nTasks, printOutput = False ):
    """ Call 'extendTransformation' of the transformation client, through __executeOperation,
        with nTasks as argument

    :return: result of __executeOperation
    """
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    """ Call 'cleanTransformation' of the transformation client and, if successful,
        set the local status to 'Cleaned'

    :return: result of __executeOperation
    """
    result = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if not result['OK']:
      return result
    self.paramValues['Status'] = 'Cleaned'
    return result

  def deleteTransformation( self, printOutput = False ):
    """ Call 'deleteTransformation' of the transformation client and, if successful, reset this object

    :return: result of __executeOperation
    """
    result = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if not result['OK']:
      return result
    self.reset()
    return result

  def addFilesToTransformation( self, lfns, printOutput = False ):
    """ Call 'addFilesToTransformation' of the transformation client, through __executeOperation,
        with lfns as argument

    :return: result of __executeOperation
    """
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    """ Call 'setFileStatusForTransformation' of the transformation client, through
        __executeOperation, with status and lfns as arguments

    :return: result of __executeOperation
    """
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    """ Call 'getTransformationTaskStats' of the transformation client through __executeOperation

    :return: result of __executeOperation
    """
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    """ Call 'getTransformationStats' of the transformation client through __executeOperation

    :return: result of __executeOperation
    """
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    """ Call 'deleteTasks' of the transformation client, through __executeOperation,
        with taskMin and taskMax as arguments

    :return: result of __executeOperation
    """
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = None, se = 'Unknown', printOutput = False ):
    """ Call 'addTaskForTransformation' of the transformation client, through __executeOperation,
        with lfns and se as arguments

    :param lfns: list of LFNs, an empty list if not given
    :param se: passed on as is
    :return: result of __executeOperation
    """
    # A new list at each call rather than a single default list shared by all the calls
    if lfns is None:
      lfns = []
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    """ Call 'setTaskStatus' of the transformation client, through __executeOperation,
        with taskID and status as arguments

    :return: result of __executeOperation
    """
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    """ Call a method of the transformation client with the transformation ID as first argument

    :param str operation: name of the method of the transformation client
    :param parms: positional arguments passed after the transformation ID
    :param kwds: keyword arguments passed on, except printOutput which, if True,
                 makes the result be printed
    :return: result of the call, or S_ERROR if the ID is not known or the method cannot be called
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR( "No TransformationID known" )
    # Do not fail with a KeyError if the caller did not pass printOutput
    printOutput = kwds.pop( 'printOutput', False )
    fcn = getattr( self.transClient, operation, None )
    if not callable( fcn ):
      # The name of the operation was never put in the message
      return S_ERROR( "Unable to invoke %s, it isn't a member function of TransformationClient" % operation )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID',
                                                                                'TargetSE', 'UsedSE', 'ErrorCount',
                                                                                'InsertedTime', 'LastUpdate'],
                              orderBy = 'FileID', printOutput = False ):
    """ Get the files of the transformation from the transformation client

    :param fileStatus: if set, used as selection on 'Status'
    :param lfns: if set, used as selection on 'LFN'
    :param outputFields: fields to print; if empty, the available fields are printed instead
    :param orderBy: passed to _printFormattedDictList when printing
    :param bool printOutput: if True, print the outcome
    :return: result of getTransformationFiles of the transformation client
    """
    # NOTE: the default lists are only read here, never modified
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        # join is a method of the separator string, not of the list of names
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID',
                                                                                   'ExternalStatus', 'ExternalID',
                                                                                   'TargetSE', 'CreationTime',
                                                                                   'LastUpdateTime'],
                              orderBy = 'TaskID', printOutput = False ):
    """ Query the tasks of the current transformation and optionally print them.

        :param taskStatus: when non-empty, used as the 'ExternalStatus' selection
        :param taskIDs: when non-empty, used as the 'TaskID' selection
        :param outputFields: fields handed to the table printer; when empty the available
                             field names are logged instead
        :param orderBy: ordering field handed to the table printer
        :param bool printOutput: print the result (or the error) when True
        :return: the result of self.transClient.getTransformationTasks, unchanged
    """
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        # join is a method of the separator string, not of the sequence being joined
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status',
                                                                                'AgentType', 'TransformationName',
                                                                                'CreationDate'],
                          orderBy = 'TransformationID', printOutput = False ):
    """ Query transformations by ID and/or status and optionally print them.

        :param transID: when non-empty, used as the 'TransformationID' selection
        :param transStatus: when non-empty, used as the 'Status' selection
        :param outputFields: fields handed to the table printer; when empty the available
                             field names are logged instead
        :param orderBy: ordering field handed to the table printer
        :param bool printOutput: print the result (or the error) when True
        :return: the result of self.transClient.getTransformations, unchanged
    """
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        # join is a method of the separator string, not of the sequence being joined
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getAuthorDNfromProxy( self ):
    """ Read the identity and username from the proxy information.

        :return: S_OK with a dictionary holding 'username' and 'authorDN', or S_ERROR
                 carrying the message returned by getProxyInfo
    """
    proxyInfo = getProxyInfo()
    if not proxyInfo['OK']:
      gLogger.error( "Unable to get uploaded proxy Info %s " % proxyInfo['Message'] )
      return S_ERROR( proxyInfo['Message'] )

    author = proxyInfo['Value']['identity']
    username = proxyInfo['Value']['username']
    return S_OK( {'username' : username, 'authorDN' : author } )

  #############################################################################
  def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [],
                                outputFields = ['TransformationID', 'Status',
                                                'AgentType', 'TransformationName',
                                                'CreationDate', 'AuthorDN'],
                                orderBy = 'TransformationID', printOutput = False ):
    """ Query the transformations created by a given author and optionally print them.

        When authorDN is empty it is taken from getAuthorDNfromProxy(); in that case a
        non-empty userName must match the username found in the proxy.

        :param str authorDN: value used for the 'AuthorDN' selection
        :param str userName: optional username checked against the proxy username
        :param transID: when non-empty, used as the 'TransformationID' selection
        :param transStatus: when non-empty, used as the 'Status' selection
        :param outputFields: fields handed to the table printer; when empty the available
                             field names are logged instead
        :param orderBy: ordering field handed to the table printer
        :param bool printOutput: print the result (or the error) when True
        :return: S_ERROR when the author cannot be resolved, otherwise the result of
                 self.transClient.getTransformations
    """
    condDict = {}
    if authorDN == "":
      res = self.getAuthorDNfromProxy()
      if not res['OK']:
        gLogger.error( res['Message'] )
        return S_ERROR( res['Message'] )
      foundUserName = res['Value']['username']
      foundAuthor = res['Value']['authorDN']
      # If the user who created the uploaded proxy differs from the provided username, report the error and exit
      if not ( userName == ""  or userName == foundUserName ):
        message = "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % ( userName, foundUserName )
        gLogger.error( message )
        return S_ERROR( message )

      userName = foundUserName
      authorDN = foundAuthor
      gLogger.info( "Will list transformations created by user '%s' with status '%s'" % ( userName, ', '.join( transStatus ) ) )
    else:
      gLogger.info( "Will list transformations created by '%s' with status '%s'" % ( authorDN, ', '.join( transStatus ) ) )

    condDict['AuthorDN'] = authorDN
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res

    if printOutput:
      if not outputFields:
        # join is a method of the separator string, not of the sequence being joined
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getSummaryTransformations( self , transID = [] ):
    """Show the summary for a list of Transformations

       Fields starting with 'F' ('J')  refers to files (jobs).
       Proc. stand for processed.

       :param transID: list used as the 'TransformationID' selection; its length is also
                       passed as the maximum number of items requested
       :return: the failed query result, S_ERROR when no record could be converted, or
                S_OK with the list of per-transformation dictionaries keyed by the short names
    """
    condDict = { 'TransformationID' : transID }
    orderby = []
    start = 0
    maxitems = len( transID )
    paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed', \
                      'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting', \
                      'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled']
    # Below, the header used for each field in the printing: short to fit in one line
    paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.', \
                           'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled']
    dictList = []

    result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems )
    if not result['OK']:
      self._prettyPrint( result )
      return result

    if result['Value']['TotalRecords'] > 0:
      try:
        paramNames = result['Value']['ParameterNames']
        # Column positions are the same for every record, so resolve them once
        columns = [paramNames.index( pname ) for pname in paramShowNames]
        for paramValues in result['Value']['Records']:
          paramShowValues = [paramValues[column] for column in columns]
          dictList.append( dict( zip( paramShowNamesShort, paramShowValues ) ) )
      except Exception as x:
        # Best effort: a malformed answer is reported through the logger and leads to the S_ERROR below
        gLogger.error( 'Exception %s ' % str( x ) )

    if not dictList:
      gLogger.error( 'No found transformations satisfying input condition' )
      return S_ERROR( 'No found transformations satisfying input condition' )

    # The helper does the printing itself, as in the other query methods of this class
    self._printFormattedDictList( dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0] )
    return S_OK( dictList )

  #############################################################################
  def addTransformation( self, addFiles = True, printOutput = False ):
    """ Create the transformation described by self.paramValues.

        After a successful creation the new ID is stored through setTransformationID and
        every entry of self.paramValues that is not listed in self.paramTypes is sent
        with setTransformationParameter; failures of that step are only logged.

        :param addFiles: forwarded to the client's addTransformation call
        :param bool printOutput: print the parameters and a failed result when True
        :return: the error report of the sanity check, the failed client result, or
                 S_OK with the new transformation ID
    """
    sanity = self._checkCreation()
    if not sanity['OK']:
      return self._errorReport( sanity, 'Failed transformation sanity check' )
    if printOutput:
      gLogger.info( "Will attempt to create transformation with the following parameters" )
      self._prettyPrint( self.paramValues )

    values = self.paramValues
    creation = self.transClient.addTransformation( values['TransformationName'],
                                                   values['Description'],
                                                   values['LongDescription'],
                                                   values['Type'],
                                                   values['Plugin'],
                                                   values['AgentType'],
                                                   values['FileMask'],
                                                   transformationGroup = values['TransformationGroup'],
                                                   groupSize = values['GroupSize'],
                                                   inheritedFrom = values['InheritedFrom'],
                                                   body = values['Body'],
                                                   maxTasks = values['MaxNumberOfTasks'],
                                                   eventsPerTask = values['EventsPerTask'],
                                                   addFiles = addFiles )
    if not creation['OK']:
      if printOutput:
        self._prettyPrint( creation )
      return creation

    transID = creation['Value']
    self.exists = True
    self.setTransformationID( transID )
    gLogger.notice( "Created transformation %d" % transID )

    # Read self.paramValues again here: the setter above ran in between
    for paramName, paramValue in self.paramValues.items():
      if paramName in self.paramTypes:
        continue
      outcome = self.transClient.setTransformationParameter( transID, paramName, paramValue )
      if outcome['OK']:
        continue
      gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, outcome['Message'] ) )
      gLogger.notice( "To add this parameter later please execute the following." )
      gLogger.notice( "oTransformation = Transformation(%d)" % transID )
      gLogger.notice( "oTransformation.set%s(...)" % paramName )
    return S_OK( transID )

  def _checkCreation( self ):
    """ Sanity checks run before a transformation is created.

        - refuses to continue when a TransformationID is already set
        - interactively asks for every required parameter that is empty
        - when a plugin is set but not listed in self.supportedPlugins, prompts for a
          supported one and stores the answer

        :return: S_ERROR when a TransformationID is already set or the plugin prompt
                 fails, S_OK otherwise
    """
    if self.paramValues['TransformationID']:
      gLogger.info( "You are currently working with an active transformation definition." )
      gLogger.info( "If you wish to create a new transformation reset the TransformationID." )
      gLogger.info( "oTransformation.reset()" )
      # Give callers a message to report instead of an empty error
      return S_ERROR( "TransformationID is already set; reset it to create a new transformation" )

    requiredParameters = ['TransformationName', 'Description' , 'LongDescription', 'Type']
    for parameter in requiredParameters:
      if not self.paramValues[parameter]:
        gLogger.info( "%s is not defined for this transformation. This is required..." % parameter )
        self.paramValues[parameter] = raw_input( "Please enter the value of " + parameter + " " )

    plugin = self.paramValues['Plugin']
    if plugin and plugin not in self.supportedPlugins:
      gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin )
      res = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' )
      if not res['OK']:
        return res
      self.paramValues['Plugin'] = res['Value']

    return S_OK()

  def _checkBySizePlugin( self ):
    """ The BySize plugin check is delegated to the Standard plugin check.

        :return: the result of self._checkStandardPlugin()
    """
    standardCheck = self._checkStandardPlugin()
    return standardCheck

  def _checkBySharePlugin( self ):
    """ The ByShare plugin check is delegated to the Standard plugin check.

        :return: the result of self._checkStandardPlugin()
    """
    standardCheck = self._checkStandardPlugin()
    return standardCheck

  def _checkStandardPlugin( self ):
    """ Make sure the GroupSize parameter is usable by the Standard plugin.

        A GroupSize that compares as <= 0 is replaced by 1 through setGroupSize.

        :return: the failed result of setGroupSize, otherwise S_OK
    """
    if not self.paramValues['GroupSize'] <= 0:
      return S_OK()

    gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." )
    outcome = self.setGroupSize( 1 )
    if not outcome['OK']:
      return outcome
    return S_OK()

  def _checkBroadcastPlugin( self ):
    """ Make sure the parameters needed by the Broadcast plugin are set.

        For each of SourceSE and TargetSE that has no value in self.paramValues the user
        is asked for one; the answer is split on commas and whitespace and handed to the
        matching set<Name> method.

        :return: S_ERROR when the setter does not exist, the failed setter result, or S_OK
    """
    requiredParams = ['SourceSE', 'TargetSE']
    gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ( ', '.join( requiredParams ) ) )
    for paramName in requiredParams:
      if self.paramValues.get( paramName ):
        continue
      answer = raw_input( "Please enter " + paramName + " " )
      setterName = "set%s" % paramName
      candidate = getattr( self, setterName ) if hasattr( self, setterName ) else None
      setter = candidate if callable( candidate ) else None
      if not setter:
        return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." % setterName )
      # Accept both comma and whitespace separated lists of names
      seNames = answer.replace( ',', ' ' ).split()
      outcome = setter( seNames )
      if not outcome['OK']:
        return outcome
    return S_OK()

  def __checkSEs( self, seList ):
    """ Check that every name in seList is a section of /Resources/StorageElements.

        :param seList: iterable of storage element names to verify
        :return: the error report when the sections cannot be read, S_ERROR with the
                 number of unknown names, or S_OK when all names are known
    """
    sections = gConfig.getSections( '/Resources/StorageElements' )
    if not sections['OK']:
      return self._errorReport( sections, 'Failed to get possible StorageElements' )

    unknownSEs = set( seList ).difference( sections['Value'] )
    if not unknownSEs:
      return S_OK()

    for seName in unknownSEs:
      gLogger.error( "StorageElement %s is not known" % seName )
    return S_ERROR( "%d StorageElements not known" % len( unknownSEs ) )

  def __promptForParameter( self, parameter, choices = [], default = '', insert = True ):
    """ Ask the user for the value of a parameter and optionally store it.

        :param str parameter: parameter name, used in the prompt and to build the
                              name of the set<parameter> method
        :param choices: forwarded to promptUser
        :param default: forwarded to promptUser
        :param bool insert: when True the answer is passed to the set<parameter> method
        :return: the error report when the prompt fails, S_ERROR when the setter is
                 missing, the failed setter result, or S_OK with the entered value
    """
    res = promptUser( "Please enter %s" % parameter, choices = choices, default = default )
    if not res['OK']:
      return self._errorReport( res )
    gLogger.notice( "%s will be set to '%s'" % ( parameter, res['Value'] ) )
    paramValue = res['Value']
    if insert:
      setterName = "set%s" % parameter
      setter = getattr( self, setterName, None )
      if not callable( setter ):
        # Fill in the placeholder so the message names the missing setter
        return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" % setterName )
      res = setter( paramValue )
      if not res['OK']:
        return res
    return S_OK( paramValue )
Esempio n. 21
0
class TransformationAgent( AgentModule ):
  """ Agent that creates tasks from the 'Unused' files of transformations, using a plug-in
      to group the files.
  """

  def initialize( self ):
    """ standard init: read the agent options and create the clients

        :return: S_OK
    """
    self.pluginLocation = self.am_getOption( 'PluginLocation',
                                             'DIRAC.TransformationSystem.Agent.TransformationPlugin' )
    self.checkCatalog = self.am_getOption( 'CheckCatalog', 'yes' )
    self.transformationStatus = self.am_getOption( 'transformationStatus', ['Active', 'Completing', 'Flush'] )
    self.maxFiles = self.am_getOption( 'MaxFiles', 5000 )

    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    self.transDB = TransformationClient( 'TransformationDB' )
    self.rm = ReplicaManager()
    # Per transformation ID: number of files left unused after the last processing
    self.unusedFiles = {}
    return S_OK()

  def execute( self ):
    """ get and process the transformations to be processed

        Failures are logged; the method always returns S_OK.
    """
    res = self.getTransformations()
    if not res['OK']:
      gLogger.info( "execute: Failed to obtain transformations: %s" % res['Message'] )
      return S_OK()
    # Process the transformations
    for transDict in res['Value']:
      transID = long( transDict['TransformationID'] )
      gLogger.info( "execute: Processing transformation %s." % transID )
      startTime = time.time()
      processed = self.processTransformation( transDict )
      if not processed['OK']:
        gLogger.info( "execute: Failed to process transformation: %s" % processed['Message'] )
      else:
        gLogger.info( "execute: Processed transformation in %.1f seconds" % ( time.time() - startTime ) )
    return S_OK()

  def getTransformations( self ):
    """ Obtain the transformations to be executed

        With the 'Transformation' option left at 'All' the transformations in one of the
        configured statuses are requested, otherwise only the named transformation.

        :return: the failed client result, or S_OK with a list of transformation dictionaries
    """
    transName = self.am_getOption( 'Transformation', 'All' )
    if transName == 'All':
      gLogger.info( "getTransformations: Initializing general purpose agent." )
      res = self.transDB.getTransformations( {'Status':self.transformationStatus}, extraParams = True )
      if not res['OK']:
        gLogger.error( "getTransformations: Failed to get transformations: %s" % res['Message'] )
        return res
      transformations = res['Value']
      gLogger.info( "getTransformations: Obtained %d transformations to process" % len( transformations ) )
    else:
      gLogger.info( "getTransformations: Initializing for transformation %s." % transName )
      res = self.transDB.getTransformation( transName, extraParams = True )
      if not res['OK']:
        gLogger.error( "getTransformations: Failed to get transformation: %s." % res['Message'] )
        return res
      transformations = [res['Value']]
    return S_OK( transformations )

  def processTransformation( self, transDict ):
    """ Create tasks for the 'Unused' files of one transformation.

        :param dict transDict: transformation description; 'TransformationID', 'Status'
                               and 'Type' are read here, 'Plugin' is optional
        :return: the failed result of the step that went wrong, otherwise S_OK
    """
    transID = transDict['TransformationID']
    # First get the LFNs associated to the transformation
    res = self.transDB.getTransformationFiles( condDict = {'TransformationID':transID, 'Status':'Unused'} )
    if not res['OK']:
      gLogger.error( "processTransformation: Failed to obtain input data: %s." % res['Message'] )
      return res
    transFiles = res['Value']
    lfns = res['LFNs']

    if not lfns:
      gLogger.info( "processTransformation: No 'Unused' files found for transformation." )
      if transDict['Status'] == 'Flush':
        res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
        if not res['OK']:
          gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message'] )
        else:
          gLogger.info( "processTransformation: Updated transformation status to 'Active'." )
      return S_OK()
    #Check if something new happened
    if len( lfns ) == self.unusedFiles.get( transID, 0 ) and transDict['Status'] != 'Flush':
      gLogger.info( "processTransformation: No new 'Unused' files found for transformation." )
      return S_OK()

    replicateOrRemove = transDict['Type'].lower() in ["replication", "removal"]
    # Limit the number of LFNs to be considered for replication or removal as they are treated individually
    if replicateOrRemove:
      # Keep at most maxFiles entries (the previous slice kept one fewer)
      lfns = lfns[:self.maxFiles]
    unusedFiles = len( lfns )
    # Check the data is available with replicas
    res = self.__getDataReplicas( transID, lfns, active = not replicateOrRemove )
    if not res['OK']:
      gLogger.error( "processTransformation: Failed to get data replicas: %s" % res['Message'] )
      return res
    dataReplicas = res['Value']

    # Get the plug-in type and create the plug-in object; 'Standard' when none is set
    plugin = transDict.get( 'Plugin' ) or 'Standard'
    gLogger.info( "processTransformation: Processing transformation with '%s' plug-in." % plugin )
    res = self.__generatePluginObject( plugin )
    if not res['OK']:
      return res
    oPlugin = res['Value']

    # Get the plug-in and set the required params
    oPlugin.setParameters( transDict )
    oPlugin.setInputData( dataReplicas )
    oPlugin.setTransformationFiles( transFiles )
    res = oPlugin.generateTasks()
    if not res['OK']:
      gLogger.error( "processTransformation: Failed to generate tasks for transformation: %s" % res['Message'] )
      return res
    tasks = res['Value']
    # Create the tasks
    allCreated = True
    created = 0
    for se, taskLfns in tasks:
      res = self.transDB.addTaskForTransformation( transID, taskLfns, se )
      if not res['OK']:
        gLogger.error( "processTransformation: Failed to add task generated by plug-in: %s." % res['Message'] )
        allCreated = False
      else:
        created += 1
        unusedFiles -= len( taskLfns )
    if created:
      gLogger.info( "processTransformation: Successfully created %d tasks for transformation." % created )
    # Remembered so that the next cycle can tell whether new files arrived
    self.unusedFiles[transID] = unusedFiles

    # If this production is to Flush
    if transDict['Status'] == 'Flush' and allCreated:
      res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
      if not res['OK']:
        gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message'] )
      else:
        gLogger.info( "processTransformation: Updated transformation status to 'Active'." )
    return S_OK()

  ######################################################################
  #
  # Internal methods used by the agent
  #

  def __generatePluginObject( self, plugin ):
    """ This simply instantiates the TransformationPlugin class with the relevant plugin name

        :param str plugin: plug-in name given to the TransformationPlugin constructor
        :return: S_OK with the plug-in object, or S_ERROR when the module cannot be
                 imported or the object cannot be created
    """
    try:
      plugModule = __import__( self.pluginLocation, globals(), locals(), ['TransformationPlugin'] )
    except ImportError as e:
      gLogger.exception( "__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s" % ( plugin, e ) )
      return S_ERROR( "Failed to import 'TransformationPlugin' %s: %s" % ( plugin, e ) )
    try:
      plugin_o = getattr( plugModule, 'TransformationPlugin' )( '%s' % plugin,
                                                                transClient = self.transDB,
                                                                replicaManager = self.rm )
      return S_OK( plugin_o )
    except AttributeError as e:
      gLogger.exception( "__generatePluginObject: Failed to create %s(): %s." % ( plugin, e ) )
      return S_ERROR( "Failed to create %s(): %s." % ( plugin, e ) )
Esempio n. 22
0
class TransformationAgent(AgentModule, TransformationAgentsUtilities):
    """ Usually subclass of AgentModule
  """
    def __init__(self, *args, **kwargs):
        """ Constructor: initialise both base classes, then give every attribute
            used by the agent a placeholder value.
        """
        AgentModule.__init__(self, *args, **kwargs)
        TransformationAgentsUtilities.__init__(self)

        # agent options, filled in by initialize()
        self.pluginLocation = ''
        self.transformationStatus = []
        self.maxFiles = 0
        self.transformationTypes = []

        # client used outside of the worker threads
        self.transfClient = None

        # queue feeding the threads and the IDs currently queued
        self.transQueue = Queue.Queue()
        self.transInQueue = []

        # locations used for caching
        self.workDirectory = ''
        self.cacheFile = ''
        self.controlDirectory = ''

        self.lastFileOffset = {}

        # replica cache and its validity
        self.replicaCache = None
        self.replicaCacheValidity = None
        self.writingCache = False
        self.removedFromCache = 0

        # bookkeeping of unused files
        self.noUnusedDelay = 0
        self.unusedFiles = {}
        self.unusedTimeStamp = {}

        self.debug = False
        self.transInThread = {}
        self.pluginTimeout = {}

    def initialize(self):
        """ Standard initialize: read the agent options, create the client, set up
            the cache locations and queue one _execute job per thread.

            :return: S_OK
        """
        getOption = self.am_getOption

        # agent options
        self.pluginLocation = getOption('PluginLocation',
                                        'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.transformationStatus = getOption('transformationStatus',
                                              ['Active', 'Completing', 'Flush'])
        self.maxFiles = getOption('MaxFiles', 5000)

        # transformation types: the agent option wins, otherwise the Operations defaults
        agentTSTypes = getOption('TransformationTypes', [])
        if not agentTSTypes:
            dataProc = Operations().getValue('Transformations/DataProcessing',
                                             ['MCSimulation', 'Merge'])
            dataManip = Operations().getValue('Transformations/DataManipulation',
                                              ['Replication', 'Removal'])
            agentTSTypes = dataProc + dataManip
        self.transformationTypes = sorted(agentTSTypes)

        # client
        self.transfClient = TransformationClient()

        # locations for the pickle-file cache
        self.workDirectory = self.am_getWorkDirectory()
        self.cacheFile = os.path.join(self.workDirectory, 'ReplicaCache.pkl')
        self.controlDirectory = self.am_getControlDirectory()

        # remember the offset if any in TS
        self.lastFileOffset = {}

        # replica cache and its validity
        self.replicaCache = {}
        self.replicaCacheValidity = getOption('ReplicaCacheValidity', 2)

        self.noUnusedDelay = getOption('NoUnusedDelay', 6)

        # thread pool: one _execute job per thread, each receiving its index
        maxNumberOfThreads = getOption('maxThreadsInPool', 1)
        threadPool = ThreadPool(maxNumberOfThreads, maxNumberOfThreads)
        self.log.info("Multithreaded with %d threads" % maxNumberOfThreads)

        for threadIndex in xrange(maxNumberOfThreads):
            threadPool.generateJobAndQueueIt(self._execute, [threadIndex])

        self.log.info("Will treat the following transformation types: %s" %
                      str(self.transformationTypes))

        return S_OK()

    def finalize(self):
        """ Graceful finalization: when transformations are still queued, empty the
            queue list and wait until no transformation is left in a thread, then
            write the cache.

            :return: S_OK
        """
        method = 'finalize'
        if self.transInQueue:
            self.transInQueue = []
            waitMessage = "Wait for threads to get empty before terminating the agent (%d tasks)" % len(
                self.transInThread)
            self._logInfo(waitMessage, method=method)
            remaining = [str(transID) for transID in self.transInThread]
            self._logInfo('Remaining transformations: ' + ','.join(remaining),
                          method=method)
            # Poll every 2 seconds until transInThread is empty
            while self.transInThread:
                time.sleep(2)
            self._logInfo("Threads are empty, terminating the agent...",
                          method=method)
        self.__writeCache()
        return S_OK()

    def execute(self):
        """ Just puts transformations in the queue

            For a transformation with 'InheritedFrom' set, files are first moved from
            the parent transformation; a failure of that step is only logged.

            :return: S_OK, also when the transformations could not be obtained
        """
        # Get the transformations to process
        res = self.getTransformations()
        if not res['OK']:
            self._logError("Failed to obtain transformations:", res['Message'])
            return S_OK()
        # Keep the list under its own name: results of the calls made inside the
        # loop must not replace it before the final summary is logged
        transformations = res['Value']
        # Process the transformations
        count = 0
        for transDict in transformations:
            transID = long(transDict['TransformationID'])
            if transDict.get('InheritedFrom'):
                # Try and move datasets from the ancestor production
                moved = self.transfClient.moveFilesToDerivedTransformation(
                    transDict)
                if not moved['OK']:
                    self._logError(
                        "Error moving files from an inherited transformation",
                        moved['Message'],
                        transID=transID)
                else:
                    parentProd, movedFiles = moved['Value']
                    if movedFiles:
                        self._logInfo(
                            "Successfully moved files from %d to %d:" %
                            (parentProd, transID),
                            transID=transID)
                        for status, val in movedFiles.items():
                            self._logInfo("\t%d files to status %s" %
                                          (val, status),
                                          transID=transID)
            if transID not in self.transInQueue:
                count += 1
                self.transInQueue.append(transID)
                self.transQueue.put(transDict)
        self._logInfo("Out of %d transformations, %d put in thread queue" %
                      (len(transformations), count))
        return S_OK()

    def getTransformations(self):
        """ Obtain the transformations to be executed; called at the start of every
            execute() cycle.

            With the 'Transformation' option left at 'All', the transformations matching
            the configured statuses (and types, when any are configured) are requested;
            otherwise only the named transformation is requested.

            :return: the failed client result, or S_OK with a list of transformation
                     dictionaries
        """
        method = 'getTransformations'
        transName = self.am_getOption('Transformation', 'All')

        if transName != 'All':
            self._logInfo("Getting transformation %s." % transName,
                          method=method)
            single = self.transfClient.getTransformation(transName,
                                                         extraParams=True)
            if not single['OK']:
                self._logError("Failed to get transformation:",
                               single['Message'],
                               method=method)
                return single
            return S_OK([single['Value']])

        if self.transformationTypes:
            typeText = ' of type %s' % str(self.transformationTypes)
        else:
            typeText = ''
        self._logInfo("Getting all transformations%s, status %s." %
                      (typeText, str(self.transformationStatus)),
                      method=method)

        selection = {'Status': self.transformationStatus}
        if self.transformationTypes:
            selection['Type'] = self.transformationTypes
        found = self.transfClient.getTransformations(selection,
                                                     extraParams=True)
        if not found['OK']:
            return found
        transformations = found['Value']
        self._logInfo("Obtained %d transformations to process" %
                      len(transformations),
                      method=method)
        return S_OK(transformations)

    def _getClients(self):
        """ Build the clients used in the threads.

            :return: dictionary with a new TransformationClient under
                     'TransformationClient' and a new DataManager under 'DataManager'
        """
        clients = {}
        clients['TransformationClient'] = TransformationClient()
        clients['DataManager'] = DataManager()
        return clients

    def _execute(self, threadID):
        """ thread - does the real job: processing the transformations to be processed
    """

        # Each thread will have its own clients
        clients = self._getClients()

        while True:
            transDict = self.transQueue.get()
            try:
                transID = long(transDict['TransformationID'])
                if transID not in self.transInQueue:
                    break
                self.transInThread[transID] = ' [Thread%d] [%s] ' % (
                    threadID, str(transID))
                self._logInfo("Processing transformation %s." % transID,
                              transID=transID)
                startTime = time.time()
                res = self.processTransformation(transDict, clients)
                if not res['OK']:
                    self._logInfo("Failed to process transformation:",
                                  res['Message'],
                                  transID=transID)
            except Exception, x:
                self._logException('%s' % x, transID=transID)
            finally:
def _xsectionFromInfo(addinfo):
  """Return the value stored at ``addinfo['xsection']['sum']['xsection']``.

  :param addinfo: third element of the tuple returned by ``_getFileInfo``
  :returns: the stored value, or None when any level of that path is missing
  """
  try:
    return addinfo['xsection']['sum']['xsection']
  except (KeyError, TypeError):
    return None


def _getProductionSummary():
  """Write an HTML summary of productions to ``./tables.html`` and exit.

  The productions are either the ones given on the command line (``clip.prod``)
  or all transformations matching ``clip.statuses`` / ``clip.ptypes``.
  For every production with ID >= ``clip.minprod`` the function

  * maps the transformation type to a catalog ``Datatype`` and looks up the files,
  * reads the user metadata of the directory holding the first file,
  * accumulates luminosity, number of events and cross section from
    ``_getFileInfo`` (first file scaled by the number of files unless
    ``clip.full_det`` is set, in which case every file is read),
  * falls back to the deepest available ancestors when no luminosity was found,
  * records the parent production ID from the input data query.

  The collected rows are grouped per detector type and written as HTML tables.
  The function ends the process through ``dexit(0)``.
  """
  clip = _Params()
  clip.registerSwitch()
  Script.parseCommandLine()
  from ILCDIRAC.Core.Utilities.HTML                             import Table
  from ILCDIRAC.Core.Utilities.ProcessList                      import ProcessList
  from DIRAC.TransformationSystem.Client.TransformationClient   import TransformationClient
  from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
  from DIRAC import gConfig, gLogger
  prod = clip.prod
  full_detail = clip.full_det
  fc = FileCatalogClient()

  processlist = gConfig.getValue('/LocalSite/ProcessListPath')
  prl = ProcessList(processlist)
  processesdict = prl.getProcessesDict()

  trc = TransformationClient()
  prodids = []
  if not prod:
    conddict = {}
    conddict['Status'] = clip.statuses
    if clip.ptypes:
      conddict['Type'] = clip.ptypes
    res = trc.getTransformations( conddict )
    if res['OK']:
      for transfs in res['Value']:
        prodids.append(transfs['TransformationID'])
    else:
      # Previously ignored silently, which produced an empty report without explanation
      gLogger.error("Failed to get transformations", res['Message'])
  else:
    prodids.extend(prod)

  metadata = []

  gLogger.info("Will run on prods %s" % str(prodids))

  # Catalog 'Datatype' used to find the output files of each production type
  datatypeForProdType = {'MCReconstruction': 'DST',
                         'MCReconstruction_Overlay': 'DST',
                         'MCGeneration': 'gen',
                         'MCSimulation': 'SIM'}

  for prodID in prodids:
    if prodID<clip.minprod:
      continue
    res = trc.getTransformation(str(prodID))
    if not res['OK']:
      gLogger.error("Error getting transformation %s" % prodID )
      continue
    prodtype = res['Value']['Type']
    proddetail = res['Value']['Description']
    if prodtype in ['Split','Merge']:
      gLogger.warn("Invalid query for %s productions" % prodtype)
      continue
    if prodtype not in datatypeForProdType:
      gLogger.error("Unknown production type %s"% prodtype)
      continue
    meta = {'ProdID': prodID, 'Datatype': datatypeForProdType[prodtype]}
    res = fc.findFilesByMetadata(meta)
    if not res['OK']:
      gLogger.error(res['Message'])
      continue
    lfns = res['Value']
    nb_files = len(lfns)
    if not lfns:
      gLogger.warn("No files found for prod %s" % prodID)
      continue
    path = os.path.dirname(lfns[0])
    res = fc.getDirectoryUserMetadata(path)
    if not res['OK']:
      gLogger.warn('No meta data found for %s' % path)
      continue
    dirmeta = {}
    dirmeta['proddetail'] = proddetail
    dirmeta['prodtype'] = prodtype
    dirmeta['nb_files']=nb_files
    dirmeta.update(res['Value'])

    lumi  = 0.
    nbevts = 0
    files = 0
    xsec = 0.0
    if not full_detail:
      # Quick mode: read only the first file and scale by the number of files
      info = _getFileInfo(lfns[0])
      nbevts = info[1]*nb_files
      lumi = info[0]*nb_files
      fileInfos = [info[2]]
    else:
      fileInfos = []
      for lfn in lfns:
        info = _getFileInfo(lfn)
        lumi += info[0]
        nbevts += info[1]
        fileInfos.append(info[2])
    for addinfo in fileInfos:
      value = _xsectionFromInfo(addinfo)
      if value is not None:
        xsec += value
        files += 1

    if not lumi:
      # No luminosity on the files themselves: use the ancestors instead
      xsec = 0
      files = 0
      depthDict = {}
      seenAncestors = set()
      res = fc.getFileAncestors(lfns,[1,2,3,4])
      if res['OK']:
        for ancestorsDict in res['Value']['Successful'].values():
          for ancestor,dep in ancestorsDict.items():
            if ancestor not in seenAncestors:
              seenAncestors.add(ancestor)
              depthDict.setdefault(dep,[]).append(ancestor)
      # depthDict is empty when the lookup failed or returned no ancestors;
      # indexing the deepest level unconditionally used to raise IndexError
      if depthDict:
        for ancestor in depthDict[max(depthDict)]:
          info = _getFileInfo(ancestor)
          lumi += info[0]
          value = _xsectionFromInfo(info[2])
          if value is not None:
            xsec += value
            files += 1

    if xsec and files:
      xsec /= files
      dirmeta['CrossSection']=xsec
    else:
      dirmeta['CrossSection']=0.0

    if nbevts:
      dirmeta['NumberOfEvents']=nbevts
    if 'NumberOfEvents' not in dirmeta:
      dirmeta['NumberOfEvents']=0

    # Default to the event type itself; previously 'detail' stayed unbound (or kept
    # the value of the previous production) when the process had no 'Detail' entry
    evtType = dirmeta['EvtType']
    detail = evtType
    if evtType in processesdict and 'Detail' in processesdict[evtType]:
      detail = processesdict[evtType]['Detail']

    if not prodtype == 'MCGeneration':
      res = trc.getTransformationInputDataQuery(str(prodID))
      if res['OK']:
        if 'ProdID' in res['Value']:
          dirmeta['MomProdID']=res['Value']['ProdID']
    if 'MomProdID' not in dirmeta:
      dirmeta['MomProdID']=0
    dirmeta['detail']= _translate(detail)

    metadata.append(dirmeta)

  detectors = {}
  detectors['ILD'] = {}
  corres = {"MCGeneration":'gen',"MCSimulation":'SIM',"MCReconstruction":"REC","MCReconstruction_Overlay":"REC"}
  detectors['ILD']['SIM'] = []
  detectors['ILD']['REC'] = []
  detectors['SID'] = {}
  detectors['SID']['SIM'] = []
  detectors['SID']['REC'] = []
  detectors['sid'] = {}
  detectors['sid']['SIM'] = []
  detectors['sid']['REC'] = []
  detectors['gen']=[]
  for channel in metadata:
    if 'DetectorType'  not in channel:
      detectors['gen'].append((channel['detail'],
                               channel['Energy'],
                               channel['ProdID'],
                               channel['nb_files'],
                               channel['NumberOfEvents']/channel['nb_files'],
                               channel['NumberOfEvents'],
                               channel['CrossSection'],str(channel['proddetail'])))
    else:
      if not channel['DetectorType'] in detectors:
        gLogger.error("This is unknown detector", channel['DetectorType'])
        continue
      detectors[channel['DetectorType']][corres[channel['prodtype']]].append((channel['detail'],
                                                                              channel['Energy'],
                                                                              channel['DetectorType'],
                                                                              channel['ProdID'],
                                                                              channel['nb_files'],
                                                                              channel['NumberOfEvents']/channel['nb_files'],
                                                                              channel['NumberOfEvents'],
                                                                              channel['CrossSection'],
                                                                              channel['MomProdID'],
                                                                              str(channel['proddetail'])))

  detectorHeader = ('Channel', 'Energy','Detector','ProdID','Number of Files','Events/File','Statistics','Cross Section (fb)','Origin ProdID','Comment')
  # (key in 'detectors', <h1> title, log message format) for each detector section
  detectorSections = (('ILD', "ILD prods", "ILC CDR prods %s"),
                      ('SID', "SID prods", "SID CDR prods %s"),
                      ('sid', "sid dbd prods", "sid DBD prods %s"))

  with open("tables.html","w") as of:
    of.write("""<!DOCTYPE html>
<html>
 <head>
<title> Production summary </title>
</head>
<body>
""")
    if len(detectors['gen']):
      of.write("<h1>gen prods</h1>\n")
      table = Table(header_row = ('Channel', 'Energy','ProdID','Tasks','Average Evts/task','Statistics','Cross Section (fb)','Comment'))
      for item in detectors['gen']:
        table.rows.append( item )
      of.write(str(table))
      gLogger.info("Gen prods")
      gLogger.info(str(table))

    for detName, title, logFormat in detectorSections:
      if not len(detectors[detName]):
        continue
      of.write("<h1>%s</h1>\n" % title)
      for ptype in detectors[detName].keys():
        if not len(detectors[detName][ptype]):
          continue
        of.write("<h2>%s</h2>\n"%ptype)
        table = Table(header_row = detectorHeader)
        for item in detectors[detName][ptype]:
          table.rows.append( item )
        of.write(str(table))
        gLogger.info(logFormat % ptype)
        gLogger.info(str(table))

    of.write("""
</body>
</html>
""")
  gLogger.notice("Check ./tables.html in any browser for the results")
  dexit(0)
Esempio n. 24
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """Set up the default parameters and optionally load an existing transformation.

        :param transID: ID of a transformation to load from the server; a false
            value leaves the object as a blank definition
        :param transClient: client used for all server calls; a new
            TransformationClient is created when none is given
        :raises AttributeError: when the server answers that ``transID`` does not exist
        """
        super(Transformation, self).__init__()

        # Accepted value types per known parameter, enforced when a parameter is set
        self.paramTypes = {
            'TransformationID': [types.IntType, types.LongType],
            'TransformationName': types.StringTypes,
            'Status': types.StringTypes,
            'Description': types.StringTypes,
            'LongDescription': types.StringTypes,
            'Type': types.StringTypes,
            'Plugin': types.StringTypes,
            'AgentType': types.StringTypes,
            'FileMask': types.StringTypes,
            'TransformationGroup': types.StringTypes,
            'GroupSize': [types.IntType, types.LongType, types.FloatType],
            'InheritedFrom': [types.IntType, types.LongType],
            'Body': types.StringTypes,
            'MaxNumberOfTasks': [types.IntType, types.LongType],
            'EventsPerTask': [types.IntType, types.LongType]
        }
        # Current parameter values, starting from the defaults of a new transformation
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins',
            ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        self.transClient = transClient if transClient else TransformationClient()
        self.serverURL = self.transClient.getServer()
        self.exists = False

        if not transID:
            return

        self.paramValues['TransformationID'] = transID
        res = self.getTransformation()
        if res['OK']:
            self.exists = True
            return
        if res['Message'] == 'Transformation does not exist':
            raise AttributeError('TransformationID %d does not exist' % transID)
        # Any other failure: fall back to a blank definition and report it
        self.paramValues['TransformationID'] = 0
        gLogger.fatal("Failed to get transformation from database",
                      "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        """Store ``server`` as the service URL and hand it to the client.

        :param server: value stored in ``self.serverURL`` and passed to
            ``self.transClient.setServer``
        """
        self.serverURL = server
        client = self.transClient
        client.setServer(self.serverURL)

    def getServer(self):
        """Return the service URL currently stored on this object."""
        currentURL = self.serverURL
        return currentURL

    def reset(self, transID=0):
        """Re-initialise this object, keeping its client and server URL.

        The constructor is run again, so all parameters return to their
        defaults (or are reloaded from the server when ``transID`` is given).

        Calling ``self.__init__(transID)`` alone would create a brand-new
        default client and overwrite ``serverURL`` with that client's server,
        so the client and URL in use are captured first and restored afterwards.

        :param transID: optional ID of a transformation to load after the reset
        :return: S_OK()
        :raises AttributeError: propagated from the constructor when ``transID``
            does not exist
        """
        serverURL = self.serverURL
        transClient = self.transClient
        self.__init__(transID, transClient=transClient)
        self.setServer(serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Validate ``seList`` and store it as the 'TargetSE' parameter.

        :param seList: list of storage element names, or a string describing one
        :return: result structure of the underlying SE setter
        """
        result = self.__setSE('TargetSE', seList)
        return result

    def setSourceSE(self, seList):
        """Validate ``seList`` and store it as the 'SourceSE' parameter.

        :param seList: list of storage element names, or a string describing one
        :return: result structure of the underlying SE setter
        """
        result = self.__setSE('SourceSE', seList)
        return result

    def __setSE(self, se, seList):
        """Check a list of storage elements and store it under parameter ``se``.

        A string argument is first evaluated as a Python expression; when that
        fails it is split on commas and whitespace instead.

        :param se: name of the parameter to set (e.g. 'TargetSE')
        :param seList: list of storage element names, or a string form of it
        :return: the failed result of the SE check, otherwise the result of
            setting the parameter
        """
        if isinstance(seList, types.StringTypes):
            try:
                # NOTE(review): eval() executes arbitrary expressions. If seList can
                # ever come from untrusted input this should be replaced by
                # ast.literal_eval; kept here to avoid changing what is accepted.
                seList = eval(seList)
            except Exception:
                # Not a Python expression: treat it as a comma/space separated list.
                # 'except Exception' lets KeyboardInterrupt/SystemExit propagate.
                seList = seList.replace(',', ' ').split()
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = se
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Provide dynamic ``getXxx`` / ``setXxx`` accessors.

        For a name starting with 'get' or 'set' the remainder of the name is
        remembered in ``self.item_called`` and the generic getter or setter is
        returned; the caller then invokes it.

        :param name: attribute name that normal lookup did not find
        :raises AttributeError: for any name without a 'get' or 'set' prefix
        """
        prefix = name[:3]
        if prefix == 'get':
            self.item_called = name[3:]
            return self.__getParam
        if prefix == 'set':
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Return the value selected by the last ``getXxx`` attribute access.

        'Available' yields the names of the typed parameters, 'Parameters' the
        whole value dictionary, any other known name its current value.

        :return: S_OK with the requested data
        :raises AttributeError: when the requested name is not a known parameter
        """
        requested = self.item_called
        if requested == 'Available':
            return S_OK(self.paramTypes.keys())
        if requested == 'Parameters':
            return S_OK(self.paramValues)
        if requested not in self.paramValues:
            raise AttributeError(
                "Unknown parameter for transformation: %s" % self.item_called)
        return S_OK(self.paramValues[requested])

    def __setParam(self, value):
        """Set the parameter selected by the last ``setXxx`` attribute access.

        Typed parameters (those listed in ``self.paramTypes``) are type-checked
        when the value differs from the stored one. Untyped parameters are
        accepted as they are. When the transformation already exists on the
        server the change is sent there before being stored locally.

        :param value: new value of the parameter named in ``self.item_called``
        :return: S_OK(), or the failed result of the server update
        :raises TypeError: when a typed parameter gets a value of another type
        """
        name = self.item_called
        if name in self.paramTypes:
            changed = self.paramValues[name] != value
            if changed and type(value) not in self.paramTypes[name]:
                raise TypeError("%s %s %s expected one of %s" % (
                    name, value, type(value), self.paramTypes[name]))
        elif name not in self.paramValues:
            changed = True
        else:
            changed = self.paramValues[name] != value

        if not changed:
            gLogger.verbose("No change of parameter %s required" % name)
            return S_OK()

        gLogger.verbose("Parameter %s to be changed" % name)
        transID = self.paramValues['TransformationID']
        if self.exists and transID:
            res = self.transClient.setTransformationParameter(
                transID, name, value)
            if not res['OK']:
                return res
        self.paramValues[name] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Load the parameters of the current transformation from the server.

        Every parameter returned by the client is applied through the matching
        ``set<ParamName>`` method of this object; the setters' return values
        are not inspected.

        :param printOutput: when true, a failed client result is pretty-printed
        :return: S_ERROR() when no TransformationID is set, the failed client
            result, or S_OK with the parameter dictionary
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            setter = getattr(self, setterName, None)
            # The guard used to test setterName (always true), so a missing
            # setter would have been called as None.
            if not callable(setter):
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Fetch the logging records of the current transformation.

        :param printOutput: when true, print the failed result or the records
            (columns Message, MessageDate, AuthorDN)
        :return: S_ERROR() when no TransformationID is set, the failed client
            result, or S_OK with the list of records
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()

        res = self.transClient.getTransformationLogging(transID)
        if res['OK']:
            records = res['Value']
            if printOutput:
                columns = ['Message', 'MessageDate', 'AuthorDN']
                self._printFormattedDictList(records, columns,
                                             'MessageDate', 'MessageDate')
            return S_OK(records)

        if printOutput:
            self._prettyPrint(res)
        return res

    def extendTransformation(self, nTasks, printOutput=False):
        """Run the client's 'extendTransformation' operation for this transformation.

        :param nTasks: forwarded unchanged to the client operation
        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        result = self.__executeOperation('extendTransformation', nTasks,
                                         printOutput=printOutput)
        return result

    def cleanTransformation(self, printOutput=False):
        """Run the client's 'cleanTransformation' operation for this transformation.

        On success the local 'Status' value is set to 'Cleaned'.

        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        outcome = self.__executeOperation('cleanTransformation',
                                          printOutput=printOutput)
        if not outcome['OK']:
            return outcome
        self.paramValues['Status'] = 'Cleaned'
        return outcome

    def deleteTransformation(self, printOutput=False):
        """Run the client's 'deleteTransformation' operation for this transformation.

        On success this object is reset to a blank definition.

        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        outcome = self.__executeOperation('deleteTransformation',
                                          printOutput=printOutput)
        if not outcome['OK']:
            return outcome
        self.reset()
        return outcome

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Run the client's 'addFilesToTransformation' operation for this transformation.

        :param lfns: forwarded unchanged to the client operation
        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        result = self.__executeOperation('addFilesToTransformation', lfns,
                                         printOutput=printOutput)
        return result

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Run the client's 'setFileStatusForTransformation' operation.

        :param status: forwarded unchanged as the first operation argument
        :param lfns: forwarded unchanged as the second operation argument
        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        result = self.__executeOperation('setFileStatusForTransformation',
                                         status, lfns,
                                         printOutput=printOutput)
        return result

    def getTransformationTaskStats(self, printOutput=False):
        """Run the client's 'getTransformationTaskStats' operation.

        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        result = self.__executeOperation('getTransformationTaskStats',
                                         printOutput=printOutput)
        return result

    def getTransformationStats(self, printOutput=False):
        """Run the client's 'getTransformationStats' operation.

        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        result = self.__executeOperation('getTransformationStats',
                                         printOutput=printOutput)
        return result

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Run the client's 'deleteTasks' operation for this transformation.

        :param taskMin: forwarded unchanged as the first operation argument
        :param taskMax: forwarded unchanged as the second operation argument
        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        result = self.__executeOperation('deleteTasks', taskMin, taskMax,
                                         printOutput=printOutput)
        return result

    def addTaskForTransformation(self,
                                 lfns=[],
                                 se='Unknown',
                                 printOutput=False):
        """Run the client's 'addTaskForTransformation' operation.

        :param lfns: forwarded unchanged as the first operation argument
        :param se: forwarded unchanged as the second operation argument
        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        result = self.__executeOperation('addTaskForTransformation', lfns, se,
                                         printOutput=printOutput)
        return result

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Run the client's 'setTaskStatus' operation for this transformation.

        :param taskID: forwarded unchanged as the first operation argument
        :param status: forwarded unchanged as the second operation argument
        :param printOutput: when true, the result is pretty-printed
        :return: result structure of the operation
        """
        result = self.__executeOperation('setTaskStatus', taskID, status,
                                         printOutput=printOutput)
        return result

    def __executeOperation(self, operation, *parms, **kwds):
        """Call a TransformationClient method for the current transformation.

        The client method is invoked as
        ``operation(transID, *parms, **kwds)`` with ``printOutput`` removed
        from the keywords.

        :param operation: name of the client method to call
        :param parms: positional arguments placed after the transformation ID
        :param kwds: keyword arguments for the call; ``printOutput`` (default
            False) is consumed here and makes the result be pretty-printed
        :return: S_ERROR when no TransformationID is set or the client has no
            such callable, otherwise the client's result
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        # Default avoids a KeyError when a caller omits the keyword
        printOutput = kwds.pop('printOutput', False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            # The message used to be returned unformatted, with a literal '%s'
            return S_ERROR(
                "Unable to invoke %s, it isn't a member function of TransformationClient"
                % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        """Query the files of the current transformation.

        The list defaults are only read here, never modified.

        :param fileStatus: when non-empty, used as the 'Status' selection
        :param lfns: when non-empty, used as the 'LFN' selection
        :param outputFields: columns to print; an empty value prints the names
            of the available fields instead
        :param orderBy: field passed to the table printer as ordering key
        :param printOutput: when true, print the failed result or the table
        :return: the client's result structure
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list of names
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        """Query the tasks of the current transformation.

        The list defaults are only read here, never modified.

        :param taskStatus: when non-empty, used as the 'ExternalStatus' selection
        :param taskIDs: when non-empty, used as the 'TaskID' selection
        :param outputFields: columns to print; an empty value prints the names
            of the available fields instead
        :param orderBy: field passed to the table printer as ordering key
        :param printOutput: when true, print the failed result or the table
        :return: the client's result structure
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list of names
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        """Query transformations known to the server.

        The list defaults are only read here, never modified.

        :param transID: when non-empty, used as the 'TransformationID' selection
        :param transStatus: when non-empty, used as the 'Status' selection
        :param outputFields: columns to print; an empty value prints the names
            of the available fields instead
        :param orderBy: field passed to the table printer as ordering key
        :param printOutput: when true, print the failed result or the table
        :return: the client's result structure
        """
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list of names
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create this transformation on the server.

        After the sanity check the typed parameters are sent in the creation
        call. Every additional (untyped) parameter is then stored through a
        separate call, and a failure there is only logged.

        :param addFiles: forwarded to the client's addTransformation call
        :param printOutput: when true, print the parameters before the call and
            a failed result after it
        :return: an error result when the check or the creation fails,
            otherwise S_OK with the new transformation ID
        """
        res = self._checkCreation()
        if not res['OK']:
            return self._errorReport(res, 'Failed transformation sanity check')

        params = self.paramValues
        if printOutput:
            gLogger.info(
                "Will attempt to create transformation with the following parameters"
            )
            self._prettyPrint(params)

        res = self.transClient.addTransformation(
            params['TransformationName'],
            params['Description'],
            params['LongDescription'],
            params['Type'],
            params['Plugin'],
            params['AgentType'],
            params['FileMask'],
            transformationGroup=params['TransformationGroup'],
            groupSize=params['GroupSize'],
            inheritedFrom=params['InheritedFrom'],
            body=params['Body'],
            maxTasks=params['MaxNumberOfTasks'],
            eventsPerTask=params['EventsPerTask'],
            addFiles=addFiles)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res

        transID = res['Value']
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)

        for paramName, paramValue in self.paramValues.items():
            if paramName in self.paramTypes:
                # typed parameters were already part of the creation call
                continue
            res = self.transClient.setTransformationParameter(
                transID, paramName, paramValue)
            if res['OK']:
                continue
            gLogger.error("Failed to add parameter",
                          "%s %s" % (paramName, res['Message']))
            gLogger.notice(
                "To add this parameter later please execute the following.")
            gLogger.notice("oTransformation = Transformation(%d)" % transID)
            gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Check that this object can be submitted as a new transformation.

        Refuses when a TransformationID is already set, asks on the console
        for any empty required parameter, and prompts for a plugin when the
        current one is not among the supported plugins.

        :return: S_ERROR() when a TransformationID is set, the failed prompt
            result, or S_OK()
        """
        if self.paramValues['TransformationID']:
            hints = (
                "You are currently working with an active transformation definition.",
                "If you wish to create a new transformation reset the TransformationID.",
                "oTransformation.reset()",
            )
            for hint in hints:
                gLogger.info(hint)
            return S_ERROR()

        for parameter in ('TransformationName', 'Description',
                          'LongDescription', 'Type'):
            if self.paramValues[parameter]:
                continue
            gLogger.info(
                "%s is not defined for this transformation. This is required..."
                % parameter)
            self.paramValues[parameter] = raw_input(
                "Please enter the value of " + parameter + " ")

        plugin = self.paramValues['Plugin']
        if plugin in self.supportedPlugins:
            return S_OK()

        gLogger.info(
            "The selected Plugin (%s) is not known to the transformation agent."
            % plugin)
        res = self.__promptForParameter('Plugin',
                                        choices=self.supportedPlugins,
                                        default='Standard')
        if not res['OK']:
            return res
        self.paramValues['Plugin'] = res['Value']
        return S_OK()

    def _checkBySizePlugin(self):
        """Apply the same check as for the Standard plugin and return its result."""
        outcome = self._checkStandardPlugin()
        return outcome

    def _checkBySharePlugin(self):
        """Apply the same check as for the Standard plugin and return its result."""
        outcome = self._checkStandardPlugin()
        return outcome

    def _checkStandardPlugin(self):
        """Make sure 'GroupSize' is positive, resetting it to 1 otherwise.

        :return: S_OK(), or the failed result of setting the group size
        """
        currentSize = self.paramValues['GroupSize']
        if not (currentSize <= 0):
            return S_OK()
        gLogger.info(
            "The GroupSize was found to be less than zero. It has been set to 1."
        )
        res = self.setGroupSize(1)
        return res if not res['OK'] else S_OK()

    def _checkBroadcastPlugin(self):
        """Ask on the console for 'SourceSE' / 'TargetSE' when they are unset.

        The answer is split on commas and whitespace and applied through the
        matching ``set<Param>`` method.

        :return: S_ERROR when no setter exists, the failed setter result, or S_OK()
        """
        requiredParams = ['SourceSE', 'TargetSE']
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s"
            % (', '.join(requiredParams)))
        for requiredParam in requiredParams:
            if self.paramValues.get(requiredParam):
                # already set to a non-empty value
                continue
            answer = raw_input("Please enter " + requiredParam + " ")
            setterName = "set%s" % requiredParam
            setter = getattr(self, setterName, None)
            if not callable(setter):
                return S_ERROR(
                    "Unable to invoke %s, this function hasn't been implemented."
                    % setterName)
            res = setter(answer.replace(',', ' ').split())
            if not res['OK']:
                return res
        return S_OK()

    def __checkSEs(self, seList):
        """Check that every name in ``seList`` is a configured storage element.

        The known names are the sections under '/Resources/StorageElements'.

        :param seList: iterable of storage element names
        :return: an error report when the configuration cannot be read, S_ERROR
            with the number of unknown names, or S_OK()
        """
        res = gConfig.getSections('/Resources/StorageElements')
        if not res['OK']:
            return self._errorReport(res,
                                     'Failed to get possible StorageElements')
        knownSEs = res['Value']
        missing = [se for se in seList if se not in knownSEs]
        for unknownSE in missing:
            gLogger.error("StorageElement %s is not known" % unknownSE)
        if not missing:
            return S_OK()
        return S_ERROR("%d StorageElements not known" % len(missing))

    def __promptForParameter(self,
                             parameter,
                             choices=[],
                             default='',
                             insert=True):
        """Ask the user for a parameter value and optionally store it.

        The ``choices`` default is only passed on, never modified here.

        :param parameter: name of the parameter, used in the prompt and to find
            the ``set<parameter>`` method
        :param choices: forwarded to ``promptUser``
        :param default: forwarded to ``promptUser``
        :param insert: when true, the answer is applied through the setter
        :return: an error report when the prompt fails, S_ERROR when no setter
            is available, the failed setter result, or S_OK with the answer
        """
        res = promptUser("Please enter %s" % parameter,
                         choices=choices,
                         default=default)
        if not res['OK']:
            return self._errorReport(res)
        paramValue = res['Value']
        gLogger.notice("%s will be set to '%s'" % (parameter, paramValue))
        if insert:
            setterName = "set%s" % parameter
            setter = getattr(self, setterName, None)
            if not callable(setter):
                # The message used to be returned unformatted, with a literal '%s'
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!"
                    % setterName)
            res = setter(paramValue)
            if not res['OK']:
                return res
        return S_OK(paramValue)
Esempio n. 25
0
class InputDataAgent(AgentModule):
    """Agent adding catalog files that match a transformation's input meta query.

    On every cycle the agent lists the active transformations of the configured
    types, runs each transformation's "Input" meta query against the file
    catalog and adds the returned LFNs to the transformation.
    """

    def __init__(self, *args, **kwargs):
        """c'tor: read the agent options and create the clients."""
        AgentModule.__init__(self, *args, **kwargs)

        # Bookkeeping dictionaries, all keyed by transformation ID
        self.fileLog = {}  # number of LFNs returned by the last catalog query
        self.timeLog = {}  # time of the last cycle (RefreshOnly mode)
        self.fullTimeLog = {}  # time of the last query without date restriction

        self.pollingTime = self.am_getOption("PollingTime", 120)
        self.fullUpdatePeriod = self.am_getOption("FullUpdatePeriod", 86400)
        self.refreshonly = self.am_getOption("RefreshOnly", False)
        self.dateKey = self.am_getOption("DateKey", None)

        self.transClient = TransformationClient()
        self.metadataClient = FileCatalogClient()
        self.transformationTypes = None

    #############################################################################
    def initialize(self):
        """Work out which transformation types this agent handles.

        The "TransformationTypes" agent option wins; otherwise the data
        processing and data manipulation types from the Operations section are
        used. Extendable types are always removed.

        :returns: S_OK()
        """
        agentTSTypes = self.am_getOption("TransformationTypes", [])
        if agentTSTypes:
            self.transformationTypes = sorted(agentTSTypes)
        else:
            dataProc = Operations().getValue("Transformations/DataProcessing",
                                             ["MCSimulation", "Merge"])
            dataManip = Operations().getValue(
                "Transformations/DataManipulation", ["Replication", "Removal"])
            self.transformationTypes = sorted(dataProc + dataManip)
        extendables = Operations().getValue(
            "Transformations/ExtendableTransfTypes", [])
        if extendables:
            for extendable in extendables:
                if extendable in self.transformationTypes:
                    # The Extendables do not use this Agent (have no Input data query)
                    self.transformationTypes.remove(extendable)

        return S_OK()

    ##############################################################################
    def execute(self):
        """Main execution method: query the catalog for each active transformation.

        Failures for a single transformation are logged and the loop carries
        on with the next one.

        :returns: S_OK() in every case
        """

        # Get all the transformations
        result = self.transClient.getTransformations({
            "Status": "Active",
            "Type": self.transformationTypes
        })
        if not result["OK"]:
            self.log.error(
                "InputDataAgent.execute: Failed to get transformations.",
                result["Message"])
            return S_OK()

        # Process each transformation
        for transDict in result["Value"]:
            transID = int(transDict["TransformationID"])
            res = self.transClient.getTransformationMetaQuery(transID, "Input")
            if not res["OK"]:
                if cmpError(res, ENOENT):
                    self.log.info(
                        "InputDataAgent.execute: No input data query found for transformation",
                        transID)
                else:
                    self.log.error(
                        "InputDataAgent.execute: Failed to get input data query",
                        "for %d: %s" % (transID, res["Message"]),
                    )
                continue
            inputDataQuery = res["Value"]

            if self.refreshonly:
                # Determine the correct time stamp to use for this transformation
                if transID in self.timeLog:
                    if transID in self.fullTimeLog:
                        # If it is more than a day since the last reduced query, make a full query just in case
                        if (datetime.datetime.utcnow() -
                                self.fullTimeLog[transID]
                            ) < datetime.timedelta(
                                seconds=self.fullUpdatePeriod):
                            timeStamp = self.timeLog[transID]
                            if self.dateKey:
                                # Query from 10 seconds before the previous cycle
                                inputDataQuery[self.dateKey] = (
                                    timeStamp - datetime.timedelta(seconds=10)
                                ).strftime("%Y-%m-%d %H:%M:%S")
                            else:
                                self.log.error(
                                    "DateKey was not set in the CS, cannot use the RefreshOnly"
                                )
                        else:
                            self.fullTimeLog[
                                transID] = datetime.datetime.utcnow()
                self.timeLog[transID] = datetime.datetime.utcnow()
                if transID not in self.fullTimeLog:
                    self.fullTimeLog[transID] = datetime.datetime.utcnow()

            # Perform the query to the metadata catalog
            self.log.verbose("Using input data query for transformation",
                             "%d: %s" % (transID, str(inputDataQuery)))
            start = time.time()
            result = self.metadataClient.findFilesByMetadata(inputDataQuery)
            rtime = time.time() - start
            self.log.verbose("Metadata catalog query time",
                             ": %.2f seconds." % (rtime))
            if not result["OK"]:
                self.log.error(
                    "InputDataAgent.execute: Failed to get response from the metadata catalog",
                    result["Message"])
                continue
            lfnList = result["Value"]

            # Check if the number of files has changed since the last cycle
            nlfns = len(lfnList)
            self.log.info(
                "files returned for transformation from the metadata catalog: ",
                "%d -> %d" % (int(transID), nlfns))
            if nlfns == self.fileLog.get(transID):
                self.log.verbose(
                    "No new files in metadata catalog since last check")
            self.fileLog[transID] = nlfns

            # Add any new files to the transformation
            addedLfns = []
            if lfnList:
                self.log.verbose("Processing lfns for transformation:",
                                 "%d -> %d" % (transID, len(lfnList)))
                # Add the files to the transformation
                self.log.verbose("Adding lfns for transformation:",
                                 "%d -> %d" % (transID, len(lfnList)))
                result = self.transClient.addFilesToTransformation(
                    transID, sorted(lfnList))
                if not result["OK"]:
                    self.log.warn(
                        "InputDataAgent.execute: failed to add lfns to transformation",
                        result["Message"])
                    # Forget the file count for this transformation
                    self.fileLog[transID] = 0
                else:
                    if result["Value"]["Failed"]:
                        # Read the failures from the add-files result; the loop
                        # used to index the meta-query result ("res") instead.
                        for lfn, error in result["Value"]["Failed"].items():
                            self.log.warn(
                                "InputDataAgent.execute: Failed to add to transformation:",
                                "%s: %s" % (lfn, error))
                    if result["Value"]["Successful"]:
                        for lfn, status in result["Value"]["Successful"].items(
                        ):
                            if status == "Added":
                                addedLfns.append(lfn)
                        self.log.info(
                            "InputDataAgent.execute: Added files to transformation",
                            "(%d)" % len(addedLfns))

        return S_OK()
Esempio n. 26
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """Set up default parameters and, if an ID is given, load that transformation.

        :param transID: ID of an existing transformation to load; a false value
                        starts an empty definition
        :param transClient: client object to use; a TransformationClient is
                            created when none is supplied
        :raises AttributeError: when the lookup reports 'Transformation does not exist'
        """
        super(Transformation, self).__init__()

        strTypes = types.StringTypes
        intTypes = (types.IntType, types.LongType)

        # Accepted python types for the parameters that are type-checked on set
        self.paramTypes = {
            'TransformationID': list(intTypes),
            'TransformationName': strTypes,
            'Status': strTypes,
            'Description': strTypes,
            'LongDescription': strTypes,
            'Type': strTypes,
            'Plugin': strTypes,
            'AgentType': strTypes,
            'FileMask': strTypes,
            'TransformationGroup': strTypes,
            'GroupSize': list(intTypes) + [types.FloatType],
            'InheritedFrom': list(intTypes),
            'Body': strTypes,
            'MaxNumberOfTasks': list(intTypes),
            'EventsPerTask': list(intTypes)
        }
        # Initial values of those parameters
        self.paramValues = dict(TransformationID=0,
                                TransformationName='',
                                Status='New',
                                Description='',
                                LongDescription='',
                                Type='',
                                Plugin='Standard',
                                AgentType='Manual',
                                FileMask='',
                                TransformationGroup='General',
                                GroupSize=1,
                                InheritedFrom=0,
                                Body='',
                                MaxNumberOfTasks=0,
                                EventsPerTask=0)
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins',
            ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        self.transClient = transClient if transClient else TransformationClient()
        self.serverURL = self.transClient.getServer()
        # Must be defined before getTransformation(): the setters read it
        self.exists = False
        if not transID:
            return

        self.paramValues['TransformationID'] = transID
        res = self.getTransformation()
        if res['OK']:
            self.exists = True
            return
        if res['Message'] == 'Transformation does not exist':
            raise AttributeError('TransformationID %d does not exist' %
                                 transID)
        # Any other failure: fall back to an empty definition and report it
        self.paramValues['TransformationID'] = 0
        gLogger.fatal("Failed to get transformation from database",
                      "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        """Record ``server`` in ``self.serverURL`` and pass it to ``transClient.setServer``.

        :param server: server value to store and forward
        """
        self.serverURL = server
        self.transClient.setServer(server)

    def getServer(self):
        """Return the server value currently stored in ``self.serverURL``."""
        currentServer = self.serverURL
        return currentServer

    def reset(self, transID=0):
        """Re-run the constructor for ``transID`` and re-apply the server to the client.

        :param transID: transformation ID handed to ``__init__`` (0 gives an empty definition)
        :returns: S_OK()
        """
        self.__init__(transID)
        # NOTE(review): __init__ has just re-read serverURL from the client it
        # set up, so a server chosen before reset() does not appear to be
        # carried over here -- confirm this is intended.
        currentServer = self.serverURL
        self.transClient.setServer(currentServer)
        return S_OK()

    def setTargetSE(self, seList):
        """Validate ``seList`` and store it as the 'TargetSE' parameter.

        :param seList: storage elements, in any form accepted by ``__setSE``
        :returns: result of ``__setSE``
        """
        result = self.__setSE('TargetSE', seList)
        return result

    def setSourceSE(self, seList):
        """Validate ``seList`` and store it as the 'SourceSE' parameter.

        :param seList: storage elements, in any form accepted by ``__setSE``
        :returns: result of ``__setSE``
        """
        result = self.__setSE('SourceSE', seList)
        return result

    def setBody(self, body):
        """Check that the body is a string, or uses the proper syntax for multiple operations.

        A string is stored as is. A list is validated entry by entry and then
        stored as its JSON encoding.

        :param body: transformation body, for example

          .. code :: python

            body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                     ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
                   ]

        :type body: string or list of tuples (or lists) of string and dictionaries
        :raises TypeError: If the structure is not as expected
        :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used
        :returns: S_OK, S_ERROR
        """
        self.item_called = "Body"
        if isinstance(body, basestring):
            return self.__setParam(body)
        if not isinstance(body, (list, tuple)):
            raise TypeError("Expected list or string, but %r is %s" %
                            (body, type(body)))

        for tup in body:
            if not isinstance(tup, (tuple, list)):
                raise TypeError("Expected tuple or list, but %r is %s" %
                                (tup, type(tup)))
            if len(tup) != 2:
                raise TypeError("Expected 2-tuple, but %r is length %d" %
                                (tup, len(tup)))
            if not isinstance(tup[0], basestring):
                raise TypeError(
                    "Expected string, but first entry in tuple %r is %s" %
                    (tup, type(tup[0])))
            if not isinstance(tup[1], dict):
                # Report the type of the offending (second) entry, not the first
                raise TypeError(
                    "Expected dictionary, but second entry in tuple %r is %s" %
                    (tup, type(tup[1])))
            for par, val in tup[1].iteritems():
                if not isinstance(par, basestring):
                    raise TypeError(
                        "Expected string, but key in dictionary %r is %s" %
                        (par, type(par)))
                if par not in Operation.ATTRIBUTE_NAMES:
                    raise ValueError("Unknown attribute for Operation: %s" %
                                     par)
                if not isinstance(
                        val,
                    (basestring, int, long, float, list, tuple, dict)):
                    raise TypeError("Cannot encode %r, in json" % (val))

        # Store only after *every* entry has been validated. The return used to
        # sit inside the loop, so only the first tuple was checked and an empty
        # list fell through returning None.
        return self.__setParam(json.dumps(body))

    def __setSE(self, seParam, seList):
        """Normalise a storage element specification, validate it and store it.

        A string is first evaluated as a python expression; if that fails it is
        split on commas. Lists, tuples and dicts are converted with ``list()``.

        :param str seParam: name of the parameter to set (e.g. 'TargetSE')
        :param seList: string, list, tuple or dict describing the storage elements
        :returns: S_ERROR for an unsupported argument type or unknown storage
                  elements, otherwise the result of ``__setParam``
        """
        if isinstance(seList, basestring):
            try:
                # SECURITY NOTE(review): eval() executes arbitrary expressions;
                # only pass trusted strings here. Consider ast.literal_eval.
                seList = eval(seList)
            except Exception:
                # Not a python expression: treat it as a comma separated list.
                # Exception (not BaseException) so that KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                seList = seList.split(',')
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Provide generic ``get<Item>`` / ``set<Item>`` accessors for unknown attributes.

        The part of ``name`` after the 3-letter prefix is remembered in
        ``self.item_called`` and the generic getter or setter is returned.

        :param str name: attribute that normal lookup did not find
        :raises AttributeError: when ``name`` starts with neither 'get' nor 'set'
        """
        prefix, item = name[:3], name[3:]
        if prefix == 'get':
            self.item_called = item
            return self.__getParam
        if prefix == 'set':
            self.item_called = item
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Generic getter for the item named in ``self.item_called``.

        'Available' gives the names of the typed parameters, 'Parameters' the
        whole value dictionary, anything else the stored value of that item.

        :returns: S_OK(value)
        :raises AttributeError: when the item is not a known parameter
        """
        item = self.item_called
        if item == 'Available':
            return S_OK(self.paramTypes.keys())
        if item == 'Parameters':
            return S_OK(self.paramValues)
        if item not in self.paramValues:
            raise AttributeError("Unknown parameter for transformation: %s" %
                                 item)
        return S_OK(self.paramValues[item])

    def __setParam(self, value):
        """Generic setter for the item named in ``self.item_called``.

        The new value is stored locally and, when the transformation already
        exists with a known ID, also sent through
        ``transClient.setTransformationParameter``.

        :param value: new value of the parameter
        :returns: S_OK(), or the error result of the client call
        :raises TypeError: when a typed parameter gets a different value of an
                           unaccepted type
        """
        item = self.item_called
        if item in self.paramTypes:
            # Typed parameter: a differing value must be of an accepted type
            changed = self.paramValues[item] != value
            if changed and type(value) not in self.paramTypes[item]:
                raise TypeError(
                    "%s %s %s expected one of %s" %
                    (item, value, type(value), self.paramTypes[item]))
        else:
            # Free parameter: new, or different from what is stored
            changed = (item not in self.paramValues
                       or self.paramValues[item] != value)

        if not changed:
            gLogger.verbose("No change of parameter %s required" % item)
            return S_OK()

        gLogger.verbose("Parameter %s to be changed" % item)
        transID = self.paramValues['TransformationID']
        if self.exists and transID:
            res = self.transClient.setTransformationParameter(
                transID, item, value)
            if not res['OK']:
                return res
        self.paramValues[item] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch the transformation parameters and copy them into this object.

        Every returned parameter is passed to its ``set<Name>`` method.

        :param bool printOutput: pretty-print the error result on failure
        :returns: S_OK(dict of parameters), S_ERROR() when no TransformationID
                  is set, or the error result of the client call
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            setter = getattr(self, setterName, None)
            # Test the setter itself. The check used to be on setterName,
            # which is never empty, so a missing setter ended in calling None.
            if not callable(setter):
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Fetch the logging entries of this transformation from the client.

        :param bool printOutput: print the entries (Message, MessageDate,
                                 AuthorDN) or, on failure, the error result
        :returns: S_OK(list of entries), S_ERROR() when no TransformationID is
                  set, or the error result of the client call
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if res['OK']:
            entries = res['Value']
            if printOutput:
                columns = ['Message', 'MessageDate', 'AuthorDN']
                self._printFormattedDictList(entries, columns, 'MessageDate',
                                             'MessageDate')
            return S_OK(entries)
        if printOutput:
            self._prettyPrint(res)
        return res

    def extendTransformation(self, nTasks, printOutput=False):
        """Run the client's 'extendTransformation' operation for this transformation.

        :param nTasks: argument forwarded to the client after the transformation ID
        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        options = {'printOutput': printOutput}
        return self.__executeOperation('extendTransformation', nTasks,
                                       **options)

    def cleanTransformation(self, printOutput=False):
        """Run the client's 'cleanTransformation' operation; on success mark the local status 'Cleaned'.

        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        outcome = self.__executeOperation('cleanTransformation',
                                          printOutput=printOutput)
        if not outcome['OK']:
            return outcome
        self.paramValues['Status'] = 'Cleaned'
        return outcome

    def deleteTransformation(self, printOutput=False):
        """Run the client's 'deleteTransformation' operation; on success reset this object.

        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        outcome = self.__executeOperation('deleteTransformation',
                                          printOutput=printOutput)
        if not outcome['OK']:
            return outcome
        self.reset()
        return outcome

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Run the client's 'addFilesToTransformation' operation for this transformation.

        :param lfns: argument forwarded to the client after the transformation ID
        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        options = {'printOutput': printOutput}
        return self.__executeOperation('addFilesToTransformation', lfns,
                                       **options)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Run the client's 'setFileStatusForTransformation' operation for this transformation.

        :param status: first argument forwarded after the transformation ID
        :param lfns: second argument forwarded after the transformation ID
        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        arguments = (status, lfns)
        return self.__executeOperation('setFileStatusForTransformation',
                                       *arguments,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        """Run the client's 'getTransformationTaskStats' operation for this transformation.

        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        stats = self.__executeOperation('getTransformationTaskStats',
                                        printOutput=printOutput)
        return stats

    def getTransformationStats(self, printOutput=False):
        """Run the client's 'getTransformationStats' operation for this transformation.

        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        stats = self.__executeOperation('getTransformationStats',
                                        printOutput=printOutput)
        return stats

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Run the client's 'deleteTasks' operation for this transformation.

        :param taskMin: first argument forwarded after the transformation ID
        :param taskMax: second argument forwarded after the transformation ID
        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        taskRange = (taskMin, taskMax)
        return self.__executeOperation('deleteTasks',
                                       *taskRange,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=[],
                                 se='Unknown',
                                 printOutput=False):
        """Run the client's 'addTaskForTransformation' operation for this transformation.

        :param lfns: first argument forwarded after the transformation ID
                     (only passed on here, never modified)
        :param se: second argument forwarded after the transformation ID
        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        arguments = (lfns, se)
        return self.__executeOperation('addTaskForTransformation',
                                       *arguments,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Run the client's 'setTaskStatus' operation for this transformation.

        :param taskID: first argument forwarded after the transformation ID
        :param status: second argument forwarded after the transformation ID
        :param bool printOutput: pretty-print the result
        :returns: result of ``__executeOperation``
        """
        arguments = (taskID, status)
        return self.__executeOperation('setTaskStatus',
                                       *arguments,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Call ``transClient.<operation>(transID, *parms, **kwds)`` for this transformation.

        :param str operation: name of the client method to call
        :param parms: positional arguments passed after the transformation ID
        :param kwds: keyword arguments for the client method; ``printOutput``
                     is consumed here (default False) and triggers a pretty
                     print of the result
        :returns: result of the client call, or S_ERROR when no TransformationID
                  is set or the client has no such callable
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        # Default avoids a KeyError for callers that omit the keyword
        printOutput = kwds.pop('printOutput', False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            # The message used to be returned with a literal, unformatted '%s'
            return S_ERROR(
                "Unable to invoke %s, it isn't a member function of TransformationClient"
                % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        """Query the client for the files of this transformation.

        The list defaults are only read, never modified.

        :param fileStatus: optional 'Status' selection
        :param lfns: optional 'LFN' selection
        :param list outputFields: columns to print; when empty the available
                                  field names are logged instead
        :param str orderBy: column used to order the printed table
        :param bool printOutput: print the result (or the error)
        :returns: result of ``transClient.getTransformationFiles``
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        """Query the client for the tasks of this transformation.

        The list defaults are only read, never modified.

        :param taskStatus: optional 'ExternalStatus' selection
        :param taskIDs: optional 'TaskID' selection
        :param list outputFields: columns to print; when empty the available
                                  field names are logged instead
        :param str orderBy: column used to order the printed table
        :param bool printOutput: print the result (or the error)
        :returns: result of ``transClient.getTransformationTasks``
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        """Query the client for transformations matching the given selection.

        The list defaults are only read, never modified.

        :param transID: optional 'TransformationID' selection
        :param transStatus: optional 'Status' selection
        :param list outputFields: columns to print; when empty the available
                                  field names are logged instead
        :param str orderBy: column used to order the printed table
        :param bool printOutput: print the result (or the error)
        :returns: result of ``transClient.getTransformations``
        """
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """Read the identity and username from the proxy info.

        :returns: S_OK({'username': ..., 'authorDN': ...}) or S_ERROR with the
                  message reported by ``getProxyInfo``
        """
        proxy = getProxyInfo()
        if not proxy['OK']:
            gLogger.error("Unable to get uploaded proxy Info %s " %
                          proxy['Message'])
            return S_ERROR(proxy['Message'])

        authorDN = proxy['Value']['identity']
        userName = proxy['Value']['username']
        return S_OK({'username': userName, 'authorDN': authorDN})

    #############################################################################
    def getTransformationsByUser(self,
                                 authorDN="",
                                 userName="",
                                 transID=[],
                                 transStatus=[],
                                 outputFields=[
                                     'TransformationID', 'Status', 'AgentType',
                                     'TransformationName', 'CreationDate',
                                     'AuthorDN'
                                 ],
                                 orderBy='TransformationID',
                                 printOutput=False):
        """Query the client for the transformations created by a given author.

        When ``authorDN`` is empty it is taken from the proxy; a non-empty
        ``userName`` must then match the proxy's username. The list defaults
        are only read, never modified.

        :param str authorDN: DN used for the 'AuthorDN' selection
        :param str userName: expected proxy username when authorDN is empty
        :param transID: optional 'TransformationID' selection
        :param transStatus: optional 'Status' selection
        :param list outputFields: columns to print; when empty the available
                                  field names are logged instead
        :param str orderBy: column used to order the printed table
        :param bool printOutput: print the result (or the error)
        :returns: result of ``transClient.getTransformations`` or S_ERROR when
                  the author cannot be resolved from the proxy
        """
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res['OK']:
                gLogger.error(res['Message'])
                return S_ERROR(res['Message'])

            foundUserName = res['Value']['username']
            foundAuthor = res['Value']['authorDN']
            # If the user who created the uploaded proxy differs from the
            # provided username, report the error and exit
            if not (userName == "" or userName == foundUserName):
                message = (
                    "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                    % (userName, foundUserName))
                gLogger.error(message)
                return S_ERROR(message)

            userName = foundUserName
            authorDN = foundAuthor
            gLogger.info(
                "Will list transformations created by user '%s' with status '%s'"
                % (userName, ', '.join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ', '.join(transStatus)))

        condDict['AuthorDN'] = authorDN
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res

        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Show the summary for a list of Transformations.

        Fields starting with 'F' ('J') refer to files (jobs).
        Proc. stands for processed.

        :param list transID: transformation IDs to summarise (only read here)
        :returns: S_OK(list of dicts keyed by the short column headers), the
                  client's error result, or S_ERROR when nothing matched
        """
        fullNames = [
            'TransformationID', 'Type', 'Status', 'Files_Total',
            'Files_PercentProcessed', 'Files_Processed', 'Files_Unused',
            'Jobs_TotalCreated', 'Jobs_Waiting', 'Jobs_Running', 'Jobs_Done',
            'Jobs_Failed', 'Jobs_Stalled'
        ]
        # Headers used for each field in the printing: short to fit in one line
        shortNames = [
            'TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.',
            'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail',
            'J_Stalled'
        ]

        selection = {'TransformationID': transID}
        nItems = len(transID)
        result = self.transClient.getTransformationSummaryWeb(
            selection, [], 0, nItems)
        if not result['OK']:
            self._prettyPrint(result)
            return result

        summaries = []
        if result['Value']['TotalRecords'] > 0:
            try:
                columns = result['Value']['ParameterNames']
                for record in result['Value']['Records']:
                    # Pick the wanted columns out of the record, in display order
                    shown = [
                        record[columns.index(fullName)]
                        for fullName in fullNames
                    ]
                    summaries.append(dict(zip(shortNames, shown)))
            except Exception as x:
                print('Exception %s ' % str(x))

        if not summaries:
            gLogger.error(
                'No found transformations satisfying input condition')
            return S_ERROR(
                'No found transformations satisfying input condition')

        print(self._printFormattedDictList(summaries, shortNames,
                                           shortNames[0], shortNames[0]))
        return S_OK(summaries)

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create the transformation through the client from the current parameter values.

        After creation, every parameter that is not one of the typed
        parameters is sent separately with ``setTransformationParameter``;
        failures there are only logged.

        :param addFiles: forwarded to ``transClient.addTransformation``
        :param bool printOutput: print the parameters before creation and the
                                 error result on failure
        :returns: S_OK(new transformation ID), the error report of the sanity
                  check, or the client's error result
        """
        check = self._checkCreation()
        if not check['OK']:
            return self._errorReport(check,
                                     'Failed transformation sanity check')
        if printOutput:
            gLogger.info(
                "Will attempt to create transformation with the following parameters"
            )
            self._prettyPrint(self.paramValues)

        params = self.paramValues
        res = self.transClient.addTransformation(
            params['TransformationName'],
            params['Description'],
            params['LongDescription'],
            params['Type'],
            params['Plugin'],
            params['AgentType'],
            params['FileMask'],
            transformationGroup=params['TransformationGroup'],
            groupSize=params['GroupSize'],
            inheritedFrom=params['InheritedFrom'],
            body=params['Body'],
            maxTasks=params['MaxNumberOfTasks'],
            eventsPerTask=params['EventsPerTask'],
            addFiles=addFiles)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res

        transID = res['Value']
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            if paramName in self.paramTypes:
                # Typed parameters were already part of the creation call
                continue
            res = self.transClient.setTransformationParameter(
                transID, paramName, paramValue)
            if res['OK']:
                continue
            gLogger.error("Failed to add parameter",
                          "%s %s" % (paramName, res['Message']))
            gLogger.notice(
                "To add this parameter later please execute the following.")
            gLogger.notice("oTransformation = Transformation(%d)" % transID)
            gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """ Sanity checks performed before a transformation is created.

        * refuses to continue if a TransformationID is already set;
        * asks on the console for every mandatory parameter that is empty;
        * if a plugin is selected that is not in self.supportedPlugins, prompts
          for a replacement and stores it.

        :return: S_OK() if the definition may be submitted, an error result
                 otherwise
        """
        if self.paramValues['TransformationID']:
            for hint in (
                    "You are currently working with an active transformation definition.",
                    "If you wish to create a new transformation reset the TransformationID.",
                    "oTransformation.reset()"):
                gLogger.info(hint)
            return S_ERROR()

        for parameter in ('TransformationName', 'Description',
                          'LongDescription', 'Type'):
            if self.paramValues[parameter]:
                continue
            gLogger.info(
                "%s is not defined for this transformation. This is required..."
                % parameter)
            self.paramValues[parameter] = raw_input(
                "Please enter the value of " + parameter + " ")

        plugin = self.paramValues['Plugin']
        if not plugin or plugin in self.supportedPlugins:
            return S_OK()

        gLogger.info(
            "The selected Plugin (%s) is not known to the transformation agent."
            % plugin)
        chosen = self.__promptForParameter('Plugin',
                                           choices=self.supportedPlugins,
                                           default='Standard')
        if not chosen['OK']:
            return chosen
        self.paramValues['Plugin'] = chosen['Value']
        return S_OK()

    def _checkBySizePlugin(self):
        """ Apply the same validation as _checkStandardPlugin and return its result. """
        standardCheck = self._checkStandardPlugin()
        return standardCheck

    def _checkBySharePlugin(self):
        """ Apply the same validation as _checkStandardPlugin and return its result. """
        standardCheck = self._checkStandardPlugin()
        return standardCheck

    def _checkStandardPlugin(self):
        """ Ensure GroupSize is usable, resetting it to 1 via setGroupSize when it is <= 0.

        :return: S_OK(), or the failing result of setGroupSize
        """
        needsReset = self.paramValues['GroupSize'] <= 0
        if not needsReset:
            return S_OK()
        gLogger.info(
            "The GroupSize was found to be less than zero. It has been set to 1."
        )
        reset = self.setGroupSize(1)
        return S_OK() if reset['OK'] else reset

    def _checkBroadcastPlugin(self):
        """ Make sure SourceSE and TargetSE are defined, asking on the console if not.

        For each missing parameter the answer is split on commas/whitespace
        into a list and handed to the matching ``set<Param>`` method.

        :return: S_OK(), the failing setter result, or S_ERROR if no callable
                 setter exists
        """
        requiredParams = ['SourceSE', 'TargetSE']
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s"
            % (', '.join(requiredParams)))
        for requiredParam in requiredParams:
            if self.paramValues.get(requiredParam):
                continue
            answer = raw_input("Please enter " + requiredParam + " ")
            setterName = "set%s" % requiredParam
            available = hasattr(self, setterName) and callable(
                getattr(self, setterName))
            setter = getattr(self, setterName) if available else None
            if not setter:
                return S_ERROR(
                    "Unable to invoke %s, this function hasn't been implemented."
                    % setterName)
            outcome = setter(answer.replace(',', ' ').split())
            if not outcome['OK']:
                return outcome
        return S_OK()

    def __checkSEs(self, seList):
        """ Check that every entry of seList is a section of /Resources/StorageElements.

        :param seList: iterable of storage element names
        :return: S_OK() if all are known, S_ERROR with the number of unknown
                 ones otherwise; the value of self._errorReport if the
                 configuration lookup fails
        """
        known = gConfig.getSections('/Resources/StorageElements')
        if not known['OK']:
            return self._errorReport(known,
                                     'Failed to get possible StorageElements')
        unknown = set(seList) - set(known['Value'])
        if not unknown:
            return S_OK()
        for se in unknown:
            gLogger.error("StorageElement %s is not known" % se)
        return S_ERROR("%d StorageElements not known" % len(unknown))

    def __promptForParameter(self,
                             parameter,
                             choices=None,
                             default='',
                             insert=True):
        """ Ask the user for the value of a parameter and optionally store it.

        :param parameter: name of the parameter; also selects the setter
                          ``set<parameter>`` used when insert is true
        :param choices: allowed answers passed to promptUser (defaults to an
                        empty list)
        :param default: default answer passed to promptUser
        :param insert: when true, pass the answer to ``set<parameter>``
        :return: S_OK(answer); the failing setter result; S_ERROR if no
                 callable setter exists; or the value of self._errorReport if
                 prompting fails
        """
        # A fresh list per call instead of a shared mutable default argument.
        if choices is None:
            choices = []
        res = promptUser("Please enter %s" % parameter,
                         choices=choices,
                         default=default)
        if not res['OK']:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res['Value']))
        paramValue = res['Value']
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                # The setter name is now interpolated; previously the raw '%s'
                # placeholder ended up in the message.
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!"
                    % setterName)
            res = setter(paramValue)
            if not res['OK']:
                return res
        return S_OK(paramValue)
Esempio n. 27
0
class DataRecoveryAgent( AgentModule ):
  """Data Recovery Agent"""
  def __init__(self, *args, **kwargs):
    """Set up options, service clients and the table of recovery rules.

    ``self.todo`` maps a rule-set name ('MCGeneration' or 'OtherProductions')
    to an ordered list of rule dictionaries with the keys:

    * ``Message`` / ``ShortMessage``: texts used for logging and the summary
    * ``Counter``: number of jobs the rule was applied to
    * ``Check``: callable(job) deciding whether the rule applies
    * ``Actions``: callable(job, tInfo) performing the corrective calls

    The order of the rules matters: checkJob applies only the first rule whose
    ``Check`` returns a true value.
    """
    AgentModule.__init__( self, *args, **kwargs )
    self.name = 'DataRecoveryAgent'
    self.enabled = False

    self.productionsToIgnore = self.am_getOption("TransformationsToIgnore", [])
    self.transformationTypes = self.am_getOption( "TransformationTypes",
                                                  ['MCReconstruction',
                                                   'MCSimulation',
                                                   'MCReconstruction_Overlay',
                                                   'MCGeneration'] )
    self.transformationStatus = self.am_getOption( "TransformationStatus", ['Active', 'Completing'] )

    self.jobStatus = ['Failed','Done'] ##This needs to be both otherwise we cannot account for all cases

    self.jobMon = JobMonitoringClient()
    self.fcClient = FileCatalogClient()
    self.tClient = TransformationClient()
    self.reqClient = ReqClient()
    self.diracILC = DiracILC()
    # input files already claimed by a successful task of the current
    # production; execute() resets this set for every production
    self.inputFilesProcessed = set()
    self.todo = {'MCGeneration':
                 [ dict( Message="MCGeneration: OutputExists: Job 'Done'",
                         ShortMessage="MCGeneration: job 'Done' ",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and job.status=='Failed',
                         Actions=lambda job,tInfo: [ job.setJobDone(tInfo) ]
                       ),
                   dict( Message="MCGeneration: OutputMissing: Job 'Failed'",
                         ShortMessage="MCGeneration: job 'Failed' ",
                         Counter=0,
                         Check=lambda job: job.allFilesMissing() and job.status=='Done',
                         Actions=lambda job,tInfo: [ job.setJobFailed(tInfo) ]
                       ),
                   # dict( Message="MCGeneration, job 'Done': OutputExists: Task 'Done'",
                   #       ShortMessage="MCGeneration: job already 'Done' ",
                   #       Counter=0,
                   #       Check=lambda job: job.allFilesExist() and job.status=='Done',
                   #       Actions=lambda job,tInfo: [ tInfo._TransformationInfo__setTaskStatus(job, 'Done') ]
                   #     ),
                 ],
                 'OtherProductions':
                 [ \
                   ## should always be first!
                   dict( Message="One of many Successful: clean others",
                         ShortMessage="Other Tasks --> Keep",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and job.otherTasks and job.inputFile not in self.inputFilesProcessed,
                         Actions=lambda job,tInfo: [ self.inputFilesProcessed.add(job.inputFile), job.setJobDone(tInfo), job.setInputProcessed(tInfo) ]
                       ),
                   dict( Message="Other Task processed Input, no Output: Fail",
                         ShortMessage="Other Tasks --> Fail",
                         Counter=0,
                         Check=lambda job: job.inputFile in self.inputFilesProcessed and job.allFilesMissing() and job.status!='Failed',
                         Actions=lambda job,tInfo: [ job.setJobFailed(tInfo) ]
                       ),
                   dict( Message="Other Task processed Input: Fail and clean",
                         ShortMessage="Other Tasks --> Cleanup",
                         Counter=0,
                         Check=lambda job: job.inputFile in self.inputFilesProcessed and not job.allFilesMissing(),
                         Actions=lambda job,tInfo: [ job.setJobFailed(tInfo), job.cleanOutputs(tInfo) ]
                       ),
                   dict( Message="InputFile missing: mark job 'Failed', mark input 'Deleted', clean",
                         ShortMessage="Input Missing --> Job 'Failed, Input 'Deleted', Cleanup",
                         Counter=0,
                         Check=lambda job: job.inputFile and not job.inputFileExists and job.fileStatus != "Deleted",
                         Actions=lambda job,tInfo: [ job.cleanOutputs(tInfo), job.setJobFailed(tInfo), job.setInputDeleted(tInfo) ]
                       ),
                   dict( Message="InputFile Deleted, output Exists: mark job 'Failed', clean",
                         ShortMessage="Input Deleted --> Job 'Failed, Cleanup",
                         Counter=0,
                         Check=lambda job: job.inputFile and not job.inputFileExists and job.fileStatus == "Deleted" and not job.allFilesMissing(),
                         Actions=lambda job,tInfo: [ job.cleanOutputs(tInfo), job.setJobFailed(tInfo) ]
                       ),
                   ## All Output Exists
                   dict( Message="Output Exists, job Failed, input not Processed --> Job Done, Input Processed",
                         ShortMessage="Output Exists --> Job Done, Input Processed",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and \
                                           not job.otherTasks and \
                                           job.status=='Failed' and \
                                           job.fileStatus!="Processed" and \
                                           job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setJobDone(tInfo), job.setInputProcessed(tInfo) ]
                       ),
                   dict( Message="Output Exists, job Failed, input Processed --> Job Done",
                         ShortMessage="Output Exists --> Job Done",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and \
                                           not job.otherTasks and \
                                           job.status=='Failed' and \
                                           job.fileStatus=="Processed" and \
                                           job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setJobDone(tInfo) ]
                       ),
                   dict( Message="Output Exists, job Done, input not Processed --> Input Processed",
                         ShortMessage="Output Exists --> Input Processed",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and \
                                           not job.otherTasks and \
                                           job.status=='Done' and \
                                           job.fileStatus!="Processed" and \
                                           job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setInputProcessed(tInfo) ]
                       ),
                   ## outputmissing
                   ## (the MaxReset rule has to precede the plain "Input Unused"
                   ## rule: it has the same conditions plus the error count)
                   dict( Message="Output Missing, job Failed, input Assigned, MaxError --> Input MaxReset",
                         ShortMessage="Max ErrorCount --> Input MaxReset",
                         Counter=0,
                         Check=lambda job: job.allFilesMissing() and \
                                           not job.otherTasks and \
                                           job.status=='Failed' and \
                                           job.fileStatus in ASSIGNEDSTATES and \
                                           job.inputFile not in self.inputFilesProcessed and \
                                           job.inputFileExists and \
                                           job.errorCount > MAXRESET,
                         Actions=lambda job,tInfo: [ job.setInputMaxReset(tInfo) ]
                       ),
                   dict( Message="Output Missing, job Failed, input Assigned --> Input Unused",
                         ShortMessage="Output Missing --> Input Unused",
                         Counter=0,
                         Check=lambda job: job.allFilesMissing() and \
                                           not job.otherTasks and \
                                           job.status=='Failed' and \
                                           job.fileStatus in ASSIGNEDSTATES and \
                                           job.inputFile not in self.inputFilesProcessed and \
                                           job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setInputUnused(tInfo) ]
                       ),
                   dict( Message="Output Missing, job Done, input Assigned --> Job Failed, Input Unused",
                         ShortMessage="Output Missing --> Job Failed, Input Unused",
                         Counter=0,
                         Check=lambda job: job.allFilesMissing() and \
                                           not job.otherTasks and \
                                           job.status=='Done' and \
                                           job.fileStatus in ASSIGNEDSTATES and \
                                           job.inputFile not in self.inputFilesProcessed and \
                                           job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setInputUnused(tInfo), job.setJobFailed(tInfo) ]
                       ),
                   ## some files missing, needing cleanup. Only checking for
                   ## assigned, because processed could mean an earlier job was
                   ## successful and this one is just the duplicate that needed
                   ## to be removed! But we check for other tasks earlier, so
                   ## this should not happen
                   dict( Message="Some missing, job Failed, input Assigned --> cleanup, Input 'Unused'",
                         ShortMessage="Output Missing --> Cleanup, Input Unused",
                         Counter=0,
                         Check=lambda job: job.someFilesMissing() and \
                                           not job.otherTasks and \
                                           job.status=='Failed' and \
                                           job.fileStatus in ASSIGNEDSTATES and \
                                           job.inputFileExists,
                         Actions=lambda job,tInfo: [job.cleanOutputs(tInfo),job.setInputUnused(tInfo)]
                         #Actions=lambda job,tInfo: []
                       ),
                   dict( Message="Some missing, job Done, input Assigned --> cleanup, job Failed, Input 'Unused'",
                         ShortMessage="Output Missing --> Cleanup, Job Failed, Input Unused",
                         Counter=0,
                         Check=lambda job: job.someFilesMissing() and \
                                           not job.otherTasks and \
                                           job.status=='Done' and \
                                           job.fileStatus in ASSIGNEDSTATES and \
                                           job.inputFileExists,
                         Actions=lambda job,tInfo: [job.cleanOutputs(tInfo),job.setInputUnused(tInfo),job.setJobFailed(tInfo)]
                         #Actions=lambda job,tInfo: []
                       ),
                   dict( Message="Some missing, job Done --> job Failed",
                         ShortMessage="Output Missing, Done --> Job Failed",
                         Counter=0,
                         Check=lambda job: not job.allFilesExist() and job.status=='Done',
                         Actions=lambda job,tInfo: [job.setJobFailed(tInfo)]
                       ),
                   dict ( Message="Something Strange",
                          ShortMessage="Strange",
                          Counter=0,
                          Check=lambda job: job.status not in ("Failed","Done"),
                          Actions=lambda job,tInfo: []
                        ),
                   ##should always be the last one!
                   ## (its Check never matches; the counter is incremented
                   ## directly by __failJobHard via index -1)
                   dict ( Message="Failed Hard",
                          ShortMessage="Failed Hard",
                          Counter=0,
                          Check=lambda job: False, ## never
                          Actions=lambda job,tInfo: []
                        ),
                 ]
                }
    # prodID -> (nDone, nFailed) seen the last time the production was treated;
    # treatProduction skips a production whose numbers did not change
    self.jobCache = defaultdict( lambda: (0, 0) )
    self.printEveryNJobs = self.am_getOption( 'PrintEvery', 200 )
    ##Notification
    self.notesToSend = ""
    self.addressTo = self.am_getOption('MailTo', ["*****@*****.**"])
    self.addressFrom = self.am_getOption( 'MailFrom', "*****@*****.**" )
    self.subject = "DataRecoveryAgent"
    # reference point for the elapsed-time figures in the progress logs
    self.startTime = time.time()
    
    #############################################################################
  def beginExecution(self):
    """Re-read the agent options at the start of a cycle.

    :returns: S_OK()
    """
    getOption = self.am_getOption
    defaultTypes = ['MCReconstruction',
                    'MCSimulation',
                    'MCReconstruction_Overlay',
                    'MCGeneration']
    defaultStatus = ['Active', 'Completing']

    self.enabled = getOption('EnableFlag', False)
    self.productionsToIgnore = getOption("TransformationsToIgnore", [])
    self.transformationTypes = getOption( "TransformationTypes", defaultTypes )
    self.transformationStatus = getOption( "TransformationStatus", defaultStatus )
    # MailTo falls back to whatever address list is currently configured
    self.addressTo = getOption('MailTo', self.addressTo)
    self.addressFrom = getOption( 'MailFrom', "*****@*****.**" )
    self.printEveryNJobs = getOption( 'PrintEvery', 200 )

    return S_OK()
  #############################################################################
  def execute(self):
    """ The main execution method.

    Loops over all eligible transformations, skipping those listed in
    self.productionsToIgnore. For each production the rule counters and the
    set of processed input files are reset, the production is treated, and if
    there are notes and not only "keep" rules fired, the notes are mailed to
    every address in self.addressTo.

    :returns: S_OK(), or S_ERROR if the transformations cannot be obtained
    """
    self.log.notice( "Will ignore the following productions: %s" % self.productionsToIgnore )
    self.log.notice( " Job Cache: %s " % self.jobCache )
    transformations = self.getEligibleTransformations( self.transformationStatus, self.transformationTypes )
    if not transformations['OK']:
      self.log.error( "Failure to get transformations", transformations['Message'] )
      return S_ERROR( "Failure to get transformations" )
    # items() rather than the Python-2-only iteritems(): works on both versions
    for prodID, transInfoDict in transformations['Value'].items():
      if prodID in self.productionsToIgnore:
        self.log.notice( "Ignoring Production: %s " % prodID )
        continue
      self.__resetCounters()
      self.inputFilesProcessed = set()
      self.log.notice("Running over Production: %s " % prodID)
      self.treatProduction(int(prodID), transInfoDict)

      if self.notesToSend and self.__notOnlyKeepers(transInfoDict['Type']):
        # remove from the jobCache because something happened
        self.jobCache.pop(int(prodID), None)
        notification = NotificationClient()
        for address in self.addressTo:
          result = notification.sendMail( address, "%s: %s" %( self.subject, prodID ), self.notesToSend, self.addressFrom, localAttempt = False )
          if not result['OK']:
            self.log.error( 'Cannot send notification mail', result['Message'] )
      # notes are per production; start the next one with a clean slate
      self.notesToSend = ""

    return S_OK()

  def getEligibleTransformations( self, status, typeList ):
    """ Select transformations of given status and type.

    :param status: value used for the 'Status' condition
    :param typeList: value used for the 'Type' condition
    :returns: S_OK with a dict mapping str(TransformationID) to the
              transformation dictionary, or the failing client result
    """
    query = self.tClient.getTransformations(condDict = {'Status' : status, 'Type' : typeList})
    if not query['OK']:
      return query
    byID = dict( ( str(prod['TransformationID']), prod ) for prod in query['Value'] )
    return S_OK(byID)

  def treatProduction(self, prodID, transInfoDict):
    """Check all jobs of one production, unless nothing changed since last time.

    The numbers of done and failed jobs are compared with the cached values
    for this production; if both are unchanged the production is skipped.
    Task information is only collected for productions whose type does not
    start with "MCGeneration".
    """
    tInfo = TransformationInfo(prodID, transInfoDict, self.enabled,
                               self.tClient, self.fcClient, self.jobMon)
    jobs, nDone, nFailed = tInfo.getJobs(statusList=self.jobStatus)

    previous = self.jobCache[prodID]
    if previous[0] == nDone and previous[1] == nFailed:
      self.log.notice( "Skipping production %s because nothing changed" % prodID )
      return
    self.jobCache[prodID] = (nDone, nFailed)

    self.startTime = time.time()
    if transInfoDict['Type'].startswith("MCGeneration"):
      tasksDict, lfnTaskDict = None, None
    else:
      self.log.notice('Getting tasks...')
      tasksDict = tInfo.checkTasksStatus()
      # reverse lookup: LFN of a task -> task ID
      lfnTaskDict = dict( ( tasksDict[taskID]['LFN'], taskID ) for taskID in tasksDict )

    self.checkAllJobs(jobs, tInfo, tasksDict, lfnTaskDict)
    self.printSummary()

  def checkJob( self, job, tInfo ):
    """ deal with the job """
    checks = self.todo['MCGeneration'] if job.tType.startswith('MCGeneration') else self.todo['OtherProductions']
    for do in checks:
      if do['Check'](job):
        do['Counter'] += 1
        self.log.notice( do['Message'] )
        self.log.notice( job )
        self.notesToSend += do['Message']+'\n'
        self.notesToSend += str(job)+'\n'
        do['Actions'](job, tInfo)
        return

  def getLFNStatus(self, jobs):
    """Get all the LFNs for the jobs and get their status.

    First the job information is retrieved for every job and the input and
    output LFNs are collected; then the file catalog is asked, in chunks of
    200 LFNs, which of them exist.

    Both steps retry until they succeed: a RuntimeError from a job, or a
    failed catalog query, is logged and the call is repeated.

    :param dict jobs: job objects, only the values are used
    :returns: dict with the 'Successful' part of the catalog answers
    """
    self.log.notice('Collecting LFNs...')
    lfnExistence = {}
    lfnCache = []
    nJobs = len(jobs)
    for counter, job in enumerate(jobs.values()):
      if counter % self.printEveryNJobs == 0:
        self.log.notice('Getting JobInfo: %d/%d: %3.1fs' % (counter, nJobs, float(time.time() - self.startTime)))
      while True:
        try:
          job.getJobInformation(self.diracILC)
          if job.inputFile:
            lfnCache.append(job.inputFile)
          if job.outputFiles:
            lfnCache.extend(job.outputFiles)
          break
        except RuntimeError as e:  # try again
          self.log.error('+++++ Failure for job:', job.jobID)
          self.log.error('+++++ Exception: ', str(e))

    # progress is counted in LFNs, so the total reported must be the number
    # of LFNs as well (it used to be the number of jobs)
    nLFNs = len(lfnCache)
    counter = 0
    for lfnChunk in breakListIntoChunks(lfnCache, 200):
      counter += 200
      if counter % 1000 == 0:
        self.log.notice('Getting FileInfo: %d/%d: %3.1fs' % (counter, nLFNs, float(time.time() - self.startTime)))
      while True:
        try:
          reps = self.fcClient.exists(lfnChunk)
        except RuntimeError as e:  # try again
          self.log.error('Failed to check file existence, try again...', str(e))
          continue
        if not reps['OK']:
          self.log.error('Failed to check file existence, try again...', reps['Message'])
          continue
        lfnExistence.update(reps['Value']['Successful'])
        break

    return lfnExistence

  def setPendingRequests(self, jobs):
    """Loop over all the jobs and get requests, if any.

    Jobs are handled in chunks of 1000. For every request found, the job's
    ``pendingRequest`` flag is set to True unless the request status is
    'Done' or 'Canceled'. Reading the requests is repeated until it succeeds.

    :param dict jobs: job objects, only the values are used
    """
    # list(): dict.values() is a view on Python 3 and cannot be sliced
    for jobChunk in breakListIntoChunks(list(jobs.values()), 1000):
      jobIDs = [job.jobID for job in jobChunk]
      # index the chunk once instead of scanning it for every request;
      # setdefault keeps the first job for an ID, like the scan did
      jobsByID = {}
      for job in jobChunk:
        jobsByID.setdefault(job.jobID, job)
      while True:
        result = self.reqClient.readRequestsForJobs(jobIDs)
        if result['OK']:
          break
        self.log.error('Failed to read requests', result['Message'])
        # repeat
      successful = result['Value']['Successful']
      for jobID in successful:
        request = successful[jobID]
        requestID = request.RequestID
        dbStatus = self.reqClient.getRequestStatus(requestID).get('Value', 'Unknown')
        if jobID not in jobsByID:
          continue
        job = jobsByID[jobID]
        job.pendingRequest = dbStatus not in ('Done', 'Canceled')
        self.log.notice('Found %s request for job %d' % ('pending' if job.pendingRequest else 'finished', jobID))

  def checkAllJobs(self, jobs, tInfo, tasksDict=None, lfnTaskDict=None):
    """run over all jobs and do checks

    Pending requests and the existence of all LFNs are collected up front.
    Then, for every job: jobs with a pending request are only reported; for
    the others the file existence is evaluated, the task information is
    attached (when both tasksDict and lfnTaskDict are given) and the recovery
    rules are applied through checkJob.

    A RuntimeError while treating a job is logged and the same job is tried
    again; there is no limit on the number of attempts.

    :param dict jobs: job objects, only the values are used
    :param tInfo: object handed on to the job actions
    :param tasksDict: task information passed to job.getTaskInfo, or None
    :param lfnTaskDict: LFN-to-task mapping passed to job.getTaskInfo, or None
    """
    # NOTE(review): fileJobDict is filled below but never read in this method
    fileJobDict = defaultdict(list)
    counter = 0
    nJobs = len(jobs)
    self.setPendingRequests(jobs)
    lfnExistence = self.getLFNStatus(jobs)
    self.log.notice('Running over all the jobs')
    for counter, job in enumerate(jobs.values()):
      if counter % self.printEveryNJobs == 0:
        self.log.notice('%d/%d: %3.1fs' % (counter, nJobs, float(time.time() - self.startTime)))
      # retry loop: left via break, repeated only after a RuntimeError
      while True:
        try:
          if job.pendingRequest:
            self.log.warn('Job has Pending requests:\n%s' % job)
            break
          job.checkFileExistence(lfnExistence)
          if tasksDict and lfnTaskDict:
            try:
              job.getTaskInfo(tasksDict, lfnTaskDict)
            except TaskInfoException as e:
              self.log.error(" Skip Task, due to TaskInfoException: %s" % e )
              # without task information the rules are not applied; a job
              # lacking an input file is handed to __failJobHard instead
              if job.inputFile is None and not job.tType.startswith( "MCGeneration" ):
                self.__failJobHard(job, tInfo)
              break
            fileJobDict[job.inputFile].append( job.jobID )
          self.checkJob( job, tInfo )
          break # get out of the while loop
        except RuntimeError as e:
          self.log.error( "+++++ Failure for job: %d " % job.jobID )
          self.log.error( "+++++ Exception: ", str(e) )
          ## runs these again because of RuntimeError

  def printSummary( self ):
    """print summary of changes"""
    self.log.notice( "Summary:" )
    for do in itertools.chain.from_iterable(self.todo.values()):
      message = "%s: %s" % ( do['ShortMessage'].ljust(56), str(do['Counter']).rjust(5) )
      self.log.notice( message )
      if self.notesToSend:
        self.notesToSend = str(message)+'\n' + self.notesToSend

  def __resetCounters( self ):
    """ reset counters for modified jobs """
    for _name,checks in self.todo.iteritems():
      for do in checks:
        do['Counter'] = 0


  def __failJobHard( self, job, tInfo ):
    """ set job to failed and remove output files if there are any """
    if job.inputFile is not None:
      return
    if job.status in ("Failed",) \
       and job.allFilesMissing():
      return
    self.log.notice( "Failing job hard %s" % job )
    self.notesToSend += "Failing job %s: no input file?\n" % job.jobID
    self.notesToSend += str(job)+'\n'
    self.todo['OtherProductions'][-1]['Counter'] += 1
    job.cleanOutputs(tInfo)
    job.setJobFailed(tInfo)
    # if job.inputFile is not None:
    #   job.setInputDeleted(tInfo)

  def __notOnlyKeepers( self, transType ):
    """check of we only have 'Keep' messages

    in this case we do not have to send report email or run again next time

    """
    if transType.startswith('MCGeneration'):
      return True

    checks = self.todo['OtherProductions']
    totalCount = 0
    for check in checks[1:]:
      totalCount += check['Counter']

    return totalCount > 0
Esempio n. 28
0
class TransformationAgent( AgentModule ):

  def initialize( self ):
    """ Read the agent options and create the clients used by the agent.

    :return: S_OK()
    """
    getOption = self.am_getOption
    self.pluginLocation = getOption( 'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin' )
    self.checkCatalog = getOption( 'CheckCatalog', 'yes' )

    # Use the ProductionManager shifter proxy (/Operations/Shifter/ProductionManager)
    # by default; the shifterProxy option in the Configuration can override it.
    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    self.transDB = TransformationClient( 'TransformationDB' )
    self.rm = ReplicaManager()
    return S_OK()

  def execute( self ):
    """ Obtain the transformations to treat and process them one by one.

    A failure to obtain or to process transformations is only logged.

    :return: S_OK() in all cases
    """
    selection = self.getTransformations()
    if not selection['OK']:
      gLogger.info( "%s.execute: Failed to obtain transformations: %s" % ( AGENT_NAME, selection['Message'] ) )
      return S_OK()

    for transDict in selection['Value']:
      transID = long( transDict['TransformationID'] )
      gLogger.info( "%s.execute: Processing transformation %s." % ( AGENT_NAME, transID ) )
      startTime = time.time()
      outcome = self.processTransformation( transDict )
      if outcome['OK']:
        gLogger.info( "%s.execute: Processed transformation in %.1f seconds" % ( AGENT_NAME, time.time() - startTime ) )
      else:
        gLogger.info( "%s.execute: Failed to process transformation: %s" % ( AGENT_NAME, outcome['Message'] ) )
    return S_OK()

  def getTransformations( self ):
    """ Obtain the transformations to be executed.

    With the 'Transformation' option left at 'All', every transformation in
    status Active, Completing or Flush is selected; otherwise only the
    transformation named by the option.

    :return: S_OK with a list of transformation dictionaries, or the failing
             client result
    """
    transName = self.am_getOption( 'Transformation', 'All' )
    if transName != 'All':
      gLogger.info( "%s.getTransformations: Initializing for transformation %s." % ( AGENT_NAME, transName ) )
      single = self.transDB.getTransformation( transName, extraParams = True )
      if not single['OK']:
        gLogger.error( "%s.getTransformations: Failed to get transformation." % AGENT_NAME, single['Message'] )
        return single
      return S_OK( [single['Value']] )

    gLogger.info( "%s.getTransformations: Initializing general purpose agent." % AGENT_NAME )
    selection = self.transDB.getTransformations( {'Status':['Active', 'Completing', 'Flush']}, extraParams = True )
    if not selection['OK']:
      gLogger.error( "%s.getTransformations: Failed to get transformations." % AGENT_NAME, selection['Message'] )
      return selection
    transformations = selection['Value']
    gLogger.info( "%s.getTransformations: Obtained %d transformations to process" % ( AGENT_NAME, len( transformations ) ) )
    return S_OK( transformations )

  def processTransformation( self, transDict ):
    """ Create tasks for the 'Unused' files of one transformation.

    Steps: get the unused files, get their replicas, let the plug-in named in
    transDict['Plugin'] (default 'Standard') group them into tasks and
    register each task. A transformation in status 'Flush' is set back to
    'Active' when there is nothing to do or when all tasks were created.

    :param dict transDict: transformation description; the keys
                           TransformationID, Status, Type and optionally
                           Plugin are read
    :return: S_OK(), or the result of the step that failed
    """
    transID = transDict['TransformationID']
    isFlush = transDict['Status'] == 'Flush'

    # First get the LFNs associated to the transformation
    res = self.transDB.getTransformationFiles( condDict = {'TransformationID':transID, 'Status':'Unused'} )
    if not res['OK']:
      gLogger.error( "%s.processTransformation: Failed to obtain input data." % AGENT_NAME, res['Message'] )
      return res
    transFiles = res['Value']
    lfns = res['LFNs']
    if not lfns:
      gLogger.info( "%s.processTransformation: No 'Unused' files found for transformation." % AGENT_NAME )
      if isFlush:
        self.__flushToActive( transID )
      return S_OK()

    # Check the data is available with replicas
    activeOnly = transDict['Type'].lower() not in ["replication", "removal"]
    res = self.__getDataReplicas( transID, lfns, active = activeOnly )
    if not res['OK']:
      gLogger.error( "%s.processTransformation: Failed to get data replicas" % AGENT_NAME, res['Message'] )
      return res
    dataReplicas = res['Value']

    # Get the plug-in type and create the plug-in object
    plugin = transDict.get( 'Plugin' ) or 'Standard'
    gLogger.info( "%s.processTransformation: Processing transformation with '%s' plug-in." % ( AGENT_NAME, plugin ) )
    res = self.__generatePluginObject( plugin )
    if not res['OK']:
      return res
    oPlugin = res['Value']

    # Hand the inputs to the plug-in and let it group the files into tasks
    oPlugin.setParameters( transDict )
    oPlugin.setInputData( dataReplicas )
    oPlugin.setTransformationFiles( transFiles )
    res = oPlugin.generateTasks()
    if not res['OK']:
      gLogger.error( "%s.processTransformation: Failed to generate tasks for transformation." % AGENT_NAME, res['Message'] )
      return res

    # Create the tasks
    allCreated = True
    created = 0
    for se, taskLfns in res['Value']:
      added = self.transDB.addTaskForTransformation( transID, taskLfns, se )
      if added['OK']:
        created += 1
      else:
        gLogger.error( "%s.processTransformation: Failed to add task generated by plug-in." % AGENT_NAME, added['Message'] )
        allCreated = False
    if created:
      gLogger.info( "%s.processTransformation: Successfully created %d tasks for transformation." % ( AGENT_NAME, created ) )

    # If this production is to Flush
    if isFlush and allCreated:
      self.__flushToActive( transID )
    return S_OK()

  def __flushToActive( self, transID ):
    """ Set the status of the transformation to 'Active' and log the outcome. """
    res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
    if res['OK']:
      gLogger.info( "%s.execute: Updated transformation status to 'Active'." % AGENT_NAME )
    else:
      gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message'] )

  ######################################################################
  #
  # Internal methods used by the agent
  #

  def __generatePluginObject( self, plugin ):
    """ This simply instantiates the TransformationPlugin class with the relevant plugin name

    :param plugin: plug-in name handed to the TransformationPlugin constructor
    :return: S_OK(plug-in object), or S_ERROR if the module cannot be imported
             or the object cannot be created
    """
    try:
      plugModule = __import__( self.pluginLocation, globals(), locals(), ['TransformationPlugin'] )
    except Exception as x:
      gLogger.exception( "%s.__generatePluginObject: Failed to import 'TransformationPlugin'" % AGENT_NAME, '', x )
      return S_ERROR( "Failed to import 'TransformationPlugin'" )
    try:
      # Call the constructor directly: building a source string for eval()
      # breaks on names containing a quote and would execute whatever text
      # is stored as the plug-in name.
      return S_OK( plugModule.TransformationPlugin( plugin ) )
    except Exception as x:
      gLogger.exception( "%s.__generatePluginObject: Failed to create %s()." % ( AGENT_NAME, plugin ), '', x )
      return S_ERROR( "Failed to create plug-in %s" % plugin )
Esempio n. 29
0
class TransformationAgent(AgentModule):
    """Agent creating tasks for transformations out of their 'Unused' input files.

    For every selected transformation the unused files and their replicas are
    handed to a TransformationPlugin object, and one task is added to the
    transformation for every (SE, LFNs) pair the plug-in returns.
    """

    def initialize(self):
        """Read the agent options and create the clients used by the agent.

        :return: S_OK()
        """
        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')
        self.transformationStatus = self.am_getOption(
            'transformationStatus', ['Active', 'Completing', 'Flush'])
        self.maxFiles = self.am_getOption('MaxFiles', 5000)

        self.am_setOption('shifterProxy', 'ProductionManager')

        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        # transID -> number of 'Unused' files left over by the last processing
        self.unusedFiles = {}
        return S_OK()

    def execute(self):
        """Get the transformations and process them one by one.

        A failure to obtain or to process a transformation is only logged.

        :return: S_OK() in all cases
        """
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("execute: Failed to obtain transformations: %s" %
                         res['Message'])
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            # int() instead of the Python-2-only long(); int() promotes to long
            # on Python 2 whenever the value requires it
            transID = int(transDict['TransformationID'])
            gLogger.info("execute: Processing transformation %s." % transID)
            startTime = time.time()
            res = self.processTransformation(transDict)
            if not res['OK']:
                gLogger.info("execute: Failed to process transformation: %s" %
                             res['Message'])
            else:
                gLogger.info(
                    "execute: Processed transformation in %.1f seconds" %
                    (time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        """Obtain the transformations to be processed.

        With the 'Transformation' option set to 'All' (the default) every
        transformation in one of the configured statuses is selected, otherwise
        only the transformation named by the option.

        :return: S_OK(list of transformation dictionaries) or the failed
                 result of the client call
        """
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info(
                "getTransformations: Initializing general purpose agent.")
            res = self.transDB.getTransformations(
                {'Status': self.transformationStatus}, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "getTransformations: Failed to get transformations: %s" %
                    res['Message'])
                return res
            transformations = res['Value']
            gLogger.info(
                "getTransformations: Obtained %d transformations to process" %
                len(transformations))
        else:
            gLogger.info(
                "getTransformations: Initializing for transformation %s." %
                transName)
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "getTransformations: Failed to get transformation: %s." %
                    res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def processTransformation(self, transDict):
        """Create the tasks for the 'Unused' files of one transformation.

        :param dict transDict: transformation description; the keys read here
            are 'TransformationID', 'Status', 'Type' and, optionally, 'Plugin'
        :return: S_OK() when the transformation was handled (including the case
                 where there is nothing to do), otherwise the failed result of
                 the step that went wrong
        """
        transID = transDict['TransformationID']
        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(condDict={
            'TransformationID': transID,
            'Status': 'Unused'
        })
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to obtain input data: %s." %
                res['Message'])
            return res
        transFiles = res['Value']
        # The client result is expected to carry the LFN list next to 'Value'
        lfns = res['LFNs']

        if not lfns:
            gLogger.info(
                "processTransformation: No 'Unused' files found for transformation."
            )
            if transDict['Status'] == 'Flush':
                res = self.transDB.setTransformationParameter(
                    transID, 'Status', 'Active')
                if not res['OK']:
                    gLogger.error(
                        "processTransformation: Failed to update transformation status to 'Active': %s."
                        % res['Message'])
                else:
                    gLogger.info(
                        "processTransformation: Updated transformation status to 'Active'."
                    )
            return S_OK()
        # Check if something new happened
        if len(lfns) == self.unusedFiles.get(
                transID, 0) and transDict['Status'] != 'Flush':
            gLogger.info(
                "processTransformation: No new 'Unused' files found for transformation."
            )
            return S_OK()

        replicateOrRemove = transDict['Type'].lower() in [
            "replication", "removal"
        ]
        # Limit the number of LFNs to be considered for replication or removal as they are treated individually
        if replicateOrRemove:
            # Keep exactly MaxFiles entries; the former "maxFiles - 1" upper
            # bound dropped one file more than configured
            lfns = lfns[:self.maxFiles]
        unusedFiles = len(lfns)
        # Check the data is available with replicas
        # NOTE(review): __getDataReplicas is not defined in this class as shown
        # here - confirm it is provided elsewhere
        res = self.__getDataReplicas(transID,
                                     lfns,
                                     active=not replicateOrRemove)
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to get data replicas: %s" %
                res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        plugin = transDict.get('Plugin') or 'Standard'
        gLogger.info(
            "processTransformation: Processing transformation with '%s' plug-in."
            % plugin)
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to generate tasks for transformation: %s"
                % res['Message'])
            return res
        tasks = res['Value']
        # Create the tasks
        allCreated = True
        created = 0
        for se, lfns in tasks:
            res = self.transDB.addTaskForTransformation(transID, lfns, se)
            if not res['OK']:
                gLogger.error(
                    "processTransformation: Failed to add task generated by plug-in: %s."
                    % res['Message'])
                allCreated = False
            else:
                created += 1
                unusedFiles -= len(lfns)
        if created:
            gLogger.info(
                "processTransformation: Successfully created %d tasks for transformation."
                % created)
        # Remember what is left so that the next cycle can tell whether new files arrived
        self.unusedFiles[transID] = unusedFiles

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            res = self.transDB.setTransformationParameter(
                transID, 'Status', 'Active')
            if not res['OK']:
                gLogger.error(
                    "processTransformation: Failed to update transformation status to 'Active': %s."
                    % res['Message'])
            else:
                gLogger.info(
                    "processTransformation: Updated transformation status to 'Active'."
                )
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def __generatePluginObject(self, plugin):
        """Instantiate the TransformationPlugin class with the relevant plug-in name.

        :param plugin: name of the plug-in
        :return: S_OK(plug-in object) or S_ERROR(message) when the module
                 cannot be imported or lacks the TransformationPlugin class
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except ImportError as e:
            gLogger.exception(
                "__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s"
                % (plugin, e))
            # Callers log res['Message'], so give them something to show
            return S_ERROR("Failed to import 'TransformationPlugin' from %s" %
                           self.pluginLocation)
        try:
            plugin_o = getattr(plugModule, 'TransformationPlugin')(
                '%s' % plugin,
                transClient=self.transDB,
                replicaManager=self.rm)
            return S_OK(plugin_o)
        except AttributeError as e:
            gLogger.exception(
                "__generatePluginObject: Failed to create %s(): %s." %
                (plugin, e))
            return S_ERROR("Failed to create plug-in object %s" % plugin)
Esempio n. 30
0
class TaskManagerAgentBase(AgentModule, TransformationAgentsUtilities):
    """To be extended. Please look at WorkflowTaskAgent and RequestTaskAgent."""

    def __init__(self, *args, **kwargs):
        """Constructor: initialise both base classes and set the attribute defaults.

        Always call this in the extension agent.
        """
        AgentModule.__init__(self, *args, **kwargs)
        TransformationAgentsUtilities.__init__(self)

        # Plug-in location and bulk submission flag, both re-read from the agent options in initialize()
        self.pluginLocation = ""
        self.bulkSubmissionFlag = False

        # Submission limits
        self.maxParametricJobs = 20  # will be updated in execute()
        self.tasksPerLoop = 50

        # Credentials
        self.credTuple = (None, None, None)
        self.shifterProxy = self.credentials = None

        # Clients and the transformation types to look at
        self.transType = []
        self.transClient = self.jobManagerClient = None

    #############################################################################

    def initialize(self):
        """Initialise the agent: monitoring, options, default clients, credentials, thread pool.

        The extensions MUST provide in their initialize method the following data members:
        - TransformationClient objects (self.transClient),
        - the shifterProxy, if different from the default one set here ('ProductionManager'),
        - the list of transformation types to be looked at (self.transType)

        :return: S_OK(), or the failed result of the credentials lookup
        """
        gMonitor.registerActivity(
            "SubmittedTasks",
            "Automatically submitted tasks",
            "Transformation Monitoring",
            "Tasks",
            gMonitor.OP_ACUM,
        )

        getOption = self.am_getOption
        self.pluginLocation = getOption("PluginLocation", "DIRAC.TransformationSystem.Client.TaskManagerPlugin")

        # Default clients
        self.transClient = TransformationClient()
        self.jobManagerClient = JobManagerClient()

        # Bulk submission flag
        self.bulkSubmissionFlag = getOption("BulkSubmission", self.bulkSubmissionFlag)

        # Shifter credentials to use, could replace the use of shifterProxy eventually
        self.shifterProxy = getOption("shifterProxy", self.shifterProxy)
        self.credentials = getOption("ShifterCredentials", self.credentials)
        credentialsResult = self.__getCredentials()
        if not credentialsResult["OK"]:
            return credentialsResult

        # Thread pool in which the transformations are processed
        nThreads = getOption("maxNumberOfThreads", 15)
        self.log.verbose("Multithreaded with %d threads" % nThreads)
        self.threadPoolExecutor = concurrent.futures.ThreadPoolExecutor(max_workers=nThreads)

        return S_OK()

    def finalize(self):
        """Finalise gracefully: shut the thread pool down before the agent terminates.

        :return: S_OK()
        """
        logOptions = {"method": "finalize"}
        self._logInfo("Wait for threads to get empty before terminating the agent", **logOptions)
        # shutdown() is called with its default arguments
        self.threadPoolExecutor.shutdown()
        self._logInfo("Threads are empty, terminating the agent...", **logOptions)
        return S_OK()

    def execute(self):
        """Select the transformations to work on and process them in the thread pool.

        Which operations (updateTaskStatus, updateFileStatus, checkReservedTasks,
        submitTasks) are registered for the selected transformations depends on the
        agent options. Every entry of self.operationsOnTransformationDict is then
        handed to _execute() through the thread pool.

        :return: S_OK()
        """
        # 1. determining which credentials will be used for the submission
        owner = ownerGroup = ownerDN = None
        proxyResult = getProxyInfo(proxy=False, disableVOMS=False)
        if proxyResult["OK"]:  # there is a shifterProxy
            proxyInfo = proxyResult["Value"]
            owner, ownerGroup, ownerDN = proxyInfo["username"], proxyInfo["group"], proxyInfo["identity"]
            self.log.info("ShifterProxy: Tasks will be submitted with the credentials %s:%s" % (owner, ownerGroup))
        elif self.credentials:
            owner, ownerGroup, ownerDN = self.credTuple
        else:
            self.log.info("Using per Transformation Credentials!")
        ownerOptions = {"owner": owner, "ownerGroup": ownerGroup, "ownerDN": ownerDN}

        # 2. Determining which operations to do on each transformation
        self.operationsOnTransformationDict = {}  # key: TransID. Value: dict with body, and list of operations

        # 2.1 Monitoring operations, all following the same pattern:
        # (enabling option, message when disabled, status option, fallback status option, operation)
        monitoringSteps = (
            (
                "MonitorTasks",
                "Monitoring of tasks is disabled. To enable it, create the 'MonitorTasks' option",
                "UpdateTasksTransformationStatus",
                "UpdateTasksStatus",
                "updateTaskStatus",
            ),
            (
                "MonitorFiles",
                "Monitoring of files is disabled. To enable it, create the 'MonitorFiles' option",
                "UpdateFilesTransformationStatus",
                "UpdateFilesStatus",
                "updateFileStatus",
            ),
            (
                "CheckReserved",
                "Checking of reserved tasks is disabled. To enable it, create the 'CheckReserved' option",
                "CheckReservedTransformationStatus",
                "CheckReservedStatus",
                "checkReservedTasks",
            ),
        )
        for enablingOption, disabledMessage, statusOption, fallbackOption, operation in monitoringSteps:
            if not self.am_getOption(enablingOption, ""):
                self.log.verbose(disabledMessage)
                continue
            # Transformation statuses for which this operation has to be performed
            fallbackStatus = self.am_getOption(fallbackOption, ["Active", "Completing", "Stopped"])
            status = self.am_getOption(statusOption, fallbackStatus)
            selection = self._selectTransformations(transType=self.transType, status=status, agentType=[])
            if not selection["OK"]:
                self.log.warn("Could not select transformations:", selection["Message"])
                continue
            self._addOperationForTransformations(
                self.operationsOnTransformationDict, operation, selection, **ownerOptions
            )

        # 2.2 Submission of tasks, enabled unless the 'SubmitTasks' option evaluates to false
        if self.am_getOption("SubmitTasks", "yes"):
            fallbackStatus = self.am_getOption("SubmitStatus", ["Active", "Completing"])
            status = self.am_getOption("SubmitTransformationStatus", fallbackStatus)
            selection = self._selectTransformations(transType=self.transType, status=status)
            if selection["OK"]:
                # Refresh the submission limits before registering the operation
                self.tasksPerLoop = self.am_getOption("TasksPerLoop", self.tasksPerLoop)
                maxJobsResult = self.jobManagerClient.getMaxParametricJobs()
                if maxJobsResult["OK"]:
                    self.maxParametricJobs = maxJobsResult["Value"]
                else:
                    self.log.warn("Could not get the maxParametricJobs from JobManager", maxJobsResult["Message"])

                self._addOperationForTransformations(
                    self.operationsOnTransformationDict, "submitTasks", selection, **ownerOptions
                )
            else:
                self.log.warn("Could not select transformations:", selection["Message"])
        else:
            self.log.verbose("Submission of tasks is disabled. To enable it, create the 'SubmitTasks' option")

        # 3. Hand every transformation to the thread pool and wait for all of them
        futureToTransID = {
            self.threadPoolExecutor.submit(self._execute, transDict): transID
            for transID, transDict in self.operationsOnTransformationDict.items()
        }
        for finished in concurrent.futures.as_completed(futureToTransID):
            transID = futureToTransID[finished]
            try:
                finished.result()
            except Exception as exc:
                self._logError("%d generated an exception: %s" % (transID, exc))
            else:
                self._logInfo("Processed %d" % transID)

        return S_OK()

    def _selectTransformations(self, transType=None, status=None, agentType=None):
        """Query the transformation client for the transformations matching the criteria.

        :param transType: value for the "Type" condition; no such condition when None
        :param status: value for the "Status" condition, ["Active", "Completing"] when None;
            an empty value disables the condition
        :param agentType: value for the "AgentType" condition, ["Automatic"] when None;
            an empty value disables the condition
        :return: the result of the getTransformations call, unchanged
        """
        status = ["Active", "Completing"] if status is None else status
        agentType = ["Automatic"] if agentType is None else agentType

        # (key, value, whether the condition is used), in the order the conditions are added
        candidates = (
            ("Status", status, bool(status)),
            ("Type", transType, transType is not None),
            ("AgentType", agentType, bool(agentType)),
        )
        selectCond = {key: value for key, value, wanted in candidates if wanted}

        result = self.transClient.getTransformations(condDict=selectCond)
        if not result["OK"]:
            self.log.error("Failed to get transformations:", result["Message"])
            return result
        found = result["Value"]
        if found:
            self.log.verbose("Obtained %d transformations" % len(found))
        else:
            self.log.verbose("No transformations found")
        return result

    #############################################################################

    def _getClients(self, ownerDN=None, ownerGroup=None):
        """Build the clients used inside the threads.

        This is another function that should be extended: the clients created here
        are defaults and should be adapted.

        If ownerDN and ownerGroup are not None the clients will delegate to these credentials.

        :param str ownerDN: DN of the owner of the submitted jobs
        :param str ownerGroup: group of the owner of the submitted jobs
        :returns: dict with the keys "TransformationClient" and "TaskManager"
        """
        clients = {
            "TransformationClient": TransformationClient(),
            "TaskManager": WorkflowTasks(ownerDN=ownerDN, ownerGroup=ownerGroup),
        }
        # The task manager uses the plug-in location configured for this agent
        clients["TaskManager"].pluginLocation = self.pluginLocation
        return clients

    def _execute(self, transDict):
        """Run, inside a thread, all the operations registered for one transformation.

        :param dict transDict: transformation description; the keys read here are
            "TransformationID", "Operations" and, when neither a shifter proxy nor shifter
            credentials are configured, "OwnerDN" and "OwnerGroup"
        :return: None. Exceptions raised while running the operations are logged here and
            not propagated; a failed operation is logged and the next one is still run
        """
        # Each thread will have its own clients if we use credentials/shifterProxy
        if self.shifterProxy:
            clients = self._getClients()
        elif self.credentials:
            clients = self._getClients(ownerGroup=self.credTuple[1], ownerDN=self.credTuple[2])
        else:
            # Created below with the credentials of the transformation owner
            clients = None

        method = "_execute"
        operation = "None"
        # Bound before the try block: the except/finally clauses log with transID, and would
        # otherwise fail with an unbound name (hiding the real error) when transDict has no
        # "TransformationID". NOTE(review): assumes the _log* helpers accept transID=None
        transID = None

        startTime = time.time()

        try:
            transID = transDict["TransformationID"]
            operations = transDict["Operations"]
            if not (self.credentials or self.shifterProxy):
                ownerDN, group = transDict["OwnerDN"], transDict["OwnerGroup"]
                clients = self._getClients(ownerDN=ownerDN, ownerGroup=group)
            self._logInfo("Start processing transformation", method=method, transID=transID)
            for operation in operations:
                self._logInfo("Executing %s" % operation, method=method, transID=transID)
                startOperation = time.time()
                # The operation name is the name of the method of this agent implementing it
                res = getattr(self, operation)(transDict, clients)
                if not res["OK"]:
                    self._logError(
                        "Failed to execute '%s': %s" % (operation, res["Message"]), method=method, transID=transID
                    )
                self._logInfo(
                    "Executed %s in %.1f seconds" % (operation, time.time() - startOperation),
                    method=method,
                    transID=transID,
                )
        except Exception as x:  # pylint: disable=broad-except
            self._logException(
                "Exception executing operation %s" % operation, lException=x, method=method, transID=transID
            )
        finally:
            self._logInfo(
                "Processed transformation in %.1f seconds" % (time.time() - startTime), method=method, transID=transID
            )
    #############################################################################
    # real operations done

    def updateTaskStatus(self, transDict, clients):
        """Refresh the status of the tasks of a transformation from the task manager.

        :param dict transDict: transformation description, "TransformationID" is read
        :param dict clients: clients to use, keys "TransformationClient" and "TaskManager"
        :return: S_OK() when all updates went through, the (possibly empty) result of the
            task query when there is nothing to update, or the failed result of the
            call that went wrong
        """
        method = "updateTaskStatus"
        transID = transDict["TransformationID"]

        # Get the tasks which are in an UPDATE state, i.e. job statuses + request-specific statuses
        defaultUpdateStatus = [
            JobStatus.CHECKING,
            JobStatus.DELETED,
            JobStatus.KILLED,
            JobStatus.STAGING,
            JobStatus.STALLED,
            JobStatus.MATCHED,
            JobStatus.RESCHEDULED,
            JobStatus.COMPLETING,
            JobStatus.COMPLETED,
            JobStatus.SUBMITTING,
            JobStatus.RECEIVED,
            JobStatus.WAITING,
            JobStatus.RUNNING,
            "Scheduled",
            "Assigned",
        ]
        updateStatus = self.am_getOption("TaskUpdateStatus", defaultUpdateStatus)
        condDict = {"TransformationID": transID, "ExternalStatus": updateStatus}
        # Only tasks whose LastUpdateTime is older than ten minutes are selected
        tenMinutesAgo = datetime.datetime.utcnow() - datetime.timedelta(minutes=10)
        timeStamp = str(tenMinutesAgo)

        # Get transformation tasks
        transformationTasks = clients["TransformationClient"].getTransformationTasks(
            condDict=condDict, older=timeStamp, timeStamp="LastUpdateTime"
        )
        if not transformationTasks["OK"]:
            self._logError(
                "Failed to get tasks to update:", transformationTasks["Message"], method=method, transID=transID
            )
            return transformationTasks
        tasks = transformationTasks["Value"]
        if not tasks:
            self._logVerbose("No tasks found to update", method=method, transID=transID)
            return transformationTasks

        # Get status for the transformation tasks, optionally in chunks
        chunkSize = self.am_getOption("TaskUpdateChunkSize", 0)
        try:
            chunkSize = int(chunkSize)
        except ValueError:
            # An unparsable value means: no chunking
            chunkSize = 0
        if chunkSize:
            self._logVerbose(
                "Getting %d tasks status (chunks of %d)" % (len(tasks), chunkSize),
                method=method,
                transID=transID,
            )
            taskChunks = breakListIntoChunks(tasks, chunkSize)
        else:
            self._logVerbose("Getting %d tasks status" % len(tasks), method=method, transID=transID)
            taskChunks = [tasks]

        updatedCounts = {}  # new status -> number of tasks set to it
        for chunkIndex, taskChunk in enumerate(taskChunks):
            submittedTaskStatus = clients["TaskManager"].getSubmittedTaskStatus(taskChunk)
            if not submittedTaskStatus["OK"]:
                self._logError(
                    "Failed to get updated task states:", submittedTaskStatus["Message"], method=method, transID=transID
                )
                return submittedTaskStatus
            statusDict = submittedTaskStatus["Value"]
            if not statusDict:
                self._logVerbose("%4d: No tasks to update" % chunkIndex, method=method, transID=transID)

            # Set status for tasks that changes
            for newStatus, taskIDs in statusDict.items():
                self._logVerbose(
                    "%4d: Updating %d task(s) to %s" % (chunkIndex, len(taskIDs), newStatus),
                    method=method,
                    transID=transID,
                )
                setTaskStatus = clients["TransformationClient"].setTaskStatus(transID, taskIDs, newStatus)
                if not setTaskStatus["OK"]:
                    self._logError(
                        "Failed to update task status for transformation:",
                        setTaskStatus["Message"],
                        method=method,
                        transID=transID,
                    )
                    return setTaskStatus
                updatedCounts[newStatus] = updatedCounts.get(newStatus, 0) + len(taskIDs)

        for newStatus, count in updatedCounts.items():
            self._logInfo("Updated %d tasks to status %s" % (count, newStatus), method=method, transID=transID)
        return S_OK()

    def updateFileStatus(self, transDict, clients):
        """Refresh the status of the 'Assigned' files of a transformation.

        :param dict transDict: transformation description, "TransformationID" is read
        :param dict clients: clients to use, keys "TransformationClient" and "TaskManager"
        :return: S_OK() when all updates went through, the (possibly empty) result of the
            file query when there is nothing to update, or the failed result of the
            call that went wrong
        """
        method = "updateFileStatus"
        transID = transDict["TransformationID"]

        # Only files whose LastUpdate is older than ten minutes are selected
        tenMinutesAgo = datetime.datetime.utcnow() - datetime.timedelta(minutes=10)
        timeStamp = str(tenMinutesAgo)

        # get transformation files
        condDict = {"TransformationID": transID, "Status": ["Assigned"]}
        transformationFiles = clients["TransformationClient"].getTransformationFiles(
            condDict=condDict, older=timeStamp, timeStamp="LastUpdate"
        )
        if not transformationFiles["OK"]:
            self._logError(
                "Failed to get transformation files to update:",
                transformationFiles["Message"],
                method=method,
                transID=transID,
            )
            return transformationFiles
        if not transformationFiles["Value"]:
            self._logInfo("No files to be updated", method=method, transID=transID)
            return transformationFiles

        # Group the files by the task they belong to
        taskFiles = {}
        for fileDict in transformationFiles["Value"]:
            taskFiles.setdefault(fileDict["TaskID"], []).append(fileDict)

        chunkSize = 100
        self._logVerbose(
            "Getting file status for %d tasks (chunks of %d)" % (len(taskFiles), chunkSize),
            method=method,
            transID=transID,
        )
        updatedCounts = {}  # new status -> number of files reported with it
        # Process 100 tasks at a time
        for chunkIndex, taskIDs in enumerate(breakListIntoChunks(taskFiles, chunkSize)):
            fileChunk = [fileDict for taskID in taskIDs for fileDict in taskFiles[taskID]]
            submittedFileStatus = clients["TaskManager"].getSubmittedFileStatus(fileChunk)
            if not submittedFileStatus["OK"]:
                self._logError(
                    "Failed to get updated file states for transformation:",
                    submittedFileStatus["Message"],
                    method=method,
                    transID=transID,
                )
                return submittedFileStatus
            statusDict = submittedFileStatus["Value"]
            if not statusDict:
                self._logVerbose("%4d: No file states to be updated" % chunkIndex, method=method, transID=transID)
                continue

            # Collect the new states in a file report, then commit them in one go
            fileReport = FileReport(server=clients["TransformationClient"].getServer())
            for lfn, newStatus in statusDict.items():
                updatedCounts[newStatus] = updatedCounts.get(newStatus, 0) + 1
                setFileStatus = fileReport.setFileStatus(transID, lfn, newStatus)
                if not setFileStatus["OK"]:
                    return setFileStatus
            commit = fileReport.commit()
            if not commit["OK"]:
                self._logError(
                    "Failed to update file states for transformation:",
                    commit["Message"],
                    method=method,
                    transID=transID,
                )
                return commit
            self._logVerbose(
                "%4d: Updated the states of %d files" % (chunkIndex, len(commit["Value"])),
                method=method,
                transID=transID,
            )

        for newStatus, count in updatedCounts.items():
            self._logInfo("Updated %d files to status %s" % (count, newStatus), method=method, transID=transID)
        return S_OK()

    def checkReservedTasks(self, transDict, clients):
        """Recover the tasks of a transformation left in 'Reserved' status for more than one hour.

        The task manager is asked which of these tasks have an associated external ID:
        those without one are set back to 'Created', the others are set to 'Submitted'
        together with that ID.

        :param dict transDict: transformation description, "TransformationID" is read
        :param dict clients: clients to use, keys "TransformationClient" and "TaskManager"
        :return: S_OK() when all updates went through, the (possibly empty) result of the
            task query when there is nothing to check, or the failed result of the
            call that went wrong
        """
        transID = transDict["TransformationID"]
        method = "checkReservedTasks"

        # Select the tasks which have been in Reserved status for more than 1 hour for selected transformations
        condDict = {"TransformationID": transID, "ExternalStatus": "Reserved"}
        time_stamp_older = str(datetime.datetime.utcnow() - datetime.timedelta(hours=1))

        res = clients["TransformationClient"].getTransformationTasks(condDict=condDict, older=time_stamp_older)
        self._logDebug("getTransformationTasks(%s) return value:" % condDict, res, method=method, transID=transID)
        if not res["OK"]:
            self._logError("Failed to get Reserved tasks:", res["Message"], method=method, transID=transID)
            return res
        if not res["Value"]:
            # method is passed here too, like in every other log call of this method
            self._logVerbose("No Reserved tasks found", method=method, transID=transID)
            return res
        reservedTasks = res["Value"]

        # Update the reserved tasks
        res = clients["TaskManager"].updateTransformationReservedTasks(reservedTasks)
        self._logDebug(
            "updateTransformationReservedTasks(%s) return value:" % reservedTasks, res, method=method, transID=transID
        )
        if not res["OK"]:
            self._logError(
                "Failed to update transformation reserved tasks:", res["Message"], method=method, transID=transID
            )
            return res
        noTasks = res["Value"]["NoTasks"]
        taskNameIDs = res["Value"]["TaskNameIDs"]

        # For the tasks with no associated request found re-set the status of the task in the transformationDB
        if noTasks:
            self._logInfo(
                "Resetting status of %d tasks to Created as no associated job/request found" % len(noTasks),
                method=method,
                transID=transID,
            )
            for taskName in noTasks:
                # The transformation and task IDs are taken from the task name
                transID, taskID = self._parseTaskName(taskName)
                res = clients["TransformationClient"].setTaskStatus(transID, taskID, "Created")
                if not res["OK"]:
                    self._logError(
                        "Failed to update task status and ID after recovery:",
                        "%s %s" % (taskName, res["Message"]),
                        method=method,
                        transID=transID,
                    )
                    return res

        # For the tasks for which an associated request was found update the task details in the transformationDB
        for taskName, extTaskID in taskNameIDs.items():
            transID, taskID = self._parseTaskName(taskName)
            self._logInfo(
                "Setting status of %s to Submitted with ID %s" % (taskName, extTaskID), method=method, transID=transID
            )
            setTaskStatusAndWmsID = clients["TransformationClient"].setTaskStatusAndWmsID(
                transID, taskID, "Submitted", str(extTaskID)
            )
            if not setTaskStatusAndWmsID["OK"]:
                self._logError(
                    "Failed to update task status and ID after recovery:",
                    "%s %s" % (taskName, setTaskStatusAndWmsID["Message"]),
                    method=method,
                    transID=transID,
                )
                return setTaskStatusAndWmsID

        return S_OK()

    def submitTasks(self, transDict, clients):
        """Fetch the tasks awaiting submission for one transformation and submit them chunk by chunk.

        :param dict transDict: transformation description; the keys TransformationID, Body, Owner,
                               OwnerGroup and OwnerDN are read
        :param dict clients: dictionary of client objects; the "TransformationClient" entry is used
                             here, the whole dictionary is handed on for the actual submission

        :return: S_OK/S_ERROR (the failing call's result is returned unchanged)
        """
        method = "submitTasks"
        transID, transBody, owner, ownerGroup, ownerDN = (
            transDict[key] for key in ("TransformationID", "Body", "Owner", "OwnerGroup", "OwnerDN")
        )

        # Ask the transformation client for the next batch of tasks
        toSubmit = clients["TransformationClient"].getTasksToSubmit(transID, self.tasksPerLoop)
        self._logDebug(
            "getTasksToSubmit(%s, %s) return value:" % (transID, self.tasksPerLoop),
            toSubmit,
            method=method,
            transID=transID,
        )
        if not toSubmit["OK"]:
            self._logError("Failed to obtain tasks:", toSubmit["Message"], method=method, transID=transID)
            return toSubmit

        jobDict = toSubmit["Value"]["JobDictionary"]
        if not jobDict:
            # Nothing to do: hand back the (successful) query result as is
            self._logVerbose("No tasks found for submission", method=method, transID=transID)
            return toSubmit
        self._logInfo("Obtained %d tasks for submission" % len(jobDict), method=method, transID=transID)

        # The chunk size depends on whether bulk submission is enabled
        if self.bulkSubmissionFlag:
            chunkSize = self.maxParametricJobs
        else:
            chunkSize = self.tasksPerLoop

        # Prepare, submit and update each chunk; stop at the first failure
        for chunk in breakDictionaryIntoChunks(jobDict, chunkSize):
            outcome = self._prepareAndSubmitAndUpdateTasks(
                transID, transBody, chunk, owner, ownerDN, ownerGroup, clients
            )
            if not outcome["OK"]:
                return outcome
            self._logVerbose("Submitted %d jobs, bulkSubmissionFlag = %s" % (len(chunk), self.bulkSubmissionFlag))

        return S_OK()

    def _prepareAndSubmitAndUpdateTasks(self, transID, transBody, tasks, owner, ownerDN, ownerGroup, clients):
        """Push one dictionary of tasks through the TaskManager: prepare, submit, then update the DB.

        Each step is logged at debug level; the first step that fails is logged as an error and
        its result is returned unchanged.

        :param int transID: transformation ID (used for logging)
        :param str transBody: transformation job template
        :param dict tasks: dictionary of per task parameters
        :param str owner: owner of the transformation
        :param str ownerDN: DN of the owner of the transformation
        :param str ownerGroup: group of the owner of the transformation
        :param dict clients: dictionary of client objects; the "TaskManager" entry is used

        :return: S_OK/S_ERROR
        """
        method = "_prepareAndSubmitAndUpdateTasks"
        taskManager = clients["TaskManager"]

        # Step 1: preparation
        prepared = taskManager.prepareTransformationTasks(
            transBody, tasks, owner, ownerGroup, ownerDN, self.bulkSubmissionFlag
        )
        self._logDebug("prepareTransformationTasks return value:", prepared, method=method, transID=transID)
        if not prepared["OK"]:
            self._logError("Failed to prepare tasks", prepared["Message"], method=method, transID=transID)
            return prepared

        # Step 2: submission of what was prepared
        submitted = taskManager.submitTransformationTasks(prepared["Value"])
        self._logDebug("submitTransformationTasks return value:", submitted, method=method, transID=transID)
        if not submitted["OK"]:
            self._logError("Failed to submit prepared tasks:", submitted["Message"], method=method, transID=transID)
            return submitted

        # Step 3: record the submission outcome
        updated = taskManager.updateDBAfterTaskSubmission(submitted["Value"])
        self._logDebug("updateDBAfterTaskSubmission return value:", updated, method=method, transID=transID)
        if not updated["OK"]:
            self._logError(
                "Failed to update DB after task submission:", updated["Message"], method=method, transID=transID
            )
            return updated

        return S_OK()

    @staticmethod
    def _addOperationForTransformations(
        operationsOnTransformationDict,
        operation,
        transformations,
        owner=None,
        ownerGroup=None,
        ownerDN=None,
    ):
        """Register ``operation`` for every transformation found in ``transformations["Value"]``.

        ``operationsOnTransformationDict`` is updated in place, keyed by transformation ID: an ID
        already present gets the operation appended to its "Operations" list, a new ID gets a
        fresh entry. When ``owner`` is set, ``owner``/``ownerGroup``/``ownerDN`` are stored in that
        entry; otherwise the transformation's AuthorDN/AuthorGroup are stored and the owner name
        is looked up with getUsernameForDN.

        :param dict operationsOnTransformationDict: dictionary to fill
        :param operation: operation to register
        :param dict transformations: result structure whose "Value" is iterated; every item must
                                     provide TransformationID, Body, AuthorDN and AuthorGroup
        :param owner: owner to use instead of the transformation author
        :param ownerGroup: group stored when ``owner`` is set
        :param ownerDN: DN stored when ``owner`` is set
        """
        for transformation in transformations["Value"]:
            transID = transformation["TransformationID"]
            body = transformation["Body"]
            authorDN = transformation["AuthorDN"]
            authorGroup = transformation["AuthorGroup"]

            if transID not in operationsOnTransformationDict:
                # The three credential fields are taken together, switched on ``owner`` alone
                if owner:
                    entryOwner, entryGroup, entryDN = owner, ownerGroup, ownerDN
                else:
                    entryOwner = getUsernameForDN(authorDN)["Value"]
                    entryGroup, entryDN = authorGroup, authorDN
                operationsOnTransformationDict[transID] = {
                    "TransformationID": transID,
                    "Body": body,
                    "Operations": [operation],
                    "Owner": entryOwner,
                    "OwnerGroup": entryGroup,
                    "OwnerDN": entryDN,
                }
                continue

            operationsOnTransformationDict[transID]["Operations"].append(operation)

    def __getCredentials(self):
        """Resolve the shifter credentials named by ``self.credentials``, if any.

        Nothing is done when ``self.credentials`` is not set. Otherwise the User and Group
        options are read from the "/Shifter/<credentials>" section, the user's DN is looked up,
        and ``self.credTuple`` is set to ``(owner, ownerGroup, ownerDN)``.

        :return: S_OK, or the failing call's S_ERROR result
        """
        if not self.credentials:
            return S_OK()
        resCred = Operations().getOptionsDict("/Shifter/%s" % self.credentials)
        if not resCred["OK"]:
            self.log.error("Cred: Failed to find shifter credentials", self.credentials)
            return resCred
        owner = resCred["Value"]["User"]
        ownerGroup = resCred["Value"]["Group"]

        # Check the lookup before indexing into it: a failed result carries no "Value",
        # and the error should be reported to the caller rather than raised as KeyError.
        resDN = getDNForUsername(owner)
        if not resDN["OK"]:
            self.log.error("Cred: Failed to find DN for shifter user", "%s: %s" % (owner, resDN["Message"]))
            return resDN
        # returns a list: the first DN is used
        ownerDN = resDN["Value"][0]

        self.credTuple = (owner, ownerGroup, ownerDN)
        self.log.info("Cred: Tasks will be submitted with the credentials %s:%s" % (owner, ownerGroup))
        return S_OK()
Esempio n. 31
0
class Transformation(API):
    """Client-side representation of one transformation.

    Parameter values are cached in ``self.paramValues``. They are read and
    written through dynamic ``getXxx()`` / ``setXxx(value)`` accessors that
    ``__getattr__`` resolves; once ``self.exists`` is set, changed values are
    also sent to the server through ``self.transClient``.
    """

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """Set the defaults and, when ``transID`` is given, load that transformation.

        :param int transID: ID of an existing transformation (0: none)
        :param transClient: client object to use; a TransformationClient is
                            created when omitted
        :raises AttributeError: when the server answers that ``transID`` does not exist
        """
        super(Transformation, self).__init__()

        # Accepted value types per parameter, enforced by the dynamic setters
        self.paramTypes = {
            'TransformationID': [types.IntType, types.LongType],
            'TransformationName': types.StringTypes,
            'Status': types.StringTypes,
            'Description': types.StringTypes,
            'LongDescription': types.StringTypes,
            'Type': types.StringTypes,
            'Plugin': types.StringTypes,
            'AgentType': types.StringTypes,
            'FileMask': types.StringTypes,
            'TransformationGroup': types.StringTypes,
            'GroupSize': [types.IntType, types.LongType, types.FloatType],
            'InheritedFrom': [types.IntType, types.LongType],
            'Body': types.StringTypes,
            'MaxNumberOfTasks': [types.IntType, types.LongType],
            'EventsPerTask': [types.IntType, types.LongType]
        }
        # Locally cached parameter values, pre-filled with the defaults
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins',
            ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        self.transClient = transClient if transClient else TransformationClient()
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues['TransformationID'] = transID
            res = self.getTransformation()
            if res['OK']:
                self.exists = True
            elif res['Message'] == 'Transformation does not exist':
                raise AttributeError('TransformationID %d does not exist' %
                                     transID)
            else:
                # Any other failure: forget the ID and report, but do not raise
                self.paramValues['TransformationID'] = 0
                gLogger.fatal(
                    "Failed to get transformation from database",
                    "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        """Use ``server`` as the server URL, for this object and for its client."""
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        """Return the server URL currently in use."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-run the constructor for ``transID`` and return S_OK.

        NOTE(review): the constructor is called without the current client,
        so a new TransformationClient replaces ``self.transClient`` here.
        """
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Set the 'TargetSE' parameter from a list or a string of SE names."""
        return self.__setSE('TargetSE', seList)

    def setSourceSE(self, seList):
        """Set the 'SourceSE' parameter from a list or a string of SE names."""
        return self.__setSE('SourceSE', seList)

    def __setSE(self, seParam, seList):
        """Normalise ``seList`` to a list, check it and store it as ``seParam``.

        :param str seParam: name of the parameter to set
        :param seList: list/tuple/dict of SE names, or a string that is either
                       a Python expression or a comma-separated list
        :return: S_OK/S_ERROR
        """
        if isinstance(seList, basestring):
            try:
                # SECURITY NOTE(review): eval() executes arbitrary expressions;
                # this is only acceptable if the string never comes from an
                # untrusted source -- consider ast.literal_eval.
                seList = eval(seList)
            except Exception:
                # Not a Python expression: treat it as a comma-separated list
                seList = seList.split(',')
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        # NOTE(review): __checkSEs is not defined in the visible part of this
        # class -- confirm it exists.
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Resolve unknown ``getXxx`` / ``setXxx`` attributes to the generic accessors.

        The parameter name (``Xxx``) is remembered in ``self.item_called``
        for the accessor that is returned.

        :raises AttributeError: for any other unknown attribute
        """
        if name.startswith('get'):
            self.item_called = name[3:]
            return self.__getParam
        if name.startswith('set'):
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Return S_OK with the value of the parameter named by ``self.item_called``.

        'Available' yields the names of the typed parameters and 'Parameters'
        the whole value dictionary.

        :raises AttributeError: for an unknown parameter
        """
        if self.item_called == 'Available':
            return S_OK(self.paramTypes.keys())
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" %
                             self.item_called)

    def __setParam(self, value):
        """Set the parameter named by ``self.item_called`` to ``value``.

        The value is sent to the server only when the transformation exists
        there; the local cache is updated only if that call succeeds.

        :return: S_OK, or the failing server result
        :raises TypeError: when a typed parameter gets a value of another type
        """
        item = self.item_called
        if item in self.paramTypes:
            change = self.paramValues[item] != value
            # Exact type match on purpose (no isinstance): subclasses of the
            # listed types are rejected, as before.
            if change and type(value) not in self.paramTypes[item]:
                raise TypeError("%s %s %s expected one of %s" %
                                (item, value, type(value),
                                 self.paramTypes[item]))
        else:
            change = (item not in self.paramValues
                      or self.paramValues[item] != value)
        if not change:
            gLogger.verbose("No change of parameter %s required" % item)
            return S_OK()
        gLogger.verbose("Parameter %s to be changed" % item)
        transID = self.paramValues['TransformationID']
        if self.exists and transID:
            res = self.transClient.setTransformationParameter(
                transID, item, value)
            if not res['OK']:
                return res
        self.paramValues[item] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch the transformation parameters and apply each through its setter.

        :param bool printOutput: pretty-print a failed server result
        :return: S_OK(parameter dictionary)/S_ERROR
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            setter = getattr(self, setterName, None)
            # Test the setter itself; the original tested the (always
            # non-empty) setter name, so this guard could never trigger.
            if not callable(setter):
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Return the logging records of the transformation.

        :param bool printOutput: print the records (or a failed result)
        :return: S_OK(list of records)/S_ERROR
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res['Value']
        if printOutput:
            self._printFormattedDictList(
                loggingList, ['Message', 'MessageDate', 'AuthorDN'],
                'MessageDate', 'MessageDate')
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        """Call ``extendTransformation(transID, nTasks)`` on the client."""
        return self.__executeOperation('extendTransformation',
                                       nTasks,
                                       printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Call ``cleanTransformation`` on the client; on success cache status 'Cleaned'."""
        res = self.__executeOperation('cleanTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.paramValues['Status'] = 'Cleaned'
        return res

    def deleteTransformation(self, printOutput=False):
        """Call ``deleteTransformation`` on the client; on success reset this object."""
        res = self.__executeOperation('deleteTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Call ``addFilesToTransformation(transID, lfns)`` on the client."""
        return self.__executeOperation('addFilesToTransformation',
                                       lfns,
                                       printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Call ``setFileStatusForTransformation(transID, status, lfns)`` on the client."""
        return self.__executeOperation('setFileStatusForTransformation',
                                       status,
                                       lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        """Call ``getTransformationTaskStats(transID)`` on the client."""
        return self.__executeOperation('getTransformationTaskStats',
                                       printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        """Call ``getTransformationStats(transID)`` on the client."""
        return self.__executeOperation('getTransformationStats',
                                       printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Call ``deleteTasks(transID, taskMin, taskMax)`` on the client."""
        return self.__executeOperation('deleteTasks',
                                       taskMin,
                                       taskMax,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=[],
                                 se='Unknown',
                                 printOutput=False):
        """Call ``addTaskForTransformation(transID, lfns, se)`` on the client.

        The list default is only passed on, never modified here.
        """
        return self.__executeOperation('addTaskForTransformation',
                                       lfns,
                                       se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Call ``setTaskStatus(transID, taskID, status)`` on the client."""
        return self.__executeOperation('setTaskStatus',
                                       taskID,
                                       status,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Invoke ``operation`` on the client with the transformation ID first.

        :param str operation: name of the TransformationClient method
        :param parms: positional arguments passed after the transformation ID
        :param kwds: keyword arguments passed on; ``printOutput`` is consumed
                     here (default False) and triggers pretty-printing
        :return: the client result, or S_ERROR when no ID is known or the
                 operation is not a callable member of the client
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        printOutput = kwds.pop('printOutput', False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            return S_ERROR(
                "Unable to invoke %s, it isn't a member function of TransformationClient"
                % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def __printSelection(self, res, outputFields, uniqueField, orderBy):
        """Print the outcome of a selection query.

        A failed result is pretty-printed. For a successful one, the available
        field names are logged when ``outputFields`` is empty; otherwise the
        records are printed, or a notice is logged when there are none.
        """
        if not res['OK']:
            self._prettyPrint(res)
        elif not outputFields:
            gLogger.info("Available fields are: %s" %
                         ' '.join(res.get('ParameterNames', [])))
        elif not res['Value']:
            gLogger.info("No tasks found for selection")
        else:
            self._printFormattedDictList(res['Value'], outputFields,
                                         uniqueField, orderBy)

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        """Query the files of this transformation.

        :param list fileStatus: restrict to these statuses (no filter if empty)
        :param list lfns: restrict to these LFNs (no filter if empty)
        :param list outputFields: fields to print; empty prints the field names
        :param str orderBy: field used to order the printout
        :param bool printOutput: print the outcome
        :return: the client result, unchanged
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if printOutput:
            self.__printSelection(res, outputFields, 'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        """Query the tasks of this transformation.

        :param list taskStatus: restrict to these external statuses (no filter if empty)
        :param list taskIDs: restrict to these task IDs (no filter if empty)
        :param list outputFields: fields to print; empty prints the field names
        :param str orderBy: field used to order the printout
        :param bool printOutput: print the outcome
        :return: the client result, unchanged
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if printOutput:
            self.__printSelection(res, outputFields, 'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        """Query transformations, optionally filtered by ID and status.

        :param list transID: restrict to these transformation IDs (no filter if empty)
        :param list transStatus: restrict to these statuses (no filter if empty)
        :param list outputFields: fields to print; empty prints the field names
        :param str orderBy: field used to order the printout
        :param bool printOutput: print the outcome
        :return: the client result, unchanged
        """
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if printOutput:
            self.__printSelection(res, outputFields, 'TransformationID',
                                  orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """Get the author DN and the username from the proxy information.

        :return: S_OK({'username': ..., 'authorDN': ...})/S_ERROR
        """
        res = getProxyInfo()
        if not res['OK']:
            gLogger.error("Unable to get uploaded proxy Info %s " %
                          res['Message'])
            return S_ERROR(res['Message'])
        author = res['Value']['identity']
        username = res['Value']['username']
        return S_OK({'username': username, 'authorDN': author})

    #############################################################################
    def getTransformationsByUser(self,
                                 authorDN="",
                                 userName="",
                                 transID=[],
                                 transStatus=[],
                                 outputFields=[
                                     'TransformationID', 'Status', 'AgentType',
                                     'TransformationName', 'CreationDate',
                                     'AuthorDN'
                                 ],
                                 orderBy='TransformationID',
                                 printOutput=False):
        """Query the transformations created by one author.

        When ``authorDN`` is empty it is taken from the proxy; a non-empty
        ``userName`` must then match the proxy's username.

        :param str authorDN: DN of the author ("" to take it from the proxy)
        :param str userName: expected username when the proxy is used
        :param list transID: restrict to these transformation IDs (no filter if empty)
        :param list transStatus: restrict to these statuses (no filter if empty)
        :param list outputFields: fields to print; empty prints the field names
        :param str orderBy: field used to order the printout
        :param bool printOutput: print the outcome
        :return: the client result, or S_ERROR if the author cannot be resolved
        """
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res['OK']:
                gLogger.error(res['Message'])
                return S_ERROR(res['Message'])
            foundUserName = res['Value']['username']
            foundAuthor = res['Value']['authorDN']
            # A requested username that differs from the proxy's one is an error
            if userName not in ("", foundUserName):
                message = (
                    "Couldn't resolve the authorDN for user '%s' from the uploaded proxy "
                    "(proxy created by '%s')" % (userName, foundUserName))
                gLogger.error(message)
                return S_ERROR(message)

            userName = foundUserName
            authorDN = foundAuthor
            gLogger.info(
                "Will list transformations created by user '%s' with status '%s'"
                % (userName, ', '.join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ', '.join(transStatus)))

        condDict = {'AuthorDN': authorDN}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if printOutput:
            self.__printSelection(res, outputFields, 'TransformationID',
                                  orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Show the summary for a list of transformations.

        Fields starting with 'F' ('J') refer to files (jobs).
        'Proc.' stands for processed.

        :param list transID: IDs of the transformations to summarise
        :return: S_OK(list of dictionaries keyed by the short headers)/S_ERROR
        """
        condDict = {'TransformationID': transID}
        orderby = []
        start = 0
        maxitems = len(transID)
        paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total',
                          'Files_PercentProcessed', 'Files_Processed',
                          'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting',
                          'Jobs_Running', 'Jobs_Done', 'Jobs_Failed',
                          'Jobs_Stalled']
        # Header used for each field in the printout: short, to fit in one line
        paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total',
                               'F_Proc.(%)', 'F_Proc.', 'F_Unused',
                               'J_Created', 'J_Wait', 'J_Run', 'J_Done',
                               'J_Fail', 'J_Stalled']
        dictList = []

        result = self.transClient.getTransformationSummaryWeb(
            condDict, orderby, start, maxitems)
        if not result['OK']:
            self._prettyPrint(result)
            return result

        if result['Value']['TotalRecords'] > 0:
            try:
                paramNames = result['Value']['ParameterNames']
                # Column positions are the same for every record: compute once
                columns = [paramNames.index(pname) for pname in paramShowNames]
                for paramValues in result['Value']['Records']:
                    paramShowValues = [paramValues[col] for col in columns]
                    dictList.append(
                        dict(zip(paramShowNamesShort, paramShowValues)))
            except Exception as x:
                # Best effort: report and fall through with what was collected
                print('Exception %s ' % str(x))

        if not dictList:
            gLogger.error(
                'No found transformations satisfying input condition')
            return S_ERROR(
                'No found transformations satisfying input condition')

        # NOTE(review): the other callers in this class use
        # _printFormattedDictList without print -- confirm whether printing
        # its return value is intended.
        print(self._printFormattedDictList(dictList, paramShowNamesShort,
                                           paramShowNamesShort[0],
                                           paramShowNamesShort[0]))

        return S_OK(dictList)
class TransformationCleaningAgent(AgentModule):
  """
  .. class:: TransformationCleaningAgent

  :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance
  :param ~TransformationClient.TransformationClient transClient: TransformationClient instance
  :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance

  """

  def __init__(self, *args, **kwargs):
    """ Set the default values of every agent attribute.

    The client handles are only placeholders here (None); the real objects are
    created in initialize().
    """
    AgentModule.__init__(self, *args, **kwargs)

    # Proxy used for the agent actions, read from the configuration in initialize()
    self.shifterProxy = None

    # Service clients: transformation, WMS, request management, file catalog
    self.transClient = None
    self.wmsClient = None
    self.reqClient = None
    self.metadataClient = None

    # Transformation types considered by the agent (computed in initialize())
    self.transformationTypes = None
    # Where to look for the directories of a transformation
    self.directoryLocations = ['TransformationDB', 'MetadataCatalog']
    # Name of the metadata tag holding the transformation ID
    self.transfidmeta = 'TransformationID'
    # Archive period, in days
    self.archiveAfter = 7
    # Storage element holding the transformation logs
    self.logSE = 'LogSE'
    # Execution switch; compared against the string 'True' in execute()
    self.enableFlag = 'True'

    # Default transformation types, split in data processing and data manipulation
    self.dataProcTTypes = ['MCSimulation', 'Merge']
    self.dataManipTTypes = ['Replication', 'Removal']

  def initialize(self):
    """ Agent initialisation: read the configuration options and build the clients.

    The values assigned in the constructor are used as defaults for every option.

    :param self: self reference
    :return: S_OK()
    """
    # Shifter proxy. When it is set, execute() calls the handlers directly instead
    # of wrapping them with the proxy of the transformation author. cleanContent
    # unsets "UseServerCertificate" around the file removal for the same reason.
    self.shifterProxy = self.am_getOption('shifterProxy', self.shifterProxy)

    # Transformation types: the agent option wins, otherwise use both families
    self.dataProcTTypes = Operations().getValue('Transformations/DataProcessing', self.dataProcTTypes)
    self.dataManipTTypes = Operations().getValue('Transformations/DataManipulation', self.dataManipTTypes)
    configuredTypes = self.am_getOption('TransformationTypes', [])
    selectedTypes = configuredTypes if configuredTypes else self.dataProcTTypes + self.dataManipTTypes
    self.transformationTypes = sorted(selectedTypes)
    self.log.info("Will consider the following transformation types: %s" % str(self.transformationTypes))

    # Locations searched for the transformation directories
    configuredLocations = self.am_getOption('DirectoryLocations', self.directoryLocations)
    self.directoryLocations = sorted(configuredLocations)
    self.log.info("Will search for directories in the following locations: %s" % str(self.directoryLocations))

    # Metadata tag used for the transformation ID
    self.transfidmeta = self.am_getOption('TransfIDMeta', self.transfidmeta)
    self.log.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta)

    # Archive period, in days
    self.archiveAfter = self.am_getOption('ArchiveAfter', self.archiveAfter)
    self.log.info("Will archive Completed transformations after %d days" % self.archiveAfter)

    # Storage element holding the transformation logs
    self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE)
    self.log.info("Will remove logs found on storage element: %s" % self.logSE)

    # Clients: transformation, WMS, request management, file catalog
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.reqClient = ReqClient()
    self.metadataClient = FileCatalogClient()

    return S_OK()

  #############################################################################
  def execute(self):
    """ Run one agent cycle.

    Unless disabled through the EnableFlag option, three passes are made over the
    transformations of the configured types:

    * status 'Cleaning': handled by _executeClean
    * status 'RemovingFiles': handled by _executeRemoval
    * status 'Completed', selected with older=(now - archiveAfter days) on the
      'LastUpdate' timestamp: handled by _executeArchive

    When a shifter proxy is configured the handler is called directly, otherwise
    it is wrapped with executeWithUserProxy using the AuthorDN/AuthorGroup of the
    transformation.

    :param self: self reference
    :return: S_OK(); a failing query is only logged
    """
    self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
    if self.enableFlag != 'True':
      self.log.info('TransformationCleaningAgent is disabled by configuration option EnableFlag')
      return S_OK('Disabled via CS flag')

    def applyToAll(queryResult, handler, announcement, queryFailure):
      """ Call handler on every transformation dictionary returned by a query """
      if not queryResult['OK']:
        self.log.error(queryFailure, queryResult['Message'])
        return
      for transDict in queryResult['Value']:
        if self.shifterProxy:
          handler(transDict)
          continue
        self.log.info(announcement % transDict)
        executeWithUserProxy(handler)(transDict,
                                      proxyUserDN=transDict['AuthorDN'],
                                      proxyUserGroup=transDict['AuthorGroup'])

    # Pass 1: transformations to be cleaned (jobs and files)
    toClean = self.transClient.getTransformations({'Status': 'Cleaning',
                                                   'Type': self.transformationTypes})
    applyToAll(toClean, self._executeClean,
               "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
               "Failed to get transformations")

    # Pass 2: transformations whose output files have to be removed
    toRemove = self.transClient.getTransformations({'Status': 'RemovingFiles',
                                                    'Type': self.transformationTypes})
    applyToAll(toRemove, self._executeRemoval,
               "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
               "Could not get the transformations")

    # Pass 3: completed transformations to be archived. The reference time is
    # taken only now, after the first two passes have run.
    olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
    toArchive = self.transClient.getTransformations({'Status': 'Completed',
                                                     'Type': self.transformationTypes},
                                                    older=olderThanTime,
                                                    timeStamp='LastUpdate')
    applyToAll(toArchive, self._executeArchive,
               "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
               "Could not get the transformations")
    return S_OK()

  def _executeClean(self, transDict):
    """Clean transformation."""
    # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
    # We just archive
    if transDict['Type'] in self.dataManipTTypes:
      res = self.archiveTransformation(transDict['TransformationID'])
      if not res['OK']:
        self.log.error("Problems archiving transformation %s: %s" % (transDict['TransformationID'],
                                                                     res['Message']))
    else:
      res = self.cleanTransformation(transDict['TransformationID'])
      if not res['OK']:
        self.log.error("Problems cleaning transformation %s: %s" % (transDict['TransformationID'],
                                                                    res['Message']))

  def _executeRemoval(self, transDict):
    """Remove files from given transformation."""
    res = self.removeTransformationOutput(transDict['TransformationID'])
    if not res['OK']:
      self.log.error("Problems removing transformation %s: %s" % (transDict['TransformationID'],
                                                                  res['Message']))

  def _executeArchive(self, transDict):
    """ Archive one transformation; a failure is only logged.

    :param dict transDict: transformation description, read for 'TransformationID'
    :return: S_OK() whatever the outcome of the archiving
    """
    transID = transDict['TransformationID']
    outcome = self.archiveTransformation(transID)
    if not outcome['OK']:
      self.log.error("Problems archiving transformation %s: %s" % (transID, outcome['Message']))
    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories(self, transID):
    """ Get the directories of the supplied transformation.

    The directories are collected from the locations listed in directoryLocations:
    the 'OutputDirectories' parameter of the transformation ('TransformationDB')
    and the directories tagged with the transformation ID in the file catalog
    ('MetadataCatalog'). They are used by removeTransformationOutput and
    cleanTransformation for removing output.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK(sorted list of directories), or the failing result of a lookup
    """
    # %s rather than %d: the ID is only printed, it need not be an int here
    self.log.verbose("Cleaning Transformation directories of transformation %s" % transID)
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters(transID, ['OutputDirectories'])
      if not res['OK']:
        self.log.error("Failed to obtain transformation directories", res['Message'])
        return res
      transDirectories = res['Value'] or []
      if not isinstance(transDirectories, list):
        try:
          transDirectories = ast.literal_eval(transDirectories)
        except Exception:
          # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]'.
          # Only Exception is caught so that KeyboardInterrupt/SystemExit propagate.
          transDirectories = [res['Value']]
        if not isinstance(transDirectories, (list, tuple, set)):
          # A literal evaluating to a single value (e.g. a quoted path) is one
          # directory, not a sequence of characters.
          transDirectories = [transDirectories]
      directories = self._addDirs(transID, transDirectories, directories)

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata({self.transfidmeta: transID})
      if not res['OK']:
        self.log.error("Failed to obtain metadata catalog directories", res['Message'])
        return res
      directories = self._addDirs(transID, res['Value'], directories)

    if not directories:
      self.log.info("No output directories found")
    return S_OK(sorted(directories))

  @classmethod
  def _addDirs(cls, transID, newDirs, existingDirs):
    """ append unique :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    """
    for folder in newDirs:
      transStr = str(transID).zfill(8)
      if re.search(transStr, str(folder)):
        if folder not in existingDirs:
          existingDirs.append(os.path.normpath(folder))
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanContent(self, directory):
    """ Wipe out everything from catalog under folder :directory:

    The files found in the catalog below :directory: are removed with
    DataManager.removeFile(force=True). Files reported as "File does not exist"
    only produce a warning; any other failure makes the method return S_ERROR.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK() when nothing is left to remove, otherwise the failing result
    """
    self.log.verbose("Cleaning Catalog contents")
    res = self.__getCatalogDirectoryContents([directory])
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      self.log.info("No files are registered in the catalog directory %s" % directory)
      return S_OK()
    self.log.info("Attempting to remove %d possible remnants from the catalog and storage" % len(filesFound))

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')
    try:
      res = DataManager().removeFile(filesFound, force=True)
    finally:
      # Put the option back even if the removal raises, otherwise the rest of
      # the process would keep running without the server certificate.
      gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')

    if not res['OK']:
      return res
    realFailure = False
    for lfn, reason in res['Value']['Failed'].items():
      if "File does not exist" in str(reason):
        self.log.warn("File %s not found in some catalog: " % (lfn))
      else:
        self.log.error("Failed to remove file found in the catalog", "%s %s" % (lfn, reason))
        realFailure = True
    if realFailure:
      return S_ERROR("Failed to remove all files found in the catalog")
    return S_OK()

  def __getCatalogDirectoryContents(self, directories):
    """ Get catalog contents under paths :directories:

    The directories are walked breadth first: each listed directory contributes
    its files, and its sub-directories are queued for listing. A directory that
    cannot be listed is logged and skipped.

    :param self: self reference
    :param list directories: list of paths in catalog (left untouched)
    :return: S_OK(list of the file names found)
    """
    self.log.info('Obtaining the catalog contents for %d directories:' % len(directories))
    for directory in directories:
      self.log.info(directory)
    # Work on a copy: the queue is consumed and extended while walking the tree,
    # which must not empty or grow the list owned by the caller.
    activeDirs = list(directories)
    allFiles = {}
    fc = FileCatalog()
    while activeDirs:
      currentDir = activeDirs.pop(0)
      res = returnSingleResult(fc.listDirectory(currentDir))
      if not res['OK']:
        message = res['Message']
        if 'Directory does not exist' in message:  # FIXME: DFC should return errno
          self.log.info("The supplied directory %s does not exist" % currentDir)
        elif "No such file or directory" in message:
          self.log.info("%s: %s" % (currentDir, message))
        else:
          self.log.error("Failed to get directory %s content: %s" % (currentDir, message))
        continue
      dirContents = res['Value']
      activeDirs.extend(dirContents['SubDirs'])
      allFiles.update(dirContents['Files'])
    self.log.info("Found %d files" % len(allFiles))
    # A real list, not a dictionary view, whatever the Python version
    return S_OK(list(allFiles))

  def cleanTransformationLogFiles(self, directory):
    """ Remove recursively the log directory :directory: from the log storage element.

    A directory that does not exist (errno.ENOENT) is not considered an error.

    :param self: self reference
    :param str directory: folder name
    :return: S_OK(), or the failing result of the removal
    """
    self.log.verbose("Removing log files found in the directory %s" % directory)
    logStorage = StorageElement(self.logSE)
    outcome = returnSingleResult(logStorage.removeDirectory(directory, recursive=True))
    if outcome['OK']:
      self.log.info("Successfully removed transformation log directory")
      return S_OK()
    if cmpError(outcome, errno.ENOENT):  # No such file or directory
      self.log.warn("Transformation log directory does not exist", directory)
      return S_OK()
    self.log.error("Failed to remove log files", outcome['Message'])
    return outcome

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput(self, transID):
    """ Remove any mention of the output data from the catalog and storage.

    The content of every directory of the transformation is removed, except for
    the directories containing '/LOG/'. The remnants found through the metadata
    catalog are removed too, then the transformation status is set to
    'RemovedFiles'.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK(), or the result of the first failing step. A failure to get
             the directories is logged and reported as S_OK().
    """
    self.log.info("Removing output data for transformation %s" % transID)
    res = self.getTransformationDirectories(transID)
    if not res['OK']:
      self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, res))
      return S_OK()
    # Log directories are not touched here, cleanTransformation deals with them
    dataDirectories = [directory for directory in res['Value'] if '/LOG/' not in directory]
    for directory in dataDirectories:
      res = self.cleanContent(directory)
      if not res['OK']:
        return res

    # Single-line message: a backslash continuation inside the literal would put
    # the indentation of the source code in the middle of the log line.
    self.log.info("Removed %d directories from the catalog and its files from the storage for transformation %s" %
                  (len(dataDirectories), transID))
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles(transID)
    if not res['OK']:
      return res
    # %s: formatting must not fail between the removal and the status update
    self.log.info("Successfully removed output of transformation %s" % transID)
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter(transID, 'Status', 'RemovedFiles')
    if not res['OK']:
      self.log.error("Failed to update status of transformation %s to RemovedFiles" % (transID), res['Message'])
      return res
    self.log.info("Updated status of transformation %s to RemovedFiles" % (transID))
    return S_OK()

  def archiveTransformation(self, transID):
    """ Archive a transformation: clean its tasks and its entries in the
        transformation DB, then set its status to 'Archived'.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK(), or the result of the first failing step
    """
    self.log.info("Archiving transformation %s" % transID)

    # Jobs in the WMS and failover requests
    tasksCleaned = self.cleanTransformationTasks(transID)
    if not tasksCleaned['OK']:
      return tasksCleaned

    # Files and job information kept in the transformation DB
    dbCleaned = self.transClient.cleanTransformation(transID)
    if not dbCleaned['OK']:
      return dbCleaned
    self.log.info("Successfully archived transformation %d" % transID)

    # Finally flag the transformation as archived
    statusSet = self.transClient.setTransformationParameter(transID, 'Status', 'Archived')
    if statusSet['OK']:
      self.log.info("Updated status of transformation %s to Archived" % (transID))
      return S_OK()
    self.log.error("Failed to update status of transformation %s to Archived" % (transID), statusSet['Message'])
    return statusSet

  def cleanTransformation(self, transID):
    """ Remove what was produced by the supplied transformation, leaving only
        some info and log in the transformation DB.

    Steps, stopping at the first failure: tasks, log files and catalog content of
    every directory, remnants found in the metadata catalog, transformation DB
    entries, and finally the status change to 'Cleaned'.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK(), or the result of the first failing step. A failure to get
             the directories is logged and reported as S_OK().
    """
    self.log.info("Cleaning transformation %s" % transID)
    dirLookup = self.getTransformationDirectories(transID)
    if not dirLookup['OK']:
      self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, dirLookup))
      return S_OK()

    # Jobs in the WMS and failover requests
    tasksCleaned = self.cleanTransformationTasks(transID)
    if not tasksCleaned['OK']:
      return tasksCleaned

    # Log files (only for the LOG directories) and catalog content
    for directory in dirLookup['Value']:
      if '/LOG/' in directory:
        logsCleaned = self.cleanTransformationLogFiles(directory)
        if not logsCleaned['OK']:
          return logsCleaned
      contentCleaned = self.cleanContent(directory)
      if not contentCleaned['OK']:
        return contentCleaned

    # ALL the possible remnants found in the metadata catalog
    remnantsCleaned = self.cleanMetadataCatalogFiles(transID)
    if not remnantsCleaned['OK']:
      return remnantsCleaned

    # Files and job information kept in the transformation DB
    dbCleaned = self.transClient.cleanTransformation(transID)
    if not dbCleaned['OK']:
      return dbCleaned
    self.log.info("Successfully cleaned transformation %d" % transID)

    statusSet = self.transClient.setTransformationParameter(transID, 'Status', 'Cleaned')
    if statusSet['OK']:
      self.log.info("Updated status of transformation %s to Cleaned" % (transID))
      return S_OK()
    self.log.error("Failed to update status of transformation %s to Cleaned" % (transID), statusSet['Message'])
    return statusSet

  def cleanMetadataCatalogFiles(self, transID):
    """ Wipe out the files tagged with the transformation ID in the file catalog.

    The files are looked up with findFilesByMetadata and removed with
    DataManager.removeFile(force=True).

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK() when all the files are removed (or none is found), S_ERROR or
             the failing result otherwise
    """
    res = self.metadataClient.findFilesByMetadata({self.transfidmeta: transID})
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info('No files found for transID %s' % transID)
      return S_OK()

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')
    try:
      res = DataManager().removeFile(fileToRemove, force=True)
    finally:
      # Put the option back even if the removal raises, otherwise the rest of
      # the process would keep running without the server certificate.
      gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')

    if not res['OK']:
      return res
    failedFiles = res['Value']['Failed']
    for lfn, reason in failedFiles.items():
      self.log.error("Failed to remove file found in metadata catalog", "%s %s" % (lfn, reason))
    if failedFiles:
      return S_ERROR("Failed to remove all files found in the metadata catalog")
    self.log.info("Successfully removed all files found in the BK")
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks(self, transID):
    """ Clean the tasks of a transformation.

    The external IDs of the tasks are removed from the WMS when the type of the
    transformation is listed in dataProcTTypes, and handed to the request
    removal otherwise.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK(), or the result of the first failing step
    """
    self.log.verbose("Cleaning Transformation tasks of transformation %d" % transID)
    idLookup = self.__getTransformationExternalIDs(transID)
    if not idLookup['OK']:
      return idLookup
    externalIDs = idLookup['Value']
    if not externalIDs:
      # No task at all: nothing to remove
      return S_OK()

    typeLookup = self.transClient.getTransformationParameters(transID, ['Type'])
    if not typeLookup['OK']:
      self.log.error("Failed to determine transformation type")
      return typeLookup

    if typeLookup['Value'] in self.dataProcTTypes:
      removal = self.__removeWMSTasks(externalIDs)
    else:
      removal = self.__removeRequests(externalIDs)
    return S_OK() if removal['OK'] else removal

  def __getTransformationExternalIDs(self, transID):
    """ Collect the 'ExternalID' of every task of transformation :transID:

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK(list of external IDs), or the failing result of the task query
    """
    taskQuery = self.transClient.getTransformationTasks(condDict={'TransformationID': transID})
    if not taskQuery['OK']:
      self.log.error("Failed to get externalIDs for transformation %d" % transID, taskQuery['Message'])
      return taskQuery

    externalIDs = []
    for taskDict in taskQuery["Value"]:
      externalIDs.append(taskDict['ExternalID'])
    self.log.info("Found %d tasks for transformation" % len(externalIDs))
    return S_OK(externalIDs)

  def __removeRequests(self, requestIDs):
    """ Cancel the given requests in the RMS.

    IDs converting to 0 are skipped. The cancellation is best effort: a request
    that cannot be cancelled is logged and the remaining ones are still
    processed.

    :param self: self reference
    :param list requestIDs: request IDs, as integers or numeric strings
    :return: S_OK()
    """
    # int() instead of the Python 2 only long(), as done for the WMS job IDs.
    # All the IDs are converted before the first cancellation, so that a
    # malformed ID raises before anything is cancelled.
    rIDs = [reqID for reqID in (int(j) for j in requestIDs) if reqID]
    for reqID in rIDs:
      res = self.reqClient.cancelRequest(reqID)
      if not res['OK']:
        self.log.error("Failed to cancel request", "%s: %s" % (reqID, res['Message']))

    return S_OK()

  def __removeWMSTasks(self, transJobIDs):
    """ Wipe out jobs and their requests from the system.

    The jobs are killed and deleted in the WMS in chunks of 500, then the
    requests associated to the jobs are cancelled.

    :param self: self reference
    :param list transJobIDs: job IDs, as integers or numeric strings
    :return: S_OK(), S_ERROR if some jobs or requests could not be removed, or
             the failing result of the request lookup
    """
    # Prevent 0 job IDs
    jobIDs = [jobID for jobID in (int(j) for j in transJobIDs) if jobID]

    def completed(res, done, action, nJobs):
      """ Log the outcome of a bulk WMS call, return False if jobs may be left behind """
      if res['OK']:
        self.log.info("Successfully %s %d jobs from WMS" % (done, nJobs))
        return True
      if "NonauthorizedJobIDs" in res:
        self.log.error("Failed to %s %s jobs because not authorized" % (action, len(res['NonauthorizedJobIDs'])))
        return False
      if "FailedJobIDs" in res:
        self.log.error("Failed to %s %s jobs" % (action, len(res['FailedJobIDs'])))
        return False
      if "InvalidJobIDs" in res:
        # Only unknown jobs were reported: there is nothing left to remove
        self.log.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs']))
        return True
      # Plain failure (none of the keys above): the jobs were not processed,
      # this must not be reported as a success.
      self.log.error("Failed to %s jobs" % action, res.get('Message', ''))
      return False

    allRemove = True
    for jobList in breakListIntoChunks(jobIDs, 500):
      if not completed(self.wmsClient.killJob(jobList), 'killed', 'kill', len(jobList)):
        allRemove = False
      if not completed(self.wmsClient.deleteJob(jobList), 'removed', 'remove', len(jobList)):
        allRemove = False

    if not allRemove:
      return S_ERROR("Failed to remove all remnants from WMS")
    self.log.info("Successfully removed all tasks from the WMS")

    if not jobIDs:
      self.log.info("JobIDs not present, unable to remove asociated requests.")
      return S_OK()

    res = self.reqClient.getRequestIDsForJobs(jobIDs)
    if not res['OK']:
      self.log.error("Failed to get requestID for jobs.", res['Message'])
      return res
    failoverRequests = dict(res['Value']['Successful'])
    if not failoverRequests:
      return S_OK()

    failed = 0
    for jobID, requestID in failoverRequests.items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.reqClient.cancelRequest(requestID)
      if not res['OK']:
        self.log.error("Failed to remove request from RequestDB", res['Message'])
        failed += 1
      else:
        # %s: the job ID may be a string here (see the check above)
        self.log.verbose("Removed request %s associated to job %s." % (requestID, jobID))

    if failed:
      self.log.info("Successfully removed %s requests" % (len(failoverRequests) - failed))
      self.log.info("Failed to remove %s requests" % failed)
      return S_ERROR("Failed to remove all the request from RequestDB")
    self.log.info("Successfully removed all the associated failover requests")
    return S_OK()
Esempio n. 33
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """Create the transformation object, loading it from the server if an ID is given.

        :param int transID: ID of an existing transformation; 0 for a new one
        :param transClient: client to use instead of a new TransformationClient
        :raises AttributeError: if the server reports that the transformation
                                does not exist
        """
        super(Transformation, self).__init__()

        # Accepted type(s) of each standard parameter, enforced when it is set
        self.paramTypes = {
            "TransformationID": six.integer_types,
            "TransformationName": six.string_types,
            "Status": six.string_types,
            "Description": six.string_types,
            "LongDescription": six.string_types,
            "Type": six.string_types,
            "Plugin": six.string_types,
            "AgentType": six.string_types,
            "FileMask": six.string_types,
            "TransformationGroup": six.string_types,
            "GroupSize": six.integer_types + (float, ),
            "InheritedFrom": six.integer_types,
            "Body": six.string_types,
            "MaxNumberOfTasks": six.integer_types,
            "EventsPerTask": six.integer_types,
        }
        # Current value of each parameter, starting from the defaults
        self.paramValues = {
            "TransformationID": 0,
            "TransformationName": "",
            "Status": "New",
            "Description": "",
            "LongDescription": "",
            "Type": "",
            "Plugin": "Standard",
            "AgentType": "Manual",
            "FileMask": "",
            "TransformationGroup": "General",
            "GroupSize": 1,
            "InheritedFrom": 0,
            "Body": "",
            "MaxNumberOfTasks": 0,
            "EventsPerTask": 0,
        }

        # The meta queries belong neither to the transformation parameters nor to
        # the additional parameters, hence the dedicated attributes.
        self.inputMetaQuery = None
        self.outputMetaQuery = None

        self.ops = Operations()
        defaultPlugins = ["Broadcast", "Standard", "BySize", "ByShare"]
        self.supportedPlugins = self.ops.getValue("Transformations/AllowedPlugins", defaultPlugins)
        self.transClient = transClient if transClient else TransformationClient()
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if not transID:
            return

        # An ID was given: load the transformation from the server
        self.paramValues["TransformationID"] = transID
        res = self.getTransformation()
        if res["OK"]:
            self.exists = True
            return
        if res["Message"] == "Transformation does not exist":
            raise AttributeError("TransformationID %d does not exist" % transID)
        # Any other failure: forget the ID and report the problem
        self.paramValues["TransformationID"] = 0
        gLogger.fatal("Failed to get transformation from database",
                      "%s @ %s" % (transID, self.transClient.serverURL))

    def getServer(self):
        """Return the server URL stored in ``serverURL`` by the constructor."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-initialise this object, keeping the server it was talking to.

        :param int transID: ID of the transformation to load; 0 for a new one
        :returns: S_OK()
        """
        # Remember the URL first: __init__ overwrites self.serverURL with the URL
        # of the client it creates, so reading it afterwards would only hand the
        # new client its own address back.
        previousURL = self.serverURL
        self.__init__(transID)
        self.transClient.setServer(previousURL)
        self.serverURL = previousURL
        return S_OK()

    def setTargetSE(self, seList):
        """Set the "TargetSE" parameter.

        :param seList: storage element(s), in any form accepted by ``__setSE``
                       (string, list, tuple or dict)
        :returns: the result of ``__setSE``
        """
        return self.__setSE("TargetSE", seList)

    def setSourceSE(self, seList):
        """Set the "SourceSE" parameter.

        :param seList: storage element(s), in any form accepted by ``__setSE``
                       (string, list, tuple or dict)
        :returns: the result of ``__setSE``
        """
        return self.__setSE("SourceSE", seList)

    def setBody(self, body):
        """check that the body is a string, or using the proper syntax for multiple operations,
        or is a BodyPlugin object

        A string is stored as is, a BodyPlugin is stored encoded, and a list of
        operations is validated and stored as its JSON representation.

        :param body: transformation body, for example

          .. code :: python

            body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                     ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
                   ]

        :type body: string or list of tuples (or lists) of string and dictionaries or a Body plugin (:py:class:`DIRAC.TransformationSystem.Client.BodyPlugin.BaseBody.BaseBody`)
        :raises TypeError: If the structure is not as expected
        :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation`
                            is used
        :returns: S_OK, S_ERROR
        """
        self.item_called = "Body"

        # Simple single operation body case
        if isinstance(body, six.string_types):
            return self.__setParam(body)

        # BodyPlugin case
        if isinstance(body, BaseBody):
            return self.__setParam(encode(body))

        if not isinstance(body, (list, tuple)):
            raise TypeError("Expected list or string, but %r is %s" %
                            (body, type(body)))

        # Types accepted for the values of the operation attributes
        encodableTypes = six.string_types + six.integer_types + (float, list, tuple, dict)

        # MultiOperation body case: a sequence of (operation name, attributes) pairs
        for tup in body:
            if not isinstance(tup, (tuple, list)):
                raise TypeError("Expected tuple or list, but %r is %s" %
                                (tup, type(tup)))
            if len(tup) != 2:
                raise TypeError("Expected 2-tuple, but %r is length %d" %
                                (tup, len(tup)))
            if not isinstance(tup[0], six.string_types):
                raise TypeError(
                    "Expected string, but first entry in tuple %r is %s" %
                    (tup, type(tup[0])))
            if not isinstance(tup[1], dict):
                # Report the type of the offending (second) entry
                raise TypeError(
                    "Expected dictionary, but second entry in tuple %r is %s" %
                    (tup, type(tup[1])))
            for par, val in tup[1].items():
                if not isinstance(par, six.string_types):
                    raise TypeError(
                        "Expected string, but key in dictionary %r is %s" %
                        (par, type(par)))
                if par not in Operation.ATTRIBUTE_NAMES:
                    raise ValueError("Unknown attribute for Operation: %s" %
                                     par)
                if not isinstance(val, encodableTypes):
                    raise TypeError("Cannot encode %r, in json" % (val))
        return self.__setParam(json.dumps(body))

    def setInputMetaQuery(self, query):
        """Set the input meta query.

        The query is stored as given in ``inputMetaQuery``; it is not validated here.

        :param dict query: dictionary to use for input meta query
        :returns: S_OK()
        """
        self.inputMetaQuery = query
        return S_OK()

    def setOutputMetaQuery(self, query):
        """Set the output meta query.

        The query is stored as given in ``outputMetaQuery``; it is not validated here.

        :param dict query: dictionary to use for output meta query
        :returns: S_OK()
        """
        self.outputMetaQuery = query
        return S_OK()

    def __setSE(self, seParam, seList):
        """Check a list of storage elements and store it as parameter ``seParam``.

        :param str seParam: name of the parameter to set
        :param seList: storage elements, either as a list, tuple or dict, or as a
                       string holding a Python literal ("['SE1', 'SE2']") or a
                       comma separated list ("SE1,SE2")
        :returns: S_ERROR for an unsupported argument type or when ``__checkSEs``
                  fails, otherwise the result of setting the parameter
        """
        import ast  # local import: only needed to parse string input

        if isinstance(seList, six.string_types):
            try:
                # literal_eval only understands Python literals; unlike eval() it
                # cannot execute code that happens to be in the string.
                seList = ast.literal_eval(seList)
            except Exception:
                # Not a literal: comma separated names, spaces around them ignored
                seList = [seName.strip() for seName in seList.split(",")]
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res["OK"]:
            return res
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        if name.find("get") == 0:
            item = name[3:]
            self.item_called = item
            return self.__getParam
        if name.find("set") == 0:
            item = name[3:]
            self.item_called = item
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Generic getter for the parameter named by ``item_called``.

        "Available" gives the list of the typed parameter names, "Parameters" the
        whole dictionary of values, any other known name its current value.

        :returns: S_OK(value)
        :raises AttributeError: if the parameter is unknown
        """
        wanted = self.item_called
        if wanted == "Available":
            answer = list(self.paramTypes)
        elif wanted == "Parameters":
            answer = self.paramValues
        elif wanted in self.paramValues:
            answer = self.paramValues[wanted]
        else:
            raise AttributeError("Unknown parameter for transformation: %s" %
                                 self.item_called)
        return S_OK(answer)

    def __setParam(self, value):
        """Generic setter for the parameter named by ``item_called``.

        Nothing is done when the value is unchanged. A changed value is first sent
        to the server if the transformation exists there, then stored locally.

        :param value: new value of the parameter
        :returns: S_OK(), or the failing result of the server update
        :raises TypeError: if a typed parameter gets a value of the wrong type
        """
        name = self.item_called
        if name in self.paramTypes:
            # Typed parameter: a new value must have one of the declared types
            changed = self.paramValues[name] != value
            if changed and not isinstance(value, self.paramTypes[name]):
                raise TypeError("%s %s %s expected one of %s" %
                                (name, value, type(value), self.paramTypes[name]))
        else:
            # Additional parameter: changed if unknown so far or different
            changed = name not in self.paramValues or self.paramValues[name] != value

        if not changed:
            gLogger.verbose("No change of parameter %s required" % name)
            return S_OK()

        gLogger.verbose("Parameter %s to be changed" % name)
        transID = self.paramValues["TransformationID"]
        if self.exists and transID:
            res = self.transClient.setTransformationParameter(transID, name, value)
            if not res["OK"]:
                return res
        self.paramValues[name] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Read the transformation's parameters from the service and apply them locally.

        Every parameter returned by the client is passed to the matching
        ``set<ParamName>`` accessor of this object.

        :param bool printOutput: if True, pretty-print a failed client result
        :returns: S_ERROR() when no TransformationID is set, the failed client
                  result, or S_OK(dict of parameters)
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res["Value"]
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            setter = getattr(self, setterName, None)
            # The guard must look at the resolved setter: the setter *name* is
            # always a non-empty string, so testing it could never skip anything
            # and a missing setter would have been called as None.
            if not callable(setter):
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Fetch the logging entries recorded for this transformation.

        :param bool printOutput: if True, print the entries (Message, MessageDate,
                                 AuthorDN) or pretty-print a failed client result
        :returns: S_ERROR() when no TransformationID is set, the failed client
                  result, or S_OK(list of logging entries)
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()

        res = self.transClient.getTransformationLogging(transID)
        if res["OK"]:
            entries = res["Value"]
            if printOutput:
                columns = ["Message", "MessageDate", "AuthorDN"]
                self._printFormattedDictList(entries, columns, "MessageDate",
                                             "MessageDate")
            return S_OK(entries)

        if printOutput:
            self._prettyPrint(res)
        return res

    def extendTransformation(self, nTasks, printOutput=False):
        """Run the client's ``extendTransformation`` operation for this transformation.

        :param nTasks: forwarded unchanged to the client operation
        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        operation = "extendTransformation"
        return self.__executeOperation(operation, nTasks, printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Run the client's ``cleanTransformation`` operation.

        On success the locally stored ``Status`` parameter is set to ``Cleaned``.

        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        outcome = self.__executeOperation("cleanTransformation",
                                          printOutput=printOutput)
        if not outcome["OK"]:
            return outcome
        self.paramValues["Status"] = "Cleaned"
        return outcome

    def deleteTransformation(self, printOutput=False):
        """Run the client's ``deleteTransformation`` operation.

        On success ``self.reset()`` is called on this object.

        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        outcome = self.__executeOperation("deleteTransformation",
                                          printOutput=printOutput)
        if not outcome["OK"]:
            return outcome
        self.reset()
        return outcome

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Run the client's ``addFilesToTransformation`` operation for this transformation.

        :param lfns: forwarded unchanged to the client operation
        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        operation = "addFilesToTransformation"
        return self.__executeOperation(operation, lfns, printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Run the client's ``setFileStatusForTransformation`` operation.

        :param status: forwarded unchanged to the client operation
        :param lfns: forwarded unchanged to the client operation
        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        operation = "setFileStatusForTransformation"
        return self.__executeOperation(operation, status, lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        """Run the client's ``getTransformationTaskStats`` operation for this transformation.

        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        operation = "getTransformationTaskStats"
        return self.__executeOperation(operation, printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        """Run the client's ``getTransformationStats`` operation for this transformation.

        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        operation = "getTransformationStats"
        return self.__executeOperation(operation, printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Run the client's ``deleteTasks`` operation for this transformation.

        :param taskMin: forwarded unchanged to the client operation
        :param taskMax: forwarded unchanged to the client operation
        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        operation = "deleteTasks"
        return self.__executeOperation(operation, taskMin, taskMax,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=None,
                                 se="Unknown",
                                 printOutput=False):
        """Run the client's ``addTaskForTransformation`` operation for this transformation.

        :param list lfns: forwarded to the client operation; a new empty list is
                          used when omitted
        :param str se: forwarded unchanged to the client operation
        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        # A literal [] default would be one list object shared by every call;
        # because it is handed on to the client, any mutation there would leak
        # into later calls. Build a fresh list per call instead.
        if lfns is None:
            lfns = []
        return self.__executeOperation("addTaskForTransformation",
                                       lfns,
                                       se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Run the client's ``setTaskStatus`` operation for this transformation.

        :param taskID: forwarded unchanged to the client operation
        :param status: forwarded unchanged to the client operation
        :param bool printOutput: if True, the result is pretty-printed
        :returns: the result of the client operation
        """
        operation = "setTaskStatus"
        return self.__executeOperation(operation, taskID, status,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Call ``operation`` on the transformation client for this transformation.

        The client method is invoked as ``operation(transID, *parms, **kwds)``
        with ``printOutput`` removed from the keyword arguments.

        :param str operation: name of the method to call on ``self.transClient``
        :param parms: positional arguments passed after the transformation ID
        :param kwds: keyword arguments for the client method; ``printOutput``
                     (default False) is consumed here and, when true, the
                     result is pretty-printed
        :returns: S_ERROR when no TransformationID is set or the operation is
                  not a callable attribute of the client, otherwise the client result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        # Default to quiet: callers that omit printOutput must not get a KeyError
        printOutput = kwds.pop("printOutput", False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            # the operation name has to be interpolated into the message
            return S_ERROR(
                "Unable to invoke %s, it isn't a member function of TransformationClient"
                % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(
        self,
        fileStatus=[],
        lfns=[],
        outputFields=[
            "FileID",
            "LFN",
            "Status",
            "TaskID",
            "TargetSE",
            "UsedSE",
            "ErrorCount",
            "InsertedTime",
            "LastUpdate",
        ],
        orderBy="FileID",
        printOutput=False,
    ):
        """Query the files of this transformation through the transformation client.

        The list defaults are only read in this method, never modified.

        :param list fileStatus: if non-empty, added to the query as ``Status``
        :param list lfns: if non-empty, added to the query as ``LFN``
        :param list outputFields: fields to print; an empty list prints the
                                  available field names instead
        :param str orderBy: ordering field passed to the table printer
        :param bool printOutput: if True, print the result (or the failure)
        :returns: the result of the client query
        """
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if fileStatus:
            condDict["Status"] = fileStatus
        if lfns:
            condDict["LFN"] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() is a method of the separator string, not of the list
                # (assumes ParameterNames holds strings -- TODO confirm)
                gLogger.info("Available fields are: %s" %
                             " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "FileID", orderBy)
        return res

    def getTransformationTasks(
        self,
        taskStatus=[],
        taskIDs=[],
        outputFields=[
            "TransformationID",
            "TaskID",
            "ExternalStatus",
            "ExternalID",
            "TargetSE",
            "CreationTime",
            "LastUpdateTime",
        ],
        orderBy="TaskID",
        printOutput=False,
    ):
        """Query the tasks of this transformation through the transformation client.

        The list defaults are only read in this method, never modified.

        :param list taskStatus: if non-empty, added to the query as ``ExternalStatus``
        :param list taskIDs: if non-empty, added to the query as ``TaskID``
        :param list outputFields: fields to print; an empty list prints the
                                  available field names instead
        :param str orderBy: ordering field passed to the table printer
        :param bool printOutput: if True, print the result (or the failure)
        :returns: the result of the client query
        """
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if taskStatus:
            condDict["ExternalStatus"] = taskStatus
        if taskIDs:
            condDict["TaskID"] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() is a method of the separator string, not of the list
                # (assumes ParameterNames holds strings -- TODO confirm)
                gLogger.info("Available fields are: %s" %
                             " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "TaskID", orderBy)
        return res

    #############################################################################
    def getTransformations(
        self,
        transID=[],
        transStatus=[],
        outputFields=[
            "TransformationID", "Status", "AgentType", "TransformationName",
            "CreationDate"
        ],
        orderBy="TransformationID",
        printOutput=False,
    ):
        """Query transformations through the transformation client.

        The list defaults are only read in this method, never modified.

        :param list transID: if non-empty, added to the query as ``TransformationID``
        :param list transStatus: if non-empty, added to the query as ``Status``
        :param list outputFields: fields to print; an empty list prints the
                                  available field names instead
        :param str orderBy: ordering field passed to the table printer
        :param bool printOutput: if True, print the result (or the failure)
        :returns: the result of the client query
        """
        condDict = {}
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() is a method of the separator string, not of the list
                # (assumes ParameterNames holds strings -- TODO confirm)
                gLogger.info("Available fields are: %s" %
                             " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "TransformationID", orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """Read the identity and user name from the result of ``getProxyInfo()``.

        :returns: S_OK({"username": ..., "authorDN": ...}) or S_ERROR with the
                  message reported by ``getProxyInfo``
        """
        proxyResult = getProxyInfo()
        if not proxyResult["OK"]:
            gLogger.error("Unable to get uploaded proxy Info %s " %
                          proxyResult["Message"])
            return S_ERROR(proxyResult["Message"])

        details = proxyResult["Value"]
        authorDN = details["identity"]
        username = details["username"]
        return S_OK({"username": username, "authorDN": authorDN})

    #############################################################################
    def getTransformationsByUser(
        self,
        authorDN="",
        userName="",
        transID=[],
        transStatus=[],
        outputFields=[
            "TransformationID", "Status", "AgentType", "TransformationName",
            "CreationDate", "AuthorDN"
        ],
        orderBy="TransformationID",
        printOutput=False,
    ):
        """Query the transformations created by a given author.

        When ``authorDN`` is empty it is resolved with ``getAuthorDNfromProxy``;
        a non-empty ``userName`` must then match the user name found there.
        The list defaults are only read in this method, never modified.

        :param str authorDN: DN to filter on; resolved from the proxy when empty
        :param str userName: expected user name of the proxy (only checked when
                             ``authorDN`` is empty)
        :param list transID: if non-empty, added to the query as ``TransformationID``
        :param list transStatus: if non-empty, added to the query as ``Status``
        :param list outputFields: fields to print; an empty list prints the
                                  available field names instead
        :param str orderBy: ordering field passed to the table printer
        :param bool printOutput: if True, print the result (or the failure)
        :returns: S_ERROR if the author cannot be resolved, otherwise the result
                  of the client query
        """
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res["OK"]:
                gLogger.error(res["Message"])
                return S_ERROR(res["Message"])

            foundUserName = res["Value"]["username"]
            foundAuthor = res["Value"]["authorDN"]
            # A requested user name that differs from the proxy's user cannot be
            # resolved to a DN: report the error and stop
            if not (userName == "" or userName == foundUserName):
                message = (
                    "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                    % (userName, foundUserName))
                gLogger.error(message)
                return S_ERROR(message)

            userName = foundUserName
            authorDN = foundAuthor
            gLogger.info(
                "Will list transformations created by user '%s' with status '%s'"
                % (userName, ", ".join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ", ".join(transStatus)))

        condDict["AuthorDN"] = authorDN
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res

        if printOutput:
            if not outputFields:
                # join() is a method of the separator string, not of the list
                # (assumes ParameterNames holds strings -- TODO confirm)
                gLogger.info("Available fields are: %s" %
                             " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "TransformationID", orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Print and return a one-line summary for each requested transformation.

        Column headers starting with 'F' ('J') refer to files (jobs);
        'Proc.' stands for processed.

        :param list transID: transformation IDs to summarise; its length is also
                             used as the maximum number of records requested
        :returns: the failed client result, S_ERROR when no record could be
                  extracted, otherwise S_OK(list of dicts keyed by the short headers)
        """
        # (field name in the service reply, short header used for printing)
        columns = [
            ("TransformationID", "TransID"),
            ("Type", "Type"),
            ("Status", "Status"),
            ("Files_Total", "F_Total"),
            ("Files_PercentProcessed", "F_Proc.(%)"),
            ("Files_Processed", "F_Proc."),
            ("Files_Unused", "F_Unused"),
            ("Jobs_TotalCreated", "J_Created"),
            ("Jobs_Waiting", "J_Wait"),
            ("Jobs_Running", "J_Run"),
            ("Jobs_Done", "J_Done"),
            ("Jobs_Failed", "J_Fail"),
            ("Jobs_Stalled", "J_Stalled"),
        ]
        paramShowNames = [longName for longName, _ in columns]
        paramShowNamesShort = [shortName for _, shortName in columns]

        condDict = {"TransformationID": transID}
        result = self.transClient.getTransformationSummaryWeb(
            condDict, [], 0, len(transID))
        if not result["OK"]:
            self._prettyPrint(result)
            return result

        dictList = []
        if result["Value"]["TotalRecords"] > 0:
            try:
                paramNames = result["Value"]["ParameterNames"]
                for record in result["Value"]["Records"]:
                    shownValues = [
                        record[paramNames.index(field)]
                        for field in paramShowNames
                    ]
                    dictList.append(
                        dict(zip(paramShowNamesShort, shownValues)))
            except Exception as x:
                # best effort: report the problem and keep what was collected
                print("Exception %s " % str(x))

        if not dictList:
            gLogger.error(
                "No found transformations satisfying input condition")
            return S_ERROR(
                "No found transformations satisfying input condition")

        firstHeader = paramShowNamesShort[0]
        print(
            self._printFormattedDictList(dictList, paramShowNamesShort,
                                         firstHeader, firstHeader))
        return S_OK(dictList)

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create this transformation in the transformation system.

        After the sanity check, the transformation is created through the client
        from the currently stored parameters. Parameters that are not listed in
        ``self.paramTypes`` are then set one by one on the new transformation;
        a failure there is only logged.

        :param bool addFiles: passed on to the client's ``addTransformation``
        :param bool printOutput: if True, print the parameters used and a failed
                                 client result
        :returns: the sanity-check error report, the failed client result, or
                  S_OK(transformation ID)
        """
        sanity = self._checkCreation()
        if not sanity["OK"]:
            return self._errorReport(sanity,
                                     "Failed transformation sanity check")

        params = self.paramValues
        if printOutput:
            gLogger.info(
                "Will attempt to create transformation with the following parameters"
            )
            self._prettyPrint(params)

        res = self.transClient.addTransformation(
            params["TransformationName"],
            params["Description"],
            params["LongDescription"],
            params["Type"],
            params["Plugin"],
            params["AgentType"],
            params["FileMask"],
            transformationGroup=params["TransformationGroup"],
            groupSize=params["GroupSize"],
            inheritedFrom=params["InheritedFrom"],
            body=params["Body"],
            maxTasks=params["MaxNumberOfTasks"],
            eventsPerTask=params["EventsPerTask"],
            addFiles=addFiles,
            inputMetaQuery=self.inputMetaQuery,
            outputMetaQuery=self.outputMetaQuery,
        )
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res

        transID = res["Value"]
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)

        # Push the parameters that were not part of the creation call
        for paramName, paramValue in self.paramValues.items():
            if paramName in self.paramTypes:
                continue
            res = self.transClient.setTransformationParameter(
                transID, paramName, paramValue)
            if res["OK"]:
                continue
            gLogger.error("Failed to add parameter",
                          "%s %s" % (paramName, res["Message"]))
            gLogger.notice(
                "To add this parameter later please execute the following.")
            gLogger.notice("oTransformation = Transformation(%d)" % transID)
            gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Check that this object is ready to be created as a new transformation.

        Refuses to continue when a TransformationID is already set, interactively
        asks for any missing required parameter, and prompts for a replacement
        when the selected plugin is not in ``self.supportedPlugins``.

        :returns: S_ERROR() when a TransformationID is already set, the failed
                  prompt result, otherwise S_OK()
        """
        if self.paramValues["TransformationID"]:
            gLogger.info(
                "You are currently working with an active transformation definition."
            )
            gLogger.info(
                "If you wish to create a new transformation reset the TransformationID."
            )
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        requiredParameters = (
            "TransformationName", "Description", "LongDescription", "Type"
        )
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info(
                    "%s is not defined for this transformation. This is required..."
                    % parameter)
                self.paramValues[parameter] = six.moves.input(
                    "Please enter the value of " + parameter + " ")

        plugin = self.paramValues["Plugin"]
        if plugin and plugin not in self.supportedPlugins:
            gLogger.info(
                "The selected Plugin (%s) is not known to the transformation agent."
                % plugin)
            res = self.__promptForParameter("Plugin",
                                            choices=self.supportedPlugins,
                                            default="Standard")
            if not res["OK"]:
                return res
            self.paramValues["Plugin"] = res["Value"]

        # (the plugin value was previously re-read here into an unused local)
        return S_OK()

    def _checkBySizePlugin(self):
        """Apply the same check as for the Standard plugin.

        :returns: the result of ``_checkStandardPlugin``
        """
        outcome = self._checkStandardPlugin()
        return outcome

    def _checkBySharePlugin(self):
        """Apply the same check as for the Standard plugin.

        :returns: the result of ``_checkStandardPlugin``
        """
        outcome = self._checkStandardPlugin()
        return outcome

    def _checkStandardPlugin(self):
        """Make sure the GroupSize parameter is positive.

        A GroupSize of zero or less is replaced by 1 through ``setGroupSize``.

        :returns: the failed setter result, otherwise S_OK()
        """
        if not self.paramValues["GroupSize"] <= 0:
            return S_OK()

        gLogger.info(
            "The GroupSize was found to be less than zero. It has been set to 1."
        )
        outcome = self.setGroupSize(1)
        if not outcome["OK"]:
            return outcome
        return S_OK()

    def _checkBroadcastPlugin(self):
        """Interactively complete the parameters needed by the Broadcast plugin.

        For each of SourceSE and TargetSE that has no value yet, the user is
        asked for a list of names (separated by commas and/or whitespace) which
        is then passed to the matching ``set<Param>`` accessor.

        :returns: S_ERROR when no setter can be resolved, the failed setter
                  result, otherwise S_OK()
        """
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s"
            % (", ".join(["SourceSE", "TargetSE"])))
        for requiredParam in ("SourceSE", "TargetSE"):
            if self.paramValues.get(requiredParam):
                continue

            answer = six.moves.input("Please enter " + requiredParam + " ")
            setterName = "set%s" % requiredParam
            candidate = getattr(self, setterName) if hasattr(
                self, setterName) else None
            if not (candidate and callable(candidate)):
                return S_ERROR(
                    "Unable to invoke %s, this function hasn't been implemented."
                    % setterName)

            # accept both comma and whitespace separated names
            storageElements = answer.replace(",", " ").split()
            outcome = candidate(storageElements)
            if not outcome["OK"]:
                return outcome
        return S_OK()

    def __checkSEs(self, seList):
        """Check that every given name is a section of /Resources/StorageElements.

        :param seList: iterable of storage element names
        :returns: the error report when the configuration cannot be read,
                  S_ERROR with the number of unknown names, otherwise S_OK()
        """
        known = gConfig.getSections("/Resources/StorageElements")
        if not known["OK"]:
            return self._errorReport(known,
                                     "Failed to get possible StorageElements")

        unknown = set(seList) - set(known["Value"])
        if not unknown:
            return S_OK()

        for seName in unknown:
            gLogger.error("StorageElement %s is not known" % seName)
        return S_ERROR("%d StorageElements not known" % len(unknown))

    def __promptForParameter(self,
                             parameter,
                             choices=[],
                             default="",
                             insert=True):
        """Ask the user for the value of a parameter and optionally store it.

        The ``choices`` default is only passed on to ``promptUser`` here, never
        modified in this method.

        :param str parameter: name of the parameter to ask for
        :param list choices: passed on to ``promptUser``
        :param str default: passed on to ``promptUser``
        :param bool insert: if True, store the answer through ``set<parameter>``
        :returns: the error report for a failed prompt, S_ERROR when no setter
                  can be resolved, the failed setter result, otherwise S_OK(answer)
        """
        res = promptUser("Please enter %s" % parameter,
                         choices=choices,
                         default=default)
        if not res["OK"]:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"]))
        paramValue = res["Value"]
        if insert:
            setterName = "set%s" % parameter
            setter = getattr(self, setterName, None)
            if not callable(setter):
                # the setter name has to be interpolated into the message
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!"
                    % setterName)
            res = setter(paramValue)
            if not res["OK"]:
                return res
        return S_OK(paramValue)
Esempio n. 34
0
class TaskManagerAgentBase(AgentModule, TransformationAgentsUtilities):
    """ To be extended. Please look at WorkflowTaskAgent and RequestTaskAgent.
  """
    def __init__(self, *args, **kwargs):
        """Constructor: initialise both base classes and set the default state.

        Always call this in the extension agent.
        """
        AgentModule.__init__(self, *args, **kwargs)
        TransformationAgentsUtilities.__init__(self)

        # clients, created in initialize()
        self.transClient = self.jobManagerClient = None

        # transformation types handled by the agent
        self.transType = []

        # submission tuning
        self.tasksPerLoop = 50
        self.maxParametricJobs = 20  # will be updated in execute()
        self.bulkSubmissionFlag = False
        self.pluginLocation = ''

        # credentials
        self.shifterProxy = self.credentials = None
        self.credTuple = (None, None, None)

        # bookkeeping for the worker threads
        self.transQueue = Queue()
        self.transInQueue = []
        self.transInThread = {}

    #############################################################################

    def initialize(self):
        """Agent initialization: read options, create clients, start worker threads.

        The extensions MUST provide in the initialize method the following data members:
        - TransformationClient objects (self.transClient),
        - set the shifterProxy if different from the default one set here ('ProductionManager')
        - list of transformation types to be looked (self.transType)

        :returns: the failed credentials result, otherwise S_OK()
        """
        gMonitor.registerActivity("SubmittedTasks",
                                  "Automatically submitted tasks",
                                  "Transformation Monitoring", "Tasks",
                                  gMonitor.OP_ACUM)

        defaultPluginLocation = 'DIRAC.TransformationSystem.Client.TaskManagerPlugin'
        self.pluginLocation = self.am_getOption('PluginLocation',
                                                defaultPluginLocation)

        # Default clients
        self.transClient = TransformationClient()
        self.jobManagerClient = JobManagerClient()

        # Options default to the values already stored on the agent
        self.bulkSubmissionFlag = self.am_getOption('BulkSubmission',
                                                    self.bulkSubmissionFlag)
        # Shifter credentials to use, could replace the use of shifterProxy eventually
        self.shifterProxy = self.am_getOption('shifterProxy',
                                              self.shifterProxy)
        self.credentials = self.am_getOption('ShifterCredentials',
                                             self.credentials)
        credResult = self.__getCredentials()
        if not credResult['OK']:
            return credResult

        # One queued job per thread, each running self._execute with its index
        nThreads = self.am_getOption('maxNumberOfThreads', 15)
        pool = ThreadPool(nThreads, nThreads)
        self.log.verbose("Multithreaded with %d threads" % nThreads)
        for threadIndex in range(nThreads):
            pool.generateJobAndQueueIt(self._execute, [threadIndex])

        return S_OK()

    def finalize(self):
        """Graceful finalization.

        If transformations are still queued, the queue list is emptied and the
        method polls every two seconds until ``self.transInThread`` is empty.

        :returns: S_OK()
        """
        if not self.transInQueue:
            return S_OK()

        self._logInfo(
            "Wait for threads to get empty before terminating the agent (%d tasks)"
            % len(self.transInThread))
        self.transInQueue = []
        while self.transInThread:
            time.sleep(2)
        self.log.info("Threads are empty, terminating the agent...")
        return S_OK()

    #############################################################################

    def execute(self):
        """Collect the operations to run per transformation and queue them.

        The credentials are taken from the proxy info when available, else from
        ``self.credTuple`` when ``self.credentials`` is set. Each of the four
        operations (updateTaskStatus, updateFileStatus, checkReservedTasks,
        submitTasks) is added for the selected transformations when its enabling
        option is set; finally the collected operations are handed to
        ``self._fillTheQueue``.

        :returns: S_OK()
        """
        operationsOnTransformationDict = {}

        # getting the credentials for submission
        owner, ownerGroup, ownerDN = None, None, None
        resProxy = getProxyInfo(proxy=False, disableVOMS=False)
        if resProxy['OK']:  # there is a shifterProxy
            proxyInfo = resProxy['Value']
            owner = proxyInfo['username']
            ownerGroup = proxyInfo['group']
            ownerDN = proxyInfo['identity']
            self.log.info(
                "ShifterProxy: Tasks will be submitted with the credentials %s:%s"
                % (owner, ownerGroup))
        elif self.credentials:
            owner, ownerGroup, ownerDN = self.credTuple
        else:
            self.log.info("Using per Transformation Credentials!")

        # The three monitoring steps only differ by their option names, the
        # message logged when disabled, and the operation they schedule:
        # (enabling option, disabled message, status option, fallback status option, operation)
        monitoringSteps = (
            ('MonitorTasks',
             "Monitoring of tasks is disabled. To enable it, create the 'MonitorTasks' option",
             'UpdateTasksTransformationStatus', 'UpdateTasksStatus',
             'updateTaskStatus'),
            ('MonitorFiles',
             "Monitoring of files is disabled. To enable it, create the 'MonitorFiles' option",
             'UpdateFilesTransformationStatus', 'UpdateFilesStatus',
             'updateFileStatus'),
            ('CheckReserved',
             "Checking of reserved tasks is disabled. To enable it, create the 'CheckReserved' option",
             'CheckReservedTransformationStatus', 'CheckReservedStatus',
             'checkReservedTasks'),
        )
        for enableOption, disabledMessage, statusOption, fallbackOption, operation in monitoringSteps:
            if not self.am_getOption(enableOption, ''):
                self.log.verbose(disabledMessage)
                continue

            # Get the transformations this operation applies to
            fallbackStatus = self.am_getOption(
                fallbackOption, ['Active', 'Completing', 'Stopped'])
            status = self.am_getOption(statusOption, fallbackStatus)
            selected = self._selectTransformations(transType=self.transType,
                                                   status=status,
                                                   agentType=[])
            if not selected['OK']:
                self.log.warn("Could not select transformations:",
                              selected['Message'])
                continue

            self._addOperationForTransformations(
                operationsOnTransformationDict,
                operation,
                selected,
                owner=owner,
                ownerGroup=ownerGroup,
                ownerDN=ownerDN)

        # Determine whether the submission of tasks is to be performed
        if not self.am_getOption('SubmitTasks', 'yes'):
            self.log.verbose(
                "Submission of tasks is disabled. To enable it, create the 'SubmitTasks' option"
            )
        else:
            fallbackStatus = self.am_getOption('SubmitStatus',
                                               ['Active', 'Completing'])
            status = self.am_getOption('SubmitTransformationStatus',
                                       fallbackStatus)
            selected = self._selectTransformations(transType=self.transType,
                                                   status=status)
            if not selected['OK']:
                self.log.warn("Could not select transformations:",
                              selected['Message'])
            else:
                # Refresh the submission limits before scheduling
                self.tasksPerLoop = self.am_getOption('TasksPerLoop',
                                                      self.tasksPerLoop)
                res = self.jobManagerClient.getMaxParametricJobs()
                if res['OK']:
                    self.maxParametricJobs = res['Value']
                else:
                    self.log.warn(
                        "Could not get the maxParametricJobs from JobManager",
                        res['Message'])

                self._addOperationForTransformations(
                    operationsOnTransformationDict,
                    'submitTasks',
                    selected,
                    owner=owner,
                    ownerGroup=ownerGroup,
                    ownerDN=ownerDN)

        self._fillTheQueue(operationsOnTransformationDict)

        return S_OK()

    def _selectTransformations(self,
                               transType=None,
                               status=None,
                               agentType=None):
        """ get the transformations
    """
        if status is None:
            status = ['Active', 'Completing']
        if agentType is None:
            agentType = ['Automatic']
        selectCond = {}
        if status:
            selectCond['Status'] = status
        if transType is not None:
            selectCond['Type'] = transType
        if agentType:
            selectCond['AgentType'] = agentType
        res = self.transClient.getTransformations(condDict=selectCond)
        if not res['OK']:
            self.log.error("Failed to get transformations:", res['Message'])
        elif not res['Value']:
            self.log.verbose("No transformations found")
        else:
            self.log.verbose("Obtained %d transformations" % len(res['Value']))
        return res

    def _fillTheQueue(self, operationsOnTransformationsDict):
        """ Just fill the queue with the operation to be done on a certain transformation
    """
        count = 0
        for transID, bodyAndOps in operationsOnTransformationsDict.items():
            if transID not in self.transInQueue:
                count += 1
                self.transInQueue.append(transID)
                self.transQueue.put({transID: bodyAndOps})

        self.log.info("Out of %d transformations, %d put in thread queue" %
                      (len(operationsOnTransformationsDict), count))

    #############################################################################

    def _getClients(self, ownerDN=None, ownerGroup=None):
        """Build the set of clients used inside a thread.

        This is a default implementation, meant to be extended or adapted.

        The given ownerDN and ownerGroup are handed to the task manager, which also
        receives the plugin location configured on this agent.

        :param str ownerDN: DN of the owner of the submitted jobs
        :param str ownerGroup: group of the owner of the submitted jobs
        :returns: dict with the keys 'TransformationClient' and 'TaskManager'
        """
        clients = {'TransformationClient': TransformationClient()}

        taskManager = WorkflowTasks(ownerDN=ownerDN, ownerGroup=ownerGroup)
        taskManager.pluginLocation = self.pluginLocation
        clients['TaskManager'] = taskManager

        return clients

    def _execute(self, threadID):
        """ This is what runs inside the threads, in practice this is the function that does the real stuff
    """
        # Each thread will have its own clients if we use credentials/shifterProxy
        clients = self._getClients() if self.shifterProxy else \
            self._getClients(ownerGroup=self.credTuple[1], ownerDN=self.credTuple[2]) if self.credentials \
            else None
        method = '_execute'
        operation = 'None'

        while True:
            startTime = time.time()
            transIDOPBody = self.transQueue.get()
            if not self.transInQueue:
                # Queue was cleared, nothing to do
                continue
            try:
                transID = list(transIDOPBody)[0]
                operations = transIDOPBody[transID]['Operations']
                if transID not in self.transInQueue:
                    self._logWarn("Got a transf not in transInQueue...?",
                                  method=method,
                                  transID=transID)
                    break
                if not (self.credentials or self.shifterProxy):
                    ownerDN, group = transIDOPBody[transID][
                        'OwnerDN'], transIDOPBody[transID]['OwnerGroup']
                    clients = self._getClients(ownerDN=ownerDN,
                                               ownerGroup=group)
                self.transInThread[transID] = ' [Thread%d] [%s] ' % (
                    threadID, str(transID))
                self._logInfo("Start processing transformation",
                              method=method,
                              transID=transID)
                clients['TaskManager'].transInThread = self.transInThread
                for operation in operations:
                    self._logInfo("Executing %s" % operation,
                                  method=method,
                                  transID=transID)
                    startOperation = time.time()
                    res = getattr(self, operation)(transIDOPBody, clients)
                    if not res['OK']:
                        self._logError("Failed to %s: %s" %
                                       (operation, res['Message']),
                                       method=method,
                                       transID=transID)
                    self._logInfo("Executed %s in %.1f seconds" %
                                  (operation, time.time() - startOperation),
                                  method=method,
                                  transID=transID)
            except Exception as x:  # pylint: disable=broad-except
                self._logException('Exception executing operation %s' %
                                   operation,
                                   lException=x,
                                   method=method,
                                   transID=transID)
            finally:
                if not transID:
                    transID = 'None'
                self._logInfo("Processed transformation in %.1f seconds" %
                              (time.time() - startTime),
                              method=method,
                              transID=transID)
                self.transInThread.pop(transID, None)
                self._logVerbose("%d transformations still in queue" %
                                 (len(self.transInThread)),
                                 method=method,
                                 transID=transID)
                if transID in self.transInQueue:
                    self.transInQueue.remove(transID)
                self._logDebug("transInQueue = ",
                               self.transInQueue,
                               method=method,
                               transID=transID)

    #############################################################################
    # real operations done

    def updateTaskStatus(self, transIDOPBody, clients):
        """Refresh the status of the tasks of one transformation.

        Queries the tasks whose ``ExternalStatus`` is listed in the ``TaskUpdateStatus``
        option (with ``older`` set to ten minutes ago on ``LastUpdateTime``), asks the
        task manager for their current status, optionally in chunks of
        ``TaskUpdateChunkSize``, and stores each reported status through the
        transformation client.

        :param dict transIDOPBody: dict whose first key is the transformation ID
        :param dict clients: provides the 'TransformationClient' and 'TaskManager' clients
        :return: S_OK() when all updates went through; the task query result when it
                 failed or holds no task; otherwise the first failing result
        """
        method = 'updateTaskStatus'
        transID = list(transIDOPBody)[0]
        logContext = {'method': method, 'transID': transID}

        # Statuses considered as "to be updated": job statuses + request-specific statuses
        defaultStatuses = [
            JobStatus.CHECKING, JobStatus.DELETED, JobStatus.KILLED,
            JobStatus.STAGING, JobStatus.STALLED, JobStatus.MATCHED,
            JobStatus.RESCHEDULED, JobStatus.COMPLETING, JobStatus.COMPLETED,
            JobStatus.SUBMITTED, JobStatus.RECEIVED, JobStatus.WAITING,
            JobStatus.RUNNING, 'Scheduled', 'Assigned'
        ]
        selection = {
            "TransformationID": transID,
            "ExternalStatus": self.am_getOption('TaskUpdateStatus',
                                                defaultStatuses)
        }
        tenMinutesAgo = str(datetime.datetime.utcnow() -
                            datetime.timedelta(minutes=10))

        # Fetch the candidate tasks
        tasksRes = clients['TransformationClient'].getTransformationTasks(
            condDict=selection,
            older=tenMinutesAgo,
            timeStamp='LastUpdateTime')
        if not tasksRes['OK']:
            self._logError("Failed to get tasks to update:",
                           tasksRes['Message'], **logContext)
            return tasksRes
        tasks = tasksRes['Value']
        if not tasks:
            self._logVerbose("No tasks found to update", **logContext)
            return tasksRes

        # A chunk size of 0 (or an unparsable one) means "all tasks in one go"
        rawChunkSize = self.am_getOption('TaskUpdateChunkSize', 0)
        try:
            chunkSize = int(rawChunkSize)
        except ValueError:
            chunkSize = 0
        if chunkSize:
            self._logVerbose("Getting %d tasks status (chunks of %d)" %
                             (len(tasks), chunkSize), **logContext)
            taskChunks = breakListIntoChunks(tasks, chunkSize)
        else:
            self._logVerbose("Getting %d tasks status" % len(tasks),
                             **logContext)
            taskChunks = [tasks]

        counters = {}
        for index, chunk in enumerate(taskChunks):
            statusRes = clients['TaskManager'].getSubmittedTaskStatus(chunk)
            if not statusRes['OK']:
                self._logError("Failed to get updated task states:",
                               statusRes['Message'], **logContext)
                return statusRes
            newStatuses = statusRes['Value']
            if not newStatuses:
                self._logVerbose("%4d: No tasks to update" % index,
                                 **logContext)

            # Store the status of the tasks that changed
            for newStatus, taskIDs in newStatuses.items():
                self._logVerbose("%4d: Updating %d task(s) to %s" %
                                 (index, len(taskIDs), newStatus),
                                 **logContext)
                setRes = clients['TransformationClient'].setTaskStatus(
                    transID, taskIDs, newStatus)
                if not setRes['OK']:
                    self._logError(
                        "Failed to update task status for transformation:",
                        setRes['Message'], **logContext)
                    return setRes
                counters[newStatus] = counters.get(newStatus, 0) + len(taskIDs)

        for newStatus, total in counters.items():
            self._logInfo("Updated %d tasks to status %s" % (total, newStatus),
                          **logContext)
        return S_OK()

    def updateFileStatus(self, transIDOPBody, clients):
        """Refresh the status of the 'Assigned' files of one transformation.

        Queries the files in status 'Assigned' (with ``older`` set to ten minutes ago
        on ``LastUpdate``), groups them by task, asks the task manager for the file
        statuses 100 tasks at a time and commits the reported statuses through a
        FileReport.

        :param dict transIDOPBody: dict whose first key is the transformation ID
        :param dict clients: provides the 'TransformationClient' and 'TaskManager' clients
        :return: S_OK() when all updates went through; the file query result when it
                 failed or holds no file; otherwise the first failing result
        """
        method = 'updateFileStatus'
        transID = list(transIDOPBody)[0]
        logContext = {'method': method, 'transID': transID}

        tenMinutesAgo = str(datetime.datetime.utcnow() -
                            datetime.timedelta(minutes=10))

        # Fetch the candidate files
        selection = {'TransformationID': transID, 'Status': ['Assigned']}
        filesRes = clients['TransformationClient'].getTransformationFiles(
            condDict=selection, older=tenMinutesAgo, timeStamp='LastUpdate')
        if not filesRes['OK']:
            self._logError("Failed to get transformation files to update:",
                           filesRes['Message'], **logContext)
            return filesRes
        if not filesRes['Value']:
            self._logInfo("No files to be updated", **logContext)
            return filesRes

        # Group the file records by the task they belong to
        filesByTask = {}
        for fileRecord in filesRes['Value']:
            taskKey = fileRecord['TaskID']
            if taskKey not in filesByTask:
                filesByTask[taskKey] = []
            filesByTask[taskKey].append(fileRecord)

        tasksPerChunk = 100
        self._logVerbose("Getting file status for %d tasks (chunks of %d)" %
                         (len(filesByTask), tasksPerChunk), **logContext)

        counters = {}
        # Process 100 tasks at a time
        for index, taskIDChunk in enumerate(
                breakListIntoChunks(filesByTask, tasksPerChunk)):
            chunkFiles = []
            for taskKey in taskIDChunk:
                chunkFiles.extend(filesByTask[taskKey])

            statusRes = clients['TaskManager'].getSubmittedFileStatus(
                chunkFiles)
            if not statusRes['OK']:
                self._logError(
                    "Failed to get updated file states for transformation:",
                    statusRes['Message'], **logContext)
                return statusRes
            newStatuses = statusRes['Value']
            if not newStatuses:
                self._logVerbose("%4d: No file states to be updated" % index,
                                 **logContext)
                continue

            # Collect the new statuses in a report, then send them in one commit
            fileReport = FileReport(
                server=clients['TransformationClient'].getServer())
            for lfn, newStatus in newStatuses.items():
                counters[newStatus] = counters.get(newStatus, 0) + 1
                reported = fileReport.setFileStatus(transID, lfn, newStatus)
                if not reported['OK']:
                    return reported
            committed = fileReport.commit()
            if not committed['OK']:
                self._logError(
                    "Failed to update file states for transformation:",
                    committed['Message'], **logContext)
                return committed
            self._logVerbose("%4d: Updated the states of %d files" %
                             (index, len(committed['Value'])), **logContext)

        for newStatus, total in counters.items():
            self._logInfo("Updated %d files to status %s" % (total, newStatus),
                          **logContext)
        return S_OK()

    def checkReservedTasks(self, transIDOPBody, clients):
        """Recover the tasks of one transformation that are stuck in 'Reserved'.

        Queries the tasks with ``ExternalStatus`` 'Reserved' (with ``older`` set to one
        hour ago) and hands them to the task manager.  Tasks reported under 'NoTasks'
        are set back to 'Created'; tasks reported under 'TaskNameIDs' are set to
        'Submitted' together with their external ID.

        :param dict transIDOPBody: dict whose first key is the transformation ID
        :param dict clients: provides the 'TransformationClient' and 'TaskManager' clients
        :return: S_OK() when everything was updated; the task query result when it
                 failed or holds no task; otherwise the first failing result
        """
        transID = list(transIDOPBody)[0]
        method = 'checkReservedTasks'

        # Select the tasks which have been in Reserved status for more than 1 hour for selected transformations
        condDict = {"TransformationID": transID, "ExternalStatus": 'Reserved'}
        time_stamp_older = str(datetime.datetime.utcnow() -
                               datetime.timedelta(hours=1))

        res = clients['TransformationClient'].getTransformationTasks(
            condDict=condDict, older=time_stamp_older)
        self._logDebug("getTransformationTasks(%s) return value:" % condDict,
                       res,
                       method=method,
                       transID=transID)
        if not res['OK']:
            self._logError("Failed to get Reserved tasks:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        if not res['Value']:
            # Pass the method like every other log call here, so that the message
            # is attributed to this operation
            self._logVerbose("No Reserved tasks found",
                             method=method,
                             transID=transID)
            return res
        reservedTasks = res['Value']

        # Update the reserved tasks
        res = clients['TaskManager'].updateTransformationReservedTasks(
            reservedTasks)
        self._logDebug("updateTransformationReservedTasks(%s) return value:" %
                       reservedTasks,
                       res,
                       method=method,
                       transID=transID)
        if not res['OK']:
            self._logError("Failed to update transformation reserved tasks:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        noTasks = res['Value']['NoTasks']
        taskNameIDs = res['Value']['TaskNameIDs']

        # For the tasks with no associated request found re-set the status of the task in the transformationDB
        if noTasks:
            self._logInfo(
                "Resetting status of %d tasks to Created as no associated job/request found"
                % len(noTasks),
                method=method,
                transID=transID)
            for taskName in noTasks:
                # NOTE: transID is rebound to the value parsed from the task name
                transID, taskID = self._parseTaskName(taskName)
                res = clients['TransformationClient'].setTaskStatus(
                    transID, taskID, 'Created')
                if not res['OK']:
                    self._logError(
                        "Failed to update task status and ID after recovery:",
                        '%s %s' % (taskName, res['Message']),
                        method=method,
                        transID=transID)
                    return res

        # For the tasks for which an associated request was found update the task details in the transformationDB
        for taskName, extTaskID in taskNameIDs.items():
            # NOTE: transID is rebound to the value parsed from the task name
            transID, taskID = self._parseTaskName(taskName)
            self._logInfo("Setting status of %s to Submitted with ID %s" %
                          (taskName, extTaskID),
                          method=method,
                          transID=transID)
            setTaskStatusAndWmsID = clients[
                'TransformationClient'].setTaskStatusAndWmsID(
                    transID, taskID, 'Submitted', str(extTaskID))
            if not setTaskStatusAndWmsID['OK']:
                self._logError(
                    "Failed to update task status and ID after recovery:",
                    "%s %s" % (taskName, setTaskStatusAndWmsID['Message']),
                    method=method,
                    transID=transID)
                return setTaskStatusAndWmsID

        return S_OK()

    def submitTasks(self, transIDOPBody, clients):
        """Submit the tasks to an external system, using the taskManager provided.

        Gets up to ``self.tasksPerLoop`` tasks from the transformation client, then
        prepares, submits and updates them chunk by chunk.  The chunk size is
        ``self.maxParametricJobs`` when ``self.bulkSubmissionFlag`` is set, and
        ``self.tasksPerLoop`` otherwise.  The first failing chunk stops the submission.

        :param dict transIDOPBody: transformation body
        :param dict clients: dictionary of client objects

        :return: S_OK/S_ERROR (the task query result itself when it failed or holds no task)
        """
        transID = list(transIDOPBody)[0]
        transBody = transIDOPBody[transID]['Body']
        owner = transIDOPBody[transID]['Owner']
        ownerGroup = transIDOPBody[transID]['OwnerGroup']
        ownerDN = transIDOPBody[transID]['OwnerDN']
        method = 'submitTasks'

        # Get all tasks to submit
        tasksToSubmit = clients['TransformationClient'].getTasksToSubmit(
            transID, self.tasksPerLoop)
        self._logDebug("getTasksToSubmit(%s, %s) return value:" %
                       (transID, self.tasksPerLoop),
                       tasksToSubmit,
                       method=method,
                       transID=transID)
        if not tasksToSubmit['OK']:
            self._logError("Failed to obtain tasks:",
                           tasksToSubmit['Message'],
                           method=method,
                           transID=transID)
            return tasksToSubmit
        tasks = tasksToSubmit['Value']['JobDictionary']
        if not tasks:
            self._logVerbose("No tasks found for submission",
                             method=method,
                             transID=transID)
            return tasksToSubmit
        self._logInfo("Obtained %d tasks for submission" % len(tasks),
                      method=method,
                      transID=transID)

        # Prepare tasks and submits them, by chunks
        chunkSize = self.maxParametricJobs if self.bulkSubmissionFlag else self.tasksPerLoop
        for taskDictChunk in breakDictionaryIntoChunks(tasks, chunkSize):
            res = self._prepareAndSubmitAndUpdateTasks(transID, transBody,
                                                       taskDictChunk, owner,
                                                       ownerDN, ownerGroup,
                                                       clients)
            if not res['OK']:
                return res
            # Tag the message with method and transformation like the other log
            # calls, so it can be attributed when several threads are logging
            self._logVerbose("Submitted %d jobs, bulkSubmissionFlag = %s" %
                             (len(taskDictChunk), self.bulkSubmissionFlag),
                             method=method,
                             transID=transID)

        return S_OK()

    def _prepareAndSubmitAndUpdateTasks(self, transID, transBody, tasks, owner,
                                        ownerDN, ownerGroup, clients):
        """ prepare + submit + monitor a dictionary of tasks

    :param int transID: transformation ID
    :param str transBody: transformation job template
    :param dict tasks: dictionary of per task parameters
    :param str owner: owner of the transformation
    :param str ownerDN: DN of the owner of the transformation
    :param str ownerGroup: group of the owner of the transformation
    :param dict clients: dictionary of client objects

    :return: S_OK/S_ERROR
    """

        method = '_prepareAndSubmitAndUpdateTasks'
        # prepare tasks
        preparedTransformationTasks = clients[
            'TaskManager'].prepareTransformationTasks(transBody, tasks, owner,
                                                      ownerGroup, ownerDN,
                                                      self.bulkSubmissionFlag)
        self._logDebug("prepareTransformationTasks return value:",
                       preparedTransformationTasks,
                       method=method,
                       transID=transID)
        if not preparedTransformationTasks['OK']:
            self._logError("Failed to prepare tasks",
                           preparedTransformationTasks['Message'],
                           method=method,
                           transID=transID)
            return preparedTransformationTasks

        # Submit tasks
        res = clients['TaskManager'].submitTransformationTasks(
            preparedTransformationTasks['Value'])
        self._logDebug("submitTransformationTasks return value:",
                       res,
                       method=method,
                       transID=transID)
        if not res['OK']:
            self._logError("Failed to submit prepared tasks:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res

        # Update tasks after submission
        res = clients['TaskManager'].updateDBAfterTaskSubmission(res['Value'])
        self._logDebug("updateDBAfterTaskSubmission return value:",
                       res,
                       method=method,
                       transID=transID)
        if not res['OK']:
            self._logError("Failed to update DB after task submission:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res

        return S_OK()

    @staticmethod
    def _addOperationForTransformations(operationsOnTransformationDict,
                                        operation,
                                        transformations,
                                        owner=None,
                                        ownerGroup=None,
                                        ownerDN=None):
        """Fill the operationsOnTransformationDict"""
        transformationIDsAndBodies = [
            (transformation['TransformationID'], transformation['Body'],
             transformation['AuthorDN'], transformation['AuthorGroup'])
            for transformation in transformations['Value']
        ]
        for transID, body, t_ownerDN, t_ownerGroup in transformationIDsAndBodies:
            if transID in operationsOnTransformationDict:
                operationsOnTransformationDict[transID]['Operations'].append(
                    operation)
            else:
                operationsOnTransformationDict[transID] = {
                    'Body':
                    body,
                    'Operations': [operation],
                    'Owner':
                    owner if owner else getUsernameForDN(t_ownerDN)['Value'],
                    'OwnerGroup':
                    ownerGroup if owner else t_ownerGroup,
                    'OwnerDN':
                    ownerDN if owner else t_ownerDN
                }

    def __getCredentials(self):
        """Get the credentials to use if ShifterCredentials are set, otherwise do nothing.

        Reads the ``/Shifter/<self.credentials>`` options, looks up the DN of the
        configured user and fills the ``self.credTuple`` tuple with
        ``(owner, ownerGroup, ownerDN)``.

        :return: S_OK(), or the failing result when the shifter options or the DN
                 of the user cannot be obtained
        """
        if not self.credentials:
            return S_OK()
        resCred = Operations().getOptionsDict("/Shifter/%s" % self.credentials)
        if not resCred['OK']:
            self.log.error("Cred: Failed to find shifter credentials",
                           self.credentials)
            return resCred
        owner = resCred['Value']['User']
        ownerGroup = resCred['Value']['Group']
        # Check the lookup like the one above: a failed result is handed back to the
        # caller instead of surfacing as a KeyError on the missing 'Value'
        resDN = getDNForUsername(owner)
        if not resDN['OK']:
            self.log.error("Cred: Failed to find the DN of the shifter user",
                           owner)
            return resDN
        # returns a list, the first entry is used
        ownerDN = resDN['Value'][0]
        self.credTuple = (owner, ownerGroup, ownerDN)
        self.log.info(
            "Cred: Tasks will be submitted with the credentials %s:%s" %
            (owner, ownerGroup))
        return S_OK()
Esempio n. 35
0
class DataRecoveryAgent(AgentModule):
    """Data Recovery Agent"""
    def __init__(self, *args, **kwargs):
        """Set up options, service clients and the table of per-job checks.

        ``self.todo`` maps a production category ('MCGeneration' or
        'OtherProductions') to an ordered list of checks. Every check is a
        dict with the keys:

        * ``Message`` / ``ShortMessage``: texts used for logging, the
          notification notes and the summary
        * ``Counter``: number of jobs that matched this check
        * ``Check``: predicate called with the job
        * ``Actions``: callable invoked with ``(job, tInfo)`` on a match

        ``checkJob`` walks the list in order and applies only the first check
        whose predicate is true, so the order of the entries matters.
        """
        AgentModule.__init__(self, *args, **kwargs)
        self.name = 'DataRecoveryAgent'
        self.enabled = False

        self.productionsToIgnore = self.am_getOption("ProductionsToIgnore", [])
        self.transformationTypes = self.am_getOption("TransformationTypes", [
            'MCReconstruction', 'MCSimulation', 'MCReconstruction_Overlay',
            'MCGeneration'
        ])
        self.transformationStatus = self.am_getOption("TransformationStatus",
                                                      ['Active', 'Completing'])
        # NOTE(review): this keeps the return value of am_setOption, not the
        # option value itself -- confirm that this is intended
        self.shifterProxy = self.am_setOption('shifterProxy', 'DataManager')

        self.jobStatus = [
            'Failed', 'Done'
        ]  ##This needs to be both otherwise we cannot account for all cases

        self.jobMon = JobMonitoringClient()
        self.fcClient = FileCatalogClient()
        self.tClient = TransformationClient()
        self.reqClient = ReqClient()
        self.diracILC = DiracILC()
        # input files recorded by the "One of many Successful" check below;
        # execute() replaces this set with an empty one for every production
        self.inputFilesProcessed = set()
        self.todo = {'MCGeneration':
                     [ dict( Message="MCGeneration: OutputExists: Job 'Done'",
                             ShortMessage="MCGeneration: job 'Done' ",
                             Counter=0,
                             Check=lambda job: job.allFilesExist() and job.status=='Failed',
                             Actions=lambda job,tInfo: [ job.setJobDone(tInfo) ]
                           ),
                       dict( Message="MCGeneration: OutputMissing: Job 'Failed'",
                             ShortMessage="MCGeneration: job 'Failed' ",
                             Counter=0,
                             Check=lambda job: job.allFilesMissing() and job.status=='Done',
                             Actions=lambda job,tInfo: [ job.setJobFailed(tInfo) ]
                           ),
                       # dict( Message="MCGeneration, job 'Done': OutputExists: Task 'Done'",
                       #       ShortMessage="MCGeneration: job already 'Done' ",
                       #       Counter=0,
                       #       Check=lambda job: job.allFilesExist() and job.status=='Done',
                       #       Actions=lambda job,tInfo: [ tInfo._TransformationInfo__setTaskStatus(job, 'Done') ]
                       #     ),
                     ],
                     'OtherProductions':
                     [ \
                   ## should always be first! (__notOnlyKeepers skips index 0 when counting)

                       dict( Message="One of many Successful: clean others",
                             ShortMessage="Other Tasks --> Keep",
                             Counter=0,
                             Check=lambda job: job.allFilesExist() and job.otherTasks and job.inputFile not in self.inputFilesProcessed,
                             Actions=lambda job,tInfo: [ self.inputFilesProcessed.add(job.inputFile), job.setJobDone(tInfo), job.setInputProcessed(tInfo) ]
                           ),
                       dict( Message="Other Task processed Input, no Output: Fail",
                             ShortMessage="Other Tasks --> Fail",
                             Counter=0,
                             Check=lambda job: job.inputFile in self.inputFilesProcessed and job.allFilesMissing() and job.status!='Failed',
                             Actions=lambda job,tInfo: [ job.setJobFailed(tInfo) ]
                           ),
                       dict( Message="Other Task processed Input: Fail and clean",
                             ShortMessage="Other Tasks --> Cleanup",
                             Counter=0,
                             Check=lambda job: job.inputFile in self.inputFilesProcessed and not job.allFilesMissing(),
                             Actions=lambda job,tInfo: [ job.setJobFailed(tInfo), job.cleanOutputs(tInfo) ]
                           ),
                       dict( Message="InputFile missing: mark job 'Failed', mark input 'Deleted', clean",
                             ShortMessage="Input Missing --> Job 'Failed, Input 'Deleted', Cleanup",
                             Counter=0,
                             Check=lambda job: job.inputFile and not job.inputFileExists and job.fileStatus != "Deleted",
                             Actions=lambda job,tInfo: [ job.cleanOutputs(tInfo), job.setJobFailed(tInfo), job.setInputDeleted(tInfo) ]
                           ),
                       dict( Message="InputFile Deleted, output Exists: mark job 'Failed', clean",
                             ShortMessage="Input Deleted --> Job 'Failed, Cleanup",
                             Counter=0,
                             Check=lambda job: job.inputFile and not job.inputFileExists and job.fileStatus == "Deleted" and not job.allFilesMissing(),
                             Actions=lambda job,tInfo: [ job.cleanOutputs(tInfo), job.setJobFailed(tInfo) ]
                           ),
                       ## All Output Exists
                       dict( Message="Output Exists, job Failed, input not Processed --> Job Done, Input Processed",
                             ShortMessage="Output Exists --> Job Done, Input Processed",
                             Counter=0,
                             Check=lambda job: job.allFilesExist() and \
                                               not job.otherTasks and \
                                               job.status=='Failed' and \
                                               job.fileStatus!="Processed" and \
                                               job.inputFileExists,
                             Actions=lambda job,tInfo: [ job.setJobDone(tInfo), job.setInputProcessed(tInfo) ]
                           ),
                       dict( Message="Output Exists, job Failed, input Processed --> Job Done",
                             ShortMessage="Output Exists --> Job Done",
                             Counter=0,
                             Check=lambda job: job.allFilesExist() and \
                                               not job.otherTasks and \
                                               job.status=='Failed' and \
                                               job.fileStatus=="Processed" and \
                                               job.inputFileExists,
                             Actions=lambda job,tInfo: [ job.setJobDone(tInfo) ]
                           ),
                       dict( Message="Output Exists, job Done, input not Processed --> Input Processed",
                             ShortMessage="Output Exists --> Input Processed",
                             Counter=0,
                             Check=lambda job: job.allFilesExist() and \
                                               not job.otherTasks and \
                                               job.status=='Done' and \
                                               job.fileStatus!="Processed" and \
                                               job.inputFileExists,
                             Actions=lambda job,tInfo: [ job.setInputProcessed(tInfo) ]
                           ),
                       ## Output missing. The MaxError entry must stay ahead of the
                       ## plain "Input Unused" entry: its condition is the same plus
                       ## the errorCount test, and only the first match is applied.
                       dict( Message="Output Missing, job Failed, input Assigned, MaxError --> Input MaxReset",
                             ShortMessage="Max ErrorCount --> Input MaxReset",
                             Counter=0,
                             Check=lambda job: job.allFilesMissing() and \
                                               not job.otherTasks and \
                                               job.status=='Failed' and \
                                               job.fileStatus in ASSIGNEDSTATES and \
                                               job.inputFile not in self.inputFilesProcessed and \
                                               job.inputFileExists and \
                                               job.errorCount > MAXRESET,
                             Actions=lambda job,tInfo: [ job.setInputMaxReset(tInfo) ]
                           ),
                       dict( Message="Output Missing, job Failed, input Assigned --> Input Unused",
                             ShortMessage="Output Missing --> Input Unused",
                             Counter=0,
                             Check=lambda job: job.allFilesMissing() and \
                                               not job.otherTasks and \
                                               job.status=='Failed' and \
                                               job.fileStatus in ASSIGNEDSTATES and \
                                               job.inputFile not in self.inputFilesProcessed and \
                                               job.inputFileExists,
                             Actions=lambda job,tInfo: [ job.setInputUnused(tInfo) ]
                           ),
                       dict( Message="Output Missing, job Done, input Assigned --> Job Failed, Input Unused",
                             ShortMessage="Output Missing --> Job Failed, Input Unused",
                             Counter=0,
                             Check=lambda job: job.allFilesMissing() and \
                                               not job.otherTasks and \
                                               job.status=='Done' and \
                                               job.fileStatus in ASSIGNEDSTATES and \
                                               job.inputFile not in self.inputFilesProcessed and \
                                               job.inputFileExists,
                             Actions=lambda job,tInfo: [ job.setInputUnused(tInfo), job.setJobFailed(tInfo) ]
                           ),
                       ## Some files missing, needing cleanup. Only checking for
                       ## assigned, because processed could mean an earlier job was
                       ## successful and this one is just the duplicate that needed
                       ## to be removed! But we check for other tasks earlier, so
                       ## this should not happen
                       dict( Message="Some missing, job Failed, input Assigned --> cleanup, Input 'Unused'",
                             ShortMessage="Output Missing --> Cleanup, Input Unused",
                             Counter=0,
                             Check=lambda job: job.someFilesMissing() and \
                                               not job.otherTasks and \
                                               job.status=='Failed' and \
                                               job.fileStatus in ASSIGNEDSTATES and \
                                               job.inputFileExists,
                             Actions=lambda job,tInfo: [job.cleanOutputs(tInfo),job.setInputUnused(tInfo)]
                             #Actions=lambda job,tInfo: []
                           ),
                       dict( Message="Some missing, job Done, input Assigned --> cleanup, job Failed, Input 'Unused'",
                             ShortMessage="Output Missing --> Cleanup, Job Failed, Input Unused",
                             Counter=0,
                             Check=lambda job: job.someFilesMissing() and \
                                               not job.otherTasks and \
                                               job.status=='Done' and \
                                               job.fileStatus in ASSIGNEDSTATES and \
                                               job.inputFileExists,
                             Actions=lambda job,tInfo: [job.cleanOutputs(tInfo),job.setInputUnused(tInfo),job.setJobFailed(tInfo)]
                             #Actions=lambda job,tInfo: []
                           ),
                       dict( Message="Some missing, job Done --> job Failed",
                             ShortMessage="Output Missing, Done --> Job Failed",
                             Counter=0,
                             Check=lambda job: not job.allFilesExist() and job.status=='Done',
                             Actions=lambda job,tInfo: [job.setJobFailed(tInfo)]
                           ),
                       dict ( Message="Something Strange",
                              ShortMessage="Strange",
                              Counter=0,
                              Check=lambda job: job.status not in ("Failed","Done"),
                              Actions=lambda job,tInfo: []
                            ),
                       ## Should always be the last one! Its Check never matches; the
                       ## counter is only incremented directly by __failJobHard.
                       dict ( Message="Failed Hard",
                              ShortMessage="Failed Hard",
                              Counter=0,
                              Check=lambda job: False, ## never
                              Actions=lambda job,tInfo: []
                            ),
                     ]
                    }
        # prodID -> (nDone, nFailed) as stored by treatProduction; used there
        # to skip productions whose job counts did not change
        self.jobCache = defaultdict(lambda: (0, 0))
        self.printEveryNJobs = self.am_getOption('PrintEvery', 200)
        ## Notification: notes accumulated for one production and mailed by execute()
        self.notesToSend = ""
        self.addressTo = self.am_getOption('MailTo',
                                           ["*****@*****.**"])
        self.addressFrom = self.am_getOption('MailFrom',
                                             "*****@*****.**")
        self.subject = "DataRecoveryAgent"

        #############################################################################
    def beginExecution(self):
        """Re-read the agent options at the start of a cycle.

        Every attribute listed below is overwritten with the current value
        of its option (or the given default).

        :returns: S_OK()
        """
        # (attribute name, option name, default), read in this order
        refreshedOptions = (
            ('enabled', 'EnableFlag', False),
            ('productionsToIgnore', "ProductionsToIgnore", []),
            ('transformationTypes', "TransformationTypes", [
                'MCReconstruction', 'MCSimulation',
                'MCReconstruction_Overlay', 'MCGeneration'
            ]),
            ('transformationStatus', "TransformationStatus",
             ['Active', 'Completing']),
            ('addressTo', 'MailTo', ["*****@*****.**"]),
            ('addressFrom', 'MailFrom', "*****@*****.**"),
            ('printEveryNJobs', 'PrintEvery', 200),
        )
        for attribute, optionName, default in refreshedOptions:
            setattr(self, attribute, self.am_getOption(optionName, default))

        return S_OK()

    #############################################################################
    def execute(self):
        """Run one cycle: treat every eligible production and mail the notes.

        Productions listed in ``self.productionsToIgnore`` are skipped. For
        the others the counters and ``self.inputFilesProcessed`` are reset,
        the production is treated, and, if notes were collected and
        ``__notOnlyKeepers`` agrees, the notes are mailed to every address
        in ``self.addressTo``.

        :returns: S_OK(), or S_ERROR when the transformations cannot be
                  obtained
        """
        self.log.notice("Will ignore the following productions: %s" %
                        self.productionsToIgnore)
        self.log.notice(" Job Cache: %s " % self.jobCache)
        eligible = self.getEligibleTransformations(self.transformationStatus,
                                                   self.transformationTypes)
        if not eligible['OK']:
            self.log.error("Failure to get transformations",
                           eligible['Message'])
            return S_ERROR("Failure to get transformations")

        for prodID, prodInfo in eligible['Value'].iteritems():
            if prodID in self.productionsToIgnore:
                self.log.notice("Ignoring Production: %s " % prodID)
                continue

            self.__resetCounters()
            self.inputFilesProcessed = set()
            transType, transName = prodInfo
            self.log.notice("Running over Production: %s " % prodID)
            self.treatProduction(int(prodID), transName, transType)

            if self.notesToSend and self.__notOnlyKeepers(transType):
                # something happened: drop the cached job counts of this
                # production so it is not skipped as "unchanged" next time
                self.jobCache.pop(int(prodID), None)
                mailer = NotificationClient()
                mailSubject = "%s: %s" % (self.subject, prodID)
                for recipient in self.addressTo:
                    sent = mailer.sendMail(recipient,
                                           mailSubject,
                                           self.notesToSend,
                                           self.addressFrom,
                                           localAttempt=False)
                    if sent['OK']:
                        continue
                    self.log.error('Cannot send notification mail',
                                   sent['Message'])
            # the notes never carry over to the next production
            self.notesToSend = ""

        return S_OK()

    def getEligibleTransformations(self, status, typeList):
        """Select the transformations with the given status and type.

        :param status: value used for the 'Status' condition
        :param typeList: value used for the 'Type' condition
        :returns: S_OK with a dict mapping ``str(TransformationID)`` to
                  ``(Type, TransformationName)``, or the failed result of the
                  transformation client
        """
        query = self.tClient.getTransformations(
            condDict=dict(Status=status, Type=typeList))
        if query['OK']:
            selected = {}
            for entry in query['Value']:
                transID = entry['TransformationID']
                transName = entry['TransformationName']
                selected[str(transID)] = (entry['Type'], transName)
            return S_OK(selected)
        return query

    def treatProduction(self, prodID, transName, transType):
        """Check all jobs of one production and print the summary.

        The production is skipped when the numbers of done and failed jobs
        equal the ones stored in ``self.jobCache`` for this prodID. Task
        information is only collected when transType does not start with
        "MCGeneration".
        """
        tInfo = TransformationInfo(prodID, transName, transType, self.enabled,
                                   self.tClient, self.fcClient, self.jobMon)
        jobs, nDone, nFailed = tInfo.getJobs(statusList=self.jobStatus)

        currentCounts = (nDone, nFailed)
        if self.jobCache[prodID] == currentCounts:
            self.log.notice("Skipping production %s because nothing changed" %
                            prodID)
            return
        self.jobCache[prodID] = currentCounts

        tasksDict, lfnTaskDict = None, None
        if not transType.startswith("MCGeneration"):
            self.log.notice("Getting tasks...")
            tasksDict = tInfo.checkTasksStatus()
            # reverse lookup: LFN of the task -> task ID
            lfnTaskDict = dict(
                (tasksDict[taskID]['LFN'], taskID) for taskID in tasksDict)

        self.checkAllJobs(jobs, tInfo, tasksDict, lfnTaskDict)
        self.printSummary()

    def checkJob(self, job, tInfo):
        """ deal with the job """
        checks = self.todo['MCGeneration'] if job.tType.startswith(
            'MCGeneration') else self.todo['OtherProductions']
        for do in checks:
            if do['Check'](job):
                do['Counter'] += 1
                self.log.notice(do['Message'])
                self.log.notice(job)
                self.notesToSend += do['Message'] + '\n'
                self.notesToSend += str(job) + '\n'
                do['Actions'](job, tInfo)
                return

    def checkAllJobs(self, jobs, tInfo, tasksDict=None, lfnTaskDict=None):
        """Collect the information for every job and run the checks on it.

        For each job the requests, the job information and the existence of
        its files are looked up; when both tasksDict and lfnTaskDict are
        non-empty the task information is added as well. The job is then
        handed to checkJob. Jobs with a pending request are skipped.

        :param jobs: mapping whose values are the job objects to treat
        :param tInfo: passed through to checkJob and __failJobHard
        :param tasksDict: passed to job.getTaskInfo
        :param lfnTaskDict: passed to job.getTaskInfo
        """
        # NOTE(review): filled below but not read anywhere in this method
        fileJobDict = defaultdict(list)
        counter = 0
        startTime = time.time()
        nJobs = len(jobs)
        self.log.notice("Running over all the jobs")
        for job in jobs.values():
            counter += 1
            # progress report every self.printEveryNJobs jobs
            if counter % self.printEveryNJobs == 0:
                self.log.notice(
                    "%d/%d: %3.1fs " %
                    (counter, nJobs, float(time.time() - startTime)))
            # Retry loop: every `break` finishes the current job, a
            # RuntimeError restarts the whole sequence for the same job.
            # NOTE(review): there is no retry limit, so a RuntimeError that
            # keeps recurring would keep this loop spinning -- confirm
            while True:
                try:
                    job.checkRequests(self.reqClient)
                    if job.pendingRequest:
                        self.log.warn("Job has Pending requests:\n%s" % job)
                        break
                    job.getJobInformation(self.diracILC)
                    job.checkFileExistance(self.fcClient)
                    if tasksDict and lfnTaskDict:
                        try:
                            job.getTaskInfo(tasksDict, lfnTaskDict)
                        except TaskInfoException as e:
                            self.log.error(
                                " Skip Task, due to TaskInfoException: %s" % e)
                            # without task information the job is not checked;
                            # a non-MCGeneration job without an input file is
                            # handed to __failJobHard instead
                            if job.inputFile is None and not job.tType.startswith(
                                    "MCGeneration"):
                                self.__failJobHard(job, tInfo)
                            break
                        fileJobDict[job.inputFile].append(job.jobID)
                    self.checkJob(job, tInfo)
                    break  # get out of the while loop
                except RuntimeError as e:
                    self.log.error("+++++ Failure for job: %d " % job.jobID)
                    self.log.error("+++++ Exception: ", str(e))
                    ## no break here: the while loop runs the job again

    def printSummary(self):
        """print summary of changes"""
        self.log.notice("Summary:")
        for do in itertools.chain.from_iterable(self.todo.values()):
            message = "%s: %s" % (do['ShortMessage'].ljust(56),
                                  str(do['Counter']).rjust(5))
            self.log.notice(message)
            if self.notesToSend:
                self.notesToSend = str(message) + '\n' + self.notesToSend

    def __resetCounters(self):
        """ reset counters for modified jobs """
        for _name, checks in self.todo.iteritems():
            for do in checks:
                do['Counter'] = 0

    def __failJobHard(self, job, tInfo):
        """ set job to failed and remove output files if there are any """
        if job.inputFile is not None:
            return
        if job.status in ("Failed",) \
           and job.allFilesMissing():
            return
        self.log.notice("Failing job hard %s" % job)
        self.notesToSend += "Failing job %s: no input file?\n" % job.jobID
        self.notesToSend += str(job) + '\n'
        self.todo['OtherProductions'][-1]['Counter'] += 1
        job.cleanOutputs(tInfo)
        job.setJobFailed(tInfo)
        # if job.inputFile is not None:
        #   job.setInputDeleted(tInfo)

    def __notOnlyKeepers(self, transType):
        """check of we only have 'Keep' messages

    in this case we do not have to send report email or run again next time

    """
        if transType.startswith('MCGeneration'):
            return True

        checks = self.todo['OtherProductions']
        totalCount = 0
        for check in checks[1:]:
            totalCount += check['Counter']

        return totalCount > 0
Esempio n. 36
0
class MCExtensionAgent(AgentModule):
    def __init__(self, *args, **kwargs):
        ''' Create the transformation client and read the agent options.

        The transformation types come from the agent option
        'TransformationTypes'; when that is empty they are taken from the
        Operations value 'Transformations/ExtendableTransfTypes'. Either way
        they are stored sorted.
        '''
        AgentModule.__init__(self, *args, **kwargs)

        self.transClient = TransformationClient()

        configuredTypes = self.am_getOption('TransformationTypes', [])
        if not configuredTypes:
            configuredTypes = Operations().getValue(
                'Transformations/ExtendableTransfTypes',
                ['MCSimulation', 'Simulation'])
        self.transformationTypes = sorted(configuredTypes)

        self.maxIterationTasks = self.am_getOption('TasksPerIteration', 50)
        self.maxFailRate = self.am_getOption('MaxFailureRate', 30)
        self.maxWaitingJobs = self.am_getOption('MaxWaitingJobs', 1000)

    #############################################################################
    def initialize(self):
        ''' Log the settings this agent works with.

        :returns: S_OK()
        '''
        # (message template, value), logged one after the other
        settings = (
            ("Will consider the following transformation types: %s",
             str(self.transformationTypes)),
            ("Will create a maximum of %s tasks per iteration",
             self.maxIterationTasks),
            ("Will not submit tasks for transformations with failure rate greater than %s%%",
             self.maxFailRate),
            ("Will not submit tasks for transformations with more than %d waiting jobs",
             self.maxWaitingJobs),
        )
        for template, value in settings:
            gLogger.info(template % value)

        return S_OK()

    #############################################################################
    def execute(self):
        ''' Try to extend every active transformation of the configured types.

        Nothing is done when the option 'EnableFlag' is not the string
        'True'. A failure to obtain the transformations is logged; the cycle
        itself is still reported as successful, as before.

        :returns: S_OK()
        '''

        self.enableFlag = self.am_getOption('EnableFlag', 'True')
        if self.enableFlag != 'True':
            self.log.info(
                'MCExtensionAgent is disabled by configuration option EnableFlag'
            )
            return S_OK('Disabled via CS flag')

        # Obtain the active transformations of the types handled by this agent
        res = self.transClient.getTransformations({
            'Status': 'Active',
            'Type': self.transformationTypes
        })
        if not res['OK']:
            # do not swallow the failure silently, leave a trace in the log
            self.log.error("Failed to get transformations", res['Message'])
            return S_OK()

        for transDict in res['Value']:
            transID = transDict['TransformationID']
            maxTasks = transDict['MaxNumberOfTasks']
            # extendTransformation logs its own failures
            self.extendTransformation(transID, maxTasks)
        return S_OK()

    def extendTransformation(self, transID, maxTasks):
        ''' Extend one transformation by the number of tasks it still needs.

        :param transID: ID of the transformation
        :param maxTasks: passed to _calculateTaskNumber together with the
                         current task statistics
        :returns: S_OK(), or the failed result of the transformation client
        '''
        gLogger.info("Considering extension of transformation %d" % transID)

        # Current count of tasks per status; 'No records found' counts as empty
        stats = self.transClient.getTransformationTaskStats(transID)
        if stats['OK']:
            statusDict = stats['Value']
        elif stats['Message'] == 'No records found':
            statusDict = {}
        else:
            gLogger.error("Failed to get task statistics",
                          "%s %s" % (transID, stats['Message']))
            return stats

        gLogger.verbose("Current task count for transformation %d" % transID)
        for status in sorted(statusDict):
            paddedCount = str(statusDict[status]).rjust(8)
            gLogger.verbose("%s : %s" % (status.ljust(20), paddedCount))

        # Number of tasks to be created
        numberOfTasks = self._calculateTaskNumber(maxTasks, statusDict)
        if not numberOfTasks:
            gLogger.info("No tasks required for transformation %d" % transID)
            return S_OK()

        # Extend the transformation by that many tasks
        extension = self.transClient.extendTransformation(
            transID, numberOfTasks)
        if not extension['OK']:
            gLogger.error("Failed to extend transformation",
                          "%s %s" % (transID, extension['Message']))
            return extension

        gLogger.info("Successfully extended transformation %d by %d tasks" %
                     (transID, numberOfTasks))
        return S_OK()

    def _calculateTaskNumber(self, maxTasks, statusDict):
        ''' Utility function
    '''
        done = statusDict.get('Done', 0)
        failed = statusDict.get('Failed', 0)
        waiting = statusDict.get('Waiting', 0)
        total = statusDict.get('TotalCreated', 0)
        # If the failure rate is higher than acceptable
        if (total != 0) and (
            (100.0 * float(failed) / float(total)) > self.maxFailRate):
            return 0
        # If we already have enough completed jobs
        if done >= maxTasks:
            return 0
        if waiting > self.maxWaitingJobs:
            return 0
        numberOfTasks = maxTasks - (total - failed)
        if numberOfTasks < 0:
            # this happens when people extend the transformation manually instead of increasing MaxNumberOfTasks
            return 0
        if numberOfTasks > self.maxIterationTasks:
            numberOfTasks = self.maxIterationTasks
        return numberOfTasks
class TaskManagerAgentBase( AgentModule, TransformationAgentsUtilities ):
  """ To be extended. Please look at WorkflowTaskAgent and RequestTaskAgent.
  """

  def __init__( self, *args, **kwargs ):
    """ Constructor: initialise both base classes and set the default attribute values.

        Extension agents must always call this.
    """
    AgentModule.__init__( self, *args, **kwargs )
    TransformationAgentsUtilities.__init__( self )

    # Replaced by the real values in initialize()
    self.transClient = None
    self.pluginLocation = ''

    # Transformation types handled by the agent
    self.transType = []

    # Default for the 'TasksPerLoop' option read in execute()
    self.tasksPerLoop = 50

    # Credentials, filled from the proxy information in execute()
    self.owner = self.ownerGroup = self.ownerDN = ''

    # Book-keeping shared with the worker threads
    self.transInThread = {}
    self.transInQueue = []
    self.transQueue = Queue()

  #############################################################################

  def initialize( self ):
    """ Agent initialization: register the monitoring activity, read the options and start the threads.

        Extensions MUST, in their own initialize method:
        - provide the TransformationClient object (self.transClient),
        - set the shifterProxy if it differs from the default one ('ProductionManager'),
        - provide the list of transformation types to look at (self.transType)
    """
    gMonitor.registerActivity( "SubmittedTasks",
                               "Automatically submitted tasks",
                               "Transformation Monitoring",
                               "Tasks",
                               gMonitor.OP_ACUM )

    defaultPlugin = 'DIRAC.TransformationSystem.Client.TaskManagerPlugin'
    self.pluginLocation = self.am_getOption( 'PluginLocation', defaultPlugin )

    # Default client
    self.transClient = TransformationClient()

    # One _execute job is queued per thread, each receiving its index as thread ID
    nThreads = self.am_getOption( 'maxNumberOfThreads', 15 )
    pool = ThreadPool( nThreads, nThreads )
    self.log.verbose( "Multithreaded with %d threads" % nThreads )
    for threadID in xrange( nThreads ):
      pool.generateJobAndQueueIt( self._execute, [threadID] )

    return S_OK()

  def finalize( self ):
    """ Graceful finalization: forget the queued transformations and wait for the busy threads.
    """
    if not self.transInQueue:
      return S_OK()

    nInThread = len( self.transInThread )
    self._logInfo( "Wait for threads to get empty before terminating the agent (%d tasks)" % nInThread )
    self.transInQueue = []
    # Poll every 2 seconds until transInThread is empty
    while self.transInThread:
      time.sleep( 2 )
    self.log.info( "Threads are empty, terminating the agent..." )
    return S_OK()

  #############################################################################

  def execute( self ):
    """ The TaskManagerBase execution method is just filling the Queues of transformations that need to be processed

        Each of the options 'MonitorTasks', 'MonitorFiles', 'CheckReserved' and 'SubmitTasks' enables one
        operation ('updateTaskStatus', 'updateFileStatus', 'checkReservedTasks', 'submitTasks'). For every
        enabled operation the matching transformations are selected and the operation is registered for them;
        the resulting dictionary is then handed to _fillTheQueue.

        :return: S_OK(), or the getProxyInfo error (nothing is queued in that case) when submission is
                 enabled and the credentials cannot be determined
    """

    operationsOnTransformationDict = {}

    # Determine whether the task status is to be monitored and updated
    if not self.am_getOption( 'MonitorTasks', '' ):
      self.log.verbose( "Monitoring of tasks is disabled. To enable it, create the 'MonitorTasks' option" )
    else:
      # Get the transformations for which the tasks have to be updated
      status = self.am_getOption( 'UpdateTasksStatus', ['Active', 'Completing', 'Stopped'] )
      transformations = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
      self._addOperationForTransformations( operationsOnTransformationDict, 'updateTaskStatus', transformations )

    # Determine whether the task files status is to be monitored and updated
    if not self.am_getOption( 'MonitorFiles', '' ):
      self.log.verbose( "Monitoring of files is disabled. To enable it, create the 'MonitorFiles' option" )
    else:
      # Get the transformations for which the files have to be updated
      status = self.am_getOption( 'UpdateFilesStatus', ['Active', 'Completing', 'Stopped'] )
      transformations = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
      self._addOperationForTransformations( operationsOnTransformationDict, 'updateFileStatus', transformations )

    # Determine whether the checking of reserved tasks is to be performed
    if not self.am_getOption( 'CheckReserved', '' ):
      self.log.verbose( "Checking of reserved tasks is disabled. To enable it, create the 'CheckReserved' option" )
    else:
      # Get the transformations for which the check of reserved tasks have to be performed
      status = self.am_getOption( 'CheckReservedStatus', ['Active', 'Completing', 'Stopped'] )
      transformations = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
      self._addOperationForTransformations( operationsOnTransformationDict, 'checkReservedTasks', transformations )

    # Determine whether the submission of tasks is to be performed
    if not self.am_getOption( 'SubmitTasks', '' ):
      self.log.verbose( "Submission of tasks is disabled. To enable it, create the 'SubmitTasks' option" )
    else:
      # getting the credentials for submission
      res = getProxyInfo( False, False )
      if not res['OK']:
        self.log.error( "Failed to determine credentials for submission", res['Message'] )
        return res
      proxyInfo = res['Value']
      self.owner = proxyInfo['username']
      self.ownerGroup = proxyInfo['group']
      self.ownerDN = proxyInfo['identity']
      self.log.info( "Tasks will be submitted with the credentials %s:%s" % ( self.owner, self.ownerGroup ) )
      # Get the transformations for which tasks have to be submitted
      # (agentType is left to the _selectTransformations default here)
      status = self.am_getOption( 'SubmitStatus', ['Active', 'Completing'] )
      transformations = self._selectTransformations( transType = self.transType, status = status )
      if transformations['OK']:
        # The number of tasks per loop is only refreshed when there is something to submit for
        self.tasksPerLoop = self.am_getOption( 'TasksPerLoop', self.tasksPerLoop )
      self._addOperationForTransformations( operationsOnTransformationDict, 'submitTasks', transformations )

    self._fillTheQueue( operationsOnTransformationDict )

    return S_OK()

  def _addOperationForTransformations( self, operationsOnTransformationDict, operation, transformations ):
    """ Register one operation for every transformation of a _selectTransformations result

        :param dict operationsOnTransformationDict: {transID: {'Body': body, 'Operations': [...]}}, updated in place
        :param str operation: name of the operation to register
        :param dict transformations: result of _selectTransformations; a failed result is only
                                     logged as a warning and nothing is registered
    """
    if not transformations['OK']:
      self.log.warn( "Could not select transformations: %s" % transformations['Message'] )
      return

    # Going through a dictionary keeps one entry per TransformationID
    transformationIDsAndBodies = dict( ( transformation['TransformationID'], transformation['Body'] )
                                       for transformation in transformations['Value'] )
    for transID, body in transformationIDsAndBodies.items():
      if transID in operationsOnTransformationDict:
        operationsOnTransformationDict[transID]['Operations'].append( operation )
      else:
        operationsOnTransformationDict[transID] = {'Body': body, 'Operations': [operation]}

  def _selectTransformations( self, transType = [], status = ['Active', 'Completing'], agentType = ['Automatic'] ):
    """ get the transformations
    """
    selectCond = {}
    if status:
      selectCond['Status'] = status
    if transType:
      selectCond['Type'] = transType
    if agentType:
      selectCond['AgentType'] = agentType
    res = self.transClient.getTransformations( condDict = selectCond )
    if not res['OK']:
      self.log.error( "Failed to get transformations: %s" % res['Message'] )
    elif not res['Value']:
      self.log.verbose( "No transformations found" )
    else:
      self.log.verbose( "Obtained %d transformations" % len( res['Value'] ) )
    return res

  def _fillTheQueue( self, operationsOnTransformationsDict ):
    """ Just fill the queue with the operation to be done on a certain transformation
    """
    count = 0
    for transID, bodyAndOps in operationsOnTransformationsDict.iteritems():
      if transID not in self.transInQueue:
        count += 1
        self.transInQueue.append( transID )
        self.transQueue.put( {transID: bodyAndOps} )

    self.log.info( "Out of %d transformations, %d put in thread queue" % ( len( operationsOnTransformationsDict ),
                                                                           count ) )

  #############################################################################

  def _getClients( self ):
    """ Build the clients used inside one thread - another function meant to be extended.

        What is created here are defaults that should be adapted:
        a TransformationClient and a WorkflowTasks task manager.
    """
    clients = {'TransformationClient': TransformationClient()}

    # WorkflowTasks is for wms tasks, replace it with something else if needed
    taskManager = WorkflowTasks()
    taskManager.pluginLocation = self.pluginLocation
    clients['TaskManager'] = taskManager

    return clients

  def _execute( self, threadID ):
    """ This is what runs inside the threads, in practice this is the function that does the real stuff
    """
    # Each thread will have its own clients
    clients = self._getClients()
    startTime = 0
    method = '_execute'

    while True:
      transIDOPBody = self.transQueue.get()
      try:
        transID = transIDOPBody.keys()[0]
        operations = transIDOPBody[transID]['Operations']
        if transID not in self.transInQueue:
          self._logWarn( "Got a transf not in transInQueue...?", method = method, transID = transID )
          break
        self.transInThread[transID] = ' [Thread%d] [%s] ' % ( threadID, str( transID ) )
        clients['TaskManager'].transInThread = self.transInThread
        for operation in operations:
          self._logInfo( "Starting processing operation %s" % operation, method = method, transID = transID )
          startTime = time.time()
          res = getattr( self, operation )( transIDOPBody, clients )
          if not res['OK']:
            self._logError( "Failed to %s: %s" % ( operation, res['Message'] ), method = method, transID = transID )
          self._logInfo( "Processed operation %s in %.1f seconds" % ( operation, time.time() - startTime if startTime else time.time() ),
                         method = method, transID = transID )
      except Exception, x:
        self._logException( 'Exception executing operation %s' % operation, lException = x, transID = transID, method = method )
      finally:
Esempio n. 38
0
def _prodSummaryXSection(addinfo):
    """Return addinfo['xsection']['sum']['xsection'], or None when any level of that path is missing."""
    if 'xsection' in addinfo:
        if 'sum' in addinfo['xsection']:
            if 'xsection' in addinfo['xsection']['sum']:
                return addinfo['xsection']['sum']['xsection']
    return None


def _getProductionSummary():
    """Write an HTML summary of productions to ./tables.html and exit.

    The productions are either those given on the command line or those
    returned by the transformation client for the requested statuses and
    types. For each production the file catalog is queried for its files and
    directory metadata, the number of events and the cross section are
    collected, and the result is grouped by detector and data type into HTML
    tables. The function ends by calling dexit(0).
    """
    clip = _Params()
    clip.registerSwitch()
    Script.parseCommandLine()
    from ILCDIRAC.Core.Utilities.HTML import Table
    from ILCDIRAC.Core.Utilities.ProcessList import ProcessList
    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
    from DIRAC import gConfig, gLogger
    prod = clip.prod
    full_detail = clip.full_det
    fc = FileCatalogClient()

    processlist = gConfig.getValue('/LocalSite/ProcessListPath')
    prl = ProcessList(processlist)
    processesdict = prl.getProcessesDict()

    trc = TransformationClient()
    prodids = []
    if not prod:
        conddict = {'Status': clip.statuses}
        if clip.ptypes:
            conddict['Type'] = clip.ptypes
        res = trc.getTransformations(conddict)
        if res['OK']:
            prodids.extend(transfs['TransformationID'] for transfs in res['Value'])
        else:
            # Previously ignored silently, leaving an empty report with no explanation
            gLogger.error("Failed to get transformations: %s" % res['Message'])
    else:
        prodids.extend(prod)

    metadata = []

    gLogger.info("Will run on prods %s" % str(prodids))

    for prodID in prodids:
        if prodID < clip.minprod:
            continue
        meta = {'ProdID': prodID}
        res = trc.getTransformation(str(prodID))
        if not res['OK']:
            gLogger.error("Error getting transformation %s" % prodID)
            continue
        prodtype = res['Value']['Type']
        proddetail = res['Value']['Description']
        if prodtype in ('MCReconstruction', 'MCReconstruction_Overlay'):
            meta['Datatype'] = 'DST'
        elif prodtype == 'MCGeneration':
            meta['Datatype'] = 'gen'
        elif prodtype == 'MCSimulation':
            meta['Datatype'] = 'SIM'
        elif prodtype in ['Split', 'Merge']:
            gLogger.warn("Invalid query for %s productions" % prodtype)
            continue
        else:
            gLogger.error("Unknown production type %s" % prodtype)
            continue
        res = fc.findFilesByMetadata(meta)
        if not res['OK']:
            gLogger.error(res['Message'])
            continue
        lfns = res['Value']
        nb_files = len(lfns)
        if not lfns:
            gLogger.warn("No files found for prod %s" % prodID)
            continue
        path = os.path.dirname(lfns[0])
        res = fc.getDirectoryUserMetadata(path)
        if not res['OK']:
            gLogger.warn('No meta data found for %s' % path)
            continue
        dirmeta = {'proddetail': proddetail,
                   'prodtype': prodtype,
                   'nb_files': nb_files}
        dirmeta.update(res['Value'])

        lumi = 0.
        nbevts = 0
        files = 0
        xsec = 0.0
        if not full_detail:
            # Quick mode: read the first file only and scale by the number of files
            info = _getFileInfo(lfns[0])
            nbevts = info[1] * len(lfns)
            lumi = info[0] * len(lfns)
            value = _prodSummaryXSection(info[2])
            if value is not None:
                xsec += value
                files += 1
        else:
            for lfn in lfns:
                info = _getFileInfo(lfn)
                lumi += info[0]
                nbevts += info[1]
                value = _prodSummaryXSection(info[2])
                if value is not None:
                    xsec += value
                    files += 1

        if not lumi:
            # No luminosity on the files themselves: take the cross section
            # from the deepest level of ancestors that has any file
            xsec = 0
            files = 0
            depthDict = {}
            seenAncestors = set()
            res = fc.getFileAncestors(lfns, [1, 2, 3, 4])
            if res['OK']:
                for ancestorsDict in res['Value']['Successful'].values():
                    for ancestor, dep in ancestorsDict.items():
                        depthDict.setdefault(dep, [])
                        if ancestor not in seenAncestors:
                            depthDict[dep].append(ancestor)
                            seenAncestors.add(ancestor)
            populatedDepths = [dep for dep in depthDict if depthDict[dep]]
            # Guard: the lookup may have failed or returned no ancestor at all
            if populatedDepths:
                for ancestor in depthDict[max(populatedDepths)]:
                    info = _getFileInfo(ancestor)
                    value = _prodSummaryXSection(info[2])
                    if value is not None:
                        xsec += value
                        files += 1

        if xsec and files:
            # Average over the files that provided a cross section
            xsec /= files
            dirmeta['CrossSection'] = xsec
        else:
            dirmeta['CrossSection'] = 0.0

        if nbevts:
            dirmeta['NumberOfEvents'] = nbevts
        # The directory metadata may already carry a value; only default it
        dirmeta.setdefault('NumberOfEvents', 0)

        # Fall back to the raw event type whenever the process list has no
        # 'Detail' for it, so 'detail' is always bound for this production
        evtType = dirmeta['EvtType']
        detail = evtType
        if evtType in processesdict and 'Detail' in processesdict[evtType]:
            detail = processesdict[evtType]['Detail']

        if not prodtype == 'MCGeneration':
            res = trc.getTransformationInputDataQuery(str(prodID))
            if res['OK']:
                if 'ProdID' in res['Value']:
                    dirmeta['MomProdID'] = res['Value']['ProdID']
        dirmeta.setdefault('MomProdID', 0)
        dirmeta['detail'] = _translate(detail)

        metadata.append(dirmeta)

    corres = {
        "MCGeneration": 'gen',
        "MCSimulation": 'SIM',
        "MCReconstruction": "REC",
        "MCReconstruction_Overlay": "REC"
    }
    detectors = {
        'ILD': {'SIM': [], 'REC': []},
        'SID': {'SIM': [], 'REC': []},
        'sid': {'SIM': [], 'REC': []},
        'gen': [],
    }
    for channel in metadata:
        if 'DetectorType' not in channel:
            detectors['gen'].append(
                (channel['detail'], channel['Energy'], channel['ProdID'],
                 channel['nb_files'],
                 channel['NumberOfEvents'] / channel['nb_files'],
                 channel['NumberOfEvents'], channel['CrossSection'],
                 str(channel['proddetail'])))
        else:
            if not channel['DetectorType'] in detectors:
                gLogger.error("This is unknown detector",
                              channel['DetectorType'])
                continue
            detectors[channel['DetectorType']][corres[
                channel['prodtype']]].append(
                    (channel['detail'], channel['Energy'],
                     channel['DetectorType'], channel['ProdID'],
                     channel['nb_files'],
                     channel['NumberOfEvents'] / channel['nb_files'],
                     channel['NumberOfEvents'], channel['CrossSection'],
                     channel['MomProdID'], str(channel['proddetail'])))

    genHeader = ('Channel', 'Energy', 'ProdID', 'Tasks',
                 'Average Evts/task', 'Statistics',
                 'Cross Section (fb)', 'Comment')
    detectorHeader = ('Channel', 'Energy', 'Detector',
                      'ProdID', 'Number of Files',
                      'Events/File', 'Statistics',
                      'Cross Section (fb)',
                      'Origin ProdID', 'Comment')
    # (key in detectors, HTML section title, label used in the log)
    detectorSections = (('ILD', 'ILD prods', 'ILC CDR prods'),
                        ('SID', 'SID prods', 'SID CDR prods'),
                        ('sid', 'sid dbd prods', 'sid DBD prods'))

    with open("tables.html", "w") as of:
        of.write("""<!DOCTYPE html>
<html>
 <head>
<title> Production summary </title>
</head>
<body>
""")
        if detectors['gen']:
            of.write("<h1>gen prods</h1>\n")
            table = Table(header_row=genHeader)
            for item in detectors['gen']:
                table.rows.append(item)
            of.write(str(table))
            gLogger.info("Gen prods")
            gLogger.info(str(table))

        for detector, title, logLabel in detectorSections:
            prodsByType = detectors[detector]
            # The per-detector dict always holds its 'SIM' and 'REC' keys, so
            # test the lists themselves to skip sections without productions
            if not any(prodsByType.values()):
                continue
            of.write("<h1>%s</h1>\n" % title)
            for ptype in prodsByType:
                if not prodsByType[ptype]:
                    continue
                of.write("<h2>%s</h2>\n" % ptype)
                table = Table(header_row=detectorHeader)
                for item in prodsByType[ptype]:
                    table.rows.append(item)
                of.write(str(table))
                gLogger.info("%s %s" % (logLabel, ptype))
                gLogger.info(str(table))

        of.write("""
</body>
</html>
""")
    gLogger.notice("Check ./tables.html in any browser for the results")
    dexit(0)
Esempio n. 39
0
class MCExtensionAgent(AgentModule):

    #############################################################################
    def initialize(self):
        """Sets defaults """
        self.transClient = TransformationClient()

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        self.transformationTypes = sortList(
            self.am_getOption('TransformationTypes',
                              ['MCSimulation', 'Simulation']))
        gLogger.info("Will consider the following transformation types: %s" %
                     str(self.transformationTypes))
        self.maxIterationTasks = self.am_getOption('TasksPerIteration', 50)
        gLogger.info("Will create a maximum of %s tasks per iteration" %
                     self.maxIterationTasks)
        self.maxFailRate = self.am_getOption('MaxFailureRate', 30)
        gLogger.info(
            "Will not submit tasks for transformations with failure rate greater than %s%s"
            % (self.maxFailRate, '%'))
        self.maxWaitingJobs = self.am_getOption('MaxWaitingJobs', 1000)
        gLogger.info(
            "Will not submit tasks for transformations with more than %d waiting jobs"
            % self.maxWaitingJobs)
        return S_OK()

    #############################################################################
    def execute(self):
        """ The MCExtensionAgent execution method."""

        self.enableFlag = self.am_getOption('EnableFlag', 'True')
        if not self.enableFlag == 'True':
            self.log.info(
                'TransformationCleaningAgent is disabled by configuration option %s/EnableFlag'
                % (self.section))
            return S_OK('Disabled via CS flag')

        # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
        res = self.transClient.getTransformations({
            'Status':
            'Active',
            'Type':
            self.transformationTypes
        })
        if res['OK']:
            for transDict in res['Value']:
                transID = transDict['TransformationID']
                maxTasks = transDict['MaxNumberOfTasks']
                self.extendTransformation(transID, maxTasks)
        return S_OK()

    def extendTransformation(self, transID, maxTasks):
        gLogger.info("Considering extension of transformation %d" % transID)
        # Get the current count of tasks submitted for this transformation
        res = self.transClient.getTransformationTaskStats(transID)
        if not res['OK']:
            if res['Message'] != 'No records found':
                gLogger.error("Failed to get task statistics",
                              "%s %s" % (transID, res['Message']))
                return res
            else:
                statusDict = {}
        else:
            statusDict = res['Value']
        gLogger.verbose("Current task count for transformation %d" % transID)
        for status in sortList(statusDict.keys()):
            statusCount = statusDict[status]
            gLogger.verbose("%s : %s" %
                            (status.ljust(20), str(statusCount).rjust(8)))
        # Determine the number of tasks to be created
        numberOfTasks = self.calculateTaskNumber(maxTasks, statusDict)
        if not numberOfTasks:
            gLogger.info("No tasks required for transformation %d" % transID)
            return S_OK()
        # Extend the transformation by the determined number of tasks
        res = self.transClient.extendTransformation(transID, numberOfTasks)
        if not res['OK']:
            gLogger.error("Failed to extend transformation",
                          "%s %s" % (transID, res['Message']))
            return res
        gLogger.info("Successfully extended transformation %d by %d tasks" %
                     (transID, numberOfTasks))
        return S_OK()

    def calculateTaskNumber(self, maxTasks, statusDict):
        done = statusDict.get('Done', 0)
        failed = statusDict.get('Failed', 0)
        running = statusDict.get('Running', 0)
        waiting = statusDict.get('Waiting', 0)
        total = statusDict.get('Created', 0)
        # If the failure rate is higher than acceptable
        if (total != 0) and (
            (100.0 * float(failed) / float(total)) > self.maxFailRate):
            return 0
        # If we already have enough completed jobs
        if done >= maxTasks:
            return 0
        if waiting > self.maxWaitingJobs:
            return 0
        numberOfTasks = maxTasks - (total - failed)
        if numberOfTasks > self.maxIterationTasks:
            numberOfTasks = self.maxIterationTasks
        return numberOfTasks
Esempio n. 40
0
class ValidateOutputDataAgent( AgentModule ):
  """ Agent checking the output data of transformations in 'ValidatingOutput' status.

      For each such transformation the output directories are looked up, catalog->SE and
      SE->catalog checks are requested from the DataIntegrityClient, and the transformation
      status is then set to 'WaitingIntegrity' or 'ValidatedOutput'.
  """

  def __init__( self, *args, **kwargs ):
    """ c'tor: create the clients and read the agent options
    """
    AgentModule.__init__( self, *args, **kwargs )

    self.integrityClient = DataIntegrityClient()
    self.fc = FileCatalog()
    self.transClient = TransformationClient()
    self.fileCatalogClient = FileCatalogClient()

    # The agent option takes precedence over the Operations default
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = agentTSTypes
    else:
      self.transformationTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )

    self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', ['TransformationDB',
                                                                                  'MetadataCatalog'] ) )
    self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) )
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.enableFlag = True

  #############################################################################

  def initialize( self ):
    """ Sets defaults and logs the effective configuration

        :return: S_OK()
    """
    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    gLogger.info( "Will treat the following transformation types: %s" % str( self.transformationTypes ) )
    gLogger.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    gLogger.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    gLogger.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    return S_OK()

  #############################################################################

  def execute( self ):
    """ The VerifyOutputData execution method

        First updates the 'WaitingIntegrity' transformations, then runs the integrity
        check on every 'ValidatingOutput' transformation of the configured types.

        :return: S_OK, or the failed result of the transformation query
    """
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if self.enableFlag != 'True':
      self.log.info( "VerifyOutputData is disabled by configuration option 'EnableFlag'" )
      return S_OK( 'Disabled via CS flag' )

    gLogger.info( "-" * 40 )
    self.updateWaitingIntegrity()
    gLogger.info( "-" * 40 )

    res = self.transClient.getTransformations( {'Status':'ValidatingOutput', 'Type':self.transformationTypes} )
    if not res['OK']:
      gLogger.error( "Failed to get ValidatingOutput transformations", res['Message'] )
      return res
    transDicts = res['Value']
    if not transDicts:
      gLogger.info( "No transformations found in ValidatingOutput status" )
      return S_OK()
    gLogger.info( "Found %s transformations in ValidatingOutput status" % len( transDicts ) )
    for transDict in transDicts:
      transID = transDict['TransformationID']
      res = self.checkTransformationIntegrity( int( transID ) )
      if not res['OK']:
        # %s rather than %d: transID is used as it comes from the client and may not be an int
        gLogger.error( "Failed to perform full integrity check for transformation %s" % transID,
                       res['Message'] )
      else:
        self.finalizeCheck( transID )
        gLogger.info( "-" * 40 )
    return S_OK()

  def updateWaitingIntegrity( self ):
    """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput'

        A transformation is updated only when no problematic files are reported for it.

        :return: S_OK, or the failed result of the transformation query
    """
    gLogger.info( "Looking for transformations in the WaitingIntegrity status to update" )
    res = self.transClient.getTransformations( {'Status':'WaitingIntegrity'} )
    if not res['OK']:
      gLogger.error( "Failed to get WaitingIntegrity transformations", res['Message'] )
      return res
    transDicts = res['Value']
    if not transDicts:
      gLogger.info( "No transformations found in WaitingIntegrity status" )
      return S_OK()
    gLogger.info( "Found %s transformations in WaitingIntegrity status" % len( transDicts ) )
    for transDict in transDicts:
      transID = transDict['TransformationID']
      gLogger.info( "-" * 40 )
      res = self.integrityClient.getTransformationProblematics( int( transID ) )
      if not res['OK']:
        gLogger.error( "Failed to determine waiting problematics for transformation", res['Message'] )
      elif not res['Value']:
        res = self.transClient.setTransformationParameter( transID, 'Status', 'ValidatedOutput' )
        if not res['OK']:
          gLogger.error( "Failed to update status of transformation %s to ValidatedOutput" % ( transID ) )
        else:
          gLogger.info( "Updated status of transformation %s to ValidatedOutput" % ( transID ) )
      else:
        gLogger.info( "%d problematic files for transformation %s were found" % ( len( res['Value'] ), transID ) )
    # Per-transformation failures are only logged; return a result dict like every other path
    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ Get the directories for the supplied transformation from the transformation system

        Sources are the 'OutputDirectories' transformation parameter and/or the metadata
        catalog, depending on self.directoryLocations.

        :param transID: transformation ID
        :return: S_OK( sorted list of directories ), or the failed client result
    """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        gLogger.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.fileCatalogClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        gLogger.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )
    if not directories:
      gLogger.info( "No output directories found" )
    directories = sorted( directories )
    return S_OK( directories )

  @staticmethod
  def _addDirs( transID, newDirs, existingDirs ):
    """ Append to existingDirs the entries of newDirs that contain the zero-padded
        (8 digits) transformation ID and are not already present

        :return: existingDirs (modified in place)
    """
    # Loop invariant: compute the padded ID once
    transStr = str( transID ).zfill( 8 )
    for nDir in newDirs:
      if re.search( transStr, nDir ) and nDir not in existingDirs:
        existingDirs.append( nDir )
    return existingDirs

  #############################################################################
  def checkTransformationIntegrity( self, transID ):
    """ This method contains the real work

        Runs the catalog->SE check on every existing output directory, then the
        SE->catalog check for every storage element in self.activeStorages.

        :param transID: transformation ID
        :return: S_OK, or the first failed result / S_ERROR encountered
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Checking the integrity of transformation %s" % transID )
    gLogger.info( "-" * 40 )

    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      return res
    directories = res['Value']
    if not directories:
      return S_OK()

    ######################################################
    #
    # This check performs Catalog->SE for possible output directories
    #
    res = self.fc.exists( directories )
    if not res['OK']:
      gLogger.error( res['Message'] )
      return res
    # 'Failed' is treated as a directory -> error mapping, like 'Successful' below.
    # Iterating the mapping itself would yield only keys and break the unpacking;
    # dict() also accepts a sequence of (directory, error) pairs.
    failed = dict( res['Value']['Failed'] )
    for directory, error in failed.items():
      gLogger.error( 'Failed to determine existance of directory', '%s %s' % ( directory, error ) )
    if failed:
      return S_ERROR( "Failed to determine the existance of directories" )
    directoryExists = res['Value']['Successful']
    for directory in sorted( directoryExists ):
      if not directoryExists[directory]:
        continue
      iRes = self.integrityClient.catalogDirectoryToSE( directory )
      if not iRes['OK']:
        gLogger.error( iRes['Message'] )
        return iRes

    ######################################################
    #
    # This check performs SE->Catalog for possible output directories
    #
    for storageElementName in sorted( self.activeStorages ):
      res = self.integrityClient.storageDirectoryToCatalog( directories, storageElementName )
      if not res['OK']:
        gLogger.error( res['Message'] )
        return res

    gLogger.info( "-" * 40 )
    gLogger.info( "Completed integrity check for transformation %s" % transID )
    return S_OK()

  def finalizeCheck( self, transID ):
    """ Move to 'WaitingIntegrity' or 'ValidatedOutput'

        'WaitingIntegrity' is chosen when problematic files are reported, 'ValidatedOutput'
        otherwise; the status is left untouched if the problematics can't be retrieved.

        :return: S_OK() in all cases, failures are only logged
    """
    res = self.integrityClient.getTransformationProblematics( int( transID ) )
    if not res['OK']:
      gLogger.error( "Failed to determine whether there were associated problematic files", res['Message'] )
      newStatus = ''
    elif res['Value']:
      gLogger.info( "%d problematic files for transformation %s were found" % ( len( res['Value'] ), transID ) )
      newStatus = "WaitingIntegrity"
    else:
      gLogger.info( "No problematics were found for transformation %s" % transID )
      newStatus = "ValidatedOutput"
    if newStatus:
      res = self.transClient.setTransformationParameter( transID, 'Status', newStatus )
      if not res['OK']:
        gLogger.error( "Failed to update status of transformation %s to %s" % ( transID, newStatus ) )
      else:
        gLogger.info( "Updated status of transformation %s to %s" % ( transID, newStatus ) )
    gLogger.info( "-" * 40 )
    return S_OK()
Esempio n. 41
0
class Transformation(API):
    """Client-side handle on a single transformation definition.

    Parameter values live in ``self.paramValues``. ``getXxx()`` and ``setXxx(value)``
    accessors are synthesised for any parameter name by ``__getattr__``; once the
    object is bound to an existing transformation (``self.exists``), setters also
    forward the new value to the TransformationClient.
    """

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """Set the parameter defaults and, when ``transID`` is given, load it.

        :param transID: ID of an existing transformation to load (0: new definition)
        :param transClient: client to use; a TransformationClient is created if omitted
        :raises AttributeError: when the lookup reports "Transformation does not exist"
        """
        super(Transformation, self).__init__()

        # Accepted value types per known parameter (checked in __setParam)
        self.paramTypes = {
            "TransformationID": [types.IntType, types.LongType],
            "TransformationName": types.StringTypes,
            "Status": types.StringTypes,
            "Description": types.StringTypes,
            "LongDescription": types.StringTypes,
            "Type": types.StringTypes,
            "Plugin": types.StringTypes,
            "AgentType": types.StringTypes,
            "FileMask": types.StringTypes,
            "TransformationGroup": types.StringTypes,
            "GroupSize": [types.IntType, types.LongType, types.FloatType],
            "InheritedFrom": [types.IntType, types.LongType],
            "Body": types.StringTypes,
            "MaxNumberOfTasks": [types.IntType, types.LongType],
            "EventsPerTask": [types.IntType, types.LongType],
        }
        self.paramValues = {
            "TransformationID": 0,
            "TransformationName": "",
            "Status": "New",
            "Description": "",
            "LongDescription": "",
            "Type": "",
            "Plugin": "Standard",
            "AgentType": "Manual",
            "FileMask": "",
            "TransformationGroup": "General",
            "GroupSize": 1,
            "InheritedFrom": 0,
            "Body": "",
            "MaxNumberOfTasks": 0,
            "EventsPerTask": 0,
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            "Transformations/AllowedPlugins", ["Broadcast", "Standard", "BySize", "ByShare"]
        )
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues["TransformationID"] = transID
            res = self.getTransformation()
            if res["OK"]:
                self.exists = True
            elif res["Message"] == "Transformation does not exist":
                raise AttributeError("TransformationID %d does not exist" % transID)
            else:
                # Any other failure: fall back to an unbound definition
                self.paramValues["TransformationID"] = 0
                gLogger.fatal(
                    "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL)
                )

    def setServer(self, server):
        """Remember ``server`` and point the client at it."""
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        """Return the server URL currently recorded on this object."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-run ``__init__`` for ``transID`` (a new client is created) and return S_OK()."""
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Validate ``seList`` and store it as the 'TargetSE' parameter."""
        return self.__setSE("TargetSE", seList)

    def setSourceSE(self, seList):
        """Validate ``seList`` and store it as the 'SourceSE' parameter."""
        return self.__setSE("SourceSE", seList)

    def __setSE(self, se, seList):
        """Store ``seList`` under parameter ``se`` after checking every SE is known.

        A string is first evaluated as a Python expression; if that fails it is split
        on commas/whitespace.
        """
        if isinstance(seList, types.StringTypes):
            # NOTE(security): eval() executes arbitrary expressions; callers must not pass
            # untrusted strings here.
            try:
                seList = eval(seList)
            except Exception:
                seList = seList.replace(",", " ").split()
        res = self.__checkSEs(seList)
        if not res["OK"]:
            return res
        self.item_called = se
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Synthesise ``getXxx`` / ``setXxx`` accessors.

        The parameter name is recorded in ``self.item_called`` and the generic bound
        getter/setter is returned.

        :raises AttributeError: for any name not starting with 'get' or 'set'
        """
        if name.startswith("get"):
            self.item_called = name[3:]
            return self.__getParam
        if name.startswith("set"):
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Return S_OK(value) for the parameter named by ``self.item_called``.

        'Available' yields the known parameter names, 'Parameters' the whole value dict.

        :raises AttributeError: for an unknown parameter
        """
        if self.item_called == "Available":
            return S_OK(self.paramTypes.keys())
        if self.item_called == "Parameters":
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" % self.item_called)

    def __setParam(self, value):
        """Set the parameter named by ``self.item_called`` to ``value``.

        Known parameters are type-checked against ``self.paramTypes`` when the value
        changes. When bound to an existing transformation the change is sent to the
        client first; the local value is only updated if that succeeds.

        :raises TypeError: when a known parameter gets a value of an unexpected type
        :return: S_OK(), or the failed client result
        """
        item = self.item_called
        if item in self.paramTypes:
            change = self.paramValues[item] != value
            # Exact type match on purpose (same as before): subclasses are rejected
            if change and type(value) not in self.paramTypes[item]:
                raise TypeError(
                    "%s %s %s expected one of %s" % (item, value, type(value), self.paramTypes[item])
                )
        else:
            # Free-form parameter: new, or different from the stored value
            change = item not in self.paramValues or self.paramValues[item] != value
        if not change:
            gLogger.verbose("No change of parameter %s required" % item)
        else:
            gLogger.verbose("Parameter %s to be changed" % item)
            transID = self.paramValues["TransformationID"]
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(transID, item, value)
                if not res["OK"]:
                    return res
            self.paramValues[item] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch the transformation from the client and load its parameters locally.

        :return: S_OK(parameter dict), S_ERROR when no ID is set, or the failed client result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res["Value"]
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                setter = getattr(self, setterName)
            # Test the resolved setter, not its (always non-empty) name
            if not setter:
                gLogger.error("Unable to invoke setter %s, it isn't a member function" % setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Return S_OK(list of logging entries) for this transformation."""
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformationLogging(transID)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res["Value"]
        if printOutput:
            self._printFormattedDictList(
                loggingList, ["Message", "MessageDate", "AuthorDN"], "MessageDate", "MessageDate"
            )
        return S_OK(loggingList)

    # The methods below forward to the TransformationClient method of the same name,
    # with the current TransformationID prepended to the arguments.

    def extendTransformation(self, nTasks, printOutput=False):
        return self.__executeOperation("extendTransformation", nTasks, printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        res = self.__executeOperation("cleanTransformation", printOutput=printOutput)
        if res["OK"]:
            self.paramValues["Status"] = "Cleaned"
        return res

    def deleteTransformation(self, printOutput=False):
        res = self.__executeOperation("deleteTransformation", printOutput=printOutput)
        if res["OK"]:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        return self.__executeOperation("addFilesToTransformation", lfns, printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        return self.__executeOperation("setFileStatusForTransformation", status, lfns, printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        return self.__executeOperation("getTransformationTaskStats", printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        return self.__executeOperation("getTransformationStats", printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        return self.__executeOperation("deleteTasks", taskMin, taskMax, printOutput=printOutput)

    # The list default is kept for interface compatibility; it is not modified in this method.
    def addTaskForTransformation(self, lfns=[], se="Unknown", printOutput=False):
        return self.__executeOperation("addTaskForTransformation", lfns, se, printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        return self.__executeOperation("setTaskStatus", taskID, status, printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Call ``self.transClient.<operation>(transID, *parms, **kwds)``.

        :param operation: name of the client method to invoke
        :param printOutput: (keyword, default False) pretty-print the result
        :return: the client result, or S_ERROR when no ID is set / the method is missing
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        # Default avoids a KeyError if a caller omits the keyword
        printOutput = kwds.pop("printOutput", False)
        fcn = None
        if hasattr(self.transClient, operation) and callable(getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR("Unable to invoke %s, it isn't a member function of TransformationClient" % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    # The list defaults of the three query methods below are never modified here.

    def getTransformationFiles(
        self,
        fileStatus=[],
        lfns=[],
        outputFields=[
            "FileID",
            "LFN",
            "Status",
            "TaskID",
            "TargetSE",
            "UsedSE",
            "ErrorCount",
            "InsertedTime",
            "LastUpdate",
        ],
        orderBy="FileID",
        printOutput=False,
    ):
        """Query the files of this transformation, optionally filtered by status and LFN.

        With ``printOutput`` set, an empty ``outputFields`` lists the available fields
        instead of printing the records.

        :return: the client result
        """
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if fileStatus:
            condDict["Status"] = fileStatus
        if lfns:
            condDict["LFN"] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "FileID", orderBy)
        return res

    def getTransformationTasks(
        self,
        taskStatus=[],
        taskIDs=[],
        outputFields=[
            "TransformationID",
            "TaskID",
            "ExternalStatus",
            "ExternalID",
            "TargetSE",
            "CreationTime",
            "LastUpdateTime",
        ],
        orderBy="TaskID",
        printOutput=False,
    ):
        """Query the tasks of this transformation, optionally filtered by status and task ID.

        With ``printOutput`` set, an empty ``outputFields`` lists the available fields
        instead of printing the records.

        :return: the client result
        """
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if taskStatus:
            condDict["ExternalStatus"] = taskStatus
        if taskIDs:
            condDict["TaskID"] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "TaskID", orderBy)
        return res

    #############################################################################
    def getTransformations(
        self,
        transID=[],
        transStatus=[],
        outputFields=["TransformationID", "Status", "AgentType", "TransformationName", "CreationDate"],
        orderBy="TransformationID",
        printOutput=False,
    ):
        """Query transformations, optionally filtered by ID and status.

        With ``printOutput`` set, an empty ``outputFields`` lists the available fields
        instead of printing the records.

        :return: the client result
        """
        condDict = {}
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "TransformationID", orderBy)
        return res

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create the transformation on the server from the current parameter values.

        After creation, every parameter that is not in ``self.paramTypes`` is pushed
        individually; failures there are logged but do not fail the call.

        :return: S_OK(transID), or the failed sanity-check / client result
        """
        res = self._checkCreation()
        if not res["OK"]:
            return self._errorReport(res, "Failed transformation sanity check")
        if printOutput:
            gLogger.info("Will attempt to create transformation with the following parameters")
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues["TransformationName"],
            self.paramValues["Description"],
            self.paramValues["LongDescription"],
            self.paramValues["Type"],
            self.paramValues["Plugin"],
            self.paramValues["AgentType"],
            self.paramValues["FileMask"],
            transformationGroup=self.paramValues["TransformationGroup"],
            groupSize=self.paramValues["GroupSize"],
            inheritedFrom=self.paramValues["InheritedFrom"],
            body=self.paramValues["Body"],
            maxTasks=self.paramValues["MaxNumberOfTasks"],
            eventsPerTask=self.paramValues["EventsPerTask"],
            addFiles=addFiles,
        )
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res["Value"]
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            if paramName not in self.paramTypes:
                res = self.transClient.setTransformationParameter(transID, paramName, paramValue)
                if not res["OK"]:
                    gLogger.error("Failed to add parameter", "%s %s" % (paramName, res["Message"]))
                    gLogger.notice("To add this parameter later please execute the following.")
                    gLogger.notice("oTransformation = Transformation(%d)" % transID)
                    gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Sanity checks before creation.

        Refuses to proceed when a TransformationID is already set, prompts on stdin for
        missing required parameters, and prompts for a plugin when the selected one is
        not in ``self.supportedPlugins``.
        """
        if self.paramValues["TransformationID"]:
            gLogger.info("You are currently working with an active transformation definition.")
            gLogger.info("If you wish to create a new transformation reset the TransformationID.")
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        requiredParameters = ["TransformationName", "Description", "LongDescription", "Type"]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info("%s is not defined for this transformation. This is required..." % parameter)
                self.paramValues[parameter] = raw_input("Please enter the value of " + parameter + " ")

        plugin = self.paramValues["Plugin"]
        if plugin and plugin not in self.supportedPlugins:
            gLogger.info("The selected Plugin (%s) is not known to the transformation agent." % plugin)
            res = self.__promptForParameter("Plugin", choices=self.supportedPlugins, default="Standard")
            if not res["OK"]:
                return res
            self.paramValues["Plugin"] = res["Value"]

        return S_OK()

    def _checkBySizePlugin(self):
        """Same check as for the Standard plugin."""
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        """Same check as for the Standard plugin."""
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        """Force GroupSize to 1 when it is zero or negative."""
        groupSize = self.paramValues["GroupSize"]
        if groupSize <= 0:
            gLogger.info("The GroupSize was found to be less than zero. It has been set to 1.")
            res = self.setGroupSize(1)
            if not res["OK"]:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        """Prompt on stdin for SourceSE / TargetSE when they are missing or empty."""
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s" % (", ".join(["SourceSE", "TargetSE"]))
        )
        requiredParams = ["SourceSE", "TargetSE"]
        for requiredParam in requiredParams:
            if not self.paramValues.get(requiredParam):
                paramValue = raw_input("Please enter " + requiredParam + " ")
                setter = None
                setterName = "set%s" % requiredParam
                if hasattr(self, setterName) and callable(getattr(self, setterName)):
                    setter = getattr(self, setterName)
                if not setter:
                    return S_ERROR("Unable to invoke %s, this function hasn't been implemented." % setterName)
                ses = paramValue.replace(",", " ").split()
                res = setter(ses)
                if not res["OK"]:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        """Check every entry of ``seList`` is a section of /Resources/StorageElements."""
        res = gConfig.getSections("/Resources/StorageElements")
        if not res["OK"]:
            return self._errorReport(res, "Failed to get possible StorageElements")
        missing = []
        for se in seList:
            if se not in res["Value"]:
                gLogger.error("StorageElement %s is not known" % se)
                missing.append(se)
        if missing:
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self, parameter, choices=[], default="", insert=True):
        """Ask the user for ``parameter`` and, when ``insert`` is set, store it via its setter.

        :return: S_OK(chosen value), or a failed result
        """
        res = promptUser("Please enter %s" % parameter, choices=choices, default=default)
        if not res["OK"]:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"]))
        paramValue = res["Value"]
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                return S_ERROR("Unable to invoke %s, it isn't a member function of Transformation!" % setterName)
            res = setter(paramValue)
            if not res["OK"]:
                return res
        return S_OK(paramValue)
Esempio n. 42
0
class TaskManagerAgentBase(AgentModule, TransformationAgentsUtilities):
    """ To be extended. Please look at WorkflowTaskAgent and RequestTaskAgent.
  """
    def __init__(self, *args, **kwargs):
        """ c'tor

        Runs both base-class constructors and sets the default state.
        Always call this in the extension agent.
        """
        AgentModule.__init__(self, *args, **kwargs)
        TransformationAgentsUtilities.__init__(self)

        # bookkeeping shared with the worker threads
        self.transQueue = Queue()
        self.transInQueue = []
        self.transInThread = {}

        # credentials, filled in by execute() when submission is enabled
        self.owner = self.ownerGroup = self.ownerDN = ''

        # default for the 'TasksPerLoop' option
        self.tasksPerLoop = 50

        # to be provided by initialize() / by the extension
        self.transType = []
        self.transClient = None

    #############################################################################

    def initialize(self):
        """ Agent initialization.

        Registers the "SubmittedTasks" monitoring activity, creates the
        default TransformationClient and starts the worker threads, each of
        them running _execute with its own thread index.

        The extensions MUST provide in the initialize method the following data members:
        - TransformationClient objects (self.transClient),
        - set the shifterProxy if different from the default one set here ('ProductionManager')
        - list of transformation types to be looked (self.transType)

        :return: S_OK()
        """
        gMonitor.registerActivity("SubmittedTasks", "Automatically submitted tasks",
                                  "Transformation Monitoring", "Tasks", gMonitor.OP_ACUM)

        # default client
        self.transClient = TransformationClient()

        # one queued job per thread; the pool is sized to exactly that number
        nThreads = self.am_getOption('maxNumberOfThreads', 15)
        pool = ThreadPool(nThreads, nThreads)
        self.log.verbose("Multithreaded with %d threads" % nThreads)
        for threadIndex in xrange(nThreads):
            pool.generateJobAndQueueIt(self._execute, [threadIndex])

        return S_OK()

    def finalize(self):
        """ Graceful finalization.

        When transformations are still registered in transInQueue, the list is
        emptied and the method polls every two seconds until transInThread is
        empty.

        :return: S_OK()
        """
        if not self.transInQueue:
            return S_OK()

        nInThreads = len(self.transInThread)
        self._logInfo("Wait for threads to get empty before terminating the agent (%d tasks)" % nInThreads)
        self.transInQueue = []
        while self.transInThread:
            time.sleep(2)
        self.log.info("Threads are empty, terminating the agent...")
        return S_OK()

    #############################################################################

    def execute(self):
        """ Fill the queue of transformations that need to be processed.

        For each activity enabled through the agent options ('MonitorTasks',
        'MonitorFiles', 'CheckReserved', 'SubmitTasks') the matching
        transformations are selected and the name of the operation to run is
        recorded for each of them. The collected dictionary is then passed to
        _fillTheQueue.

        :return: S_OK(), or the failed getProxyInfo result when submission is
                 enabled but the credentials cannot be determined (nothing is
                 queued in that case)
        """
        operationsOnTransformationDict = {}

        # (enabling option, option holding the statuses, operation, message logged when disabled)
        monitoringActivities = (
            ('MonitorTasks', 'UpdateTasksStatus', 'updateTaskStatus',
             "Monitoring of tasks is disabled. To enable it, create the 'MonitorTasks' option"),
            ('MonitorFiles', 'UpdateFilesStatus', 'updateFileStatus',
             "Monitoring of files is disabled. To enable it, create the 'MonitorFiles' option"),
            ('CheckReserved', 'CheckReservedStatus', 'checkReservedTasks',
             "Checking of reserved tasks is disabled. To enable it, create the 'CheckReserved' option"),
        )
        for enableOption, statusOption, operation, disabledMessage in monitoringActivities:
            if not self.am_getOption(enableOption, ''):
                self.log.verbose(disabledMessage)
                continue
            status = self.am_getOption(statusOption, ['Active', 'Completing', 'Stopped'])
            # agentType=[] makes _selectTransformations skip the AgentType condition
            transformations = self._selectTransformations(transType=self.transType,
                                                          status=status,
                                                          agentType=[])
            self._addOperationForTransformations(operationsOnTransformationDict, operation, transformations)

        # Determine whether the submission of tasks is to be performed
        if not self.am_getOption('SubmitTasks', ''):
            self.log.verbose("Submission of tasks is disabled. To enable it, create the 'SubmitTasks' option")
        else:
            # getting the credentials for submission
            res = getProxyInfo(False, False)
            if not res['OK']:
                self.log.error("Failed to determine credentials for submission", res['Message'])
                return res
            proxyInfo = res['Value']
            self.owner = proxyInfo['username']
            self.ownerGroup = proxyInfo['group']
            self.ownerDN = proxyInfo['identity']
            self.log.info("Tasks will be submitted with the credentials %s:%s" % (self.owner, self.ownerGroup))
            # Get the transformations for which tasks have to be submitted
            # (the default agentType of _selectTransformations applies here)
            status = self.am_getOption('SubmitStatus', ['Active', 'Completing'])
            transformations = self._selectTransformations(transType=self.transType, status=status)
            if transformations['OK']:
                # the option is only re-read when there is something to submit
                self.tasksPerLoop = self.am_getOption('TasksPerLoop', self.tasksPerLoop)
            self._addOperationForTransformations(operationsOnTransformationDict, 'submitTasks', transformations)

        self._fillTheQueue(operationsOnTransformationDict)

        return S_OK()

    def _addOperationForTransformations(self, operationsOnTransformationDict, operation, transformations):
        """ Record `operation` for every transformation of a selection result.

        :param dict operationsOnTransformationDict: {transID: {'Body': ..., 'Operations': [...]}},
                                                    updated in place
        :param str operation: name of the operation to append
        :param dict transformations: result of _selectTransformations; when it is
                                     not OK a warning is logged and nothing is recorded
        """
        if not transformations['OK']:
            self.log.warn("Could not select transformations: %s" % transformations['Message'])
            return
        # Collapse the selection to one body per ID first, so that an ID listed
        # twice in the selection gets the operation only once.
        transformationIDsAndBodies = dict(
            (transformation['TransformationID'], transformation['Body'])
            for transformation in transformations['Value'])
        for transID, body in transformationIDsAndBodies.items():
            # an already known transformation keeps the body registered first
            entry = operationsOnTransformationDict.setdefault(transID, {'Body': body, 'Operations': []})
            entry['Operations'].append(operation)

    def _selectTransformations(self,
                               transType=[],
                               status=['Active', 'Completing'],
                               agentType=['Automatic']):
        """ Get the transformations matching the given selection.

        Only non-empty arguments end up in the condition dictionary sent to
        self.transClient.getTransformations; the outcome is logged.

        :param transType: value for the 'Type' condition
        :param status: value for the 'Status' condition
        :param agentType: value for the 'AgentType' condition
        :return: the unmodified result of getTransformations
        """
        candidates = (('Status', status), ('Type', transType), ('AgentType', agentType))
        selectCond = dict((key, value) for key, value in candidates if value)

        res = self.transClient.getTransformations(condDict=selectCond)
        if res['OK']:
            if res['Value']:
                self.log.verbose("Obtained %d transformations" % len(res['Value']))
            else:
                self.log.verbose("No transformations found")
        else:
            self.log.error("Failed to get transformations: %s" % res['Message'])
        return res

    def _fillTheQueue(self, operationsOnTransformationsDict):
        """ Queue the operations to be done for each transformation.

        A transformation whose ID is already listed in self.transInQueue is
        left alone; any other one is added to that list and a single-entry
        dictionary {transID: bodyAndOps} is put on self.transQueue.

        :param dict operationsOnTransformationsDict: {transID: bodyAndOps}
        """
        nQueued = 0
        for transID, bodyAndOps in operationsOnTransformationsDict.iteritems():
            if transID in self.transInQueue:
                continue
            self.transInQueue.append(transID)
            self.transQueue.put({transID: bodyAndOps})
            nQueued += 1

        nTotal = len(operationsOnTransformationsDict)
        self.log.info("Out of %d transformations, %d put in thread queue" % (nTotal, nQueued))

    #############################################################################

    def _getClients(self):
        """ Build the clients used inside a thread - another method meant to be extended.

        The clients created here are defaults and should be adapted:
        WorkflowTasks is the task manager for WMS tasks, replace it with
        something else if needed.

        :return: dict with the keys 'TransformationClient' and 'TaskManager'
        """
        return {
            'TransformationClient': TransformationClient(),
            'TaskManager': WorkflowTasks(),
        }

    def _execute(self, threadID):
        """ This is what runs inside the threads, in practice this is the function that does the real stuff
    """
        # Each thread will have its own clients
        clients = self._getClients()
        startTime = 0
        method = '_execute'

        while True:
            transIDOPBody = self.transQueue.get()
            try:
                transID = transIDOPBody.keys()[0]
                operations = transIDOPBody[transID]['Operations']
                if transID not in self.transInQueue:
                    self._logWarn("Got a transf not in transInQueue...?",
                                  method=method,
                                  transID=transID)
                    break
                self.transInThread[transID] = ' [Thread%d] [%s] ' % (
                    threadID, str(transID))
                for operation in operations:
                    self._logInfo("Starting processing operation %s" %
                                  operation,
                                  method=method,
                                  transID=transID)
                    startTime = time.time()
                    res = getattr(self, operation)(transIDOPBody, clients)
                    if not res['OK']:
                        self._logError("Failed to %s: %s" %
                                       (operation, res['Message']),
                                       method=method,
                                       transID=transID)
                    self._logInfo("Processed operation %s" % operation,
                                  method=method,
                                  transID=transID)
            except Exception, x:
                self._logException('Exception executing operation %s' %
                                   operation,
                                   lException=x,
                                   transID=transID,
                                   method=method)
            finally:
Esempio n. 43
0
class TransformationCleaningAgent(AgentModule):
    """
    .. class:: TransformationCleaningAgent

    :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance
    :param ~TransformationClient.TransformationClient transClient: TransformationClient instance
    :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance

    """
    def __init__(self, *args, **kwargs):
        """c'tor

        Sets the defaults of all data members; the clients are only
        instantiated in initialize().
        """
        AgentModule.__init__(self, *args, **kwargs)

        self.shifterProxy = None

        # # transformation client
        self.transClient = None
        # # wms client
        self.wmsClient = None
        # # request client
        self.reqClient = None
        # # file catalog client
        self.metadataClient = None
        # # job monitoring client (declared here like the other clients, so that
        # # the attribute exists even before initialize() has run)
        self.jobMonitoringClient = None

        # # transformations types
        self.transformationTypes = None
        # # directory locations
        self.directoryLocations = ["TransformationDB", "MetadataCatalog"]
        # # transformation metadata
        self.transfidmeta = "TransformationID"
        # # archive period in days
        self.archiveAfter = 7
        # # transformation log SEs
        self.logSE = "LogSE"
        # # enable/disable execution (compared as a string in execute())
        self.enableFlag = "True"

        self.dataProcTTypes = ["MCSimulation", "Merge"]
        self.dataManipTTypes = ["Replication", "Removal"]

    def initialize(self):
        """Agent initialisation: read the configuration options and create the clients.

        :param self: self reference
        :return: S_OK()
        """
        # # shifter proxy
        # See cleanContent method: this proxy will be used ALSO when the file catalog used
        # is the DIRAC File Catalog (DFC).
        # This is possible because of unset of the "UseServerCertificate" option
        self.shifterProxy = self.am_getOption("shifterProxy", self.shifterProxy)

        # # transformation types: the agent option wins, otherwise the union of the
        # # data processing and data manipulation types is used
        self.dataProcTTypes = Operations().getValue("Transformations/DataProcessing", self.dataProcTTypes)
        self.dataManipTTypes = Operations().getValue("Transformations/DataManipulation", self.dataManipTTypes)
        selectedTypes = self.am_getOption("TransformationTypes", [])
        if not selectedTypes:
            selectedTypes = self.dataProcTTypes + self.dataManipTTypes
        self.transformationTypes = sorted(selectedTypes)
        self.log.info("Will consider the following transformation types: %s" % str(self.transformationTypes))

        # # directory locations
        locations = self.am_getOption("DirectoryLocations", self.directoryLocations)
        self.directoryLocations = sorted(locations)
        self.log.info("Will search for directories in the following locations: %s" % str(self.directoryLocations))

        # # transformation metadata
        self.transfidmeta = self.am_getOption("TransfIDMeta", self.transfidmeta)
        self.log.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta)

        # # archive period in days
        self.archiveAfter = self.am_getOption("ArchiveAfter", self.archiveAfter)
        self.log.info("Will archive Completed transformations after %d days" % self.archiveAfter)

        # # transformation log SEs
        self.logSE = Operations().getValue("/LogStorage/LogSE", self.logSE)
        self.log.info("Will remove logs found on storage element: %s" % self.logSE)

        # # clients: transformation, wms, request, file catalog, job monitoring
        self.transClient = TransformationClient()
        self.wmsClient = WMSClient()
        self.reqClient = ReqClient()
        self.metadataClient = FileCatalogClient()
        self.jobMonitoringClient = JobMonitoringClient()

        return S_OK()

    #############################################################################
    def execute(self):
        """Execution in one agent's cycle.

        Unless disabled through the EnableFlag option, three selections are
        processed in turn: transformations in "Cleaning" status are cleaned,
        those in "RemovingFiles" status get their output removed, and
        "Completed" ones selected with the archiveAfter cut-off on LastUpdate
        are archived. Each handler runs directly when a shifter proxy is
        configured, otherwise through executeWithUserProxy with the author's
        DN and group.

        :param self: self reference
        :return: S_OK()
        """
        self.enableFlag = self.am_getOption("EnableFlag", self.enableFlag)
        if self.enableFlag != "True":
            self.log.info("TransformationCleaningAgent is disabled by configuration option EnableFlag")
            return S_OK("Disabled via CS flag")

        def handleSelection(selection, handler, infoTemplate, errorMessage):
            """Apply handler to each selected transformation, or log the failed selection."""
            if not selection["OK"]:
                self.log.error(errorMessage, selection["Message"])
                return
            for transDict in selection["Value"]:
                if self.shifterProxy:
                    handler(transDict)
                    continue
                self.log.info(infoTemplate % transDict)
                executeWithUserProxy(handler)(transDict,
                                              proxyUserDN=transDict["AuthorDN"],
                                              proxyUserGroup=transDict["AuthorGroup"])

        # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
        cleaning = self.transClient.getTransformations({"Status": "Cleaning",
                                                        "Type": self.transformationTypes})
        handleSelection(cleaning, self._executeClean,
                        "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
                        "Failed to get transformations")

        # Obtain the transformations in RemovingFiles status and removes the output files
        removing = self.transClient.getTransformations({"Status": "RemovingFiles",
                                                        "Type": self.transformationTypes})
        handleSelection(removing, self._executeRemoval,
                        "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
                        "Could not get the transformations")

        # Obtain the transformations in Completed status and archive if inactive for X days
        olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
        completed = self.transClient.getTransformations({"Status": "Completed",
                                                         "Type": self.transformationTypes},
                                                        older=olderThanTime,
                                                        timeStamp="LastUpdate")
        handleSelection(completed, self._executeArchive,
                        "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
                        "Could not get the transformations")
        return S_OK()

    def finalize(self):
        """Only at finalization: will clean ancient transformations (remnants)

            1) get the job groups (transformation IDs) returned by getJobGroups
               for a cut-off date of one year ago
            2) find the status of those transformations. Those "Cleaned" and "Archived" will be
               cleaned and archived (again)
            3) remove the WMS jobs still found for those job groups

        Why doing this here? Basically, it's a race:

        1) the production manager submits a transformation
        2) the TransformationAgent, and a bit later the WorkflowTaskAgent, put such transformation in their internal queue,
           so eventually during their (long-ish) cycle they'll work on it.
        3) 1 minute after creating the transformation, the production manager cleans it (by hand, for whatever reason).
           So, the status is changed to "Cleaning"
        4) the TransformationCleaningAgent cleans what has been created (maybe, nothing),
           then sets the transformation status to "Cleaned" or "Archived"
        5) a bit later the TransformationAgent, and later the WorkflowTaskAgent, kick in,
           creating tasks and jobs for a production that's effectively cleaned (but these 2 agents don't know yet).

        Of course, one could make one final check in TransformationAgent or WorkflowTaskAgent,
        but these 2 agents are already doing a lot of stuff, and are pretty heavy.
        So, we should just clean from time to time.
        This is done only when the agent finalizes, and it's quite a light-ish operation anyway.

        :return: S_OK(), or the first failed result of the client calls
        """
        res = self.jobMonitoringClient.getJobGroups(None, datetime.utcnow() - timedelta(days=365))
        if not res["OK"]:
            self.log.error("Failed to get job groups", res["Message"])
            return res
        transformationIDs = res["Value"]
        if not transformationIDs:
            return S_OK()

        res = self.transClient.getTransformations({"TransformationID": transformationIDs})
        if not res["OK"]:
            self.log.error("Failed to get transformations", res["Message"])
            return res
        transformations = res["Value"]
        toClean = [transDict for transDict in transformations if transDict["Status"] == "Cleaned"]
        toArchive = [transDict for transDict in transformations if transDict["Status"] == "Archived"]

        def runWithProxy(handler, transDict, infoTemplate):
            """Run handler directly with the shifter proxy, else with the author's proxy."""
            if self.shifterProxy:
                handler(transDict)
                return
            self.log.info(infoTemplate % transDict)
            executeWithUserProxy(handler)(transDict,
                                          proxyUserDN=transDict["AuthorDN"],
                                          proxyUserGroup=transDict["AuthorGroup"])

        for transDict in toClean:
            runWithProxy(self._executeClean, transDict,
                         "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s")

        for transDict in toArchive:
            runWithProxy(self._executeArchive, transDict,
                         "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s")

        # Remove JobIDs that were unknown to the TransformationSystem
        jobGroupsToCheck = [str(transDict["TransformationID"]).zfill(8) for transDict in toClean + toArchive]
        if not jobGroupsToCheck:
            # Nothing was cleaned or archived: do not query with an empty JobGroup
            # filter and then remove whatever comes back.
            # NOTE(review): presumably an empty list does not restrict the
            # selection in getJobs - confirm against the JobMonitoring service.
            return S_OK()

        res = self.jobMonitoringClient.getJobs({"JobGroup": jobGroupsToCheck})
        if not res["OK"]:
            return res
        jobIDsToRemove = [int(jobID) for jobID in res["Value"]]
        res = self.__removeWMSTasks(jobIDsToRemove)
        if not res["OK"]:
            return res

        return S_OK()

    def _executeClean(self, transDict):
        """Clean the given transformation.

        A transformation whose type is one of self.dataManipTTypes has nothing
        to clean and is only archived; any other one goes through
        cleanTransformation. A failure is logged, not propagated.

        :param dict transDict: transformation description; the keys "Type" and
                               "TransformationID" are read
        :return: S_OK() in all cases, like _executeArchive, so that the callers
                 get a uniform result structure from all the _execute* handlers
        """
        transID = transDict["TransformationID"]
        if transDict["Type"] in self.dataManipTTypes:
            res = self.archiveTransformation(transID)
            problem = "Problems archiving transformation"
        else:
            res = self.cleanTransformation(transID)
            problem = "Problems cleaning transformation"
        if not res["OK"]:
            self.log.error(problem, "%s: %s" % (transID, res["Message"]))

        return S_OK()

    def _executeRemoval(self, transDict):
        """Remove the output files of the given transformation.

        A failure of removeTransformationOutput is logged, not propagated.

        :param dict transDict: transformation description; the key
                               "TransformationID" is read
        :return: S_OK() in all cases, like _executeArchive, so that the callers
                 get a uniform result structure from all the _execute* handlers
        """
        transID = transDict["TransformationID"]
        res = self.removeTransformationOutput(transID)
        if not res["OK"]:
            self.log.error("Problems removing transformation", "%s: %s" % (transID, res["Message"]))

        return S_OK()

    def _executeArchive(self, transDict):
        """Archive the given transformation.

        A failure of archiveTransformation is logged, not propagated.

        :param dict transDict: transformation description; the key
                               "TransformationID" is read
        :return: S_OK() in all cases
        """
        transID = transDict["TransformationID"]
        outcome = self.archiveTransformation(transID)
        if outcome["OK"]:
            return S_OK()

        self.log.error("Problems archiving transformation", "%s: %s" % (transID, outcome["Message"]))
        return S_OK()

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """Get the directories of the supplied transformation.

        Depending on self.directoryLocations the directories come from the
        "OutputDirectories" parameter of the transformation and/or from a
        metadata query on the file catalog. Only paths accepted by _addDirs
        are kept.

        :param self: self reference
        :param int transID: transformation ID
        :return: S_OK(sorted list of directories), or the failed client result
        """
        self.log.verbose("Cleaning Transformation directories of transformation %d" % transID)
        directories = []
        if "TransformationDB" in self.directoryLocations:
            res = self.transClient.getTransformationParameters(transID, ["OutputDirectories"])
            if not res["OK"]:
                self.log.error("Failed to obtain transformation directories", res["Message"])
                return res
            rawValue = res["Value"]
            transDirectories = []
            if isinstance(rawValue, list):
                transDirectories = rawValue
            elif rawValue:
                try:
                    parsed = ast.literal_eval(rawValue)
                except Exception:
                    # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]'
                    parsed = rawValue
                if isinstance(parsed, (list, tuple, set, frozenset, dict)):
                    transDirectories = list(parsed)
                else:
                    # A single value (e.g. a quoted path) is one directory; it
                    # must not be iterated character by character.
                    transDirectories = [parsed]
            directories = self._addDirs(transID, transDirectories, directories)

        if "MetadataCatalog" in self.directoryLocations:
            res = self.metadataClient.findDirectoriesByMetadata({self.transfidmeta: transID})
            if not res["OK"]:
                self.log.error("Failed to obtain metadata catalog directories", res["Message"])
                return res
            transDirectories = res["Value"]
            directories = self._addDirs(transID, transDirectories, directories)

        if not directories:
            self.log.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)

    @classmethod
    def _addDirs(cls, transID, newDirs, existingDirs):
        """append unique :newDirs: list to :existingDirs: list

        :param self: self reference
        :param int transID: transformationID
        :param list newDirs: src list of paths
        :param list existingDirs: dest list of paths
        """
        for folder in newDirs:
            transStr = str(transID).zfill(8)
            if re.search(transStr, str(folder)):
                if folder not in existingDirs:
                    existingDirs.append(os.path.normpath(folder))
        return existingDirs

    #############################################################################
    #
    # These are the methods for performing the cleaning of catalogs and storage
    #

    def cleanContent(self, directory):
        """Wipe out everything from the catalog under folder :directory:.

        The files found below the directory are removed with
        DataManager().removeFile(..., force=True). Failures whose reason
        contains "File does not exist" are only reported as warnings.

        :param self: self reference
        :param str directory: folder name
        :return: S_OK() when nothing is left to remove or all removals
                 succeeded, S_ERROR / the failed result otherwise
        """
        self.log.verbose("Cleaning Catalog contents")
        res = self.__getCatalogDirectoryContents([directory])
        if not res["OK"]:
            return res
        filesFound = res["Value"]
        if not filesFound:
            self.log.info("No files are registered in the catalog directory %s" % directory)
            return S_OK()
        self.log.info("Attempting to remove possible remnants from the catalog and storage",
                      "(n=%d)" % len(filesFound))

        # Executing with shifter proxy: the server certificate is switched off
        # for the removal only. The option is switched back on in a finally
        # clause so that an exception in removeFile cannot leave it disabled.
        gConfigurationData.setOptionInCFG("/DIRAC/Security/UseServerCertificate", "false")
        try:
            res = DataManager().removeFile(filesFound, force=True)
        finally:
            gConfigurationData.setOptionInCFG("/DIRAC/Security/UseServerCertificate", "true")

        if not res["OK"]:
            return res
        realFailure = False
        for lfn, reason in res["Value"]["Failed"].items():
            if "File does not exist" in str(reason):
                self.log.warn("File %s not found in some catalog: " % (lfn))
            else:
                self.log.error("Failed to remove file found in the catalog", "%s %s" % (lfn, reason))
                realFailure = True
        if realFailure:
            return S_ERROR("Failed to remove all files found in the catalog")
        return S_OK()

    def __getCatalogDirectoryContents(self, directories):
        """Get the catalog contents under paths :directories:.

        The directories are listed one after the other, the sub-directories
        found being appended to the work list. A directory that cannot be
        listed is logged and skipped.

        :param self: self reference
        :param list directories: list of paths in catalog (left untouched)
        :return: S_OK(list of the files found)
        """
        self.log.info("Obtaining the catalog contents for %d directories:" % len(directories))
        for directory in directories:
            self.log.info(directory)
        # Work on a copy: the work list is consumed and extended below, which
        # must not alter the list owned by the caller.
        activeDirs = list(directories)
        allFiles = {}
        fc = FileCatalog()
        while activeDirs:
            currentDir = activeDirs.pop(0)
            res = returnSingleResult(fc.listDirectory(currentDir))
            if res["OK"]:
                dirContents = res["Value"]
                activeDirs.extend(dirContents["SubDirs"])
                allFiles.update(dirContents["Files"])
                continue
            message = res["Message"]
            if "Directory does not exist" in message:  # FIXME: DFC should return errno
                self.log.info("The supplied directory %s does not exist" % currentDir)
            elif "No such file or directory" in message:
                self.log.info("%s: %s" % (currentDir, message))
            else:
                self.log.error("Failed to get directory %s content" % currentDir, message)
        self.log.info("", "Found %d files" % len(allFiles))
        return S_OK(list(allFiles))

    def cleanTransformationLogFiles(self, directory):
        """Recursively remove the transformation log :directory: from the log SE.

        A directory that does not exist (ENOENT) is treated as already clean.

        :param self: self reference
        :param str directory: folder name
        :return: S_OK() on success or missing directory, otherwise the
                 failing removal result
        """
        self.log.verbose("Removing log files found in the directory",
                         directory)
        logStorage = StorageElement(self.logSE)
        removal = returnSingleResult(
            logStorage.removeDirectory(directory, recursive=True))

        if removal["OK"]:
            self.log.info("Successfully removed transformation log directory")
            return S_OK()

        # No such file or directory: nothing to clean
        if cmpError(removal, errno.ENOENT):
            self.log.warn("Transformation log directory does not exist",
                          directory)
            return S_OK()

        self.log.error("Failed to remove log files", removal["Message"])
        return removal

    #############################################################################
    #
    # These are the functional methods for archiving and cleaning transformations
    #

    def removeTransformationOutput(self, transID):
        """Remove the output data of a transformation from the catalog and storage.

        Every transformation directory that is not a /LOG/ directory is
        cleaned, then the files still found through the metadata catalog are
        removed and the transformation status is set to "RemovedFiles".

        :param self: self reference
        :param transID: transformation ID
        :return: S_OK() on success, and also when the transformation
                 directories cannot be obtained (only logged); otherwise
                 the first failing result
        """
        self.log.info("Removing output data for transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res["OK"]:
            self.log.error("Problem obtaining directories for transformation",
                           "%s with result '%s'" % (transID, res))
            return S_OK()
        directories = res["Value"]
        cleaned = 0
        for directory in directories:
            # Log directories are left untouched here
            if "/LOG/" in directory:
                continue
            res = self.cleanContent(directory)
            if not res["OK"]:
                return res
            cleaned += 1

        # Report only the directories that were actually cleaned
        self.log.info("Removed %d directories from the catalog "
                      "and its files from the storage for transformation %s" %
                      (cleaned, transID))
        # Clean ALL the possible remnants found in the metadata catalog
        res = self.cleanMetadataCatalogFiles(transID)
        if not res["OK"]:
            return res
        self.log.info("Successfully removed output of transformation", transID)
        # Change the status of the transformation to RemovedFiles
        res = self.transClient.setTransformationParameter(
            transID, "Status", "RemovedFiles")
        if not res["OK"]:
            self.log.error(
                "Failed to update status of transformation %s to RemovedFiles"
                % (transID), res["Message"])
            return res
        self.log.info("Updated status of transformation %s to RemovedFiles" %
                      (transID))
        return S_OK()

    def archiveTransformation(self, transID):
        """Archive a transformation.

        The transformation tasks are cleaned, the transformation is cleaned
        in the transformation DB and its status is set to "Archived".

        :param self: self reference
        :param int transID: transformation ID
        :return: S_OK() on success, otherwise the first failing result
        """
        self.log.info("Archiving transformation %s" % transID)
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res["OK"]:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res["OK"]:
            return res
        # %s rather than %d: a non-integer ID must not raise after the
        # archiving itself succeeded
        self.log.info("Successfully archived transformation %s" % transID)
        # Change the status of the transformation to archived
        res = self.transClient.setTransformationParameter(
            transID, "Status", "Archived")
        if not res["OK"]:
            self.log.error(
                "Failed to update status of transformation %s to Archived" %
                (transID), res["Message"])
            return res
        self.log.info("Updated status of transformation %s to Archived" %
                      (transID))
        return S_OK()

    def cleanTransformation(self, transID):
        """Remove what the supplied transformation produced.

        Steps, in order: clean the transformation tasks, clean every
        transformation directory (log files first for /LOG/ directories),
        remove the remnants found through the metadata catalog, clean the
        transformation in the transformation DB and set its status to
        "Cleaned".  The first failing step aborts the cleaning.

        :param self: self reference
        :param transID: transformation ID
        :return: S_OK() on success, and also when the transformation
                 directories cannot be obtained (only logged); otherwise
                 the first failing result
        """
        self.log.info("Cleaning transformation", transID)
        dirsResult = self.getTransformationDirectories(transID)
        if not dirsResult["OK"]:
            self.log.error("Problem obtaining directories for transformation",
                           "%s with result '%s'" %
                           (transID, dirsResult["Message"]))
            return S_OK()
        transDirs = dirsResult["Value"]

        # Jobs in the WMS and any failover requests go first
        tasksResult = self.cleanTransformationTasks(transID)
        if not tasksResult["OK"]:
            return tasksResult

        for transDir in transDirs:
            # /LOG/ directories get their log files removed before the
            # catalog content is cleaned
            if re.search("/LOG/", transDir):
                logResult = self.cleanTransformationLogFiles(transDir)
                if not logResult["OK"]:
                    return logResult
            contentResult = self.cleanContent(transDir)
            if not contentResult["OK"]:
                return contentResult

        # Whatever is still referenced in the metadata catalog
        metaResult = self.cleanMetadataCatalogFiles(transID)
        if not metaResult["OK"]:
            return metaResult

        # Files and job information in the transformation DB
        dbResult = self.transClient.cleanTransformation(transID)
        if not dbResult["OK"]:
            return dbResult
        self.log.info("Successfully cleaned transformation", transID)

        statusResult = self.transClient.setTransformationParameter(
            transID, "Status", "Cleaned")
        if statusResult["OK"]:
            self.log.info("Updated status of transformation",
                          "%s to Cleaned" % (transID))
            return S_OK()
        self.log.error(
            "Failed to update status of transformation %s to Cleaned" %
            (transID), statusResult["Message"])
        return statusResult

    def cleanMetadataCatalogFiles(self, transID):
        """Remove the files found in the metadata catalog for a transformation.

        The files are looked up with the metadata query
        ``{self.transfidmeta: transID}`` and removed through the DataManager.

        :param self: self reference
        :param transID: transformation ID
        :return: S_OK() when there is nothing to remove or everything was
                 removed, the failing result of the query/removal call, or
                 S_ERROR when at least one file could not be removed
        """
        res = self.metadataClient.findFilesByMetadata(
            {self.transfidmeta: transID})
        if not res["OK"]:
            return res
        fileToRemove = res["Value"]
        if not fileToRemove:
            self.log.info("No files found for transID", transID)
            return S_OK()

        # Executing with shifter proxy: the server certificate is switched off
        # only for the duration of the removal.  The finally clause guarantees
        # the option is switched back on even if the removal raises.
        gConfigurationData.setOptionInCFG(
            "/DIRAC/Security/UseServerCertificate", "false")
        try:
            res = DataManager().removeFile(fileToRemove, force=True)
        finally:
            gConfigurationData.setOptionInCFG(
                "/DIRAC/Security/UseServerCertificate", "true")

        if not res["OK"]:
            return res
        for lfn, reason in res["Value"]["Failed"].items():
            self.log.error("Failed to remove file found in metadata catalog",
                           "%s %s" % (lfn, reason))
        if res["Value"]["Failed"]:
            return S_ERROR(
                "Failed to remove all files found in the metadata catalog")
        self.log.info("Successfully removed all files found in the DFC")
        return S_OK()

    #############################################################################
    #
    # These are the methods for removing the jobs from the WMS and transformation DB
    #

    def cleanTransformationTasks(self, transID):
        """Clean the external tasks of a transformation.

        When the transformation type is one of ``self.dataProcTTypes`` the
        external IDs are removed as WMS jobs, otherwise they are removed as
        requests.  Nothing is done when the transformation has no tasks.

        :param self: self reference
        :param transID: transformation ID
        :return: S_OK() on success, otherwise the first failing result
        """
        self.log.verbose("Cleaning Transformation tasks of transformation",
                         transID)
        idsResult = self.__getTransformationExternalIDs(transID)
        if not idsResult["OK"]:
            return idsResult

        taskIDs = idsResult["Value"]
        if not taskIDs:
            return S_OK()

        typeResult = self.transClient.getTransformationParameters(
            transID, ["Type"])
        if not typeResult["OK"]:
            self.log.error("Failed to determine transformation type")
            return typeResult

        if typeResult["Value"] in self.dataProcTTypes:
            removal = self.__removeWMSTasks(taskIDs)
        else:
            removal = self.__removeRequests(taskIDs)

        if removal["OK"]:
            return S_OK()
        return removal

    def __getTransformationExternalIDs(self, transID):
        """Collect all ExternalIDs for transformation :transID:.

        :param self: self reference
        :param int transID: transformation ID
        :return: S_OK(list with the "ExternalID" of every task), or the
                 failing result of the task query
        """
        res = self.transClient.getTransformationTasks(
            condDict={"TransformationID": transID})
        if not res["OK"]:
            # %s rather than %d: formatting a non-integer ID must not raise
            # here and hide the actual error
            self.log.error(
                "Failed to get externalIDs for transformation %s" % transID,
                res["Message"])
            return res
        externalIDs = [taskDict["ExternalID"] for taskDict in res["Value"]]
        self.log.info("Found %d tasks for transformation" % len(externalIDs))
        return S_OK(externalIDs)

    def __removeRequests(self, requestIDs):
        """Cancel the given requests in the RMS.

        IDs that convert to 0 are skipped.  The cancellation is best effort:
        a failed cancellation is logged but does not change the result.

        :param self: self reference
        :param list requestIDs: request IDs, anything accepted by int()
        :return: S_OK(), always
        """
        # Convert every ID up front so a malformed one raises before any
        # request is cancelled
        rIDs = [reqID for reqID in (int(j) for j in requestIDs) if reqID]
        for reqID in rIDs:
            res = self.reqClient.cancelRequest(reqID)
            if not res["OK"]:
                self.log.error("Failed to cancel request",
                               "%s: %s" % (reqID, res["Message"]))

        return S_OK()

    def __removeWMSTasks(self, transJobIDs):
        """Kill and delete jobs in the WMS, then cancel their associated requests.

        The jobs are processed in chunks of 500.  Jobs unknown to the WMS are
        tolerated; unauthorized or failed kill/delete operations make the
        call fail before any request is touched.

        :param self: self reference
        :param list transJobIDs: job IDs; IDs that convert to 0 are ignored
        :return: S_OK() on success, S_ERROR when jobs or requests could not
                 all be removed, or the failing result of the request lookup
        """
        # Prevent 0 job IDs
        jobIDs = [jobID for jobID in (int(j) for j in transJobIDs) if jobID]
        allRemove = True
        for jobList in breakListIntoChunks(jobIDs, 500):

            res = self.wmsClient.killJob(jobList)
            if res["OK"]:
                self.log.info("Successfully killed %d jobs from WMS" %
                              len(jobList))
            elif not self.__logWMSFailure(res, "kill"):
                allRemove = False

            res = self.wmsClient.deleteJob(jobList)
            if res["OK"]:
                self.log.info("Successfully deleted jobs from WMS",
                              "(n=%d)" % len(jobList))
            elif not self.__logWMSFailure(res, "delete"):
                allRemove = False

        if not allRemove:
            return S_ERROR("Failed to delete all remnants from WMS")
        self.log.info("Successfully deleted all tasks from the WMS")

        if not jobIDs:
            self.log.info(
                "JobIDs not present, unable to delete associated requests.")
            return S_OK()

        res = self.reqClient.getRequestIDsForJobs(jobIDs)
        if not res["OK"]:
            self.log.error("Failed to get requestID for jobs.", res["Message"])
            return res
        failoverRequests = res["Value"]["Successful"]
        if not failoverRequests:
            return S_OK()

        failed = 0
        for jobID, requestID in failoverRequests.items():
            # Put this check just in case, tasks must have associated jobs
            if jobID in (0, "0"):
                continue
            cancelRes = self.reqClient.cancelRequest(requestID)
            if not cancelRes["OK"]:
                self.log.error("Failed to remove request from RequestDB",
                               cancelRes["Message"])
                failed += 1
            else:
                # %s: the job ID key may be a string
                self.log.verbose("Removed request %s associated to job %s." %
                                 (requestID, jobID))

        if failed:
            self.log.info("Successfully removed requests",
                          "(n=%d)" % (len(failoverRequests) - failed))
            self.log.info("Failed to remove requests", "(n=%d)" % failed)
            return S_ERROR("Failed to remove all the request from RequestDB")
        self.log.info(
            "Successfully removed all the associated failover requests")
        return S_OK()

    def __logWMSFailure(self, res, action):
        """Log a failed WMS kill/delete result and tell whether it is tolerable.

        :param dict res: failed result of the WMS call
        :param str action: "kill" or "delete", used in the log messages
        :return: False when jobs were not authorized or failed, True otherwise
        """
        if "NonauthorizedJobIDs" in res:
            self.log.error("Failed to %s jobs because not authorized" % action,
                           "(n=%d)" % len(res["NonauthorizedJobIDs"]))
            return False
        if "FailedJobIDs" in res:
            self.log.error("Failed to %s jobs" % action,
                           "(n=%d)" % len(res["FailedJobIDs"]))
            return False
        if "InvalidJobIDs" in res:
            self.log.info("Found jobs which did not exist in the WMS",
                          "(n=%d)" % len(res["InvalidJobIDs"]))
            return True
        # A failure carrying none of the job ID lists used to pass silently.
        # It is now logged; the outcome is deliberately left unchanged.
        self.log.error("Failed to %s jobs" % action, res.get("Message", ""))
        return True
Esempio n. 44
0
class TaskManagerAgentBase(AgentModule):
  """ Base agent that monitors, recovers and submits transformation tasks.

  Each cycle runs up to four steps, each one enabled by its own agent option:
  'MonitorTasks', 'MonitorFiles', 'CheckReserved' and 'SubmitTasks'.

  The attribute transType and the methods getSubmittedTaskStatus,
  getSubmittedFileStatus, updateTransformationReservedTasks,
  prepareTransformationTasks, submitTransformationTasks and
  updateDBAfterTaskSubmission are used but not defined in this class
  (presumably they are provided by the concrete agents - to be confirmed).
  """

  #############################################################################
  def initialize(self):
    """ Read the 'section' option, register the monitoring activity and create the transformation client. """
    self.section = self.am_getOption("section")
    gMonitor.registerActivity("SubmittedTasks","Automatically submitted tasks","Transformation Monitoring","Tasks", gMonitor.OP_ACUM)
    self.transClient = TransformationClient()
    return S_OK()

  #############################################################################
  def execute(self):
    """ The TaskManagerBase execution method.

    Runs every enabled step in a fixed order. A failing step is only logged
    as a warning, the remaining steps are still executed.

    :return: S_OK(), always
    """
    # (enabling option, description used when disabled, step, warning on failure)
    steps = (('MonitorTasks', "Monitoring of tasks", self.updateTaskStatus, 'execute: Failed to update task states'),
             ('MonitorFiles', "Monitoring of files", self.updateFileStatus, 'execute: Failed to update file states'),
             ('CheckReserved', "Checking of reserved tasks", self.checkReservedTasks, 'execute: Failed to checked reserved tasks'),
             ('SubmitTasks', "Submission of tasks", self.submitTasks, 'execute: Failed to submit created tasks'))
    for option, description, step, failureMessage in steps:
      # The option is read right before its step, any true value enables it
      if not self.am_getOption(option,''):
        gLogger.info("execute: %s is disabled." % description)
        gLogger.info("execute: To enable create the '%s' option" % option)
        continue
      res = step()
      if not res['OK']:
        gLogger.warn(failureMessage, res['Message'])

    return S_OK()

  def _selectTransformations(self,transType=[],status=['Active','Completing'],agentType=['Automatic']):
    """ Get the transformations matching the given selection.

    An empty value disables the corresponding condition. The default lists
    are only read here and must never be modified in place.

    :param list transType: transformation types ('Type' condition)
    :param list status: transformation statuses ('Status' condition)
    :param list agentType: agent types ('AgentType' condition)
    :return: the result of TransformationClient.getTransformations
    """
    selectCond = {}
    if status:
      selectCond['Status'] = status
    if transType:
      selectCond['Type'] = transType
    if agentType:
      selectCond['AgentType'] = agentType
    res = self.transClient.getTransformations(condDict=selectCond)
    if not res['OK']:
      gLogger.error("_selectTransformations: Failed to get transformations for selection.",res['Message'])
    elif not res['Value']:
      gLogger.info("_selectTransformations: No transformations found for selection.")
    else:
      gLogger.info("_selectTransformations: Obtained %d transformations for selection" % len(res['Value']))
    return res

  def updateTaskStatus(self):
    """ Update the status of the tasks not updated during the last 10 minutes.

    :return: S_OK(), or the failing result of the transformation selection
    """
    gLogger.info("updateTaskStatus: Updating the Status of tasks")
    # Get the transformations to be updated
    status = self.am_getOption('UpdateTasksStatus',['Active','Completing','Stopped'])
    res = self._selectTransformations(transType=self.transType,status=status,agentType=[])
    if not res['OK']:
      return res
    transformations = res['Value']
    # The task states which are in a UPDATE state (same for every transformation)
    updateStatus = self.am_getOption('TaskUpdateStatus',['Checking','Deleted','Killed','Staging','Stalled','Matched','Rescheduled','Completed','Submitted','Received','Waiting','Running'])
    for transformation in transformations:
      transID = transformation['TransformationID']
      condDict = {"TransformationID":transID,"ExternalStatus":updateStatus}
      timeStamp = str(datetime.datetime.utcnow() - datetime.timedelta(minutes=10))
      res = self.transClient.getTransformationTasks(condDict=condDict,older=timeStamp, timeStamp='LastUpdateTime')
      if not res['OK']:
        gLogger.error("updateTaskStatus: Failed to get tasks to update for transformation", "%s %s" % (transID,res['Message']))
        continue
      if not res['Value']:
        gLogger.verbose("updateTaskStatus: No tasks found to update for transformation %s" % transID)
        continue
      res = self.getSubmittedTaskStatus(res['Value'])
      if not res['OK']:
        gLogger.error("updateTaskStatus: Failed to get updated task statuses for transformation", "%s %s" % (transID,res['Message']))
        continue
      statusDict = res['Value']
      for newStatus in sorted(statusDict):
        taskIDs = statusDict[newStatus]
        gLogger.info("updateTaskStatus: Updating %d task(s) from transformation %s to %s" % (len(taskIDs),transID,newStatus))
        res = self.transClient.setTaskStatus(transID,taskIDs,newStatus)
        if not res['OK']:
          gLogger.error("updateTaskStatus: Failed to update task status for transformation", "%s %s" % (transID,res['Message']))

    gLogger.info("updateTaskStatus: Transformation task status update complete")
    return S_OK()

  def updateFileStatus(self):
    """ Update the status of the 'Assigned' files not updated during the last 10 minutes.

    :return: S_OK(), or the failing result of the transformation selection
    """
    gLogger.info("updateFileStatus: Updating Status of task files")
    #Get the transformations to be updated
    status = self.am_getOption('UpdateFilesStatus',['Active','Completing','Stopped'])
    res = self._selectTransformations(transType=self.transType,status=status,agentType=[])
    if not res['OK']:
      return res
    transformations = res['Value']
    for transformation in transformations:
      transID = transformation['TransformationID']
      # NOTE(review): the 'FileUpdateStatus' option used to be read here but was
      # never used, the selection is fixed to the 'Assigned' files
      timeStamp = str(datetime.datetime.utcnow() - datetime.timedelta(minutes=10))
      condDict = {'TransformationID' : transID, 'Status' : ['Assigned']}
      res = self.transClient.getTransformationFiles(condDict=condDict,older=timeStamp, timeStamp='LastUpdate')
      if not res['OK']:
        gLogger.error("updateFileStatus: Failed to get transformation files to update.",res['Message'])
        continue
      if not res['Value']:
        gLogger.info("updateFileStatus: No files to be updated for transformation %s." % transID)
        continue
      res = self.getSubmittedFileStatus(res['Value'])
      if not res['OK']:
        gLogger.error("updateFileStatus: Failed to get updated file statuses for transformation","%s %s" % (transID,res['Message']))
        continue
      statusDict = res['Value']
      if not statusDict:
        gLogger.info("updateFileStatus: No file statuses to be updated for transformation %s." % transID)
        continue
      fileReport = FileReport(server=self.transClient.getServer())
      for lfn,fileStatus in statusDict.items():
        fileReport.setFileStatus(int(transID),lfn,fileStatus)
      res = fileReport.commit()
      if not res['OK']:
        gLogger.error("updateFileStatus: Failed to update file status for transformation", "%s %s" % (transID, res['Message']))
      else:
        for fileStatus,update in res['Value'].items():
          gLogger.info("updateFileStatus: Updated %s files for %s to %s." % (update, transID, fileStatus))
    gLogger.info("updateFileStatus: Transformation file status update complete")
    return S_OK()

  def checkReservedTasks(self):
    """ Recover the tasks left in 'Reserved' status for more than one hour (and less than 7 days).

    Tasks without an associated external task are set back to 'Created',
    tasks with one are set to 'Submitted' together with the external ID.

    :return: S_OK(), or the failing result of the transformation selection
    """
    gLogger.info("checkReservedTasks: Checking Reserved tasks")
    # Get the transformations which should be checked
    status = self.am_getOption('CheckReservedStatus',['Active','Completing','Stopped'])
    res = self._selectTransformations(transType=self.transType,status=status,agentType=[])
    if not res['OK']:
      return res
    transformations = res['Value']
    for transformation in transformations:
      transID = transformation['TransformationID']
      # Select the tasks which have been in Reserved status for more than 1 hour for selected transformations
      condDict = {"TransformationID":transID,"ExternalStatus":'Reserved'}
      time_stamp_older = str(datetime.datetime.utcnow() - datetime.timedelta(hours=1))
      time_stamp_newer = str(datetime.datetime.utcnow() - datetime.timedelta(days=7))
      res = self.transClient.getTransformationTasks(condDict=condDict,older=time_stamp_older,newer=time_stamp_newer, timeStamp='LastUpdateTime')
      if not res['OK']:
        gLogger.error("checkReservedTasks: Failed to get Reserved tasks for transformation", "%s %s" % (transID,res['Message']))
        continue
      if not res['Value']:
        gLogger.verbose("checkReservedTasks: No Reserved tasks found for transformation %s" % transID)
        continue
      res = self.updateTransformationReservedTasks(res['Value'])
      if not res['OK']:
        # A failure is not "nothing found": report it as an error with its reason
        gLogger.error("checkReservedTasks: Failed to update Reserved tasks for transformation", "%s %s" % (transID,res['Message']))
        continue
      noTasks = res['Value']['NoTasks']
      taskNameIDs = res['Value']['TaskNameIDs']
      # For the tasks with no associated request found re-set the status of the task in the transformationDB
      for taskName in noTasks:
        # Task names are split as <transID>_<taskID>; the loop's transID is left untouched
        taskTransID,taskID = taskName.split('_')
        gLogger.info("checkReservedTasks: Resetting status of %s to Created as no associated task found" % (taskName))
        res = self.transClient.setTaskStatus(int(taskTransID),int(taskID),'Created')
        if not res['OK']:
          gLogger.warn("checkReservedTasks: Failed to update task status and ID after recovery", "%s %s" % (taskName,res['Message']))
      # For the tasks for which an associated request was found update the task details in the transformationDB
      for taskName,extTaskID in taskNameIDs.items():
        taskTransID,taskID = taskName.split('_')
        gLogger.info("checkReservedTasks: Setting status of %s to Submitted with ID %s" % (taskName,extTaskID))
        res = self.transClient.setTaskStatusAndWmsID(int(taskTransID),int(taskID),'Submitted',str(extTaskID))
        if not res['OK']:
          gLogger.warn("checkReservedTasks: Failed to update task status and ID after recovery", "%s %s" % (taskName,res['Message']))
    gLogger.info("checkReservedTasks: Updating of reserved tasks complete")
    return S_OK()

  def submitTasks(self):
    """ Prepare and submit the tasks of the selected transformations.

    Up to 'TasksPerLoop' tasks (default 50) are requested per transformation
    and submitted with the username and group found in the current proxy.

    :return: S_OK(), or the failing result of the proxy query or of the
             transformation selection
    """
    gLogger.info("submitTasks: Submitting tasks for transformations")
    res = getProxyInfo(False,False)
    if not res['OK']:
      gLogger.error("submitTasks: Failed to determine credentials for submission",res['Message'])
      return res
    proxyInfo = res['Value']
    owner = proxyInfo['username']
    ownerGroup = proxyInfo['group']
    gLogger.info("submitTasks: Tasks will be submitted with the credentials %s:%s" % (owner,ownerGroup))
    # Get the transformations which should be submitted
    tasksPerLoop = self.am_getOption('TasksPerLoop',50)
    status = self.am_getOption('SubmitStatus',['Active','Completing'])
    res = self._selectTransformations(transType=self.transType,status=status)
    if not res['OK']:
      return res
    transformations = res['Value']
    for transformation in transformations:
      transID = transformation['TransformationID']
      transBody = transformation['Body']
      res = self.transClient.getTasksToSubmit(transID,tasksPerLoop)
      if not res['OK']:
        gLogger.error("submitTasks: Failed to obtain tasks for transformation", "%s %s" % (transID,res['Message']))
        continue
      tasks = res['Value']['JobDictionary']
      if not tasks:
        gLogger.verbose("submitTasks: No tasks found for submission for transformation %s" % transID)
        continue
      gLogger.info("submitTasks: Obtained %d tasks for submission for transformation %s" % (len(tasks),transID))
      res = self.prepareTransformationTasks(transBody,tasks,owner,ownerGroup)
      if not res['OK']:
        gLogger.error("submitTasks: Failed to prepare tasks for transformation", "%s %s" % (transID,res['Message']))
        continue
      res = self.submitTransformationTasks(res['Value'])
      if not res['OK']:
        gLogger.error("submitTasks: Failed to submit prepared tasks for transformation", "%s %s" % (transID,res['Message']))
        continue
      res = self.updateDBAfterTaskSubmission(res['Value'])
      if not res['OK']:
        gLogger.error("submitTasks: Failed to update DB after task submission for transformation", "%s %s" % (transID,res['Message']))
    gLogger.info("submitTasks: Submission of transformation tasks complete")
    return S_OK()
Esempio n. 45
0
class DataRecoveryAgent(AgentModule):
    """Data Recovery Agent"""
    def __init__(self, *args, **kwargs):
        """Create the service clients, default options and the table of recovery rules.

        ``self.todo`` maps ``'NoInputFiles'`` and ``'InputFiles'`` to *ordered* lists
        of rule dictionaries with the keys:

        * ``Message`` / ``ShortMessage``: texts used for logging and the summary,
        * ``Counter``: number of jobs that matched the rule in the current cycle,
        * ``Check``: callable taking the job, returns a truthy value if the rule applies,
        * ``Actions``: callable taking the job and the transformation info object,
          whose list elements are the calls that perform the recovery.

        ``checkJob`` applies only the first rule whose ``Check`` is truthy, so the
        order of the entries is significant.
        """
        AgentModule.__init__(self, *args, **kwargs)
        self.name = 'DataRecoveryAgent'
        self.enabled = False
        self.getJobInfoFromJDLOnly = False

        # overwrites the defaults above with the configured values
        self.__getCSOptions()

        self.jobStatus = [
            'Failed', 'Done'
        ]  # This needs to be both otherwise we cannot account for all cases

        self.jobMon = JobMonitoringClient()
        self.fcClient = FileCatalogClient()
        self.tClient = TransformationClient()
        self.reqClient = ReqClient()
        self.diracAPI = Dirac()
        # input files already claimed by a successful task; reset in execute() for each transformation
        self.inputFilesProcessed = set()
        self.todo = {'NoInputFiles':
                     [dict(Message="NoInputFiles: OutputExists: Job 'Done'",
                           ShortMessage="NoInputFiles: job 'Done' ",
                           Counter=0,
                           Check=lambda job: job.allFilesExist() and job.status == 'Failed',
                           Actions=lambda job, tInfo: [job.setJobDone(tInfo)],
                           ),
                      dict(Message="NoInputFiles: OutputMissing: Job 'Failed'",
                           ShortMessage="NoInputFiles: job 'Failed' ",
                           Counter=0,
                           Check=lambda job: job.allFilesMissing() and job.status == 'Done',
                           Actions=lambda job, tInfo: [job.setJobFailed(tInfo)],
                           ),
                      ],
                     'InputFiles':
                     [ \
                     # must always be first!

                         # __notOnlyKeepers() skips this first entry when counting modifications
                         dict(Message="One of many Successful: clean others",
                              ShortMessage="Other Tasks --> Keep",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and job.otherTasks and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed),
                              Actions=lambda job, tInfo: [self.inputFilesProcessed.update(job.inputFiles),
                                                          job.setJobDone(tInfo),
                                                          job.setInputProcessed(tInfo)]
                              ),
                         dict(Message="Other Task processed Input, no Output: Fail",
                              ShortMessage="Other Tasks --> Fail",
                              Counter=0,
                              Check=lambda job: set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allFilesMissing() and job.status != 'Failed',
                              Actions=lambda job, tInfo: [job.setJobFailed(tInfo)]
                              ),
                         dict(Message="Other Task processed Input: Fail and clean",
                              ShortMessage="Other Tasks --> Cleanup",
                              Counter=0,
                              Check=lambda job: set(job.inputFiles).issubset(
                                  self.inputFilesProcessed) and not job.allFilesMissing(),
                              Actions=lambda job, tInfo: [job.setJobFailed(tInfo), job.cleanOutputs(tInfo)]
                              ),
                         dict(Message="InputFile(s) missing: mark job 'Failed', mark input 'Deleted', clean",
                              ShortMessage="Input Missing --> Job 'Failed, Input 'Deleted', Cleanup",
                              Counter=0,
                              Check=lambda job: job.inputFiles and job.allInputFilesMissing() and \
                              not job.allTransFilesDeleted(),
                              Actions=lambda job, tInfo: [job.cleanOutputs(tInfo), job.setJobFailed(tInfo),
                                                          job.setInputDeleted(tInfo)],
                              ),
                         dict(Message="InputFile(s) Deleted, output Exists: mark job 'Failed', clean",
                              ShortMessage="Input Deleted --> Job 'Failed, Cleanup",
                              Counter=0,
                              Check=lambda job: job.inputFiles and job.allInputFilesMissing() and \
                              job.allTransFilesDeleted() and not job.allFilesMissing(),
                              Actions=lambda job, tInfo: [job.cleanOutputs(tInfo), job.setJobFailed(tInfo)],
                              ),
                         # All Output Exists
                         dict(Message="Output Exists, job Failed, input not Processed --> Job Done, Input Processed",
                              ShortMessage="Output Exists --> Job Done, Input Processed",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              not job.allFilesProcessed() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setJobDone(tInfo), job.setInputProcessed(tInfo)]
                              ),
                         dict(Message="Output Exists, job Failed, input Processed --> Job Done",
                              ShortMessage="Output Exists --> Job Done",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesProcessed() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setJobDone(tInfo)]
                              ),
                         dict(Message="Output Exists, job Done, input not Processed --> Input Processed",
                              ShortMessage="Output Exists --> Input Processed",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and \
                              not job.otherTasks and \
                              job.status == 'Done' and \
                              not job.allFilesProcessed() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setInputProcessed(tInfo)]
                              ),
                         # Output missing
                         dict(Message="Output Missing, job Failed, input Assigned, MaxError --> Input MaxReset",
                              ShortMessage="Max ErrorCount --> Input MaxReset",
                              Counter=0,
                              Check=lambda job: job.allFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesAssigned() and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allInputFilesExist() and \
                              job.checkErrorCount(),
                              Actions=lambda job, tInfo: [job.setInputMaxReset(tInfo)]
                              ),
                         dict(Message="Output Missing, job Failed, input Assigned --> Input Unused",
                              ShortMessage="Output Missing --> Input Unused",
                              Counter=0,
                              Check=lambda job: job.allFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesAssigned() and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setInputUnused(tInfo)]
                              ),
                         dict(Message="Output Missing, job Done, input Assigned --> Job Failed, Input Unused",
                              ShortMessage="Output Missing --> Job Failed, Input Unused",
                              Counter=0,
                              Check=lambda job: job.allFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Done' and \
                              job.allFilesAssigned() and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setInputUnused(tInfo), job.setJobFailed(tInfo)]
                              ),
                         # Some files are missing and need cleanup. Only checking for
                         # assigned, because processed could mean an earlier job was
                         # successful and this one is just the duplicate that needed
                         # to be removed! But we check for other tasks earlier, so
                         # this should not happen
                         dict(Message="Some missing, job Failed, input Assigned --> cleanup, Input 'Unused'",
                              ShortMessage="Output Missing --> Cleanup, Input Unused",
                              Counter=0,
                              Check=lambda job: job.someFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesAssigned() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.cleanOutputs(tInfo), job.setInputUnused(tInfo)]
                              ),
                         dict(Message="Some missing, job Done, input Assigned --> cleanup, job Failed, Input 'Unused'",
                              ShortMessage="Output Missing --> Cleanup, Job Failed, Input Unused",
                              Counter=0,
                              Check=lambda job: job.someFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Done' and \
                              job.allFilesAssigned() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [
                                  job.cleanOutputs(tInfo), job.setInputUnused(tInfo), job.setJobFailed(tInfo)]
                              ),
                         dict(Message="Some missing, job Done --> job Failed",
                              ShortMessage="Output Missing, Done --> Job Failed",
                              Counter=0,
                              Check=lambda job: not job.allFilesExist() and job.status == 'Done',
                              Actions=lambda job, tInfo: [job.setJobFailed(tInfo)]
                              ),
                         dict(Message="Something Strange",
                              ShortMessage="Strange",
                              Counter=0,
                              Check=lambda job: job.status not in ("Failed", "Done"),
                              Actions=lambda job, tInfo: []
                              ),
                         # should always be the last one!
                         # never matches in checkJob(); __failJobHard() increments its counter directly
                         dict(Message="Failed Hard",
                              ShortMessage="Failed Hard",
                              Counter=0,
                              Check=lambda job: False,  # never
                              Actions=lambda job, tInfo: []
                              ),
                     ]
                     }
        # transformation ID -> (nDone, nFailed) seen in the last treated cycle,
        # used by treatTransformation() to skip unchanged transformations
        self.jobCache = defaultdict(lambda: (0, 0))
        # Notification options
        self.notesToSend = ""
        self.subject = "DataRecoveryAgent"
        self.startTime = time.time()

        #############################################################################

    def beginExecution(self):
        """Re-read the agent options at the start of a cycle.

        :returns: S_OK()
        """
        self.__getCSOptions()
        result = S_OK()
        return result

    def __getCSOptions(self):
        """Load the agent options and derive the transformation types to treat.

        The defaults for the types with and without input files are computed from
        the ``Transformations/ExtendableTransfTypes`` and
        ``Transformations/DataProcessing`` values of the Operations helper.
        """
        getOption = self.am_getOption

        self.enabled = getOption('EnableFlag', False)
        self.transformationsToIgnore = getOption('TransformationsToIgnore', [])
        self.getJobInfoFromJDLOnly = getOption('JobInfoFromJDLOnly', False)
        self.transformationStatus = getOption('TransformationStatus', ['Active', 'Completing'])

        opsHelper = Operations()
        extendable = set(opsHelper.getValue('Transformations/ExtendableTransfTypes', ['MCSimulation']))
        processing = set(opsHelper.getValue('Transformations/DataProcessing', []))

        # extendable types are treated as "no input"; the remaining
        # data-processing types are treated as "with input"
        self.transNoInput = getOption('TransformationsNoInput', list(extendable))
        self.transWithInput = getOption('TransformationsWithInput', list(processing - extendable))
        self.transformationTypes = self.transWithInput + self.transNoInput

        self.log.notice('Will treat transformations without input files', self.transNoInput)
        self.log.notice('Will treat transformations with input files', self.transWithInput)

        self.addressTo = getOption('MailTo', [])
        self.addressFrom = getOption('MailFrom', '')
        self.printEveryNJobs = getOption('PrintEvery', 200)

    def execute(self):
        """Run one agent cycle over all eligible transformations.

        For every transformation that is not in the ignore list the rule counters
        and the set of processed input files are reset, the transformation is
        treated and a notification is sent if required.

        :returns: S_OK(), or S_ERROR if the transformations could not be obtained
        """
        self.log.notice("Will ignore the following transformations: %s" %
                        self.transformationsToIgnore)
        self.log.notice(" Job Cache: %s " % self.jobCache)
        transformations = self.getEligibleTransformations(
            self.transformationStatus, self.transformationTypes)
        if not transformations['OK']:
            self.log.error("Failure to get transformations",
                           transformations['Message'])
            return S_ERROR("Failure to get transformations")

        # items() instead of the Python-2-only iteritems(): works on both versions
        for transID, transInfoDict in transformations['Value'].items():
            if transID in self.transformationsToIgnore:
                self.log.notice('Ignoring Transformation: %s' % transID)
                continue
            self.__resetCounters()
            self.inputFilesProcessed = set()
            self.log.notice('Running over Transformation: %s' % transID)
            # the keys are strings (see getEligibleTransformations), the job cache uses ints
            self.treatTransformation(int(transID), transInfoDict)
            self.sendNotification(transID, transInfoDict)

        return S_OK()

    def getEligibleTransformations(self, status, typeList):
        """ Select transformations of given status and type.
    """
        res = self.tClient.getTransformations(condDict={
            'Status': status,
            'Type': typeList
        })
        if not res['OK']:
            return res
        transformations = {}
        for prod in res['Value']:
            transID = prod['TransformationID']
            transformations[str(transID)] = prod
        return S_OK(transformations)

    def treatTransformation(self, transID, transInfoDict):
        """Check all jobs of one transformation unless nothing changed since last time.

        :param transID: ID of the transformation, used as key of the job cache
        :param dict transInfoDict: transformation record; its 'Type' decides
                                   whether task information is collected
        """
        tInfo = TransformationInfo(transID, transInfoDict, self.enabled,
                                   self.tClient, self.fcClient, self.jobMon)
        jobs, nDone, nFailed = tInfo.getJobs(statusList=self.jobStatus)

        # same numbers of Done and Failed jobs as in the cached cycle: skip
        cachedDone, cachedFailed = self.jobCache[transID]
        if cachedDone == nDone and cachedFailed == nFailed:
            self.log.notice('Skipping transformation %s because nothing changed' % transID)
            return
        self.jobCache[transID] = (nDone, nFailed)

        self.startTime = time.time()
        tasksDict = None
        lfnTaskDict = None
        if transInfoDict['Type'] in self.transWithInput:
            self.log.notice('Getting tasks...')
            tasksDict = tInfo.checkTasksStatus()
            # reverse lookup: LFN -> ID of the task it belongs to
            lfnTaskDict = {}
            for taskID, taskDicts in tasksDict.items():
                for taskDict in taskDicts:
                    lfnTaskDict[taskDict['LFN']] = taskID

        self.checkAllJobs(jobs, tInfo, tasksDict, lfnTaskDict)
        self.printSummary()

    def checkJob(self, job, tInfo):
        """Deal with the job."""
        checks = self.todo[
            'NoInputFiles'] if job.tType in self.transNoInput else self.todo[
                'InputFiles']
        for do in checks:
            self.log.verbose('Testing: ', do['Message'])
            if do['Check'](job):
                do['Counter'] += 1
                self.log.notice(do['Message'])
                self.log.notice(job)
                self.notesToSend += do['Message'] + '\n'
                self.notesToSend += str(job) + '\n'
                do['Actions'](job, tInfo)
                return

    def getLFNStatus(self, jobs):
        """Collect the input and output LFNs of all jobs and check their existence.

        Job information and catalog lookups are retried until they succeed.

        :param dict jobs: job objects (only the values are used); each must
                          provide ``getJobInformation``, ``inputFiles`` and ``outputFiles``
        :returns: dict filled from the 'Successful' part of the ``fcClient.exists`` replies
        """
        self.log.notice('Collecting LFNs...')
        lfnExistence = {}
        lfnCache = []
        nJobs = len(jobs)
        jobInfoStart = time.time()
        for counter, job in enumerate(jobs.values()):
            if counter % self.printEveryNJobs == 0:
                self.log.notice(
                    'Getting JobInfo: %d/%d: %3.1fs' %
                    (counter, nJobs, float(time.time() - jobInfoStart)))
            while True:
                try:
                    job.getJobInformation(self.diracAPI,
                                          self.jobMon,
                                          jdlOnly=self.getJobInfoFromJDLOnly)
                    lfnCache.extend(job.inputFiles)
                    lfnCache.extend(job.outputFiles)
                    break
                except RuntimeError as e:  # try again
                    self.log.error('+++++ Failure for job:', job.jobID)
                    self.log.error('+++++ Exception: ', str(e))

        timeSpent = float(time.time() - jobInfoStart)
        # Average over the number of jobs. The enumerate index ends at nJobs - 1,
        # so dividing by it raised ZeroDivisionError for zero or one job.
        timePerJob = timeSpent / nJobs if nJobs else 0.0
        self.log.notice('Getting JobInfo Done: %3.1fs (%3.3fs per job)' %
                        (timeSpent, timePerJob))

        counter = 0
        fileInfoStart = time.time()
        for lfnChunk in breakListIntoChunks(lfnCache, 200):
            counter += 200
            if counter % 1000 == 0:
                self.log.notice('Getting FileInfo: %d/%d: %3.1fs' %
                                (counter, len(lfnCache),
                                 float(time.time() - fileInfoStart)))
            while True:
                try:
                    reps = self.fcClient.exists(lfnChunk)
                    if not reps['OK']:
                        self.log.error(
                            'Failed to check file existence, try again...',
                            reps['Message'])
                        raise RuntimeError('Try again')
                    statuses = reps['Value']
                    lfnExistence.update(statuses['Successful'])
                    break
                except RuntimeError:  # failure already logged above, try again
                    pass
        self.log.notice('Getting FileInfo Done: %3.1fs' %
                        (float(time.time() - fileInfoStart)))

        return lfnExistence

    def setPendingRequests(self, jobs):
        """Flag every job that has a request which is neither 'Done' nor 'Canceled'.

        Requests are read in chunks of 1000 jobs; reading is repeated until the
        request client returns successfully.
        """
        for jobChunk in breakListIntoChunks(jobs.values(), 1000):
            jobIDs = [job.jobID for job in jobChunk]
            result = self.reqClient.readRequestsForJobs(jobIDs)
            while not result['OK']:
                self.log.error('Failed to read requests', result['Message'])
                # repeat
                result = self.reqClient.readRequestsForJobs(jobIDs)

            successful = result['Value']['Successful']
            for jobID in successful:
                requestID = successful[jobID].RequestID
                statusResult = self.reqClient.getRequestStatus(requestID)
                dbStatus = statusResult.get('Value', 'Unknown')
                # only the first job of the chunk with this ID is updated
                owner = next((job for job in jobChunk if job.jobID == jobID), None)
                if owner is None:
                    continue
                owner.pendingRequest = dbStatus not in ('Done', 'Canceled')
                state = 'pending' if owner.pendingRequest else 'finished'
                self.log.notice('Found %s request for job %d' % (state, jobID))

    def checkAllJobs(self, jobs, tInfo, tasksDict=None, lfnTaskDict=None):
        """Run the recovery checks over all jobs of a transformation.

        Jobs with a pending request are skipped. A job is re-tried for as long as
        treating it raises a RuntimeError.

        :param dict jobs: job objects (only the values are used)
        :param tInfo: transformation info object handed to the rule actions
        :param tasksDict: task information passed to ``job.getTaskInfo``; skipped if empty
        :param lfnTaskDict: LFN to task ID mapping passed to ``job.getTaskInfo``; skipped if empty
        """
        nJobs = len(jobs)
        self.setPendingRequests(jobs)
        lfnExistence = self.getLFNStatus(jobs)
        self.log.notice('Running over all the jobs')
        jobCheckStart = time.time()
        for counter, job in enumerate(jobs.values()):
            if counter % self.printEveryNJobs == 0:
                self.log.notice(
                    'Checking Jobs %d/%d: %3.1fs' %
                    (counter, nJobs, float(time.time() - jobCheckStart)))
            while True:
                try:
                    if job.pendingRequest:
                        self.log.warn('Job has Pending requests:\n%s' % job)
                        break
                    job.checkFileExistence(lfnExistence)
                    if tasksDict and lfnTaskDict:
                        try:
                            job.getTaskInfo(tasksDict, lfnTaskDict,
                                            self.transWithInput)
                        except TaskInfoException as e:
                            self.log.error(
                                " Skip Task, due to TaskInfoException: %s" % e)
                            if not job.inputFiles and job.tType in self.transWithInput:
                                self.__failJobHard(job, tInfo)
                            break
                    self.checkJob(job, tInfo)
                    break  # get out of the while loop
                except RuntimeError as e:
                    self.log.error("+++++ Failure for job: %d " % job.jobID)
                    self.log.error("+++++ Exception: ", str(e))
                    # run these again because of RuntimeError
        # report the number of jobs, not the last enumerate index (which is nJobs - 1)
        self.log.notice('Checking Jobs Done: %d/%d: %3.1fs' %
                        (nJobs, nJobs, float(time.time() - jobCheckStart)))

    def printSummary(self):
        """print summary of changes"""
        self.log.notice("Summary:")
        for do in itertools.chain.from_iterable(self.todo.values()):
            message = "%s: %s" % (do['ShortMessage'].ljust(56),
                                  str(do['Counter']).rjust(5))
            self.log.notice(message)
            if self.notesToSend:
                self.notesToSend = str(message) + '\n' + self.notesToSend

    def __resetCounters(self):
        """ reset counters for modified jobs """
        for _name, checks in self.todo.iteritems():
            for do in checks:
                do['Counter'] = 0

    def __failJobHard(self, job, tInfo):
        """ set job to failed and remove output files if there are any """
        if job.inputFiles:
            return
        if job.status in ("Failed",) \
           and job.allFilesMissing():
            return
        self.log.notice("Failing job hard %s" % job)
        self.notesToSend += "Failing job %s: no input file?\n" % job.jobID
        self.notesToSend += str(job) + '\n'
        self.todo['InputFiles'][-1]['Counter'] += 1
        job.cleanOutputs(tInfo)
        job.setJobFailed(tInfo)
        # if job.inputFile is not None:
        #   job.setInputDeleted(tInfo)

    def __notOnlyKeepers(self, transType):
        """check of we only have 'Keep' messages

    in this case we do not have to send report email or run again next time

    """
        if transType in self.transNoInput:
            return True

        checks = self.todo['InputFiles']
        totalCount = 0
        for check in checks[1:]:
            totalCount += check['Counter']

        return totalCount > 0

    def sendNotification(self, transID, transInfoDict):
        """Send notification email if something was modified for a transformation.

    :param int transID: ID of given transformation
    :param transInfoDict:
    """
        if not self.addressTo or not self.addressFrom or not self.notesToSend:
            return
        if not self.__notOnlyKeepers(transInfoDict['Type']):
            # purge notes
            self.notesToSend = ""
            return

        # remove from the jobCache because something happened
        self.jobCache.pop(int(transID), None)
        # send the email to recipients
        for address in self.addressTo:
            result = NotificationClient().sendMail(address,
                                                   "%s: %s" %
                                                   (self.subject, transID),
                                                   self.notesToSend,
                                                   self.addressFrom,
                                                   localAttempt=False)
            if not result['OK']:
                self.log.error('Cannot send notification mail',
                               result['Message'])
        # purge notes
        self.notesToSend = ""
Esempio n. 46
0
class TaskManagerAgentBase(AgentModule, TransformationAgentsUtilities):
    """ To be extended. Please look at WorkflowTaskAgent and RequestTaskAgent.
  """
    def __init__(self, *args, **kwargs):
        """Constructor: initialise both base classes and the default members.

        Always call this in the extension agent.
        """
        AgentModule.__init__(self, *args, **kwargs)
        TransformationAgentsUtilities.__init__(self)

        # client and transformation types, to be provided in initialize()
        self.transClient = None
        self.transType = []

        # submission defaults
        self.tasksPerLoop = 50
        self.bulkSubmissionFlag = False
        self.pluginLocation = ''

        # credentials used for the submission
        self.owner = ''
        self.ownerGroup = ''
        self.ownerDN = ''

        # bookkeeping for the threading
        self.transQueue = Queue()
        self.transInQueue = []
        self.transInThread = {}

    #############################################################################

    def initialize(self):
        """Agent initialization.

        Registers the 'SubmittedTasks' monitoring activity, reads the
        'PluginLocation', 'BulkSubmission' and 'maxNumberOfThreads' options,
        creates the default TransformationClient and queues one ``_execute``
        job per thread in a thread pool.

        The extensions MUST provide in the initialize method the following data members:
        - TransformationClient objects (self.transClient),
        - set the shifterProxy if different from the default one set here ('ProductionManager')
        - list of transformation types to be looked (self.transType)

        :returns: S_OK()
        """

        gMonitor.registerActivity("SubmittedTasks",
                                  "Automatically submitted tasks",
                                  "Transformation Monitoring", "Tasks",
                                  gMonitor.OP_ACUM)

        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Client.TaskManagerPlugin')

        # Default clients
        self.transClient = TransformationClient()

        # Bulk submission flag
        self.bulkSubmissionFlag = self.am_getOption('BulkSubmission', False)

        # setting up the threading
        maxNumberOfThreads = self.am_getOption('maxNumberOfThreads', 15)
        threadPool = ThreadPool(maxNumberOfThreads, maxNumberOfThreads)
        self.log.verbose("Multithreaded with %d threads" % maxNumberOfThreads)

        # range() instead of the Python-2-only xrange(): works on both versions
        for threadID in range(maxNumberOfThreads):
            threadPool.generateJobAndQueueIt(self._execute, [threadID])

        return S_OK()

    def finalize(self):
        """Graceful finalization.

        If transformations are still queued, the queue list is emptied and the
        method waits, polling every two seconds, until ``transInThread`` is empty.

        :returns: S_OK()
        """
        if not self.transInQueue:
            return S_OK()

        nInThread = len(self.transInThread)
        self._logInfo(
            "Wait for threads to get empty before terminating the agent (%d tasks)"
            % nInThread)
        self.transInQueue = []
        while self.transInThread:
            time.sleep(2)
        self.log.info("Threads are empty, terminating the agent...")
        return S_OK()

    #############################################################################

    def execute(self):
        """ The TaskManagerBase execution method is just filling the Queues of transformations that need to be processed
    """

        operationsOnTransformationDict = {}

        # Determine whether the task status is to be monitored and updated
        enableTaskMonitor = self.am_getOption('MonitorTasks', '')
        if not enableTaskMonitor:
            self.log.verbose(
                "Monitoring of tasks is disabled. To enable it, create the 'MonitorTasks' option"
            )
        else:
            # Get the transformations for which the tasks have to be updated
            status = self.am_getOption(
                'UpdateTasksTransformationStatus',
                self.am_getOption('UpdateTasksStatus',
                                  ['Active', 'Completing', 'Stopped']))
            transformations = self._selectTransformations(
                transType=self.transType, status=status, agentType=[])
            if not transformations['OK']:
                self.log.warn("Could not select transformations:",
                              transformations['Message'])
            else:
                transformationIDsAndBodies = dict(
                    (transformation['TransformationID'],
                     transformation['Body'])
                    for transformation in transformations['Value'])
                for transID, body in transformationIDsAndBodies.iteritems():
                    operationsOnTransformationDict[transID] = {
                        'Body': body,
                        'Operations': ['updateTaskStatus']
                    }

        # Determine whether the task files status is to be monitored and updated
        enableFileMonitor = self.am_getOption('MonitorFiles', '')
        if not enableFileMonitor:
            self.log.verbose(
                "Monitoring of files is disabled. To enable it, create the 'MonitorFiles' option"
            )
        else:
            # Get the transformations for which the files have to be updated
            status = self.am_getOption(
                'UpdateFilesTransformationStatus',
                self.am_getOption('UpdateFilesStatus',
                                  ['Active', 'Completing', 'Stopped']))
            transformations = self._selectTransformations(
                transType=self.transType, status=status, agentType=[])
            if not transformations['OK']:
                self.log.warn("Could not select transformations:",
                              transformations['Message'])
            else:
                transformationIDsAndBodies = dict(
                    (transformation['TransformationID'],
                     transformation['Body'])
                    for transformation in transformations['Value'])
                for transID, body in transformationIDsAndBodies.iteritems():
                    if transID in operationsOnTransformationDict:
                        operationsOnTransformationDict[transID][
                            'Operations'].append('updateFileStatus')
                    else:
                        operationsOnTransformationDict[transID] = {
                            'Body': body,
                            'Operations': ['updateFileStatus']
                        }

        # Determine whether the checking of reserved tasks is to be performed
        enableCheckReserved = self.am_getOption('CheckReserved', '')
        if not enableCheckReserved:
            self.log.verbose(
                "Checking of reserved tasks is disabled. To enable it, create the 'CheckReserved' option"
            )
        else:
            # Get the transformations for which the check of reserved tasks have to be performed
            status = self.am_getOption(
                'CheckReservedTransformationStatus',
                self.am_getOption('CheckReservedStatus',
                                  ['Active', 'Completing', 'Stopped']))
            transformations = self._selectTransformations(
                transType=self.transType, status=status, agentType=[])
            if not transformations['OK']:
                self.log.warn("Could not select transformations:",
                              transformations['Message'])
            else:
                transformationIDsAndBodies = dict(
                    (transformation['TransformationID'],
                     transformation['Body'])
                    for transformation in transformations['Value'])
                for transID, body in transformationIDsAndBodies.iteritems():
                    if transID in operationsOnTransformationDict:
                        operationsOnTransformationDict[transID][
                            'Operations'].append('checkReservedTasks')
                    else:
                        operationsOnTransformationDict[transID] = {
                            'Body': body,
                            'Operations': ['checkReservedTasks']
                        }

        # Determine whether the submission of tasks is to be performed
        enableSubmission = self.am_getOption('SubmitTasks', '')
        if not enableSubmission:
            self.log.verbose(
                "Submission of tasks is disabled. To enable it, create the 'SubmitTasks' option"
            )
        else:
            # getting the credentials for submission
            res = getProxyInfo(False, False)
            if not res['OK']:
                self.log.error(
                    "Failed to determine credentials for submission",
                    res['Message'])
                return res
            proxyInfo = res['Value']
            self.owner = proxyInfo['username']
            self.ownerGroup = proxyInfo['group']
            self.ownerDN = proxyInfo['identity']
            self.log.info(
                "Tasks will be submitted with the credentials %s:%s" %
                (self.owner, self.ownerGroup))
            # Get the transformations for which the check of reserved tasks have to be performed
            status = self.am_getOption(
                'SubmitTransformationStatus',
                self.am_getOption('SubmitStatus', ['Active', 'Completing']))
            transformations = self._selectTransformations(
                transType=self.transType, status=status)
            if not transformations['OK']:
                self.log.warn("Could not select transformations:",
                              transformations['Message'])
            else:
                # Get the transformations which should be submitted
                self.tasksPerLoop = self.am_getOption('TasksPerLoop',
                                                      self.tasksPerLoop)
                transformationIDsAndBodies = dict(
                    (transformation['TransformationID'],
                     transformation['Body'])
                    for transformation in transformations['Value'])
                for transID, body in transformationIDsAndBodies.iteritems():
                    if transID in operationsOnTransformationDict:
                        operationsOnTransformationDict[transID][
                            'Operations'].append('submitTasks')
                    else:
                        operationsOnTransformationDict[transID] = {
                            'Body': body,
                            'Operations': ['submitTasks']
                        }

        self._fillTheQueue(operationsOnTransformationDict)

        return S_OK()

    def _selectTransformations(self,
                               transType=None,
                               status=['Active', 'Completing'],
                               agentType=['Automatic']):
        """Ask the transformation client for the transformations matching the given filters.

        :param transType: value of the 'Type' condition; the condition is left out only when None
        :param status: value of the 'Status' condition; left out when empty/falsy
        :param agentType: value of the 'AgentType' condition; left out when empty/falsy
        :return: the result dictionary of ``getTransformations``, handed back unchanged
        """
        # NOTE: the list defaults are shared between calls; this method only reads them.
        conditions = {}
        if status:
            conditions['Status'] = status
        if transType is not None:
            conditions['Type'] = transType
        if agentType:
            conditions['AgentType'] = agentType

        result = self.transClient.getTransformations(condDict=conditions)

        # The outcome is only logged here; error handling is left to the caller.
        if result['OK']:
            found = result['Value']
            if found:
                self.log.verbose("Obtained %d transformations" % len(found))
            else:
                self.log.verbose("No transformations found")
        else:
            self.log.error("Failed to get transformations:", result['Message'])
        return result

    def _fillTheQueue(self, operationsOnTransformationsDict):
        """Put on the thread queue every transformation that is not already tracked in transInQueue.

        :param dict operationsOnTransformationsDict: maps a transformation ID to the value
            (body and operations) that is queued together with it
        """
        count = 0
        # items() rather than the Python 2-only iteritems(): behaves the same here and
        # matches the other dictionary loops of this class.
        for transID, bodyAndOps in operationsOnTransformationsDict.items():
            if transID in self.transInQueue:
                # already registered: do not queue it a second time
                continue
            count += 1
            self.transInQueue.append(transID)
            self.transQueue.put({transID: bodyAndOps})

        self.log.info("Out of %d transformations, %d put in thread queue" %
                      (len(operationsOnTransformationsDict), count))

    #############################################################################

    def _getClients(self):
        """Build the clients handed to one worker thread.

        The clients created here are defaults and are meant to be adapted by extensions.

        :return: dict with the keys 'TransformationClient' and 'TaskManager'
        """
        clients = {'TransformationClient': TransformationClient()}

        # WorkflowTasks is for WMS tasks; replace it with something else if needed
        taskManager = WorkflowTasks()
        taskManager.pluginLocation = self.pluginLocation
        clients['TaskManager'] = taskManager

        return clients

    def _execute(self, threadID):
        """Worker loop run inside each thread: take items from the queue and process them.

        Every queue item is a dict whose first key is the transformation ID and whose value
        holds the list of 'Operations'; each operation name is looked up as a method of this
        agent and called with the queue item and the clients of this thread.

        :param int threadID: number of the thread, used in the logging prefix of the transformation
        """
        # Each thread will have its own clients
        clients = self._getClients()
        method = '_execute'

        while True:
            startTime = time.time()
            transIDOPBody = self.transQueue.get()
            if not self.transInQueue:
                # Queue was cleared, nothing to do
                continue
            # Reset for every item: if something fails before these are assigned, the
            # except/finally clauses must neither hit an unbound name nor report (and
            # clean up) the transformation handled in the previous iteration.
            transID = None
            operation = None
            try:
                transID = list(transIDOPBody)[0]
                operations = transIDOPBody[transID]['Operations']
                if transID not in self.transInQueue:
                    self._logWarn("Got a transf not in transInQueue...?",
                                  method=method,
                                  transID=transID)
                    # NOTE(review): this leaves the while loop, so this thread stops
                    # consuming the queue - confirm that this is intended.
                    break
                self.transInThread[transID] = ' [Thread%d] [%s] ' % (
                    threadID, str(transID))
                self._logInfo("Start processing transformation",
                              method=method,
                              transID=transID)
                clients['TaskManager'].transInThread = self.transInThread
                for operation in operations:
                    self._logInfo("Executing %s" % operation,
                                  method=method,
                                  transID=transID)
                    startOperation = time.time()
                    res = getattr(self, operation)(transIDOPBody, clients)
                    if not res['OK']:
                        # A failing operation is reported, the remaining ones still run
                        self._logError("Failed to %s: %s" %
                                       (operation, res['Message']),
                                       method=method,
                                       transID=transID)
                    self._logInfo("Executed %s in %.1f seconds" %
                                  (operation, time.time() - startOperation),
                                  method=method,
                                  transID=transID)
            except Exception as x:
                self._logException('Exception executing operation %s' %
                                   operation,
                                   lException=x,
                                   method=method,
                                   transID=transID)
            finally:
                # Always release the transformation, whatever happened above
                if not transID:
                    transID = 'None'
                self._logInfo("Processed transformation in %.1f seconds" %
                              (time.time() - startTime),
                              method=method,
                              transID=transID)
                self.transInThread.pop(transID, None)
                self._logVerbose("%d transformations still in queue" %
                                 (len(self.transInThread)),
                                 method=method,
                                 transID=transID)
                if transID in self.transInQueue:
                    self.transInQueue.remove(transID)
                self._logDebug("transInQueue = ",
                               self.transInQueue,
                               method=method,
                               transID=transID)

    #############################################################################
    # real operations done

    def updateTaskStatus(self, transIDOPBody, clients):
        """Update the status of the tasks of one transformation.

        The tasks whose ExternalStatus is in the 'TaskUpdateStatus' option are requested from
        the transformation client (with older=<now - 10 minutes> on 'LastUpdateTime'), their
        current status is asked to the task manager, optionally in chunks of
        'TaskUpdateChunkSize' tasks, and every reported status is set through the
        transformation client.

        :param dict transIDOPBody: dict whose first key is the transformation ID
        :param dict clients: clients of the thread; 'TransformationClient' and 'TaskManager' are used
        :return: S_OK() when all updates went through, the query result itself when it failed
                 or found nothing, otherwise the first failing result dictionary
        """
        transID = list(transIDOPBody)[0]
        method = 'updateTaskStatus'

        # Get the tasks which are in an UPDATE state
        updateStatus = self.am_getOption('TaskUpdateStatus', [
            'Checking', 'Deleted', 'Killed', 'Staging', 'Stalled', 'Matched',
            'Scheduled', 'Rescheduled', 'Completed', 'Submitted', 'Assigned',
            'Received', 'Waiting', 'Running'
        ])
        condDict = {
            "TransformationID": transID,
            "ExternalStatus": updateStatus
        }
        timeStamp = str(datetime.datetime.utcnow() -
                        datetime.timedelta(minutes=10))

        # Get transformation tasks
        transformationTasks = clients[
            'TransformationClient'].getTransformationTasks(
                condDict=condDict, older=timeStamp, timeStamp='LastUpdateTime')
        if not transformationTasks['OK']:
            self._logError("Failed to get tasks to update:",
                           transformationTasks['Message'],
                           method=method,
                           transID=transID)
            return transformationTasks
        tasks = transformationTasks['Value']
        if not tasks:
            self._logVerbose("No tasks found to update",
                             method=method,
                             transID=transID)
            return transformationTasks

        # Get status for the transformation tasks
        chunkSize = self.am_getOption('TaskUpdateChunkSize', 0)
        # The option may come back as something int() cannot convert (this happens in
        # tests): fall back to "no chunking". Only conversion errors are caught, so that
        # SystemExit/KeyboardInterrupt are not swallowed as a bare except would do.
        try:
            chunkSize = int(chunkSize)
        except (TypeError, ValueError):
            chunkSize = 0
        if chunkSize:
            self._logVerbose("Getting %d tasks status (chunks of %d)" %
                             (len(tasks), chunkSize),
                             method=method,
                             transID=transID)
            taskChunks = breakListIntoChunks(tasks, chunkSize)
        else:
            self._logVerbose("Getting %d tasks status" % len(tasks),
                             method=method,
                             transID=transID)
            taskChunks = [tasks]

        updated = {}
        for nb, taskChunk in enumerate(taskChunks):
            submittedTaskStatus = clients[
                'TaskManager'].getSubmittedTaskStatus(taskChunk)
            if not submittedTaskStatus['OK']:
                self._logError("Failed to get updated task states:",
                               submittedTaskStatus['Message'],
                               method=method,
                               transID=transID)
                return submittedTaskStatus
            statusDict = submittedTaskStatus['Value']
            if not statusDict:
                self._logVerbose("%4d: No tasks to update" % nb,
                                 method=method,
                                 transID=transID)
                # Nothing to set for this chunk (same handling as in updateFileStatus)
                continue

            # Set status for tasks that changes
            for status, taskIDs in statusDict.items():
                self._logVerbose("%4d: Updating %d task(s) to %s" %
                                 (nb, len(taskIDs), status),
                                 method=method,
                                 transID=transID)
                setTaskStatus = clients['TransformationClient'].setTaskStatus(
                    transID, taskIDs, status)
                if not setTaskStatus['OK']:
                    self._logError(
                        "Failed to update task status for transformation:",
                        setTaskStatus['Message'],
                        method=method,
                        transID=transID)
                    return setTaskStatus
                updated[status] = updated.get(status, 0) + len(taskIDs)

        # Summary per status, over all chunks
        for status, nb in updated.items():
            self._logInfo("Updated %d tasks to status %s" % (nb, status),
                          method=method,
                          transID=transID)
        return S_OK()

    def updateFileStatus(self, transIDOPBody, clients):
        """Update the status of the 'Assigned' files of one transformation.

        The files are requested from the transformation client (with older=<now - 10 minutes>
        on 'LastUpdate'), grouped by their TaskID, and their status is asked to the task
        manager for 100 tasks at a time; the reported states are set and committed through a
        FileReport.

        :param dict transIDOPBody: dict whose first key is the transformation ID
        :param dict clients: clients of the thread; 'TransformationClient' and 'TaskManager' are used
        :return: S_OK() when all updates went through, the query result itself when it failed
                 or found nothing, otherwise the first failing result dictionary
        """
        transID = list(transIDOPBody)[0]
        method = 'updateFileStatus'

        timeStamp = str(datetime.datetime.utcnow() -
                        datetime.timedelta(minutes=10))

        # get transformation files
        condDict = {'TransformationID': transID, 'Status': ['Assigned']}
        transformationFiles = clients[
            'TransformationClient'].getTransformationFiles(
                condDict=condDict, older=timeStamp, timeStamp='LastUpdate')
        if not transformationFiles['OK']:
            self._logError("Failed to get transformation files to update:",
                           transformationFiles['Message'],
                           method=method,
                           transID=transID)
            return transformationFiles
        if not transformationFiles['Value']:
            self._logInfo("No files to be updated",
                          method=method,
                          transID=transID)
            return transformationFiles

        # Get the status of the transformation files
        # Sort the files by taskID
        taskFiles = {}
        for fileDict in transformationFiles['Value']:
            taskFiles.setdefault(fileDict['TaskID'], []).append(fileDict)

        chunkSize = 100
        self._logVerbose("Getting file status for %d tasks (chunks of %d)" %
                         (len(taskFiles), chunkSize),
                         method=method,
                         transID=transID)
        updated = {}
        # Process 100 tasks at a time
        for nb, taskIDs in enumerate(breakListIntoChunks(taskFiles,
                                                         chunkSize)):
            fileChunk = []
            for taskID in taskIDs:
                fileChunk += taskFiles[taskID]
            submittedFileStatus = clients[
                'TaskManager'].getSubmittedFileStatus(fileChunk)
            if not submittedFileStatus['OK']:
                self._logError(
                    "Failed to get updated file states for transformation:",
                    submittedFileStatus['Message'],
                    method=method,
                    transID=transID)
                return submittedFileStatus
            statusDict = submittedFileStatus['Value']
            if not statusDict:
                self._logVerbose("%4d: No file states to be updated" % nb,
                                 method=method,
                                 transID=transID)
                continue

            # Set the status of files
            fileReport = FileReport(
                server=clients['TransformationClient'].getServer())
            for lfn, status in statusDict.items():
                setFileStatus = fileReport.setFileStatus(transID, lfn, status)
                if not setFileStatus['OK']:
                    # Report the failure like every other error path of this method
                    self._logError("Failed to set file status:",
                                   setFileStatus['Message'],
                                   method=method,
                                   transID=transID)
                    return setFileStatus
                updated[status] = updated.get(status, 0) + 1
            commit = fileReport.commit()
            if not commit['OK']:
                self._logError(
                    "Failed to update file states for transformation:",
                    commit['Message'],
                    method=method,
                    transID=transID)
                return commit
            self._logVerbose("%4d: Updated the states of %d files" %
                             (nb, len(commit['Value'])),
                             method=method,
                             transID=transID)

        # Summary per status, over all chunks
        for status, nb in updated.items():
            self._logInfo("Updated %d files to status %s" % (nb, status),
                          method=method,
                          transID=transID)
        return S_OK()

    def checkReservedTasks(self, transIDOPBody, clients):
        """Check the tasks of one transformation that are in 'Reserved' external status.

        The tasks are requested from the transformation client with older=<now - 1 hour> and
        passed to the task manager; the tasks it reports under 'NoTasks' are set back to
        'Created', the ones under 'TaskNameIDs' are set to 'Submitted' with their external ID.

        :param dict transIDOPBody: dict whose first key is the transformation ID
        :param dict clients: clients of the thread; 'TransformationClient' and 'TaskManager' are used
        :return: S_OK() when all updates went through, the query result itself when it failed
                 or found nothing, otherwise the first failing result dictionary
        """
        transID = list(transIDOPBody)[0]
        method = 'checkReservedTasks'

        # Select the tasks which have been in Reserved status for more than 1 hour for selected transformations
        condDict = {"TransformationID": transID, "ExternalStatus": 'Reserved'}
        time_stamp_older = str(datetime.datetime.utcnow() -
                               datetime.timedelta(hours=1))

        res = clients['TransformationClient'].getTransformationTasks(
            condDict=condDict, older=time_stamp_older)
        self._logDebug("getTransformationTasks(%s) return value:" % condDict,
                       res,
                       method=method,
                       transID=transID)
        if not res['OK']:
            self._logError("Failed to get Reserved tasks:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        if not res['Value']:
            # method is passed here as in every other log call of this operation
            self._logVerbose("No Reserved tasks found",
                             method=method,
                             transID=transID)
            return res
        reservedTasks = res['Value']

        # Update the reserved tasks
        res = clients['TaskManager'].updateTransformationReservedTasks(
            reservedTasks)
        self._logDebug("updateTransformationReservedTasks(%s) return value:" %
                       reservedTasks,
                       res,
                       method=method,
                       transID=transID)
        if not res['OK']:
            self._logError("Failed to update transformation reserved tasks:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        noTasks = res['Value']['NoTasks']
        taskNameIDs = res['Value']['TaskNameIDs']

        # For the tasks with no associated request found re-set the status of the task in the transformationDB
        if noTasks:
            self._logInfo(
                "Resetting status of %d tasks to Created as no associated job/request found"
                % len(noTasks),
                method=method,
                transID=transID)
            for taskName in noTasks:
                # NOTE(review): transID is rebound to the ID parsed from the task name and
                # that value is used for the calls and log lines below - presumably it is
                # the same transformation; confirm against _parseTaskName.
                transID, taskID = self._parseTaskName(taskName)
                res = clients['TransformationClient'].setTaskStatus(
                    transID, taskID, 'Created')
                if not res['OK']:
                    self._logError(
                        "Failed to update task status and ID after recovery:",
                        '%s %s' % (taskName, res['Message']),
                        method=method,
                        transID=transID)
                    return res

        # For the tasks for which an associated request was found update the task details in the transformationDB
        for taskName, extTaskID in taskNameIDs.items():
            transID, taskID = self._parseTaskName(taskName)
            self._logInfo("Setting status of %s to Submitted with ID %s" %
                          (taskName, extTaskID),
                          method=method,
                          transID=transID)
            setTaskStatusAndWmsID = clients[
                'TransformationClient'].setTaskStatusAndWmsID(
                    transID, taskID, 'Submitted', str(extTaskID))
            if not setTaskStatusAndWmsID['OK']:
                self._logError(
                    "Failed to update task status and ID after recovery:",
                    "%s %s" % (taskName, setTaskStatusAndWmsID['Message']),
                    method=method,
                    transID=transID)
                return setTaskStatusAndWmsID

        return S_OK()

    def submitTasks(self, transIDOPBody, clients):
        """Submit the pending tasks of one transformation through the task manager.

        Four steps are chained, each one stopping the operation when it fails: get the tasks
        to submit, prepare them, submit them and update the DB after the submission.

        :param dict transIDOPBody: dict whose first key is the transformation ID and whose
            value holds the transformation 'Body'
        :param dict clients: clients of the thread; 'TransformationClient' and 'TaskManager' are used
        :return: S_OK() when everything was submitted and recorded, the result of
                 getTasksToSubmit when there is nothing to submit, otherwise the first
                 failing result dictionary
        """
        transID = list(transIDOPBody)[0]
        transBody = transIDOPBody[transID]['Body']
        method = 'submitTasks'
        logArgs = {'method': method, 'transID': transID}

        # Step 1: which tasks are to be submitted?
        toSubmit = clients['TransformationClient'].getTasksToSubmit(
            transID, self.tasksPerLoop)
        self._logDebug(
            "getTasksToSubmit(%s, %s) return value:" %
            (transID, self.tasksPerLoop), toSubmit, **logArgs)
        if not toSubmit['OK']:
            self._logError("Failed to obtain tasks:", toSubmit['Message'],
                           **logArgs)
            return toSubmit
        tasks = toSubmit['Value']['JobDictionary']
        if not tasks:
            self._logVerbose("No tasks found for submission", **logArgs)
            return toSubmit
        self._logInfo("Obtained %d tasks for submission" % len(tasks),
                      **logArgs)

        # Step 2: let the task manager prepare them, with the submission credentials
        taskManager = clients['TaskManager']
        prepared = taskManager.prepareTransformationTasks(
            transBody, tasks, self.owner, self.ownerGroup, self.ownerDN,
            self.bulkSubmissionFlag)
        self._logDebug("prepareTransformationTasks return value:", prepared,
                       **logArgs)
        if not prepared['OK']:
            self._logError("Failed to prepare tasks", prepared['Message'],
                           **logArgs)
            return prepared

        # Step 3: the actual submission
        submitted = clients['TaskManager'].submitTransformationTasks(
            prepared['Value'])
        self._logDebug("submitTransformationTasks return value:", submitted,
                       **logArgs)
        if not submitted['OK']:
            self._logError("Failed to submit prepared tasks:",
                           submitted['Message'], **logArgs)
            return submitted

        # Step 4: record the outcome of the submission
        recorded = clients['TaskManager'].updateDBAfterTaskSubmission(
            submitted['Value'])
        self._logDebug("updateDBAfterTaskSubmission return value:", recorded,
                       **logArgs)
        if not recorded['OK']:
            self._logError("Failed to update DB after task submission:",
                           recorded['Message'], **logArgs)
            return recorded

        return S_OK()
Esempio n. 47
0
class DataRecoveryAgent( AgentModule ):
  """Data Recovery Agent"""
  def __init__(self, *args, **kwargs):
    """Read the agent options, create the service clients and build the recovery rules.

    ``self.todo`` maps a rule-set name ('MCGeneration' or 'OtherProductions') to an
    ordered list of rules. Every rule is a dict with the keys:

    * ``Message`` / ``ShortMessage``: texts used for the log, the notification and the summary
    * ``Counter``: number of jobs that matched the rule
    * ``Check``: predicate taking the job, true when the rule applies
    * ``Actions``: callable taking the job and the transformation info, performing the fix

    :meth:`checkJob` applies only the first rule whose ``Check`` is true, so the
    order of the rules inside each list matters.
    """
    AgentModule.__init__( self, *args, **kwargs )
    self.name = 'DataRecoveryAgent'
    self.enabled = False

    self.productionsToIgnore = self.am_getOption( "ProductionsToIgnore", [] )
    # The last default used to read 'MCGenerations'; the rule table below and
    # treatProduction() use 'MCGeneration', so the misspelled default could never
    # select the productions the 'MCGeneration' rules are written for.
    self.transformationTypes = self.am_getOption( "TransformationTypes",
                                                  ['MCReconstruction',
                                                   'MCSimulation',
                                                   'MCReconstruction_Overlay',
                                                   'MCGeneration'] )
    self.transformationStatus = self.am_getOption( "TransformationStatus", ['Active', 'Completing'] )
    # NOTE(review): this stores the return value of am_setOption, not the proxy name -- confirm intended
    self.shifterProxy = self.am_setOption( 'shifterProxy', 'DataManager' )

    self.jobStatus = ['Failed','Done'] ##This needs to be both otherwise we cannot account for all cases

    self.jobMon = JobMonitoringClient()
    self.fcClient = FileCatalogClient()
    self.tClient = TransformationClient()
    self.reqClient = ReqClient()
    # input files for which one task was already accepted as the successful one
    self.inputFilesProcessed = set()
    self.todo = {'MCGeneration':
                 [ dict( Message="MCGeneration: OutputExists: Job 'Done'",
                         ShortMessage="MCGeneration: job 'Done' ",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and job.status=='Failed',
                         Actions=lambda job,tInfo: [ job.setJobDone(tInfo) ]
                       ),
                   dict( Message="MCGeneration: OutputMissing: Job 'Failed'",
                         ShortMessage="MCGeneration: job 'Failed' ",
                         Counter=0,
                         Check=lambda job: job.allFilesMissing() and job.status=='Done',
                         Actions=lambda job,tInfo: [ job.setJobFailed(tInfo) ]
                       ),
                 ],
                 'OtherProductions':
                 [
                   ## Several tasks for the same input file: keep the first complete one, fail the rest
                   dict( Message="One of many Successful: clean others",
                         ShortMessage="Other Tasks --> Keep",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and job.otherTasks and job.inputFile not in self.inputFilesProcessed,
                         Actions=lambda job,tInfo: [ self.inputFilesProcessed.add(job.inputFile), job.setJobDone(tInfo), job.setInputProcessed(tInfo) ]
                       ),
                   dict( Message="Other Task processed Input, no Output: Fail",
                         ShortMessage="Other Tasks --> Fail",
                         Counter=0,
                         Check=lambda job: job.inputFile in self.inputFilesProcessed and job.allFilesMissing() and job.status!='Failed',
                         Actions=lambda job,tInfo: [ job.setJobFailed(tInfo) ]
                       ),
                   dict( Message="Other Task processed Input: Fail and clean",
                         ShortMessage="Other Tasks --> Cleanup",
                         Counter=0,
                         Check=lambda job: job.inputFile in self.inputFilesProcessed and not job.allFilesMissing(),
                         Actions=lambda job,tInfo: [ job.setJobFailed(tInfo), job.cleanOutputs(tInfo) ]
                       ),
                   ## Input file no longer exists
                   dict( Message="InputFile missing: mark job 'Failed', mark input 'Deleted', clean",
                         ShortMessage="Input Missing --> Job 'Failed, Input 'Deleted', Cleanup",
                         Counter=0,
                         Check=lambda job: job.inputFile and not job.inputFileExists and job.fileStatus != "Deleted",
                         Actions=lambda job,tInfo: [ job.cleanOutputs(tInfo), job.setJobFailed(tInfo), job.setInputDeleted(tInfo) ]
                       ),
                   dict( Message="InputFile Deleted, output Exists: mark job 'Failed', clean",
                         ShortMessage="Input Deleted --> Job 'Failed, Cleanup",
                         Counter=0,
                         Check=lambda job: job.inputFile and not job.inputFileExists and job.fileStatus == "Deleted" and not job.allFilesMissing(),
                         Actions=lambda job,tInfo: [ job.cleanOutputs(tInfo), job.setJobFailed(tInfo) ]
                       ),
                   ## All Output Exists
                   dict( Message="Output Exists, job Failed, input not Processed --> Job Done, Input Processed",
                         ShortMessage="Output Exists --> Job Done, Input Processed",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and not job.otherTasks and job.status=='Failed' and job.fileStatus!="Processed" and job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setJobDone(tInfo), job.setInputProcessed(tInfo) ]
                       ),
                   dict( Message="Output Exists, job Failed, input Processed --> Job Done",
                         ShortMessage="Output Exists --> Job Done",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and not job.otherTasks and job.status=='Failed' and job.fileStatus=="Processed" and job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setJobDone(tInfo) ]
                       ),
                   dict( Message="Output Exists, job Done, input not Processed --> Input Processed",
                         ShortMessage="Output Exists --> Input Processed",
                         Counter=0,
                         Check=lambda job: job.allFilesExist() and not job.otherTasks and job.status=='Done' and job.fileStatus!="Processed" and job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setInputProcessed(tInfo) ]
                       ),
                   ## All output missing. The MaxError rule must stay before the generic
                   ## 'Input Unused' rule, because only the first matching rule is applied.
                   dict( Message="Output Missing, job Failed, input Assigned, MaxError --> Input MaxReset",
                         ShortMessage="Max ErrorCount --> Input MaxReset",
                         Counter=0,
                         Check=lambda job: job.allFilesMissing() and not job.otherTasks and job.status=='Failed' and job.fileStatus=="Assigned" and job.inputFileExists and job.errorCount > MAXRESET,
                         Actions=lambda job,tInfo: [ job.setInputMaxReset(tInfo) ]
                       ),
                   dict( Message="Output Missing, job Failed, input Assigned --> Input Unused",
                         ShortMessage="Output Missing --> Input Unused",
                         Counter=0,
                         Check=lambda job: job.allFilesMissing() and not job.otherTasks and job.status=='Failed' and job.fileStatus=="Assigned" and job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setInputUnused(tInfo) ]
                       ),
                   dict( Message="Output Missing, job Done, input Assigned --> Job Failed, Input Unused",
                         ShortMessage="Output Missing --> Job Failed, Input Unused",
                         Counter=0,
                         Check=lambda job: job.allFilesMissing() and not job.otherTasks and job.status=='Done' and job.fileStatus=="Assigned" and job.inputFileExists,
                         Actions=lambda job,tInfo: [ job.setInputUnused(tInfo), job.setJobFailed(tInfo) ]
                       ),
                   ## Some files missing, needing cleanup. Only checking for 'Assigned', because 'Processed'
                   ## could mean an earlier job was successful and this one is just the duplicate that
                   ## needed to be removed!
                   dict( Message="Some missing, job Failed, input Assigned --> cleanup, Input 'Unused'",
                         ShortMessage="Output Missing --> Cleanup, Input Unused",
                         Counter=0,
                         Check=lambda job: job.someFilesMissing() and not job.otherTasks and job.status=='Failed' and job.fileStatus=="Assigned" and job.inputFileExists,
                         Actions=lambda job,tInfo: [job.cleanOutputs(tInfo),job.setInputUnused(tInfo)]
                       ),
                   dict( Message="Some missing, job Done, input Assigned --> cleanup, job Failed, Input 'Unused'",
                         ShortMessage="Output Missing --> Cleanup, Job Failed, Input Unused",
                         Counter=0,
                         Check=lambda job: job.someFilesMissing() and not job.otherTasks and job.status=='Done' and job.fileStatus=="Assigned" and job.inputFileExists,
                         Actions=lambda job,tInfo: [job.cleanOutputs(tInfo),job.setInputUnused(tInfo),job.setJobFailed(tInfo)]
                       ),
                   dict( Message="Some missing, job Done --> job Failed",
                         ShortMessage="Output Missing, Done --> Job Failed",
                         Counter=0,
                         Check=lambda job: not job.allFilesExist() and job.status=='Done',
                         Actions=lambda job,tInfo: [job.setJobFailed(tInfo)]
                       ),
                   ## Catch-all for any job status other than 'Failed' or 'Done': only counted and reported
                   dict ( Message="Something Strange",
                          ShortMessage="Strange",
                          Counter=0,
                          Check=lambda job: job.status not in ("Failed","Done"),
                          Actions=lambda job,tInfo: []
                        ),
                 ]
                }
    # prodID -> (nDone, nFailed) as seen in the last cycle; see treatProduction()
    self.jobCache = defaultdict( lambda: (0, 0) )
    self.printEveryNJobs = self.am_getOption( 'PrintEvery', 200 )
    ##Notification
    self.notesToSend = ""
    self.addressTo = self.am_getOption( 'MailTo', ["*****@*****.**"] )
    self.addressFrom = self.am_getOption( 'MailFrom', "*****@*****.**" )
    self.subject = "DataRecoveryAgent"

    
    #############################################################################
  def beginExecution(self):
    """Re-read the configurable agent options at the start of every cycle.

    :returns: S_OK()
    """
    self.enabled = self.am_getOption('EnableFlag', False)
    self.productionsToIgnore = self.am_getOption( "ProductionsToIgnore", [] )
    # The last default used to read 'MCGenerations'; the rest of the agent
    # (rule table, treatProduction) uses the transformation type 'MCGeneration'.
    self.transformationTypes = self.am_getOption( "TransformationTypes",
                                                  ['MCReconstruction',
                                                   'MCSimulation',
                                                   'MCReconstruction_Overlay',
                                                   'MCGeneration'] )
    self.transformationStatus = self.am_getOption( "TransformationStatus", ['Active', 'Completing'] )
    self.addressTo = self.am_getOption( 'MailTo', ["*****@*****.**"] )
    self.addressFrom = self.am_getOption( 'MailFrom', "*****@*****.**" )
    self.printEveryNJobs = self.am_getOption( 'PrintEvery', 200 )

    return S_OK()
  #############################################################################
  def execute(self):
    """Run one agent cycle over all eligible productions.

    Productions listed in ``self.productionsToIgnore`` are skipped. For every other
    production the rule counters and the set of processed input files are reset and
    the production is treated. If notes were collected, the production is removed
    from the job cache and the notes are mailed to every address in ``self.addressTo``.

    :returns: S_OK(), or S_ERROR if the transformations could not be retrieved
    """
    self.log.notice( "Will ignore the following productions: %s" % self.productionsToIgnore )
    self.log.notice( " Job Cache: %s " % self.jobCache )
    eligible = self.getEligibleTransformations( self.transformationStatus, self.transformationTypes )
    if not eligible['OK']:
      self.log.error( "Failure to get transformations", eligible['Message'] )
      return S_ERROR( "Failure to get transformations" )

    for prodID, typeAndName in eligible['Value'].items():
      if prodID in self.productionsToIgnore:
        self.log.notice( "Ignoring Production: %s " % prodID )
        continue

      # start every production with a clean slate
      self.__resetCounters()
      self.inputFilesProcessed = set()
      transType, transName = typeAndName
      self.log.notice( "Running over Production: %s " % prodID )
      prodNumber = int( prodID )
      self.treatProduction( prodNumber, transName, transType )

      if not self.notesToSend:
        continue

      ##remove from the jobCache because something happened
      self.jobCache.pop( prodNumber, None )
      mailSubject = "%s: %s" % ( self.subject, prodID )
      notification = NotificationClient()
      for address in self.addressTo:
        sent = notification.sendMail( address, mailSubject, self.notesToSend, self.addressFrom, localAttempt = False )
        if not sent['OK']:
          self.log.error( 'Cannot send notification mail', sent['Message'] )
      self.notesToSend = ""

    return S_OK()

  def getEligibleTransformations( self, status, typeList ):
    """Select the transformations with the given status and type.

    :param status: value used for the 'Status' selection
    :param typeList: value used for the 'Type' selection
    :returns: the failed result of the client call, or S_OK with a dict mapping
              str(TransformationID) to the tuple (Type, TransformationName)
    """
    selection = {'Status' : status, 'Type' : typeList}
    res = self.tClient.getTransformations( condDict = selection )
    if not res['OK']:
      return res

    eligible = {}
    for trafo in res['Value']:
      key = str( trafo['TransformationID'] )
      trafoName = trafo['TransformationName']
      eligible[key] = ( trafo['Type'], trafoName )
    return S_OK( eligible )

  def treatProduction( self, prodID, transName, transType ):
    """Check all 'Done' and 'Failed' jobs of one production.

    The production is skipped when the number of done and failed jobs is the same
    as the one stored in ``self.jobCache``. For every type except 'MCGeneration'
    the task information is retrieved as well before the jobs are checked.

    :param prodID: transformation ID, used as key of the job cache
    :param transName: name of the transformation
    :param transType: type of the transformation
    """
    tInfo = TransformationInfo( prodID, transName, transType, self.enabled,
                                self.tClient, self.fcClient, self.jobMon )
    jobs, nDone, nFailed = tInfo.getJobs( statusList=self.jobStatus )

    cachedDone, cachedFailed = self.jobCache[prodID]
    if cachedDone == nDone and cachedFailed == nFailed:
      self.log.notice( "Skipping production %s because nothing changed" % prodID )
      return
    self.jobCache[prodID] = (nDone, nFailed)

    tasksDict, lfnTaskDict = None, None
    if transType != "MCGeneration":
      self.log.notice( "Getting tasks...")
      tasksDict = tInfo.checkTasksStatus()
      # reverse lookup: LFN of the task -> taskID
      lfnTaskDict = dict( ( tasksDict[taskID]['LFN'], taskID ) for taskID in tasksDict )

    self.checkAllJobs( jobs, tInfo, tasksDict, lfnTaskDict )
    self.printSummary()


  def checkJob( self, job, tInfo ):
    """Apply the first matching recovery rule to the job.

    The 'MCGeneration' rules are used for jobs whose ``tType`` is 'MCGeneration',
    the 'OtherProductions' rules for all other jobs. For the first rule whose
    ``Check`` is true the counter is incremented, message and job are logged and
    appended to ``self.notesToSend`` and the ``Actions`` of the rule are executed.
    Later rules are not evaluated.

    :param job: job to be checked
    :param tInfo: transformation information passed on to the actions
    """
    ruleSet = 'MCGeneration' if job.tType == 'MCGeneration' else 'OtherProductions'
    for rule in self.todo[ruleSet]:
      if not rule['Check'](job):
        continue
      rule['Counter'] += 1
      self.log.notice( rule['Message'] )
      self.log.notice( job )
      self.notesToSend += rule['Message']+'\n'
      self.notesToSend += str(job)+'\n'
      rule['Actions'](job, tInfo)
      return

  def checkAllJobs( self, jobs, tInfo, tasksDict=None, lfnTaskDict=None, maxAttempts=5 ):
    """Collect the information for every job and apply the recovery rules.

    A job is skipped when it has pending requests or when its task information
    cannot be obtained (TaskInfoException). When a RuntimeError is raised while
    treating a job, the job is tried again, at most ``maxAttempts`` times in
    total; previously a persistent RuntimeError made this method retry forever.

    :param dict jobs: jobID -> job object
    :param tInfo: transformation information passed on to :meth:`checkJob`
    :param dict tasksDict: task information, None for productions without input files
    :param dict lfnTaskDict: LFN -> taskID, None for productions without input files
    :param int maxAttempts: maximum number of attempts per job, values below 1 are treated as 1
    """
    attempts = max( 1, maxAttempts )
    counter = 0
    startTime = time.time()
    nJobs = len(jobs)
    self.log.notice( "Running over all the jobs" )
    for job in jobs.values():
      counter += 1
      if counter % self.printEveryNJobs == 0:
        self.log.notice( "%d/%d: %3.1fs " % (counter, nJobs, float(time.time() - startTime) ) )
      for _attempt in range( attempts ):
        try:
          job.checkRequests( self.reqClient )
          if job.pendingRequest:
            self.log.warn( "Job has Pending requests:\n%s" % job )
            break
          job.getJobInformation( self.jobMon )
          job.checkFileExistance( self.fcClient )
          if tasksDict and lfnTaskDict:
            try:
              job.getTaskInfo( tasksDict, lfnTaskDict )
            except TaskInfoException as e:
              self.log.error(" Skip Task, due to TaskInfoException: %s" % e )
              break
          self.checkJob( job, tInfo )
          break # job treated, no further attempt needed
        except RuntimeError as e:
          self.log.error( "+++++ Failure for job: %d " % job.jobID )
          self.log.error( "+++++ Exception: ", str(e) )
          ## run these again because of RuntimeError
      else:
        # only reached when every attempt ended in a RuntimeError
        self.log.error( "Giving up on job after %d failed attempts:" % attempts, str(job.jobID) )

  def printSummary( self ):
    """Log the counter of every rule.

    If notes are pending, each summary line is also put in front of
    ``self.notesToSend``.
    """
    self.log.notice( "Summary:" )
    for rules in self.todo.values():
      for rule in rules:
        label = rule['ShortMessage'].ljust(56)
        count = str(rule['Counter']).rjust(5)
        message = "%s: %s" % ( label, count )
        self.log.notice( message )
        if self.notesToSend:
          self.notesToSend = str(message)+'\n' + self.notesToSend

  def __resetCounters( self ):
    for _name,checks in self.todo.iteritems():
      for do in checks:
        do['Counter'] = 0
Esempio n. 48
0
class TransformationAgent( AgentModule, TransformationAgentsUtilities ):
  """ Usually subclass of AgentModule
  """

  def __init__( self, *args, **kwargs ):
    """ c'tor: read the agent options and create the members used for queueing,
        replica caching and bookkeeping of unused files
    """
    AgentModule.__init__( self, *args, **kwargs )
    TransformationAgentsUtilities.__init__( self )

    # configurable parameters
    self.pluginLocation = self.am_getOption( 'PluginLocation',
                                             'DIRAC.TransformationSystem.Agent.TransformationPlugin' )
    self.transformationStatus = self.am_getOption( 'transformationStatus', ['Active', 'Completing', 'Flush'] )
    self.maxFiles = self.am_getOption( 'MaxFiles', 5000 )

    # transformation types: the agent option wins, otherwise the Operations values are combined
    selectedTypes = self.am_getOption( 'TransformationTypes', [] )
    if not selectedTypes:
      dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      selectedTypes = dataProc + dataManip
    self.transformationTypes = sortList( selectedTypes )

    # clients
    self.transfClient = TransformationClient()

    # threading: queue of transformation dicts and the IDs currently queued
    self.transQueue = Queue.Queue()
    self.transInQueue = []

    # replica cache kept in a pickle file inside the work directory
    self.workDirectory = self.am_getWorkDirectory()
    self.cacheFile = os.path.join( self.workDirectory, 'ReplicaCache.pkl' )
    self.dateWriteCache = datetime.datetime.utcnow()

    # validity of the cache
    self.replicaCache = None
    self.replicaCacheValidity = self.am_getOption( 'ReplicaCacheValidity', 2 )
    self.writingCache = False

    # bookkeeping of unused files
    self.noUnusedDelay = self.am_getOption( 'NoUnusedDelay', 6 )
    self.unusedFiles = {}
    self.unusedTimeStamp = {}

    self.debug = False
    self.transInThread = {}

  def initialize( self ):
    """ standard initialize: read the replica cache, set the shifter proxy and
        start the worker threads, each of them running self._execute

        :returns: S_OK()
    """
    self.__readCache()
    self.dateWriteCache = datetime.datetime.utcnow()

    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    # one pool job per thread, the thread index is passed to _execute
    nThreads = self.am_getOption( 'maxThreadsInPool', 1 )
    pool = ThreadPool( nThreads, nThreads )
    self.log.info( "Multithreaded with %d threads" % nThreads )
    for threadIndex in xrange( nThreads ):
      pool.generateJobAndQueueIt( self._execute, [threadIndex] )

    self.log.info( "Will treat the following transformation types: %s" % str( self.transformationTypes ) )

    return S_OK()

  def finalize( self ):
    """ graceful finalization: empty the list of queued transformations, wait
        until no transformation is left in a thread, then force writing the cache

        :returns: S_OK()
    """
    if self.transInQueue:
      nInThreads = len( self.transInThread )
      self._logInfo( "Wait for threads to get empty before terminating the agent (%d tasks)" % nInThreads )
      self.transInQueue = []
      # poll until transInThread has been emptied
      while self.transInThread:
        time.sleep( 2 )
      self.log.info( "Threads are empty, terminating the agent..." )
    self.__writeCache( force = True )
    return S_OK()

  def execute( self ):
    """ Just puts transformations in the queue

        For transformations with 'InheritedFrom' set, the files are first moved from
        the parent transformation. Transformations whose ID is already in
        self.transInQueue are not queued a second time.

        :returns: S_OK(), also when the transformations could not be obtained
    """
    # Get the transformations to process
    res = self.getTransformations()
    if not res['OK']:
      self._logError( "Failed to obtain transformations: %s" % ( res['Message'] ) )
      return S_OK()
    # Keep the list under its own name: the result of moving files must not
    # replace it, otherwise the final summary reports on the wrong object
    # (or raises KeyError when the last move failed).
    transformations = res['Value']
    # Process the transformations
    count = 0
    for transDict in transformations:
      transID = long( transDict['TransformationID'] )
      if transDict.get( 'InheritedFrom' ):
        # Try and move datasets from the ancestor production
        moveRes = self.transfClient.moveFilesToDerivedTransformation( transDict )
        if not moveRes['OK']:
          self._logError( "Error moving files from an inherited transformation", moveRes['Message'], transID = transID )
        else:
          parentProd, movedFiles = moveRes['Value']
          if movedFiles:
            self._logInfo( "Successfully moved files from %d to %d:" % ( parentProd, transID ), transID = transID )
            for status, val in movedFiles.items():
              self._logInfo( "\t%d files to status %s" % ( val, status ), transID = transID )
      if transID not in self.transInQueue:
        count += 1
        self.transInQueue.append( transID )
        self.transQueue.put( transDict )
    self._logInfo( "Out of %d transformations, %d put in thread queue" % ( len( transformations ), count ) )
    return S_OK()

  def getTransformations( self ):
    """ Obtain the transformations to be executed - this is executed at the start of every loop

        With the option 'Transformation' set to 'All' (the default) the transformations
        are selected by status and, if types are defined, by type. Otherwise the single
        transformation named by the option is looked up.

        :returns: the failed result of the client call, or S_OK( list of transformations )
    """
    transName = self.am_getOption( 'Transformation', 'All' )

    if transName != 'All':
      # dedicated agent: exactly one transformation
      self._logInfo( "Initializing for transformation %s." % transName, method = "getTransformations" )
      res = self.transfClient.getTransformation( transName, extraParams = True )
      if not res['OK']:
        self._logError( "Failed to get transformation: %s." % res['Message'], method = 'getTransformations' )
        return res
      return S_OK( [res['Value']] )

    # general purpose agent: select by status and type
    self._logInfo( "Initializing general purpose agent.", method = 'getTransformations' )
    selection = {'Status': self.transformationStatus }
    if self.transformationTypes:
      selection['Type'] = self.transformationTypes
    res = self.transfClient.getTransformations( selection, extraParams = True )
    if not res['OK']:
      self._logError( "Failed to get transformations: %s" % res['Message'], method = 'getTransformations' )
      return res
    found = res['Value']
    self._logInfo( "Obtained %d transformations to process" % len( found ), method = 'getTransformations' )
    return S_OK( found )

  def _getClients( self ):
    """ returns the clients used in the threads: a new TransformationClient and a
        new ReplicaManager, keyed by their class names
    """
    return {'TransformationClient': TransformationClient(),
            'ReplicaManager': ReplicaManager()}

  def _execute( self, threadID ):
    """ thread - does the real job: processing the transformations to be processed
    """

    #Each thread will have its own clients
    clients = self._getClients()

    while True:
      transDict = self.transQueue.get()
      try:
        transID = long( transDict['TransformationID'] )
        if transID not in self.transInQueue:
          break
        self.transInThread[transID] = ' [Thread%d] [%s] ' % ( threadID, str( transID ) )
        self._logInfo( "Processing transformation %s." % transID, transID = transID )
        startTime = time.time()
        res = self.processTransformation( transDict, clients )
        if not res['OK']:
          self._logInfo( "Failed to process transformation: %s" % res['Message'], transID = transID )
      except Exception, x:
        self._logException( '%s' % x, transID = transID )
      finally:
Esempio n. 49
0
class TaskManagerAgentBase(AgentModule, TransformationAgentsUtilities):
  """ To be extended. Please look at WorkflowTaskAgent and RequestTaskAgent.
  """

  def __init__(self, *args, **kwargs):
    """ c'tor

        Always call this in the extension agent. Only defaults are set here, the
        clients are created in initialize().
    """
    AgentModule.__init__(self, *args, **kwargs)
    TransformationAgentsUtilities.__init__(self)

    # clients and transformation types, filled in initialize() or by the extension
    self.transClient = None
    self.jobManagerClient = None
    self.transType = []

    # submission limits
    self.tasksPerLoop = 50
    self.maxParametricJobs = 20  # will be updated in execute()

    # credentials
    self.shifterProxy = None
    self.credentials = None
    self.credTuple = (None, None, None)

    self.pluginLocation = ''
    self.bulkSubmissionFlag = False

    # for the threading
    self.transQueue = Queue()
    self.transInQueue = []
    self.transInThread = {}

  #############################################################################

  def initialize(self):
    """ Agent initialization.

        The extensions MUST provide in the initialize method the following data members:
        - TransformationClient objects (self.transClient),
        - set the shifterProxy if different from the default one set here ('ProductionManager')
        - list of transformation types to be looked (self.transType)

        :returns: S_OK(), or the failed result of the credentials lookup
    """
    gMonitor.registerActivity("SubmittedTasks", "Automatically submitted tasks",
                              "Transformation Monitoring", "Tasks", gMonitor.OP_ACUM)

    self.pluginLocation = self.am_getOption('PluginLocation',
                                            'DIRAC.TransformationSystem.Client.TaskManagerPlugin')

    # Default clients
    self.transClient = TransformationClient()
    self.jobManagerClient = JobManagerClient()

    # Bulk submission flag
    self.bulkSubmissionFlag = self.am_getOption('BulkSubmission', self.bulkSubmissionFlag)

    # Shifter credentials to use, could replace the use of shifterProxy eventually
    self.shifterProxy = self.am_getOption('shifterProxy', self.shifterProxy)
    self.credentials = self.am_getOption('ShifterCredentials', self.credentials)
    credentialsResult = self.__getCredentials()
    if not credentialsResult['OK']:
      return credentialsResult

    # setting up the threading: one pool job per thread, each running self._execute
    nThreads = self.am_getOption('maxNumberOfThreads', 15)
    pool = ThreadPool(nThreads, nThreads)
    self.log.verbose("Multithreaded with %d threads" % nThreads)
    for threadIndex in xrange(nThreads):
      pool.generateJobAndQueueIt(self._execute, [threadIndex])

    return S_OK()

  def finalize(self):
    """ graceful finalization: empty the list of queued transformations and wait
        until no transformation is left in a thread

        :returns: S_OK()
    """
    if self.transInQueue:
      nInThreads = len(self.transInThread)
      self._logInfo("Wait for threads to get empty before terminating the agent (%d tasks)" % nInThreads)
      self.transInQueue = []
      # poll until transInThread has been emptied
      while self.transInThread:
        time.sleep(2)
      self.log.info("Threads are empty, terminating the agent...")
    return S_OK()

  #############################################################################

  def execute(self):
    """ The TaskManagerBase execution method is just filling the Queues of transformations that need to be processed

        Four operations can be scheduled per transformation, each switched by its own
        option: 'updateTaskStatus' (MonitorTasks), 'updateFileStatus' (MonitorFiles),
        'checkReservedTasks' (CheckReserved) and 'submitTasks' (SubmitTasks).

        :returns: S_OK()
    """
    operationsOnTransformationDict = {}

    # getting the credentials for submission
    owner, ownerGroup, ownerDN = None, None, None
    resProxy = getProxyInfo(proxy=False, disableVOMS=False)
    if resProxy['OK']:  # there is a shifterProxy
      proxyInfo = resProxy['Value']
      owner = proxyInfo['username']
      ownerGroup = proxyInfo['group']
      ownerDN = proxyInfo['identity']
      self.log.info("ShifterProxy: Tasks will be submitted with the credentials %s:%s" % (owner, ownerGroup))
    elif self.credentials:
      owner, ownerGroup, ownerDN = self.credTuple
    else:
      self.log.info("Using per Transformation Credentials!")

    # The three monitoring operations only differ in option names, message and operation:
    # (enabling option, message when disabled, status option, fallback status option, operation)
    monitoringSteps = (
        ('MonitorTasks',
         "Monitoring of tasks is disabled. To enable it, create the 'MonitorTasks' option",
         'UpdateTasksTransformationStatus', 'UpdateTasksStatus', 'updateTaskStatus'),
        ('MonitorFiles',
         "Monitoring of files is disabled. To enable it, create the 'MonitorFiles' option",
         'UpdateFilesTransformationStatus', 'UpdateFilesStatus', 'updateFileStatus'),
        ('CheckReserved',
         "Checking of reserved tasks is disabled. To enable it, create the 'CheckReserved' option",
         'CheckReservedTransformationStatus', 'CheckReservedStatus', 'checkReservedTasks'),
    )
    for enableOption, disabledMessage, statusOption, fallbackStatusOption, operation in monitoringSteps:
      if not self.am_getOption(enableOption, ''):
        self.log.verbose(disabledMessage)
        continue
      # Get the transformations for which the operation has to be performed
      status = self.am_getOption(statusOption,
                                 self.am_getOption(fallbackStatusOption, ['Active', 'Completing', 'Stopped']))
      transformations = self._selectTransformations(transType=self.transType, status=status, agentType=[])
      if not transformations['OK']:
        self.log.warn("Could not select transformations:", transformations['Message'])
        continue
      self._addOperationForTransformations(operationsOnTransformationDict, operation, transformations,
                                           owner=owner, ownerGroup=ownerGroup, ownerDN=ownerDN)

    # Determine whether the submission of tasks is to be performed
    if not self.am_getOption('SubmitTasks', 'yes'):
      self.log.verbose("Submission of tasks is disabled. To enable it, create the 'SubmitTasks' option")
    else:
      # Get the transformations for which tasks have to be submitted
      status = self.am_getOption('SubmitTransformationStatus',
                                 self.am_getOption('SubmitStatus', ['Active', 'Completing']))
      transformations = self._selectTransformations(transType=self.transType, status=status)
      if not transformations['OK']:
        self.log.warn("Could not select transformations:", transformations['Message'])
      else:
        # Refresh the submission limits before scheduling the submission
        self.tasksPerLoop = self.am_getOption('TasksPerLoop', self.tasksPerLoop)
        maxJobsResult = self.jobManagerClient.getMaxParametricJobs()
        if maxJobsResult['OK']:
          self.maxParametricJobs = maxJobsResult['Value']
        else:
          self.log.warn("Could not get the maxParametricJobs from JobManager", maxJobsResult['Message'])

        self._addOperationForTransformations(operationsOnTransformationDict, 'submitTasks', transformations,
                                             owner=owner, ownerGroup=ownerGroup, ownerDN=ownerDN)

    self._fillTheQueue(operationsOnTransformationDict)

    return S_OK()

  def _selectTransformations(self, transType=None, status=None, agentType=None):
    """ get the transformations
    """
    if status is None:
      status = ['Active', 'Completing']
    if agentType is None:
      agentType = ['Automatic']
    selectCond = {}
    if status:
      selectCond['Status'] = status
    if transType is not None:
      selectCond['Type'] = transType
    if agentType:
      selectCond['AgentType'] = agentType
    res = self.transClient.getTransformations(condDict=selectCond)
    if not res['OK']:
      self.log.error("Failed to get transformations:", res['Message'])
    elif not res['Value']:
      self.log.verbose("No transformations found")
    else:
      self.log.verbose("Obtained %d transformations" % len(res['Value']))
    return res

  def _fillTheQueue(self, operationsOnTransformationsDict):
    """ Just fill the queue with the operation to be done on a certain transformation
    """
    count = 0
    for transID, bodyAndOps in operationsOnTransformationsDict.iteritems():
      if transID not in self.transInQueue:
        count += 1
        self.transInQueue.append(transID)
        self.transQueue.put({transID: bodyAndOps})

    self.log.info("Out of %d transformations, %d put in thread queue" % (len(operationsOnTransformationsDict),
                                                                         count))

  #############################################################################

  def _getClients(self, ownerDN=None, ownerGroup=None):
    """Build the client objects used inside a thread

    The clients created here are defaults: agents extending this one are expected to
    adapt this method to their needs.

    ownerDN and ownerGroup are handed over to the task manager (WorkflowTasks).

    :param str ownerDN: DN of the owner of the submitted jobs
    :param str ownerGroup: group of the owner of the submitted jobs
    :returns: dict with the 'TransformationClient' and 'TaskManager' objects
    """
    clients = {'TransformationClient': TransformationClient()}

    taskManager = WorkflowTasks(ownerDN=ownerDN, ownerGroup=ownerGroup)
    taskManager.pluginLocation = self.pluginLocation
    clients['TaskManager'] = taskManager

    return clients

  def _execute(self, threadID):
    """ This is what runs inside the threads, in practice this is the function that does the real stuff
    """
    # Each thread will have its own clients if we use credentials/shifterProxy
    clients = self._getClients() if self.shifterProxy else \
        self._getClients(ownerGroup=self.credTuple[1], ownerDN=self.credTuple[2]) if self.credentials \
        else None
    method = '_execute'
    operation = 'None'

    while True:
      startTime = time.time()
      transIDOPBody = self.transQueue.get()
      if not self.transInQueue:
        # Queue was cleared, nothing to do
        continue
      try:
        transID = transIDOPBody.keys()[0]
        operations = transIDOPBody[transID]['Operations']
        if transID not in self.transInQueue:
          self._logWarn("Got a transf not in transInQueue...?",
                        method=method, transID=transID)
          break
        if not (self.credentials or self.shifterProxy):
          ownerDN, group = transIDOPBody[transID]['OwnerDN'], transIDOPBody[transID]['OwnerGroup']
          clients = self._getClients(ownerDN=ownerDN, ownerGroup=group)
        self.transInThread[transID] = ' [Thread%d] [%s] ' % (threadID, str(transID))
        self._logInfo("Start processing transformation", method=method, transID=transID)
        clients['TaskManager'].transInThread = self.transInThread
        for operation in operations:
          self._logInfo("Executing %s" % operation, method=method, transID=transID)
          startOperation = time.time()
          res = getattr(self, operation)(transIDOPBody, clients)
          if not res['OK']:
            self._logError("Failed to %s: %s" % (operation, res['Message']), method=method, transID=transID)
          self._logInfo("Executed %s in %.1f seconds" % (operation, time.time() - startOperation),
                        method=method, transID=transID)
      except Exception as x:  # pylint: disable=broad-except
        self._logException('Exception executing operation %s' % operation, lException=x,
                           method=method, transID=transID)
      finally:
        if not transID:
          transID = 'None'
        self._logInfo("Processed transformation in %.1f seconds" % (time.time() - startTime),
                      method=method, transID=transID)
        self.transInThread.pop(transID, None)
        self._logVerbose("%d transformations still in queue" % (len(self.transInThread)),
                         method=method, transID=transID)
        if transID in self.transInQueue:
          self.transInQueue.remove(transID)
        self._logDebug("transInQueue = ", self.transInQueue,
                       method=method, transID=transID)

  #############################################################################
  # real operations done

  def updateTaskStatus(self, transIDOPBody, clients):
    """ Bring the status of the tasks of one transformation up to date

    The tasks are selected on their external status ('TaskUpdateStatus' option) with a
    'LastUpdateTime' older than 10 minutes ago. Their current status is asked to the task
    manager, in chunks of 'TaskUpdateChunkSize' tasks when that option is a non-zero
    integer, and every reported status is set through the transformation client.

    :param dict transIDOPBody: {transID: ...}, only the key is used here
    :param dict clients: holds the 'TransformationClient' and 'TaskManager' objects
    :return: S_OK, or the first result that was not OK. When no task is selected, the
             result of the selection is returned
    """
    method = 'updateTaskStatus'
    transID = transIDOPBody.keys()[0]

    # Selection of the tasks to look at
    statusesToUpdate = self.am_getOption('TaskUpdateStatus', ['Checking', 'Deleted', 'Killed', 'Staging', 'Stalled',
                                                              'Matched', 'Scheduled', 'Rescheduled', 'Completed',
                                                              'Submitted', 'Assigned', 'Received',
                                                              'Waiting', 'Running'])
    olderThan = str(datetime.datetime.utcnow() - datetime.timedelta(minutes=10))
    selection = clients['TransformationClient'].getTransformationTasks(
        condDict={"TransformationID": transID, "ExternalStatus": statusesToUpdate},
        older=olderThan,
        timeStamp='LastUpdateTime')
    if not selection['OK']:
      self._logError("Failed to get tasks to update:", selection['Message'],
                     method=method, transID=transID)
      return selection
    tasks = selection['Value']
    if not tasks:
      self._logVerbose("No tasks found to update",
                       method=method, transID=transID)
      return selection

    # An option value that is not an integer disables the chunking
    chunkSize = self.am_getOption('TaskUpdateChunkSize', 0)
    try:
      chunkSize = int(chunkSize)
    except ValueError:
      chunkSize = 0

    if chunkSize:
      self._logVerbose("Getting %d tasks status (chunks of %d)" % (len(tasks), chunkSize),
                       method=method, transID=transID)
      taskChunks = breakListIntoChunks(tasks, chunkSize)
    else:
      self._logVerbose("Getting %d tasks status" % len(tasks),
                       method=method, transID=transID)
      taskChunks = [tasks]

    updated = {}
    for chunkNumber, taskChunk in enumerate(taskChunks):
      chunkStatus = clients['TaskManager'].getSubmittedTaskStatus(taskChunk)
      if not chunkStatus['OK']:
        self._logError("Failed to get updated task states:", chunkStatus['Message'],
                       method=method, transID=transID)
        return chunkStatus
      statusDict = chunkStatus['Value']
      if not statusDict:
        self._logVerbose("%4d: No tasks to update" % chunkNumber,
                         method=method, transID=transID)

      # Record the new status of the tasks that changed
      for status, taskIDs in statusDict.iteritems():
        self._logVerbose("%4d: Updating %d task(s) to %s" % (chunkNumber, len(taskIDs), status),
                         method=method, transID=transID)
        setTaskStatus = clients['TransformationClient'].setTaskStatus(transID, taskIDs, status)
        if not setTaskStatus['OK']:
          self._logError("Failed to update task status for transformation:", setTaskStatus['Message'],
                         method=method, transID=transID)
          return setTaskStatus
        updated[status] = updated.get(status, 0) + len(taskIDs)

    # Summary, one line per status
    for status, count in updated.iteritems():
      self._logInfo("Updated %d tasks to status %s" % (count, status),
                    method=method, transID=transID)
    return S_OK()

  def updateFileStatus(self, transIDOPBody, clients):
    """ Bring the status of the files of one transformation up to date

    The 'Assigned' files of the transformation are selected with a 'LastUpdate' older than
    10 minutes ago and grouped by task. Taking the tasks 100 at a time, the status of their
    files is asked to the task manager and the reported ones are sent through a FileReport.

    :param dict transIDOPBody: {transID: ...}, only the key is used here
    :param dict clients: holds the 'TransformationClient' and 'TaskManager' objects
    :return: S_OK, or the first result that was not OK. When no file is selected, the
             result of the selection is returned
    """
    method = 'updateFileStatus'
    transID = transIDOPBody.keys()[0]

    # Selection of the files to look at
    olderThan = str(datetime.datetime.utcnow() - datetime.timedelta(minutes=10))
    selection = clients['TransformationClient'].getTransformationFiles(
        condDict={'TransformationID': transID, 'Status': ['Assigned']},
        older=olderThan,
        timeStamp='LastUpdate')
    if not selection['OK']:
      self._logError("Failed to get transformation files to update:", selection['Message'],
                     method=method, transID=transID)
      return selection
    if not selection['Value']:
      self._logInfo("No files to be updated",
                    method=method, transID=transID)
      return selection

    # Group the files by the task they belong to
    filesPerTask = {}
    for fileDict in selection['Value']:
      filesPerTask.setdefault(fileDict['TaskID'], []).append(fileDict)

    chunkSize = 100
    self._logVerbose("Getting file status for %d tasks (chunks of %d)" % (len(filesPerTask), chunkSize),
                     method=method, transID=transID)

    updated = {}
    for chunkNumber, taskIDs in enumerate(breakListIntoChunks(filesPerTask, chunkSize)):
      # All the files of the tasks of this chunk
      fileChunk = []
      for taskID in taskIDs:
        fileChunk.extend(filesPerTask[taskID])

      fileStatus = clients['TaskManager'].getSubmittedFileStatus(fileChunk)
      if not fileStatus['OK']:
        self._logError("Failed to get updated file states for transformation:", fileStatus['Message'],
                       method=method, transID=transID)
        return fileStatus
      statusDict = fileStatus['Value']
      if not statusDict:
        self._logVerbose("%4d: No file states to be updated" % chunkNumber,
                         method=method, transID=transID)
        continue

      # Collect the new states in a report, then send them in one go
      fileReport = FileReport(server=clients['TransformationClient'].getServer())
      for lfn, status in statusDict.iteritems():
        updated[status] = updated.get(status, 0) + 1
        setFileStatus = fileReport.setFileStatus(transID, lfn, status)
        if not setFileStatus['OK']:
          return setFileStatus
      commit = fileReport.commit()
      if not commit['OK']:
        self._logError("Failed to update file states for transformation:", commit['Message'],
                       method=method, transID=transID)
        return commit
      self._logVerbose("%4d: Updated the states of %d files" % (chunkNumber, len(commit['Value'])),
                       method=method, transID=transID)

    # Summary, one line per status
    for status, count in updated.iteritems():
      self._logInfo("Updated %d files to status %s" % (count, status),
                    method=method, transID=transID)
    return S_OK()

  def checkReservedTasks(self, transIDOPBody, clients):
    """ Recover the tasks of one transformation that are stuck in 'Reserved' status

    The tasks with external status 'Reserved', selected with older=<one hour ago>, are passed
    to the task manager. The tasks it reports without an associated job/request are set back
    to 'Created'; the others are set to 'Submitted' together with their external ID.

    :param dict transIDOPBody: {transID: ...}, only the key is used here
    :param dict clients: holds the 'TransformationClient' and 'TaskManager' objects
    :return: S_OK, or the first result that was not OK. When no Reserved task is found, the
             result of the selection is returned
    """
    transID = list(transIDOPBody)[0]
    method = 'checkReservedTasks'

    # Select the tasks which have been in Reserved status for more than 1 hour for selected transformations
    condDict = {"TransformationID": transID, "ExternalStatus": 'Reserved'}
    time_stamp_older = str(datetime.datetime.utcnow() - datetime.timedelta(hours=1))

    res = clients['TransformationClient'].getTransformationTasks(condDict=condDict, older=time_stamp_older)
    self._logDebug("getTransformationTasks(%s) return value:" % condDict, res,
                   method=method, transID=transID)
    if not res['OK']:
      self._logError("Failed to get Reserved tasks:", res['Message'],
                     method=method, transID=transID)
      return res
    if not res['Value']:
      # method is passed like in every other log call of this operation, so that the
      # message is not attributed to the default method name of the logging helper
      self._logVerbose("No Reserved tasks found",
                       method=method, transID=transID)
      return res
    reservedTasks = res['Value']

    # Update the reserved tasks
    res = clients['TaskManager'].updateTransformationReservedTasks(reservedTasks)
    self._logDebug("updateTransformationReservedTasks(%s) return value:" % reservedTasks, res,
                   method=method, transID=transID)
    if not res['OK']:
      self._logError("Failed to update transformation reserved tasks:", res['Message'],
                     method=method, transID=transID)
      return res
    noTasks = res['Value']['NoTasks']
    taskNameIDs = res['Value']['TaskNameIDs']

    # For the tasks with no associated request found re-set the status of the task in the transformationDB
    if noTasks:
      self._logInfo("Resetting status of %d tasks to Created as no associated job/request found" % len(noTasks),
                    method=method, transID=transID)
      for taskName in noTasks:
        # From here on transID is the one parsed from the task name
        transID, taskID = self._parseTaskName(taskName)
        res = clients['TransformationClient'].setTaskStatus(transID, taskID, 'Created')
        if not res['OK']:
          self._logError("Failed to update task status and ID after recovery:",
                         '%s %s' % (taskName, res['Message']),
                         method=method, transID=transID)
          return res

    # For the tasks for which an associated request was found update the task details in the transformationDB
    for taskName, extTaskID in taskNameIDs.items():
      transID, taskID = self._parseTaskName(taskName)
      self._logInfo("Setting status of %s to Submitted with ID %s" % (taskName, extTaskID),
                    method=method, transID=transID)
      setTaskStatusAndWmsID = clients['TransformationClient'].setTaskStatusAndWmsID(transID, taskID,
                                                                                    'Submitted', str(extTaskID))
      if not setTaskStatusAndWmsID['OK']:
        self._logError("Failed to update task status and ID after recovery:",
                       "%s %s" % (taskName, setTaskStatusAndWmsID['Message']),
                       method=method, transID=transID)
        return setTaskStatusAndWmsID

    return S_OK()

  def submitTasks(self, transIDOPBody, clients):
    """ Submit the tasks to an external system, using the taskManager provided

    At most self.tasksPerLoop tasks are asked to the transformation client. They are then
    prepared, submitted and recorded chunk by chunk; a chunk holds self.maxParametricJobs
    tasks when self.bulkSubmissionFlag is set, self.tasksPerLoop tasks otherwise.

    :param dict transIDOPBody: {transID: {'Body': ..., 'Owner': ..., 'OwnerGroup': ..., 'OwnerDN': ...}}
    :param dict clients: dictionary of client objects

    :return: S_OK, or the first result that was not OK. When there is nothing to submit, the
             result of getTasksToSubmit is returned
    """
    transID = list(transIDOPBody)[0]
    transBody = transIDOPBody[transID]['Body']
    owner = transIDOPBody[transID]['Owner']
    ownerGroup = transIDOPBody[transID]['OwnerGroup']
    ownerDN = transIDOPBody[transID]['OwnerDN']
    method = 'submitTasks'

    # Get all tasks to submit
    tasksToSubmit = clients['TransformationClient'].getTasksToSubmit(transID, self.tasksPerLoop)
    self._logDebug("getTasksToSubmit(%s, %s) return value:" % (transID, self.tasksPerLoop), tasksToSubmit,
                   method=method, transID=transID)
    if not tasksToSubmit['OK']:
      self._logError("Failed to obtain tasks:", tasksToSubmit['Message'],
                     method=method, transID=transID)
      return tasksToSubmit
    tasks = tasksToSubmit['Value']['JobDictionary']
    if not tasks:
      self._logVerbose("No tasks found for submission",
                       method=method, transID=transID)
      return tasksToSubmit
    self._logInfo("Obtained %d tasks for submission" % len(tasks),
                  method=method, transID=transID)

    # Prepare tasks and submits them, by chunks
    chunkSize = self.maxParametricJobs if self.bulkSubmissionFlag else self.tasksPerLoop
    for taskDictChunk in breakDictionaryIntoChunks(tasks, chunkSize):
      res = self._prepareAndSubmitAndUpdateTasks(transID, transBody, taskDictChunk,
                                                 owner, ownerDN, ownerGroup,
                                                 clients)
      if not res['OK']:
        return res
      # method and transID are passed like in every other log call of this operation
      self._logVerbose("Submitted %d jobs, bulkSubmissionFlag = %s" % (len(taskDictChunk), self.bulkSubmissionFlag),
                       method=method, transID=transID)

    return S_OK()

  def _prepareAndSubmitAndUpdateTasks(self, transID, transBody, tasks, owner, ownerDN, ownerGroup, clients):
    """ prepare + submit + monitor a dictionary of tasks

    :param int transID: transformation ID
    :param str transBody: transformation job template
    :param dict tasks: dictionary of per task parameters
    :param str owner: owner of the transformation
    :param str ownerDN: DN of the owner of the transformation
    :param str ownerGroup: group of the owner of the transformation
    :param dict clients: dictionary of client objects

    :return: S_OK/S_ERROR
    """

    method = '_prepareAndSubmitAndUpdateTasks'
    # prepare tasks
    preparedTransformationTasks = clients['TaskManager'].prepareTransformationTasks(transBody,
                                                                                    tasks,
                                                                                    owner,
                                                                                    ownerGroup,
                                                                                    ownerDN,
                                                                                    self.bulkSubmissionFlag)
    self._logDebug("prepareTransformationTasks return value:", preparedTransformationTasks,
                   method=method, transID=transID)
    if not preparedTransformationTasks['OK']:
      self._logError("Failed to prepare tasks", preparedTransformationTasks['Message'],
                     method=method, transID=transID)
      return preparedTransformationTasks

    # Submit tasks
    res = clients['TaskManager'].submitTransformationTasks(preparedTransformationTasks['Value'])
    self._logDebug("submitTransformationTasks return value:", res,
                   method=method, transID=transID)
    if not res['OK']:
      self._logError("Failed to submit prepared tasks:", res['Message'],
                     method=method, transID=transID)
      return res

    # Update tasks after submission
    res = clients['TaskManager'].updateDBAfterTaskSubmission(res['Value'])
    self._logDebug("updateDBAfterTaskSubmission return value:", res,
                   method=method, transID=transID)
    if not res['OK']:
      self._logError("Failed to update DB after task submission:", res['Message'],
                     method=method, transID=transID)
      return res

    return S_OK()

  @staticmethod
  def _addOperationForTransformations(operationsOnTransformationDict, operation, transformations,
                                      owner=None, ownerGroup=None, ownerDN=None):
    """Fill the operationsOnTransformationDict"""
    transformationIDsAndBodies = [(transformation['TransformationID'],
                                   transformation['Body'],
                                   transformation['AuthorDN'],
                                   transformation['AuthorGroup']) for transformation in transformations['Value']]
    for transID, body, t_ownerDN, t_ownerGroup in transformationIDsAndBodies:
      if transID in operationsOnTransformationDict:
        operationsOnTransformationDict[transID]['Operations'].append(operation)
      else:
        operationsOnTransformationDict[transID] = {'Body': body, 'Operations': [operation],
                                                   'Owner': owner if owner else getUsernameForDN(t_ownerDN)['Value'],
                                                   'OwnerGroup': ownerGroup if owner else t_ownerGroup,
                                                   'OwnerDN': ownerDN if owner else t_ownerDN}

  def __getCredentials(self):
    """Get the credentials to use if ShifterCredentials are set, otherwise do nothing.

    The user and group are read from the "/Shifter/<credentials>" options of the Operations
    section, the DN is the first one found for that user.
    This function fills the self.credTuple tuple with (owner, ownerGroup, ownerDN).

    :return: S_OK, or the result of the lookup that failed
    """
    if not self.credentials:
      return S_OK()
    resCred = Operations().getOptionsDict("/Shifter/%s" % self.credentials)
    if not resCred['OK']:
      self.log.error("Cred: Failed to find shifter credentials", self.credentials)
      return resCred
    owner = resCred['Value']['User']
    ownerGroup = resCred['Value']['Group']
    # A failed lookup is reported like the one above, rather than surfacing as a KeyError on 'Value'
    resDN = getDNForUsername(owner)
    if not resDN['OK']:
      self.log.error("Cred: Failed to find the DN of the shifter", owner)
      return resDN
    # returns  a list
    ownerDN = resDN['Value'][0]
    self.credTuple = (owner, ownerGroup, ownerDN)
    self.log.info("Cred: Tasks will be submitted with the credentials %s:%s" % (owner, ownerGroup))
    return S_OK()
Esempio n. 50
0
class TaskManagerAgentBase( AgentModule, TransformationAgentsUtilities ):
  """ To be extended. Please look at WorkflowTaskAgent and RequestTaskAgent.
  """

  def __init__( self, *args, **kwargs ):
    """ Constructor: initialise both base classes, then give every data member its default

        Always call this in the extension agent
    """
    AgentModule.__init__( self, *args, **kwargs )
    TransformationAgentsUtilities.__init__( self )

    # Client and transformation types looked at
    self.transClient, self.transType = None, []

    # Default value of the 'TasksPerLoop' option
    self.tasksPerLoop = 50

    # Credentials used for the submission
    self.owner = self.ownerGroup = self.ownerDN = ''

    # Submission settings
    self.pluginLocation, self.bulkSubmissionFlag = '', False

    # Shared with the threads: the queue itself, the transformations waiting in it
    # and the ones being processed
    self.transQueue = Queue()
    self.transInQueue, self.transInThread = [], {}

  #############################################################################

  def initialize( self ):
    """ Agent initialization: read the options, create the default client and start the threads

        The extensions MUST provide in the initialize method the following data members:
        - TransformationClient objects (self.transClient),
        - set the shifterProxy if different from the default one set here ('ProductionManager')
        - list of transformation types to be looked (self.transType)

        :return: S_OK
    """
    gMonitor.registerActivity( "SubmittedTasks", "Automatically submitted tasks", "Transformation Monitoring", "Tasks",
                               gMonitor.OP_ACUM )

    # Options and default client
    self.pluginLocation = self.am_getOption( 'PluginLocation', 'DIRAC.TransformationSystem.Client.TaskManagerPlugin' )
    self.transClient = TransformationClient()
    self.bulkSubmissionFlag = self.am_getOption( 'BulkSubmission', False )

    # One job running _execute is queued in the pool per thread
    nThreads = self.am_getOption( 'maxNumberOfThreads', 15 )
    workers = ThreadPool( nThreads, nThreads )
    self.log.verbose( "Multithreaded with %d threads" % nThreads )
    for threadID in xrange( nThreads ):
      workers.generateJobAndQueueIt( self._execute, [threadID] )

    return S_OK()

  def finalize( self ):
    """ Graceful finalization: empty the list of queued transformations and wait for the threads

        Nothing is waited for when no transformation is listed in self.transInQueue.

        :return: S_OK
    """
    if not self.transInQueue:
      return S_OK()

    self._logInfo( "Wait for threads to get empty before terminating the agent (%d tasks)" % len( self.transInThread ) )
    self.transInQueue = []
    # Poll every 2 seconds until no transformation is left in self.transInThread
    while self.transInThread:
      time.sleep( 2 )
    self.log.info( "Threads are empty, terminating the agent..." )
    return S_OK()

  #############################################################################

  def execute( self ):
    """ Collect, per transformation, the operations to run and put them in the thread queue

        Monitoring of tasks, monitoring of files, check of reserved tasks and submission are
        each enabled by their own option. For every enabled one the matching transformations
        are selected and the operation is added to their list.

        :return: S_OK, or the failed result of getProxyInfo when the submission is enabled
                 and the credentials cannot be determined
    """
    operationsOnTransformationDict = {}

    # enabling option, message logged when it is not set, option holding the statuses, operation
    monitoringSteps = ( ( 'MonitorTasks',
                          "Monitoring of tasks is disabled. To enable it, create the 'MonitorTasks' option",
                          'UpdateTasksStatus', 'updateTaskStatus' ),
                        ( 'MonitorFiles',
                          "Monitoring of files is disabled. To enable it, create the 'MonitorFiles' option",
                          'UpdateFilesStatus', 'updateFileStatus' ),
                        ( 'CheckReserved',
                          "Checking of reserved tasks is disabled. To enable it, create the 'CheckReserved' option",
                          'CheckReservedStatus', 'checkReservedTasks' ) )

    for enablingOption, disabledMessage, statusOption, operation in monitoringSteps:
      if not self.am_getOption( enablingOption, '' ):
        self.log.verbose( disabledMessage )
        continue
      # These operations concern the transformations whatever their agent type
      status = self.am_getOption( statusOption, ['Active', 'Completing', 'Stopped'] )
      transformations = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
      if not transformations['OK']:
        self.log.warn( "Could not select transformations: %s" % transformations['Message'] )
        continue
      bodies = dict( ( transformation['TransformationID'],
                       transformation['Body'] ) for transformation in transformations['Value'] )
      for transID, body in bodies.iteritems():
        if transID in operationsOnTransformationDict:
          operationsOnTransformationDict[transID]['Operations'].append( operation )
        else:
          operationsOnTransformationDict[transID] = {'Body': body, 'Operations': [operation]}

    # Submission: on top of the selection, the credentials of the current proxy are recorded
    if not self.am_getOption( 'SubmitTasks', '' ):
      self.log.verbose( "Submission of tasks is disabled. To enable it, create the 'SubmitTasks' option" )
    else:
      proxyResult = getProxyInfo( False, False )
      if not proxyResult['OK']:
        self.log.error( "Failed to determine credentials for submission", proxyResult['Message'] )
        return proxyResult
      proxyInfo = proxyResult['Value']
      self.owner = proxyInfo['username']
      self.ownerGroup = proxyInfo['group']
      self.ownerDN = proxyInfo['identity']
      self.log.info( "Tasks will be submitted with the credentials %s:%s" % ( self.owner, self.ownerGroup ) )

      # Here the default agent type selection of _selectTransformations applies
      status = self.am_getOption( 'SubmitStatus', ['Active', 'Completing'] )
      transformations = self._selectTransformations( transType = self.transType, status = status )
      if not transformations['OK']:
        self.log.warn( "Could not select transformations: %s" % transformations['Message'] )
      else:
        self.tasksPerLoop = self.am_getOption( 'TasksPerLoop', self.tasksPerLoop )
        bodies = dict( ( transformation['TransformationID'],
                         transformation['Body'] ) for transformation in transformations['Value'] )
        for transID, body in bodies.iteritems():
          if transID in operationsOnTransformationDict:
            operationsOnTransformationDict[transID]['Operations'].append( 'submitTasks' )
          else:
            operationsOnTransformationDict[transID] = {'Body': body, 'Operations': ['submitTasks']}

    self._fillTheQueue( operationsOnTransformationDict )

    return S_OK()

  def _selectTransformations( self, transType = None, status = ['Active', 'Completing'], agentType = ['Automatic'] ):
    """ get the transformations
    """
    selectCond = {}
    if status:
      selectCond['Status'] = status
    if transType is not None:
      selectCond['Type'] = transType
    if agentType:
      selectCond['AgentType'] = agentType
    res = self.transClient.getTransformations( condDict = selectCond )
    if not res['OK']:
      self.log.error( "Failed to get transformations: %s" % res['Message'] )
    elif not res['Value']:
      self.log.verbose( "No transformations found" )
    else:
      self.log.verbose( "Obtained %d transformations" % len( res['Value'] ) )
    return res

  def _fillTheQueue( self, operationsOnTransformationsDict ):
    """ Just fill the queue with the operation to be done on a certain transformation
    """
    count = 0
    for transID, bodyAndOps in operationsOnTransformationsDict.iteritems():
      if transID not in self.transInQueue:
        count += 1
        self.transInQueue.append( transID )
        self.transQueue.put( {transID: bodyAndOps} )

    self.log.info( "Out of %d transformations, %d put in thread queue" % ( len( operationsOnTransformationsDict ),
                                                                           count ) )

  #############################################################################

  def _getClients( self ):
    """ Build the client objects used inside a thread - another method meant to be extended

        What is created here are defaults: a WorkflowTasks, which is for wms tasks, should be
        replaced with something else if needed

        :return: dict with the 'TransformationClient' and 'TaskManager' objects
    """
    clients = {'TransformationClient': TransformationClient()}

    taskManager = WorkflowTasks()
    taskManager.pluginLocation = self.pluginLocation
    clients['TaskManager'] = taskManager

    return clients

  def _execute( self, threadID ):
    """ Worker loop run inside each thread: in practice this is the function that does the real stuff.

        Items are taken one at a time from self.transQueue. Each item is a one-key dictionary
        {transID: {'Operations': [names], ...}}: every name listed in 'Operations' is resolved as a
        method of this object and called with ( transIDOPBody, clients ).

        - an operation returning a not-OK result is logged, and the following operations still run
        - an exception stops the remaining operations of that item, and is logged
        - in every case the transformation is then removed from transInThread and transInQueue

        The loop ends only when an item is received whose transID is not in self.transInQueue.

        :param int threadID: number of the thread, used only to build the log prefix
    """
    # Each thread will have its own clients
    clients = self._getClients()
    method = '_execute'

    while True:
      transIDOPBody = self.transQueue.get()
      # Per-item state is reset here, so that the except/finally clauses below never use an unbound
      # name, nor a value left over from the previous item
      transID = None
      operation = 'None'
      startTime = time.time()
      try:
        transID = transIDOPBody.keys()[0]
        operations = transIDOPBody[transID]['Operations']
        if transID not in self.transInQueue:
          self._logWarn( "Got a transf not in transInQueue...?", method = method, transID = transID )
          # NOTE(review): this leaves the loop, so this worker stops consuming the queue - confirm it is intended
          break
        self.transInThread[transID] = ' [Thread%d] [%s] ' % ( threadID, str( transID ) )
        clients['TaskManager'].transInThread = self.transInThread
        for operation in operations:
          self._logInfo( "Starting processing operation %s" % operation, method = method, transID = transID )
          # timed separately from startTime, which covers the whole transformation
          startOperation = time.time()
          res = getattr( self, operation )( transIDOPBody, clients )
          if not res['OK']:
            self._logError( "Failed to %s: %s" % ( operation, res['Message'] ), method = method, transID = transID )
          self._logInfo( "Processed operation %s in %.1f seconds" % ( operation, time.time() - startOperation ),
                         method = method, transID = transID )
      except Exception as x:
        self._logException( 'Exception executing operation %s' % operation, lException = x, transID = transID, method = method )
      finally:
        if transID is None:
          # the item could not even be decoded
          transID = 'None'
        self._logInfo( "Processed transformation in %.1f seconds" % ( time.time() - startTime ),
                       method = method, transID = transID )
        self._logVerbose( "%d transformations still in queue" % ( len( self.transInQueue ) - 1 ),
                          method = method, transID = transID )
        self.transInThread.pop( transID, None )
        if transID in self.transInQueue:
          self.transInQueue.remove( transID )
        self._logDebug( "transInQueue = %s" % str( self.transInQueue ), method = method, transID = transID )

  #############################################################################
  # real operations done

  def updateTaskStatus( self, transIDOPBody, clients ):
    """ Refresh, in the transformation system, the status of the tasks of one transformation.

        The tasks whose ExternalStatus is one of the 'TaskUpdateStatus' option values, and that were
        last updated more than 10 minutes ago, are selected. Their current status is asked to the
        task manager and then stored, one status at a time, with setTaskStatus.

        :param dict transIDOPBody: one-key dictionary, the key being the transformation ID
        :param dict clients: the 'TransformationClient' and 'TaskManager' objects to use
        :return: S_OK() once every status was stored, otherwise the result of the first call that
                 failed or that had nothing to report
    """
    method = 'updateTaskStatus'
    transID = transIDOPBody.keys()[0]
    logTags = {'method': method, 'transID': transID}

    # Selection of the tasks which are in an UPDATE state
    statesToCheck = self.am_getOption( 'TaskUpdateStatus', ['Checking', 'Deleted', 'Killed', 'Staging', 'Stalled',
                                                            'Matched', 'Scheduled', 'Rescheduled', 'Completed',
                                                            'Submitted', 'Assigned', 'Received',
                                                            'Waiting', 'Running'] )
    selection = {"TransformationID":transID, "ExternalStatus":statesToCheck}
    updatedBefore = str( datetime.datetime.utcnow() - datetime.timedelta( minutes = 10 ) )
    tasksRes = clients['TransformationClient'].getTransformationTasks( condDict = selection,
                                                                       older = updatedBefore,
                                                                       timeStamp = 'LastUpdateTime' )
    self._logDebug( "getTransformationTasks(%s) return value: %s" % ( str( selection ), str( tasksRes ) ), **logTags )
    if not tasksRes['OK']:
      self._logError( "Failed to get tasks to update: %s" % tasksRes['Message'], **logTags )
      return tasksRes
    if not tasksRes['Value']:
      self._logVerbose( "No tasks found to update", **logTags )
      return tasksRes

    # Ask the task manager what the selected tasks are doing now
    self._logVerbose( "Getting %d tasks status" % len( tasksRes['Value'] ), **logTags )
    statusRes = clients['TaskManager'].getSubmittedTaskStatus( tasksRes['Value'] )
    self._logDebug( "getSubmittedTaskStatus return value: %s" % str( statusRes ), **logTags )
    if not statusRes['OK']:
      self._logError( "Failed to get updated task states: %s" % statusRes['Message'], **logTags )
      return statusRes
    tasksPerStatus = statusRes['Value']
    if not tasksPerStatus:
      self._logInfo( "No tasks to update", **logTags )
      return statusRes

    # Store the new states, stopping at the first failure
    for newStatus in sorted( tasksPerStatus ):
      taskList = tasksPerStatus[newStatus]
      self._logInfo( "Updating %d task(s) to %s" % ( len( taskList ), newStatus ), **logTags )
      updateRes = clients['TransformationClient'].setTaskStatus( transID, taskList, newStatus )
      self._logDebug( "setTaskStatus return value: %s" % str( updateRes ), **logTags )
      if not updateRes['OK']:
        self._logError( "Failed to update task status for transformation: %s" % updateRes['Message'], **logTags )
        return updateRes

    return S_OK()

  def updateFileStatus( self, transIDOPBody, clients ):
    """ Update the status of the files of one transformation.

        The files of the transformation in 'Assigned' status, last updated more than 10 minutes ago,
        are selected. Their new status is asked to the task manager, and is then set and committed
        through a FileReport.

        :param dict transIDOPBody: one-key dictionary, the key being the transformation ID
        :param dict clients: the 'TransformationClient' and 'TaskManager' objects to use
        :return: S_OK() once the new states were committed, otherwise the result of the first call
                 that failed or that had nothing to report
    """
    transID = transIDOPBody.keys()[0]
    method = 'updateFileStatus'

    timeStamp = str( datetime.datetime.utcnow() - datetime.timedelta( minutes = 10 ) )
    condDict = {'TransformationID' : transID, 'Status' : ['Assigned']}
    transformationFiles = clients['TransformationClient'].getTransformationFiles( condDict = condDict,
                                                                                  older = timeStamp, timeStamp = 'LastUpdate' )
    self._logDebug( "getTransformationFiles(%s) return value: %s" % ( str( condDict ), transformationFiles ),
                    method = method, transID = transID )
    if not transformationFiles['OK']:
      # transID is passed here as in every other log call of this method
      self._logError( "Failed to get transformation files to update: %s" % transformationFiles['Message'],
                      method = method, transID = transID )
      return transformationFiles
    if not transformationFiles['Value']:
      self._logInfo( "No files to be updated", transID = transID, method = method )
      return transformationFiles
    submittedFileStatus = clients['TaskManager'].getSubmittedFileStatus( transformationFiles['Value'] )
    self._logDebug( "getSubmittedFileStatus return value: %s" % submittedFileStatus,
                    method = method, transID = transID )
    if not submittedFileStatus['OK']:
      self._logError( "Failed to get updated file states for transformation: %s" % submittedFileStatus['Message'],
                      transID = transID, method = method )
      return submittedFileStatus
    statusDict = submittedFileStatus['Value']
    if not statusDict:
      self._logInfo( "No file states to be updated", transID = transID, method = method )
      return submittedFileStatus
    fileReport = FileReport( server = clients['TransformationClient'].getServer() )
    for lfn, status in statusDict.items():
      setFileStatus = fileReport.setFileStatus( transID, lfn, status )
      if not setFileStatus['OK']:
        # do not give up silently: say which file could not be handled
        self._logError( "Failed to set file status for %s: %s" % ( lfn, setFileStatus['Message'] ),
                        transID = transID, method = method )
        return setFileStatus
    commit = fileReport.commit()
    if not commit['OK']:
      self._logError( "Failed to update file states for transformation: %s" % commit['Message'],
                      transID = transID, method = method )
      return commit
    self._logInfo( "Updated the states of %d files" % len( commit['Value'] ),
                   transID = transID, method = method )

    return S_OK()

  def checkReservedTasks( self, transIDOPBody, clients ):
    """ Recover the tasks of one transformation that are stuck in 'Reserved' status.

        The tasks with ExternalStatus 'Reserved' since more than 1 hour are passed to the task
        manager (updateTransformationReservedTasks). From its answer:

        - the tasks listed in 'NoTasks' are set back to 'Created'
        - the tasks listed in 'TaskNameIDs' are set to 'Submitted', with the external ID found

        Task names are expected in the form <transID>_<taskID>.

        :param dict transIDOPBody: one-key dictionary, the key being the transformation ID
        :param dict clients: the 'TransformationClient' and 'TaskManager' objects to use
        :return: S_OK() when all tasks were handled, otherwise the result of the first call that
                 failed or that had nothing to report
    """
    transID = transIDOPBody.keys()[0]
    method = 'checkReservedTasks'

    # Select the tasks which have been in Reserved status for more than 1 hour for selected transformations
    condDict = {"TransformationID":transID, "ExternalStatus":'Reserved'}
    time_stamp_older = str( datetime.datetime.utcnow() - datetime.timedelta( hours = 1 ) )
    res = clients['TransformationClient'].getTransformationTasks( condDict = condDict, older = time_stamp_older )
    self._logDebug( "getTransformationTasks(%s) return value: %s" % ( condDict, res ),
                    method = method, transID = transID )
    if not res['OK']:
      self._logError( "Failed to get Reserved tasks: %s" % res['Message'],
                      transID = transID, method = method )
      return res
    if not res['Value']:
      self._logVerbose( "No Reserved tasks found", transID = transID, method = method )
      return res
    reservedTasks = res['Value']
    res = clients['TaskManager'].updateTransformationReservedTasks( reservedTasks )
    self._logDebug( "updateTransformationReservedTasks(%s) return value: %s" % ( reservedTasks, res ),
                    method = method, transID = transID )
    if not res['OK']:
      self._logError( "Failed to update transformation reserved tasks: %s" % res['Message'],
                      transID = transID, method = method )
      return res
    noTasks = res['Value']['NoTasks']
    taskNameIDs = res['Value']['TaskNameIDs']
    # For the tasks with no associated request found re-set the status of the task in the transformationDB
    for taskName in noTasks:
      # The IDs split from the name are strings: they get their own names, so that transID (used
      # for logging) is not overwritten with a value of another type
      taskTransID, taskID = taskName.split( '_' )
      self._logInfo( "Resetting status of %s to Created as no associated task found" % ( taskName ),
                     transID = transID, method = method )
      res = clients['TransformationClient'].setTaskStatus( int( taskTransID ), int( taskID ), 'Created' )
      if not res['OK']:
        self._logError( "Failed to update task status and ID after recovery: %s %s" % ( taskName, res['Message'] ),
                        transID = transID, method = method )
        return res
    # For the tasks for which an associated request was found update the task details in the transformationDB
    for taskName, extTaskID in taskNameIDs.items():
      taskTransID, taskID = taskName.split( '_' )
      self._logInfo( "Setting status of %s to Submitted with ID %s" % ( taskName, extTaskID ),
                     transID = transID, method = method )
      setTaskStatusAndWmsID = clients['TransformationClient'].setTaskStatusAndWmsID( int( taskTransID ), int( taskID ),
                                                                                     'Submitted', str( extTaskID ) )
      if not setTaskStatusAndWmsID['OK']:
        self._logError( "Failed to update task status and ID after recovery: %s %s" % ( taskName,
                                                                                        setTaskStatusAndWmsID['Message'] ),
                        transID = transID, method = method )
        return setTaskStatusAndWmsID

    return S_OK()

  def submitTasks( self, transIDOPBody, clients ):
    """ Submit the tasks of one transformation to an external system, using the taskManager provided.

        Up to self.tasksPerLoop tasks are requested from the transformation client, prepared by the
        task manager, submitted, and the DB is finally updated with the outcome of the submission.

        :param dict transIDOPBody: one-key dictionary {transID: {'Body': ..., ...}}
        :param dict clients: the 'TransformationClient' and 'TaskManager' objects to use
        :return: S_OK() when the whole chain succeeded, otherwise the result of the first step that
                 failed or that had nothing to submit
    """
    method = 'submitTasks'
    transID = transIDOPBody.keys()[0]
    transBody = transIDOPBody[transID]['Body']
    logTags = {'method': method, 'transID': transID}

    # 1. which tasks?
    toSubmit = clients['TransformationClient'].getTasksToSubmit( transID, self.tasksPerLoop )
    self._logDebug( "getTasksToSubmit(%s, %s) return value: %s" % ( transID, self.tasksPerLoop, toSubmit ), **logTags )
    if not toSubmit['OK']:
      self._logError( "Failed to obtain tasks: %s" % toSubmit['Message'], **logTags )
      return toSubmit
    jobDictionary = toSubmit['Value']['JobDictionary']
    if not jobDictionary:
      self._logVerbose( "No tasks found for submission", **logTags )
      return toSubmit
    self._logInfo( "Obtained %d tasks for submission" % len( jobDictionary ), **logTags )

    # 2. preparation
    taskManager = clients['TaskManager']
    prepared = taskManager.prepareTransformationTasks( transBody, jobDictionary,
                                                       self.owner, self.ownerGroup, self.ownerDN,
                                                       self.bulkSubmissionFlag )
    self._logDebug( "prepareTransformationTasks return value: %s" % prepared, **logTags )
    if not prepared['OK']:
      self._logError( "Failed to prepare tasks: %s" % prepared['Message'], **logTags )
      return prepared

    # 3. submission (failures are logged by __actualSubmit itself)
    submitted = self.__actualSubmit( prepared, clients, transID )
    if not submitted['OK']:
      return submitted

    # 4. bookkeeping
    dbUpdate = clients['TaskManager'].updateDBAfterTaskSubmission( submitted['Value'] )
    self._logDebug( "updateDBAfterTaskSubmission return value: %s" % dbUpdate, **logTags )
    if not dbUpdate['OK']:
      self._logError( "Failed to update DB after task submission: %s" % dbUpdate['Message'], **logTags )
      return dbUpdate

    return S_OK()

  def __actualSubmit( self, preparedTransformationTasks, clients, transID ):
    """ Hand the prepared tasks over to the task manager, which contacts either RMS or WMS
        depending on the type of transformation.

        :param dict preparedTransformationTasks: result of the preparation, its 'Value' is what gets submitted
        :param dict clients: dictionary holding the 'TaskManager' object to use
        :param transID: transformation ID, used for logging only
        :return: the result of submitTransformationTasks, whether OK or not
    """
    method = 'submitTasks'  # logged under the name of the calling operation
    submission = clients['TaskManager'].submitTransformationTasks( preparedTransformationTasks['Value'] )
    self._logDebug( "submitTransformationTasks return value: %s" % submission, method = method, transID = transID )
    if submission['OK']:
      return submission
    self._logError( "Failed to submit prepared tasks: %s" % submission['Message'],
                    transID = transID, method = method )
    return submission
Esempio n. 51
0
class InputDataAgent(AgentModule):
    """ Agent adding to the active transformations the files matching their input data query.

    At every cycle, for each transformation in 'Active' status, the input data query is taken from
    the transformation system and run against the metadata catalog; the files found are then added
    to the transformation. State kept between cycles, per transformation ID:

      - fileLog: number of files returned by the last query (0 after a failed addition)
      - timeLog: time of the last query
      - fullTimeLog: time of the last query made without a 'StartDate' restriction
    """

    #############################################################################
    def initialize(self):
        """ Make the necessary initializations """
        self.fileLog = {}
        self.timeLog = {}
        self.fullTimeLog = {}
        self.pollingTime = self.am_getOption('PollingTime', 120)
        self.fullUpdatePeriod = self.am_getOption('FullUpdatePeriod', 86400)
        gMonitor.registerActivity("Iteration", "Agent Loops", AGENT_NAME,
                                  "Loops/min", gMonitor.OP_SUM)
        self.transClient = TransformationClient('TransformationDB')
        self.metadataClient = FileCatalogClient()
        return S_OK()

    ##############################################################################
    def execute(self):
        """ Main execution method: one pass over all the active transformations.

        A problem with one transformation is logged and does not prevent the others from
        being processed.

        :return: S_OK() in all cases
        """
        gMonitor.addMark('Iteration', 1)
        # Get all the transformations
        result = self.transClient.getTransformations(
            condDict={'Status': 'Active'})
        if not result['OK']:
            gLogger.error(
                "InputDataAgent.execute: Failed to get transformations.",
                result['Message'])
            return S_OK()

        # Process each transformation
        for transDict in result['Value']:
            transID = long(transDict['TransformationID'])
            res = self.transClient.getTransformationInputDataQuery(transID)
            if not res['OK']:
                if res['Message'] == 'No InputDataQuery found for transformation':
                    gLogger.info(
                        "InputDataAgent.execute: No input data query found for transformation %d"
                        % transID)
                else:
                    gLogger.error(
                        "InputDataAgent.execute: Failed to get input data query for %d"
                        % transID, res['Message'])
                continue
            inputDataQuery = res['Value']
            self._setQueryStartDate(transID, inputDataQuery)

            # Perform the query to the metadata catalog
            gLogger.verbose(
                "Using input data query for transformation %d: %s" %
                (transID, str(inputDataQuery)))
            start = time.time()
            queryRes = self.metadataClient.findFilesByMetadata(inputDataQuery)
            rtime = time.time() - start
            gLogger.verbose("Metadata catalog query time: %.2f seconds." %
                            (rtime))
            if not queryRes['OK']:
                gLogger.error(
                    "InputDataAgent.execute: Failed to get response from the metadata catalog",
                    queryRes['Message'])
                continue
            lfnList = queryRes['Value']

            # Check if the number of files has changed since the last cycle
            nlfns = len(lfnList)
            gLogger.info(
                "%d files returned for transformation %d from the metadata catalog"
                % (nlfns, int(transID)))
            if self.fileLog.get(transID) == nlfns:
                gLogger.verbose(
                    'No new files in metadata catalog since last check')
            self.fileLog[transID] = nlfns

            # Add any new files to the transformation
            if lfnList:
                self._addFiles(transID, lfnList)

        return S_OK()

    def _setQueryStartDate(self, transID, inputDataQuery):
        """ Restrict the query to recent files when a full query was made recently enough.

        If the transformation was already queried, and the last full query is less than
        fullUpdatePeriod seconds old, 'StartDate' is set in inputDataQuery to the time of the
        previous query minus 10 seconds. Otherwise the query is left unrestricted.
        The time logs of the transformation are updated in all cases.

        :param transID: transformation ID
        :param dict inputDataQuery: the query, modified in place
        """
        if transID in self.timeLog and transID in self.fullTimeLog:
            sinceFullQuery = datetime.datetime.utcnow() - self.fullTimeLog[transID]
            if sinceFullQuery < datetime.timedelta(seconds=self.fullUpdatePeriod):
                startDate = self.timeLog[transID] - datetime.timedelta(seconds=10)
                inputDataQuery['StartDate'] = startDate.strftime('%Y-%m-%d %H:%M:%S')
            else:
                # Too long since the last full query: make a full one, just in case
                self.fullTimeLog[transID] = datetime.datetime.utcnow()
        self.timeLog[transID] = datetime.datetime.utcnow()
        if transID not in self.fullTimeLog:
            self.fullTimeLog[transID] = datetime.datetime.utcnow()

    def _addFiles(self, transID, lfnList):
        """ Add the files found to the transformation, logging what was added and what failed.

        When the addition call itself fails, the file count of the transformation is reset to 0.

        :param transID: transformation ID
        :param list lfnList: the LFNs returned by the metadata catalog
        """
        gLogger.verbose('Processing %d lfns for transformation %d' %
                        (len(lfnList), transID))
        # Add the files to the transformation
        gLogger.verbose('Adding %d lfns for transformation %d' %
                        (len(lfnList), transID))
        result = self.transClient.addFilesToTransformation(
            transID, sortList(lfnList))
        if not result['OK']:
            gLogger.warn(
                "InputDataAgent.execute: failed to add lfns to transformation",
                result['Message'])
            self.fileLog[transID] = 0
            return

        # The failures are those reported by the addition itself
        for lfn, error in result['Value']['Failed'].items():
            gLogger.warn(
                "InputDataAgent.execute: Failed to add %s to transformation"
                % lfn, error)
        successful = result['Value']['Successful']
        if successful:
            addedLfns = [lfn for lfn, status in successful.items()
                         if status == 'Added']
            gLogger.info(
                "InputDataAgent.execute: Added %d files to transformation"
                % len(addedLfns))
Esempio n. 52
0
class ValidateOutputDataAgent(AgentModule):
    """ Agent checking the output data of the transformations in 'ValidatingOutput' status.

    At every cycle:

      - the transformations in 'WaitingIntegrity' status for which no problematic file is
        reported anymore are moved to 'ValidatedOutput'
      - for each transformation in 'ValidatingOutput' status (of the configured types) the
        output directories are looked for and checked (catalog -> SE); the transformation is
        then moved to 'WaitingIntegrity' if problematic files are reported, to 'ValidatedOutput'
        otherwise
    """

    def __init__(self, *args, **kwargs):
        """ c'tor
        """
        AgentModule.__init__(self, *args, **kwargs)

        self.consistencyInspector = ConsistencyInspector()
        self.integrityClient = DataIntegrityClient()
        self.fc = FileCatalog()
        self.transClient = TransformationClient()
        self.fileCatalogClient = FileCatalogClient()

        # The agent option takes precedence over the Operations value
        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if agentTSTypes:
            self.transformationTypes = agentTSTypes
        else:
            self.transformationTypes = Operations().getValue(
                'Transformations/DataProcessing', ['MCSimulation', 'Merge'])

        self.directoryLocations = sorted(
            self.am_getOption('DirectoryLocations',
                              ['TransformationDB', 'MetadataCatalog']))
        self.transfidmeta = self.am_getOption('TransfIDMeta',
                                              "TransformationID")
        self.enableFlag = True

    #############################################################################

    def initialize(self):
        """ Sets defaults
        """

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        gLogger.info("Will treat the following transformation types: %s" %
                     str(self.transformationTypes))
        gLogger.info(
            "Will search for directories in the following locations: %s" %
            str(self.directoryLocations))
        gLogger.info("Will use %s as metadata tag name for TransformationID" %
                     self.transfidmeta)
        return S_OK()

    #############################################################################

    def execute(self):
        """ The VerifyOutputData execution method

        Nothing is done when the 'EnableFlag' option is not 'True'. A failed check of one
        transformation is logged and does not prevent the others from being checked.

        :return: S_OK, or the failed result of the transformations selection
        """
        self.enableFlag = self.am_getOption('EnableFlag', 'True')
        if self.enableFlag != 'True':
            self.log.info(
                "VerifyOutputData is disabled by configuration option 'EnableFlag'"
            )
            return S_OK('Disabled via CS flag')

        gLogger.info("-" * 40)
        self.updateWaitingIntegrity()
        gLogger.info("-" * 40)

        res = self.transClient.getTransformations({
            'Status':
            'ValidatingOutput',
            'Type':
            self.transformationTypes
        })
        if not res['OK']:
            gLogger.error("Failed to get ValidatingOutput transformations",
                          res['Message'])
            return res
        transDicts = res['Value']
        if not transDicts:
            gLogger.info("No transformations found in ValidatingOutput status")
            return S_OK()
        gLogger.info("Found %s transformations in ValidatingOutput status" %
                     len(transDicts))
        for transDict in transDicts:
            transID = transDict['TransformationID']
            res = self.checkTransformationIntegrity(int(transID))
            if not res['OK']:
                # %s: the ID is converted with int() just above, so it is not assumed to be a number here
                gLogger.error(
                    "Failed to perform full integrity check for transformation %s"
                    % transID, res['Message'])
            else:
                self.finalizeCheck(transID)
                gLogger.info("-" * 40)
        return S_OK()

    def updateWaitingIntegrity(self):
        """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput'

        Only the transformations for which no problematic file is reported are updated.

        :return: S_OK(), or the failed result of the transformations selection
        """
        gLogger.info(
            "Looking for transformations in the WaitingIntegrity status to update"
        )
        res = self.transClient.getTransformations(
            {'Status': 'WaitingIntegrity'})
        if not res['OK']:
            gLogger.error("Failed to get WaitingIntegrity transformations",
                          res['Message'])
            return res
        transDicts = res['Value']
        if not transDicts:
            gLogger.info("No transformations found in WaitingIntegrity status")
            return S_OK()
        gLogger.info("Found %s transformations in WaitingIntegrity status" %
                     len(transDicts))
        for transDict in transDicts:
            transID = transDict['TransformationID']
            gLogger.info("-" * 40)
            res = self.integrityClient.getTransformationProblematics(
                int(transID))
            if not res['OK']:
                gLogger.error(
                    "Failed to determine waiting problematics for transformation",
                    res['Message'])
            elif not res['Value']:
                res = self.transClient.setTransformationParameter(
                    transID, 'Status', 'ValidatedOutput')
                if not res['OK']:
                    gLogger.error(
                        "Failed to update status of transformation %s to ValidatedOutput"
                        % (transID))
                else:
                    gLogger.info(
                        "Updated status of transformation %s to ValidatedOutput"
                        % (transID))
            else:
                gLogger.info(
                    "%d problematic files for transformation %s were found" %
                    (len(res['Value']), transID))
        # Same kind of return value as on the other paths of this method
        return S_OK()

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """ Get the directories for the supplied transformation from the transformation system

        The 'OutputDirectories' parameter of the transformation and/or the metadata catalog
        are used, depending on self.directoryLocations. Only the directories whose path
        contains the transformation ID, zero-padded to 8 characters, are retained.

        :param transID: transformation ID
        :return: S_OK(sorted list of directories), or the failed result of the lookup
        """
        directories = []
        if 'TransformationDB' in self.directoryLocations:
            res = self.transClient.getTransformationParameters(
                transID, ['OutputDirectories'])
            if not res['OK']:
                gLogger.error("Failed to obtain transformation directories",
                              res['Message'])
                return res
            if not isinstance(res['Value'], list):
                # anything that is not a list is evaluated as a Python literal
                transDirectories = ast.literal_eval(res['Value'])
            else:
                transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if 'MetadataCatalog' in self.directoryLocations:
            res = self.fileCatalogClient.findDirectoriesByMetadata(
                {self.transfidmeta: transID})
            if not res['OK']:
                gLogger.error("Failed to obtain metadata catalog directories",
                              res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)
        if not directories:
            gLogger.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)

    @staticmethod
    def _addDirs(transID, newDirs, existingDirs):
        """ Append to existingDirs the new directories that belong to the transformation

        A directory is retained when its path contains the transformation ID, zero-padded
        to 8 characters, and when it is not already in existingDirs.

        :param transID: transformation ID
        :param newDirs: iterable of candidate directories
        :param list existingDirs: list extended in place
        :return: existingDirs
        """
        # The padded ID does not depend on the directory: computed once
        transStr = str(transID).zfill(8)
        for nDir in newDirs:
            if transStr in nDir and nDir not in existingDirs:
                existingDirs.append(nDir)
        return existingDirs

    #############################################################################
    def checkTransformationIntegrity(self, transID):
        """ This method contains the real work

        The output directories of the transformation are looked for, and those existing in
        the file catalog are passed to the consistency inspector (catalogDirectoryToSE).

        :param transID: transformation ID
        :return: S_OK() when the check was completed (or when there was nothing to check),
                 S_ERROR or the failed result otherwise
        """
        gLogger.info("-" * 40)
        gLogger.info("Checking the integrity of transformation %s" % transID)
        gLogger.info("-" * 40)

        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            return res
        directories = res['Value']
        if not directories:
            return S_OK()

        ######################################################
        #
        # This check performs Catalog->SE for possible output directories
        #
        res = self.fc.exists(directories)
        if not res['OK']:
            gLogger.error('Failed to check directory existence',
                          res['Message'])
            return res
        failed = res['Value']['Failed']
        # items() is needed to get the ( directory, error ) pairs: iterating the dictionary
        # itself only yields the directories
        for directory, error in failed.items():
            gLogger.error('Failed to determine existance of directory',
                          '%s %s' % (directory, error))
        if failed:
            return S_ERROR("Failed to determine the existance of directories")
        directoryExists = res['Value']['Successful']
        for directory in sorted(directoryExists):
            if not directoryExists[directory]:
                continue
            iRes = self.consistencyInspector.catalogDirectoryToSE(directory)
            if not iRes['OK']:
                gLogger.error(iRes['Message'])
                return iRes

        gLogger.info("-" * 40)
        gLogger.info("Completed integrity check for transformation %s" %
                     transID)
        return S_OK()

    def finalizeCheck(self, transID):
        """ Move to 'WaitingIntegrity' or 'ValidatedOutput'

        'WaitingIntegrity' is chosen when problematic files are reported for the
        transformation, 'ValidatedOutput' when none is. The status is left untouched when the
        problematic files cannot be determined.

        :param transID: transformation ID
        :return: S_OK() in all cases
        """
        res = self.integrityClient.getTransformationProblematics(int(transID))

        if not res['OK']:
            gLogger.error(
                "Failed to determine whether there were associated problematic files",
                res['Message'])
            newStatus = ''
        elif res['Value']:
            gLogger.info(
                "%d problematic files for transformation %s were found" %
                (len(res['Value']), transID))
            newStatus = "WaitingIntegrity"
        else:
            gLogger.info("No problematics were found for transformation %s" %
                         transID)
            newStatus = "ValidatedOutput"
        if newStatus:
            res = self.transClient.setTransformationParameter(
                transID, 'Status', newStatus)
            if not res['OK']:
                gLogger.error(
                    "Failed to update status of transformation %s to %s" %
                    (transID, newStatus))
            else:
                gLogger.info("Updated status of transformation %s to %s" %
                             (transID, newStatus))
        gLogger.info("-" * 40)
        return S_OK()
Esempio n. 53
0
class TaskManagerAgentBase( AgentModule ):
  ''' Base agent monitoring and submitting the tasks of transformations.

      To be extended. The extension needs to:
      - provide a taskManager object as data member
      - provide a shifterProxy (string) as data member
      - provide a transType (list of strings) as data member
  '''

  def __init__( self, *args, **kwargs ):
    ''' c'tor

        Only sets empty placeholders for the data members listed in the class
        docstring; initialize() refuses to start while they are still empty.
    '''
    AgentModule.__init__( self, *args, **kwargs )

    self.taskManager = None
    self.shifterProxy = ''
    self.transClient = TransformationClient()
    self.transType = []

  #############################################################################

  def initialize( self ):
    ''' agent initialization

        :return: S_ERROR if taskManager, shifterProxy or transType was not provided
                 by the extension, S_OK() otherwise
    '''
    if not self.taskManager:
      return S_ERROR( 'No task manager provided!' )

    if not self.shifterProxy:
      return S_ERROR( 'No shifter proxy provided!' )
    self.am_setOption( 'shifterProxy', self.shifterProxy )

    if not self.transType:
      return S_ERROR( 'No transformation types to look for!' )
    gLogger.info( "Looking for %s" % self.transType )

    gMonitor.registerActivity( "SubmittedTasks", "Automatically submitted tasks", "Transformation Monitoring", "Tasks",
                               gMonitor.OP_ACUM )

    return S_OK()

  #############################################################################

  def execute( self ):
    ''' The TaskManagerBase execution method.

        Runs, in this order, the four optional steps of the agent. A step is only
        executed when its enabling option is set; a failing step is logged as a
        warning and does not prevent the following steps from running.

        :return: S_OK() in every case
    '''
    # ( enabling option, method to call, text of the "disabled" message, text of the failure message )
    steps = ( ( 'MonitorTasks', self.updateTaskStatus,
                "Monitoring of tasks", 'Failed to update task states' ),
              ( 'MonitorFiles', self.updateFileStatus,
                "Monitoring of files", 'Failed to update file states' ),
              ( 'CheckReserved', self.checkReservedTasks,
                "Checking of reserved tasks", 'Failed to checked reserved tasks' ),
              ( 'SubmitTasks', self.submitTasks,
                "Submission of tasks", 'Failed to submit created tasks' ) )

    for optionName, step, description, failureMessage in steps:
      if not self.am_getOption( optionName, '' ):
        gLogger.info( "execute: %s is disabled." % description )
        gLogger.info( "execute: To enable create the '%s' option" % optionName )
        continue
      res = step()
      if not res['OK']:
        gLogger.warn( 'execute: %s' % failureMessage, res['Message'] )

    return S_OK()

  def _selectTransformations( self, transType = [], status = ['Active', 'Completing'], agentType = ['Automatic'] ):
    ''' get the transformations

        Every non-empty argument becomes a selection condition; an empty value
        disables the corresponding condition.

        :param transType: values for the 'Type' condition
        :param status: values for the 'Status' condition
        :param agentType: values for the 'AgentType' condition
        :return: the unmodified result of transClient.getTransformations()
    '''
    # NOTE(review): the list defaults are shared between calls; this method only reads them,
    # but they are handed over to the client as they are - confirm the client does not modify them
    selectCond = {}
    if status:
      selectCond['Status'] = status
    if transType:
      selectCond['Type'] = transType
    if agentType:
      selectCond['AgentType'] = agentType
    res = self.transClient.getTransformations( condDict = selectCond )
    if not res['OK']:
      gLogger.error( "_selectTransformations: Failed to get transformations for selection.", res['Message'] )
    elif not res['Value']:
      gLogger.info( "_selectTransformations: No transformations found for selection." )
    else:
      gLogger.info( "_selectTransformations: Obtained %d transformations for selection" % len( res['Value'] ) )
    return res

  def updateTaskStatus( self ):
    ''' Updates the task status

        For every selected transformation, the tasks whose ExternalStatus is one
        of the 'TaskUpdateStatus' values and whose LastUpdateTime is older than
        10 minutes are passed to taskManager.getSubmittedTaskStatus(); the
        statuses it returns are written back with transClient.setTaskStatus().
        A failure for one transformation is logged and the next one is treated.

        :return: the error of the transformation selection if it failed, S_OK() otherwise
    '''
    gLogger.info( "updateTaskStatus: Updating the Status of tasks" )
    # Get the transformations to be updated
    status = self.am_getOption( 'UpdateTasksStatus', ['Active', 'Completing', 'Stopped'] )
    res = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
    if not res['OK']:
      return res
    transformations = res['Value']

    # The external statuses to look at do not depend on the transformation: read the option once
    updateStatus = self.am_getOption( 'TaskUpdateStatus', ['Checking', 'Deleted', 'Killed', 'Staging', 'Stalled',
                                                           'Matched', 'Rescheduled', 'Completed', 'Submitted',
                                                           'Received', 'Waiting', 'Running'] )
    for transformation in transformations:
      transID = transformation['TransformationID']
      # Get the tasks which are in a UPDATE state
      condDict = {"TransformationID":transID, "ExternalStatus":updateStatus}
      timeStamp = str( datetime.datetime.utcnow() - datetime.timedelta( minutes = 10 ) )
      res = self.transClient.getTransformationTasks( condDict = condDict,
                                                     older = timeStamp,
                                                     timeStamp = 'LastUpdateTime' )
      if not res['OK']:
        gLogger.error( "updateTaskStatus: Failed to get tasks to update for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      if not res['Value']:
        gLogger.verbose( "updateTaskStatus: No tasks found to update for transformation %s" % transID )
        continue
      res = self.taskManager.getSubmittedTaskStatus( res['Value'] )
      if not res['OK']:
        gLogger.error( "updateTaskStatus: Failed to get updated task statuses for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      statusDict = res['Value']
      for taskStatus in sortList( statusDict.keys() ):
        taskIDs = statusDict[taskStatus]
        gLogger.info( "updateTaskStatus: Updating %d task(s) from transformation %s to %s" % ( len( taskIDs ),
                                                                                               transID,
                                                                                               taskStatus ) )
        res = self.transClient.setTaskStatus( transID, taskIDs, taskStatus )
        if not res['OK']:
          gLogger.error( "updateTaskStatus: Failed to update task status for transformation",
                         "%s %s" % ( transID, res['Message'] ) )

    gLogger.info( "updateTaskStatus: Transformation task status update complete" )
    return S_OK()

  def updateFileStatus( self ):
    ''' Update the files status

        For every selected transformation, the files in 'Assigned' status whose
        LastUpdate is older than 10 minutes are passed to
        taskManager.getSubmittedFileStatus(); the statuses it returns are
        committed through a FileReport. A failure for one transformation is
        logged and the next one is treated.

        :return: the error of the transformation selection if it failed, S_OK() otherwise
    '''
    gLogger.info( "updateFileStatus: Updating Status of task files" )
    #Get the transformations to be updated
    status = self.am_getOption( 'UpdateFilesStatus', ['Active', 'Completing', 'Stopped'] )
    res = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
    if not res['OK']:
      return res
    for transformation in res['Value']:
      transID = transformation['TransformationID']
      timeStamp = str( datetime.datetime.utcnow() - datetime.timedelta( minutes = 10 ) )
      condDict = {'TransformationID' : transID, 'Status' : ['Assigned']}
      res = self.transClient.getTransformationFiles( condDict = condDict, older = timeStamp, timeStamp = 'LastUpdate' )
      if not res['OK']:
        gLogger.error( "updateFileStatus: Failed to get transformation files to update.", res['Message'] )
        continue
      if not res['Value']:
        gLogger.info( "updateFileStatus: No files to be updated for transformation %s." % transID )
        continue
      res = self.taskManager.getSubmittedFileStatus( res['Value'] )
      if not res['OK']:
        gLogger.error( "updateFileStatus: Failed to get updated file statuses for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      statusDict = res['Value']
      if not statusDict:
        gLogger.info( "updateFileStatus: No file statuses to be updated for transformation %s." % transID )
        continue
      fileReport = FileReport( server = self.transClient.getServer() )
      for lfn, fileStatus in statusDict.items():
        fileReport.setFileStatus( int( transID ), lfn, fileStatus )
      res = fileReport.commit()
      if not res['OK']:
        gLogger.error( "updateFileStatus: Failed to update file status for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
      else:
        for fileStatus, update in res['Value'].items():
          gLogger.info( "updateFileStatus: Updated %s files for %s to %s." % ( update, transID, fileStatus ) )
    gLogger.info( "updateFileStatus: Transformation file status update complete" )
    return S_OK()

  def checkReservedTasks( self ):
    ''' Recover the tasks left in 'Reserved' external status

        For every selected transformation, the tasks that are 'Reserved' and
        selected with older = now - 1 hour and newer = now - 7 days are passed to
        taskManager.updateTransformationReservedTasks(). Tasks it reports under
        'NoTasks' are set back to 'Created'; tasks reported under 'TaskNameIDs'
        are set to 'Submitted' together with their external ID. Task names are
        expected in the form '<transID>_<taskID>'.

        :return: the error of the transformation selection if it failed, S_OK() otherwise
    '''
    gLogger.info( "checkReservedTasks: Checking Reserved tasks" )
    # Get the transformations which should be checked
    status = self.am_getOption( 'CheckReservedStatus', ['Active', 'Completing', 'Stopped'] )
    res = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
    if not res['OK']:
      return res
    for transformation in res['Value']:
      transID = transformation['TransformationID']
      # Select the tasks which have been in Reserved status for more than 1 hour for selected transformations
      condDict = {"TransformationID":transID, "ExternalStatus":'Reserved'}
      time_stamp_older = str( datetime.datetime.utcnow() - datetime.timedelta( hours = 1 ) )
      time_stamp_newer = str( datetime.datetime.utcnow() - datetime.timedelta( days = 7 ) )
      res = self.transClient.getTransformationTasks( condDict = condDict, older = time_stamp_older,
                                                     newer = time_stamp_newer )
      if not res['OK']:
        gLogger.error( "checkReservedTasks: Failed to get Reserved tasks for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      if not res['Value']:
        gLogger.verbose( "checkReservedTasks: No Reserved tasks found for transformation %s" % transID )
        continue
      res = self.taskManager.updateTransformationReservedTasks( res['Value'] )
      if not res['OK']:
        # This is a failure of the task manager, not an empty selection: report it as such
        gLogger.error( "checkReservedTasks: Failed to update Reserved tasks for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      noTasks = res['Value']['NoTasks']
      taskNameIDs = res['Value']['TaskNameIDs']
      # For the tasks with no associated request found re-set the status of the task in the transformationDB
      for taskName in noTasks:
        # Dedicated names: the transformation ID of the enclosing loop must not be overwritten
        taskTransID, taskID = taskName.split( '_' )
        gLogger.info( "checkReservedTasks: Resetting status of %s to Created as no associated task found" % ( taskName ) )
        res = self.transClient.setTaskStatus( int( taskTransID ), int( taskID ), 'Created' )
        if not res['OK']:
          gLogger.warn( "checkReservedTasks: Failed to update task status and ID after recovery",
                        "%s %s" % ( taskName, res['Message'] ) )
      # For the tasks for which an associated request was found update the task details in the transformationDB
      for taskName, extTaskID in taskNameIDs.items():
        taskTransID, taskID = taskName.split( '_' )
        gLogger.info( "checkReservedTasks: Setting status of %s to Submitted with ID %s" % ( taskName, extTaskID ) )
        res = self.transClient.setTaskStatusAndWmsID( int( taskTransID ), int( taskID ), 'Submitted',
                                                      str( extTaskID ) )
        if not res['OK']:
          gLogger.warn( "checkReservedTasks: Failed to update task status and ID after recovery",
                        "%s %s" % ( taskName, res['Message'] ) )
    gLogger.info( "checkReservedTasks: Updating of reserved tasks complete" )
    return S_OK()

  def submitTasks( self ):
    """ Submit the tasks to an external system, using the taskManager provided

        The owner and group are taken from the proxy returned by getProxyInfo().
        For every selected transformation up to 'TasksPerLoop' (default 50) tasks
        are requested, then prepared, submitted and recorded through the
        taskManager. A failure for one transformation is logged and the next one
        is treated.

        :return: the error of the proxy look-up or of the transformation selection
                 if one of them failed, S_OK() otherwise
    """
    gLogger.info( "submitTasks: Submitting tasks for transformations" )
    res = getProxyInfo( False, False )
    if not res['OK']:
      gLogger.error( "submitTasks: Failed to determine credentials for submission", res['Message'] )
      return res
    proxyInfo = res['Value']
    owner = proxyInfo['username']
    ownerGroup = proxyInfo['group']
    gLogger.info( "submitTasks: Tasks will be submitted with the credentials %s:%s" % ( owner, ownerGroup ) )
    # Get the transformations which should be submitted
    tasksPerLoop = self.am_getOption( 'TasksPerLoop', 50 )
    status = self.am_getOption( 'SubmitStatus', ['Active', 'Completing'] )
    res = self._selectTransformations( transType = self.transType, status = status )
    if not res['OK']:
      return res
    for transformation in res['Value']:
      transID = transformation['TransformationID']
      transBody = transformation['Body']
      res = self.transClient.getTasksToSubmit( transID, tasksPerLoop )
      if not res['OK']:
        gLogger.error( "submitTasks: Failed to obtain tasks for transformation", "%s %s" % ( transID, res['Message'] ) )
        continue
      tasks = res['Value']['JobDictionary']
      if not tasks:
        gLogger.verbose( "submitTasks: No tasks found for submission for transformation %s" % transID )
        continue
      gLogger.info( "submitTasks: Obtained %d tasks for submission for transformation %s" % ( len( tasks ), transID ) )
      res = self.taskManager.prepareTransformationTasks( transBody, tasks, owner, ownerGroup )
      if not res['OK']:
        gLogger.error( "submitTasks: Failed to prepare tasks for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      res = self.taskManager.submitTransformationTasks( res['Value'] )
      if not res['OK']:
        gLogger.error( "submitTasks: Failed to submit prepared tasks for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      res = self.taskManager.updateDBAfterTaskSubmission( res['Value'] )
      if not res['OK']:
        gLogger.error( "submitTasks: Failed to update DB after task submission for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
    gLogger.info( "submitTasks: Submission of transformation tasks complete" )
    return S_OK()
Esempio n. 54
0
class TransformationAgent(AgentModule, TransformationAgentsUtilities):
    """ Usually subclass of AgentModule
  """
    def __init__(self, *args, **kwargs):
        """ c'tor

        Declares every attribute of the agent with an empty placeholder;
        several of them receive their working value in initialize().
        """
        AgentModule.__init__(self, *args, **kwargs)
        TransformationAgentsUtilities.__init__(self)

        # agent configuration
        self.pluginLocation = ''
        self.maxFiles = 0
        self.noUnusedDelay = 0
        self.debug = False
        self.transformationStatus = []
        self.transformationTypes = []

        # client used outside of the threads
        self.transfClient = None

        # bookkeeping shared with the worker threads
        self.transQueue = Queue.Queue()
        self.transInQueue = []
        self.transInThread = {}
        self.pluginTimeout = {}

        # locations used for caching
        self.workDirectory = self.cacheFile = self.controlDirectory = ''

        # replica cache and its validity
        self.replicaCache = None
        self.replicaCacheValidity = None
        self.writingCache = False
        self.removedFromCache = 0

        # per-transformation state kept between cycles
        self.lastFileOffset = {}
        self.unusedFiles = {}
        self.unusedTimeStamp = {}

    def initialize(self):
        """ standard initialize

        Reads the agent options, creates the transformation client, resets
        the caches and starts the worker threads running _execute().

        :return: S_OK()
        """
        getOption = self.am_getOption

        # few parameters
        self.pluginLocation = getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.transformationStatus = getOption(
            'transformationStatus', ['Active', 'Completing', 'Flush'])
        # Prepare to change the name of the CS option as MaxFiles is ambiguous:
        # the old name only provides the default for the new one
        legacyMaxFiles = getOption('MaxFiles', 5000)
        self.maxFiles = getOption('MaxFilesToProcess', legacyMaxFiles)

        # Transformation types: agent option first, Operations defaults otherwise
        selectedTypes = getOption('TransformationTypes', [])
        if not selectedTypes:
            dataProc = Operations().getValue('Transformations/DataProcessing',
                                             ['MCSimulation', 'Merge'])
            dataManip = Operations().getValue(
                'Transformations/DataManipulation', ['Replication', 'Removal'])
            selectedTypes = dataProc + dataManip
        self.transformationTypes = sorted(selectedTypes)

        # clients
        self.transfClient = TransformationClient()

        # for caching using a pickle file
        self.workDirectory = self.am_getWorkDirectory()
        self.cacheFile = os.path.join(self.workDirectory, 'ReplicaCache.pkl')
        self.controlDirectory = self.am_getControlDirectory()

        # remember the offset if any in TS
        self.lastFileOffset = {}

        # Validity of the cache
        self.replicaCache = {}
        self.replicaCacheValidity = getOption('ReplicaCacheValidity', 2)

        self.noUnusedDelay = getOption('NoUnusedDelay', 6)

        # Get it threaded: one queued _execute job per thread of the pool
        poolSize = getOption('maxThreadsInPool', 1)
        workers = ThreadPool(poolSize, poolSize)
        self.log.info("Multithreaded with %d threads" % poolSize)
        for threadIndex in xrange(poolSize):
            workers.generateJobAndQueueIt(self._execute, [threadIndex])

        self.log.info("Will treat the following transformation types: %s" %
                      str(self.transformationTypes))

        return S_OK()

    def finalize(self):
        """ graceful finalization

        When transformations are still queued, the queue bookkeeping is
        emptied and the method polls every 2 seconds until no transformation
        is registered in a thread any more. The cache is written in all cases.

        :return: S_OK()
        """
        method = 'finalize'
        if self.transInQueue:
            self.transInQueue = []
            waitMessage = "Wait for threads to get empty before terminating the agent (%d tasks)" \
                % len(self.transInThread)
            self._logInfo(waitMessage, method=method)
            remaining = ','.join(str(transID) for transID in self.transInThread)
            self._logInfo('Remaining transformations:',
                          remaining,
                          method=method)
            while True:
                if not self.transInThread:
                    break
                time.sleep(2)
            self._logInfo("Threads are empty, terminating the agent...",
                          method=method)
        self.__writeCache()
        return S_OK()

    def execute(self):
        """ Just puts transformations in the queue

        For each transformation returned by getTransformations(): if it is
        derived from another one ('InheritedFrom' set), files are first moved
        from the ancestor; then the transformation is queued for the worker
        threads unless its ID is already in transInQueue.

        :return: S_OK() in every case, failures are only logged
        """
        # Get the transformations to process
        res = self.getTransformations()
        if not res['OK']:
            self._logError("Failed to obtain transformations:", res['Message'])
            return S_OK()
        # Keep the list under its own name: the results of the calls made in
        # the loop must not replace it, it is needed again for the summary
        transformations = res['Value']

        # Process the transformations
        count = 0
        for transDict in transformations:
            transID = long(transDict['TransformationID'])
            if transDict.get('InheritedFrom'):
                # Try and move datasets from the ancestor production
                moveRes = self.transfClient.moveFilesToDerivedTransformation(
                    transDict)
                if not moveRes['OK']:
                    self._logError(
                        "Error moving files from an inherited transformation",
                        moveRes['Message'],
                        transID=transID)
                else:
                    parentProd, movedFiles = moveRes['Value']
                    if movedFiles:
                        self._logInfo(
                            "Successfully moved files from %d to %d:" %
                            (parentProd, transID),
                            transID=transID)
                        for status, val in movedFiles.iteritems():
                            self._logInfo("\t%d files to status %s" %
                                          (val, status),
                                          transID=transID)
            if transID not in self.transInQueue:
                count += 1
                self.transInQueue.append(transID)
                self.transQueue.put(transDict)
        self._logInfo("Out of %d transformations, %d put in thread queue" %
                      (len(transformations), count))
        return S_OK()

    def getTransformations(self):
        """ Obtain the transformations to be executed, called at the start of every execute() cycle.

        With the 'Transformation' option left to 'All' the transformations are
        selected by status (and by type when types are configured); otherwise
        only the transformation named by the option is retrieved.

        :return: S_OK(list of transformation dictionaries) or the client error
        """
        method = 'getTransformations'
        transName = self.am_getOption('Transformation', 'All')

        # A single, explicitly requested transformation
        if transName != 'All':
            self._logInfo("Getting transformation %s." % transName,
                          method=method)
            single = self.transfClient.getTransformation(transName,
                                                         extraParams=True)
            if not single['OK']:
                self._logError("Failed to get transformation:",
                               single['Message'],
                               method=method)
                return single
            return S_OK([single['Value']])

        # All transformations matching the configured status (and types)
        typeInfo = ''
        if self.transformationTypes:
            typeInfo = ' of type %s' % str(self.transformationTypes)
        self._logInfo("Getting all transformations%s, status %s." %
                      (typeInfo, str(self.transformationStatus)),
                      method=method)
        selection = {'Status': self.transformationStatus}
        if self.transformationTypes:
            selection['Type'] = self.transformationTypes
        found = self.transfClient.getTransformations(selection,
                                                     extraParams=True)
        if not found['OK']:
            return found
        transformations = found['Value']
        self._logInfo("Obtained %d transformations to process" %
                      len(transformations),
                      method=method)
        return S_OK(transformations)

    def _getClients(self):
        """ Build a fresh set of clients, meant to be used by one thread.

        :return: dictionary with the keys 'TransformationClient' and 'DataManager'
        """
        clients = {}
        clients['TransformationClient'] = TransformationClient()
        clients['DataManager'] = DataManager()
        return clients

    def _execute(self, threadID):
        """ thread - does the real job: processing the transformations to be processed

        Endless loop taking transformations from transQueue and passing them
        to processTransformation(). The loop, and so the thread job, ends when
        a transformation is dequeued whose ID is no longer in transInQueue.

        :param threadID: number of the thread, only used to tag the log messages
        :return: S_OK() once the loop is left
        """

        # Each thread will have its own clients
        clients = self._getClients()

        while True:
            transDict = self.transQueue.get()
            # Both names are read in the except/finally clauses: bind them
            # before entering the try block so that leaving it early (break,
            # or an exception while reading the ID) can neither fail on an
            # unbound name nor report values left over from the previous
            # transformation. 'None' is the placeholder the finally clause
            # itself falls back to.
            transID = 'None'
            startTime = time.time()
            try:
                transID = long(transDict['TransformationID'])
                if transID not in self.transInQueue:
                    break
                self.transInThread[transID] = ' [Thread%d] [%s] ' % (
                    threadID, str(transID))
                self._logInfo("Processing transformation %s." % transID,
                              transID=transID)
                # Only the processing itself is timed
                startTime = time.time()
                res = self.processTransformation(transDict, clients)
                if not res['OK']:
                    self._logInfo("Failed to process transformation:",
                                  res['Message'],
                                  transID=transID)
            except Exception as x:  # pylint: disable=broad-except
                self._logException('Exception in plugin',
                                   lException=x,
                                   transID=transID)
            finally:
                if not transID:
                    transID = 'None'
                self._logInfo("Processed transformation in %.1f seconds" %
                              (time.time() - startTime),
                              transID=transID)
                # Whatever happened, the transformation is no longer being treated
                if transID in self.transInQueue:
                    self.transInQueue.remove(transID)
                self.transInThread.pop(transID, None)
                self._logVerbose("%d transformations still in queue" %
                                 len(self.transInQueue))
        return S_OK()

    def processTransformation(self, transDict, clients):
        """ process a single transformation (in transDict)

        Steps: get the files of the transformation, get their replicas, run
        the plug-in on them, create one task per (SE, LFNs) pair returned by
        the plug-in, update the bookkeeping of unused files, file offset and
        replica cache, and finally move a 'Flush' transformation back to
        'Active' when all its tasks were created.

        :param transDict: transformation dictionary; the keys 'TransformationID',
                          'Type' and 'Status' are read, 'Plugin' is optional
        :param clients: dictionary of clients as returned by _getClients()
        :return: S_OK(), or the error of the step that failed
        """
        method = 'processTransformation'
        transID = transDict['TransformationID']
        forJobs = transDict['Type'].lower() not in ('replication', 'removal')

        # First get the LFNs associated to the transformation
        transFiles = self._getTransformationFiles(
            transDict, clients, replicateOrRemove=not forJobs)
        if not transFiles['OK']:
            return transFiles
        if not transFiles['Value']:
            return S_OK()

        if transID not in self.replicaCache:
            self.__readCache(transID)
        transFiles = transFiles['Value']
        unusedLfns = [f['LFN'] for f in transFiles]
        unusedFiles = len(unusedLfns)

        plugin = transDict.get('Plugin', 'Standard')
        # Limit the number of LFNs to be considered for replication or removal as they are treated individually
        if not forJobs:
            maxFiles = Operations().getValue(
                'TransformationPlugins/%s/MaxFilesToProcess' % plugin, 0)
            # Get plugin-specific limit in number of files (0 means no limit)
            totLfns = len(unusedLfns)
            lfnsToProcess = self.__applyReduction(unusedLfns,
                                                  maxFiles=maxFiles)
            if len(lfnsToProcess) != totLfns:
                self._logInfo("Reduced number of files from %d to %d" %
                              (totLfns, len(lfnsToProcess)),
                              method=method,
                              transID=transID)
                # Set membership keeps this filter linear in the number of files
                keptLfns = set(lfnsToProcess)
                transFiles = [f for f in transFiles if f['LFN'] in keptLfns]
        else:
            lfnsToProcess = unusedLfns

        # Check the data is available with replicas
        res = self.__getDataReplicas(transDict,
                                     lfnsToProcess,
                                     clients,
                                     forJobs=forJobs)
        if not res['OK']:
            self._logError("Failed to get data replicas:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        self._logInfo("Processing transformation with '%s' plug-in." % plugin,
                      method=method,
                      transID=transID)
        res = self.__generatePluginObject(plugin, clients)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.run()
        if not res['OK']:
            self._logError("Failed to generate tasks for transformation:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        tasks = res['Value']
        self.pluginTimeout[transID] = res.get('Timeout', False)
        # Create the tasks
        allCreated = True
        created = 0
        lfnsInTasks = []
        # Built once: each task only needs membership tests against the processed LFNs
        processedLfns = set(lfnsToProcess)
        for se, lfns in tasks:
            res = clients['TransformationClient'].addTaskForTransformation(
                transID, lfns, se)
            if not res['OK']:
                self._logError("Failed to add task generated by plug-in:",
                               res['Message'],
                               method=method,
                               transID=transID)
                allCreated = False
            else:
                created += 1
                lfnsInTasks += [lfn for lfn in lfns if lfn in processedLfns]
        if created:
            self._logInfo("Successfully created %d tasks for transformation." %
                          created,
                          method=method,
                          transID=transID)
        else:
            self._logInfo("No new tasks created for transformation.",
                          method=method,
                          transID=transID)
        # Number of files obtained for this pass that did not end up in a created task
        self.unusedFiles[transID] = unusedFiles - len(lfnsInTasks)
        # If not all files were obtained, move the offset
        lastOffset = self.lastFileOffset.get(transID)
        if lastOffset:
            self.lastFileOffset[transID] = max(0,
                                               lastOffset - len(lfnsInTasks))
        self.__removeFilesFromCache(transID, lfnsInTasks)

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            res = clients['TransformationClient'].setTransformationParameter(
                transID, 'Status', 'Active')
            if not res['OK']:
                self._logError(
                    "Failed to update transformation status to 'Active':",
                    res['Message'],
                    method=method,
                    transID=transID)
            else:
                self._logInfo("Updated transformation status to 'Active'.",
                              method=method,
                              transID=transID)
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def _getTransformationFiles(self,
                                transDict,
                                clients,
                                statusList=None,
                                replicateOrRemove=False):
        """ Get the files of a transformation that should be considered now

        Files are queried from the TransformationClient by status. Files that
        come back in a status other than 'Unused' are reset to 'Unused' and
        dropped from the replica cache.

        :param dict transDict: transformation description; the keys
            'TransformationID', 'Type', 'Status' and (optionally) 'Plugin' are read
        :param dict clients: must provide a 'TransformationClient' entry
        :param list statusList: file statuses to select, ['Unused', 'ProbInFC'] when
            empty or None. 'MissingInFC' is added for 'Removal' transformations.
            The list passed by the caller is left untouched.
        :param bool replicateOrRemove: when True the "nothing new" check is skipped,
            unless a NoUnusedDelay option is set for the plugin
        :return: the failed query result, S_OK() without value when there is nothing
            (new) to process, S_OK(transFiles) otherwise
        """
        # By default, don't skip if no new Unused for DM transformations
        skipIfNoNewUnused = not replicateOrRemove
        transID = transDict['TransformationID']
        plugin = transDict.get('Plugin', 'Standard')
        # Check if files should be sorted and limited in number
        operations = Operations()
        sortedBy = operations.getValue(
            'TransformationPlugins/%s/SortedBy' % plugin, None)
        maxFiles = operations.getValue(
            'TransformationPlugins/%s/MaxFilesToProcess' % plugin, 0)
        # If the NoUnusedDelay is explicitly set, we want to take it into account, and skip if no new Unused
        if operations.getValue(
                'TransformationPlugins/%s/NoUnusedDelay' % plugin, 0):
            skipIfNoNewUnused = True
        noUnusedDelay = 0 if self.pluginTimeout.get(transID, False) else \
            operations.getValue('TransformationPlugins/%s/NoUnusedDelay' % plugin, self.noUnusedDelay)
        method = '_getTransformationFiles'
        lastOffset = self.lastFileOffset.setdefault(transID, 0)

        # Files that were problematic (either explicit or because SE was banned) may be recovered,
        # and always removing the missing ones.
        # Work on a private copy: extending the caller's list in place would make it
        # grow on every call.
        statusList = list(statusList) if statusList else ['Unused', 'ProbInFC']
        if transDict['Type'] == 'Removal':
            statusList.append('MissingInFC')
        transClient = clients['TransformationClient']
        res = transClient.getTransformationFiles(condDict={
            'TransformationID': transID,
            'Status': statusList
        },
                                                 orderAttribute=sortedBy,
                                                 offset=lastOffset,
                                                 maxfiles=maxFiles)
        if not res['OK']:
            self._logError("Failed to obtain input data:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        transFiles = res['Value']
        # A full page means there may be more files: continue from there next time,
        # otherwise start again from the beginning
        if maxFiles and len(transFiles) == maxFiles:
            self.lastFileOffset[transID] += maxFiles
        else:
            del self.lastFileOffset[transID]

        if not transFiles:
            self._logInfo("No '%s' files found for transformation." %
                          ','.join(statusList),
                          method=method,
                          transID=transID)
            if transDict['Status'] == 'Flush':
                res = transClient.setTransformationParameter(
                    transID, 'Status', 'Active')
                if not res['OK']:
                    self._logError(
                        "Failed to update transformation status to 'Active':",
                        res['Message'],
                        method=method,
                        transID=transID)
                else:
                    self._logInfo("Updated transformation status to 'Active'.",
                                  method=method,
                                  transID=transID)
            return S_OK()
        # Check if transformation is kicked: the kick file is consumed when found
        kickFile = os.path.join(self.controlDirectory,
                                'KickTransformation_%s' % str(transID))
        try:
            kickTrans = os.path.exists(kickFile)
            if kickTrans:
                os.remove(kickFile)
        except OSError:
            pass

        # Check if something new happened
        now = datetime.datetime.utcnow()
        if not kickTrans and skipIfNoNewUnused and noUnusedDelay:
            nextStamp = self.unusedTimeStamp.setdefault(
                transID, now) + datetime.timedelta(hours=noUnusedDelay)
            skip = now < nextStamp
            if len(transFiles) == self.unusedFiles.get(
                    transID, 0) and transDict['Status'] != 'Flush' and skip:
                self._logInfo("No new '%s' files found for transformation." %
                              ','.join(statusList),
                              method=method,
                              transID=transID)
                return S_OK()

        self.unusedTimeStamp[transID] = now
        # If files are not Unused, set them Unused
        notUnused = [
            trFile['LFN'] for trFile in transFiles
            if trFile['Status'] != 'Unused'
        ]
        otherStatuses = sorted(
            set(trFile['Status'] for trFile in transFiles) - set(['Unused']))
        if notUnused:
            res = transClient.setFileStatusForTransformation(transID,
                                                             'Unused',
                                                             notUnused,
                                                             force=True)
            if not res['OK']:
                self._logError("Error setting %d files Unused:" %
                               len(notUnused),
                               res['Message'],
                               method=method,
                               transID=transID)
            else:
                self._logInfo("Set %d files from %s to Unused" %
                              (len(notUnused), ','.join(otherStatuses)),
                              method=method,
                              transID=transID)
                self.__removeFilesFromCache(transID, notUnused)
        return S_OK(transFiles)

    def __applyReduction(self, lfns, maxFiles=None):
        """ Limit the number of files to be considered

        :param lfns: files to choose from
        :param maxFiles: upper limit; self.maxFiles is used when None, and a
            false value disables the limit
        :return: lfns itself when no reduction is needed, otherwise the first
            entries of randomize(lfns), up to the limit
        """
        limit = self.maxFiles if maxFiles is None else maxFiles
        if limit and len(lfns) > limit:
            return randomize(lfns)[:limit]
        return lfns

    def __getDataReplicas(self, transDict, lfns, clients, forJobs=True):
        """ Get the replicas for the LFNs, looking in the replica cache first

        LFNs not found in the cache are looked up through _getDataReplicasDM in
        chunks; LFNs for which replicas were found are added to the cache, which
        is then written out.

        :param dict transDict: transformation description; 'TransformationID',
            'Body' and 'Status' are read
        :param lfns: LFNs for which replicas are wanted
        :param dict clients: passed on to _getDataReplicasDM
        :param bool forJobs: passed on to _getDataReplicasDM
        :return: S_OK(dict) mapping LFN to its replicas; LFNs without replicas are
            absent. For transformations whose Body contains 'RemoveFile' every LFN
            is mapped to ['None'].
        """
        method = '__getDataReplicas'
        transID = transDict['TransformationID']
        if 'RemoveFile' in transDict['Body']:
            # When removing files, we don't care about their replicas.
            # Each LFN gets its own list: dict.fromkeys would share a single one.
            return S_OK(dict((lfn, ['None']) for lfn in lfns))
        clearCacheFile = os.path.join(self.controlDirectory,
                                      'ClearCache_%s' % str(transID))
        clearCache = os.path.exists(clearCacheFile)
        if clearCache:
            try:
                os.remove(clearCacheFile)
            except OSError:
                # Best effort: the cache is cleared even if the file could not be removed
                pass
        if clearCache or transDict['Status'] == 'Flush':
            self._logInfo("Replica cache cleared",
                          method=method,
                          transID=transID)
            # We may need to get new replicas
            self.__clearCacheForTrans(transID)
        else:
            # If the cache needs to be cleaned
            self.__cleanCache(transID)
        dataReplicas = {}
        nLfns = len(lfns)
        self._logVerbose("Getting replicas for %d files" % nLfns,
                         method=method,
                         transID=transID)
        cachedReplicaSets = self.replicaCache.get(transID, {})
        cachedReplicas = {}
        # Merge all sets of replicas
        for replicas in cachedReplicaSets.values():
            cachedReplicas.update(replicas)
        self._logInfo("Number of cached replicas: %d" % len(cachedReplicas),
                      method=method,
                      transID=transID)
        setCached = set(cachedReplicas)
        setLfns = set(lfns)
        for lfn in setLfns & setCached:
            dataReplicas[lfn] = cachedReplicas[lfn]
        newLFNs = setLfns - setCached
        self._logInfo("ReplicaCache hit for %d out of %d LFNs" %
                      (len(dataReplicas), nLfns),
                      method=method,
                      transID=transID)
        if newLFNs:
            startTime = time.time()
            self._logInfo("Getting replicas for %d files from catalog" %
                          len(newLFNs),
                          method=method,
                          transID=transID)
            newReplicas = {}
            for chunk in breakListIntoChunks(newLFNs, 10000):
                res = self._getDataReplicasDM(transID,
                                              chunk,
                                              clients,
                                              forJobs=forJobs)
                if res['OK']:
                    # Only keep (and cache) files that do have replicas
                    reps = dict((lfn, ses)
                                for lfn, ses in res['Value'].items()
                                if ses)
                    newReplicas.update(reps)
                    self.__updateCache(transID, reps)
                else:
                    self._logWarn("Failed to get replicas for %d files" %
                                  len(chunk),
                                  res['Message'],
                                  method=method,
                                  transID=transID)

            self._logInfo("Obtained %d replicas from catalog in %.1f seconds" %
                          (len(newReplicas), time.time() - startTime),
                          method=method,
                          transID=transID)
            dataReplicas.update(newReplicas)
            noReplicas = newLFNs - set(dataReplicas)
            self.__writeCache(transID)
            if noReplicas:
                self._logWarn(
                    "Found %d files without replicas (or only in Failover)" %
                    len(noReplicas),
                    method=method,
                    transID=transID)
        return S_OK(dataReplicas)

    def _getDataReplicasDM(self,
                           transID,
                           lfns,
                           clients,
                           forJobs=True,
                           ignoreMissing=False):
        """ Get the replicas for the LFNs from the DataManager and flag bad files

        Files that are neither 'Successful' nor 'Failed' in the reply are set
        'ProbInFC' in the transformation; files reported as
        "No such file or directory" are set 'MissingInFC' unless ignoreMissing.

        :param transID: transformation ID
        :param lfns: LFNs to look up
        :param dict clients: must provide 'DataManager' and 'TransformationClient'
        :param bool forJobs: use getReplicasForJobs rather than getReplicas, and
            ignore storage elements whose name contains 'failover'
        :param bool ignoreMissing: when True, missing files are returned with an
            empty replica list and their status is not changed
        :return: the failed DataManager result, or S_OK(dict) mapping LFN to a
            list of storage element names
        """
        method = '_getDataReplicasDM'

        startTime = time.time()
        self._logVerbose("Getting replicas%s from catalog for %d files" %
                         (' for jobs' if forJobs else '', len(lfns)),
                         method=method,
                         transID=transID)
        if forJobs:
            # Get only replicas eligible for jobs
            res = clients['DataManager'].getReplicasForJobs(lfns, getUrl=False)
        else:
            # Get all replicas
            res = clients['DataManager'].getReplicas(lfns, getUrl=False)
        if not res['OK']:
            return res
        replicas = res['Value']
        # Prepare a dictionary for all LFNs
        dataReplicas = {}
        self._logVerbose(
            "Replica results for %d files obtained in %.2f seconds" %
            (len(lfns), time.time() - startTime),
            method=method,
            transID=transID)
        # If files are neither Successful nor Failed, they are set problematic in the FC
        problematicLfns = [
            lfn for lfn in lfns if lfn not in replicas['Successful']
            and lfn not in replicas['Failed']
        ]
        if problematicLfns:
            self._logInfo(
                "%d files found problematic in the catalog, set ProbInFC" %
                len(problematicLfns),
                method=method,
                transID=transID)
            res = clients[
                'TransformationClient'].setFileStatusForTransformation(
                    transID, 'ProbInFC', problematicLfns)
            if not res['OK']:
                self._logError("Failed to update status of problematic files:",
                               res['Message'],
                               method=method,
                               transID=transID)
        # Create a dictionary containing all the file replicas
        failoverLfns = []
        for lfn, replicaDict in replicas['Successful'].items():
            for se in replicaDict:
                # This remains here for backward compatibility in case VOs have not defined SEs not to be used for jobs
                if forJobs and 'failover' in se.lower():
                    self._logVerbose("Ignoring failover replica for %s." % lfn,
                                     method=method,
                                     transID=transID)
                else:
                    dataReplicas.setdefault(lfn, []).append(se)
            if not dataReplicas.get(lfn):
                failoverLfns.append(lfn)
        if failoverLfns:
            self._logVerbose(
                "%d files have no replica but possibly in Failover SE" %
                len(failoverLfns),
                method=method,
                transID=transID)
        # Make sure that file missing from the catalog are marked in the transformation DB.
        missingLfns = []
        for lfn, reason in replicas['Failed'].items():
            if "No such file or directory" in reason:
                self._logVerbose("%s not found in the catalog." % lfn,
                                 method=method,
                                 transID=transID)
                missingLfns.append(lfn)
        if missingLfns:
            self._logInfo("%d files not found in the catalog" %
                          len(missingLfns),
                          method=method,
                          transID=transID)
            if ignoreMissing:
                # One fresh list per LFN: dict.fromkeys would share a single list
                dataReplicas.update(dict((lfn, []) for lfn in missingLfns))
            else:
                res = clients[
                    'TransformationClient'].setFileStatusForTransformation(
                        transID, 'MissingInFC', missingLfns)
                if not res['OK']:
                    self._logError("Failed to update status of missing files:",
                                   res['Message'],
                                   method=method,
                                   transID=transID)
        return S_OK(dataReplicas)

    def __updateCache(self, transID, newReplicas):
        """ Store a new set of replicas in the cache of a transformation

        The set is keyed by the current UTC time.
        """
        transCache = self.replicaCache.setdefault(transID, {})
        transCache[datetime.datetime.utcnow()] = newReplicas


#    if len( newReplicas ) > 5000:
#      self.__writeCache( transID )

    def __clearCacheForTrans(self, transID):
        """ Drop everything cached for a transformation (no-op when nothing is cached)
        """
        if transID in self.replicaCache:
            del self.replicaCache[transID]

    def __cleanReplicas(self, transID, lfns):
        """ Remove from the cache of a transformation the files that are not in a list

        :param transID: transformation ID
        :param lfns: LFNs to keep in the cache
        """
        cachedReplicas = set()
        # values() rather than itervalues(): works with Python 2 and Python 3
        for replicas in self.replicaCache.get(transID, {}).values():
            cachedReplicas.update(replicas)
        toRemove = cachedReplicas - set(lfns)
        if toRemove:
            self._logInfo("Remove %d files from cache" % len(toRemove),
                          method='__cleanReplicas',
                          transID=transID)
            self.__removeFromCache(transID, toRemove)

    def __cleanCache(self, transID):
        """ Drop outdated or empty replica sets from the cache of a transformation

        A set is dropped when it is older than self.replicaCacheValidity days or
        when it is empty; a transformation left without any set is removed from
        the cache. Exceptions are logged, not propagated.
        """
        try:
            if transID not in self.replicaCache:
                return
            transCache = self.replicaCache[transID]
            validFrom = datetime.datetime.utcnow() - datetime.timedelta(
                days=self.replicaCacheValidity)
            # Iterate over a copy of the keys as entries are deleted on the way
            for stamp in set(transCache):
                nEntries = len(transCache[stamp])
                if stamp >= validFrom and nEntries:
                    continue
                description = '%d cached' % nEntries if nEntries else 'empty cache'
                self._logInfo(
                    "Clear %s replicas for transformation %s, time %s" %
                    (description, str(transID), str(stamp)),
                    transID=transID,
                    method='__cleanCache')
                del transCache[stamp]
            # Remove empty transformations
            if not transCache:
                del self.replicaCache[transID]
        except Exception as x:
            self._logException("Exception when cleaning replica cache:",
                               lException=x)

    def __removeFilesFromCache(self, transID, lfns):
        """ Remove files from the cache of a transformation and, if any
        was removed, write the cache out
        """
        nRemoved = self.__removeFromCache(transID, lfns)
        if not nRemoved:
            return
        self._logInfo("Removed %d replicas from cache" % nRemoved,
                      method='__removeFilesFromCache',
                      transID=transID)
        self.__writeCache(transID)

    def __removeFromCache(self, transID, lfns):
        """ Remove files from all the replica sets cached for a transformation

        :param transID: transformation ID
        :param lfns: LFNs to remove
        :return: number of cache entries removed; always an integer, 0 when the
            transformation has nothing cached
        """
        removed = 0
        transCache = self.replicaCache.get(transID)
        if not transCache or not lfns:
            return removed
        for lfn in lfns:
            for timeKey in transCache:
                replicas = transCache[timeKey]
                # Count every entry actually removed, also those with an empty
                # replica list, so that the caller knows the cache has changed
                if lfn in replicas:
                    del replicas[lfn]
                    removed += 1
        return removed

    def __cacheFile(self, transID):
        """ Name of the cache file of a transformation: self.cacheFile with
        '.pkl' replaced by '_<transID>.pkl'
        """
        transSuffix = '_%s.pkl' % str(transID)
        return self.cacheFile.replace('.pkl', transSuffix)

    @gSynchro
    def __readCache(self, transID):
        """ Load the replica cache of a transformation from its cache file

        Nothing is done when the transformation is already in the in-memory
        cache. When the file does not exist or cannot be loaded, the cache of
        the transformation is set to an empty dictionary.
        """
        if transID in self.replicaCache:
            return
        # Defined before the try block as both are used by the exception handler
        method = '__readCache'
        fileName = self.__cacheFile(transID)
        try:
            if not os.path.exists(fileName):
                self.replicaCache[transID] = {}
            else:
                # Pickle files must be opened in binary mode.
                # NOTE(review): pickle.load can execute arbitrary code; this is only
                # safe as long as the cache file cannot be written by anybody else.
                with open(fileName, 'rb') as cacheFile:
                    self.replicaCache[transID] = pickle.load(cacheFile)
                self._logInfo(
                    "Successfully loaded replica cache from file %s (%d files)"
                    % (fileName, self.__filesInCache(transID)),
                    method=method,
                    transID=transID)
        except Exception as x:
            self._logException("Failed to load replica cache from file %s" %
                               fileName,
                               lException=x,
                               method=method,
                               transID=transID)
            self.replicaCache[transID] = {}

    def __filesInCache(self, transID):
        """ Count the entries of all the replica sets cached for a transformation

        :return: sum of the sizes of the cached sets, 0 when nothing is cached
        """
        cache = self.replicaCache.get(transID, {})
        # values() rather than itervalues(): works with Python 2 and Python 3
        return sum(len(lfns) for lfns in cache.values())

    @gSynchro
    def __writeCache(self, transID=None):
        """ Write the replica cache to file, one file per transformation

        :param transID: transformation to write; when it is false (e.g. None)
            the caches of all the transformations are written

        Exceptions are logged, not propagated.
        """
        method = '__writeCache'
        # Defined before the try block as both are used by the exception handler
        cacheFile = None
        t_id = transID
        try:
            startTime = time.time()
            transList = [transID] if transID else set(self.replicaCache)
            filesInCache = 0
            nCache = 0
            for t_id in transList:
                # Protect the copy of the cache
                filesInCache += self.__filesInCache(t_id)
                # write to a temporary file in order to avoid corrupted files
                cacheFile = self.__cacheFile(t_id)
                tmpFile = cacheFile + '.tmp'
                # Pickle files must be opened in binary mode
                with open(tmpFile, 'wb') as fd:
                    pickle.dump(self.replicaCache.get(t_id, {}), fd)
                # Now rename the file as it should be
                os.rename(tmpFile, cacheFile)
                nCache += 1
            self._logInfo(
                "Successfully wrote %d replica cache file(s) (%d files) in %.1f seconds"
                % (nCache, filesInCache, time.time() - startTime),
                method=method,
                transID=transID if transID else None)
        except Exception as x:
            self._logException("Could not write replica cache file %s" %
                               cacheFile,
                               lException=x,
                               method=method,
                               transID=t_id)

    def __generatePluginObject(self, plugin, clients):
        """ Instantiate the TransformationPlugin class for the given plugin name

        The class is looked up in the module self.pluginLocation.

        :param str plugin: plugin name, passed to the class constructor
        :param dict clients: must provide 'TransformationClient' and 'DataManager'
        :return: S_OK(plugin object), or S_ERROR() when the module cannot be
            imported or the object cannot be created
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except ImportError as e:
            self._logException("Failed to import 'TransformationPlugin' %s" %
                               plugin,
                               lException=e,
                               method="__generatePluginObject")
            return S_ERROR()
        try:
            plugin_o = getattr(plugModule, 'TransformationPlugin')(
                '%s' % plugin,
                transClient=clients['TransformationClient'],
                dataManager=clients['DataManager'])
        except AttributeError as e:
            self._logException("Failed to create %s()" % plugin,
                               lException=e,
                               method="__generatePluginObject")
            return S_ERROR()
        # Configure the plugin BEFORE returning it: placed after the return
        # statements, these calls could never be executed.
        # NOTE(review): the hooks are looked up defensively because it cannot be
        # checked from here that every TransformationPlugin class has them.
        setDirectory = getattr(plugin_o, 'setDirectory', None)
        if callable(setDirectory):
            setDirectory(self.workDirectory)
        setCallback = getattr(plugin_o, 'setCallback', None)
        if callable(setCallback):
            setCallback(self.pluginCallback)
        return S_OK(plugin_o)

    def pluginCallback(self, transID, invalidateCache=False):
        """ Standard plugin callback

        :param transID: transformation ID
        :param bool invalidateCache: when True, the replicas cached for the
            transformation are dropped and the cache is written out

        Exceptions are logged, not propagated.
        """
        if not invalidateCache:
            return
        try:
            if transID in self.replicaCache:
                self._logInfo("Removed cached replicas for transformation",
                              method='pluginCallBack',
                              transID=transID)
                self.replicaCache.pop(transID)
                self.__writeCache(transID)
        except Exception as x:
            # Still best effort, but leave a trace instead of failing silently
            self._logException("Exception when invalidating replica cache:",
                               lException=x,
                               method='pluginCallBack',
                               transID=transID)
class TransformationCleaningAgent( AgentModule ):

  #############################################################################
  def initialize( self ):
    """ Create the clients used by the agent and read its options """
    self.replicaManager = ReplicaManager()
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.requestClient = RequestClient()
    self.metadataClient = FileCatalogClient()
    self.storageUsageClient = StorageUsageClient()

    # By default the agent uses the proxy defined under /Operations/Shifter/DataManager;
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    # Transformation types handled by the agent
    defaultTypes = ['MCSimulation', 'DataReconstruction', 'DataStripping', 'MCStripping', 'Merge', 'Replication']
    configuredTypes = self.am_getOption( 'TransformationTypes', defaultTypes )
    self.transformationTypes = sortList( configuredTypes )
    gLogger.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )

    # Places where the directories of a transformation are looked for
    defaultLocations = ['TransformationDB', 'StorageUsage', 'MetadataCatalog']
    configuredLocations = self.am_getOption( 'DirectoryLocations', defaultLocations )
    self.directoryLocations = sortList( configuredLocations )
    gLogger.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )

    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    gLogger.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )

    # Number of days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )
    gLogger.info( "Will archive Completed transformations after %d days" % self.archiveAfter )

    configuredSEs = self.am_getOption( 'ActiveSEs', [] )
    self.activeStorages = sortList( configuredSEs )
    gLogger.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )

    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    gLogger.info( "Will remove logs found on storage element: %s" % self.logSE )
    return S_OK()

  #############################################################################
  def execute( self ):
    """ The TransformationCleaningAgent execution method.

    Unless disabled with the EnableFlag option, transformations of the configured
    types are looked up by status: those in 'Cleaning' status are cleaned, the
    output of those in 'RemovingFiles' status is removed, and those in 'Completed'
    status selected with older = now - ArchiveAfter days are archived.
    A failed lookup is logged and does not stop the following steps.

    :return: S_OK()
    """
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if self.enableFlag != 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option %s/EnableFlag' % ( self.section ) )
      return S_OK( 'Disabled via CS flag' )

    # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( {'Status':'Cleaning', 'Type':self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        self.cleanTransformation( transDict['TransformationID'] )
    else:
      gLogger.error( "Failed to get transformations in Cleaning status", res['Message'] )

    # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( {'Status':'RemovingFiles', 'Type':self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        self.removeTransformationOutput( transDict['TransformationID'] )
    else:
      gLogger.error( "Failed to get transformations in RemovingFiles status", res['Message'] )

    # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( {'Status':'Completed', 'Type':self.transformationTypes}, older = olderThanTime )
    if res['OK']:
      for transDict in res['Value']:
        self.archiveTransformation( transDict['TransformationID'] )
    else:
      gLogger.error( "Failed to get transformations in Completed status", res['Message'] )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ Collect the directories of a transformation from the configured locations

    The locations listed in self.directoryLocations ('TransformationDB',
    'StorageUsage', 'MetadataCatalog') are queried in turn; the first failing
    query is returned as is, otherwise S_OK( sorted list of directories ).
    """
    found = []
    locations = self.directoryLocations

    if 'TransformationDB' in locations:
      result = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not result['OK']:
        gLogger.error( "Failed to obtain transformation directories", result['Message'] )
        return result
      found = self.__addDirs( transID, result['Value'].splitlines(), found )

    if 'StorageUsage' in locations:
      result = self.storageUsageClient.getStorageDirectories( '', '', transID, [] )
      if not result['OK']:
        gLogger.error( "Failed to obtain storage usage directories", result['Message'] )
        return result
      found = self.__addDirs( transID, result['Value'], found )

    if 'MetadataCatalog' in locations:
      result = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not result['OK']:
        gLogger.error( "Failed to obtain metadata catalog directories", result['Message'] )
        return result
      found = self.__addDirs( transID, result['Value'], found )

    if not found:
      gLogger.info( "No output directories found" )
    return S_OK( sortList( found ) )

  def __addDirs( self, transID, newDirs, existingDirs ):
    """ Add to existingDirs the directories that belong to the transformation

    A directory is kept when its path contains the transformation ID padded
    with zeros to 8 characters, and it is not added twice.

    :param transID: transformation ID
    :param newDirs: candidate directories
    :param list existingDirs: directories found so far, extended in place
    :return: existingDirs
    """
    # The padded ID is the same for all directories. It is looked for as a plain
    # substring, so that it is never interpreted as a regular expression.
    transStr = str( transID ).zfill( 8 )
    for directory in newDirs:
      if transStr in directory and directory not in existingDirs:
        existingDirs.append( directory )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    """ Remove the directory from each of the active storage elements, stopping at the first failure """
    for seName in self.activeStorages:
      result = self.__removeStorageDirectory( directory, seName )
      if result['OK']:
        continue
      return result
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    """ Remove a directory, recursively, from a storage element

    :param str directory: LFN of the directory
    :param str storageElement: name of the storage element
    :return: S_OK() when the directory was removed or does not exist on the
             storage element, the failed result otherwise
    """
    gLogger.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )
    res = self.replicaManager.getPfnForLfn( [directory], storageElement )
    if not res['OK']:
      gLogger.error( "Failed to get PFN for directory", res['Message'] )
      return res
    failed = res['Value']['Failed']
    # A dedicated loop variable: 'directory' is still needed in the messages below
    for failedDir, error in failed.items():
      gLogger.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( failedDir, error ) )
    if failed:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    # list(): values() cannot be indexed with Python 3
    storageDirectory = list( res['Value']['Successful'].values() )[0]
    res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      gLogger.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = self.replicaManager.removeStorageDirectory( storageDirectory, storageElement, recursive = True, singleDirectory = True )
    if not res['OK']:
      gLogger.error( "Failed to remove storage directory", res['Message'] )
      return res
    gLogger.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'], directory, storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    """ Remove the files found in the catalog below the directory

    :return: S_OK() when there was nothing to remove or everything was removed,
             the failed result or S_ERROR otherwise
    """
    listing = self.__getCatalogDirectoryContents( [directory] )
    if not listing['OK']:
      return listing
    lfns = listing['Value']
    if not lfns:
      return S_OK()
    gLogger.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( lfns ) )
    removal = self.replicaManager.removeFile( lfns )
    if not removal['OK']:
      return removal
    failed = removal['Value']['Failed']
    for lfn, reason in failed.items():
      gLogger.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
    if failed:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    """ List the files found in the catalog below the directories, sub-directories included

    Directories that cannot be listed are logged and skipped.

    :param list directories: directories to start from; the list is not modified
    :return: S_OK( list of the files found )
    """
    gLogger.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      gLogger.info( directory )
    # Work on a copy: the queue is consumed and extended with the sub-directories
    activeDirs = list( directories )
    allFiles = {}
    while activeDirs:
      currentDir = activeDirs.pop( 0 )
      res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        gLogger.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    gLogger.info( "Found %d files" % len( allFiles ) )
    return S_OK( list( allFiles ) )

  def cleanTransformationLogFiles( self, directory ):
    """ Remove the directory from the log storage element (self.logSE) """
    gLogger.info( "Removing log files found in the directory %s" % directory )
    removal = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True )
    if removal['OK']:
      gLogger.info( "Successfully removed transformation log directory" )
      return S_OK()
    gLogger.error( "Failed to remove log files", removal['Message'] )
    return removal

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    """ Remove the output data of a transformation from the catalog and the storage

    The directories containing '/LOG/' are left alone. On success the status of
    the transformation is set to 'RemovedFiles'.

    :return: S_OK() on success and also when the directories cannot be obtained,
             the first failed result otherwise
    """
    gLogger.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    gLogger.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    # %s as in the other messages: %d raises when the ID is not a number,
    # which would prevent the status update below
    gLogger.info( "Successfully removed output of transformation %s" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      gLogger.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    gLogger.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    """ Archive a transformation

    The tasks of the transformation are cleaned (cleanTransformationTasks), then
    the transformation itself (TransformationClient.cleanTransformation), and its
    status is set to 'Archived'.

    :return: S_OK() on success, the first failed result otherwise
    """
    gLogger.info( "Archiving transformation %s" % transID )
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    # %s as in the other messages: %d raises when the ID is not a number,
    # which would prevent the status update below
    gLogger.info( "Successfully archived transformation %s" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      gLogger.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    gLogger.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    """ Remove any mention of the supplied transformation

    The tasks are cleaned, then for each directory of the transformation the
    log files (directories containing '/LOG/'), the catalog contents and the
    storage contents, then the remnants in the metadata catalog and the
    transformation itself. Finally the status is set to 'Deleted'.

    :return: S_OK() on success and also when the directories cannot be obtained,
             the first failed result otherwise
    """
    gLogger.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      # NOTE(review): the catalog and storage cleaning is also applied to the
      # log directories - confirm that this is intended
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    # %s as in the other messages: %d raises when the ID is not a number,
    # which would prevent the status update below
    gLogger.info( "Successfully cleaned transformation %s" % transID )
    # Change the status of the transformation to deleted
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' )
    if not res['OK']:
      gLogger.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] )
      return res
    gLogger.info( "Updated status of transformation %s to Deleted" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID, directories ):
    """ Remove the files found in the metadata catalog for the given transformation.

    The files are looked up with a metadata query on the self.transfidmeta key and
    removed through the replica manager.

    :param transID: transformation ID used as value of the metadata query
    :param directories: not used by this method
    :return: S_OK() when no file was found or all files were removed, the failing
             result structure or S_ERROR when some files could not be removed
    """
    metaQuery = { self.transfidmeta : transID }
    queryResult = self.metadataClient.findFilesByMetadata( metaQuery )
    if not queryResult['OK']:
      return queryResult

    lfns = queryResult['Value']
    if len( lfns ) == 0:
      gLogger.info('No files found for transID %s'%transID)
      return S_OK()

    removalResult = self.replicaManager.removeFile( lfns )
    if not removalResult['OK']:
      return removalResult

    # Report every file that could not be removed before giving up
    failedFiles = removalResult['Value']['Failed']
    for lfn, reason in failedFiles.items():
      gLogger.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if failedFiles:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )

    gLogger.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    """ Remove the tasks of a transformation from the external system.

    The external IDs of the tasks are removed as requests when the transformation
    type is 'Replication' and as WMS jobs otherwise. Nothing is done when the
    transformation has no task.

    :param transID: ID of the transformation
    :return: S_OK() on success, otherwise the failing result structure
    """
    idsResult = self.__getTransformationExternalIDs( transID )
    if not idsResult['OK']:
      return idsResult
    externalIDs = idsResult['Value']
    if not externalIDs:
      return S_OK()

    # The transformation type decides which system holds the tasks
    typeResult = self.transClient.getTransformationParameters( transID, ['Type'] )
    if not typeResult['OK']:
      gLogger.error( "Failed to determine transformation type" )
      return typeResult

    if typeResult['Value'] == 'Replication':
      removalResult = self.__removeRequests( externalIDs )
    else:
      removalResult = self.__removeWMSTasks( externalIDs )
    if removalResult['OK']:
      return S_OK()
    return removalResult

  def __getTransformationExternalIDs( self, transID ):
    """ Collect the 'ExternalID' of every task of the given transformation.

    :param transID: ID of the transformation
    :return: S_OK( list of external IDs ) or the failing result of the task query
    """
    selection = {'TransformationID':transID}
    tasksResult = self.transClient.getTransformationTasks( condDict = selection )
    if not tasksResult['OK']:
      gLogger.error( "Failed to get externalIDs for transformation %d" % transID, tasksResult['Message'] )
      return tasksResult
    externalIDs = [ task['ExternalID'] for task in tasksResult['Value'] ]
    gLogger.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    """ Placeholder for the removal of requests.

    No request is removed: an error is logged to record that the removal is
    missing, and S_OK() is returned.

    :param requestIDs: IDs of the requests that should be removed (currently ignored)
    :return: S_OK()
    """
    notImplementedMessage = "Not removing requests but should do"
    gLogger.error( notImplementedMessage )
    return S_OK()

  def __removeWMSTasks( self, jobIDs ):
    """ Delete the given jobs from the WMS, then the failover requests associated to them.

    The jobs are deleted in chunks of 500. A chunk whose deletion fails only because
    some job IDs do not exist in the WMS is not considered a failure. Any other
    failure (not authorized, failed jobs, or an error without per-job details)
    makes the method return S_ERROR once all chunks have been tried, without
    touching the failover requests.

    :param jobIDs: list of WMS job IDs
    :return: S_OK() when all jobs and all associated requests were removed,
             S_ERROR (or the failing result structure) otherwise
    """
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):
      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        gLogger.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif 'NonauthorizedJobIDs' in res:
        gLogger.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif 'FailedJobIDs' in res:
        gLogger.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False
      elif 'InvalidJobIDs' in res:
        # Only invalid IDs were reported: the jobs are already gone from the WMS
        gLogger.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      else:
        # Error without per-job details (e.g. the call itself failed): the jobs
        # cannot be assumed to be removed
        gLogger.error( "Failed to remove jobs from WMS", res.get( 'Message', '' ) )
        allRemove = False
    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    gLogger.info( "Successfully removed all tasks from the WMS" )

    # Look for failover requests left behind by the removed jobs
    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      gLogger.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    gLogger.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()

    failed = 0
    for jobID, requestName in failoverRequests.items():
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        gLogger.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        gLogger.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    if failed:
      gLogger.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      gLogger.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    gLogger.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Esempio n. 56
0
class InputDataAgent( AgentModule ):
  ''' Agent adding to the active transformations the files matching their input data query.

  At every cycle, for each 'Active' transformation of the handled types, the input
  data query of the transformation is run against the metadata catalog and the
  files found are added to the transformation.
  '''

  def __init__( self, *args, **kwargs ):
    ''' c'tor
    '''
    AgentModule.__init__( self, *args, **kwargs )

    # Bookkeeping dictionaries, all keyed by transformation ID
    self.fileLog = {}  # number of files returned by the last metadata catalog query
    self.timeLog = {}  # time of the last cycle in which the transformation was queried (RefreshOnly mode)
    self.fullTimeLog = {}  # start of the current full update period (RefreshOnly mode)

    self.pollingTime = self.am_getOption( 'PollingTime', 120 )
    self.fullUpdatePeriod = self.am_getOption( 'FullUpdatePeriod', 86400 )
    self.refreshonly = self.am_getOption( 'RefreshOnly', False )
    self.dateKey = self.am_getOption( 'DateKey', None )

    self.transClient = TransformationClient()
    self.metadataClient = FileCatalogClient()
    self.transformationTypes = None

  #############################################################################
  def initialize( self ):
    ''' Make the necessary initializations

    The transformation types handled by the agent are taken from the agent option
    'TransformationTypes' or, when not set, from the 'Transformations/DataProcessing'
    and 'Transformations/DataManipulation' operations options. The types listed in
    'Transformations/ExtendableTransfTypes' are then removed from the list.

    :return: S_OK()
    '''
    gMonitor.registerActivity( "Iteration", "Agent Loops", AGENT_NAME, "Loops/min", gMonitor.OP_SUM )
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = sorted( agentTSTypes )
    else:
      dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      self.transformationTypes = sorted( dataProc + dataManip )
    extendables = Operations().getValue( 'Transformations/ExtendableTransfTypes', [])
    if extendables:
      for extendable in extendables:
        if extendable in self.transformationTypes:
          # The Extendables do not use this Agent (they have no Input data query)
          self.transformationTypes.remove(extendable)

    return S_OK()

  ##############################################################################
  def execute( self ):
    ''' Main execution method

    Problems with a single transformation are logged and the transformation is
    skipped for this cycle; they never make the cycle fail.

    :return: S_OK()
    '''

    gMonitor.addMark( 'Iteration', 1 )
    # Get all the transformations
    transResult = self.transClient.getTransformations( {'Status' : 'Active',
                                                        'Type' : self.transformationTypes } )
    if not transResult['OK']:
      gLogger.error( "InputDataAgent.execute: Failed to get transformations.", transResult['Message'] )
      return S_OK()

    # Process each transformation
    for transDict in transResult['Value']:
      transID = long( transDict['TransformationID'] )
      queryResult = self.transClient.getTransformationInputDataQuery( transID )
      if not queryResult['OK']:
        if queryResult['Message'] == 'No InputDataQuery found for transformation':
          gLogger.info( "InputDataAgent.execute: No input data query found for transformation %d" % transID )
        else:
          gLogger.error( "InputDataAgent.execute: Failed to get input data query for %d" % transID,
                         queryResult['Message'] )
        continue
      inputDataQuery = queryResult['Value']

      if self.refreshonly:
        # Determine the correct time stamp to use for this transformation
        if transID in self.timeLog:
          if transID in self.fullTimeLog:
            # Within the full update period the query is restricted to recent files,
            # once the period has elapsed a full query is made just in case
            sinceFullQuery = datetime.datetime.utcnow() - self.fullTimeLog[transID]
            if sinceFullQuery < datetime.timedelta( seconds = self.fullUpdatePeriod ):
              timeStamp = self.timeLog[transID]
              if self.dateKey:
                # 10 seconds of overlap with the previous query
                inputDataQuery[self.dateKey] = ( timeStamp - datetime.timedelta( seconds = 10 ) ).strftime( '%Y-%m-%d %H:%M:%S' )
              else:
                gLogger.error( "DateKey was not set in the CS, cannot use the RefreshOnly" )
            else:
              self.fullTimeLog[transID] = datetime.datetime.utcnow()
        self.timeLog[transID] = datetime.datetime.utcnow()
        if transID not in self.fullTimeLog:
          self.fullTimeLog[transID] = datetime.datetime.utcnow()

      # Perform the query to the metadata catalog
      gLogger.verbose( "Using input data query for transformation %d: %s" % ( transID, str( inputDataQuery ) ) )
      start = time.time()
      catalogResult = self.metadataClient.findFilesByMetadata( inputDataQuery )
      rtime = time.time() - start
      gLogger.verbose( "Metadata catalog query time: %.2f seconds." % ( rtime ) )
      if not catalogResult['OK']:
        gLogger.error( "InputDataAgent.execute: Failed to get response from the metadata catalog",
                       catalogResult['Message'] )
        continue
      lfnList = catalogResult['Value']

      # Check if the number of files has changed since the last cycle
      nlfns = len( lfnList )
      gLogger.info( "%d files returned for transformation %d from the metadata catalog" % ( nlfns, int( transID ) ) )
      if transID in self.fileLog:
        if nlfns == self.fileLog[transID]:
          gLogger.verbose( 'No new files in metadata catalog since last check' )
      self.fileLog[transID] = nlfns

      # Add any new files to the transformation
      addedLfns = []
      if lfnList:
        gLogger.verbose( 'Processing %d lfns for transformation %d' % ( len( lfnList ), transID ) )
        # Add the files to the transformation
        gLogger.verbose( 'Adding %d lfns for transformation %d' % ( len( lfnList ), transID ) )
        addResult = self.transClient.addFilesToTransformation( transID, sorted( lfnList ) )
        if not addResult['OK']:
          gLogger.warn( "InputDataAgent.execute: failed to add lfns to transformation", addResult['Message'] )
          self.fileLog[transID] = 0
        else:
          # The failures are those reported by addFilesToTransformation
          # (not by the input data query call made above)
          if addResult['Value']['Failed']:
            for lfn, error in addResult['Value']['Failed'].items():
              gLogger.warn( "InputDataAgent.execute: Failed to add %s to transformation" % lfn, error )
          if addResult['Value']['Successful']:
            for lfn, status in addResult['Value']['Successful'].items():
              if status == 'Added':
                addedLfns.append( lfn )
            gLogger.info( "InputDataAgent.execute: Added %d files to transformation" % len( addedLfns ) )

    return S_OK()
Esempio n. 57
0
class InputDataAgent(AgentModule):
  """ Agent adding to the active transformations the files matching their input data query.

  At every cycle, for each 'Active' transformation, the input data query of the
  transformation is run against the metadata catalog and the files found are
  added to the transformation.
  """

  #############################################################################
  def initialize(self):
    """ Make the necessary initializations

    :return: S_OK()
    """
    # Bookkeeping dictionaries, all keyed by transformation ID
    self.fileLog = {}  # number of files returned by the last metadata catalog query
    self.timeLog = {}  # time of the last cycle in which the transformation was queried
    self.fullTimeLog = {}  # start of the current full update period
    self.pollingTime = self.am_getOption('PollingTime',120)
    self.fullUpdatePeriod = self.am_getOption('FullUpdatePeriod',86400)
    gMonitor.registerActivity("Iteration","Agent Loops",AGENT_NAME,"Loops/min",gMonitor.OP_SUM)
    self.transClient = TransformationClient('TransformationDB')
    self.metadataClient = FileCatalogClient()
    return S_OK()

  ##############################################################################
  def execute(self):
    """ Main execution method

    Problems with a single transformation are logged and the transformation is
    skipped for this cycle; they never make the cycle fail.

    :return: S_OK()
    """

    gMonitor.addMark('Iteration',1)
    # Get all the transformations
    transResult = self.transClient.getTransformations(condDict={'Status':'Active'})
    if not transResult['OK']:
      gLogger.error("InputDataAgent.execute: Failed to get transformations.", transResult['Message'])
      return S_OK()

    # Process each transformation
    for transDict in transResult['Value']:
      transID = long(transDict['TransformationID'])
      queryResult = self.transClient.getTransformationInputDataQuery(transID)
      if not queryResult['OK']:
        if queryResult['Message'] == 'No InputDataQuery found for transformation':
          gLogger.info("InputDataAgent.execute: No input data query found for transformation %d" % transID)
        else:
          gLogger.error("InputDataAgent.execute: Failed to get input data query for %d" % transID,
                        queryResult['Message'])
        continue
      inputDataQuery = queryResult['Value']

      # Determine the correct time stamp to use for this transformation
      if transID in self.timeLog:
        if transID in self.fullTimeLog:
          # Within the full update period the query is restricted to recent files,
          # once the period has elapsed a full query is made just in case
          sinceFullQuery = datetime.datetime.utcnow() - self.fullTimeLog[transID]
          if sinceFullQuery < datetime.timedelta(seconds=self.fullUpdatePeriod):
            timeStamp = self.timeLog[transID]
            # 10 seconds of overlap with the previous query
            inputDataQuery['StartDate'] = (timeStamp - datetime.timedelta(seconds=10)).strftime('%Y-%m-%d %H:%M:%S')
          else:
            self.fullTimeLog[transID] = datetime.datetime.utcnow()
      self.timeLog[transID] = datetime.datetime.utcnow()
      if transID not in self.fullTimeLog:
        self.fullTimeLog[transID] = datetime.datetime.utcnow()

      # Perform the query to the metadata catalog
      gLogger.verbose("Using input data query for transformation %d: %s" % (transID,str(inputDataQuery)))
      start = time.time()
      catalogResult = self.metadataClient.findFilesByMetadata(inputDataQuery)
      rtime = time.time()-start
      gLogger.verbose("Metadata catalog query time: %.2f seconds." % (rtime))
      if not catalogResult['OK']:
        gLogger.error("InputDataAgent.execute: Failed to get response from the metadata catalog",
                      catalogResult['Message'])
        continue
      lfnList = catalogResult['Value']

      # Check if the number of files has changed since the last cycle
      nlfns = len(lfnList)
      gLogger.info("%d files returned for transformation %d from the metadata catalog" % (nlfns,int(transID)) )
      if transID in self.fileLog:
        if nlfns == self.fileLog[transID]:
          gLogger.verbose('No new files in metadata catalog since last check')
      self.fileLog[transID] = nlfns

      # Add any new files to the transformation
      addedLfns = []
      if lfnList:
        gLogger.verbose('Processing %d lfns for transformation %d' % (len(lfnList),transID) )
        # Add the files to the transformation
        gLogger.verbose('Adding %d lfns for transformation %d' % (len(lfnList),transID) )
        addResult = self.transClient.addFilesToTransformation(transID,sortList(lfnList))
        if not addResult['OK']:
          gLogger.warn("InputDataAgent.execute: failed to add lfns to transformation", addResult['Message'])
          self.fileLog[transID] = 0
        else:
          # The failures are those reported by addFilesToTransformation
          # (not by the input data query call made above)
          if addResult['Value']['Failed']:
            for lfn,error in addResult['Value']['Failed'].items():
              gLogger.warn("InputDataAgent.execute: Failed to add %s to transformation" % lfn,error)
          if addResult['Value']['Successful']:
            for lfn,status in addResult['Value']['Successful'].items():
              if status == 'Added':
                addedLfns.append(lfn)
            gLogger.info("InputDataAgent.execute: Added %d files to transformation" % len(addedLfns))

    return S_OK()