Example #1
0
def main():
  """Attach a transformation to a production, optionally under a parent transformation.

  Positional arguments: <prodID> <transID> [<parentTransID>]. Any other number
  of arguments shows the help with exit code 1. Each ID is resolved through its
  client before the transformation is added; a failed call is logged as an
  error and ends the script with exit code -1.
  """
  Script.parseCommandLine()

  from DIRAC.ProductionSystem.Client.ProductionClient import ProductionClient
  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient

  prodClient = ProductionClient()
  transClient = TransformationClient()

  def _abort(message):
    """Log message as an error and leave with exit code -1."""
    DIRAC.gLogger.error(message)
    DIRAC.exit(-1)

  # Exactly two or three positional arguments are accepted
  positional = Script.getPositionalArgs()
  nArgs = len(positional)
  if nArgs not in (2, 3):
    Script.showHelp(exitCode=1)
  parentTransID = positional[2] if nArgs == 3 else ''
  prodID, transID = positional[0], positional[1]

  # Resolve the transformation to its ID
  transRes = transClient.getTransformation(transID)
  if not transRes['OK']:
    _abort('Failed to get transformation %s: %s' % (transID, transRes['Message']))
  transID = transRes['Value']['TransformationID']

  # Resolve the parent transformation, if one was given
  if parentTransID:
    parentRes = transClient.getTransformation(parentTransID)
    if not parentRes['OK']:
      _abort('Failed to get transformation %s: %s' % (parentTransID, parentRes['Message']))
    parentTransID = parentRes['Value']['TransformationID']

  # Resolve the production to its ID
  prodRes = prodClient.getProduction(prodID)
  if not prodRes['OK']:
    _abort('Failed to get production %s: %s' % (prodID, prodRes['Message']))
  prodID = prodRes['Value']['ProductionID']

  addRes = prodClient.addTransformationsToProduction(prodID, transID, parentTransID)
  if not addRes['OK']:
    _abort(addRes['Message'])

  if not parentTransID:
    msg = 'Transformation %s successfully added to production %s with no parent transformation' % (transID, prodID)
  else:
    msg = 'Transformation %s successfully added to production %s with parent transformation %s' % (
        transID, prodID, parentTransID)

  DIRAC.gLogger.notice(msg)

  DIRAC.exit(0)
Example #2
0
def main():
    """Add a transformation to a production, optionally under a parent transformation.

    The positional arguments prodID, transID and the optional parentTransID are
    registered with Script before the command line is parsed. Each ID is
    resolved through its client before the transformation is added; a failed
    call is logged as an error and ends the script with exit code -1.
    """
    # Registered arguments get their description added to the help menu
    Script.registerArgument("prodID:         Production ID")
    Script.registerArgument("transID:        Transformation ID")
    Script.registerArgument("parentTransID:  Parent Transformation ID", default="", mandatory=False)
    _, cliArgs = Script.parseCommandLine()

    from DIRAC.ProductionSystem.Client.ProductionClient import ProductionClient
    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient

    prodClient = ProductionClient()
    transClient = TransformationClient()

    def _abort(message):
        """Log message as an error and leave with exit code -1."""
        DIRAC.gLogger.error(message)
        DIRAC.exit(-1)

    # Read the grouped positional arguments; more than three is a usage error
    prodID, transID, parentTransID = Script.getPositionalArgs(group=True)
    if len(cliArgs) > 3:
        Script.showHelp(exitCode=1)

    # Resolve the transformation to its ID
    transRes = transClient.getTransformation(transID)
    if not transRes["OK"]:
        _abort("Failed to get transformation %s: %s" % (transID, transRes["Message"]))
    transID = transRes["Value"]["TransformationID"]

    # Resolve the parent transformation, if one was given
    if parentTransID:
        parentRes = transClient.getTransformation(parentTransID)
        if not parentRes["OK"]:
            _abort("Failed to get transformation %s: %s" % (parentTransID, parentRes["Message"]))
        parentTransID = parentRes["Value"]["TransformationID"]

    # Resolve the production to its ID
    prodRes = prodClient.getProduction(prodID)
    if not prodRes["OK"]:
        _abort("Failed to get production %s: %s" % (prodID, prodRes["Message"]))
    prodID = prodRes["Value"]["ProductionID"]

    addRes = prodClient.addTransformationsToProduction(prodID, transID, parentTransID)
    if not addRes["OK"]:
        _abort(addRes["Message"])

    if not parentTransID:
        msg = "Transformation %s successfully added to production %s with no parent transformation" % (transID, prodID)
    else:
        details = (transID, prodID, parentTransID)
        msg = "Transformation %s successfully added to production %s with parent transformation %s" % details

    DIRAC.gLogger.notice(msg)

    DIRAC.exit(0)
  def _getProdInfoFromIDs(self):
    """Fill eventsPerJobs, processes and energies for every ID in self.prodIDs.

    For each production ID the transformation is looked up to read its
    ``EventsPerTask``. The file catalog is then searched for files carrying
    that ``ProdID``, and the directory of the first file found is queried for
    its ``EvtType`` and ``Energy`` metadata.

    :raises: AttributeError if no prodIDs are defined or if any of the
             information cannot be found
    :returns: None
    """
    if not self.prodIDs:
      raise AttributeError("No prodIDs defined")

    self.eventsPerJobs = []
    self.processes = []
    self.energies = []
    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
    trc = TransformationClient()
    fc = FileCatalogClient()
    for prodID in self.prodIDs:
      gLogger.notice("Getting information for %s" % prodID)
      tRes = trc.getTransformation(str(prodID))
      if not tRes['OK']:
        raise AttributeError("No prodInfo found for %s" % prodID)
      self.eventsPerJobs.append(int(tRes['Value']['EventsPerTask']))
      lfnRes = fc.findFilesByMetadata({'ProdID': prodID})
      if not lfnRes['OK'] or not lfnRes['Value']:
        raise AttributeError("Could not find files for %s: %s " % (prodID, lfnRes.get('Message', lfnRes.get('Value'))))
      path = os.path.dirname(lfnRes['Value'][0])
      fileRes = fc.getDirectoryUserMetadata(path)
      # A failed metadata query must surface as the documented AttributeError,
      # not as a KeyError on the missing 'Value' entry
      if not fileRes['OK']:
        raise AttributeError("Could not get metadata of %s for %s: %s" % (path, prodID, fileRes.get('Message')))
      metadata = fileRes['Value']
      missing = [key for key in ('EvtType', 'Energy') if key not in metadata]
      if missing:
        raise AttributeError("Metadata %s not found in %s for %s" % (', '.join(missing), path, prodID))
      self.processes.append(metadata['EvtType'])
      self.energies.append(metadata['Energy'])
      gLogger.notice("Found (Evts,Type,Energy): %s %s %s " %
                     (self.eventsPerJobs[-1], self.processes[-1], self.energies[-1]))
Example #4
0
def _extend():
    """Raise the MaxNumberOfTasks of a production that uses the 'Limited' plugin.

    The production ID and the number of tasks are read from the command line
    switches registered by _Params. A positive number of tasks is added to the
    current MaxNumberOfTasks; a negative number sets MaxNumberOfTasks to -1.
    Failures are logged and end the script through dexit.
    """
    clip = _Params()
    clip.registerSwitches()
    Script.parseCommandLine()

    from DIRAC import gLogger, exit as dexit

    if not clip.prod or not clip.tasks:
        gLogger.error("Production ID is 0 or Tasks is 0, cannot be")
        dexit(1)

    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    tc = TransformationClient()
    res = tc.getTransformation(clip.prod)
    # Without this check a failed lookup would crash on the missing 'Value' key
    if not res['OK']:
        gLogger.error("Failed to get production %s: %s" % (clip.prod, res['Message']))
        dexit(1)
    trans = res['Value']
    transp = trans['Plugin']
    if transp != 'Limited':
        gLogger.error(
            "This cannot be used on productions that are not using the 'Limited' plugin"
        )
        # NOTE(review): exit code 0 after an error message looks unintended, kept as it was - confirm
        dexit(0)

    gLogger.info("Prod %s has %s tasks registered" %
                 (clip.prod, trans['MaxNumberOfTasks']))
    if clip.tasks > 0:
        max_tasks = trans['MaxNumberOfTasks'] + clip.tasks
        groupsize = trans['GroupSize']
        gLogger.notice("Adding %s tasks (%s file(s)) to production %s" %
                       (clip.tasks, clip.tasks * groupsize, clip.prod))
    elif clip.tasks < 0:
        max_tasks = -1
        gLogger.notice(
            "Now all existing files in the DB for production %s will be processed."
            % clip.prod)
    else:
        # Zero was already rejected above; kept as a safety net
        gLogger.error("Number of tasks must be different from 0")
        dexit(1)
    res = tc.setTransformationParameter(clip.prod, 'MaxNumberOfTasks',
                                        max_tasks)
    if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)
    gLogger.notice("Production %s extended!" % clip.prod)

    dexit(0)
def _getLogFolderFromID(clip):
  """Derive the log folder(s) of a production from its registered files.

  The transformation clip.prodid is looked up to learn its type, and the file
  catalog is searched for files carrying that ProdID (restricted to Datatype
  'REC' when the type contains 'Reconstruction'). Each LFN found is mapped to
  <base>/LOG/<subfolder>, and the resulting list is stored in clip.logD. Only
  the first file is used unless clip.getAllSubdirs is set.

  :param clip: object providing prodid and getAllSubdirs; receives logD
  :returns: S_OK() on success, the failed client result, or S_ERROR when no
            file was found
  """
  from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient

  # The transformation must exist; its type decides the metadata query
  transRes = TransformationClient().getTransformation(clip.prodid)
  if not transRes['OK']:
    return transRes
  transType = transRes['Value']['Type']
  metaQuery = {'ProdID': clip.prodid}
  if 'Reconstruction' in transType:
    metaQuery['Datatype'] = 'REC'

  found = FileCatalogClient().findFilesByMetadata(metaQuery, '/')
  if not found['OK']:
    return found

  lfns = found['Value']
  if not lfns:
    return S_ERROR("Cannot discover the LogFilePath: No output files yet")

  # Drop the last two path components of the first LFN to get the base folder
  firstParts = lfns[0].split('/')
  baseLFN = "/".join(firstParts[:-2])
  selected = lfns if clip.getAllSubdirs else lfns[:1]

  clip.logD = []
  previous = ""
  for lfn in selected:
    logdir = os.path.join(baseLFN, 'LOG', lfn.split('/')[-2])
    # Consecutive files of the same subfolder yield the same log directory
    if logdir == previous:
      continue
    gLogger.notice('Setting logdir to %s' % logdir)
    clip.logD.append(logdir)
    previous = logdir

  return S_OK()
Example #6
0
def _getLogFolderFromID(clip):
  """Derive the log folder(s) of a production from its registered files.

  The transformation clip.prodid is looked up to learn its type, and the file
  catalog is searched for files carrying that ProdID (restricted to Datatype
  'REC' when the type contains 'Reconstruction'). Each LFN found is mapped to
  <base>/LOG/<subfolder>, and the resulting list is stored in clip.logD. Only
  the first file is used unless clip.getAllSubdirs is set.

  :param clip: object providing prodid and getAllSubdirs; receives logD
  :returns: S_OK() on success, the failed client result, or S_ERROR when no
            file was found
  """
  from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient

  # The transformation must exist; its type decides the metadata query
  transRes = TransformationClient().getTransformation(clip.prodid)
  if not transRes['OK']:
    return transRes
  transType = transRes['Value']['Type']
  metaQuery = {'ProdID': clip.prodid}
  if 'Reconstruction' in transType:
    metaQuery['Datatype'] = 'REC'

  found = FileCatalogClient().findFilesByMetadata(metaQuery, '/')
  if not found['OK']:
    return found

  lfns = found['Value']
  if not lfns:
    return S_ERROR("Cannot discover the LogFilePath: No output files yet")

  # Drop the last two path components of the first LFN to get the base folder
  firstParts = lfns[0].split('/')
  baseLFN = "/".join(firstParts[:-2])
  selected = lfns if clip.getAllSubdirs else lfns[:1]

  clip.logD = []
  previous = ""
  for lfn in selected:
    logdir = os.path.join(baseLFN, 'LOG', lfn.split('/')[-2])
    # Consecutive files of the same subfolder yield the same log directory
    if logdir == previous:
      continue
    gLogger.notice('Setting logdir to %s' % logdir)
    clip.logD.append(logdir)
    previous = logdir

  return S_OK()
def _extend():
  """Raise the MaxNumberOfTasks of a production that uses the 'Limited' plugin.

  The production ID and the number of tasks are read from the command line
  switches registered by _Params. A positive number of tasks is added to the
  current MaxNumberOfTasks; a negative number sets MaxNumberOfTasks to -1.
  Failures are logged and end the script through dexit.
  """
  clip = _Params()
  clip.registerSwitches()
  Script.parseCommandLine()

  from DIRAC import gLogger, exit as dexit

  if not clip.prod or not clip.tasks:
    gLogger.error("Production ID is 0 or Tasks is 0, cannot be")
    dexit(1)

  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
  tc = TransformationClient()
  res = tc.getTransformation(clip.prod)
  # Without this check a failed lookup would crash on the missing 'Value' key
  if not res['OK']:
    gLogger.error("Failed to get production %s: %s" % (clip.prod, res['Message']))
    dexit(1)
  trans = res['Value']
  transp = trans['Plugin']
  if transp != 'Limited':
    gLogger.error("This cannot be used on productions that are not using the 'Limited' plugin")
    # NOTE(review): exit code 0 after an error message looks unintended, kept as it was - confirm
    dexit(0)

  gLogger.info("Prod %s has %s tasks registered" % (clip.prod, trans['MaxNumberOfTasks']))
  if clip.tasks > 0:
    max_tasks = trans['MaxNumberOfTasks'] + clip.tasks
    groupsize = trans['GroupSize']
    gLogger.notice("Adding %s tasks (%s file(s)) to production %s" % (clip.tasks, clip.tasks * groupsize, clip.prod))
  elif clip.tasks < 0:
    max_tasks = -1
    gLogger.notice("Now all existing files in the DB for production %s will be processed." % clip.prod)
  else:
    # Zero was already rejected above; kept as a safety net
    gLogger.error("Number of tasks must be different from 0")
    dexit(1)
  res = tc.setTransformationParameter(clip.prod, 'MaxNumberOfTasks', max_tasks)
  if not res['OK']:
    gLogger.error(res['Message'])
    dexit(1)
  gLogger.notice("Production %s extended!" % clip.prod)

  dexit(0)
Example #8
0
class TransformationAgent(AgentModule, TransformationAgentsUtilities):
    """ Usually subclass of AgentModule
  """
    def __init__(self, *args, **kwargs):
        """Initialise both base classes and give every agent attribute a neutral default.

        initialize() later assigns the configured values to most of these
        attributes.
        """
        AgentModule.__init__(self, *args, **kwargs)
        TransformationAgentsUtilities.__init__(self)

        # Agent options
        self.pluginLocation = ''
        self.transformationStatus = list()
        self.maxFiles = 0
        self.transformationTypes = list()

        # Client used outside of the threads
        self.transfClient = None

        # Queue feeding the threads and the IDs currently queued
        self.transQueue = Queue.Queue()
        self.transInQueue = list()

        # Locations used for caching and control files
        self.workDirectory = ''
        self.cacheFile = ''
        self.controlDirectory = ''

        self.lastFileOffset = dict()

        # Replica cache and its validity
        self.replicaCache = None
        self.replicaCacheValidity = None
        self.writingCache = False
        self.removedFromCache = 0

        # Bookkeeping of Unused files per transformation
        self.noUnusedDelay = 0
        self.unusedFiles = dict()
        self.unusedTimeStamp = dict()

        self.debug = False
        self.transInThread = dict()
        self.pluginTimeout = dict()

    def initialize(self):
        """Read the agent options, create the client and queue the worker jobs.

        :returns: S_OK()
        """
        # Agent options
        self.pluginLocation = self.am_getOption('PluginLocation',
                                                'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        defaultStatuses = ['Active', 'Completing', 'Flush']
        self.transformationStatus = self.am_getOption('transformationStatus', defaultStatuses)
        # MaxFiles is ambiguous: MaxFilesToProcess takes precedence, MaxFiles is the fallback
        fallbackMaxFiles = self.am_getOption('MaxFiles', 5000)
        self.maxFiles = self.am_getOption('MaxFilesToProcess', fallbackMaxFiles)

        # Types set on the agent win over the ones defined in Operations
        selectedTypes = self.am_getOption('TransformationTypes', [])
        if not selectedTypes:
            dataProc = Operations().getValue('Transformations/DataProcessing', ['MCSimulation', 'Merge'])
            dataManip = Operations().getValue('Transformations/DataManipulation', ['Replication', 'Removal'])
            selectedTypes = dataProc + dataManip
        self.transformationTypes = sorted(selectedTypes)

        # Client
        self.transfClient = TransformationClient()

        # The replica cache is kept in a pickle file in the work directory
        self.workDirectory = self.am_getWorkDirectory()
        self.cacheFile = os.path.join(self.workDirectory, 'ReplicaCache.pkl')
        self.controlDirectory = self.am_getControlDirectory()

        # Offset reached in the list of files, per transformation
        self.lastFileOffset = {}

        # Replica cache and its validity
        self.replicaCache = {}
        self.replicaCacheValidity = self.am_getOption('ReplicaCacheValidity', 2)

        self.noUnusedDelay = self.am_getOption('NoUnusedDelay', 6)

        # Queue one _execute job per thread
        nThreads = self.am_getOption('maxThreadsInPool', 1)
        pool = ThreadPool(nThreads, nThreads)
        self.log.info("Multithreaded with %d threads" % nThreads)
        for threadIndex in xrange(nThreads):
            pool.generateJobAndQueueIt(self._execute, [threadIndex])

        typesText = str(self.transformationTypes)
        self.log.info("Will treat the following transformation types: %s" % typesText)

        return S_OK()

    def finalize(self):
        """Empty the list of queued transformations, wait for the threads, then write the cache.

        :returns: S_OK()
        """
        method = 'finalize'
        if self.transInQueue:
            self.transInQueue = []
            waitMessage = "Wait for threads to get empty before terminating the agent (%d tasks)"
            self._logInfo(waitMessage % len(self.transInThread), method=method)
            remaining = ','.join([str(transID) for transID in self.transInThread])
            self._logInfo('Remaining transformations:', remaining, method=method)
            # Poll until no transformation is registered as being in a thread
            while self.transInThread:
                time.sleep(2)
            self._logInfo("Threads are empty, terminating the agent...", method=method)
        self.__writeCache()
        return S_OK()

    def execute(self):
        """Put the transformations to be processed in the thread queue.

        For transformations with an 'InheritedFrom' entry, files are first moved
        from the parent transformation. A transformation is queued only when its
        ID is not already in transInQueue.

        :returns: S_OK(), also when the transformations could not be obtained
        """
        # Get the transformations to process
        res = self.getTransformations()
        if not res['OK']:
            self._logError("Failed to obtain transformations:", res['Message'])
            return S_OK()
        # Keep the list under its own name: results of the calls made inside the
        # loop must not replace it, as its length is reported at the end
        transformations = res['Value']
        # Process the transformations
        count = 0
        for transDict in transformations:
            transID = long(transDict['TransformationID'])
            if transDict.get('InheritedFrom'):
                # Try and move datasets from the ancestor production
                moveRes = self.transfClient.moveFilesToDerivedTransformation(transDict)
                if not moveRes['OK']:
                    self._logError("Error moving files from an inherited transformation",
                                   moveRes['Message'],
                                   transID=transID)
                else:
                    parentProd, movedFiles = moveRes['Value']
                    if movedFiles:
                        self._logInfo("Successfully moved files from %d to %d:" % (parentProd, transID),
                                      transID=transID)
                        for status, val in movedFiles.iteritems():
                            self._logInfo("\t%d files to status %s" % (val, status),
                                          transID=transID)
            if transID not in self.transInQueue:
                count += 1
                self.transInQueue.append(transID)
                self.transQueue.put(transDict)
        self._logInfo("Out of %d transformations, %d put in thread queue" %
                      (len(transformations), count))
        return S_OK()

    def getTransformations(self):
        """Obtain the transformations to be handled; called at the start of every execute().

        When the 'Transformation' option is 'All' (the default), all
        transformations matching the configured statuses, and types if any, are
        requested. Otherwise only the named transformation is fetched.

        :returns: S_OK(list of transformations) or the failed client result
        """
        method = 'getTransformations'
        transName = self.am_getOption('Transformation', 'All')

        if transName != 'All':
            # A single transformation was requested by name
            self._logInfo("Getting transformation %s." % transName, method=method)
            single = self.transfClient.getTransformation(transName, extraParams=True)
            if not single['OK']:
                self._logError("Failed to get transformation:", single['Message'], method=method)
                return single
            return S_OK([single['Value']])

        typeText = ''
        if self.transformationTypes:
            typeText = ' of type %s' % str(self.transformationTypes)
        self._logInfo("Getting all transformations%s, status %s." % (typeText, str(self.transformationStatus)),
                      method=method)
        selection = {'Status': self.transformationStatus}
        if self.transformationTypes:
            selection['Type'] = self.transformationTypes
        found = self.transfClient.getTransformations(selection, extraParams=True)
        if not found['OK']:
            return found
        transformations = found['Value']
        self._logInfo("Obtained %d transformations to process" % len(transformations), method=method)
        return S_OK(transformations)

    def _getClients(self):
        """Create the clients used in a thread.

        :returns: dictionary with a new TransformationClient and a new
                  DataManager, keyed by their class names
        """
        clients = {}
        clients['TransformationClient'] = TransformationClient()
        clients['DataManager'] = DataManager()
        return clients

    def _execute(self, threadID):
        """Thread body: take transformations from the queue and process them.

        The loop ends when a dequeued transformation is no longer listed in
        transInQueue. Exceptions raised while processing a transformation are
        logged and do not stop the loop.

        :param threadID: number of the thread, used in the log prefix
        :returns: S_OK() once the loop has ended
        """

        # Each thread will have its own clients
        clients = self._getClients()

        while True:
            transDict = self.transQueue.get()
            # Defined before the try block so that the except and finally clauses
            # never see an unbound name or a value left over from the previous item
            transID = None
            startTime = time.time()
            try:
                transID = long(transDict['TransformationID'])
                if transID not in self.transInQueue:
                    break
                self.transInThread[transID] = ' [Thread%d] [%s] ' % (
                    threadID, str(transID))
                self._logInfo("Processing transformation %s." % transID,
                              transID=transID)
                startTime = time.time()
                res = self.processTransformation(transDict, clients)
                if not res['OK']:
                    self._logInfo("Failed to process transformation:",
                                  res['Message'],
                                  transID=transID)
            except Exception as x:  # pylint: disable=broad-except
                self._logException('Exception in plugin',
                                   lException=x,
                                   transID=transID)
            finally:
                if not transID:
                    transID = 'None'
                self._logInfo("Processed transformation in %.1f seconds" %
                              (time.time() - startTime),
                              transID=transID)
                # Whatever happened, the transformation is no longer queued nor in a thread
                if transID in self.transInQueue:
                    self.transInQueue.remove(transID)
                self.transInThread.pop(transID, None)
                self._logVerbose("%d transformations still in queue" %
                                 len(self.transInQueue))
        return S_OK()

    def processTransformation(self, transDict, clients):
        """Process a single transformation: get its files, run the plug-in, create the tasks.

        :param transDict: transformation dictionary; 'TransformationID', 'Type'
                          and 'Status' are read, 'Plugin' defaults to 'Standard'
        :param clients: dictionary of clients of the calling thread, the
                        'TransformationClient' entry is used here
        :returns: S_OK() when done or when there is nothing to process,
                  otherwise the failed result of the step that went wrong
        """
        method = 'processTransformation'
        transID = transDict['TransformationID']
        forJobs = transDict['Type'].lower() not in ('replication', 'removal')

        # First get the LFNs associated to the transformation
        transFiles = self._getTransformationFiles(
            transDict, clients, replicateOrRemove=not forJobs)
        if not transFiles['OK']:
            return transFiles
        if not transFiles['Value']:
            return S_OK()

        if transID not in self.replicaCache:
            self.__readCache(transID)
        transFiles = transFiles['Value']
        unusedLfns = [f['LFN'] for f in transFiles]
        unusedFiles = len(unusedLfns)

        plugin = transDict.get('Plugin', 'Standard')
        # Limit the number of LFNs to be considered for replication or removal as they are treated individually
        if not forJobs:
            # Get plugin-specific limit in number of files (0 means no limit)
            maxFiles = Operations().getValue(
                'TransformationPlugins/%s/MaxFilesToProcess' % plugin, 0)
            totLfns = len(unusedLfns)
            lfnsToProcess = self.__applyReduction(unusedLfns,
                                                  maxFiles=maxFiles)
        else:
            lfnsToProcess = unusedLfns
        # Set used for the membership tests below, instead of scanning the list each time
        lfnsToProcessSet = set(lfnsToProcess)
        if not forJobs and len(lfnsToProcess) != totLfns:
            self._logInfo("Reduced number of files from %d to %d" %
                          (totLfns, len(lfnsToProcess)),
                          method=method,
                          transID=transID)
            transFiles = [
                f for f in transFiles if f['LFN'] in lfnsToProcessSet
            ]

        # Check the data is available with replicas
        res = self.__getDataReplicas(transDict,
                                     lfnsToProcess,
                                     clients,
                                     forJobs=forJobs)
        if not res['OK']:
            self._logError("Failed to get data replicas:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        self._logInfo("Processing transformation with '%s' plug-in." % plugin,
                      method=method,
                      transID=transID)
        res = self.__generatePluginObject(plugin, clients)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.run()
        if not res['OK']:
            self._logError("Failed to generate tasks for transformation:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        tasks = res['Value']
        # Remembered per transformation; read again when the files are next requested
        self.pluginTimeout[transID] = res.get('Timeout', False)
        # Create the tasks
        allCreated = True
        created = 0
        lfnsInTasks = []
        for se, lfns in tasks:
            res = clients['TransformationClient'].addTaskForTransformation(
                transID, lfns, se)
            if not res['OK']:
                self._logError("Failed to add task generated by plug-in:",
                               res['Message'],
                               method=method,
                               transID=transID)
                allCreated = False
            else:
                created += 1
                lfnsInTasks += [lfn for lfn in lfns if lfn in lfnsToProcessSet]
        if created:
            self._logInfo("Successfully created %d tasks for transformation." %
                          created,
                          method=method,
                          transID=transID)
        else:
            self._logInfo("No new tasks created for transformation.",
                          method=method,
                          transID=transID)
        self.unusedFiles[transID] = unusedFiles - len(lfnsInTasks)
        # If not all files were obtained, move the offset
        lastOffset = self.lastFileOffset.get(transID)
        if lastOffset:
            self.lastFileOffset[transID] = max(0,
                                               lastOffset - len(lfnsInTasks))
        self.__removeFilesFromCache(transID, lfnsInTasks)

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            res = clients['TransformationClient'].setTransformationParameter(
                transID, 'Status', 'Active')
            if not res['OK']:
                self._logError(
                    "Failed to update transformation status to 'Active':",
                    res['Message'],
                    method=method,
                    transID=transID)
            else:
                self._logInfo("Updated transformation status to 'Active'.",
                              method=method,
                              transID=transID)
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def _getTransformationFiles(self,
                                transDict,
                                clients,
                                statusList=None,
                                replicateOrRemove=False):
        """Get the files of a transformation that should be considered for new tasks.

        Files in one of the statuses of statusList are requested from the
        TransformationClient, starting at the offset remembered for the
        transformation. Files returned in a status other than 'Unused' are set
        back to 'Unused'.

        :param transDict: transformation dictionary; 'TransformationID', 'Type'
                          and 'Status' are read, 'Plugin' defaults to 'Standard'
        :param clients: dictionary of clients, the 'TransformationClient' entry
                        is used
        :param statusList: file statuses to select, ['Unused', 'ProbInFC'] when
                           empty or None; 'MissingInFC' is added for 'Removal'
                           transformations. The list passed in is not modified.
        :param replicateOrRemove: when True, the transformation is not skipped
                                  for lack of new Unused files unless a plugin
                                  NoUnusedDelay is set in Operations
        :returns: S_OK(list of files), S_OK() without value when there is
                  nothing (new) to process, or the failed client result
        """
        # By default, don't skip if no new Unused for DM transformations
        skipIfNoNewUnused = not replicateOrRemove
        transID = transDict['TransformationID']
        plugin = transDict.get('Plugin', 'Standard')
        # Check if files should be sorted and limited in number
        operations = Operations()
        sortedBy = operations.getValue(
            'TransformationPlugins/%s/SortedBy' % plugin, None)
        maxFiles = operations.getValue(
            'TransformationPlugins/%s/MaxFilesToProcess' % plugin, 0)
        # If the NoUnused delay is explicitly set, we want to take it into account, and skip if no new Unused
        if operations.getValue(
                'TransformationPlugins/%s/NoUnusedDelay' % plugin, 0):
            skipIfNoNewUnused = True
        # No delay when pluginTimeout is set for this transformation
        noUnusedDelay = 0 if self.pluginTimeout.get(transID, False) else \
            operations.getValue('TransformationPlugins/%s/NoUnusedDelay' % plugin, self.noUnusedDelay)
        method = '_getTransformationFiles'
        lastOffset = self.lastFileOffset.setdefault(transID, 0)

        # Files that were problematic (either explicit or because SE was banned) may be recovered,
        # and always removing the missing ones
        # Work on a copy so that the list passed by the caller is never modified
        if statusList:
            statusList = list(statusList)
        else:
            statusList = ['Unused', 'ProbInFC']
        if transDict['Type'] == 'Removal':
            statusList.append('MissingInFC')
        transClient = clients['TransformationClient']
        res = transClient.getTransformationFiles(condDict={
            'TransformationID': transID,
            'Status': statusList
        },
                                                 orderAttribute=sortedBy,
                                                 offset=lastOffset,
                                                 maxfiles=maxFiles)
        if not res['OK']:
            self._logError("Failed to obtain input data:",
                           res['Message'],
                           method=method,
                           transID=transID)
            return res
        transFiles = res['Value']
        # A full page means there may be more files: advance the offset, otherwise forget it
        if maxFiles and len(transFiles) == maxFiles:
            self.lastFileOffset[transID] += maxFiles
        else:
            del self.lastFileOffset[transID]

        if not transFiles:
            self._logInfo("No '%s' files found for transformation." %
                          ','.join(statusList),
                          method=method,
                          transID=transID)
            if transDict['Status'] == 'Flush':
                res = transClient.setTransformationParameter(
                    transID, 'Status', 'Active')
                if not res['OK']:
                    self._logError(
                        "Failed to update transformation status to 'Active':",
                        res['Message'],
                        method=method,
                        transID=transID)
                else:
                    self._logInfo("Updated transformation status to 'Active'.",
                                  method=method,
                                  transID=transID)
            return S_OK()
        # Check if transformation is kicked
        kickFile = os.path.join(self.controlDirectory,
                                'KickTransformation_%s' % str(transID))
        try:
            kickTrans = os.path.exists(kickFile)
            if kickTrans:
                os.remove(kickFile)
        except OSError:
            pass

        # Check if something new happened
        now = datetime.datetime.utcnow()
        if not kickTrans and skipIfNoNewUnused and noUnusedDelay:
            nextStamp = self.unusedTimeStamp.setdefault(
                transID, now) + datetime.timedelta(hours=noUnusedDelay)
            skip = now < nextStamp
            if len(transFiles) == self.unusedFiles.get(
                    transID, 0) and transDict['Status'] != 'Flush' and skip:
                self._logInfo("No new '%s' files found for transformation." %
                              ','.join(statusList),
                              method=method,
                              transID=transID)
                return S_OK()

        self.unusedTimeStamp[transID] = now
        # If files are not Unused, set them Unused
        notUnused = [
            trFile['LFN'] for trFile in transFiles
            if trFile['Status'] != 'Unused'
        ]
        otherStatuses = sorted(
            set(trFile['Status'] for trFile in transFiles) - set(['Unused']))
        if notUnused:
            res = transClient.setFileStatusForTransformation(transID,
                                                             'Unused',
                                                             notUnused,
                                                             force=True)
            if not res['OK']:
                self._logError("Error setting %d files Unused:" %
                               len(notUnused),
                               res['Message'],
                               method=method,
                               transID=transID)
            else:
                # Same logging context as every other message of this method
                self._logInfo("Set %d files from %s to Unused" %
                              (len(notUnused), ','.join(otherStatuses)),
                              method=method,
                              transID=transID)
                self.__removeFilesFromCache(transID, notUnused)
        return S_OK(transFiles)

    def __applyReduction(self, lfns, maxFiles=None):
        """Limit the number of files to be considered.

        :param lfns: list of LFNs
        :param maxFiles: maximum number of LFNs to keep; self.maxFiles is used
                         when None, and a false value means no limit
        :returns: lfns itself when no reduction is needed, otherwise the first
                  maxFiles entries of randomize(lfns)
        """
        limit = self.maxFiles if maxFiles is None else maxFiles
        if limit and len(lfns) > limit:
            return randomize(lfns)[:limit]
        return lfns

    def __getDataReplicas(self, transDict, lfns, clients, forJobs=True):
        """ Get the replicas for the LFNs, looking in the replica cache first.

        LFNs not found in the cache are queried (in chunks) through _getDataReplicasDM and the
        non-empty results are added to the cache, which is then written to disk.

        :param dict transDict: transformation description; the keys 'TransformationID',
                               'Body' and 'Status' are read
        :param lfns: sized iterable of LFNs
        :param dict clients: clients handed over to _getDataReplicasDM
        :param bool forJobs: handed over to _getDataReplicasDM
        :return: S_OK(dict) mapping each LFN to its replicas; LFNs for which no replica
                 was obtained are absent from the dictionary
        """
        method = '__getDataReplicas'
        transID = transDict['TransformationID']
        if 'RemoveFile' in transDict['Body']:
            # When removing files, we don't care about their replicas.
            # Every LFN gets its own placeholder list (dict.fromkeys would share a single one)
            return S_OK(dict((lfn, ['None']) for lfn in lfns))
        clearCacheFile = os.path.join(self.controlDirectory,
                                      'ClearCache_%s' % str(transID))
        clearCache = False
        try:
            clearCache = os.path.exists(clearCacheFile)
            if clearCache:
                os.remove(clearCacheFile)
        except OSError as x:
            # Best effort: the clearing request is still honoured, but report the problem
            # as the control file will trigger a new clearing at the next cycle
            self._logWarn("Failed to remove cache control file %s" %
                          clearCacheFile,
                          str(x),
                          method=method,
                          transID=transID)
        if clearCache or transDict['Status'] == 'Flush':
            self._logInfo("Replica cache cleared",
                          method=method,
                          transID=transID)
            # We may need to get new replicas
            self.__clearCacheForTrans(transID)
        else:
            # If the cache needs to be cleaned
            self.__cleanCache(transID)
        dataReplicas = {}
        nLfns = len(lfns)
        self._logVerbose("Getting replicas for %d files" % nLfns,
                         method=method,
                         transID=transID)
        # Merge all sets of cached replicas (one set per update time)
        cachedReplicas = {}
        for replicas in self.replicaCache.get(transID, {}).values():
            cachedReplicas.update(replicas)
        self._logInfo("Number of cached replicas: %d" % len(cachedReplicas),
                      method=method,
                      transID=transID)
        setCached = set(cachedReplicas)
        setLfns = set(lfns)
        for lfn in setLfns & setCached:
            dataReplicas[lfn] = cachedReplicas[lfn]
        newLFNs = setLfns - setCached
        self._logInfo("ReplicaCache hit for %d out of %d LFNs" %
                      (len(dataReplicas), nLfns),
                      method=method,
                      transID=transID)
        if newLFNs:
            startTime = time.time()
            self._logInfo("Getting replicas for %d files from catalog" %
                          len(newLFNs),
                          method=method,
                          transID=transID)
            newReplicas = {}
            for chunk in breakListIntoChunks(newLFNs, 10000):
                res = self._getDataReplicasDM(transID,
                                              chunk,
                                              clients,
                                              forJobs=forJobs)
                if res['OK']:
                    # Only keep (and cache) the LFNs that do have replicas
                    reps = dict((lfn, ses)
                                for lfn, ses in res['Value'].items()
                                if ses)
                    newReplicas.update(reps)
                    self.__updateCache(transID, reps)
                else:
                    self._logWarn("Failed to get replicas for %d files" %
                                  len(chunk),
                                  res['Message'],
                                  method=method,
                                  transID=transID)

            self._logInfo("Obtained %d replicas from catalog in %.1f seconds" %
                          (len(newReplicas), time.time() - startTime),
                          method=method,
                          transID=transID)
            dataReplicas.update(newReplicas)
            noReplicas = newLFNs - set(dataReplicas)
            self.__writeCache(transID)
            if noReplicas:
                self._logWarn(
                    "Found %d files without replicas (or only in Failover)" %
                    len(noReplicas),
                    method=method,
                    transID=transID)
        return S_OK(dataReplicas)

    def _getDataReplicasDM(self,
                           transID,
                           lfns,
                           clients,
                           forJobs=True,
                           ignoreMissing=False):
        """ Get the replicas for the LFNs from the catalog, using the DataManager client.

        Side effects, through clients['TransformationClient']: LFNs reported neither
        'Successful' nor 'Failed' are set to 'ProbInFC'; LFNs failing with
        "No such file or directory" are set to 'MissingInFC' unless ignoreMissing is True.

        :param transID: transformation ID, used for logging and for the status updates
        :param lfns: sized iterable of LFNs
        :param dict clients: must provide the 'DataManager' and 'TransformationClient' entries
        :param bool forJobs: if True, use getReplicasForJobs() and ignore the SEs whose name
                             contains 'failover'; otherwise use getReplicas()
        :param bool ignoreMissing: if True, LFNs missing from the catalog are returned with
                                   an empty replica list instead of being set 'MissingInFC'
        :return: S_OK(dict) mapping LFN to the list of its SEs, or the failed result of
                 the replica query
        """
        method = '_getDataReplicasDM'

        startTime = time.time()
        self._logVerbose("Getting replicas%s from catalog for %d files" %
                         (' for jobs' if forJobs else '', len(lfns)),
                         method=method,
                         transID=transID)
        if forJobs:
            # Get only replicas eligible for jobs
            res = clients['DataManager'].getReplicasForJobs(lfns, getUrl=False)
        else:
            # Get all replicas
            res = clients['DataManager'].getReplicas(lfns, getUrl=False)
        if not res['OK']:
            return res
        replicas = res['Value']
        # Prepare a dictionary for all LFNs
        dataReplicas = {}
        self._logVerbose(
            "Replica results for %d files obtained in %.2f seconds" %
            (len(lfns), time.time() - startTime),
            method=method,
            transID=transID)
        # If files are neither Successful nor Failed, they are set problematic in the FC
        problematicLfns = [
            lfn for lfn in lfns if lfn not in replicas['Successful']
            and lfn not in replicas['Failed']
        ]
        if problematicLfns:
            self._logInfo(
                "%d files found problematic in the catalog, set ProbInFC" %
                len(problematicLfns),
                method=method,
                transID=transID)
            res = clients[
                'TransformationClient'].setFileStatusForTransformation(
                    transID, 'ProbInFC', problematicLfns)
            if not res['OK']:
                self._logError("Failed to update status of problematic files:",
                               res['Message'],
                               method=method,
                               transID=transID)
        # Create a dictionary containing all the file replicas
        failoverLfns = []
        for lfn, replicaDict in replicas['Successful'].items():
            for se in replicaDict:
                # This remains here for backward compatibility in case VOs have not
                # defined SEs not to be used for jobs
                if forJobs and 'failover' in se.lower():
                    self._logVerbose("Ignoring failover replica for %s." % lfn,
                                     method=method,
                                     transID=transID)
                else:
                    dataReplicas.setdefault(lfn, []).append(se)
            if not dataReplicas.get(lfn):
                failoverLfns.append(lfn)
        if failoverLfns:
            self._logVerbose(
                "%d files have no replica but possibly in Failover SE" %
                len(failoverLfns),
                method=method,
                transID=transID)
        # Make sure that file missing from the catalog are marked in the transformation DB.
        missingLfns = []
        for lfn, reason in replicas['Failed'].items():
            if "No such file or directory" in reason:
                self._logVerbose("%s not found in the catalog." % lfn,
                                 method=method,
                                 transID=transID)
                missingLfns.append(lfn)
        if missingLfns:
            self._logInfo("%d files not found in the catalog" %
                          len(missingLfns),
                          method=method,
                          transID=transID)
            if ignoreMissing:
                # One distinct empty list per LFN: dict.fromkeys(missingLfns, []) would make
                # all the entries share the very same list object
                dataReplicas.update((lfn, []) for lfn in missingLfns)
            else:
                res = clients[
                    'TransformationClient'].setFileStatusForTransformation(
                        transID, 'MissingInFC', missingLfns)
                if not res['OK']:
                    self._logError("Failed to update status of missing files:",
                                   res['Message'],
                                   method=method,
                                   transID=transID)
        return S_OK(dataReplicas)

    def __updateCache(self, transID, newReplicas):
        """ Add replicas to the cache
    """
        self.replicaCache.setdefault(
            transID, {})[datetime.datetime.utcnow()] = newReplicas


#    if len( newReplicas ) > 5000:
#      self.__writeCache( transID )

    def __clearCacheForTrans(self, transID):
        """ Remove all replicas for a transformation
    """
        self.replicaCache.pop(transID, None)

    def __cleanReplicas(self, transID, lfns):
        """ Remove from the cache of a transformation the LFNs that are not in a list.

        :param transID: transformation ID
        :param lfns: iterable of the LFNs to be kept
        """
        # Collect the LFNs of all cached sets; values() behaves the same on Python 2 and 3
        cachedLfns = set()
        for replicas in self.replicaCache.get(transID, {}).values():
            cachedLfns.update(replicas)
        toRemove = cachedLfns - set(lfns)
        if toRemove:
            self._logInfo("Remove %d files from cache" % len(toRemove),
                          method='__cleanReplicas',
                          transID=transID)
            self.__removeFromCache(transID, toRemove)

    def __cleanCache(self, transID):
        """ Cleans the cache
    """
        try:
            if transID in self.replicaCache:
                timeLimit = datetime.datetime.utcnow() - datetime.timedelta(
                    days=self.replicaCacheValidity)
                for updateTime in set(self.replicaCache[transID]):
                    nCache = len(self.replicaCache[transID][updateTime])
                    if updateTime < timeLimit or not nCache:
                        self._logInfo(
                            "Clear %s replicas for transformation %s, time %s"
                            %
                            ('%d cached' % nCache if nCache else 'empty cache',
                             str(transID), str(updateTime)),
                            transID=transID,
                            method='__cleanCache')
                        del self.replicaCache[transID][updateTime]
                # Remove empty transformations
                if not self.replicaCache[transID]:
                    del self.replicaCache[transID]
        except Exception as x:
            self._logException("Exception when cleaning replica cache:",
                               lException=x)

    def __removeFilesFromCache(self, transID, lfns):
        """ Remove LFNs from the cache of a transformation and, if anything
        was reported as removed, write the cache of that transformation.

        :param transID: transformation ID
        :param lfns: LFNs to be removed
        """
        nRemoved = self.__removeFromCache(transID, lfns)
        if not nRemoved:
            # Nothing changed, no need to rewrite the cache
            return
        self._logInfo("Removed %d replicas from cache" % nRemoved,
                      method='__removeFilesFromCache',
                      transID=transID)
        self.__writeCache(transID)

    def __removeFromCache(self, transID, lfns):
        if transID not in self.replicaCache:
            return
        removed = 0
        if self.replicaCache[transID] and lfns:
            for lfn in lfns:
                for timeKey in self.replicaCache[transID]:
                    if self.replicaCache[transID][timeKey].pop(lfn, None):
                        removed += 1
        return removed

    def __cacheFile(self, transID):
        return self.cacheFile.replace('.pkl', '_%s.pkl' % str(transID))

    @gSynchro
    def __readCache(self, transID):
        """ Load the replica cache of a transformation from its pickle file.

        Nothing is done if the transformation is already in self.replicaCache. If the file
        does not exist, or cannot be loaded (the exception is logged), the cache of the
        transformation is set to an empty dictionary.

        :param transID: transformation ID
        """
        if transID in self.replicaCache:
            return
        # Defined before the try block so that the exception handler can always use them
        method = '__readCache'
        fileName = None
        try:
            fileName = self.__cacheFile(transID)
            if not os.path.exists(fileName):
                self.replicaCache[transID] = {}
            else:
                # Pickle files must be opened in binary mode.
                # NOTE(review): pickle.load executes arbitrary code from the file; this is
                # only acceptable because the file is presumably written by __writeCache
                # in the agent's own work directory - confirm nobody else can write there
                with open(fileName, 'rb') as cacheFile:
                    self.replicaCache[transID] = pickle.load(cacheFile)
                self._logInfo(
                    "Successfully loaded replica cache from file %s (%d files)"
                    % (fileName, self.__filesInCache(transID)),
                    method=method,
                    transID=transID)
        except Exception as x:
            self._logException("Failed to load replica cache from file %s" %
                               fileName,
                               lException=x,
                               method=method,
                               transID=transID)
            self.replicaCache[transID] = {}

    def __filesInCache(self, transID):
        cache = self.replicaCache.get(transID, {})
        return sum(len(lfns) for lfns in cache.itervalues())

    @gSynchro
    def __writeCache(self, transID=None):
        """ Write the replica cache to disk, one pickle file per transformation.

        Each file is first written to '<file>.tmp' and then renamed, in order to avoid
        corrupted cache files. Any exception is logged, not raised.

        :param transID: transformation to be written; if false (None, 0...), all the
                        transformations present in self.replicaCache are written
        """
        method = '__writeCache'
        # Defined before the try block so that the exception handler can always use them
        cacheFile = None
        t_id = transID
        try:
            startTime = time.time()
            transList = [transID] if transID else set(self.replicaCache)
            filesInCache = 0
            nCache = 0
            for t_id in transList:
                # Protect the copy of the cache
                filesInCache += self.__filesInCache(t_id)
                # write to a temporary file in order to avoid corrupted files
                cacheFile = self.__cacheFile(t_id)
                tmpFile = cacheFile + '.tmp'
                # Pickle files must be opened in binary mode
                with open(tmpFile, 'wb') as fd:
                    pickle.dump(self.replicaCache.get(t_id, {}), fd)
                # Now rename the file as it should be
                os.rename(tmpFile, cacheFile)
                nCache += 1
            self._logInfo(
                "Successfully wrote %d replica cache file(s) (%d files) in %.1f seconds"
                % (nCache, filesInCache, time.time() - startTime),
                method=method,
                transID=transID if transID else None)
        except Exception as x:
            self._logException("Could not write replica cache file %s" %
                               cacheFile,
                               lException=x,
                               method=method,
                               transID=t_id)

    def __generatePluginObject(self, plugin, clients):
        """ Instantiate the TransformationPlugin class with the relevant plugin name.

        The class is looked up in the module named by self.pluginLocation.

        :param plugin: plugin name, passed as a string to the TransformationPlugin constructor
        :param dict clients: must provide the 'TransformationClient' and 'DataManager' entries
        :return: S_OK(plugin object), or S_ERROR() if the module cannot be imported or the
                 object cannot be created (AttributeError)
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except ImportError as e:
            self._logException("Failed to import 'TransformationPlugin' %s" %
                               plugin,
                               lException=e,
                               method="__generatePluginObject")
            return S_ERROR()
        try:
            plugin_o = getattr(plugModule, 'TransformationPlugin')(
                '%s' % plugin,
                transClient=clients['TransformationClient'],
                dataManager=clients['DataManager'])
        except AttributeError as e:
            self._logException("Failed to create %s()" % plugin,
                               lException=e,
                               method="__generatePluginObject")
            return S_ERROR()
        # These two calls used to sit after the return statements and were never executed.
        # They are guarded so that plugin classes not providing the setters keep working.
        # NOTE(review): confirm that the plugins are expected to get the callback
        setDirectory = getattr(plugin_o, 'setDirectory', None)
        if callable(setDirectory):
            setDirectory(self.workDirectory)
        setCallback = getattr(plugin_o, 'setCallback', None)
        if callable(setCallback):
            setCallback(self.pluginCallback)
        return S_OK(plugin_o)

    def pluginCallback(self, transID, invalidateCache=False):
        """ Standard plugin callback.

        When invalidateCache is True and the transformation has cached replicas, they are
        dropped from memory and the cache of the transformation is rewritten. This is a
        best-effort operation: a failure is logged, not raised.

        :param transID: transformation ID
        :param bool invalidateCache: whether the cached replicas must be dropped
        """
        if not invalidateCache:
            return
        try:
            if transID in self.replicaCache:
                self._logInfo("Removed cached replicas for transformation",
                              method='pluginCallBack',
                              transID=transID)
                self.replicaCache.pop(transID)
                self.__writeCache(transID)
        except Exception as x:
            # Do not let the plugin fail because of the cache, but leave a trace
            self._logException("Exception when invalidating replica cache:",
                               lException=x,
                               method='pluginCallBack',
                               transID=transID)
Example #9
0
class TransformationAgent(AgentModule, TransformationAgentsUtilities):
  """ Usually subclass of AgentModule
  """

  def __init__(self, *args, **kwargs):
    """ Constructor: initialise both base classes, then give a default value to
        every attribute of the agent
    """
    AgentModule.__init__(self, *args, **kwargs)
    TransformationAgentsUtilities.__init__(self)

    # Agent parameters
    self.pluginLocation = ''
    self.maxFiles = 0
    self.noUnusedDelay = 0
    self.transformationStatus = []
    self.transformationTypes = []
    self.debug = False

    # Client used out of the threads
    self.transfClient = None

    # Threading: queue of transformations and what is queued / being processed
    self.transQueue = Queue.Queue()
    self.transInQueue = []
    self.transInThread = {}

    # Directories and file used for caching
    self.workDirectory = self.cacheFile = self.controlDirectory = ''

    # Replica cache and its validity
    self.replicaCache = self.replicaCacheValidity = None
    self.writingCache = False
    self.removedFromCache = 0

    # Bookkeeping, one entry per transformation
    self.lastFileOffset = {}
    self.unusedFiles = {}
    self.unusedTimeStamp = {}
    self.pluginTimeout = {}

  def initialize(self):
    """ Standard initialize: read the agent options, create the client and the
        (empty) replica cache, then start the worker threads running _execute()

        :return: S_OK()
    """
    getOption = self.am_getOption

    # Agent parameters
    self.pluginLocation = getOption('PluginLocation',
                                    'DIRAC.TransformationSystem.Agent.TransformationPlugin')
    self.transformationStatus = getOption('transformationStatus', ['Active', 'Completing', 'Flush'])
    # MaxFiles is ambiguous as an option name: it is only the fallback for MaxFilesToProcess
    fallbackMaxFiles = getOption('MaxFiles', 5000)
    self.maxFiles = getOption('MaxFilesToProcess', fallbackMaxFiles)

    # Transformation types: from the agent options or, if not set there, from Operations
    agentTSTypes = getOption('TransformationTypes', [])
    if not agentTSTypes:
      dataProc = Operations().getValue('Transformations/DataProcessing', ['MCSimulation', 'Merge'])
      dataManip = Operations().getValue('Transformations/DataManipulation', ['Replication', 'Removal'])
      agentTSTypes = dataProc + dataManip
    self.transformationTypes = sorted(agentTSTypes)

    # Client
    self.transfClient = TransformationClient()

    # Caching uses a pickle file in the work directory
    self.workDirectory = self.am_getWorkDirectory()
    self.cacheFile = os.path.join(self.workDirectory, 'ReplicaCache.pkl')
    self.controlDirectory = self.am_getControlDirectory()

    # Offset, if any, in the TS
    self.lastFileOffset = {}

    # Replica cache and its validity
    self.replicaCache = {}
    self.replicaCacheValidity = getOption('ReplicaCacheValidity', 2)

    self.noUnusedDelay = getOption('NoUnusedDelay', 6)

    # One _execute() job is queued per thread of the pool
    nThreads = getOption('maxThreadsInPool', 1)
    threadPool = ThreadPool(nThreads, nThreads)
    self.log.info("Multithreaded with %d threads" % nThreads)
    for threadID in xrange(nThreads):
      threadPool.generateJobAndQueueIt(self._execute, [threadID])

    self.log.info("Will treat the following transformation types: %s" % str(self.transformationTypes))

    return S_OK()

  def finalize(self):
    """ Graceful finalization: if transformations are still queued, empty the queue list
        and wait until no transformation is being processed; then write the replica cache

        :return: S_OK()
    """
    method = 'finalize'
    if self.transInQueue:
      self.transInQueue = []
      nTasks = len(self.transInThread)
      self._logInfo("Wait for threads to get empty before terminating the agent (%d tasks)" % nTasks,
                    method=method)
      remaining = ','.join(str(transID) for transID in self.transInThread)
      self._logInfo('Remaining transformations:', remaining, method=method)
      # Poll every 2 seconds until the threads have released all transformations
      while self.transInThread:
        time.sleep(2)
      self._logInfo("Threads are empty, terminating the agent...", method=method)
    self.__writeCache()
    return S_OK()

  def execute(self):
    """ Just puts transformations in the queue.

        For a transformation with 'InheritedFrom' set, files are first moved from the
        ancestor production. A transformation already in self.transInQueue is not
        queued again.

        :return: S_OK() in all cases (a failure to get the transformations is only logged)
    """
    # Get the transformations to process
    res = self.getTransformations()
    if not res['OK']:
      self._logError("Failed to obtain transformations:", res['Message'])
      return S_OK()
    # Keep the list under its own name: 'res' is reused in the loop, and the final
    # message would otherwise use (or fail on) the result of the last move of files
    transformations = res['Value']
    # Process the transformations
    count = 0
    for transDict in transformations:
      transID = long(transDict['TransformationID'])
      if transDict.get('InheritedFrom'):
        # Try and move datasets from the ancestor production
        res = self.transfClient.moveFilesToDerivedTransformation(transDict)
        if not res['OK']:
          self._logError("Error moving files from an inherited transformation", res['Message'], transID=transID)
        else:
          parentProd, movedFiles = res['Value']
          if movedFiles:
            self._logInfo("Successfully moved files from %d to %d:" % (parentProd, transID), transID=transID)
            for status, val in movedFiles.iteritems():
              self._logInfo("\t%d files to status %s" % (val, status), transID=transID)
      if transID not in self.transInQueue:
        count += 1
        self.transInQueue.append(transID)
        self.transQueue.put(transDict)
    self._logInfo("Out of %d transformations, %d put in thread queue" % (len(transformations), count))
    return S_OK()

  def getTransformations(self):
    """ Obtain the transformations to be executed.

        The 'Transformation' option selects either a single transformation or, with
        'All' (the default), every transformation matching the statuses and types
        the agent is set up for.

        :return: S_OK(list of transformation dictionaries), or the failed result
                 of the TransformationClient call
    """
    method = 'getTransformations'
    transName = self.am_getOption('Transformation', 'All')

    if transName != 'All':
      # A single transformation was requested
      self._logInfo("Getting transformation %s." % transName, method=method)
      res = self.transfClient.getTransformation(transName, extraParams=True)
      if not res['OK']:
        self._logError("Failed to get transformation:", res['Message'], method=method)
        return res
      return S_OK([res['Value']])

    typesMsg = ' of type %s' % str(self.transformationTypes) if self.transformationTypes else ''
    self._logInfo("Getting all transformations%s, status %s." % (typesMsg, str(self.transformationStatus)),
                  method=method)
    condDict = {'Status': self.transformationStatus}
    if self.transformationTypes:
      condDict['Type'] = self.transformationTypes
    res = self.transfClient.getTransformations(condDict, extraParams=True)
    if not res['OK']:
      return res
    transformations = res['Value']
    self._logInfo("Obtained %d transformations to process" % len(transformations), method=method)
    return S_OK(transformations)

  def _getClients(self):
    """ Create the clients used in the threads

        :return: dict with a new TransformationClient ('TransformationClient')
                 and a new DataManager ('DataManager')
    """
    clients = {}
    clients['TransformationClient'] = TransformationClient()
    clients['DataManager'] = DataManager()
    return clients

  def _execute(self, threadID):
    """ thread - does the real job: processing the transformations to be processed

        Transformations are taken from self.transQueue one by one. The loop ends when a
        dequeued transformation is no longer in self.transInQueue.

        :param int threadID: number of the thread, used in the log prefix
        :return: S_OK() when the loop ends
    """

    # Each thread will have its own clients
    clients = self._getClients()

    while True:
      transDict = self.transQueue.get()
      # Set before the try block: the except and finally clauses use these names even
      # when the try block is left before it assigns them
      transID = None
      startTime = time.time()
      try:
        transID = long(transDict['TransformationID'])
        if transID not in self.transInQueue:
          break
        self.transInThread[transID] = ' [Thread%d] [%s] ' % (threadID, str(transID))
        self._logInfo("Processing transformation %s." % transID, transID=transID)
        startTime = time.time()
        res = self.processTransformation(transDict, clients)
        if not res['OK']:
          self._logInfo("Failed to process transformation:", res['Message'], transID=transID)
      except Exception as x:  # pylint: disable=broad-except
        self._logException('Exception in plugin', lException=x, transID=transID)
      finally:
        if not transID:
          transID = 'None'
        self._logInfo("Processed transformation in %.1f seconds" % (time.time() - startTime), transID=transID)
        if transID in self.transInQueue:
          self.transInQueue.remove(transID)
        self.transInThread.pop(transID, None)
        self._logVerbose("%d transformations still in queue" % len(self.transInQueue))
    return S_OK()

  def processTransformation(self, transDict, clients):
    """ process a single transformation (in transDict)

        Steps: get the files of the transformation, get their replicas, run the plug-in
        and create one task per (SE, LFNs) pair it returns. A transformation in 'Flush'
        status is set back to 'Active' if all the tasks could be created.

        :param dict transDict: transformation description; the keys 'TransformationID',
                               'Type', 'Status' and (optionally) 'Plugin' are read here
        :param dict clients: clients of the thread; 'TransformationClient' is used here
        :return: S_OK(), or the failed result of the step that failed
    """
    method = 'processTransformation'
    transID = transDict['TransformationID']
    forJobs = transDict['Type'].lower() not in ('replication', 'removal')

    # First get the LFNs associated to the transformation
    transFiles = self._getTransformationFiles(transDict, clients, replicateOrRemove=not forJobs)
    if not transFiles['OK']:
      return transFiles
    if not transFiles['Value']:
      return S_OK()

    if transID not in self.replicaCache:
      self.__readCache(transID)
    transFiles = transFiles['Value']
    unusedLfns = [f['LFN'] for f in transFiles]
    unusedFiles = len(unusedLfns)

    plugin = transDict.get('Plugin', 'Standard')
    # Limit the number of LFNs to be considered for replication or removal as they are treated individually
    if not forJobs:
      # Get plugin-specific limit in number of files (0 means no limit)
      maxFiles = Operations().getValue('TransformationPlugins/%s/MaxFilesToProcess' % plugin, 0)
      lfnsToProcess = self.__applyReduction(unusedLfns, maxFiles=maxFiles)
    else:
      lfnsToProcess = unusedLfns
    # Built once: membership is tested for every file below, which is quadratic on a list
    lfnsToProcessSet = set(lfnsToProcess)
    if len(lfnsToProcess) != unusedFiles:
      self._logInfo("Reduced number of files from %d to %d" % (unusedFiles, len(lfnsToProcess)),
                    method=method, transID=transID)
      transFiles = [f for f in transFiles if f['LFN'] in lfnsToProcessSet]

    # Check the data is available with replicas
    res = self.__getDataReplicas(transDict, lfnsToProcess, clients, forJobs=forJobs)
    if not res['OK']:
      self._logError("Failed to get data replicas:", res['Message'],
                     method=method, transID=transID)
      return res
    dataReplicas = res['Value']

    # Get the plug-in type and create the plug-in object
    self._logInfo("Processing transformation with '%s' plug-in." % plugin,
                  method=method, transID=transID)
    res = self.__generatePluginObject(plugin, clients)
    if not res['OK']:
      return res
    oPlugin = res['Value']

    # Get the plug-in and set the required params
    oPlugin.setParameters(transDict)
    oPlugin.setInputData(dataReplicas)
    oPlugin.setTransformationFiles(transFiles)
    res = oPlugin.run()
    if not res['OK']:
      self._logError("Failed to generate tasks for transformation:", res['Message'],
                     method=method, transID=transID)
      return res
    tasks = res['Value']
    self.pluginTimeout[transID] = res.get('Timeout', False)
    # Create the tasks
    allCreated = True
    created = 0
    lfnsInTasks = []
    for se, lfns in tasks:
      res = clients['TransformationClient'].addTaskForTransformation(transID, lfns, se)
      if not res['OK']:
        self._logError("Failed to add task generated by plug-in:", res['Message'],
                       method=method, transID=transID)
        allCreated = False
      else:
        created += 1
        lfnsInTasks.extend(lfn for lfn in lfns if lfn in lfnsToProcessSet)
    if created:
      self._logInfo("Successfully created %d tasks for transformation." % created,
                    method=method, transID=transID)
    else:
      self._logInfo("No new tasks created for transformation.",
                    method=method, transID=transID)
    # Remember how many files were left without a task
    self.unusedFiles[transID] = unusedFiles - len(lfnsInTasks)
    # If not all files were obtained, move the offset
    lastOffset = self.lastFileOffset.get(transID)
    if lastOffset:
      self.lastFileOffset[transID] = max(0, lastOffset - len(lfnsInTasks))
    self.__removeFilesFromCache(transID, lfnsInTasks)

    # If this production is to Flush
    if transDict['Status'] == 'Flush' and allCreated:
      res = clients['TransformationClient'].setTransformationParameter(transID, 'Status', 'Active')
      if not res['OK']:
        self._logError("Failed to update transformation status to 'Active':", res['Message'],
                       method=method, transID=transID)
      else:
        self._logInfo("Updated transformation status to 'Active'.",
                      method=method, transID=transID)
    return S_OK()

  ######################################################################
  #
  # Internal methods used by the agent
  #

  def _getTransformationFiles(self, transDict, clients, statusList=None, replicateOrRemove=False):
    """ Get the files of a transformation that are candidates for processing.

        Files found in a status other than 'Unused' are set back to 'Unused' and removed
        from the replica cache.

        :param dict transDict: transformation description; the keys 'TransformationID',
                               'Type', 'Status' and (optionally) 'Plugin' are read
        :param dict clients: clients of the thread; 'TransformationClient' is used
        :param list statusList: file statuses to select (default: 'Unused' and 'ProbInFC');
                                'MissingInFC' is added for 'Removal' transformations.
                                The list passed by the caller is not modified
        :param bool replicateOrRemove: if True, the "skip if no new Unused files" check is
                                       only applied when a NoUnusedDelay is set for the plugin
        :return: S_OK(list of file dictionaries); S_OK() without value if there is no file or
                 nothing new to process; the failed result of getTransformationFiles otherwise
    """
    # By default, don't skip if no new Unused for DM transformations
    skipIfNoNewUnused = not replicateOrRemove
    transID = transDict['TransformationID']
    plugin = transDict.get('Plugin', 'Standard')
    # Check if files should be sorted and limited in number
    operations = Operations()
    sortedBy = operations.getValue('TransformationPlugins/%s/SortedBy' % plugin, None)
    maxFiles = operations.getValue('TransformationPlugins/%s/MaxFilesToProcess' % plugin, 0)
    # If the NoUnuse delay is explicitly set, we want to take it into account, and skip if no new Unused
    if operations.getValue('TransformationPlugins/%s/NoUnusedDelay' % plugin, 0):
      skipIfNoNewUnused = True
    # No delay is applied when the last run of the plug-in reported a timeout
    noUnusedDelay = 0 if self.pluginTimeout.get(transID, False) else \
        operations.getValue('TransformationPlugins/%s/NoUnusedDelay' % plugin, self.noUnusedDelay)
    method = '_getTransformationFiles'
    lastOffset = self.lastFileOffset.setdefault(transID, 0)

    # Files that were problematic (either explicit or because SE was banned) may be recovered,
    # and always removing the missing ones.
    # Work on a copy: the list of the caller must not be extended behind its back
    statusList = list(statusList) if statusList else ['Unused', 'ProbInFC']
    if transDict['Type'] == 'Removal':
      statusList.append('MissingInFC')
    transClient = clients['TransformationClient']
    res = transClient.getTransformationFiles(condDict={'TransformationID': transID,
                                                       'Status': statusList},
                                             orderAttribute=sortedBy,
                                             offset=lastOffset, maxfiles=maxFiles)
    if not res['OK']:
      self._logError("Failed to obtain input data:", res['Message'],
                     method=method, transID=transID)
      return res
    transFiles = res['Value']
    # A full page was obtained: start from the next one next time, otherwise restart from 0
    if maxFiles and len(transFiles) == maxFiles:
      self.lastFileOffset[transID] += maxFiles
    else:
      del self.lastFileOffset[transID]

    if not transFiles:
      self._logInfo("No '%s' files found for transformation." % ','.join(statusList),
                    method=method, transID=transID)
      if transDict['Status'] == 'Flush':
        res = transClient.setTransformationParameter(transID, 'Status', 'Active')
        if not res['OK']:
          self._logError("Failed to update transformation status to 'Active':", res['Message'],
                         method=method, transID=transID)
        else:
          self._logInfo("Updated transformation status to 'Active'.",
                        method=method, transID=transID)
      return S_OK()
    # Check if transformation is kicked
    kickFile = os.path.join(self.controlDirectory, 'KickTransformation_%s' % str(transID))
    kickTrans = False
    try:
      kickTrans = os.path.exists(kickFile)
      if kickTrans:
        os.remove(kickFile)
    except OSError:
      pass

    # Check if something new happened
    now = datetime.datetime.utcnow()
    if not kickTrans and skipIfNoNewUnused and noUnusedDelay:
      nextStamp = self.unusedTimeStamp.setdefault(transID, now) + datetime.timedelta(hours=noUnusedDelay)
      skip = now < nextStamp
      if len(transFiles) == self.unusedFiles.get(transID, 0) and transDict['Status'] != 'Flush' and skip:
        self._logInfo("No new '%s' files found for transformation." % ','.join(statusList),
                      method=method, transID=transID)
        return S_OK()

    self.unusedTimeStamp[transID] = now
    # If files are not Unused, set them Unused
    notUnused = [trFile['LFN'] for trFile in transFiles if trFile['Status'] != 'Unused']
    otherStatuses = sorted(set([trFile['Status'] for trFile in transFiles]) - set(['Unused']))
    if notUnused:
      res = transClient.setFileStatusForTransformation(transID, 'Unused', notUnused, force=True)
      if not res['OK']:
        self._logError("Error setting %d files Unused:" % len(notUnused), res['Message'],
                       method=method, transID=transID)
      else:
        self._logInfo("Set %d files from %s to Unused" % (len(notUnused), ','.join(otherStatuses)),
                      method=method, transID=transID)
        self.__removeFilesFromCache(transID, notUnused)
    return S_OK(transFiles)

  def __applyReduction(self, lfns, maxFiles=None):
    """ Limit the number of files to be considered.

    :param lfns: list of LFNs
    :param maxFiles: maximum number of files to keep; self.maxFiles is used when None
    :returns: lfns itself when no limit applies or the list is short enough,
              otherwise the first maxFiles items of randomize(lfns)
    """
    limit = self.maxFiles if maxFiles is None else maxFiles
    if limit and len(lfns) > limit:
      return randomize(lfns)[:limit]
    return lfns

  def __getDataReplicas(self, transDict, lfns, clients, forJobs=True):
    """ Get the replicas for the LFNs, looking first in the replica cache.

    LFNs that are not in the cache are looked up through _getDataReplicasDM in chunks
    of 10000; the non-empty results are added to the cache, which is then written out.

    :param dict transDict: transformation description ('TransformationID', 'Body' and 'Status' are read)
    :param lfns: LFNs for which replicas are wanted
    :param dict clients: clients handed over to _getDataReplicasDM
    :param bool forJobs: handed over to _getDataReplicasDM
    :returns: S_OK( {lfn: replicas} ); LFNs for which no replica was found are absent
    """
    method = '__getDataReplicas'
    transID = transDict['TransformationID']
    if 'RemoveFile' in transDict['Body']:
      # When removing files, we don't care about their replicas
      return S_OK(dict.fromkeys(lfns, ['None']))
    # The presence of a ClearCache_<transID> file in the control directory requests a cache reset
    clearCacheFile = os.path.join(self.controlDirectory, 'ClearCache_%s' % str(transID))
    clearCache = os.path.exists(clearCacheFile)
    if clearCache:
      try:
        os.remove(clearCacheFile)
      except OSError:
        # Best effort: the cache is cleared even if the flag file could not be removed
        pass
    if clearCache or transDict['Status'] == 'Flush':
      self._logInfo("Replica cache cleared", method=method, transID=transID)
      # We may need to get new replicas
      self.__clearCacheForTrans(transID)
    else:
      # If the cache needs to be cleaned
      self.__cleanCache(transID)
    dataReplicas = {}
    nLfns = len(lfns)
    self._logVerbose("Getting replicas for %d files" % nLfns, method=method, transID=transID)
    cachedReplicaSets = self.replicaCache.get(transID, {})
    cachedReplicas = {}
    # Merge all sets of replicas
    for replicas in cachedReplicaSets.itervalues():
      cachedReplicas.update(replicas)
    self._logInfo("Number of cached replicas: %d" % len(cachedReplicas), method=method, transID=transID)
    setCached = set(cachedReplicas)
    setLfns = set(lfns)
    for lfn in setLfns & setCached:
      dataReplicas[lfn] = cachedReplicas[lfn]
    newLFNs = setLfns - setCached
    self._logInfo("ReplicaCache hit for %d out of %d LFNs" % (len(dataReplicas), nLfns),
                  method=method, transID=transID)
    if newLFNs:
      startTime = time.time()
      self._logInfo("Getting replicas for %d files from catalog" % len(newLFNs),
                    method=method, transID=transID)
      newReplicas = {}
      for chunk in breakListIntoChunks(newLFNs, 10000):
        res = self._getDataReplicasDM(transID, chunk, clients, forJobs=forJobs)
        if res['OK']:
          # Only keep (and cache) the files that actually have replicas
          reps = dict((lfn, ses) for lfn, ses in res['Value'].iteritems() if ses)
          newReplicas.update(reps)
          self.__updateCache(transID, reps)
        else:
          self._logWarn("Failed to get replicas for %d files" % len(chunk), res['Message'],
                        method=method, transID=transID)

      self._logInfo("Obtained %d replicas from catalog in %.1f seconds"
                    % (len(newReplicas), time.time() - startTime),
                    method=method, transID=transID)
      dataReplicas.update(newReplicas)
      noReplicas = newLFNs - set(dataReplicas)
      self.__writeCache(transID)
      if noReplicas:
        self._logWarn("Found %d files without replicas (or only in Failover)" % len(noReplicas),
                      method=method, transID=transID)
    return S_OK(dataReplicas)

  def _getDataReplicasDM(self, transID, lfns, clients, forJobs=True, ignoreMissing=False):
    """ Get the replicas for the LFNs from the DataManager client and flag problematic files.

    Files reported neither as Successful nor as Failed are set 'ProbInFC' in the
    transformation; files failing with "No such file or directory" are set
    'MissingInFC', unless ignoreMissing is set.

    :param transID: transformation ID
    :param lfns: LFNs to look up
    :param dict clients: must provide 'DataManager' and 'TransformationClient'
    :param bool forJobs: use getReplicasForJobs rather than getReplicas, and skip SEs whose name contains 'failover'
    :param bool ignoreMissing: report missing files with an empty replica list instead of updating their status
    :returns: the DataManager error as is, or S_OK( {lfn: [SE names]} )
    """
    method = '_getDataReplicasDM'

    queryStart = time.time()
    jobsLabel = ' for jobs' if forJobs else ''
    self._logVerbose("Getting replicas%s from catalog for %d files" % (jobsLabel, len(lfns)),
                     method=method, transID=transID)
    if not forJobs:
      # Get all replicas
      res = clients['DataManager'].getReplicas(lfns, getUrl=False)
    else:
      # Get only replicas eligible for jobs
      res = clients['DataManager'].getReplicasForJobs(lfns, getUrl=False)
    if not res['OK']:
      return res
    catalogReply = res['Value']
    self._logVerbose("Replica results for %d files obtained in %.2f seconds" %
                     (len(lfns), time.time() - queryStart),
                     method=method, transID=transID)

    # Files that are neither Successful nor Failed are set problematic in the FC
    problematicLfns = [lfn for lfn in lfns
                       if not (lfn in catalogReply['Successful'] or lfn in catalogReply['Failed'])]
    if problematicLfns:
      self._logInfo("%d files found problematic in the catalog, set ProbInFC" % len(problematicLfns))
      res = clients['TransformationClient'].setFileStatusForTransformation(transID, 'ProbInFC', problematicLfns)
      if not res['OK']:
        self._logError("Failed to update status of problematic files:", res['Message'],
                       method=method, transID=transID)

    # Collect the usable replicas of each file
    usableReplicas = {}
    failoverLfns = []
    for lfn, replicaDict in catalogReply['Successful'].iteritems():
      for se in replicaDict:
        # This remains here for backward compatibility in case VOs have not defined SEs not to be used for jobs
        if not forJobs or 'failover' not in se.lower():
          usableReplicas.setdefault(lfn, []).append(se)
        else:
          self._logVerbose("Ignoring failover replica for %s." % lfn, method=method, transID=transID)
      if not usableReplicas.get(lfn):
        failoverLfns.append(lfn)
    if failoverLfns:
      self._logVerbose("%d files have no replica but possibly in Failover SE" % len(failoverLfns))

    # Make sure that files missing from the catalog are marked in the transformation DB.
    missingLfns = []
    for lfn, reason in catalogReply['Failed'].iteritems():
      if "No such file or directory" in reason:
        self._logVerbose("%s not found in the catalog." % lfn, method=method, transID=transID)
        missingLfns.append(lfn)
    if missingLfns:
      self._logInfo("%d files not found in the catalog" % len(missingLfns))
      if not ignoreMissing:
        res = clients['TransformationClient'].setFileStatusForTransformation(transID, 'MissingInFC', missingLfns)
        if not res['OK']:
          self._logError("Failed to update status of missing files:", res['Message'],
                         method=method, transID=transID)
      else:
        usableReplicas.update(dict.fromkeys(missingLfns, []))
    return S_OK(usableReplicas)

  def __updateCache(self, transID, newReplicas):
    """ Store a set of replicas in the cache of a transformation, keyed by the current UTC time
    """
    transCache = self.replicaCache.setdefault(transID, {})
    transCache[datetime.datetime.utcnow()] = newReplicas
#    if len( newReplicas ) > 5000:
#      self.__writeCache( transID )

  def __clearCacheForTrans(self, transID):
    """ Drop the whole in-memory replica cache of a transformation, if there is one
    """
    if transID in self.replicaCache:
      del self.replicaCache[transID]

  def __cleanReplicas(self, transID, lfns):
    """ Remove from the cache of a transformation the LFNs that are not in the given list
    """
    wanted = set(lfns)
    cachedLfns = set()
    for replicaSet in self.replicaCache.get(transID, {}).itervalues():
      cachedLfns |= set(replicaSet)
    obsolete = cachedLfns - wanted
    if not obsolete:
      return
    self._logInfo("Remove %d files from cache" % len(obsolete), method='__cleanReplicas', transID=transID)
    self.__removeFromCache(transID, obsolete)

  def __cleanCache(self, transID):
    """ Remove expired and empty replica sets from the cache of a transformation.

    A replica set expires once it is older than self.replicaCacheValidity days.
    The transformation entry itself is removed when no replica set is left.
    Nothing is done (and nothing is logged) when the transformation is not cached.
    """
    try:
      if transID not in self.replicaCache:
        # Not cached: leave here rather than fail on the lookup below
        return
      timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=self.replicaCacheValidity)
      # Iterate over a copy of the keys as entries are deleted on the way
      for updateTime in set(self.replicaCache[transID]):
        nCache = len(self.replicaCache[transID][updateTime])
        if updateTime < timeLimit or not nCache:
          self._logInfo("Clear %s replicas for transformation %s, time %s" %
                        ('%d cached' % nCache if nCache else 'empty cache', str(transID), str(updateTime)),
                        transID=transID, method='__cleanCache')
          del self.replicaCache[transID][updateTime]
      # Remove empty transformations
      if not self.replicaCache[transID]:
        del self.replicaCache[transID]
    except Exception as x:
      self._logException("Exception when cleaning replica cache:", lException=x)

  def __removeFilesFromCache(self, transID, lfns):
    """ Remove LFNs from the cache of a transformation and, if any was removed, write the cache out
    """
    nRemoved = self.__removeFromCache(transID, lfns)
    if not nRemoved:
      return
    self._logInfo("Removed %d replicas from cache" % nRemoved, method='__removeFilesFromCache', transID=transID)
    self.__writeCache(transID)

  def __removeFromCache(self, transID, lfns):
    """ Remove LFNs from all the replica sets cached for a transformation.

    :returns: None when the transformation is not cached, otherwise the number of
              (non-empty) cache entries that were removed
    """
    if transID not in self.replicaCache:
      return
    transCache = self.replicaCache[transID]
    removed = 0
    if transCache and lfns:
      for lfn in lfns:
        removed += sum(1 for timeKey in transCache if transCache[timeKey].pop(lfn, None))
    return removed

  def __cacheFile(self, transID):
    """ Name of the cache file of a transformation: self.cacheFile with '.pkl' replaced by '_<transID>.pkl'
    """
    suffix = '_%s.pkl' % str(transID)
    return self.cacheFile.replace('.pkl', suffix)

  @gSynchro
  def __readCache(self, transID):
    """ Load the replica cache of a transformation from its cache file.

    Nothing is done when the transformation is already in the in-memory cache.
    When the file does not exist or cannot be loaded, an empty cache is set.
    """
    if transID in self.replicaCache:
      return
    method = '__readCache'
    # Bound before the try block so that the exception handler can always report it
    fileName = None
    try:
      fileName = self.__cacheFile(transID)
      if not os.path.exists(fileName):
        self.replicaCache[transID] = {}
      else:
        with open(fileName, 'r') as cacheFile:
          self.replicaCache[transID] = pickle.load(cacheFile)
        self._logInfo("Successfully loaded replica cache from file %s (%d files)" %
                      (fileName, self.__filesInCache(transID)),
                      method=method, transID=transID)
    except Exception as x:
      self._logException("Failed to load replica cache from file %s" % fileName, lException=x,
                         method=method, transID=transID)
      self.replicaCache[transID] = {}

  def __filesInCache(self, transID):
    """ Total number of entries over all the replica sets cached for a transformation
    """
    total = 0
    for lfns in self.replicaCache.get(transID, {}).itervalues():
      total += len(lfns)
    return total

  @gSynchro
  def __writeCache(self, transID=None):
    """ Write the replica cache to file, one file per transformation.

    :param transID: transformation to write; all cached transformations are written when not set

    Failures are logged, never raised.
    """
    method = '__writeCache'
    # Bound before the try block so that the exception handler can always report them
    t_id = transID
    cacheFile = None
    try:
      startTime = time.time()
      transList = [transID] if transID else set(self.replicaCache)
      filesInCache = 0
      nCache = 0
      for t_id in transList:
        # Protect the copy of the cache
        filesInCache += self.__filesInCache(t_id)
        # write to a temporary file in order to avoid corrupted files
        cacheFile = self.__cacheFile(t_id)
        tmpFile = cacheFile + '.tmp'
        with open(tmpFile, 'w') as fd:
          pickle.dump(self.replicaCache.get(t_id, {}), fd)
        # Now rename the file as it should be
        os.rename(tmpFile, cacheFile)
        nCache += 1
      self._logInfo("Successfully wrote %d replica cache file(s) (%d files) in %.1f seconds"
                    % (nCache, filesInCache, time.time() - startTime),
                    method=method, transID=transID if transID else None)
    except Exception as x:
      self._logException("Could not write replica cache file %s" % cacheFile, lException=x,
                         method=method, transID=t_id)

  def __generatePluginObject(self, plugin, clients):
    """ This simply instantiates the TransformationPlugin class with the relevant plugin name

    The class is looked up in the module named by self.pluginLocation.

    :param plugin: plugin name, passed (as a string) to the TransformationPlugin constructor
    :param dict clients: must provide 'TransformationClient' and 'DataManager'
    :returns: S_OK( plugin object ), or S_ERROR() when the import fails or an AttributeError is raised
    """
    try:
      plugModule = __import__(self.pluginLocation, globals(), locals(), ['TransformationPlugin'])
    except ImportError as e:
      self._logException("Failed to import 'TransformationPlugin' %s" % plugin, lException=e,
                         method="__generatePluginObject")
      return S_ERROR()
    try:
      plugin_o = getattr(plugModule, 'TransformationPlugin')('%s' % plugin,
                                                             transClient=clients['TransformationClient'],
                                                             dataManager=clients['DataManager'])
      return S_OK(plugin_o)
    except AttributeError as e:
      self._logException("Failed to create %s()" % plugin, lException=e, method="__generatePluginObject")
      return S_ERROR()
    # NOTE(review): the two calls below are unreachable, as both branches above return.
    # Presumably they were meant to run before 'return S_OK(plugin_o)' - confirm that the
    # plugin class provides setDirectory/setCallback before moving or removing them.
    plugin_o.setDirectory(self.workDirectory)
    plugin_o.setCallback(self.pluginCallback)

  def pluginCallback(self, transID, invalidateCache=False):
    """ Standard plugin callback

    :param transID: transformation ID
    :param bool invalidateCache: when set, the replicas cached for the transformation are
                                 dropped and the cache is written out

    Failures are logged, never raised.
    """
    if not invalidateCache:
      return
    try:
      if transID in self.replicaCache:
        self._logInfo("Removed cached replicas for transformation", method='pluginCallBack', transID=transID)
        self.replicaCache.pop(transID)
        self.__writeCache(transID)
    except Exception as x:
      # Still best effort, but leave a trace instead of failing silently
      self._logException("Exception when invalidating replica cache:", lException=x,
                         method='pluginCallBack', transID=transID)
Example #10
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """ Set up the parameter definitions and, when transID is given, load that transformation.

        :param transID: ID of the transformation to load; nothing is loaded when 0
        :param transClient: client to use; a TransformationClient is created when not given
        :raises AttributeError: when the lookup fails with 'Transformation does not exist'
        """
        super(Transformation, self).__init__()

        # Types accepted for each of the known parameters
        self.paramTypes = {'TransformationID': [types.IntType, types.LongType],
                           'TransformationName': types.StringTypes,
                           'Status': types.StringTypes,
                           'Description': types.StringTypes,
                           'LongDescription': types.StringTypes,
                           'Type': types.StringTypes,
                           'Plugin': types.StringTypes,
                           'AgentType': types.StringTypes,
                           'FileMask': types.StringTypes,
                           'TransformationGroup': types.StringTypes,
                           'GroupSize': [types.IntType, types.LongType, types.FloatType],
                           'InheritedFrom': [types.IntType, types.LongType],
                           'Body': types.StringTypes,
                           'MaxNumberOfTasks': [types.IntType, types.LongType],
                           'EventsPerTask': [types.IntType, types.LongType]}
        # Initial values of the known parameters
        self.paramValues = {'TransformationID': 0,
                            'TransformationName': '',
                            'Status': 'New',
                            'Description': '',
                            'LongDescription': '',
                            'Type': '',
                            'Plugin': 'Standard',
                            'AgentType': 'Manual',
                            'FileMask': '',
                            'TransformationGroup': 'General',
                            'GroupSize': 1,
                            'InheritedFrom': 0,
                            'Body': '',
                            'MaxNumberOfTasks': 0,
                            'EventsPerTask': 0}
        self.ops = Operations()
        defaultPlugins = ['Broadcast', 'Standard', 'BySize', 'ByShare']
        self.supportedPlugins = self.ops.getValue('Transformations/AllowedPlugins', defaultPlugins)
        self.transClient = transClient if transClient else TransformationClient()
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if not transID:
            return

        # Load the parameters of the existing transformation
        self.paramValues['TransformationID'] = transID
        res = self.getTransformation()
        if res['OK']:
            self.exists = True
            return
        if res['Message'] == 'Transformation does not exist':
            raise AttributeError('TransformationID %d does not exist' % transID)
        # Any other failure: forget the ID and report
        self.paramValues['TransformationID'] = 0
        gLogger.fatal("Failed to get transformation from database",
                      "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        """ Record the server URL and pass it on to the transformation client
        """
        self.serverURL = server
        self.transClient.setServer(server)

    def getServer(self):
        """ Return the server URL currently recorded on this object
        """
        url = self.serverURL
        return url

    def reset(self, transID=0):
        """ Re-initialise this object, optionally binding it to another transformation.

        The constructor is called with the transID only, so it builds its default client.

        :param transID: ID handed over to the constructor
        :returns: S_OK()
        """
        self.__init__(transID)
        client = self.transClient
        client.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """ Set the 'TargetSE' parameter through __setSE and return its result
        """
        result = self.__setSE('TargetSE', seList)
        return result

    def setSourceSE(self, seList):
        """ Set the 'SourceSE' parameter through __setSE and return its result
        """
        result = self.__setSE('SourceSE', seList)
        return result

    def setBody(self, body):
        """ check that the body is a string, or using the proper syntax for multiple operations

        A string is stored as is. A list of operations is stored JSON-encoded, once
        every one of its entries has been validated.

        :param body: transformation body, for example

          .. code :: python

            body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                     ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
                   ]

        :type body: string or list of tuples (or lists) of string and dictionaries
        :raises TypeError: If the structure is not as expected
        :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used
        :returns: S_OK, S_ERROR
        """
        self.item_called = "Body"
        if isinstance(body, basestring):
            return self.__setParam(body)
        if not isinstance(body, (list, tuple)):
            raise TypeError("Expected list or string, but %r is %s" %
                            (body, type(body)))

        for tup in body:
            if not isinstance(tup, (tuple, list)):
                raise TypeError("Expected tuple or list, but %r is %s" %
                                (tup, type(tup)))
            if len(tup) != 2:
                raise TypeError("Expected 2-tuple, but %r is length %d" %
                                (tup, len(tup)))
            if not isinstance(tup[0], basestring):
                raise TypeError(
                    "Expected string, but first entry in tuple %r is %s" %
                    (tup, type(tup[0])))
            if not isinstance(tup[1], dict):
                # Report the type of the offending (second) entry
                raise TypeError(
                    "Expected dictionary, but second entry in tuple %r is %s" %
                    (tup, type(tup[1])))
            for par, val in tup[1].iteritems():
                if not isinstance(par, basestring):
                    raise TypeError(
                        "Expected string, but key in dictionary %r is %s" %
                        (par, type(par)))
                if par not in Operation.ATTRIBUTE_NAMES:
                    raise ValueError("Unknown attribute for Operation: %s" %
                                     par)
                if not isinstance(
                        val,
                    (basestring, int, long, float, list, tuple, dict)):
                    raise TypeError("Cannot encode %r, in json" % (val,))
        # Only encode and store the body once all the operations have been checked
        return self.__setParam(json.dumps(body))

    def __setSE(self, seParam, seList):
        """ Normalise a list of SEs, check it with __checkSEs and store it as parameter seParam.

        :param seParam: name of the parameter to set
        :param seList: string (evaluated, or split on commas when that fails), list, tuple or dict
        :returns: S_ERROR for an unsupported type, the __checkSEs error, or the result of __setParam
        """
        if isinstance(seList, basestring):
            # NOTE(review): eval() runs arbitrary expressions - only safe for trusted input
            try:
                seList = eval(seList)
            except BaseException:
                seList = seList.split(',')
        elif not isinstance(seList, (list, dict, tuple)):
            return S_ERROR("Bad argument type")
        else:
            seList = list(seList)
        checked = self.__checkSEs(seList)
        if not checked['OK']:
            return checked
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        """ Provide generic get<Param> / set<Param> accessors.

        The parameter name (what follows the 'get' or 'set' prefix) is recorded in
        self.item_called and the matching generic accessor is returned.

        :raises AttributeError: for any other name
        """
        if name.startswith('get'):
            self.item_called = name[3:]
            return self.__getParam
        if name.startswith('set'):
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """ Generic getter for the parameter named by self.item_called.

        'Available' gives the names of the typed parameters, 'Parameters' all the values.

        :raises AttributeError: when the parameter is not known
        """
        item = self.item_called
        if item == 'Available':
            return S_OK(self.paramTypes.keys())
        if item == 'Parameters':
            return S_OK(self.paramValues)
        if item not in self.paramValues:
            raise AttributeError("Unknown parameter for transformation: %s" %
                                 item)
        return S_OK(self.paramValues[item])

    def __setParam(self, value):
        """ Generic setter for the parameter named by self.item_called.

        The new value is sent to the server when the transformation exists there,
        and is only stored locally when that succeeded.

        :raises TypeError: when a typed parameter gets a new value of an unexpected type
        :returns: S_OK(), or the error returned by the transformation client
        """
        item = self.item_called
        if item in self.paramTypes:
            # Typed parameter: a new value must be of one of the declared types
            change = self.paramValues[item] != value
            if change and type(value) not in self.paramTypes[item]:
                raise TypeError("%s %s %s expected one of %s" %
                                (item, value, type(value),
                                 self.paramTypes[item]))
        else:
            change = item not in self.paramValues or self.paramValues[item] != value

        if not change:
            gLogger.verbose("No change of parameter %s required" % item)
            return S_OK()

        gLogger.verbose("Parameter %s to be changed" % item)
        transID = self.paramValues['TransformationID']
        if self.exists and transID:
            res = self.transClient.setTransformationParameter(transID, item, value)
            if not res['OK']:
                return res
        self.paramValues[item] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """ Get the transformation parameters from the server and set them on this object.

        :param bool printOutput: pretty-print the error when the query fails
        :returns: S_ERROR() when no TransformationID is set, the error returned by the
                  client, or S_OK( dictionary of transformation parameters )
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            # Looking up the setter also records the parameter name for the generic setter
            setter = getattr(self, setterName, None)
            if not callable(setter):
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """ Get the logging records of the transformation.

        :param bool printOutput: print the records (or the error) as well
        :returns: S_ERROR() when no TransformationID is set, the error returned by the
                  client, or S_OK( list of logging records )
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if res['OK']:
            loggingList = res['Value']
            if printOutput:
                self._printFormattedDictList(loggingList,
                                             ['Message', 'MessageDate', 'AuthorDN'],
                                             'MessageDate', 'MessageDate')
            return S_OK(loggingList)
        if printOutput:
            self._prettyPrint(res)
        return res

    def extendTransformation(self, nTasks, printOutput=False):
        """ Run the client's 'extendTransformation' operation with nTasks for this transformation
        """
        result = self.__executeOperation('extendTransformation', nTasks, printOutput=printOutput)
        return result

    def cleanTransformation(self, printOutput=False):
        """ Run the client's 'cleanTransformation' operation; on success the local status becomes 'Cleaned'
        """
        result = self.__executeOperation('cleanTransformation', printOutput=printOutput)
        if not result['OK']:
            return result
        self.paramValues['Status'] = 'Cleaned'
        return result

    def deleteTransformation(self, printOutput=False):
        """ Run the client's 'deleteTransformation' operation; on success this object is reset
        """
        result = self.__executeOperation('deleteTransformation', printOutput=printOutput)
        if not result['OK']:
            return result
        self.reset()
        return result

    def addFilesToTransformation(self, lfns, printOutput=False):
        """ Run the client's 'addFilesToTransformation' operation with lfns for this transformation
        """
        result = self.__executeOperation('addFilesToTransformation', lfns, printOutput=printOutput)
        return result

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """ Run the client's 'setFileStatusForTransformation' operation with status and lfns
        """
        result = self.__executeOperation('setFileStatusForTransformation', status, lfns,
                                         printOutput=printOutput)
        return result

    def getTransformationTaskStats(self, printOutput=False):
        """ Run the client's 'getTransformationTaskStats' operation for this transformation
        """
        result = self.__executeOperation('getTransformationTaskStats', printOutput=printOutput)
        return result

    def getTransformationStats(self, printOutput=False):
        """ Run the client's 'getTransformationStats' operation for this transformation
        """
        result = self.__executeOperation('getTransformationStats', printOutput=printOutput)
        return result

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """ Run the client's 'deleteTasks' operation with taskMin and taskMax for this transformation
        """
        result = self.__executeOperation('deleteTasks', taskMin, taskMax, printOutput=printOutput)
        return result

    def addTaskForTransformation(self, lfns=[], se='Unknown', printOutput=False):
        """ Run the client's 'addTaskForTransformation' operation with lfns and se for this transformation
        """
        result = self.__executeOperation('addTaskForTransformation', lfns, se, printOutput=printOutput)
        return result

    def setTaskStatus(self, taskID, status, printOutput=False):
        """ Run the client's 'setTaskStatus' operation with taskID and status for this transformation
        """
        result = self.__executeOperation('setTaskStatus', taskID, status, printOutput=printOutput)
        return result

    def __executeOperation(self, operation, *parms, **kwds):
        """ Call a method of the transformation client for this transformation.

        :param str operation: name of the client method; it is called with the
                              TransformationID followed by parms and kwds
        :param bool printOutput: (keyword, default False) pretty-print the result; not passed on to the client
        :returns: S_ERROR when no TransformationID is set or the client has no such
                  method, otherwise the result of the client call
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        # printOutput is for this method only, it must not reach the client
        printOutput = kwds.pop('printOutput', False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            return S_ERROR(
                "Unable to invoke %s, it isn't a member function of TransformationClient"
                % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        """ Get the files of this transformation.

        :param fileStatus: restrict the selection to these statuses, when not empty
        :param lfns: restrict the selection to these LFNs, when not empty
        :param outputFields: fields to print; when empty, the available fields are listed instead
        :param orderBy: field handed over to the printing helper as ordering key
        :param bool printOutput: print the result (or the error) as well
        :returns: the result of the client call, unchanged
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        """ Get the tasks of this transformation.

        :param taskStatus: restrict the selection to these external statuses, when not empty
        :param taskIDs: restrict the selection to these task IDs, when not empty
        :param outputFields: fields to print; when empty, the available fields are listed instead
        :param orderBy: field handed over to the printing helper as ordering key
        :param bool printOutput: print the result (or the error) as well
        :returns: the result of the client call, unchanged
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        """ Get transformations from the server.

        :param transID: restrict the selection to these transformation IDs, when not empty
        :param transStatus: restrict the selection to these statuses, when not empty
        :param outputFields: fields to print; when empty, the available fields are listed instead
        :param orderBy: field handed over to the printing helper as ordering key
        :param bool printOutput: print the result (or the error) as well
        :returns: the result of the client call, unchanged
        """
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """ Get the author DN and the username from the proxy information.

        :returns: S_ERROR with the getProxyInfo message, or
                  S_OK( {'username': ..., 'authorDN': ...} )
        """
        res = getProxyInfo()
        if not res['OK']:
            gLogger.error("Unable to get uploaded proxy Info %s " %
                          res['Message'])
            return S_ERROR(res['Message'])
        proxyInfo = res['Value']
        author = proxyInfo['identity']
        username = proxyInfo['username']
        return S_OK({'username': username, 'authorDN': author})

    #############################################################################
    def getTransformationsByUser(self,
                                 authorDN="",
                                 userName="",
                                 transID=[],
                                 transStatus=[],
                                 outputFields=[
                                     'TransformationID', 'Status', 'AgentType',
                                     'TransformationName', 'CreationDate',
                                     'AuthorDN'
                                 ],
                                 orderBy='TransformationID',
                                 printOutput=False):
        """ Get the transformations created by a given author.

        When no authorDN is given, it is taken from the proxy (see getAuthorDNfromProxy);
        a userName that differs from the one found in the proxy is then an error.

        :param str authorDN: DN of the author to select on
        :param str userName: expected user name, only checked when authorDN is empty
        :param transID: restrict the selection to these transformation IDs, when not empty
        :param transStatus: restrict the selection to these statuses, when not empty
        :param outputFields: fields to print; when empty, the available fields are listed instead
        :param orderBy: field handed over to the printing helper as ordering key
        :param bool printOutput: print the result (or the error) as well
        :returns: S_ERROR when the author cannot be resolved, otherwise the result of the client call
        """
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res['OK']:
                gLogger.error(res['Message'])
                return S_ERROR(res['Message'])
            else:
                foundUserName = res['Value']['username']
                foundAuthor = res['Value']['authorDN']
                # If the user who created the uploaded proxy is not the requested one, report the error and exit
                if not (userName == "" or userName == foundUserName):
                    gLogger.error(
                        "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                        % (userName, foundUserName))
                    return S_ERROR(
                        "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                        % (userName, foundUserName))

                userName = foundUserName
                authorDN = foundAuthor
                gLogger.info(
                    "Will list transformations created by user '%s' with status '%s'"
                    % (userName, ', '.join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ', '.join(transStatus)))

        condDict['AuthorDN'] = authorDN
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res

        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Show the summary for a list of transformations.

        Columns whose header starts with 'F' ('J') refer to files (jobs);
        'Proc.' stands for processed.

        :param transID: list of transformation IDs used as the selection
        :returns: S_OK with one dictionary (short header -> value) per record,
                  the failed client result, or S_ERROR when nothing was collected
        """
        # Names as they appear in the 'ParameterNames' of the service answer
        fullNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed',
                     'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting',
                     'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled']
        # Matching short headers, so that a printed row fits on one line
        shortNames = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.',
                      'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled']

        result = self.transClient.getTransformationSummaryWeb(
            {'TransformationID': transID}, [], 0, len(transID))
        if not result['OK']:
            self._prettyPrint(result)
            return result

        summaries = []
        if result['Value']['TotalRecords'] > 0:
            try:
                paramNames = result['Value']['ParameterNames']
                for record in result['Value']['Records']:
                    values = [record[paramNames.index(fullName)] for fullName in fullNames]
                    summaries.append(dict(zip(shortNames, values)))
            except Exception as x:
                # Best effort: whatever was collected before the failure is kept
                print('Exception %s ' % str(x))

        if len(summaries) > 0:
            print(self._printFormattedDictList(summaries, shortNames,
                                               shortNames[0], shortNames[0]))
            return S_OK(summaries)

        gLogger.error('No found transformations satisfying input condition')
        return S_ERROR('No found transformations satisfying input condition')

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create the transformation through the client from the current parameter values.

        Runs :meth:`_checkCreation` first. After a successful creation the
        object is flagged as existing, the new ID is stored, and every
        parameter that is not listed in ``self.paramTypes`` is pushed with
        ``setTransformationParameter``.

        :param addFiles: forwarded to ``transClient.addTransformation``
        :param printOutput: when true, print the parameters and a failing result
        :returns: S_OK(transID), the failed client result, or the result of
                  ``_errorReport`` when the sanity check fails
        """
        check = self._checkCreation()
        if not check['OK']:
            return self._errorReport(check, 'Failed transformation sanity check')

        values = self.paramValues
        if printOutput:
            gLogger.info(
                "Will attempt to create transformation with the following parameters"
            )
            self._prettyPrint(values)

        res = self.transClient.addTransformation(
            values['TransformationName'],
            values['Description'],
            values['LongDescription'],
            values['Type'],
            values['Plugin'],
            values['AgentType'],
            values['FileMask'],
            transformationGroup=values['TransformationGroup'],
            groupSize=values['GroupSize'],
            inheritedFrom=values['InheritedFrom'],
            body=values['Body'],
            maxTasks=values['MaxNumberOfTasks'],
            eventsPerTask=values['EventsPerTask'],
            addFiles=addFiles)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res

        transID = res['Value']
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)

        # Parameters outside paramTypes were not part of the creation call
        for paramName, paramValue in self.paramValues.items():
            if paramName in self.paramTypes:
                continue
            res = self.transClient.setTransformationParameter(
                transID, paramName, paramValue)
            if res['OK']:
                continue
            gLogger.error("Failed to add parameter",
                          "%s %s" % (paramName, res['Message']))
            gLogger.notice(
                "To add this parameter later please execute the following."
            )
            gLogger.notice("oTransformation = Transformation(%d)" % transID)
            gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Sanity checks run before a transformation is created.

        * refuses to continue when a TransformationID is already set;
        * interactively asks (``raw_input``) for every required parameter
          that is still empty;
        * when the selected plugin is not in ``self.supportedPlugins``,
          prompts for a supported one.

        :returns: S_OK, S_ERROR when a TransformationID is already set, or
                  the failed result of the plugin prompt
        """
        if self.paramValues['TransformationID']:
            gLogger.info(
                "You are currently working with an active transformation definition."
            )
            gLogger.info(
                "If you wish to create a new transformation reset the TransformationID."
            )
            gLogger.info("oTransformation.reset()")
            # Give callers an explicit reason instead of an empty error
            return S_ERROR("TransformationID is already set, reset the transformation first")

        for parameter in ('TransformationName', 'Description', 'LongDescription', 'Type'):
            if not self.paramValues[parameter]:
                gLogger.info(
                    "%s is not defined for this transformation. This is required..."
                    % parameter)
                self.paramValues[parameter] = raw_input(
                    "Please enter the value of " + parameter + " ")

        plugin = self.paramValues['Plugin']
        if plugin and plugin not in self.supportedPlugins:
            gLogger.info(
                "The selected Plugin (%s) is not known to the transformation agent."
                % plugin)
            res = self.__promptForParameter('Plugin',
                                            choices=self.supportedPlugins,
                                            default='Standard')
            if not res['OK']:
                return res
            self.paramValues['Plugin'] = res['Value']

        return S_OK()

    def _checkBySizePlugin(self):
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        """Make sure the GroupSize is usable for the Standard plugin.

        A GroupSize that is ``<= 0`` is replaced by 1 through ``setGroupSize``.

        :returns: S_OK, or the failed result of ``setGroupSize``
        """
        if not self.paramValues['GroupSize'] <= 0:
            return S_OK()

        gLogger.info(
            "The GroupSize was found to be less than zero. It has been set to 1."
        )
        outcome = self.setGroupSize(1)
        if outcome['OK']:
            return S_OK()
        return outcome

    def _checkBroadcastPlugin(self):
        """Make sure the parameters needed by the Broadcast plugin are set.

        For each of SourceSE and TargetSE that is missing or empty in
        ``self.paramValues`` the value is asked interactively
        (``raw_input``), split on commas/whitespace and handed to the
        matching ``set<Param>`` method.

        :returns: S_OK, S_ERROR when no setter can be found, or the failed
                  result of the setter
        """
        requiredParams = ['SourceSE', 'TargetSE']
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s"
            % (', '.join(requiredParams)))

        for requiredParam in requiredParams:
            if self.paramValues.get(requiredParam):
                continue
            answer = raw_input("Please enter " + requiredParam + " ")

            setterName = "set%s" % requiredParam
            setter = None
            if hasattr(self, setterName):
                candidate = getattr(self, setterName)
                if callable(candidate):
                    setter = candidate
            if not setter:
                return S_ERROR(
                    "Unable to invoke %s, this function hasn't been implemented."
                    % setterName)

            # Accept both comma and whitespace separated lists
            res = setter(answer.replace(',', ' ').split())
            if not res['OK']:
                return res
        return S_OK()

    def __checkSEs(self, seList):
        """Check every name in ``seList`` against the configured StorageElements.

        The known names are the sections found under
        '/Resources/StorageElements' in the configuration.

        :param seList: iterable of StorageElement names
        :returns: S_OK when all names are known, S_ERROR otherwise, or the
                  result of ``_errorReport`` when the sections cannot be read
        """
        known = gConfig.getSections('/Resources/StorageElements')
        if not known['OK']:
            return self._errorReport(known,
                                     'Failed to get possible StorageElements')

        unknown = set(seList).difference(set(known['Value']))
        if not unknown:
            return S_OK()

        for se in unknown:
            gLogger.error("StorageElement %s is not known" % se)
        return S_ERROR("%d StorageElements not known" % len(unknown))

    def __promptForParameter(self,
                             parameter,
                             choices=[],
                             default='',
                             insert=True):
        """Ask the user for the value of ``parameter`` and optionally store it.

        :param parameter: parameter name, also used to build the ``set<parameter>`` setter name
        :param choices: allowed answers, forwarded to ``promptUser``
        :param default: default answer, forwarded to ``promptUser``
        :param insert: when true, pass the answer to the ``set<parameter>`` method
        :returns: S_OK(value), the result of ``_errorReport`` when the prompt
                  fails, S_ERROR when no setter is available, or the failed
                  result of the setter
        """
        res = promptUser("Please enter %s" % parameter,
                         choices=choices,
                         default=default)
        if not res['OK']:
            return self._errorReport(res)
        paramValue = res['Value']
        gLogger.notice("%s will be set to '%s'" % (parameter, paramValue))
        if insert:
            setterName = "set%s" % parameter
            setter = getattr(self, setterName, None)
            if not callable(setter):
                # The placeholder is now filled with the name of the missing setter
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!"
                    % setterName)
            res = setter(paramValue)
            if not res['OK']:
                return res
        return S_OK(paramValue)
Example #11
0
class Transformation( API ):

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ Build the parameter tables and the client; optionally load an existing transformation

    :param transID: when non-zero, the transformation is loaded with :meth:`getTransformation`
    :param transClient: client to use; a ``TransformationClient`` is created when not given
    :raises AttributeError: when the load fails with 'Transformation does not exist'
    """
    super( Transformation, self ).__init__()

    # Accepted python types for each parameter checked by __setParam
    self.paramTypes = {
        'TransformationID' : [types.IntType, types.LongType],
        'TransformationName' : types.StringTypes,
        'Status' : types.StringTypes,
        'Description' : types.StringTypes,
        'LongDescription' : types.StringTypes,
        'Type' : types.StringTypes,
        'Plugin' : types.StringTypes,
        'AgentType' : types.StringTypes,
        'FileMask' : types.StringTypes,
        'TransformationGroup' : types.StringTypes,
        'GroupSize' : [types.IntType, types.LongType, types.FloatType],
        'InheritedFrom' : [types.IntType, types.LongType],
        'Body' : types.StringTypes,
        'MaxNumberOfTasks' : [types.IntType, types.LongType],
        'EventsPerTask' : [types.IntType, types.LongType],
    }
    # Initial value of each of those parameters
    self.paramValues = {
        'TransformationID' : 0,
        'TransformationName' : '',
        'Status' : 'New',
        'Description' : '',
        'LongDescription' : '',
        'Type' : '',
        'Plugin' : 'Standard',
        'AgentType' : 'Manual',
        'FileMask' : '',
        'TransformationGroup' : 'General',
        'GroupSize' : 1,
        'InheritedFrom' : 0,
        'Body' : '',
        'MaxNumberOfTasks' : 0,
        'EventsPerTask' : 0,
    }

    self.ops = Operations()
    fallbackPlugins = ['Broadcast', 'Standard', 'BySize', 'ByShare']
    self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins', fallbackPlugins )

    self.transClient = transClient if transClient else TransformationClient()
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if not transID:
      return

    # Load the existing transformation identified by transID
    self.paramValues['TransformationID'] = transID
    res = self.getTransformation()
    if res['OK']:
      self.exists = True
      return
    if res['Message'] == 'Transformation does not exist':
      raise AttributeError( 'TransformationID %d does not exist' % transID )
    # Any other failure: forget the ID and report
    self.paramValues['TransformationID'] = 0
    gLogger.fatal( "Failed to get transformation from database",
                   "%s @ %s" % ( transID, self.transClient.serverURL ) )

  def setServer( self, server ):
    self.serverURL = server
    self.transClient.setServer( self.serverURL )

  def getServer( self ):
    return self.serverURL

  def reset( self, transID = 0 ):
    """ Re-initialise this object, optionally for another transformation

    :param transID: forwarded to ``__init__``
    :returns: S_OK
    """
    self.__init__( transID )
    # NOTE(review): __init__ has just overwritten self.serverURL with the new
    # client's getServer() value, so a server set earlier through setServer()
    # does not appear to survive the reset - confirm whether that is intended.
    currentServer = self.serverURL
    self.transClient.setServer( currentServer )
    return S_OK()

  def setTargetSE( self, seList ):
    return self.__setSE( 'TargetSE', seList )

  def setSourceSE( self, seList ):
    return self.__setSE( 'SourceSE', seList )

  def setBody( self, body ):
    """ check that the body is a string, or using the proper syntax for multiple operations

    A string is stored as is. A list/tuple is validated entry by entry and
    stored as its JSON serialisation once *all* entries have been checked.

    :param body: transformation body, for example

      .. code :: python

        body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                 ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
               ]

    :type body: string or list of tuples (or lists) of string and dictionaries
    :raises TypeError: If the structure is not as expected
    :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used
    :returns: S_OK, S_ERROR
    """
    self.item_called = "Body"
    if isinstance( body, basestring ):
      return self.__setParam( body )
    if not isinstance( body, ( list, tuple ) ):
      raise TypeError( "Expected list or string, but %r is %s" % ( body, type( body ) ) )

    for tup in body:
      if not isinstance( tup, ( tuple, list ) ):
        raise TypeError( "Expected tuple or list, but %r is %s" % ( tup, type( tup ) ) )
      if len( tup ) != 2:
        raise TypeError( "Expected 2-tuple, but %r is length %d" % ( tup, len( tup ) ) )
      if not isinstance( tup[0], basestring ):
        raise TypeError( "Expected string, but first entry in tuple %r is %s" % ( tup, type( tup[0] ) ) )
      if not isinstance( tup[1], dict ):
        # report the type of the offending (second) entry
        raise TypeError( "Expected dictionary, but second entry in tuple %r is %s" % ( tup, type( tup[1] ) ) )
      for par, val in tup[1].iteritems():
        if not isinstance( par, basestring ):
          raise TypeError( "Expected string, but key in dictionary %r is %s" % ( par, type( par ) ) )
        if par not in Operation.ATTRIBUTE_NAMES:
          raise ValueError( "Unknown attribute for Operation: %s" % par )
        if not isinstance( val, ( basestring, int, long, float, list, tuple, dict ) ):
          raise TypeError( "Cannot encode %r, in json" % ( val, ) )

    # Store only after every entry has been validated; this also covers an
    # empty list, which previously fell through and returned None
    return self.__setParam( json.dumps( body ) )

  def __setSE( self, seParam, seList ):
    """ Normalise a storage element selection, check it and store it as ``seParam``

    :param seParam: name of the parameter to set (the callers in this class use 'TargetSE' / 'SourceSE')
    :param seList: a string (evaluated as a python expression, or split on ',' when that fails),
                   or a list / tuple / dict (converted with ``list()``)
    :returns: S_ERROR for any other argument type, the failed result of the SE check,
              or the result of ``__setParam``
    """
    if isinstance( seList, basestring ):
      try:
        # SECURITY NOTE(review): eval() executes arbitrary expressions. This is only
        # acceptable if seList never comes from an untrusted source - consider
        # ast.literal_eval once callers are confirmed to pass plain literals.
        seList = eval( seList )
      except Exception:
        # Not a python expression (e.g. "SE1,SE2"): treat it as a comma separated list.
        # KeyboardInterrupt / SystemExit are deliberately no longer swallowed here.
        seList = seList.split( ',' )
    elif isinstance( seList, ( list, dict, tuple ) ):
      seList = list( seList )
    else:
      return S_ERROR( "Bad argument type" )
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    self.item_called = seParam
    return self.__setParam( seList )

  def __getattr__( self, name ):
    if name.find( 'get' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__getParam
    if name.find( 'set' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__setParam
    raise AttributeError( name )

  def __getParam( self ):
    """ Generic getter for the parameter named by ``self.item_called``

    'Available' yields the keys of ``self.paramTypes`` and 'Parameters' the
    whole ``self.paramValues`` dictionary.

    :returns: S_OK(value)
    :raises AttributeError: when the parameter is not known
    """
    wanted = self.item_called
    if wanted == 'Available':
      return S_OK( self.paramTypes.keys() )
    if wanted == 'Parameters':
      return S_OK( self.paramValues )
    if wanted not in self.paramValues:
      raise AttributeError( "Unknown parameter for transformation: %s" % wanted )
    return S_OK( self.paramValues[wanted] )

  def __setParam( self, value ):
    """ Generic setter for the parameter named by ``self.item_called``

    A parameter listed in ``self.paramTypes`` must get a value of one of the
    listed types. When the value differs from the stored one it is first
    sent to the server (only if the transformation exists and has an ID)
    and then stored locally.

    :param value: new value of the parameter
    :returns: S_OK, or the failed result of ``setTransformationParameter``
    :raises TypeError: when a typed parameter gets a value of an unexpected type
    """
    name = self.item_called
    if name in self.paramTypes:
      change = False
      if self.paramValues[name] != value:
        allowedTypes = self.paramTypes[name]
        if type( value ) not in allowedTypes:
          raise TypeError( "%s %s %s expected one of %s" % ( name, value, type( value ), allowedTypes ) )
        change = True
    else:
      # Free (untyped) parameter: new, or different from the stored value
      change = name not in self.paramValues or self.paramValues[name] != value

    if not change:
      gLogger.verbose( "No change of parameter %s required" % name )
      return S_OK()

    gLogger.verbose( "Parameter %s to be changed" % name )
    transID = self.paramValues['TransformationID']
    if self.exists and transID:
      res = self.transClient.setTransformationParameter( transID, name, value )
      if not res['OK']:
        return res
    self.paramValues[name] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    """ Fetch the transformation from the server and load its parameters into this object

    Every returned parameter is passed to the matching ``set<Param>`` method.

    :param printOutput: when true, a failing result is passed to ``_prettyPrint``
    :returns: S_OK(parameters), S_ERROR when no TransformationID is set,
              or the failed client result
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setterName = "set%s" % paramName
      setter = getattr( self, setterName, None )
      # The guard has to look at the resolved setter; the name is never empty
      if not callable( setter ):
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    """ Get the logging entries of the current transformation

    :param printOutput: when true, print the entries (or the failing result)
    :returns: S_OK(list of entries), S_ERROR when no TransformationID is set,
              or the failed client result
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()

    res = self.transClient.getTransformationLogging( transID )
    if res['OK']:
      entries = res['Value']
      if printOutput:
        shownFields = ['Message', 'MessageDate', 'AuthorDN']
        self._printFormattedDictList( entries, shownFields, 'MessageDate', 'MessageDate' )
      return S_OK( entries )

    if printOutput:
      self._prettyPrint( res )
    return res

  def extendTransformation( self, nTasks, printOutput = False ):
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation( self, lfns, printOutput = False ):
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ):
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    printOutput = kwds.pop( 'printOutput' )
    fcn = None
    if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ):
      fcn = getattr( self.transClient, operation )
    if not fcn:
      return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID',
                                                                                'TargetSE', 'UsedSE', 'ErrorCount',
                                                                                'InsertedTime', 'LastUpdate'],
                              orderBy = 'FileID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID',
                                                                                   'ExternalStatus', 'ExternalID',
                                                                                   'TargetSE', 'CreationTime',
                                                                                   'LastUpdateTime'],
                              orderBy = 'TaskID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status',
                                                                                'AgentType', 'TransformationName',
                                                                                'CreationDate'],
                          orderBy = 'TransformationID', printOutput = False ):
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getAuthorDNfromProxy( self ):
    """ Read the identity and the username from the proxy information

    :returns: S_OK( {'username': ..., 'authorDN': ...} ), or S_ERROR with the
              message of ``getProxyInfo`` when it fails
    """
    proxyInfo = getProxyInfo()
    if not proxyInfo['OK']:
      gLogger.error( "Unable to get uploaded proxy Info %s " % proxyInfo['Message'] )
      return S_ERROR( proxyInfo['Message'] )

    details = proxyInfo['Value']
    author = details['identity']
    username = details['username']
    return S_OK( { 'username' : username, 'authorDN' : author } )

  #############################################################################
  def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [],
                                outputFields = ['TransformationID', 'Status',
                                                'AgentType', 'TransformationName',
                                                'CreationDate', 'AuthorDN'],
                                orderBy = 'TransformationID', printOutput = False ):
    """ List the transformations created by a given author

    When ``authorDN`` is empty it is resolved through :meth:`getAuthorDNfromProxy`;
    a non-empty ``userName`` must then match the username found in the proxy,
    otherwise an error is returned.

    :param authorDN: DN used as the ``AuthorDN`` selection; resolved from the proxy when empty
    :param userName: optional username that the proxy username has to match
    :param transID: optional ``TransformationID`` selection (ignored when empty)
    :param transStatus: optional ``Status`` selection (ignored when empty)
    :param outputFields: fields handed to ``_printFormattedDictList`` when printing
    :param orderBy: field used to order the printed output
    :param printOutput: when true, print the result or the error
    :returns: the result of ``transClient.getTransformations``, or S_ERROR when
              the author cannot be resolved
    """
    if authorDN == "":
      res = self.getAuthorDNfromProxy()
      if not res['OK']:
        gLogger.error( res['Message'] )
        return S_ERROR( res['Message'] )
      foundUserName = res['Value']['username']
      foundAuthor = res['Value']['authorDN']
      # If the user who created the uploaded proxy is different from the
      # requested username, report the error and exit
      if not ( userName == "" or userName == foundUserName ):
        message = "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % ( userName, foundUserName )
        gLogger.error( message )
        return S_ERROR( message )

      userName = foundUserName
      authorDN = foundAuthor
      gLogger.info( "Will list transformations created by user '%s' with status '%s'" % ( userName, ', '.join( transStatus ) ) )
    else:
      gLogger.info( "Will list transformations created by '%s' with status '%s'" % ( authorDN, ', '.join( transStatus ) ) )

    condDict = { 'AuthorDN' : authorDN }
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res

    if printOutput:
      if not outputFields:
        # join() belongs to the separator string, not to the sequence
        # (assumes res['ParameterNames'] is a list of strings - TODO confirm)
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getSummaryTransformations( self , transID = [] ):
    """ Show the summary for a list of transformations

    Columns whose header starts with 'F' ('J') refer to files (jobs);
    'Proc.' stands for processed.

    :param transID: list of transformation IDs used as the selection
    :returns: S_OK with one dictionary (short header -> value) per record,
              the failed client result, or S_ERROR when nothing was collected
    """
    # Names as they appear in the 'ParameterNames' of the service answer
    fullNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed',
                 'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting',
                 'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled']
    # Matching short headers, so that a printed row fits on one line
    shortNames = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.',
                  'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled']

    result = self.transClient.getTransformationSummaryWeb( { 'TransformationID' : transID }, [], 0, len( transID ) )
    if not result['OK']:
      self._prettyPrint( result )
      return result

    summaries = []
    if result['Value']['TotalRecords'] > 0:
      try:
        paramNames = result['Value']['ParameterNames']
        for record in result['Value']['Records']:
          values = [ record[ paramNames.index( fullName ) ] for fullName in fullNames ]
          summaries.append( dict( zip( shortNames, values ) ) )
      except Exception as x:
        # Best effort: whatever was collected before the failure is kept
        print( 'Exception %s ' % str( x ) )

    if len( summaries ) > 0:
      print( self._printFormattedDictList( summaries, shortNames, shortNames[0], shortNames[0] ) )
      return S_OK( summaries )

    gLogger.error( 'No found transformations satisfying input condition' )
    return S_ERROR( 'No found transformations satisfying input condition' )

  #############################################################################
  def addTransformation( self, addFiles = True, printOutput = False ):
    """ Create the transformation through the client from the current parameter values

    Runs :meth:`_checkCreation` first. After a successful creation the object
    is flagged as existing, the new ID is stored, and every parameter that is
    not listed in ``self.paramTypes`` is pushed with ``setTransformationParameter``.

    :param addFiles: forwarded to ``transClient.addTransformation``
    :param printOutput: when true, print the parameters and a failing result
    :returns: S_OK(transID), the failed client result, or the result of
              ``_errorReport`` when the sanity check fails
    """
    check = self._checkCreation()
    if not check['OK']:
      return self._errorReport( check, 'Failed transformation sanity check' )

    values = self.paramValues
    if printOutput:
      gLogger.info( "Will attempt to create transformation with the following parameters" )
      self._prettyPrint( values )

    res = self.transClient.addTransformation( values['TransformationName'],
                                              values['Description'],
                                              values['LongDescription'],
                                              values['Type'],
                                              values['Plugin'],
                                              values['AgentType'],
                                              values['FileMask'],
                                              transformationGroup = values['TransformationGroup'],
                                              groupSize = values['GroupSize'],
                                              inheritedFrom = values['InheritedFrom'],
                                              body = values['Body'],
                                              maxTasks = values['MaxNumberOfTasks'],
                                              eventsPerTask = values['EventsPerTask'],
                                              addFiles = addFiles )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res

    transID = res['Value']
    self.exists = True
    self.setTransformationID( transID )
    gLogger.notice( "Created transformation %d" % transID )

    # Parameters outside paramTypes were not part of the creation call
    for paramName, paramValue in self.paramValues.items():
      if paramName in self.paramTypes:
        continue
      res = self.transClient.setTransformationParameter( transID, paramName, paramValue )
      if res['OK']:
        continue
      gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, res['Message'] ) )
      gLogger.notice( "To add this parameter later please execute the following." )
      gLogger.notice( "oTransformation = Transformation(%d)" % transID )
      gLogger.notice( "oTransformation.set%s(...)" % paramName )
    return S_OK( transID )

  def _checkCreation( self ):
    """ Sanity-check the current definition before a transformation is created.

        Refuses to proceed when a TransformationID is already set, asks interactively
        (raw_input) for every empty required parameter, and prompts for a replacement
        when a non-empty Plugin is not listed in self.supportedPlugins.

        :return: S_OK() when the definition can be submitted, an S_ERROR structure otherwise
    """
    if self.paramValues['TransformationID']:
      gLogger.info( "You are currently working with an active transformation definition." )
      gLogger.info( "If you wish to create a new transformation reset the TransformationID." )
      gLogger.info( "oTransformation.reset()" )
      return S_ERROR()

    for parameter in ( 'TransformationName', 'Description', 'LongDescription', 'Type' ):
      if not self.paramValues[parameter]:
        gLogger.info( "%s is not defined for this transformation. This is required..." % parameter )
        self.paramValues[parameter] = raw_input( "Please enter the value of " + parameter + " " )

    plugin = self.paramValues['Plugin']
    # An empty plugin is left untouched; only a non-empty unknown one triggers the prompt
    if plugin and plugin not in self.supportedPlugins:
      gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin )
      res = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' )
      if not res['OK']:
        return res
      self.paramValues['Plugin'] = res['Value']

    return S_OK()

  def _checkBySizePlugin( self ):
    """ Check for the BySize plugin: delegates to _checkStandardPlugin.
    """
    return self._checkStandardPlugin()

  def _checkBySharePlugin( self ):
    """ Check for the ByShare plugin: delegates to _checkStandardPlugin.
    """
    return self._checkStandardPlugin()

  def _checkStandardPlugin( self ):
    """ Ensure GroupSize is positive; a value of zero or below is replaced by 1.

        :return: S_OK(), or the failing result of setGroupSize
    """
    if self.paramValues['GroupSize'] <= 0:
      res = self.setGroupSize( 1 )
      if not res['OK']:
        return res
      # Logged only after the setter succeeded, and worded to match the "<= 0" test above
      gLogger.info( "The GroupSize was found to be less than or equal to zero. It has been set to 1." )
    return S_OK()

  def _checkBroadcastPlugin( self ):
    """ Make sure the parameters needed by the Broadcast plugin are defined.

        Each of SourceSE / TargetSE that is unset is requested interactively (raw_input)
        and stored through the matching set<Param> method.

        :return: S_OK(), or the first failing S_ERROR structure
    """
    neededParams = ['SourceSE', 'TargetSE']
    gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ', '.join( neededParams ) )
    for neededParam in neededParams:
      if self.paramValues.get( neededParam ):
        continue
      answer = raw_input( "Please enter " + neededParam + " " )
      setterName = "set%s" % neededParam
      setter = getattr( self, setterName, None )
      if not callable( setter ):
        return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." % setterName )
      # The answer may be comma and/or blank separated
      outcome = setter( answer.replace( ',', ' ' ).split() )
      if not outcome['OK']:
        return outcome
    return S_OK()

  def __checkSEs( self, seList ):
    """ Verify that every name in seList is a section of /Resources/StorageElements.

        :param seList: iterable of storage element names
        :return: S_OK() when all are known, S_ERROR with the number of unknown names otherwise
    """
    sections = gConfig.getSections( '/Resources/StorageElements' )
    if not sections['OK']:
      return self._errorReport( sections, 'Failed to get possible StorageElements' )
    unknown = set( seList ).difference( sections['Value'] )
    if not unknown:
      return S_OK()
    for seName in unknown:
      gLogger.error( "StorageElement %s is not known" % seName )
    return S_ERROR( "%d StorageElements not known" % len( unknown ) )

  def __promptForParameter( self, parameter, choices = [], default = '', insert = True ):
    """ Ask the user for the value of a parameter and optionally store it.

        :param parameter: parameter name, used in the prompt and to find the set<parameter> method
        :param choices: forwarded to promptUser (the shared default list is never modified here)
        :param default: forwarded to promptUser
        :param insert: when true the answer is stored through set<parameter>
        :return: S_OK( value entered ), or an S_ERROR structure
    """
    res = promptUser( "Please enter %s" % parameter, choices = choices, default = default )
    if not res['OK']:
      return self._errorReport( res )
    paramValue = res['Value']
    gLogger.notice( "%s will be set to '%s'" % ( parameter, paramValue ) )
    if insert:
      setterName = "set%s" % parameter
      setter = getattr( self, setterName, None )
      if not callable( setter ):
        # The setter name is now interpolated; previously the literal "%s" was returned
        return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" % setterName )
      res = setter( paramValue )
      if not res['OK']:
        return res
    return S_OK( paramValue )
Example #12
0
# Clients used below to look up the production and the transformations
prodClient = ProductionClient()
transClient = TransformationClient()

# Positional arguments: <prodID> <transID> [<parentTransID>]
args = Script.getPositionalArgs()
if (len(args) == 3):
  parentTransID = args[2]
elif (len(args) == 2):
  # no parent given: an empty string disables the parent lookup further down
  parentTransID = ''
else:
  # NOTE(review): presumably showHelp() terminates the script; if it returns,
  # args[0] / args[1] below may raise IndexError — TODO confirm
  Script.showHelp()

prodID = args[0]
transID = args[1]

# Look the transformation up and replace the user input by the TransformationID it returns
res = transClient.getTransformation(transID)
if not res['OK']:
  DIRAC.gLogger.error('Failed to get transformation %s: %s' % (transID, res['Message']))
  DIRAC.exit(-1)

transID = res['Value']['TransformationID']

# Same lookup for the optional parent transformation
if parentTransID:
  res = transClient.getTransformation(parentTransID)
  if not res['OK']:
    DIRAC.gLogger.error('Failed to get transformation %s: %s' % (parentTransID, res['Message']))
    DIRAC.exit(-1)
  parentTransID = res['Value']['TransformationID']

res = prodClient.getProduction(prodID)
if not res['OK']:
Example #13
0
class Transformation( API ):

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ c'tor

        :param transID: when non-zero, the definition is loaded from the service via getTransformation()
        :param transClient: client object to use; a new TransformationClient is created when omitted
        :raises AttributeError: when the service answers 'Transformation does not exist'
    """
    super( Transformation, self ).__init__()

    # Accepted types per parameter; __setParam checks type( value ) against these
    self.paramTypes = { 'TransformationID'      : [types.IntType, types.LongType],
                          'TransformationName'    : types.StringTypes,
                          'Status'                : types.StringTypes,
                          'Description'           : types.StringTypes,
                          'LongDescription'       : types.StringTypes,
                          'Type'                  : types.StringTypes,
                          'Plugin'                : types.StringTypes,
                          'AgentType'             : types.StringTypes,
                          'FileMask'              : types.StringTypes,
                          'TransformationGroup'   : types.StringTypes,
                          'GroupSize'             : [types.IntType, types.LongType, types.FloatType],
                          'InheritedFrom'         : [types.IntType, types.LongType],
                          'Body'                  : types.StringTypes,
                          'MaxNumberOfTasks'      : [types.IntType, types.LongType],
                          'EventsPerTask'         : [types.IntType, types.LongType]}
    # Initial values of a fresh definition
    self.paramValues = { 'TransformationID'      : 0,
                          'TransformationName'    : '',
                          'Status'                : 'New',
                          'Description'           : '',
                          'LongDescription'       : '',
                          'Type'                  : '',
                          'Plugin'                : 'Standard',
                          'AgentType'             : 'Manual',
                          'FileMask'              : '',
                          'TransformationGroup'   : 'General',
                          'GroupSize'             : 1,
                          'InheritedFrom'         : 0,
                          'Body'                  : '',
                          'MaxNumberOfTasks'       : 0,
                          'EventsPerTask'          : 0}

    self.supportedPlugins = ['Broadcast', 'Standard', 'BySize', 'ByShare']
    if not transClient:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    self.serverURL = self.transClient.getServer()
    # exists stays False while loading, so the setters only update the local copy
    self.exists = False
    if transID:
      self.paramValues['TransformationID'] = transID
      res = self.getTransformation()
      if res['OK']:
        self.exists = True
      elif res['Message'] == 'Transformation does not exist':
        # call form of raise: valid in both Python 2 and Python 3
        raise AttributeError( 'TransformationID %d does not exist' % transID )
      else:
        self.paramValues['TransformationID'] = 0
        gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID, self.transClient.serverURL ) )

  def setServer( self, server ):
    """ Remember server as the service URL and hand it to the transformation client.
    """
    self.serverURL = server
    self.transClient.setServer( self.serverURL )

  def getServer( self ):
    """ Return the service URL currently stored on this object.
    """
    return self.serverURL

  def reset( self, transID = 0 ):
    """ Re-run the constructor, optionally binding the object to transID.

        :return: S_OK()
    """
    # NOTE(review): __init__ is called without transClient, so a client injected at construction
    # is replaced by a new TransformationClient and serverURL is re-read from it — confirm intended
    self.__init__( transID )
    self.transClient.setServer( self.serverURL )
    return S_OK()

  def setTargetSE( self, seList ):
    """ Validate seList and store it as the 'TargetSE' parameter (see __setSE).
    """
    return self.__setSE( 'TargetSE', seList )

  def setSourceSE( self, seList ):
    """ Validate seList and store it as the 'SourceSE' parameter (see __setSE).
    """
    return self.__setSE( 'SourceSE', seList )

  def __setSE( self, se, seList ):
    """ Normalise a storage element selection, validate it and store it as parameter se.

        :param se: name of the parameter to set ('TargetSE' or 'SourceSE' for the callers in this class)
        :param seList: list of SE names, or a string holding either a Python list literal
                       or names separated by commas and/or blanks
        :return: result of __checkSEs on failure, otherwise the result of __setParam
    """
    if isinstance( seList, types.StringTypes ):
      rawValue = seList
      try:
        # NOTE(review): eval() executes arbitrary expressions; if this string can come from an
        # untrusted source it should be replaced by ast.literal_eval
        seList = eval( rawValue )
      except Exception:
        # not a Python expression: treat it as a comma/blank separated list of names
        seList = rawValue.replace( ',', ' ' ).split()
      if isinstance( seList, types.StringTypes ):
        # a quoted string was evaluated: split its content instead of iterating its characters
        seList = seList.replace( ',', ' ' ).split()
      elif not isinstance( seList, ( list, tuple, set ) ):
        # the expression produced something that is not a collection (e.g. a number)
        seList = rawValue.replace( ',', ' ' ).split()
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    # __setParam reads the parameter name from self.item_called
    self.item_called = se
    return self.__setParam( seList )

  def __getattr__( self, name ):
    """ Provide get<Param>() / set<Param>( value ) accessors for names not defined on the class.

        The parameter name is stored in self.item_called and the generic __getParam or
        __setParam bound method is returned; that method reads self.item_called when it
        is finally invoked.

        :raises AttributeError: for any name that does not start with 'get' or 'set'
    """
    if name.startswith( 'get' ):
      self.item_called = name[3:]
      return self.__getParam
    if name.startswith( 'set' ):
      self.item_called = name[3:]
      return self.__setParam
    # call form of raise: valid in both Python 2 and Python 3
    raise AttributeError( name )

  def __getParam( self ):
    """ Generic getter behind get<Param>(); the parameter name is read from self.item_called.

        'Available' yields the names declared in paramTypes, 'Parameters' the whole value
        dictionary, any other known name its current value.

        :return: S_OK( value )
        :raises AttributeError: when the name is not a key of paramValues
    """
    item = self.item_called
    if item == 'Available':
      return S_OK( list( self.paramTypes ) )
    if item == 'Parameters':
      return S_OK( self.paramValues )
    if item in self.paramValues:
      return S_OK( self.paramValues[item] )
    raise AttributeError( "Unknown parameter for transformation: %s" % item )

  def __setParam( self, value ):
    """ Generic setter behind set<Param>( value ); the parameter name is read from self.item_called.

        Parameters declared in paramTypes are type-checked. When the value differs from the
        stored one it is sent to the service (only if the transformation exists there and has
        an ID) and then stored locally.

        :return: S_OK(), or the failing result of setTransformationParameter
        :raises TypeError: when a declared parameter receives a value of an unexpected type
    """
    item = self.item_called
    change = False
    if item in self.paramTypes:
      if self.paramValues[item] != value:
        allowedTypes = self.paramTypes[item]
        # exact type match on purpose: subclasses of the listed types are not accepted
        if type( value ) not in allowedTypes:
          raise TypeError( "%s %s %s expected one of %s" % ( item, value, type( value ), allowedTypes ) )
        change = True
    else:
      # undeclared parameter: accept anything, change when new or different
      change = item not in self.paramValues or self.paramValues[item] != value
    if not change:
      gLogger.verbose( "No change of parameter %s required" % item )
    else:
      gLogger.verbose( "Parameter %s to be changed" % item )
      transID = self.paramValues['TransformationID']
      if self.exists and transID:
        res = self.transClient.setTransformationParameter( transID, item, value )
        if not res['OK']:
          return res
      self.paramValues[item] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    """ Fetch the definition of the current TransformationID and load it into this object.

        Every returned parameter is passed to the matching set<Param> method.

        :param printOutput: when true, pretty-print the result structure of a failed query
        :return: S_OK( parameter dictionary returned by the service ), or an S_ERROR structure
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setterName = "set%s" % paramName
      setter = getattr( self, setterName, None )
      # test the resolved setter, not its name: the name is always a non-empty string
      if not callable( setter ):
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    """ Retrieve the logging entries of the current transformation.

        :param printOutput: when true, print the entries (or the failing result structure)
        :return: S_OK( list of entries ), or an S_ERROR structure
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    result = self.transClient.getTransformationLogging( transID )
    if result['OK']:
      entries = result['Value']
      if printOutput:
        self._printFormattedDictList( entries, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' )
      return S_OK( entries )
    if printOutput:
      self._prettyPrint( result )
    return result

  # The methods below forward to the TransformationClient method of the same name through
  # __executeOperation, which passes the current TransformationID as first argument.

  def extendTransformation( self, nTasks, printOutput = False ):
    """ Call transClient.extendTransformation( transID, nTasks ).
    """
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    """ Call transClient.cleanTransformation( transID ); on success the local Status becomes 'Cleaned'.
    """
    res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation( self, printOutput = False ):
    """ Call transClient.deleteTransformation( transID ); on success this object is reset().
    """
    res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation( self, lfns, printOutput = False ):
    """ Call transClient.addFilesToTransformation( transID, lfns ).
    """
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    """ Call transClient.setFileStatusForTransformation( transID, status, lfns ).
    """
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    """ Call transClient.getTransformationTaskStats( transID ).
    """
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    """ Call transClient.getTransformationStats( transID ).
    """
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    """ Call transClient.deleteTasks( transID, taskMin, taskMax ).
    """
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ):
    """ Call transClient.addTaskForTransformation( transID, lfns, se ).
    """
    # the default list is only passed through here, never modified
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    """ Call transClient.setTaskStatus( transID, taskID, status ).
    """
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    """ Invoke a TransformationClient method on the current transformation.

        :param operation: name of the client method; it is called as fcn( transID, *parms, **kwds )
        :param printOutput: keyword only, removed from kwds before the call; when true the
                            result structure is pretty-printed. Defaults to False when omitted.
        :return: the result of the client call, or an S_ERROR structure
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    # default avoids a KeyError when a caller does not pass printOutput
    printOutput = kwds.pop( 'printOutput', False )
    fcn = getattr( self.transClient, operation, None )
    if not callable( fcn ):
      return S_ERROR( "Unable to invoke %s, it isn't a member function of TransformationClient" % operation )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def getTransformationFiles( self, fileStatus = [], lfns = [],
                              outputFields = ['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE',
                                              'ErrorCount', 'InsertedTime', 'LastUpdate'],
                              orderBy = 'FileID', printOutput = False ):
    """ Query the files of the current transformation.

        :param fileStatus: optional selection on 'Status'
        :param lfns: optional selection on 'LFN'
        :param outputFields: fields shown when printing; an empty value lists the available fields instead
        :param orderBy: field passed to _printFormattedDictList as ordering key
        :param printOutput: when true, print the outcome
        :return: the result structure of transClient.getTransformationFiles

        The list defaults are only read, never modified.
    """
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        # str.join replaces the deprecated string.join (same single-blank separator)
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No files found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [],
                              outputFields = ['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID',
                                              'TargetSE', 'CreationTime', 'LastUpdateTime'],
                              orderBy = 'TaskID', printOutput = False ):
    """ Query the tasks of the current transformation.

        :param taskStatus: optional selection on 'ExternalStatus'
        :param taskIDs: optional selection on 'TaskID'
        :param outputFields: fields shown when printing; an empty value lists the available fields instead
        :param orderBy: field passed to _printFormattedDictList as ordering key
        :param printOutput: when true, print the outcome
        :return: the result structure of transClient.getTransformationTasks

        The list defaults are only read, never modified.
    """
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        # str.join replaces the deprecated string.join (same single-blank separator)
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [],
                          outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName',
                                          'CreationDate'],
                          orderBy = 'TransformationID', printOutput = False ):
    """ Query transformations known to the service.

        :param transID: optional selection on 'TransformationID'
        :param transStatus: optional selection on 'Status'
        :param outputFields: fields shown when printing; an empty value lists the available fields instead
        :param orderBy: field passed to _printFormattedDictList as ordering key
        :param printOutput: when true, print the outcome
        :return: the result structure of transClient.getTransformations

        The list defaults are only read, never modified.
    """
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        # str.join replaces the deprecated string.join (same single-blank separator)
        gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No transformations found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def addTransformation( self, addFiles = True, printOutput = False ):
    """ Register the locally defined transformation with the transformation service.

        :param addFiles: forwarded unchanged to transClient.addTransformation
        :param printOutput: when true, pretty-print the parameters before submission and
                            the result structure if the creation call fails
        :return: S_OK( transID ) on success, otherwise the failing result structure
    """
    res = self._checkCreation()
    if not res['OK']:
      return self._errorReport( res, 'Failed transformation sanity check' )
    if printOutput:
      gLogger.info( "Will attempt to create transformation with the following parameters" )
      self._prettyPrint( self.paramValues )

    res = self.transClient.addTransformation( self.paramValues['TransformationName'],
                                             self.paramValues['Description'],
                                             self.paramValues['LongDescription'],
                                             self.paramValues['Type'],
                                             self.paramValues['Plugin'],
                                             self.paramValues['AgentType'],
                                             self.paramValues['FileMask'],
                                             transformationGroup = self.paramValues['TransformationGroup'],
                                             groupSize = self.paramValues['GroupSize'],
                                             inheritedFrom = self.paramValues['InheritedFrom'],
                                             body = self.paramValues['Body'],
                                             maxTasks = self.paramValues['MaxNumberOfTasks'],
                                             eventsPerTask = self.paramValues['EventsPerTask'],
                                             addFiles = addFiles )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transID = res['Value']
    # from here on the setters also update the service
    self.exists = True
    self.setTransformationID( transID )
    gLogger.info( "Created transformation %d" % transID )
    # Parameters that are not declared in paramTypes are sent to the service one by one
    for paramName, paramValue in self.paramValues.items():
      if paramName not in self.paramTypes:
        res = self.transClient.setTransformationParameter( transID, paramName, paramValue )
        if not res['OK']:
          gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, res['Message'] ) )
          gLogger.info( "To add this parameter later please execute the following." )
          gLogger.info( "oTransformation = Transformation(%d)" % transID )
          gLogger.info( "oTransformation.set%s(...)" % paramName )
    return S_OK( transID )

  def _checkCreation( self ):
    """ Sanity-check the current definition before a transformation is created.

        Refuses to proceed when a TransformationID is already set, prompts for every empty
        required parameter and for an unknown Plugin, then runs the plugin specific
        _check<Plugin>Plugin method.

        :return: the result of the plugin check, or an S_ERROR structure
    """
    if self.paramValues['TransformationID']:
      for hint in ( "You are currently working with an active transformation definition.",
                    "If you wish to create a new transformation reset the TransformationID.",
                    "oTransformation.reset()" ):
        gLogger.info( hint )
      return S_ERROR()

    for parameter in ( 'TransformationName', 'Description', 'LongDescription', 'Type' ):
      if self.paramValues[parameter]:
        continue
      gLogger.info( "%s is not defined for this transformation. This is required..." % parameter )
      prompted = self.__promptForParameter( parameter )
      if not prompted['OK']:
        return prompted

    if self.paramValues['Plugin'] not in self.supportedPlugins:
      gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % self.paramValues['Plugin'] )
      prompted = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' )
      if not prompted['OK']:
        return prompted

    # Dispatch on the (possibly just updated) plugin name
    checkerName = "_check%sPlugin" % self.paramValues['Plugin']
    checker = getattr( self, checkerName, None )
    if not callable( checker ):
      return S_ERROR( "Unable to invoke %s, it isn't a member function" % checkerName )
    return checker()

  def _checkBySizePlugin( self ):
    """ Check for the BySize plugin: delegates to _checkStandardPlugin.
    """
    return self._checkStandardPlugin()

  def _checkBySharePlugin( self ):
    """ Check for the ByShare plugin: delegates to _checkStandardPlugin.
    """
    return self._checkStandardPlugin()

  def _checkStandardPlugin( self ):
    """ Ensure GroupSize is positive; a value of zero or below is replaced by 1.

        :return: S_OK(), or the failing result of setGroupSize
    """
    if self.paramValues['GroupSize'] <= 0:
      res = self.setGroupSize( 1 )
      if not res['OK']:
        return res
      # Logged only after the setter succeeded, and worded to match the "<= 0" test above
      gLogger.info( "The GroupSize was found to be less than or equal to zero. It has been set to 1." )
    return S_OK()

  def _checkBroadcastPlugin( self ):
    """ Make sure the parameters needed by the Broadcast plugin are defined.

        Each of SourceSE / TargetSE that is missing or empty is requested from the user and
        stored through the matching set<Param> method.

        :return: S_OK(), or the first failing S_ERROR structure
    """
    requiredParams = ['SourceSE', 'TargetSE']
    gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ', '.join( requiredParams ) )
    for requiredParam in requiredParams:
      # .get covers both "key missing" and "value empty"
      if not self.paramValues.get( requiredParam ):
        # insert = False: the answer is split into a list before it is stored below
        res = self.__promptForParameter( requiredParam, insert = False )
        if not res['OK']:
          return res
        paramValue = res['Value']
        setterName = "set%s" % requiredParam
        setter = getattr( self, setterName, None )
        if not callable( setter ):
          return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." % setterName )
        ses = paramValue.replace( ',', ' ' ).split()
        res = setter( ses )
        if not res['OK']:
          return res
    return S_OK()

  def __checkSEs( self, seList ):
    """ Verify that every name in seList is a section of /Resources/StorageElements.

        :param seList: iterable of storage element names
        :return: S_OK() when all are known, S_ERROR with the number of unknown entries otherwise
    """
    sections = gConfig.getSections( '/Resources/StorageElements' )
    if not sections['OK']:
      return self._errorReport( sections, 'Failed to get possible StorageElements' )
    unknown = [seName for seName in seList if seName not in sections['Value']]
    for seName in unknown:
      gLogger.error( "StorageElement %s is not known" % seName )
    if unknown:
      return S_ERROR( "%d StorageElements not known" % len( unknown ) )
    return S_OK()

  def __promptForParameter( self, parameter, choices = [], default = '', insert = True ):
    """ Ask the user for the value of a parameter and optionally store it.

        :param parameter: parameter name, used in the prompt and to find the set<parameter> method
        :param choices: forwarded to promptUser (the shared default list is never modified here)
        :param default: forwarded to promptUser
        :param insert: when true the answer is stored through set<parameter>
        :return: S_OK( value entered ), or an S_ERROR structure
    """
    res = promptUser( "Please enter %s" % parameter, choices = choices, default = default )
    if not res['OK']:
      return self._errorReport( res )
    paramValue = res['Value']
    gLogger.info( "%s will be set to '%s'" % ( parameter, paramValue ) )
    if insert:
      setterName = "set%s" % parameter
      setter = getattr( self, setterName, None )
      if not callable( setter ):
        # The setter name is now interpolated; previously the literal "%s" was returned
        return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" % setterName )
      res = setter( paramValue )
      if not res['OK']:
        return res
    return S_OK( paramValue )
Example #14
0
class TransformationCLI(CLI, API):
    def __init__(self):
        """Create the transformation client and initialise both base classes."""
        self.server = TransformationClient()
        # column width printPair uses to pad the key and to indent continuation lines
        self.indentSpace = 4
        CLI.__init__(self)
        API.__init__(self)

    def printPair(self, key, value, separator=":"):
        """Print ``key <separator> value``, with the following value lines indented below.

        The key is padded to ``self.indentSpace`` columns. Every line of ``value`` is
        stripped before printing. A whitespace-only last line (what a docstring leaves
        before its closing quotes) is dropped; a last line that carries text is printed.
        """
        valueList = value.split("\n")
        print("%s%s%s %s" % (key, " " * (self.indentSpace - len(key)),
                             separator, valueList[0].strip()))
        continuation = valueList[1:]
        if continuation and not continuation[-1].strip():
            continuation.pop()
        for valueLine in continuation:
            print("%s  %s" % (" " * self.indentSpace, valueLine.strip()))

    def do_help(self, args):
        """ Default version of the help command
       Usage: help <command>
       OR use helpall to see description for all commands"""
        # The docstring above is shown to the user by do_helpall, so its text is runtime output.
        # Delegates unchanged to the base CLI implementation.
        CLI.do_help(self, args)

    # overriding the default help command
    def do_helpall(self, args):
        """
    Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
    """
        # Leading whitespace of a docstring is stripped instead of blindly cutting its first
        # character, and a missing docstring is treated as empty text.
        tokens = args.split()
        if not tokens:
            print("\nAvailable commands:\n")
            for attribute in sorted(dir(self)):
                if attribute.startswith("do_"):
                    doc = getattr(self, attribute).__doc__ or ""
                    self.printPair(attribute[3:], doc.lstrip())
                    print("")
        else:
            command = tokens[0]
            try:
                obj = getattr(self, "do_%s" % command)
            except AttributeError:
                print("There's no such %s command" % command)
                return
            self.printPair(command, (obj.__doc__ or "").lstrip())

    def do_shell(self, args):
        """Execute a shell command

       usage !<shell_command>
    """
        res = shellCall(0, args)
        if not res['OK']:
            print(res['Message'])
            return
        # A command that ran but exited non-zero has no 'Message' key: report its status
        # and output instead of failing on the missing key.
        returnCode, stdOut, stdErr = res['Value']
        if returnCode != 0:
            print("Command exited with status %s" % returnCode)
        print("%s\n%s" % (stdOut, stdErr))

    def check_params(self, args, num):
        """Check that at least ``num`` whitespace-separated arguments were supplied.

        Returns ``(argument list, count)`` when enough arguments are present, otherwise
        prints an error and returns ``(False, count)``.
        """
        argss = args.split()
        length = len(argss)
        if length < num:
            print("Error: Number of arguments provided %d less than required %d, please correct." % (
                length, num))
            return (False, length)
        return (argss, length)

    def check_id_or_name(self, id_or_name):
        """Return ``id_or_name`` as a long when it consists only of digits, unchanged otherwise."""
        # an all-digit string is taken to be an ID rather than a name
        return long(id_or_name) if id_or_name.isdigit() else id_or_name

    ####################################################################
    #
    # These are the methods for transformation manipulation
    #

    def do_getall(self, args):
        """Get transformation details

       usage: getall [Status] [Status]
    """
        # Every whitespace-separated word of args is used as a status selection;
        # with no words the selection is empty.
        oTrans = Transformation()
        oTrans.getTransformations(transStatus=args.split(), printOutput=True)

    def do_getAllByUser(self, args):
        """Get all transformations created by a given user

The first argument is the authorDN or username. The authorDN
is preferred: it need to be inside quotes because contains
white spaces. Only authorDN should be quoted.

When the username is provided instead, 
the authorDN is retrieved from the uploaded proxy,
so that the retrieved transformations are those created by
the user who uploaded that proxy: that user could be different
that the username provided to the function.

       usage: getAllByUser authorDN or username [Status] [Status]
    """
        oTrans = Transformation()
        argss = args.split()
        username = ""
        author = ""
        status = []
        if not len(argss) > 0:
            print self.do_getAllByUser.__doc__
            return

        # if the user didnt quoted the authorDN ends
        if '=' in argss[0] and argss[0][0] not in ["'", '"']:
            print "AuthorDN need to be quoted (just quote that argument)"
            return

        if argss[0][0] in ["'", '"']:  # authorDN given
            author = argss[0]
            status_idx = 1
            for arg in argss[1:]:
                author += ' ' + arg
                status_idx += 1
                if arg[-1] in ["'", '"']:
                    break
            # At this point we should have something like 'author'
            if not author[0] in ["'", '"'] or not author[-1] in ["'", '"']:
                print "AuthorDN need to be quoted (just quote that argument)"
                return
            else:
                author = author[1:-1]  # throw away the quotes
            # the rest are the requested status
            status = argss[status_idx:]
        else:  # username given
            username = argss[0]
            status = argss[1:]

        oTrans.getTransformationsByUser(authorDN=author,
                                        userName=username,
                                        transStatus=status,
                                        printOutput=True)

    def do_summaryTransformations(self, args):
        """Show the summary for a list of Transformations

    Fields starting with 'F' ('J')  refers to files (jobs).
    Proc. stand for processed.

        Usage: summaryTransformations <ProdID> [<ProdID> ...]
    """
        prodIDs = args.split()
        if not prodIDs:
            # no ID given: show the usage text above
            print(self.do_summaryTransformations.__doc__)
            return

        Transformation().getSummaryTransformations(transID=prodIDs)

    def do_getStatus(self, args):
        """Get transformation details

       usage: getStatus <transName|ID>
    """
        transNames = args.split()
        if not transNames:
            print("no transformation supplied")
            return
        # one query per transformation; a failure is reported and the loop goes on
        for transName in transNames:
            result = self.server.getTransformation(transName)
            if result['OK']:
                print("%s: %s" % (transName, result['Value']['Status']))
            else:
                print("Getting status of %s failed: %s" % (transName,
                                                           result['Message']))

    def do_setStatus(self, args):
        """Set transformation status

       usage: setStatus  <Status> <transName|ID>
       Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
    """
        tokens = args.split()
        if len(tokens) < 2:
            print("transformation and status not supplied")
            return
        # first word is the status, all remaining words are transformations
        status, transNames = tokens[0], tokens[1:]
        for transName in transNames:
            result = self.server.setTransformationParameter(
                transName, 'Status', status)
            if result['OK']:
                print("%s set to %s" % (transName, status))
            else:
                print("Setting status of %s failed: %s" % (transName,
                                                           result['Message']))

    def do_start(self, args):
        """Start transformation

       usage: start <transName|ID>
    """
        transNames = args.split()
        if not transNames:
            print("no transformation supplied")
            return
        for transName in transNames:
            # Status first; AgentType is only touched when the status change succeeded
            result = self.server.setTransformationParameter(
                transName, 'Status', 'Active')
            if not result['OK']:
                print("Setting Status of %s failed: %s" % (transName,
                                                           result['Message']))
                continue
            result = self.server.setTransformationParameter(
                transName, 'AgentType', 'Automatic')
            if not result['OK']:
                print("Setting AgentType of %s failed: %s" % (
                    transName, result['Message']))
                continue
            print("%s started" % transName)

    def do_stop(self, args):
        """Stop transformation

        usage: stop <transName|ID>

        For each transformation the 'AgentType' parameter is set to 'Manual'.
        """
        argss = args.split()
        # A single emptiness check is enough; the original repeated it verbatim.
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(
                transName, 'AgentType', 'Manual')
            if not res['OK']:
                print("Stopping of %s failed: %s" % (transName,
                                                     res['Message']))
            else:
                print("%s stopped" % transName)

    def do_flush(self, args):
        """Flush transformation

        usage: flush <transName|ID>

        For each transformation the 'Status' parameter is set to 'Flush'.
        """
        targets = args.split()
        if not targets:
            print("no transformation supplied")
            return
        for target in targets:
            outcome = self.server.setTransformationParameter(
                target, 'Status', 'Flush')
            if outcome['OK']:
                print("%s flushing" % target)
            else:
                print("Flushing of %s failed: %s" % (target,
                                                     outcome['Message']))

    def do_get(self, args):
        """Get transformation definition

        usage: get <transName|ID>

        Only the first argument is used. The 'Body' entry is removed from the
        returned definition before it is printed.
        """
        tokens = args.split()
        if not tokens:
            print("no transformation supplied")
            return
        target = tokens[0]
        reply = self.server.getTransformation(target)
        if reply['OK']:
            definition = reply['Value']
            definition.pop('Body')
            printDict(definition)
        else:
            print("Failed to get %s: %s" % (target, reply['Message']))

    def do_getBody(self, args):
        """Get transformation body

        usage: getBody <transName|ID>

        Only the first argument is used; the 'Body' entry of the returned
        definition is printed.
        """
        tokens = args.split()
        if not tokens:
            print("no transformation supplied")
            return
        target = tokens[0]
        reply = self.server.getTransformation(target)
        if reply['OK']:
            print(reply['Value']['Body'])
        else:
            print("Failed to get %s: %s" % (target, reply['Message']))

    def do_getFileStat(self, args):
        """Get transformation file statistics

        usage: getFileStat <transName|ID>

        Only the first argument is used. The 'Total' entry is removed from the
        statistics before they are printed.
        """
        tokens = args.split()
        if not tokens:
            print("no transformation supplied")
            return
        target = tokens[0]
        reply = self.server.getTransformationStats(target)
        if reply['OK']:
            stats = reply['Value']
            stats.pop('Total')
            printDict(stats)
        else:
            print("Failed to get statistics for %s: %s" % (target,
                                                           reply['Message']))

    def do_modMask(self, args):
        """Modify transformation input definition

        usage: modMask <mask> <transName|ID> [<transName|ID>...]

        The first word is the new file mask; it is stored as the 'FileMask'
        parameter of every transformation that follows.
        """
        argss = args.split()
        # Both a mask and at least one transformation are required; with only
        # one word the loop below would silently do nothing.
        if len(argss) < 2:
            print("mask and transformation not supplied")
            return
        mask = argss[0]
        transNames = argss[1:]
        for transName in transNames:
            res = self.server.setTransformationParameter(
                transName, "FileMask", mask)
            if not res['OK']:
                print("Failed to modify input file mask for %s: %s" %
                      (transName, res['Message']))
            else:
                print("Updated %s filemask" % transName)

    def do_getFiles(self, args):
        """Get files for the transformation (optionally with a given status)

        usage: getFiles <transName|ID> [Status] [Status]

        The transformation is looked up first so that the file query can be
        made with its 'TransformationID'; any extra words are passed as the
        'Status' selection.
        """
        tokens = args.split()
        if not tokens:
            print("no transformation supplied")
            return
        target, wantedStatuses = tokens[0], tokens[1:]
        transRes = self.server.getTransformation(target)
        if not transRes['OK']:
            print("Failed to get transformation information: %s" %
                  transRes['Message'])
            return
        condition = {'TransformationID': transRes['Value']['TransformationID']}
        if wantedStatuses:
            condition['Status'] = wantedStatuses
        filesRes = self.server.getTransformationFiles(condDict=condition)
        if not filesRes['OK']:
            print("Failed to get transformation files: %s" %
                  filesRes['Message'])
        elif filesRes['Value']:
            columns = ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate']
            self._printFormattedDictList(filesRes['Value'], columns, 'LFN',
                                         'LFN')
        else:
            print("No files found")

    def do_getFileStatus(self, args):
        """Get file(s) status for the given transformation

        usage: getFileStatus <transName|ID> <lfn> [<lfn>...]

        All files of the transformation are fetched and then filtered locally
        against the LFNs given on the command line.
        """
        tokens = args.split()
        if len(tokens) < 2:
            print("transformation and file not supplied")
            return
        target, lfns = tokens[0], tokens[1:]

        transRes = self.server.getTransformation(target)
        if not transRes['OK']:
            print("Failed to get transformation information: %s" %
                  transRes['Message'])
            return
        condition = {'TransformationID': transRes['Value']['TransformationID']}
        filesRes = self.server.getTransformationFiles(condDict=condition)
        if not filesRes['OK']:
            print("Failed to get transformation files: %s" %
                  filesRes['Message'])
            return
        if not filesRes['Value']:
            print("No files found")
            return
        matching = [entry for entry in filesRes['Value']
                    if entry['LFN'] in lfns]
        if matching:
            columns = ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate']
            self._printFormattedDictList(matching, columns, 'LFN', 'LFN')
        else:
            print("Could not find any LFN in %s for transformation %s" %
                  (lfns, target))

    def do_getOutputFiles(self, args):
        """Get output files for the transformation

        usage: getOutputFiles <transName|ID>

        The transformation is first checked with the server, then the file
        catalog is queried for files whose 'ProdID' metadata equals the given
        argument; each matching LFN is printed on its own line.
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" %
                  res['Message'])
            return
        fc = FileCatalog()
        res = fc.findFilesByMetadata({'ProdID': transName})
        if not res['OK']:
            print(res['Message'])
            return
        if not res['Value']:
            # Format with %s: the argument may be a transformation name, for
            # which the former '%d' % int(...) raised ValueError.
            print('No output files yet for transformation %s' % transName)
            return
        for lfn in res['Value']:
            print(lfn)

    def do_getInputDataQuery(self, args):
        """Get input data query for the transformation

        usage: getInputDataQuery <transName|ID>

        Only the first argument is used; the value returned by the server is
        printed as is.
        """
        tokens = args.split()
        if not tokens:
            print("no transformation supplied")
            return
        reply = self.server.getTransformationInputDataQuery(tokens[0])
        if reply['OK']:
            print(reply['Value'])
        else:
            print("Failed to get transformation input data query: %s" %
                  reply['Message'])

    def do_setFileStatus(self, args):
        """Set file status for the given transformation

        usage: setFileStatus <transName|ID> <lfn> <status>

        Exactly three words are required.
        """
        tokens = args.split()
        if len(tokens) != 3:
            print("transformation file and status not supplied")
            return
        target, lfn, newStatus = tokens
        reply = self.server.setFileStatusForTransformation(
            target, newStatus, [lfn])
        if reply['OK']:
            print("Updated file status to %s" % newStatus)
        else:
            print("Failed to update file status: %s" % reply['Message'])

    def do_resetFile(self, args):
        """Reset file status for the given transformation

        usage: resetFile <transName|ID> <lfns>

        The listed files are set to status 'Unused'. Files the server reports
        under 'Failed' are printed together with the reason.
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file(s) not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(
            transName, 'Unused', lfns)
        if not res['OK']:
            print("Failed to reset file status: %s" % res['Message'])
            return
        # Only report failures when the 'Failed' entry is non-empty, matching
        # do_resetProcessedFile; an empty 'Failed' dict means full success.
        if 'Failed' in res['Value'] and res['Value']['Failed']:
            print("Could not reset some files: ")
            for lfn, reason in res['Value']['Failed'].items():
                print("%s %s" % (lfn, reason))
        else:
            print("Updated file statuses to 'Unused' for %d file(s)" %
                  len(lfns))

    def do_resetProcessedFile(self, args):
        """Reset file status for the given transformation, with force=True

        usage: resetProcessedFile <transName|ID> <lfn> [<lfn>...]

        The listed files are set to status 'Unused'; the request is sent to
        the server with force=True.
        """
        tokens = args.split()
        if len(tokens) < 2:
            print("transformation and file(s) not supplied")
            return
        target, lfns = tokens[0], tokens[1:]
        reply = self.server.setFileStatusForTransformation(target,
                                                           'Unused',
                                                           lfns,
                                                           force=True)
        if not reply['OK']:
            print("Failed to reset file status: %s" % reply['Message'])
            return
        if 'Failed' in reply['Value'] and reply['Value']['Failed']:
            print("Could not reset some files: ")
            for lfn, reason in reply['Value']['Failed'].items():
                print("%s %s" % (lfn, reason))
            return
        print("Updated file statuses to 'Unused' for %d file(s)" % len(lfns))

    ####################################################################
    #
    # These are the methods for file manipulation
    #

    def do_addDirectory(self, args):
        """Add files from the given catalog directory

        usage: addDirectory <directory> [directory]

        Each directory is added through the server with force=True.
        """
        directories = args.split()
        if not directories:
            print("no directory supplied")
            return
        for directory in directories:
            outcome = self.server.addDirectory(directory, force=True)
            if outcome['OK']:
                print('added %s files for %s' % (outcome['Value'], directory))
            else:
                print('failed to add directory %s: %s' % (directory,
                                                          outcome['Message']))

    def do_replicas(self, args):
        """Get replicas for <path>

        usage: replicas <lfn> [lfn]

        Failures are listed first, then one line per successful LFN showing
        the LFN (padded to 100 characters) followed by its storage elements
        (each padded to 15 characters), all in sorted order.
        """
        lfns = args.split()
        if not lfns:
            print("no files supplied")
            return
        reply = self.server.getReplicas(lfns)
        if not reply['OK']:
            print("failed to get any replica information: %s" %
                  reply['Message'])
            return
        failed = reply['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to get replica information for %s: %s" %
                  (lfn, failed[lfn]))
        successful = reply['Value']['Successful']
        for lfn in sorted(successful):
            columns = ["%s :" % lfn.ljust(100)]
            columns.extend(se.ljust(15) for se in sorted(successful[lfn]))
            print(" ".join(columns))

    def do_addFile(self, args):
        """Add new files to transformation DB

        usage: addFile <lfn> [lfn]

        Every LFN is registered with placeholder PFN/SE/GUID/Checksum values
        and a size of 0; the request is sent with force=True.
        """
        lfns = args.split()
        if not lfns:
            print("no files supplied")
            return
        # One independent placeholder record per LFN
        fileRecords = dict((lfn, {
            'PFN': 'IGNORED-PFN',
            'SE': 'IGNORED-SE',
            'Size': 0,
            'GUID': 'IGNORED-GUID',
            'Checksum': 'IGNORED-CHECKSUM'
        }) for lfn in lfns)
        reply = self.server.addFile(fileRecords, force=True)
        if not reply['OK']:
            print("failed to add any files: %s" % reply['Message'])
            return
        failed = reply['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to add %s: %s" % (lfn, failed[lfn]))
        for lfn in sorted(reply['Value']['Successful']):
            print("added %s" % lfn)

    def do_removeFile(self, args):
        """Remove file from transformation DB

        usage: removeFile <lfn> [lfn]

        Failures are reported first, then the removed LFNs, both sorted.
        """
        lfns = args.split()
        if not lfns:
            print("no files supplied")
            return
        reply = self.server.removeFile(lfns)
        if not reply['OK']:
            print("failed to remove any files: %s" % reply['Message'])
            return
        failed = reply['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to remove %s: %s" % (lfn, failed[lfn]))
        for lfn in sorted(reply['Value']['Successful']):
            print("removed %s" % lfn)

    def do_addReplica(self, args):
        """Add new replica to the transformation DB

        usage: addReplica <lfn> <se>

        Exactly two words are required. The replica is registered with
        placeholder PFN/GUID/Checksum values and a size of 0; the request is
        sent with force=True.
        """
        tokens = args.split()
        if len(tokens) != 2:
            print("no file info supplied")
            return
        lfn, se = tokens
        replicaInfo = {
            lfn: {
                'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        }
        reply = self.server.addReplica(replicaInfo, force=True)
        if not reply['OK']:
            print("failed to add replica: %s" % reply['Message'])
            return
        failed = reply['Value']['Failed']
        for name in sorted(failed):
            print("failed to add replica: %s" % (failed[name]))
        for name in sorted(reply['Value']['Successful']):
            print("added %s" % name)

    def do_removeReplica(self, args):
        """Remove replica from the transformation DB

        usage: removeReplica <lfn> <se>

        Exactly two words are required. The replica is described with
        placeholder PFN/GUID/Checksum values and a size of 0.
        """
        tokens = args.split()
        if len(tokens) != 2:
            print("no file info supplied")
            return
        lfn, se = tokens
        replicaInfo = {
            lfn: {
                'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        }
        reply = self.server.removeReplica(replicaInfo)
        if not reply['OK']:
            print("failed to remove replica: %s" % reply['Message'])
            return
        failed = reply['Value']['Failed']
        for name in sorted(failed):
            print("failed to remove replica: %s" % (failed[name]))
        for name in sorted(reply['Value']['Successful']):
            print("removed %s" % name)

    def do_setReplicaStatus(self, args):
        """Set replica status, usually used to mark a replica Problematic

        usage: setReplicaStatus <lfn> <status> <se>

        At least three words are required; only the first three are used.
        """
        tokens = args.split()
        if len(tokens) < 3:
            print("no file info supplied")
            return
        lfn, newStatus, se = tokens[0], tokens[1], tokens[2]
        replicaInfo = {
            lfn: {
                'Status': newStatus,
                'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        }
        reply = self.server.setReplicaStatus(replicaInfo)
        if not reply['OK']:
            print("failed to set replica status: %s" % reply['Message'])
            return
        failed = reply['Value']['Failed']
        for name in sorted(failed):
            print("failed to set replica status: %s" % (failed[name]))
        for name in sorted(reply['Value']['Successful']):
            print("updated replica status %s" % name)
Example #15
0
class ConsistencyInspector(object):
    """ A class for handling some consistency checks
  """
    def __init__(self,
                 interactive=True,
                 transClient=None,
                 dm=None,
                 fc=None,
                 dic=None):
        """ Constructor

        interactive: when true, progress text is written to stdout by __write
        transClient: transformation client to use; a TransformationClient() is
                     created only when None is given
        dm: data manager to use; a DataManager() is created when falsy
        fc: file catalog to use; a FileCatalog() is created when falsy
        dic: data integrity client to use; a DataIntegrityClient() is created
             when falsy
        """
        self.interactive = interactive

        # Collaborators: fall back to freshly built defaults
        if transClient is None:
            transClient = TransformationClient()
        self.transClient = transClient
        self.dm = dm or DataManager()
        self.fc = fc or FileCatalog()
        self.dic = dic or DataIntegrityClient()
        self.dirac = Dirac()

        # Starting points of the consistency checks
        self._prod = 0
        self._bkQuery = None
        self._fileType = []
        self._fileTypesExcluded = []
        self._lfns = []
        self.noLFC = False
        self.directories = []

        # Run / production selection helpers and replica cache
        self.runsList = []
        self.runStatus = None
        self.fromProd = None
        self.transType = ''
        self.cachedReplicas = {}

        # Result containers: processed / non-processed files and descendants
        self.prcdWithDesc = []
        self.prcdWithoutDesc = []
        self.prcdWithMultDesc = []
        self.nonPrcdWithDesc = []
        self.nonPrcdWithoutDesc = []
        self.nonPrcdWithMultDesc = []
        self.descForPrcdLFNs = []
        self.descForNonPrcdLFNs = []
        self.removedFiles = []

        # Result containers: catalog / storage comparison
        self.absentLFNsInFC = []
        self.existLFNsNoSE = {}
        self.existLFNsBadReplicas = {}
        self.existLFNsBadFiles = {}
        self.existLFNsNotExisting = {}
        self.commonAncestors = {}
        self.multipleDescendants = {}
        self.ancestors = {}

        # When set, __logVerbose logs at notice level with a prefix
        self._verbose = False

    def __logVerbose(self, msg, msg1=''):
        """ Log a message, promoted to notice level when self._verbose is set

        msg: main message
        msg1: optional second message passed on to the logger

        In verbose mode every line of both messages is prefixed with
        '[ConsistencyChecks] ' followed by '[<prod>] ' when self.prod is set.
        Otherwise the messages go unchanged to gLogger.verbose.
        """
        if self._verbose:
            # Build the prefix step by step: the former one-line conditional
            # expression bound looser than '+', so the whole prefix vanished
            # whenever self.prod was falsy.
            prefix = '[ConsistencyChecks] '
            if self.prod:
                prefix += '[%s] ' % str(self.prod)
            # Add that prefix to all lines of the message
            newMsg1 = msg1.replace('\n', '\n' + prefix)
            newMsg = prefix + msg.replace('\n', '\n' + prefix)
            gLogger.notice(newMsg, newMsg1)
        else:
            gLogger.verbose(msg, msg1)

    ################################################################################

    def checkFC2SE(self):
        """ Run compareChecksum on self.lfns and store its findings

        The four categories of the report are copied into the existLFNs*
        attributes.
        """
        report = self.compareChecksum(self.lfns)
        # NOTE(review): compareChecksum can also return an S_ERROR structure,
        # in which case the lookups below raise KeyError - confirm intent.
        self.existLFNsNoSE = report['MissingReplica']
        self.existLFNsNotExisting = report['MissingAllReplicas']
        self.existLFNsBadReplicas = report['SomeReplicasCorrupted']
        self.existLFNsBadFiles = report['AllReplicasCorrupted']

    def getReplicasPresence(self, lfns):
        """ Get the replicas using the standard FileCatalog.getReplicas()

        The LFNs are queried in chunks of 100; each chunk is tried up to nine
        times with a 0.1 second pause after a failed call. Successful replicas
        are also stored in self.cachedReplicas.

        Returns a tuple (present, notPresent) of LFN lists.
        """
        withReplicas = set()
        withoutReplicas = set()

        chunkSize = 100
        showDots = len(lfns) > chunkSize
        began = time.time()
        if showDots:
            suffix = ' (chunks of %d)' % chunkSize
        else:
            suffix = '... '
        self.__write("Checking replicas for %d files%s" % (len(lfns), suffix))
        for chunk in breakListIntoChunks(lfns, chunkSize):
            if showDots:
                self.__write('.')
            for _ in xrange(1, 10):
                res = self.fc.getReplicas(chunk)
                if not res['OK']:
                    # Brief pause before the next attempt on this chunk
                    time.sleep(0.1)
                    continue
                successful = res['Value']['Successful']
                withReplicas.update(successful)
                self.cachedReplicas.update(successful)
                withoutReplicas.update(res['Value']['Failed'])
                break
        self.__write(' (%.1f seconds)\n' % (time.time() - began))

        if withoutReplicas:
            self.__logVerbose("Files without replicas:",
                              '\n'.join([''] + sorted(withoutReplicas)))
        return list(withReplicas), list(withoutReplicas)

    ################################################################################

    def getReplicasPresenceFromDirectoryScan(self, lfns):
        """ Get replicas scanning the directories. Might be faster.

        The LFNs are grouped by directory name and every directory is scanned
        once. If any entry of lfns is itself a directory path (it equals its
        own dirname plus '/'), no comparison is made and every file found is
        reported as present.

        Returns a tuple (present, notPresent) of LFN lists.
        """
        byDirectory = {}
        doCompare = True
        for lfn in lfns:
            parent = os.path.dirname(lfn)
            if lfn == parent + '/':
                doCompare = False
            byDirectory.setdefault(parent, []).append(lfn)

        if doCompare:
            self.__write(
                "Checking File Catalog for %d files from %d directories " %
                (len(lfns), len(byDirectory)))
        else:
            self.__write("Getting files from %d directories " %
                         len(byDirectory))
        began = time.time()

        found = []
        missing = []
        for parent in sorted(byDirectory):
            scanBegan = time.time()
            self.__write('.')
            scanned = self._getFilesFromDirectoryScan(parent)
            gLogger.verbose("Obtained %d files in %.1f seconds" %
                            (len(scanned), time.time() - scanBegan))
            if not doCompare:
                found += scanned
                continue
            inBoth, onlyRequested = self.__compareLFNLists(
                byDirectory[parent], scanned)
            missing += onlyRequested
            found += inBoth

        self.__write(' (%.1f seconds)\n' % (time.time() - began))
        gLogger.info("Found %d files with replicas and %d without" %
                     (len(found), len(missing)))
        return found, missing

    ################################################################################

    def __compareLFNLists(self, lfns, lfnsFound):
        """ Return files in both lists and files in lfns and not in lfnsFound

        When lfnsFound is empty, lfns itself is returned as the second element
        (same object, order and duplicates kept); otherwise both results are
        built from sets.
        """
        began = time.time()
        self.__logVerbose("Comparing list of %d LFNs with second list of %d" %
                          (len(lfns), len(lfnsFound)))
        if not lfnsFound:
            inBoth, onlyRequested = [], lfns
        else:
            requested = set(lfns)
            available = set(lfnsFound)
            inBoth = list(requested & available)
            onlyRequested = list(requested - available)
        self.__logVerbose("End of comparison: %.1f seconds" %
                          (time.time() - began))
        return inBoth, onlyRequested

    def _getFilesFromDirectoryScan(self, dirs):
        """ Call dm.getFilesFromDirectory with logging temporarily silenced

        dirs: directory (or directories) handed to dm.getFilesFromDirectory

        Returns the list of files found, or an empty list when the call fails
        or yields nothing. Failures other than 'No such file or directory'
        are logged as errors.
        """
        level = gLogger.getLevel()
        gLogger.setLevel('FATAL')
        try:
            res = self.dm.getFilesFromDirectory(dirs)
        finally:
            # Always restore the level, even if the call raises; otherwise
            # logging would stay at FATAL afterwards.
            gLogger.setLevel(level)
        if not res['OK']:
            if 'No such file or directory' not in res['Message']:
                gLogger.error(
                    "Error getting files from directories %s:" % dirs,
                    res['Message'])
            return []
        return res['Value'] or []

    ################################################################################

    def _getTSFiles(self):
        """ Helper function - get files from the TS

        The selection is made on self.prod, restricted to self._lfns when
        given, otherwise to the runs in self.runsList (extended from the runs
        of self.fromProd with status self.runStatus when both are set).

        Returns a tuple (processedLFNs, nonProcessedLFNs, nonProcessedStatuses).
        Three empty lists are returned when the transformation or its files
        cannot be retrieved. The process exits when a run selection was
        requested but no run is selected.
        """

        selectDict = {'TransformationID': self.prod}
        if self._lfns:
            selectDict['LFN'] = self._lfns
        elif self.runStatus and self.fromProd:
            res = self.transClient.getTransformationRuns({
                'TransformationID':
                self.fromProd,
                'Status':
                self.runStatus
            })
            if not res['OK']:
                # The query was made against fromProd, so report that ID along
                # with the reason given by the server.
                gLogger.error(
                    "Failed to get runs for transformation %s" % self.fromProd,
                    res['Message'])
            else:
                if res['Value']:
                    self.runsList.extend([
                        run['RunNumber'] for run in res['Value']
                        if run['RunNumber'] not in self.runsList
                    ])
                    gLogger.notice("%d runs selected" % len(res['Value']))
                elif not self.runsList:
                    gLogger.notice("No runs selected, check completed")
                    DIRAC.exit(0)
        if not self._lfns and self.runsList:
            selectDict['RunNumber'] = self.runsList

        res = self.transClient.getTransformation(self.prod)
        if not res['OK']:
            gLogger.error("Failed to find transformation %s" % self.prod)
            return [], [], []
        status = res['Value']['Status']
        if status not in ('Active', 'Stopped', 'Completed', 'Idle'):
            gLogger.notice(
                "Transformation %s in status %s, will not check if files are processed"
                % (self.prod, status))
            # Without a processing check, explicitly given LFNs are all
            # treated as processed.
            processedLFNs = []
            nonProcessedLFNs = []
            nonProcessedStatuses = []
            if self._lfns:
                processedLFNs = self._lfns
        else:
            res = self.transClient.getTransformationFiles(selectDict)
            if not res['OK']:
                # %s keeps this consistent with the other messages and does
                # not fail on a non-integer transformation ID.
                gLogger.error(
                    "Failed to get files for transformation %s" % self.prod,
                    res['Message'])
                return [], [], []
            processedLFNs = [
                item['LFN'] for item in res['Value']
                if item['Status'] == 'Processed'
            ]
            nonProcessed = [
                item for item in res['Value']
                if item['Status'] != 'Processed'
            ]
            nonProcessedLFNs = [item['LFN'] for item in nonProcessed]
            nonProcessedStatuses = list(
                set(item['Status'] for item in nonProcessed))

        return processedLFNs, nonProcessedLFNs, nonProcessedStatuses

    def __getDirectories(self):
        """ Get the directories to look into, taken from self.directories

        An entry ending with '...' is expanded: its parent directory is listed
        in the file catalog and every sub-directory starting with the text
        before the '...' is kept. The expanded list is printed when at least
        one such entry was seen.

        Returns the list of directories, or an S_ERROR structure when no
        directory is set or when a catalog listing fails.
        """
        if not self.directories:
            return S_ERROR(errno.ENOENT, 'Need to specify the directories')

        expanded = []
        wildcardSeen = False
        for entry in self.directories:
            if not entry.endswith('...'):
                expanded.append(entry)
                continue
            wildcardSeen = True
            parent = os.path.dirname(entry)
            listing = self.fc.listDirectory(parent)
            if not listing['OK']:
                return S_ERROR(errno.ENOENT, listing['Message'])
            prefix = entry.split('...')[0]
            subDirs = listing['Value']['Successful'].get(parent, {}).get(
                'SubDirs', [])
            expanded += [d for d in subDirs if d.startswith(prefix)]
        if wildcardSeen:
            gLogger.always('Expanded list of %d directories:\n%s' %
                           (len(expanded), '\n'.join(expanded)))
        return expanded

    ################################################################################

    def __write(self, text):
        if self.interactive:
            sys.stdout.write(text)
            sys.stdout.flush()
            print text

    ################################################################################

    def _selectByFileType(self,
                          lfnDict,
                          fileTypes=None,
                          fileTypesExcluded=None):
        """ Select only those files from the values of lfnDict that have a certain type
    """
        if not lfnDict:
            return {}
        if not fileTypes:
            fileTypes = self.fileType
        if not fileTypesExcluded:
            fileTypesExcluded = self.fileTypesExcluded
        else:
            fileTypesExcluded += [
                ft for ft in self.fileTypesExcluded
                if ft not in fileTypesExcluded
            ]
        # lfnDict is a dictionary of dictionaries including the metadata, create a deep copy to get modified
        ancDict = dict(lfnDict)
        if fileTypes == ['']:
            fileTypes = []
        # and loop on the original dictionaries
        for ancestor in lfnDict:
            for desc in lfnDict[ancestor].keys():
                ft = lfnDict[ancestor][desc]['FileType']
                if ft in fileTypesExcluded or (fileTypes
                                               and ft not in fileTypes):
                    ancDict[ancestor].pop(desc)
            if not len(ancDict[ancestor]):
                ancDict.pop(ancestor)
        return ancDict

    @staticmethod
    def _getFileTypesCount(lfnDict):
        """ return file types count
    """
        ft_dict = {}
        for ancestor in lfnDict:
            t_dict = {}
            for desc in lfnDict[ancestor]:
                ft = lfnDict[ancestor][desc]['FileType']
                t_dict[ft] = t_dict.setdefault(ft, 0) + 1
            ft_dict[ancestor] = t_dict

        return ft_dict

    def __getLFNsFromFC(self):
        """ Get the LFNs present / not present in the file catalog

        When self.lfns is set the replicas of those files are checked;
        otherwise the directories from __getDirectories are scanned, each one
        given a trailing '/'.

        Returns a tuple (present, notPresent) of LFN lists; two empty lists
        are returned when the directories cannot be determined.
        """
        if self.lfns:
            return self.getReplicasPresence(self.lfns)

        dirList = self.__getDirectories()
        # __getDirectories returns a list on success but an S_ERROR structure
        # (a dictionary) on failure; iterating that would treat its keys as
        # directory names.
        if isinstance(dirList, dict):
            gLogger.error("Failed to get the list of directories",
                          dirList.get('Message', ''))
            return [], []
        directories = []
        for dirName in dirList:
            if not dirName.endswith('/'):
                dirName += '/'
            directories.append(dirName)
        return self.getReplicasPresenceFromDirectoryScan(directories)

    def compareChecksum(self, lfns):
        """Compare the catalog checksum of files with the checksum of their physical replicas.

        Replicas found in ``self.cachedReplicas`` are reused, the others are
        looked up in the file catalog (``self.fc``) by chunks. The checksum of
        every replica is then requested from its storage element and compared
        with the catalog checksum using ``compareAdler``.

        :param lfns: iterable of LFNs to check
        :return: S_ERROR if a catalog query fails, otherwise S_OK with a
                 dictionary made of five sub-dictionaries keyed by LFN:

                 * 'AllReplicasCorrupted': no good replica and none reported missing
                 * 'SomeReplicasCorrupted': at least one good and one bad replica
                 * 'MissingReplica': at least one good replica, and at least one
                   replica not found on its SE (value: list of those SEs)
                 * 'MissingAllReplicas': no good replica and at least one
                   replica not found on its SE (value: 'All')
                 * 'NoReplicas': files for which the catalog replica lookup failed
        """
        retDict = {
            'AllReplicasCorrupted': {},
            'SomeReplicasCorrupted': {},
            'MissingReplica': {},
            'MissingAllReplicas': {},
            'NoReplicas': {}
        }

        chunkSize = 100
        replicas = {}
        setLfns = set(lfns)
        # Reuse the replicas that were already obtained by a previous lookup
        cachedLfns = setLfns & set(self.cachedReplicas)
        for lfn in cachedLfns:
            replicas[lfn] = self.cachedReplicas[lfn]
        lfnsLeft = list(setLfns - cachedLfns)
        if lfnsLeft:
            self.__write("Get replicas for %d files (chunks of %d): " %
                         (len(lfnsLeft), chunkSize))
            for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize):
                self.__write('.')
                replicasRes = self.fc.getReplicas(lfnChunk)
                if not replicasRes['OK']:
                    gLogger.error("error:  %s" % replicasRes['Message'])
                    return S_ERROR(errno.ENOENT,
                                   "error:  %s" % replicasRes['Message'])
                replicasRes = replicasRes['Value']
                if replicasRes['Failed']:
                    retDict['NoReplicas'].update(replicasRes['Failed'])
                replicas.update(replicasRes['Successful'])

        self.__write("Get FC metadata for %d files to be checked: " %
                     len(lfns))
        metadata = {}
        # list(): a dict view cannot be sliced into chunks on Python 3
        for lfnChunk in breakListIntoChunks(list(replicas), chunkSize):
            self.__write('.')
            res = self.fc.getFileMetadata(lfnChunk)
            if not res['OK']:
                return S_ERROR(errno.ENOENT, "error %s" % res['Message'])
            metadata.update(res['Value']['Successful'])

        gLogger.notice("Check existence and compare checksum file by file...")
        csDict = {}
        seFiles = {}
        # Reverse the LFN->SE dictionary
        nReps = 0
        for lfn in replicas:
            csDict.setdefault(lfn, {})['LFCChecksum'] = metadata.get(
                lfn, {}).get('Checksum')
            for se in replicas[lfn]:
                seFiles.setdefault(se, []).append(lfn)
                nReps += 1

        gLogger.notice('Getting checksum of %d replicas in %d SEs' %
                       (nReps, len(seFiles)))
        checkSum = {}
        lfnNotExisting = {}
        lfnNoInfo = {}
        # Silence the logger while querying the SEs; the finally clause makes
        # sure the previous level is restored even if a query raises
        logLevel = gLogger.getLevel()
        gLogger.setLevel('FATAL')
        try:
            for num, se in enumerate(sorted(seFiles)):
                self.__write('\n%d. At %s (%d files): ' %
                             (num, se, len(seFiles[se])))
                oSe = StorageElement(se)
                notFound = 0
                for surlChunk in breakListIntoChunks(seFiles[se], chunkSize):
                    self.__write('.')
                    seMetadata = oSe.getFileMetadata(surlChunk)
                    if not seMetadata['OK']:
                        gLogger.error(
                            "Error: getFileMetadata returns %s. Ignore those replicas"
                            % (seMetadata['Message']))
                        # Only the files of the failed chunk have an unknown
                        # status: flagging the whole SE would hide the checksums
                        # successfully obtained for the other chunks
                        for lfn in surlChunk:
                            lfnNoInfo.setdefault(lfn, []).append(se)
                    else:
                        seMetadata = seMetadata['Value']
                        notFound += len(seMetadata['Failed'])
                        for lfn in seMetadata['Failed']:
                            lfnNotExisting.setdefault(lfn, []).append(se)
                        for lfn in seMetadata['Successful']:
                            checkSum.setdefault(
                                lfn,
                                {})[se] = seMetadata['Successful'][lfn]['Checksum']
                if notFound:
                    gLogger.error('%d files not found' % notFound)
        finally:
            gLogger.setLevel(logLevel)

        gLogger.notice('Verifying checksum of %d files' % len(replicas))
        for lfn in replicas:
            # get the lfn checksum from the LFC
            replicaDict = replicas[lfn]
            oneGoodReplica = False
            allGoodReplicas = True
            lfcChecksum = csDict[lfn].pop('LFCChecksum')
            for se in replicaDict:
                # If replica doesn't exist skip check
                if se in lfnNotExisting.get(lfn, []):
                    allGoodReplicas = False
                    continue
                if se in lfnNoInfo.get(lfn, []):
                    # If there is no info, a priori it could be good
                    oneGoodReplica = True
                    continue
                # get the surls metadata and compare the checksum
                surlChecksum = checkSum.get(lfn, {}).get(se, '')
                if not surlChecksum or not compareAdler(
                        lfcChecksum, surlChecksum):
                    # if lfcChecksum does not match surlChecksum
                    csDict[lfn][se] = {'PFNChecksum': surlChecksum}
                    gLogger.info(
                        "ERROR!! checksum mismatch at %s for LFN %s:  LFC checksum: %s , PFN checksum : %s "
                        % (se, lfn, lfcChecksum, surlChecksum))
                    allGoodReplicas = False
                else:
                    oneGoodReplica = True
            if not oneGoodReplica:
                if lfn in lfnNotExisting:
                    gLogger.info("=> All replicas are missing", lfn)
                    retDict['MissingAllReplicas'][lfn] = 'All'
                else:
                    gLogger.info("=> All replicas have bad checksum", lfn)
                    retDict['AllReplicasCorrupted'][lfn] = csDict[lfn]
            elif not allGoodReplicas:
                if lfn in lfnNotExisting:
                    gLogger.info("=> At least one replica missing", lfn)
                    retDict['MissingReplica'][lfn] = lfnNotExisting[lfn]
                else:
                    gLogger.info("=> At least one replica with good Checksum",
                                 lfn)
                    retDict['SomeReplicasCorrupted'][lfn] = csDict[lfn]

        return S_OK(retDict)

    ################################################################################
    # properties

    def set_prod(self, value):
        """Setter of the ``prod`` property.

        A true value is converted to ``int`` and looked up with
        ``self.transClient.getTransformation``; when the lookup succeeds the
        transformation type is stored in ``self.transType``. A false value
        (None, 0, '') resets the production ID to 0 without any lookup.

        :param value: production (transformation) ID, or a false value
        """
        if value:
            value = int(value)
            res = self.transClient.getTransformation(value, extraParams=False)
            if not res['OK']:
                # A property setter cannot hand an S_ERROR back to its caller,
                # so report the failure instead of building an error structure
                # that is immediately thrown away
                gLogger.error("Couldn't find transformation %d: %s" %
                              (value, res['Message']))
            else:
                self.transType = res['Value']['Type']
                # Only announce the type when it was really obtained
                if self.interactive:
                    gLogger.info("Production %d has type %s" %
                                 (value, self.transType))
        else:
            value = 0
        self._prod = value

    def get_prod(self):
        """Return the production ID held in ``self._prod``."""
        prodID = self._prod
        return prodID

    # ``prod`` attribute: reads go through get_prod, assignments through set_prod
    prod = property(get_prod, set_prod)

    def set_fileType(self, value):
        """Store the upper-cased version of every file type of ``value``."""
        upperTypes = []
        for fileType in value:
            upperTypes.append(fileType.upper())
        self._fileType = upperTypes

    def get_fileType(self):
        """Return the list of file types held in ``self._fileType``."""
        fileTypes = self._fileType
        return fileTypes

    # ``fileType`` attribute: reads go through get_fileType, assignments through set_fileType
    fileType = property(get_fileType, set_fileType)

    def set_fileTypesExcluded(self, value):
        """Store the upper-cased version of every excluded file type of ``value``."""
        upperTypes = []
        for fileType in value:
            upperTypes.append(fileType.upper())
        self._fileTypesExcluded = upperTypes

    def get_fileTypesExcluded(self):
        """Return the list of excluded file types held in ``self._fileTypesExcluded``."""
        excludedTypes = self._fileTypesExcluded
        return excludedTypes

    # ``fileTypesExcluded`` attribute: reads go through get_fileTypesExcluded,
    # assignments through set_fileTypesExcluded
    fileTypesExcluded = property(get_fileTypesExcluded, set_fileTypesExcluded)

    def set_lfns(self, value):
        """Setter of the ``lfns`` property.

        A single LFN given as a string is wrapped into a list. Every LFN is
        stored with its blanks removed and each '//' replaced by '/'.

        :param value: one LFN (string) or an iterable of LFNs
        """
        # (str, type(u"")) is (str, unicode) on Python 2 and just str on
        # Python 3, where the ``basestring`` builtin no longer exists
        if isinstance(value, (str, type(u""))):
            value = [value]
        self._lfns = [
            lfn.replace(' ', '').replace('//', '/') for lfn in value
        ]

    def get_lfns(self):
        """Return the list of LFNs held in ``self._lfns``."""
        lfnList = self._lfns
        return lfnList

    # ``lfns`` attribute: reads go through get_lfns, assignments through set_lfns
    lfns = property(get_lfns, set_lfns)

    ###############################################################################################
    #
    #  This part was backported from DataIntegrityClient
    #
    #
    #  This section contains the specific methods for File Catalog->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """Run the File Catalog -> SE check for one or several catalog directories.

        The replicas and metadata of the directory contents are obtained with
        ``_getCatalogDirectoryContents`` and handed to ``checkPhysicalFiles``.

        :param lfnDir: a directory LFN (string) or a list of directory LFNs
        :return: the first failing result, otherwise S_OK with a dictionary
                 holding 'CatalogMetadata' and 'CatalogReplicas'
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->SE check")
        gLogger.info("-" * 40)
        # (str, type(u"")) is (str, unicode) on Python 2 and just str on
        # Python 3, where the ``basestring`` builtin no longer exists
        if isinstance(lfnDir, (str, type(u""))):
            lfnDir = [lfnDir]
        res = self._getCatalogDirectoryContents(lfnDir)
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        res = self.checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        return S_OK({
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        })

    def catalogFileToSE(self, lfns):
        """Run the File Catalog -> SE check for a list of files.

        The catalog metadata is obtained with ``_getCatalogMetadata``, the
        replicas of the files that have metadata with ``_getCatalogReplicas``,
        and both are handed to ``checkPhysicalFiles``.

        :param lfns: one LFN (string) or a list of LFNs
        :return: the first failing result, otherwise S_OK with a dictionary
                 holding 'CatalogMetadata' and 'CatalogReplicas'
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->SE check")
        gLogger.info("-" * 40)
        # (str, type(u"")) is (str, unicode) on Python 2 and just str on
        # Python 3, where the ``basestring`` builtin no longer exists
        if isinstance(lfns, (str, type(u""))):
            lfns = [lfns]
        res = self._getCatalogMetadata(lfns)
        if not res['OK']:
            return res
        catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res['Value']
        # Pass a real list: on Python 3 ``keys()`` is only a view on the dict
        res = self._getCatalogReplicas(list(catalogMetadata))
        if not res['OK']:
            return res
        replicas, _zeroReplicaFiles = res['Value']
        res = self.checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        return S_OK({
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        })

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """Check the supplied catalog replicas against the storage elements.

        The replicas are grouped by storage element, the physical metadata of
        each group is obtained with ``__checkPhysicalFileMetadata`` and the
        physical size is compared with the catalog size. Mismatches are
        reported through ``self.dic.reportProblematicReplicas``.

        :param replicas: dictionary {lfn: {se: url}} as obtained from the catalog
        :param catalogMetadata: dictionary {lfn: metadata}; 'Size' is the only
                                metadata field read here
        :param ses: optional container of SE names; when given, replicas at
                    other SEs are ignored
        :return: S_OK() or the first failing result of the metadata lookup
        """

        #FIXME: we better use the compareChecksum function instead of this one! or maybe directly checkFC2SE

        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        # Reverse the LFN->SE dictionary. ``items()`` is used rather than the
        # Python 2 only ``iteritems()`` so that this runs on both versions
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se in replicaDict:
                if ses and se not in ses:
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info('%s %s' %
                     ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))

        for se in sorted(seLfns):
            lfns = seLfns[se]
            gLogger.info('%s %s' % (se.ljust(20), str(len(lfns)).rjust(20)))

            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res['OK']:
                gLogger.error('Failed to get physical file metadata.',
                              res['Message'])
                return res
            sizeMismatch = []
            for lfn, metadata in res['Value'].items():
                # Files without catalog metadata cannot be compared
                if lfn not in catalogMetadata:
                    continue
                if metadata['Size'] != catalogMetadata[lfn]['Size']:
                    sizeMismatch.append((lfn, 'deprecatedUrl', se,
                                         'CatalogPFNSizeMismatch'))
            if sizeMismatch:
                self.dic.reportProblematicReplicas(sizeMismatch, se,
                                                   'CatalogPFNSizeMismatch')
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """Obtain the physical metadata of files at one SE and report the problematic ones.

        Replicas are reported through ``self.dic.reportProblematicReplicas``
        when the SE says the file does not exist ('PFNMissing'), or when its
        metadata flags it as lost ('PFNLost'), unavailable ('PFNUnavailable')
        or gives a zero size ('PFNZeroSize').

        :param lfns: list of LFNs expected at the storage element
        :param se: name of the storage element
        :return: the failing result of ``getFileMetadata``, otherwise S_OK with
                 the dictionary {lfn: metadata} of the files found
        """
        gLogger.info('Checking the integrity of %s physical files at %s' %
                     (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)

        if not res['OK']:
            gLogger.error('Failed to get metadata for lfns.', res['Message'])
            return res
        lfnMetadataDict = res['Value']['Successful']
        # If the replicas are completely missing.
        # ``items()`` rather than the Python 2 only ``iteritems()``
        missingReplicas = []
        for lfn, reason in res['Value']['Failed'].items():
            if re.search('File does not exist', reason):
                missingReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNMissing'))
        if missingReplicas:
            self.dic.reportProblematicReplicas(missingReplicas, se,
                                               'PFNMissing')
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, lfnMetadata in lfnMetadataDict.items():
            if lfnMetadata['Lost']:
                lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost'))
            if lfnMetadata['Unavailable']:
                unavailableReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNUnavailable'))
            if not lfnMetadata['Size']:
                zeroSizeReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNZeroSize'))
        if lostReplicas:
            self.dic.reportProblematicReplicas(lostReplicas, se, 'PFNLost')
        if unavailableReplicas:
            self.dic.reportProblematicReplicas(unavailableReplicas, se,
                                               'PFNUnavailable')
        if zeroSizeReplicas:
            self.dic.reportProblematicReplicas(zeroSizeReplicas, se,
                                               'PFNZeroSize')
        gLogger.info(
            'Checking the integrity of physical files at %s complete' % se)
        return S_OK(lfnMetadataDict)

    ##########################################################################
    #
    # This section contains the specific methods for SE->File Catalog checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """Run the SE -> File Catalog check for directories at one storage element.

        The files found on the storage element are obtained with
        ``getStorageDirectoryContents`` and, if there are any, checked against
        the catalog with ``__checkCatalogForSEFiles``.

        :param lfnDir: a directory LFN (string) or a list of directory LFNs
        :param storageElement: name of the storage element to scan
        :return: the first failing result, otherwise S_OK with a dictionary
                 holding 'CatalogMetadata' and 'StorageMetadata' (both empty
                 when no file was found on the storage element)
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->FC check at %s" % storageElement)
        gLogger.info("-" * 40)
        # (str, type(u"")) is (str, unicode) on Python 2 and just str on
        # Python 3, where the ``basestring`` builtin no longer exists
        if isinstance(lfnDir, (str, type(u""))):
            lfnDir = [lfnDir]
        res = self.getStorageDirectoryContents(lfnDir, storageElement)
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if not storageFileMetadata:
            return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})
        return self.__checkCatalogForSEFiles(storageFileMetadata,
                                             storageElement)

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        """Check that the files found on a storage element are registered in the catalog.

        Files unknown to the catalog, or known without a replica at
        ``storageElement``, are reported as 'LFNNotRegistered'; files whose
        non-zero storage size differs from the catalog size are reported as
        'CatalogPFNSizeMismatch'. Reports go through
        ``self.dic.reportProblematicReplicas``.

        :param storageMetadata: dictionary {lfn: metadata} of the files found
                                on the storage element ('Size' is read here)
        :param storageElement: name of the storage element
        :return: S_ERROR if a catalog query fails or if some replica lookups
                 failed for another reason than a missing file, otherwise S_OK
                 with a dictionary holding 'CatalogMetadata' and
                 'StorageMetadata'
        """
        gLogger.info('Checking %s storage files exist in the catalog' %
                     len(storageMetadata))

        res = self.fc.getReplicas(storageMetadata)
        if not res['OK']:
            gLogger.error("Failed to get replicas for LFN", res['Message'])
            return res
        failedLfns = res['Value']['Failed']
        successfulLfns = res['Value']['Successful']
        notRegisteredLfns = []

        for lfn in storageMetadata:
            if lfn in failedLfns:
                if 'No such file or directory' in failedLfns[lfn]:
                    notRegisteredLfns.append(
                        (lfn, 'deprecatedUrl', storageElement,
                         'LFNNotRegistered'))
                    # A missing file is an expected outcome, not a failure
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append(
                    (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))

        if notRegisteredLfns:
            self.dic.reportProblematicReplicas(notRegisteredLfns,
                                               storageElement,
                                               'LFNNotRegistered')
        # Whatever is left in failedLfns failed for an unexpected reason
        if failedLfns:
            return S_ERROR(errno.ENOENT, 'Failed to obtain replicas')

        # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
        res = self._getCatalogMetadata(storageMetadata)
        if not res['OK']:
            return res
        catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res['Value']
        sizeMismatch = []
        # ``items()`` rather than the Python 2 only ``iteritems()``
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (
                    lfnStorageMetadata['Size'] != 0):
                sizeMismatch.append((lfn, 'deprecatedUrl', storageElement,
                                     'CatalogPFNSizeMismatch'))
        if sizeMismatch:
            self.dic.reportProblematicReplicas(sizeMismatch, storageElement,
                                               'CatalogPFNSizeMismatch')
        gLogger.info('Checking storage files exist in the catalog complete')
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'StorageMetadata': storageMetadata
        }
        return S_OK(resDict)

    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """Recursively obtain the files found under directories at a storage element.

        Directories that exist on the storage element are listed one after the
        other, the sub-directories found being appended to the list of
        directories to visit. Files named 'dirac_directory' are dropped from
        the result, and zero-size files are reported as 'PFNZeroSize' through
        ``self.dic.reportProblematicReplicas``.

        :param lfnDir: list of directory LFNs
        :param storageElement: name of the storage element to scan
        :return: S_ERROR (or the failing result) as soon as one storage
                 operation fails, otherwise S_OK with the dictionary
                 {lfn: metadata} of all files found
        """
        gLogger.info('Obtaining the contents for %s directories at %s' %
                     (len(lfnDir), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnDir)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of directories",
                          res['Message'])
            return res
        # ``items()`` rather than the Python 2 only ``iteritems()``
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR(errno.ENOENT,
                           'Failed to determine existance of directory')
        directoryExists = res['Value']['Successful']
        activeDirs = [
            directory for directory in sorted(directoryExists)
            if directoryExists[directory]
        ]
        allFiles = {}
        while activeDirs:
            # Visit the directories in the order they were queued
            currentDir = activeDirs.pop(0)
            res = se.listDirectory(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            if currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
                return S_ERROR(errno.ENOENT,
                               res['Value']['Failed'][currentDir])

            dirContents = res['Value']['Successful'][currentDir]
            # Queue the sub-directories; a failed conversion queues nothing
            activeDirs.extend(
                se.getLFNFromURL(dirContents['SubDirs']).get(
                    'Value', {}).get('Successful', []))
            fileURLMetadata = dirContents['Files']
            res = se.getLFNFromURL(fileURLMetadata)
            if not res['OK']:
                gLogger.error('Failed to get directory content LFNs',
                              res['Message'])
                return res

            for url, error in res['Value']['Failed'].items():
                gLogger.error("Failed to get LFN for URL",
                              "%s %s" % (url, error))
            if res['Value']['Failed']:
                return S_ERROR(errno.ENOENT, "Failed to get LFNs for PFNs")
            # Re-key the metadata of the directory by LFN instead of by URL
            for urlLfn, lfn in res['Value']['Successful'].items():
                allFiles[lfn] = fileURLMetadata[urlLfn]

        zeroSizeFiles = []

        # sorted() returns a new list, so popping from allFiles is safe here
        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == 'dirac_directory':
                allFiles.pop(lfn)
            elif not allFiles[lfn]['Size']:
                zeroSizeFiles.append(
                    (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
        if zeroSizeFiles:
            self.dic.reportProblematicReplicas(zeroSizeFiles, storageElement,
                                               'PFNZeroSize')

        gLogger.info('Obtained at total of %s files for directories at %s' %
                     (len(allFiles), storageElement))
        return S_OK(allFiles)

    def _getCatalogDirectoryContents(self, lfnDirs):
        """Collect, recursively, the catalog files of the supplied directories.

        :param lfnDirs: list of directory LFNs
        :return: the first failing result, otherwise S_OK with a dictionary
                 holding 'Metadata' (the files found, as listed by the catalog)
                 and 'Replicas' (their replicas)
        """
        def _scanDirectory(directory):
            """Inner function: return S_OK with the files found under
            ``directory`` and all its sub-directories, or the first failure.
            """
            gLogger.debug("Examining %s" % directory)

            listing = self.fc.listDirectory(directory)
            if not listing['OK']:
                gLogger.error('Failed to get directory contents',
                              listing['Message'])
                return listing
            failed = listing['Value']['Failed']
            if directory in failed:
                gLogger.error('Failed to get directory content',
                              '%s %s' % (directory, failed[directory]))
                return S_ERROR('Failed to get directory content')
            if directory not in listing['Value']['Successful']:
                return S_ERROR('Directory not existing?')

            content = listing['Value']['Successful'][directory]

            # Files sitting directly in this directory
            gLogger.debug("Files in %s: %d" %
                          (directory, len(content['Files'])))
            found = {}
            found.update(content['Files'])

            # Then the files of every sub-directory (recursion)
            if content['SubDirs']:
                for subDir in content['SubDirs']:
                    subResult = _scanDirectory(subDir)
                    if not subResult['OK']:
                        return subResult
                    found.update(subResult['Value'])

            return S_OK(found)

        gLogger.info('Obtaining the catalog contents for %d directories' %
                     len(lfnDirs))

        catalogFiles = {}
        for lfnDir in lfnDirs:
            scanResult = _scanDirectory(lfnDir)
            if not scanResult['OK']:
                return scanResult
            gLogger.debug("Content of directory %s: %d files" %
                          (lfnDir, len(scanResult['Value'])))
            catalogFiles.update(scanResult['Value'])

        gLogger.debug("Content of directories examined: %d files" %
                      len(catalogFiles))

        replicasRes = self.fc.getReplicas(list(catalogFiles))
        if not replicasRes['OK']:
            return replicasRes
        if replicasRes['Value']['Failed']:
            return S_ERROR("Failures in replicas discovery")

        contents = {
            'Metadata': catalogFiles,
            'Replicas': replicasRes['Value']['Successful']
        }
        return S_OK(contents)

    def _getCatalogReplicas(self, lfns):
        """Obtain the replicas of files from the catalog, whatever their status.

        :param lfns: list of LFNs
        :return: the failing catalog result, otherwise S_OK with the tuple
                 (replicas, zeroReplicaFiles): the dictionary of the successful
                 lookups and the list of LFNs for which the catalog answered
                 'File has zero replicas'
        """
        gLogger.info('Obtaining the replicas for %s files' % len(lfns))

        res = self.fc.getReplicas(lfns, allStatus=True)
        if not res['OK']:
            gLogger.error('Failed to get catalog replicas', res['Message'])
            return res
        allReplicas = res['Value']['Successful']
        # ``items()`` rather than the Python 2 only ``iteritems()``
        zeroReplicaFiles = [
            lfn for lfn, error in res['Value']['Failed'].items()
            if re.search('File has zero replicas', error)
        ]
        gLogger.info('Obtaining the replicas for files complete')
        return S_OK((allReplicas, zeroReplicaFiles))

    def _getCatalogMetadata(self, lfns):
        """Obtain the metadata of files from the catalog.

        :param lfns: list (or other sized container) of LFNs
        :return: the failing catalog result, otherwise S_OK with the tuple
                 (metadata, missingCatalogFiles, zeroSizeFiles): the dictionary
                 of the successful lookups, the list of LFNs for which the
                 catalog answered 'No such file or directory', and a list that
                 is always empty (nothing in this method fills it; it is kept
                 so that the returned tuple keeps three elements)
        """
        if not lfns:
            # Same shape as the normal return value: callers unpack three
            # elements and would fail on a bare empty dictionary
            return S_OK(({}, [], []))
        gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))

        zeroSizeFiles = []
        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        # ``items()`` rather than the Python 2 only ``iteritems()``
        missingCatalogFiles = [
            lfn for lfn, error in res['Value']['Failed'].items()
            if re.search('No such file or directory', error)
        ]
        gLogger.info('Obtaining the catalog metadata complete')
        return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
    Script.setUsageMessage("%s -p 2145 -t 200" % Script.scriptName)
    
if __name__=='__main__':
  clip = Params()
  clip.registerSwitches()
  Script.parseCommandLine()
  
  from DIRAC import gLogger, exit as dexit
  
  if not clip.prod or not clip.tasks:
    gLogger.error("Production ID is 0 or Tasks is 0, cannot be")
    dexit(1)
    
  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
  tc = TransformationClient()
  res = tc.getTransformation(clip.prod)
  trans= res['Value']
  transp = trans['Plugin']
  if transp != 'Limited':
    gLogger.error("This cannot be used on productions that are not using the 'Limited' plugin")
    dexit(0)
  
  gLogger.info("Prod %s has %s tasks registered" % (clip.prod, trans['MaxNumberOfTasks']) )
  if clip.tasks >0:
    max_tasks = trans['MaxNumberOfTasks'] + clip.tasks  
    groupsize = trans['GroupSize']
    gLogger.notice("Adding %s tasks (%s file(s)) to production %s" %(clip.tasks, clip.tasks*groupsize, clip.prod))
  elif clip.tasks <0:
    max_tasks = -1
    gLogger.notice("Now all existing files in the DB for production %s will be processed." % clip.prod)
  else:
Example #17
0
class ConsistencyInspector(object):
    """A class for handling some consistency checks"""
    def __init__(self,
                 interactive=True,
                 transClient=None,
                 dm=None,
                 fc=None,
                 dic=None):
        """Create the clients and reset every container used by the checks.

        One object is meant to be used for every production/list of directories.

        :param interactive: True when used by a Data Manager, False when used
                            by a DIRAC agent
        :param transClient: transformation client to use; a
                            TransformationClient() is created when None
        :param dm: data manager to use; a DataManager() is created when not given
        :param fc: file catalog to use; a FileCatalog() is created when not given
        :param dic: data integrity client to use; a DataIntegrityClient() is
                    created when not given
        """
        self.interactive = interactive

        # Clients: use the ones supplied, create the missing ones
        if transClient is None:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.dataManager = dm or DataManager()
        self.fileCatalog = fc or FileCatalog()
        self.dic = dic or DataIntegrityClient()
        self.dirac = Dirac()

        # Starting points of the consistency checks
        self._prod = 0
        self._bkQuery = None
        self._fileType = []
        self._fileTypesExcluded = []
        self._lfns = []
        self.directories = []

        # Run/production selection and replica cache
        self.runsList = []
        self.runStatus = None
        self.fromProd = None
        self.transType = ""
        self.cachedReplicas = {}

        # Processed / non-processed files, by number of descendants
        self.prcdWithDesc = []
        self.prcdWithoutDesc = []
        self.prcdWithMultDesc = []
        self.nonPrcdWithDesc = []
        self.nonPrcdWithoutDesc = []
        self.nonPrcdWithMultDesc = []
        self.descForPrcdLFNs = []
        self.descForNonPrcdLFNs = []
        self.removedFiles = []

        # Outcome of the catalog and storage checks
        self.absentLFNsInFC = []
        self.existLFNsNoSE = {}
        self.existLFNsBadReplicas = {}
        self.existLFNsBadFiles = {}
        self.existLFNsNotExisting = {}
        self.commonAncestors = {}
        self.multipleDescendants = {}
        self.ancestors = {}

        self._verbose = False

    def __logVerbose(self, msg, msg1=""):
        """Logger helper for verbose information.

        When ``self._verbose`` is set, the message is sent at NOTICE level with
        a "[ConsistencyChecks] " prefix (followed by "[<prod>] " when a
        production is set) added in front of every line; otherwise it is sent
        unchanged at VERBOSE level.

        :param msg: main message
        :param msg1: optional second part of the message
        """
        if self._verbose:
            # The conditional expression must be parenthesised: it binds looser
            # than "+", so without parentheses the whole prefix becomes empty
            # when no production is set
            prefix = "[ConsistencyChecks] " + (
                ("[%s] " % str(self.prod)) if self.prod else "")
            # Add that prefix to all lines of the message
            newMsg1 = msg1.replace("\n", "\n" + prefix)
            newMsg = prefix + msg.replace("\n", "\n" + prefix)
            gLogger.notice(newMsg, newMsg1)
        else:
            gLogger.verbose(msg, msg1)

    ##########################################################################

    def checkFC2SE(self):
        """Check the files of ``self.lfns`` against the SE information.

        Runs ``self.compareChecksum`` and stores its 'MissingReplica',
        'MissingAllReplicas', 'SomeReplicasCorrupted' and
        'AllReplicasCorrupted' sub-dictionaries in ``existLFNsNoSE``,
        ``existLFNsNotExisting``, ``existLFNsBadReplicas`` and
        ``existLFNsBadFiles``. If the comparison failed, the error is logged
        and those attributes are left untouched.
        """
        res = self.compareChecksum(self.lfns)
        # compareChecksum is expected to return an S_OK/S_ERROR structure, in
        # which case the sub-dictionaries sit under 'Value'; a bare result
        # dictionary (without the 'OK' key) is still accepted as is
        if "OK" in res:
            if not res["OK"]:
                gLogger.error("Failed to compare checksums", res["Message"])
                return
            repDict = res["Value"]
        else:
            repDict = res
        self.existLFNsNoSE = repDict["MissingReplica"]
        self.existLFNsNotExisting = repDict["MissingAllReplicas"]
        self.existLFNsBadReplicas = repDict["SomeReplicasCorrupted"]
        self.existLFNsBadFiles = repDict["AllReplicasCorrupted"]

    def getReplicasPresence(self, lfns):
        """Find which files have replicas, using ``self.fileCatalog.getReplicas``.

        The files are looked up by chunks, each lookup being tried up to 9
        times. The replicas found are also stored in ``self.cachedReplicas``.
        A chunk for which every attempt failed is reported as an error and its
        files appear in neither of the returned lists.

        :param lfns: list of LFNs
        :return: tuple (present, notPresent) of lists of LFNs: the ones for
                 which the lookup succeeded and the ones for which it failed
        """
        present = set()
        notPresent = set()

        chunkSize = 100
        printProgress = len(lfns) > chunkSize
        startTime = time.time()
        self.__write(
            "Checking replicas for %d files%s" %
            (len(lfns),
             (" (chunks of %d)" % chunkSize) if printProgress else "... "))
        for chunk in breakListIntoChunks(lfns, chunkSize):
            if printProgress:
                self.__write(".")
            for _ in range(1, 10):
                res = self.fileCatalog.getReplicas(chunk)
                if res["OK"]:
                    present.update(res["Value"]["Successful"])
                    self.cachedReplicas.update(res["Value"]["Successful"])
                    notPresent.update(res["Value"]["Failed"])
                    break
                time.sleep(0.1)
            else:
                # Every attempt failed: say so rather than silently dropping
                # the files of this chunk
                gLogger.error(
                    "Failed to get replicas for a chunk of %d files" %
                    len(chunk), res["Message"])
        self.__write(" (%.1f seconds)\n" % (time.time() - startTime))

        if notPresent:
            self.__logVerbose("Files without replicas:",
                              "\n".join([""] + sorted(notPresent)))
        return list(present), list(notPresent)

    ##########################################################################

    def getReplicasPresenceFromDirectoryScan(self, lfns):
        """Find which files have replicas by scanning their directories.

        The LFNs are grouped by directory and every directory is scanned with
        ``_getFilesFromDirectoryScan``. If one of the LFNs is itself a
        directory (it ends with '/'), no comparison is made and all the files
        found are returned as present.

        :param lfns: list of LFNs
        :return: tuple (present, notPresent) of lists of LFNs
        """
        lfnsByDir = {}
        doCompare = True
        for lfn in lfns:
            dirName = os.path.dirname(lfn)
            # A directory was given instead of a file: just list the contents
            if lfn == dirName + "/":
                doCompare = False
            lfnsByDir.setdefault(dirName, []).append(lfn)

        if doCompare:
            self.__write(
                "Checking File Catalog for %d files from %d directories " %
                (len(lfns), len(lfnsByDir)))
        else:
            self.__write("Getting files from %d directories " %
                         len(lfnsByDir))

        present = []
        notPresent = []
        scanStart = time.time()
        for dirName in sorted(lfnsByDir):
            dirStart = time.time()
            self.__write(".")
            filesInDir = self._getFilesFromDirectoryScan(dirName)
            gLogger.verbose("Obtained %d files in %.1f seconds" %
                            (len(filesInDir), time.time() - dirStart))
            if not doCompare:
                present += filesInDir
                continue
            found, missing = self.__compareLFNLists(lfnsByDir[dirName],
                                                    filesInDir)
            notPresent += missing
            present += found

        self.__write(" (%.1f seconds)\n" % (time.time() - scanStart))
        gLogger.info("Found %d files with replicas and %d without" %
                     (len(present), len(notPresent)))
        return present, notPresent

    ##########################################################################

    def __compareLFNLists(self, lfns, lfnsFound):
        """Split ``lfns`` into the files also in ``lfnsFound`` and the others.

        :param lfns: list of LFNs to look for
        :param lfnsFound: list of LFNs to look into
        :return: tuple (present, notPresent); when ``lfnsFound`` is empty,
                 ``present`` is empty and ``notPresent`` is ``lfns`` itself
        """
        begin = time.time()
        self.__logVerbose("Comparing list of %d LFNs with second list of %d" %
                          (len(lfns), len(lfnsFound)))
        if lfnsFound:
            wanted = set(lfns)
            found = set(lfnsFound)
            present = list(wanted & found)
            notPresent = list(wanted - found)
        else:
            present = []
            notPresent = lfns
        self.__logVerbose("End of comparison: %.1f seconds" %
                          (time.time() - begin))
        return present, notPresent

    def _getFilesFromDirectoryScan(self, dirs):
        """Return the files found by ``self.dataManager.getFilesFromDirectory``.

        The logger is set to FATAL for the duration of the call. A failure
        gives an empty list; it is logged unless its message contains
        'No such file or directory'.

        :param dirs: directory (or directories) handed to getFilesFromDirectory
        :return: the value returned by the data manager, or an empty list
        """
        savedLevel = gLogger.getLevel()
        gLogger.setLevel("FATAL")
        res = self.dataManager.getFilesFromDirectory(dirs)
        gLogger.setLevel(savedLevel)

        if res["OK"]:
            # Normalise any false value (None, empty container) to a list
            return res["Value"] or []

        if "No such file or directory" not in res["Message"]:
            gLogger.error("Error getting files from directories %s:" % dirs,
                          res["Message"])
        return []

    ##########################################################################

    def _getTSFiles(self):
        """Get the files of transformation ``self.prod`` from the Transformation System.

        The selection is restricted to ``self._lfns`` when set, otherwise to the
        runs of ``self.runsList``. When ``self.runStatus`` and ``self.fromProd``
        are both set (and no LFNs are given), the runs of ``self.fromProd``
        having that status are first added to ``self.runsList``; if there are
        none and the list is still empty, the process exits with code 0.

        :return: tuple (processedLFNs, nonProcessedLFNs, nonProcessedStatuses).
                 Three empty lists are returned when the transformation or its
                 files cannot be obtained. When the transformation status is
                 not one of Active, Stopped, Completed, Idle, the files are not
                 queried and ``self._lfns`` (if any) are reported as processed.
        """

        selectDict = {"TransformationID": self.prod}
        if self._lfns:
            selectDict["LFN"] = self._lfns
        elif self.runStatus and self.fromProd:
            res = self.transClient.getTransformationRuns({
                "TransformationID": self.fromProd,
                "Status": self.runStatus,
            })
            if not res["OK"]:
                # The query was made for fromProd, so that is the ID to report.
                # Best effort: the selection goes on with the runs already known.
                gLogger.error(
                    "Failed to get runs for transformation %s" % self.fromProd,
                    res["Message"])
            elif res["Value"]:
                self.runsList.extend([
                    run["RunNumber"] for run in res["Value"]
                    if run["RunNumber"] not in self.runsList
                ])
                gLogger.notice("%d runs selected" % len(res["Value"]))
            elif not self.runsList:
                gLogger.notice("No runs selected, check completed")
                DIRAC.exit(0)
        if not self._lfns and self.runsList:
            selectDict["RunNumber"] = self.runsList

        res = self.transClient.getTransformation(self.prod)
        if not res["OK"]:
            gLogger.error("Failed to find transformation %s" % self.prod,
                          res["Message"])
            return [], [], []
        status = res["Value"]["Status"]
        if status not in ("Active", "Stopped", "Completed", "Idle"):
            gLogger.notice(
                "Transformation %s in status %s, will not check if files are processed"
                % (self.prod, status))
            processedLFNs = []
            nonProcessedLFNs = []
            nonProcessedStatuses = []
            if self._lfns:
                processedLFNs = self._lfns
        else:
            res = self.transClient.getTransformationFiles(selectDict)
            if not res["OK"]:
                gLogger.error(
                    "Failed to get files for transformation %d" % self.prod,
                    res["Message"])
                return [], [], []
            processedLFNs = [
                item["LFN"] for item in res["Value"]
                if item["Status"] == "Processed"
            ]
            nonProcessedLFNs = [
                item["LFN"] for item in res["Value"]
                if item["Status"] != "Processed"
            ]
            nonProcessedStatuses = list(
                set(item["Status"] for item in res["Value"]
                    if item["Status"] != "Processed"))

        return processedLFNs, nonProcessedLFNs, nonProcessedStatuses

    def __getDirectories(self):
        """get the directories where to look into (they are either given, or taken from the transformation ID"""
        if self.directories:
            directories = []
            printout = False
            for directory in self.directories:
                if not directory.endswith("..."):
                    directories.append(directory)
                else:
                    printout = True
                    topDir = os.path.dirname(directory)
                    res = self.fileCatalog.listDirectory(topDir)
                    if not res["OK"]:
                        # DError(errno.ENOENT, res['Message'] )
                        return S_ERROR(errno.ENOENT, res["Message"])
                    else:
                        matchDir = directory.split("...")[0]
                        directories += [
                            d for d in res["Value"]["Successful"].get(
                                topDir, {}).get("SubDirs", [])
                            if d.startswith(matchDir)
                        ]
            if printout:
                gLogger.always("Expanded list of %d directories:\n%s" %
                               (len(directories), "\n".join(directories)))
            return directories
        else:
            return S_ERROR(errno.ENOENT, "Need to specify the directories")

    ##########################################################################

    def __write(self, text):
        if self.interactive:
            sys.stdout.write(text)
            sys.stdout.flush()

    ##########################################################################

    def _selectByFileType(self,
                          lfnDict,
                          fileTypes=None,
                          fileTypesExcluded=None):
        """Select only those files from the values of lfnDict that have a certain type"""
        if not lfnDict:
            return {}
        if not fileTypes:
            fileTypes = self.fileType
        if not fileTypesExcluded:
            fileTypesExcluded = self.fileTypesExcluded
        else:
            fileTypesExcluded += [
                ft for ft in self.fileTypesExcluded
                if ft not in fileTypesExcluded
            ]
        # lfnDict is a dictionary of dictionaries including the metadata, create a
        # deep copy to get modified
        ancDict = dict(lfnDict)
        if fileTypes == [""]:
            fileTypes = []
        # and loop on the original dictionaries
        for ancestor in lfnDict:
            for desc in list(lfnDict[ancestor]):
                ft = lfnDict[ancestor][desc]["FileType"]
                if ft in fileTypesExcluded or (fileTypes
                                               and ft not in fileTypes):
                    ancDict[ancestor].pop(desc)
            if not len(ancDict[ancestor]):
                ancDict.pop(ancestor)
        return ancDict

    @staticmethod
    def _getFileTypesCount(lfnDict):
        """return file types count"""
        ft_dict = {}
        for ancestor in lfnDict:
            t_dict = {}
            for desc in lfnDict[ancestor]:
                ft = lfnDict[ancestor][desc]["FileType"]
                t_dict[ft] = t_dict.setdefault(ft, 0) + 1
            ft_dict[ancestor] = t_dict

        return ft_dict

    def __getLFNsFromFC(self):
        """Check which files have replicas in the FC.

        When ``self.lfns`` is set, those LFNs are checked; otherwise the
        directories given by ``__getDirectories`` are scanned.

        :return: tuple (present, notPresent); two empty lists when the
                 directories to scan cannot be determined
        """
        if self.lfns:
            present, notPresent = self.getReplicasPresence(self.lfns)
            return present, notPresent

        dirNames = self.__getDirectories()
        if isinstance(dirNames, dict):
            # __getDirectories reports failures as an S_ERROR structure;
            # looping over it would treat its keys as directory names
            gLogger.error("Failed to get the directories to scan",
                          dirNames.get("Message", ""))
            return [], []
        directories = []
        for dirName in dirNames:
            if not dirName.endswith("/"):
                dirName += "/"
            directories.append(dirName)
        present, notPresent = self.getReplicasPresenceFromDirectoryScan(
            directories)
        return present, notPresent

    def compareChecksum(self, lfns):
        """Compare the File Catalog checksum of files with that of their physical replicas.

        Replicas are taken from ``self.cachedReplicas`` when available and from
        the file catalog otherwise; the checksum of each replica is then
        requested from its storage element.

        :param lfns: LFNs to check
        :return: S_ERROR when the replicas or the catalog metadata cannot be
                 obtained, otherwise S_OK with a dictionary of five
                 sub-dictionaries keyed by LFN:

                 * ``NoReplicas``: files for which the replica lookup failed
                 * ``MissingAllReplicas``: no good replica, and at least one
                   replica not found on its SE
                 * ``AllReplicasCorrupted``: no good replica, none of them
                   reported as not found
                 * ``MissingReplica``: at least one good replica, and at least
                   one replica not found (the value lists the SEs concerned)
                 * ``SomeReplicasCorrupted``: at least one good replica and at
                   least one replica with a bad checksum

                 A replica whose SE metadata could not be obtained is assumed
                 to be good.
        """
        retDict = {
            "AllReplicasCorrupted": {},
            "SomeReplicasCorrupted": {},
            "MissingReplica": {},
            "MissingAllReplicas": {},
            "NoReplicas": {},
        }

        chunkSize = 100
        replicas = {}
        setLfns = set(lfns)
        cachedLfns = setLfns & set(self.cachedReplicas)
        for lfn in cachedLfns:
            replicas[lfn] = self.cachedReplicas[lfn]
        lfnsLeft = list(setLfns - cachedLfns)
        if lfnsLeft:
            self.__write("Get replicas for %d files (chunks of %d): " %
                         (len(lfnsLeft), chunkSize))
            for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize):
                self.__write(".")
                replicasRes = self.fileCatalog.getReplicas(lfnChunk)
                if not replicasRes["OK"]:
                    gLogger.error("error:  %s" % replicasRes["Message"])
                    return S_ERROR(errno.ENOENT,
                                   "error:  %s" % replicasRes["Message"])
                replicasRes = replicasRes["Value"]
                if replicasRes["Failed"]:
                    retDict["NoReplicas"].update(replicasRes["Failed"])
                replicas.update(replicasRes["Successful"])

        self.__write("Get FC metadata for %d files to be checked: " %
                     len(lfns))
        metadata = {}
        for lfnChunk in breakListIntoChunks(replicas, chunkSize):
            self.__write(".")
            res = self.fileCatalog.getFileMetadata(lfnChunk)
            if not res["OK"]:
                return S_ERROR(errno.ENOENT, "error %s" % res["Message"])
            metadata.update(res["Value"]["Successful"])

        gLogger.notice("Check existence and compare checksum file by file...")
        csDict = {}
        seFiles = {}
        # Reverse the LFN->SE dictionary
        nReps = 0
        for lfn in replicas:
            csDict.setdefault(lfn, {})["FCChecksum"] = metadata.get(
                lfn, {}).get("Checksum")
            for se in replicas[lfn]:
                seFiles.setdefault(se, []).append(lfn)
                nReps += 1

        gLogger.notice("Getting checksum of %d replicas in %d SEs" %
                       (nReps, len(seFiles)))
        checkSum = {}
        lfnNotExisting = {}
        lfnNoInfo = {}
        logLevel = gLogger.getLevel()
        gLogger.setLevel("FATAL")
        try:
            for num, se in enumerate(sorted(seFiles)):
                self.__write("\n%d. At %s (%d files): " %
                             (num, se, len(seFiles[se])))
                oSe = StorageElement(se)
                notFound = 0
                for surlChunk in breakListIntoChunks(seFiles[se], chunkSize):
                    self.__write(".")
                    seMetadata = oSe.getFileMetadata(surlChunk)
                    if not seMetadata["OK"]:
                        gLogger.error(
                            "Error: getFileMetadata returns %s. Ignore those replicas"
                            % (seMetadata["Message"]))
                        # Only the files of the failed chunk have no
                        # information: flagging every file at this SE would
                        # hide the bad checksums found in the other chunks
                        for lfn in surlChunk:
                            lfnNoInfo.setdefault(lfn, []).append(se)
                    else:
                        seMetadata = seMetadata["Value"]
                        notFound += len(seMetadata["Failed"])
                        for lfn in seMetadata["Failed"]:
                            lfnNotExisting.setdefault(lfn, []).append(se)
                        for lfn in seMetadata["Successful"]:
                            checkSum.setdefault(
                                lfn, {}
                            )[se] = seMetadata["Successful"][lfn]["Checksum"]
                if notFound:
                    gLogger.error("%d files not found" % notFound)
        finally:
            # Put the log level back even if a storage element call raises
            gLogger.setLevel(logLevel)

        gLogger.notice("Verifying checksum of %d files" % len(replicas))
        for lfn in replicas:
            # get the lfn checksum from the FC
            replicaDict = replicas[lfn]
            oneGoodReplica = False
            allGoodReplicas = True
            fcChecksum = csDict[lfn].pop("FCChecksum")
            for se in replicaDict:
                # If replica doesn't exist skip check
                if se in lfnNotExisting.get(lfn, []):
                    allGoodReplicas = False
                    continue
                if se in lfnNoInfo.get(lfn, []):
                    # If there is no info, a priori it could be good
                    oneGoodReplica = True
                    continue
                # get the surls metadata and compare the checksum
                surlChecksum = checkSum.get(lfn, {}).get(se, "")
                if not surlChecksum or not compareAdler(
                        fcChecksum, surlChecksum):
                    # if fcChecksum does not match surlChecksum
                    csDict[lfn][se] = {"PFNChecksum": surlChecksum}
                    gLogger.info(
                        "ERROR!! checksum mismatch at %s for LFN %s:  FC checksum: %s , PFN checksum : %s "
                        % (se, lfn, fcChecksum, surlChecksum))
                    allGoodReplicas = False
                else:
                    oneGoodReplica = True
            if not oneGoodReplica:
                if lfn in lfnNotExisting:
                    gLogger.info("=> All replicas are missing", lfn)
                    retDict["MissingAllReplicas"][lfn] = "All"
                else:
                    gLogger.info("=> All replicas have bad checksum", lfn)
                    retDict["AllReplicasCorrupted"][lfn] = csDict[lfn]
            elif not allGoodReplicas:
                if lfn in lfnNotExisting:
                    gLogger.info("=> At least one replica missing", lfn)
                    retDict["MissingReplica"][lfn] = lfnNotExisting[lfn]
                else:
                    gLogger.info("=> At least one replica with good Checksum",
                                 lfn)
                    retDict["SomeReplicasCorrupted"][lfn] = csDict[lfn]

        return S_OK(retDict)

    ##########################################################################
    # properties

    def set_prod(self, value):
        """Set the production (transformation) ID.

        A false value is stored as 0. Otherwise the value is converted to an
        integer, the transformation is looked up and its type is kept in
        ``self.transType``.

        :raises Exception: when the transformation cannot be obtained
        """
        if not value:
            self._prod = 0
            return
        value = int(value)
        res = self.transClient.getTransformation(value, extraParams=False)
        if not res["OK"]:
            raise Exception("Couldn't find transformation %d: %s" %
                            (value, res["Message"]))
        self.transType = res["Value"]["Type"]
        if self.interactive:
            gLogger.info("Production %d has type %s" %
                         (value, self.transType))
        self._prod = value

    def get_prod(self):
        """Return the production (transformation) ID."""
        return self._prod

    prod = property(fget=get_prod, fset=set_prod)

    def set_fileType(self, value):
        """Store the file types, converted to upper case."""
        upperCased = []
        for fileType in value:
            upperCased.append(fileType.upper())
        self._fileType = upperCased

    def get_fileType(self):
        """Return the list of file types."""
        return self._fileType

    fileType = property(fget=get_fileType, fset=set_fileType)

    def set_fileTypesExcluded(self, value):
        """Store the excluded file types, converted to upper case."""
        upperCased = []
        for fileType in value:
            upperCased.append(fileType.upper())
        self._fileTypesExcluded = upperCased

    def get_fileTypesExcluded(self):
        """Return the list of excluded file types."""
        return self._fileTypesExcluded

    fileTypesExcluded = property(fget=get_fileTypesExcluded,
                                 fset=set_fileTypesExcluded)

    def set_lfns(self, value):
        """Store the LFNs to work on.

        A single string is taken as a list of one LFN. In each LFN the spaces
        are removed and every "//" is replaced by "/" (single pass).
        """
        candidates = [value] if isinstance(value, six.string_types) else value
        cleaned = []
        for lfn in candidates:
            cleaned.append(lfn.replace(" ", "").replace("//", "/"))
        self._lfns = cleaned

    def get_lfns(self):
        """Return the list of LFNs."""
        return self._lfns

    lfns = property(fget=get_lfns, fset=set_lfns)

    ##########################################################################
    #
    #  This part was backported from DataIntegrityClient
    #
    #
    #  This section contains the specific methods for File Catalog->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """Check the catalog content of one or more directories against the storage elements.

        :param lfnDir: a directory, or a list of directories
        :return: the failing structure when the catalog content cannot be
                 obtained or the physical check fails, otherwise S_OK with
                 {"CatalogMetadata": ..., "CatalogReplicas": ...}
        """
        separator = "-" * 40
        gLogger.info(separator)
        gLogger.info("Performing the FC->SE check")
        gLogger.info(separator)
        directories = [lfnDir] if isinstance(lfnDir,
                                             six.string_types) else lfnDir
        res = self._getCatalogDirectoryContents(directories)
        if not res["OK"]:
            return res
        contents = res["Value"]
        replicas = contents["Replicas"]
        catalogMetadata = contents["Metadata"]
        res = self.checkPhysicalFiles(replicas, catalogMetadata)
        if not res["OK"]:
            return res
        return S_OK({
            "CatalogMetadata": catalogMetadata,
            "CatalogReplicas": replicas
        })

    def catalogFileToSE(self, lfns):
        """Check the catalog information of the given files against the storage elements.

        :param lfns: an LFN, or a list of LFNs
        :return: the failing structure when the metadata or replicas cannot be
                 obtained or the physical check fails, otherwise S_OK with
                 {"CatalogMetadata": ..., "CatalogReplicas": ...}
        """
        separator = "-" * 40
        gLogger.info(separator)
        gLogger.info("Performing the FC->SE check")
        gLogger.info(separator)
        lfnList = [lfns] if isinstance(lfns, six.string_types) else lfns
        res = self._getCatalogMetadata(lfnList)
        if not res["OK"]:
            return res
        # Only the metadata is used here, not the missing / zero-size lists
        catalogMetadata = res["Value"][0]
        res = self._getCatalogReplicas(list(catalogMetadata))
        if not res["OK"]:
            return res
        # The files without replicas are not used here either
        replicas = res["Value"][0]
        res = self.checkPhysicalFiles(replicas, catalogMetadata)
        if not res["OK"]:
            return res
        return S_OK({
            "CatalogMetadata": catalogMetadata,
            "CatalogReplicas": replicas
        })

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """Check the replicas known to the catalog against the storage elements.

        For each storage element the physical metadata of its files is
        obtained; replicas whose size differs from the catalog size are
        reported through ``self.dic.reportProblematicReplicas``.

        :param replicas: {lfn: {se: url}} as obtained from the catalog
        :param catalogMetadata: {lfn: metadata}, metadata having a "Size" key
        :param ses: when given, only these storage elements are checked
        :return: S_OK(), or the failing structure of the first storage element
                 whose metadata cannot be obtained
        """

        # FIXME: we better use the compareChecksum function instead of this one!
        # or maybe directly checkFC2SE

        separator = "-" * 40
        gLogger.info(separator)
        gLogger.info("Performing the FC->SE check")
        gLogger.info(separator)

        # Group the LFNs per storage element
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                if not ses or se in ses:
                    seLfns.setdefault(se, []).append(lfn)
        gLogger.info("%s %s" %
                     ("Storage Element".ljust(20), "Replicas".rjust(20)))

        for se in sorted(seLfns):
            lfns = seLfns[se]
            gLogger.info("%s %s" % (se.ljust(20), str(len(lfns)).rjust(20)))

            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res["OK"]:
                gLogger.error("Failed to get physical file metadata.",
                              res["Message"])
                return res
            sizeMismatch = [
                (lfn, "deprecatedUrl", se, "CatalogPFNSizeMismatch")
                for lfn, pfnMetadata in res["Value"].items()
                if lfn in catalogMetadata
                and pfnMetadata["Size"] != catalogMetadata[lfn]["Size"]
            ]
            if sizeMismatch:
                self.dic.reportProblematicReplicas(sizeMismatch, se,
                                                   "CatalogPFNSizeMismatch")
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """Obtain the physical metadata of files at an SE and report the problematic ones.

        Missing, lost, unavailable and zero-size replicas are reported through
        ``self.dic.reportProblematicReplicas``.

        :param lfns: LFNs to check
        :param se: name of the storage element
        :return: the failing structure when the metadata cannot be obtained,
                 otherwise S_OK with the metadata of the files found
        """
        def _problem(lfn, kind):
            """Build the tuple describing a problematic replica at this SE"""
            return (lfn, "deprecatedUrl", se, kind)

        gLogger.info("Checking the integrity of %s physical files at %s" %
                     (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)

        if not res["OK"]:
            gLogger.error("Failed to get metadata for lfns.", res["Message"])
            return res
        pfnMetadata = res["Value"]["Successful"]

        # Replicas that are completely missing
        missingReplicas = [
            _problem(lfn, "PFNMissing")
            for lfn, reason in res["Value"]["Failed"].items()
            if re.search("File does not exist", reason)
        ]
        if missingReplicas:
            self.dic.reportProblematicReplicas(missingReplicas, se,
                                               "PFNMissing")

        # Replicas that exist but are not usable
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        for lfn, metadata in pfnMetadata.items():
            if metadata.get("Lost"):
                lostReplicas.append(_problem(lfn, "PFNLost"))
            if metadata.get("Unavailable") or not metadata["Accessible"]:
                unavailableReplicas.append(_problem(lfn, "PFNUnavailable"))
            if not metadata["Size"]:
                zeroSizeReplicas.append(_problem(lfn, "PFNZeroSize"))
        for problematic, kind in ((lostReplicas, "PFNLost"),
                                  (unavailableReplicas, "PFNUnavailable"),
                                  (zeroSizeReplicas, "PFNZeroSize")):
            if problematic:
                self.dic.reportProblematicReplicas(problematic, se, kind)
        gLogger.info(
            "Checking the integrity of physical files at %s complete" % se)
        return S_OK(pfnMetadata)

    ##########################################################################
    #
    # This section contains the specific methods for SE->File Catalog checks
    #

    def _getCatalogDirectoryContents(self, lfnDirs):
        """Collect the files found below the given directories, and their replicas.

        :param lfnDirs: catalog directories, each of them scanned recursively
        :return: S_OK({"Metadata": files found, "Replicas": their replicas}),
                 or a failing structure when a directory cannot be listed or
                 the replicas cannot be obtained for all the files
        """
        def _walk(directory):
            """Inner function: return S_OK with the files of a directory and of its sub-directories"""
            gLogger.debug("Examining %s" % directory)

            res = self.fileCatalog.listDirectory(directory)
            if not res["OK"]:
                gLogger.error("Failed to get directory contents",
                              res["Message"])
                return res
            failed = res["Value"]["Failed"]
            if directory in failed:
                gLogger.error("Failed to get directory content",
                              "%s %s" % (directory, failed[directory]))
                return S_ERROR("Failed to get directory content")
            successful = res["Value"]["Successful"]
            if directory not in successful:
                return S_ERROR("Directory not existing?")
            listing = successful[directory]

            # The files of the directory itself come first
            gLogger.debug("Files in %s: %d" %
                          (directory, len(listing["Files"])))
            collected = {}
            collected.update(listing["Files"])

            # ... followed by those of its sub-directories (recursion)
            for subDir in listing["SubDirs"] or []:
                subContent = _walk(subDir)
                if not subContent["OK"]:
                    return subContent
                collected.update(subContent["Value"])

            return S_OK(collected)

        gLogger.info("Obtaining the catalog contents for %d directories" %
                     len(lfnDirs))

        allFiles = {}
        for lfnDir in lfnDirs:
            dirContent = _walk(lfnDir)
            if not dirContent["OK"]:
                return dirContent
            gLogger.debug("Content of directory %s: %d files" %
                          (lfnDir, len(dirContent["Value"])))
            allFiles.update(dirContent["Value"])

        gLogger.debug("Content of directories examined: %d files" %
                      len(allFiles))

        replicas = self.fileCatalog.getReplicas(list(allFiles))
        if not replicas["OK"]:
            return replicas
        if replicas["Value"]["Failed"]:
            return S_ERROR("Failures in replicas discovery")

        return S_OK({
            "Metadata": allFiles,
            "Replicas": replicas["Value"]["Successful"]
        })

    def _getCatalogReplicas(self, lfns):
        """Obtain the file replicas from the catalog while checking that there are replicas.

        :param lfns: list of LFNs
        :return: the failing structure when the catalog call fails, otherwise
                 S_OK((allReplicas, zeroReplicaFiles)): the dictionary of
                 replicas of the files found, and the list of files reported
                 with "File has zero replicas"
        """
        if not lfns:
            # Same types as in the general case: the replicas are a dictionary
            # (callers such as checkPhysicalFiles iterate over its items)
            return S_OK(({}, []))

        gLogger.info("Obtaining the replicas for %s files" % len(lfns))
        zeroReplicaFiles = []
        res = self.fileCatalog.getReplicas(lfns, allStatus=True)
        if not res["OK"]:
            gLogger.error("Failed to get catalog replicas", res["Message"])
            return res
        allReplicas = res["Value"]["Successful"]
        for lfn, error in res["Value"]["Failed"].items():
            if re.search("File has zero replicas", error):
                zeroReplicaFiles.append(lfn)
        gLogger.info("Obtaining the replicas for files complete")
        return S_OK((allReplicas, zeroReplicaFiles))

    def _getCatalogMetadata(self, lfns):
        """Obtain the file metadata from the catalog while checking they exist.

        :param lfns: list of LFNs
        :return: the failing structure when the catalog call fails, otherwise
                 S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles)): the
                 dictionary of metadata of the files found, the list of files
                 reported with "No such file or directory", and a list of
                 zero-size files
        """
        # A dictionary, as in the general case, so that the callers always
        # get the same type back
        allMetadata = {}
        missingCatalogFiles = []
        # NOTE(review): never filled in this method, always returned empty
        zeroSizeFiles = []

        if not lfns:
            return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
        gLogger.info("Obtaining the catalog metadata for %s files" % len(lfns))

        res = self.fileCatalog.getFileMetadata(lfns)
        if not res["OK"]:
            gLogger.error("Failed to get catalog metadata", res["Message"])
            return res
        allMetadata = res["Value"]["Successful"]
        for lfn, error in res["Value"]["Failed"].items():
            if re.search("No such file or directory", error):
                missingCatalogFiles.append(lfn)
        gLogger.info("Obtaining the catalog metadata complete")
        return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
Example #18
0
class fakeClient:
    """Stand-in for a transformation client, bound to one local transformation.

    Requests concerning ``transID`` are answered from the transformation
    object and the list of files held locally; the other ones are forwarded
    to a real TransformationClient. When ``asIfProd`` is set, requests for
    ``transID`` are redirected to that transformation.
    """

    def __init__(self, trans, transID, lfns, asIfProd):
        """Create the real clients and prepare the files and replicas of ``lfns``"""
        self.trans = trans
        self.transID = transID
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        self.transClient = TransformationClient()
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        self.bk = BookkeepingClient()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()
        self.asIfProd = asIfProd

        (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns)

    def _localFiles(self):
        """Return the local file list, or an empty list when there is none"""
        # prepareForPlugin returns None for the files when no LFN is given
        return self.transFiles or []

    def addFilesToTransformation(self, transID, lfns):
        """Pretend that all the files were added"""
        return S_OK({
            'Failed': {},
            'Successful': {lfn: 'Added' for lfn in lfns}
        })

    def getTransformation(self, transID, extraParams=False):
        """Return the type of the local transformation, or ask the real client"""
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformation(transID)
        res = self.trans.getType()
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        """Return the replicas prepared for the plugin"""
        return self.transReplicas

    def getFiles(self):
        """Return the files prepared for the plugin"""
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        """Return null counters for a fixed list of SEs, or ask the real client"""
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW',
            'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW'
        ]
        counters = [({'UsedSE': se}, 0) for se in possibleTargets]
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        """Return the BK query of ``asIfProd`` when redirected, else the local one"""
        if transID == self.transID and self.asIfProd:
            # asIfProd is an attribute: the bare name raised a NameError
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        """Do nothing"""
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        """Return the runs of the local files as Active, or ask the real client"""
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] == self.transID:
            transRuns = []
            runs = condDict.get('RunNumber', [])
            if not runs and self.transFiles:
                res = self.bk.getFileMetadata(
                    [fileDict['LFN'] for fileDict in self.transFiles])
                if not res['OK']:
                    return res
                runs = list(
                    set(meta['RunNumber']
                        for meta in res['Value']['Successful'].values()))
            for run in runs:
                transRuns.append({
                    'RunNumber': run,
                    'Status': "Active",
                    "SelectedSite": None
                })
            return DIRAC.S_OK(transRuns)
        else:
            return self.transClient.getTransformationRuns(condDict)

    def getTransformationFiles(self, condDict=None):
        """Return the local files as Unused, or ask the real client"""
        if condDict is None:
            # The default value must be usable: None has no get()
            condDict = {}
        if condDict.get('TransformationID') == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict.get('TransformationID') == self.transID:
            transFiles = []
            if 'Status' in condDict and 'Unused' not in condDict['Status']:
                return DIRAC.S_OK(transFiles)
            runs = None
            if 'RunNumber' in condDict:
                runs = condDict['RunNumber']
                if not isinstance(runs, list):
                    runs = [runs]
            for fileDict in self._localFiles():
                if not runs or fileDict['RunNumber'] in runs:
                    transFiles.append({
                        'LFN': fileDict['LFN'],
                        'Status': 'Unused',
                        'RunNumber': fileDict['RunNumber']
                    })
            return DIRAC.S_OK(transFiles)
        else:
            return self.transClient.getTransformationFiles(condDict=condDict)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """Update the local files with some parameters, or ask the real client"""
        if transID == self.transID:
            for fileDict in self._localFiles():
                fileDict.update(lfnDict.get(fileDict['LFN'], {}))
            return S_OK()
        else:
            return self.transClient.setParameterToTransformationFiles(
                transID, lfnDict)

    def getTransformationFilesCount(self, transID, field, selection=None):
        """Count the local files per Status or RunNumber, or ask the real client"""
        if selection is None:
            selection = {}
        if transID == self.transID or selection.get(
                'TransformationID') == self.transID:
            runs = selection.get('RunNumber')
            if runs and not isinstance(runs, list):
                runs = [runs]
            if field == 'Status':
                counters = {'Unused': 0}
                for fileDict in self._localFiles():
                    if not runs or fileDict['RunNumber'] in runs:
                        counters['Unused'] += 1
            elif field == 'RunNumber':
                counters = {}
                for fileDict in self._localFiles():
                    runID = fileDict['RunNumber']
                    if not runs or runID in runs:
                        counters.setdefault(runID, 0)
                        counters[runID] += 1
            else:
                return DIRAC.S_ERROR('Not implemented for field ' + field)
            counters['Total'] = sum(counters.values())
            return DIRAC.S_OK(counters)
        else:
            return self.transClient.getTransformationFilesCount(
                transID, field, selection=selection)

    def getTransformationRunStats(self, transIDs):
        """Return the run statistics: local files as Unused, real client for the rest"""
        counters = {}
        # Ask the real client once, and only for the other transformations:
        # its answer must not replace the counters of the local one
        otherIDs = [transID for transID in transIDs if transID != self.transID]
        if otherIDs:
            res = self.transClient.getTransformationRunStats(otherIDs)
            if not res['OK']:
                return res
            counters.update(res['Value'])
        if self.transID in transIDs:
            # Created even without files, which used to raise a KeyError
            runCounters = counters.setdefault(self.transID, {})
            for fileDict in self._localFiles():
                runStats = runCounters.setdefault(fileDict['RunNumber'],
                                                  {'Unused': 0})
                runStats['Unused'] += 1
            for runStats in runCounters.values():
                runStats['Total'] = runStats['Unused']
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        """Forwarded to the real client"""
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        """Forwarded to the real client"""
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        """Do nothing"""
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        """Do nothing"""
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        """Do nothing"""
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        """Do nothing"""
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        """Do nothing"""
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        """Forwarded to the real client"""
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        """Get the run number and the replicas of the files given to the plugin.

        :param lfns: list of LFNs
        :return: tuple (files, replicas): a list of {"RunNumber", "LFN"}
                 dictionaries and {lfn: sorted SEs}; (None, None) when no LFN
                 is given, ([], {}) when the BK metadata cannot be obtained
        """
        import time
        # Single-argument print calls give the same output in Python 2 and 3
        print("Preparing the plugin input data (%d files)" % len(lfns))
        transType = self.trans.getType()['Value']
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if not res['OK']:
            print("Error getting BK metadata %s" % res['Message'])
            return ([], {})
        files = []
        for lfn, metadata in res['Value']['Successful'].items():
            runID = metadata.get('RunNumber', 0)
            files.append({"RunNumber": runID, "LFN": lfn})
        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        for lfnChunk in breakListIntoChunks(lfns, 200):
            if transType.lower() in ("replication", "removal"):
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].items():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                print("Error getting replicas of %d files: %s" %
                      (len(lfns), res['Message']))
        print("Obtained replicas of %d files in %.3f seconds" %
              (len(lfns), time.time() - startTime))
        return (files, replicas)
Example #19
0
class Transformation( API ):
  """ Client-side handle on one transformation.

      Parameter values are cached in self.paramValues; once the transformation is
      known to exist on the server (self.exists) every changed value is also pushed
      through the transformation client.  Attributes named getXxx / setXxx that are
      not defined explicitly are turned into generic accessors by __getattr__.
  """

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ c'tor

        :param transID: ID of a transformation to load from the server; a false value
                        leaves the object describing a new transformation
        :param transClient: client object used to talk to the server; a
                            TransformationClient is created when none is given
        :raises AttributeError: if the server answers 'Transformation does not exist'
    """
    super( Transformation, self ).__init__()

    # Python types accepted for each typed parameter (checked in __setParam)
    self.paramTypes = { 'TransformationID'      : [types.IntType, types.LongType],
                        'TransformationName'    : types.StringTypes,
                        'Status'                : types.StringTypes,
                        'Description'           : types.StringTypes,
                        'LongDescription'       : types.StringTypes,
                        'Type'                  : types.StringTypes,
                        'Plugin'                : types.StringTypes,
                        'AgentType'             : types.StringTypes,
                        'FileMask'              : types.StringTypes,
                        'TransformationGroup'   : types.StringTypes,
                        'GroupSize'             : [types.IntType, types.LongType, types.FloatType],
                        'InheritedFrom'         : [types.IntType, types.LongType],
                        'Body'                  : types.StringTypes,
                        'MaxNumberOfTasks'      : [types.IntType, types.LongType],
                        'EventsPerTask'         : [types.IntType, types.LongType]}
    # Default value of every typed parameter
    self.paramValues = { 'TransformationID'      : 0,
                         'TransformationName'    : '',
                         'Status'                : 'New',
                         'Description'           : '',
                         'LongDescription'       : '',
                         'Type'                  : '',
                         'Plugin'                : 'Standard',
                         'AgentType'             : 'Manual',
                         'FileMask'              : '',
                         'TransformationGroup'   : 'General',
                         'GroupSize'             : 1,
                         'InheritedFrom'         : 0,
                         'Body'                  : '',
                         'MaxNumberOfTasks'      : 0,
                         'EventsPerTask'         : 0}
    self.ops = Operations()
    self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins',
                                               ['Broadcast', 'Standard', 'BySize', 'ByShare'] )
    if not transClient:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if transID:
      self.paramValues['TransformationID'] = transID
      res = self.getTransformation()
      if res['OK']:
        self.exists = True
      elif res['Message'] == 'Transformation does not exist':
        # %s rather than %d: a non-integer ID must not turn this into a TypeError
        raise AttributeError( 'TransformationID %s does not exist' % transID )
      else:
        self.paramValues['TransformationID'] = 0
        gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID,
                                                                                   self.transClient.serverURL ) )

  def setServer( self, server ):
    """ Remember server as the service URL and hand it to the transformation client
    """
    self.serverURL = server
    self.transClient.setServer( self.serverURL )

  def getServer( self ):
    """ Return the service URL currently stored on this object
    """
    return self.serverURL

  def reset( self, transID = 0 ):
    """ Re-run the constructor for transID and return S_OK()

        NOTE(review): __init__ is called without a client, so it builds a default
        TransformationClient and re-reads self.serverURL from it; a client or server
        selected earlier is not carried over - confirm this is intended.
    """
    self.__init__( transID )
    self.transClient.setServer( self.serverURL )
    return S_OK()

  def setTargetSE( self, seList ):
    """ Set the 'TargetSE' parameter, see __setSE for the accepted forms of seList
    """
    return self.__setSE( 'TargetSE', seList )

  def setSourceSE( self, seList ):
    """ Set the 'SourceSE' parameter, see __setSE for the accepted forms of seList
    """
    return self.__setSE( 'SourceSE', seList )

  def __setSE( self, seParam, seList ):
    """ Normalise seList to a list, validate it and store it as parameter seParam

        :param seParam: parameter name to store the list under
        :param seList: string (python literal or comma separated names), list, dict or tuple
        :return: S_OK() / S_ERROR
    """
    if isinstance( seList, basestring ):
      # SECURITY NOTE: eval() executes arbitrary expressions; it is kept for backward
      # compatibility and must never be fed untrusted input.
      try:
        seList = eval( seList )
      except Exception:
        # Not a python expression: treat it as a comma separated list of names
        seList = seList.split( ',' )
    elif isinstance( seList, ( list, dict, tuple ) ):
      seList = list( seList )
    else:
      return S_ERROR( "Bad argument type" )
    # NOTE(review): __checkSEs is not defined in the visible part of this class -
    # confirm it exists, otherwise this lookup ends in __getattr__ and raises AttributeError.
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    self.item_called = seParam
    return self.__setParam( seList )

  def __getattr__( self, name ):
    """ Resolve undefined getXxx / setXxx attributes into generic parameter accessors

        The parameter name (the text after the get/set prefix) is stored in
        self.item_called, which the returned accessor reads when it is invoked.
    """
    if name.startswith( 'get' ):
      self.item_called = name[3:]
      return self.__getParam
    if name.startswith( 'set' ):
      self.item_called = name[3:]
      return self.__setParam
    raise AttributeError( name )

  def __getParam( self ):
    """ Return S_OK with the value selected by self.item_called

        'Available' yields the typed parameter names, 'Parameters' the whole value
        dictionary, any other known name its value.

        :raises AttributeError: for a name that is not a known parameter
    """
    if self.item_called == 'Available':
      return S_OK( self.paramTypes.keys() )
    if self.item_called == 'Parameters':
      return S_OK( self.paramValues )
    if self.item_called in self.paramValues:
      return S_OK( self.paramValues[self.item_called] )
    raise AttributeError( "Unknown parameter for transformation: %s" % self.item_called )

  def __setParam( self, value ):
    """ Store value for the parameter named by self.item_called

        Typed parameters are type checked; untyped ones are accepted as they are.
        When the transformation exists on the server the change is sent there first
        and the local value is only updated if that call succeeds.

        :raises TypeError: when a typed parameter receives a value of another type
    """
    paramName = self.item_called
    if paramName in self.paramTypes:
      change = self.paramValues[paramName] != value
      if change and type( value ) not in self.paramTypes[paramName]:
        raise TypeError( "%s %s %s expected one of %s" % ( paramName, value, type( value ),
                                                           self.paramTypes[paramName] ) )
    else:
      change = paramName not in self.paramValues or self.paramValues[paramName] != value
    if not change:
      gLogger.verbose( "No change of parameter %s required" % paramName )
      return S_OK()
    gLogger.verbose( "Parameter %s to be changed" % paramName )
    transID = self.paramValues['TransformationID']
    if self.exists and transID:
      res = self.transClient.setTransformationParameter( transID, paramName, value )
      if not res['OK']:
        return res
    self.paramValues[paramName] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    """ Fetch the transformation from the server and load its parameters locally

        :param printOutput: pretty print the server answer when the query fails
        :return: S_OK( dictionary of parameters ) / S_ERROR
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setterName = "set%s" % paramName
      # This lookup must be the last attribute access before the call: for a
      # generic setter it is what records the parameter name in self.item_called.
      setter = getattr( self, setterName, None )
      if not callable( setter ):
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    """ Return S_OK( list of logging records ) for this transformation

        :param printOutput: print the records (or the failed answer)
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformationLogging( transID )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    loggingList = res['Value']
    if printOutput:
      self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' )
    return S_OK( loggingList )

  def extendTransformation( self, nTasks, printOutput = False ):
    """ Call the client's extendTransformation( transID, nTasks )
    """
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    """ Call the client's cleanTransformation; on success the local Status becomes 'Cleaned'
    """
    res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation( self, printOutput = False ):
    """ Call the client's deleteTransformation; on success this object is reset
    """
    res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation( self, lfns, printOutput = False ):
    """ Call the client's addFilesToTransformation( transID, lfns )
    """
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    """ Call the client's setFileStatusForTransformation( transID, status, lfns )
    """
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    """ Call the client's getTransformationTaskStats( transID )
    """
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    """ Call the client's getTransformationStats( transID )
    """
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    """ Call the client's deleteTasks( transID, taskMin, taskMax )
    """
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ):
    """ Call the client's addTaskForTransformation( transID, lfns, se )
    """
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    """ Call the client's setTaskStatus( transID, taskID, status )
    """
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    """ Invoke the client method called operation with the transformation ID prepended

        :param operation: name of a callable attribute of self.transClient
        :param parms: positional arguments passed after the transformation ID
        :param kwds: keyword arguments for the call; 'printOutput' (default False) is
                     consumed here and triggers pretty printing of the result
        :return: the result of the client call, or S_ERROR
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    printOutput = kwds.pop( 'printOutput', False )
    fcn = getattr( self.transClient, operation, None )
    if not callable( fcn ):
      return S_ERROR( "Unable to invoke %s, it isn't a member function of TransformationClient" % operation )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def __printSelection( self, res, outputFields, keyField, orderBy ):
    """ Print the outcome of a successful selection query

        Without outputFields the available field names are listed; otherwise the
        records are printed, or a notice when there are none.
    """
    if not outputFields:
      gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) )
    elif not res['Value']:
      gLogger.info( "No tasks found for selection" )
    else:
      self._printFormattedDictList( res['Value'], outputFields, keyField, orderBy )

  def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID',
                                                                                'TargetSE', 'UsedSE', 'ErrorCount',
                                                                                'InsertedTime', 'LastUpdate'],
                             orderBy = 'FileID', printOutput = False ):
    """ Query the files of this transformation

        :param fileStatus: restrict the selection to these file statuses
        :param lfns: restrict the selection to these LFNs
        :param outputFields: fields to print; an empty value lists the available ones
        :param orderBy: field used to order the printed records
        :param printOutput: print the result
        :return: the result of the client's getTransformationFiles
    """
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      self.__printSelection( res, outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID',
                                                                                   'ExternalStatus', 'ExternalID',
                                                                                   'TargetSE', 'CreationTime',
                                                                                   'LastUpdateTime'],
                             orderBy = 'TaskID', printOutput = False ):
    """ Query the tasks of this transformation

        :param taskStatus: restrict the selection to these external statuses
        :param taskIDs: restrict the selection to these task IDs
        :param outputFields: fields to print; an empty value lists the available ones
        :param orderBy: field used to order the printed records
        :param printOutput: print the result
        :return: the result of the client's getTransformationTasks
    """
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      self.__printSelection( res, outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status',
                                                                                'AgentType', 'TransformationName',
                                                                                'CreationDate'],
                         orderBy = 'TransformationID', printOutput = False ):
    """ Query transformations, optionally restricted by ID and status

        :param transID: restrict the selection to these transformation IDs
        :param transStatus: restrict the selection to these statuses
        :param outputFields: fields to print; an empty value lists the available ones
        :param orderBy: field used to order the printed records
        :param printOutput: print the result
        :return: the result of the client's getTransformations
    """
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      self.__printSelection( res, outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getAuthorDNfromProxy( self ):
    """ gets the AuthorDN and username of the transformation from the uploaded proxy

        :return: S_OK( {'username': ..., 'authorDN': ...} ) / S_ERROR
    """
    res = getProxyInfo()
    if not res['OK']:
      gLogger.error( "Unable to get uploaded proxy Info %s " %res['Message'] )
      return S_ERROR( res['Message'] )
    proxyInfo = res['Value']
    return S_OK( {'username' : proxyInfo['username'], 'authorDN' : proxyInfo['identity'] } )

  #############################################################################
  def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [], outputFields = ['TransformationID', 'Status',
                                                                                                     'AgentType', 'TransformationName',
                                                                                                     'CreationDate', 'AuthorDN'],
                                orderBy = 'TransformationID', printOutput = False ):
    """ Query the transformations created by one author

        :param authorDN: DN of the author; when empty it is taken from the proxy
        :param userName: expected user name; only checked when authorDN is empty, in
                         which case it must match the user name found in the proxy
        :param transID: restrict the selection to these transformation IDs
        :param transStatus: restrict the selection to these statuses
        :param outputFields: fields to print; an empty value lists the available ones
        :param orderBy: field used to order the printed records
        :param printOutput: print the result
        :return: the result of the client's getTransformations, or S_ERROR
    """
    condDict = {}
    if authorDN == "":
      res = self.getAuthorDNfromProxy()
      if not res['OK']:
        gLogger.error( res['Message'] )
        return S_ERROR( res['Message'] )
      foundUserName = res['Value']['username']
      foundAuthor   = res['Value']['authorDN']
      # A user name that differs from the proxy owner cannot be resolved to a DN here
      if userName != "" and userName != foundUserName:
        message = "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" %(userName, foundUserName)
        gLogger.error( message )
        return S_ERROR( message )

      userName = foundUserName
      authorDN = foundAuthor
      gLogger.info("Will list transformations created by user '%s' with status '%s'" %(userName, ', '.join( transStatus )))
    else:
      gLogger.info("Will list transformations created by '%s' with status '%s'" %(authorDN, ', '.join( transStatus )))

    condDict['AuthorDN'] = authorDN
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res

    if printOutput:
      self.__printSelection( res, outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getSummaryTransformations( self , transID = []):
    """Show the summary for a list of Transformations

       Fields starting with 'F' ('J')  refers to files (jobs).
       Proc. stand for processed.

       :param transID: list of transformation IDs to summarise
       :return: S_OK( list of dictionaries keyed by the short field names ) / S_ERROR
    """
    condDict = { 'TransformationID' : transID }
    orderby = []
    start = 0
    maxitems = len( transID )
    paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed',
                      'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting',
                      'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled']
    # Below, the header used for each field in the printing: short to fit in one line
    paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.',
                           'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled']
    dictList = []

    result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems )
    if not result['OK']:
      self._prettyPrint( result )
      return result

    if result['Value']['TotalRecords'] > 0:
      # Best effort: a malformed answer is reported and the records converted
      # before the failure are kept
      try:
        paramNames = result['Value']['ParameterNames']
        for paramValues in result['Value']['Records']:
          paramShowValues = [paramValues[paramNames.index( pname )] for pname in paramShowNames]
          dictList.append( dict( zip( paramShowNamesShort, paramShowValues ) ) )
      except Exception as x:
        gLogger.error( 'Exception %s ' % str( x ) )

    if not dictList:
      gLogger.error( 'No found transformations satisfying input condition')
      return S_ERROR( 'No found transformations satisfying input condition')

    # _printFormattedDictList is used as a statement everywhere else in this class;
    # its return value is not meant to be printed.
    self._printFormattedDictList( dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0] )

    return S_OK( dictList )
def _getProductionSummary():
  """Write an HTML summary of the selected productions to ./tables.html.

  The productions are those given on the command line or, when none is given,
  all transformations matching the requested statuses (and types).  For each of
  them the number of files, number of events, cross section and directory
  metadata are collected and rendered as one table per detector and data type.
  The function ends the process through dexit(0).
  """
  clip = _Params()
  clip.registerSwitch()
  Script.parseCommandLine()
  from ILCDIRAC.Core.Utilities.HTML                             import Table
  from ILCDIRAC.Core.Utilities.ProcessList                      import ProcessList
  from DIRAC.TransformationSystem.Client.TransformationClient   import TransformationClient
  from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
  from DIRAC import gConfig, gLogger

  def _xsectionOf(addinfo):
    """Return addinfo['xsection']['sum']['xsection'], or None when that entry is absent."""
    if 'xsection' in addinfo:
      if 'sum' in addinfo['xsection']:
        if 'xsection' in addinfo['xsection']['sum']:
          return addinfo['xsection']['sum']['xsection']
    return None

  prod = clip.prod
  full_detail = clip.full_det
  fc = FileCatalogClient()

  processlist = gConfig.getValue('/LocalSite/ProcessListPath')
  prl = ProcessList(processlist)
  processesdict = prl.getProcessesDict()

  trc = TransformationClient()
  prodids = []
  if not prod:
    conddict = {'Status': clip.statuses}
    if clip.ptypes:
      conddict['Type'] = clip.ptypes
    res = trc.getTransformations( conddict )
    if res['OK']:
      prodids = [transfs['TransformationID'] for transfs in res['Value']]
  else:
    prodids.extend(prod)

  # Datatype used in the catalog query for each supported production type
  datatypes = {'MCReconstruction': 'DST',
               'MCReconstruction_Overlay': 'DST',
               'MCGeneration': 'gen',
               'MCSimulation': 'SIM'}
  metadata = []

  gLogger.info("Will run on prods %s" % str(prodids))

  for prodID in prodids:
    if prodID<clip.minprod:
      continue
    meta = {'ProdID': prodID}
    res = trc.getTransformation(str(prodID))
    if not res['OK']:
      gLogger.error("Error getting transformation %s" % prodID )
      continue
    prodtype = res['Value']['Type']
    proddetail = res['Value']['Description']
    if prodtype in datatypes:
      meta['Datatype'] = datatypes[prodtype]
    elif prodtype in ['Split','Merge']:
      gLogger.warn("Invalid query for %s productions" % prodtype)
      continue
    else:
      gLogger.error("Unknown production type %s"% prodtype)
      continue
    res = fc.findFilesByMetadata(meta)
    if not res['OK']:
      gLogger.error(res['Message'])
      continue
    lfns = res['Value']
    nb_files = len(lfns)
    if not nb_files:
      gLogger.warn("No files found for prod %s" % prodID)
      continue
    path = os.path.dirname(lfns[0])
    res = fc.getDirectoryUserMetadata(path)
    if not res['OK']:
      gLogger.warn('No meta data found for %s' % path)
      continue
    dirmeta = {'proddetail': proddetail,
               'prodtype': prodtype,
               'nb_files': nb_files}
    dirmeta.update(res['Value'])

    lumi  = 0.
    nbevts = 0
    files = 0
    xsec = 0.0
    if not full_detail:
      # Extrapolate from the first file only
      info = _getFileInfo(lfns[0])
      nbevts = info[1]*len(lfns)
      lumi = info[0]*len(lfns)
      value = _xsectionOf(info[2])
      if value is not None:
        xsec += value
        files += 1
    else:
      for lfn in lfns:
        info = _getFileInfo(lfn)
        lumi += info[0]
        nbevts += info[1]
        value = _xsectionOf(info[2])
        if value is not None:
          xsec += value
          files += 1
    if not lumi:
      # No luminosity on the files themselves: use the ancestors found at the
      # largest depth instead
      xsec = 0
      files = 0
      depthDict = {}
      depSet = set()
      seenAncestors = set()
      res = fc.getFileAncestors(lfns,[1,2,3,4])
      if res['OK']:
        for lfn,ancestorsDict in res['Value']['Successful'].items():
          for ancestor,dep in ancestorsDict.items():
            depthDict.setdefault(dep,[])
            if ancestor not in seenAncestors:
              depthDict[dep].append(ancestor)
              depSet.add(dep)
              seenAncestors.add(ancestor)
      # depSet is empty when the lookup failed or no ancestor exists; there is
      # then nothing to add
      deepest = depthDict[max(depSet)] if depSet else []
      for ancestor in deepest:
        info = _getFileInfo(ancestor)
        lumi += info[0]
        value = _xsectionOf(info[2])
        if value is not None:
          xsec += value
          files += 1
    if xsec and files:
      xsec /= files
      dirmeta['CrossSection']=xsec
    else:
      dirmeta['CrossSection']=0.0

    if nbevts:
      dirmeta['NumberOfEvents']=nbevts
    if 'NumberOfEvents' not in dirmeta:
      dirmeta['NumberOfEvents']=0

    # Use the process list description when there is one, else the event type
    # itself (this also covers a process list entry without 'Detail')
    evtType = dirmeta['EvtType']
    detail = evtType
    if evtType in processesdict and 'Detail' in processesdict[evtType]:
      detail = processesdict[evtType]['Detail']

    if not prodtype == 'MCGeneration':
      res = trc.getTransformationInputDataQuery(str(prodID))
      if res['OK']:
        if 'ProdID' in res['Value']:
          dirmeta['MomProdID']=res['Value']['ProdID']
    if 'MomProdID' not in dirmeta:
      dirmeta['MomProdID']=0
    dirmeta['detail']= _translate(detail)

    metadata.append(dirmeta)

  corres = {"MCGeneration":'gen',"MCSimulation":'SIM',"MCReconstruction":"REC","MCReconstruction_Overlay":"REC"}
  detectors = {'ILD': {'SIM': [], 'REC': []},
               'SID': {'SIM': [], 'REC': []},
               'sid': {'SIM': [], 'REC': []},
               'gen': []}
  for channel in metadata:
    if 'DetectorType'  not in channel:
      detectors['gen'].append((channel['detail'],
                               channel['Energy'],
                               channel['ProdID'],
                               channel['nb_files'],
                               channel['NumberOfEvents']/channel['nb_files'],
                               channel['NumberOfEvents'],
                               channel['CrossSection'],str(channel['proddetail'])))
    else:
      if not channel['DetectorType'] in detectors:
        gLogger.error("This is unknown detector", channel['DetectorType'])
        continue
      detectors[channel['DetectorType']][corres[channel['prodtype']]].append((channel['detail'],
                                                                              channel['Energy'],
                                                                              channel['DetectorType'],
                                                                              channel['ProdID'],
                                                                              channel['nb_files'],
                                                                              channel['NumberOfEvents']/channel['nb_files'],
                                                                              channel['NumberOfEvents'],
                                                                              channel['CrossSection'],
                                                                              channel['MomProdID'],
                                                                              str(channel['proddetail'])))

  detectorHeader = ('Channel', 'Energy','Detector','ProdID','Number of Files','Events/File','Statistics','Cross Section (fb)','Origin ProdID','Comment')
  # (key in detectors, <h1> title, label used in the log)
  sections = [('ILD', 'ILD prods', 'ILC CDR prods'),
              ('SID', 'SID prods', 'SID CDR prods'),
              ('sid', 'sid dbd prods', 'sid DBD prods')]

  with open("tables.html","w") as of:
    of.write("""<!DOCTYPE html>
<html>
 <head>
<title> Production summary </title>
</head>
<body>
""")
    if len(detectors['gen']):
      of.write("<h1>gen prods</h1>\n")
      table = Table(header_row = ('Channel', 'Energy','ProdID','Tasks','Average Evts/task','Statistics','Cross Section (fb)','Comment'))
      for item in detectors['gen']:
        table.rows.append( item )
      of.write(str(table))
      gLogger.info("Gen prods")
      gLogger.info(str(table))

    for detKey, title, logLabel in sections:
      # The section title is always written, as before; only non-empty data
      # types get a table
      of.write("<h1>%s</h1>\n" % title)
      for ptype in detectors[detKey].keys():
        rows = detectors[detKey][ptype]
        if not len(rows):
          continue
        of.write("<h2>%s</h2>\n"%ptype)
        table = Table(header_row = detectorHeader)
        for item in rows:
          table.rows.append( item )
        of.write(str(table))
        gLogger.info("%s %s" % (logLabel, ptype))
        gLogger.info(str(table))

    of.write("""
</body>
</html>
""")
  gLogger.notice("Check ./tables.html in any browser for the results")
  dexit(0)
Example #21
0
def _getInfo():
  """gets info about transformation

  Prints the parameters of the production given on the command line and/or the
  metadata, ancestors and descendants of the given file together with the
  production that created it.  A bare file name (no '/') is decoded following
  the <...>_<gen|rec|dst|sim>_<prodID>_<taskID>.<ext> naming pattern.
  Ends the process through dexit: 0 on success, 1 on error.
  """
  clip = _Params()
  clip.registerSwitches()
  Script.parseCommandLine()

  if not clip.prodid and not clip.filename:
    Script.showHelp()
    dexit(1)

  from DIRAC import gLogger
  import os

  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
  tc = TransformationClient()

  from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
  fc = FileCatalogClient()
  fmeta = {}
  trans = None
  info = []

  def _conventionError():
    """Report a file name that cannot be decoded and stop with an error status."""
    gLogger.error("This file does not follow the ILCDIRAC production conventions!")
    gLogger.error("Please specify a prod ID directly or check the file.")
    dexit(1)

  if clip.prodid:
    res = tc.getTransformation(clip.prodid)
    if not res['OK']:
      gLogger.error(res['Message'])
      dexit(1)
    trans = res['Value']
    res = tc.getTransformationInputDataQuery( clip.prodid )
    if res['OK']:
      trans['InputDataQuery'] = res['Value']
    res = tc.getAdditionalParameters ( clip.prodid )
    if res['OK']:
      trans['AddParams'] = res['Value']
    #do something with transf
    res1 = fc.findDirectoriesByMetadata({'ProdID':clip.prodid})
    directories = list(res1['Value'].values()) if res1['OK'] else []
    if directories:
      gLogger.verbose("Found %i directory matching the metadata" % len(directories) )
      for dirs in directories:
        res = fc.getDirectoryUserMetadata(dirs)
        if res['OK']:
          fmeta.update(res['Value'])
        else:
          gLogger.error("Failed to get metadata for %s, SKIPPING" % dirs)
          continue
        res = fc.listDirectory(dirs)
        if not res['OK']:
          continue
        content = res['Value']['Successful'][dirs]
        if content["Files"]:
          # The metadata of the first file that can be read is taken as
          # representative for the directory
          for f_ex in content["Files"].keys():
            res = fc.getFileUserMetadata(f_ex)
            if res['OK']:
              fmeta.update(res['Value'])
              break

    #here we have trans and fmeta
    info.append("")
    info.append("Production %s has the following parameters:" % trans['TransformationID'])
    info.extend(_createTransfoInfo(trans))

    if fmeta:
      info.append('The files created by this production have the following metadata:')
      info.extend(_createFileInfo(fmeta))
      info.append("It's possible that some meta data was not brought back,")
      info.append("in particular file level metadata, so check some individual files")

  if clip.filename:
    pid = ""
    if clip.filename.count("/"):
      # Full LFN: everything comes from the catalog
      fpath = os.path.dirname(clip.filename)
      res = fc.getDirectoryUserMetadata(fpath)
      if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)
      fmeta.update(res['Value'])
      res = fc.getFileUserMetadata(clip.filename)
      if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)
      fmeta.update(res['Value'])
      if 'ProdID' in fmeta:
        pid = str(fmeta['ProdID'])
      res = fc.getFileAncestors([clip.filename], 1)
      if res["OK"]:
        for dummy_lfn,ancestorsDict in res['Value']['Successful'].items():
          if ancestorsDict.keys():
            fmeta["Ancestors"] = ancestorsDict.keys()
      res = fc.getFileDescendents([clip.filename], 1)
      if res["OK"]:
        for dummy_lfn,descendDict in res['Value']['Successful'].items():
          if descendDict.keys():
            fmeta['Descendants'] = descendDict.keys()
    else:
      # Bare file name: production and task IDs are decoded from the name
      ext = clip.filename.split(".")[-1]
      fitems = []
      for i in clip.filename.split('.')[:-1]:
        fitems.extend(i.split('_'))
      # Tokens that precede the production ID, by extension and in priority order
      markers = {'stdhep': ['gen'], 'slcio': ['rec', 'dst', 'sim']}
      pidIndex = None
      for marker in markers.get(ext, []):
        if marker in fitems:
          pidIndex = fitems.index(marker) + 1
          break
      # as task follows the prod id, both tokens must be there
      if pidIndex is None or pidIndex + 1 >= len(fitems) or not fitems[pidIndex]:
        _conventionError()
      pid = fitems[pidIndex]
      tid = fitems[pidIndex + 1]
      try:
        prodNumber = int(pid)
        last_folder = str(int(tid) // 1000).zfill(3)
      except ValueError:
        _conventionError()
      res = fc.findDirectoriesByMetadata({'ProdID':prodNumber})
      if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)
      directories = list(res['Value'].values())
      if not directories:
        gLogger.error("No directory found for production %s" % pid)
        dexit(1)
      dir_ex = directories[0]

      def _isProd(segment):
        """Tell whether a path segment is the numeric production ID."""
        try:
          return int(segment) == prodNumber
        except ValueError:
          return False

      segments = dir_ex.rstrip("/").split("/")
      fpath = ""
      if _isProd(segments[-1]):
        # The directory is the production folder: the task folder sits below it
        fpath = "/".join(segments)+"/"+last_folder+"/"
      elif len(segments) > 1 and _isProd(segments[-2]):
        fpath = "/".join(segments[:-2])+"/"+pid.zfill(8)+"/"+last_folder+"/"
      else:
        gLogger.error('Path does not follow conventions, will not get file family')

      if fpath:
        fpath += clip.filename
        res = fc.getFileAncestors([fpath], 1)
        if res["OK"]:
          for dummy_lfn,ancestorsDict in res['Value']['Successful'].items():
            fmeta["Ancestors"] = ancestorsDict.keys()
        res = fc.getFileDescendents([fpath], 1)
        if res["OK"]:
          for dummy_lfn,descendDict in res['Value']['Successful'].items():
            fmeta['Descendants'] = descendDict.keys()

      res = fc.getDirectoryUserMetadata(dir_ex)
      if not res['OK']:
        gLogger.error(res['Message'])
      else:
        fmeta.update(res['Value'])
    res = tc.getTransformation(pid)
    if not res['OK']:
      gLogger.error(res['Message'])
      gLogger.error('Will proceed anyway')
    else:
      trans = res['Value']
      res = tc.getTransformationInputDataQuery( pid )
      if res['OK']:
        trans['InputDataQuery'] = res['Value']
      res = tc.getAdditionalParameters ( pid )
      if res['OK']:
        trans['AddParams'] = res['Value']
    info.append("")
    info.append("Input file has the following properties:")
    info.extend(_createFileInfo(fmeta))
    info.append("")
    info.append('It was created with the production %s:' % pid)
    if trans:
      info.extend(_createTransfoInfo(trans))

  gLogger.notice("\n".join(info))

  dexit(0)
Example #22
0
class TransformationCLI( cmd.Cmd, API ):
  """ Interactive shell for the Transformation System.

      Every ``do_<name>`` method implements the shell command ``<name>``. The method docstrings
      are the help texts printed by the ``help`` and ``helpall`` commands, so they are written
      for the user of the shell. All requests go through ``self.server``.

      The print calls use the single-argument parenthesised form, which produces the same
      output with the Python 2 print statement and with the print function.
  """

  def __init__( self ):
    """ Create the transformation client and initialise both base classes """
    self.server = TransformationClient()
    self.indentSpace = 4
    cmd.Cmd.__init__( self )
    API.__init__( self )

  def printPair( self, key, value, separator = ":" ):
    """ Print a key followed by a (possibly multi-line) value.

        The first line of the value goes next to the key, which is padded up to
        self.indentSpace characters. The other lines are printed indented below it.

        :param str key: label printed at the start of the first line
        :param str value: text to print, its lines separated by newline characters
        :param str separator: string printed between the key and the value
    """
    valueList = value.split( "\n" )
    print( "%s%s%s %s" % ( key, " " * ( self.indentSpace - len( key ) ), separator, valueList[0].strip() ) )
    followingLines = valueList[ 1: ]
    # Docstrings usually end with a blank line holding only the indentation of the closing
    # quotes: that one is dropped, but a last line with real text must be printed
    if followingLines and not followingLines[-1].strip():
      followingLines.pop()
    for valueLine in followingLines:
      print( "%s  %s" % ( " " * self.indentSpace, valueLine.strip() ) )

  def do_exit( self, args ):
    """ Exits the shell.
        usage: exit
    """
    sys.exit( 0 )

  def do_quit( self, *args ):
    """ Exits the shell.
        Usage: quit
    """
    sys.exit( 0 )

  def do_help( self, args ):
    """ Default version of the help command
       Usage: help <command>
       OR use helpall to see description for all commands"""
    cmd.Cmd.do_help( self, args )

  # overriding default help command
  def do_helpall( self, args ):
    """
    Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
    """
    argss = args.split()
    if not argss:
      print( "\nAvailable commands:\n" )
      for attribute in sorted( dir( self ) ):
        if attribute.startswith( "do_" ):
          # Only leading white space is removed: slicing off the first character would
          # eat the first letter of the docstrings that start directly with text
          helpText = ( getattr( self, attribute ).__doc__ or "" ).lstrip()
          self.printPair( attribute[ 3: ], helpText )
          print( "" )
      return

    command = argss[0]
    try:
      obj = getattr( self, "do_%s" % command )
    except AttributeError:
      print( "There's no such %s command" % command )
      return
    self.printPair( command, ( obj.__doc__ or "" ).lstrip() )

  def do_shell( self, args ):
    """Execute a shell command

       usage !<shell_command>
    """
    res = shellCall( 0, args )
    if not res['OK']:
      print( res['Message'] )
      return
    # 'Message' is only looked up for failed calls. A command that ran but returned
    # a non zero status still has its output shown, after a note about the status
    returnCode, stdOut, stdErr = res['Value']
    if returnCode != 0:
      print( "Command exited with status %s" % returnCode )
    print( "%s\n%s" % ( stdOut, stdErr ) )

  def check_params( self, args, num ):
    """ Check that at least num white space separated arguments were given.

        :param str args: raw argument string of a command
        :param int num: minimum number of arguments
        :return: ( list of arguments, number of arguments ), or ( False, number of arguments )
                 after printing an error when there are fewer than num
    """
    argss = args.split()
    length = len( argss )
    if length < num:
      print( "Error: Number of arguments provided %d less that required %d, please correct." % ( length, num ) )
      return ( False, length )
    return ( argss, length )

  def check_id_or_name( self, id_or_name ):
    """ Convert the argument to a number when it is made of digits only, return it unchanged otherwise.

        :param str id_or_name: transformation ID or transformation name
    """
    if id_or_name.isdigit():
      return long( id_or_name ) # it looks like an ID
    return id_or_name

  ####################################################################
  #
  # These are the methods for transformation manipulation
  #

  def do_getall( self, args ):
    """Get transformation details

       usage: getall [Status] [Status]
    """
    oTrans = Transformation()
    oTrans.getTransformations( transStatus = args.split(), printOutput = True )

  def do_getAllByUser( self, args ):
    """Get all transformations created by a given user

       The first argument is the authorDN or the username. The authorDN
       is preferred: it needs to be inside quotes because it contains
       white spaces. Only the authorDN should be quoted.

       When the username is provided instead,
       the authorDN is retrieved from the uploaded proxy,
       so that the retrieved transformations are those created by
       the user who uploaded that proxy: that user could be different
       from the username provided to the function.

       usage: getAllByUser authorDN or username [Status] [Status]
    """
    quotes = ( "'", '"' )
    argss = args.split()
    if not argss:
      print( self.do_getAllByUser.__doc__ )
      return

    firstArg = argss[0]
    username = ""
    author = ""
    if firstArg[0] in quotes: # authorDN given
      author = firstArg
      status_idx = 1
      # The white spaces of the DN made args.split() cut it in several tokens: they are
      # glued back together up to the token closing the quote. Nothing is consumed when
      # the first token already holds the closing quote, so that the status arguments
      # following a DN without white spaces are not swallowed.
      if len( firstArg ) == 1 or firstArg[-1] not in quotes:
        for arg in argss[1:]:
          author += ' ' + arg
          status_idx += 1
          if arg[-1] in quotes:
            break
      # At this point we should have something like 'author'
      if len( author ) < 2 or author[-1] not in quotes:
        print( "AuthorDN need to be quoted (just quote that argument)" )
        return
      author = author[1:-1] # throw away the quotes
      # the rest are the requested status
      status = argss[ status_idx: ]
    elif '=' in firstArg: # looks like a DN, but the user did not quote it
      print( "AuthorDN need to be quoted (just quote that argument)" )
      return
    else: # username given
      username = firstArg
      status = argss[ 1: ]

    oTrans = Transformation()
    oTrans.getTransformationsByUser( authorDN = author, userName = username, transStatus = status, printOutput = True )

  def do_summaryTransformations( self, args ):
    """Show the summary for a list of Transformations

    Fields starting with 'F' ('J')  refers to files (jobs).
    Proc. stand for processed.

        Usage: summaryTransformations <ProdID> [<ProdID> ...]
    """
    argss = args.split()
    if not argss:
      print( self.do_summaryTransformations.__doc__ )
      return

    oTrans = Transformation()
    oTrans.getSummaryTransformations( transID = argss )

  def do_getStatus( self, args ):
    """Get transformation details

       usage: getStatus <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    for transName in argss:
      res = self.server.getTransformation( transName )
      if not res['OK']:
        print( "Getting status of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s: %s" % ( transName, res['Value']['Status'] ) )

  def do_setStatus( self, args ):
    """Set transformation status

       usage: setStatus  <Status> <transName|ID>
       Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
    """
    argss = args.split()
    if len( argss ) < 2:
      print( "transformation and status not supplied" )
      return
    status = argss[0]
    for transName in argss[1:]:
      res = self.server.setTransformationParameter( transName, 'Status', status )
      if not res['OK']:
        print( "Setting status of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s set to %s" % ( transName, status ) )

  def do_start( self, args ):
    """Start transformation

       usage: start <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'Status', 'Active' )
      if not res['OK']:
        print( "Setting Status of %s failed: %s" % ( transName, res['Message'] ) )
        continue
      res = self.server.setTransformationParameter( transName, 'AgentType', 'Automatic' )
      if not res['OK']:
        print( "Setting AgentType of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s started" % transName )

  def do_stop( self, args ):
    """Stop transformation

       usage: stop <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'AgentType', 'Manual' )
      if not res['OK']:
        print( "Stopping of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s stopped" % transName )

  def do_flush( self, args ):
    """Flush transformation

       usage: flush <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'Status', 'Flush' )
      if not res['OK']:
        print( "Flushing of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s flushing" % transName )

  def do_get( self, args ):
    """Get transformation definition

    usage: get <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print( "Failed to get %s: %s" % ( transName, res['Message'] ) )
    else:
      # The body is shown by getBody; a definition without body must not raise here
      res['Value'].pop( 'Body', None )
      printDict( res['Value'] )

  def do_getBody( self, args ):
    """Get transformation body

    usage: getBody <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print( "Failed to get %s: %s" % ( transName, res['Message'] ) )
    else:
      print( res['Value']['Body'] )

  def do_getFileStat( self, args ):
    """Get transformation file statistics

     usage: getFileStat <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    res = self.server.getTransformationStats( transName )
    if not res['OK']:
      print( "Failed to get statistics for %s: %s" % ( transName, res['Message'] ) )
    else:
      # Only the per status counters are printed, tolerate statistics without a total
      res['Value'].pop( 'Total', None )
      printDict( res['Value'] )

  def do_modMask( self, args ):
    """Modify transformation input definition

       usage: modMask <mask> <transName|ID> [<transName|ID> ...]
    """
    argss = args.split()
    # Both the mask and at least one transformation are needed: with the mask alone
    # there would be nothing to update and nothing reported to the user
    if len( argss ) < 2:
      print( "mask and transformation not supplied" )
      return
    mask = argss[0]
    for transName in argss[1:]:
      res = self.server.setTransformationParameter( transName, "FileMask", mask )
      if not res['OK']:
        print( "Failed to modify input file mask for %s: %s" % ( transName, res['Message'] ) )
      else:
        print( "Updated %s filemask" % transName )

  def do_getFiles( self, args ):
    """Get files for the transformation (optionally with a given status)

    usage: getFiles <transName|ID> [Status] [Status]
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    status = argss[1:]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print( "Failed to get transformation information: %s" % res['Message'] )
      return
    selectDict = {'TransformationID':res['Value']['TransformationID']}
    if status:
      selectDict['Status'] = status
    res = self.server.getTransformationFiles( condDict = selectDict )
    if not res['OK']:
      print( "Failed to get transformation files: %s" % res['Message'] )
    elif res['Value']:
      self._printFormattedDictList( res['Value'], ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                    'LFN', 'LFN' )
    else:
      print( "No files found" )

  def do_getFileStatus( self, args ):
    """Get file(s) status for the given transformation

    usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
    """
    argss = args.split()
    if len( argss ) < 2:
      print( "transformation and file not supplied" )
      return
    transName = argss[0]
    lfns = argss[1:]

    res = self.server.getTransformation( transName )
    if not res['OK']:
      print( "Failed to get transformation information: %s" % res['Message'] )
      return
    selectDict = {'TransformationID':res['Value']['TransformationID']}
    res = self.server.getTransformationFiles( condDict = selectDict )
    if not res['OK']:
      print( "Failed to get transformation files: %s" % res['Message'] )
    elif res['Value']:
      # All the files of the transformation come back, keep the requested ones only
      requested = set( lfns )
      filesList = [ fileDict for fileDict in res['Value'] if fileDict['LFN'] in requested ]
      if filesList:
        self._printFormattedDictList( filesList, ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                      'LFN', 'LFN' )
      else:
        print( "Could not find any LFN in %s for transformation %s" % ( lfns, transName ) )
    else:
      print( "No files found" )

  def do_getOutputFiles( self, args ):
    """Get output files for the transformation

    usage: getOutputFiles <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print( "Failed to get transformation information: %s" % res['Message'] )
      return
    # The catalog is queried with the ID resolved by the server, so that a
    # transformation given by name can be looked up (and reported) as well
    transID = res['Value']['TransformationID']
    fc = FileCatalog()
    res = fc.findFilesByMetadata( { 'ProdID': transID } )
    if not res['OK']:
      print( res['Message'] )
      return
    if not res['Value']:
      print( 'No output files yet for transformation %s' % transID )
      return
    for lfn in res['Value']:
      print( lfn )

  def do_getInputDataQuery( self, args ):
    """Get input data query for the transformation

    usage: getInputDataQuery <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    res = self.server.getTransformationInputDataQuery( transName )
    if not res['OK']:
      print( "Failed to get transformation input data query: %s" % res['Message'] )
    else:
      print( res['Value'] )

  def do_setFileStatus( self, args ):
    """Set file status for the given transformation

    usage: setFileStatus <transName|ID> <lfn> <status>
    """
    argss = args.split()
    if len( argss ) != 3:
      print( "transformation file and status not supplied" )
      return
    transName, lfn, status = argss
    res = self.server.setFileStatusForTransformation( transName, status, [lfn] )
    if not res['OK']:
      print( "Failed to update file status: %s" % res['Message'] )
    else:
      print( "Updated file status to %s" % status )

  def do_resetFile( self, args ):
    """Reset file status for the given transformation

    usage: resetFile <transName|ID> <lfns>
    """
    argss = args.split()
    if len( argss ) < 2:
      print( "transformation and file(s) not supplied" )
      return
    transName = argss[0]
    lfns = argss[1:]
    res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns )
    if not res['OK']:
      print( "Failed to reset file status: %s" % res['Message'] )
      return
    # An empty 'Failed' entry means that everything went fine: testing the mere
    # presence of the key would report a failure even when no file failed
    failed = res['Value'].get( 'Failed' )
    if failed:
      print( "Could not reset some files: " )
      for lfn, reason in failed.items():
        print( "%s %s" % ( lfn, reason ) )
    else:
      print( "Updated file statuses to 'Unused' for %d file(s)" % len( lfns ) )

  def do_resetProcessedFile( self, args ):
    """ Reset file status for the given transformation, also for files already processed
        usage: resetProcessedFile <transName|ID> <lfn> [<lfn> ...]
    """
    argss = args.split()
    if len( argss ) < 2:
      print( "transformation and file(s) not supplied" )
      return
    transName = argss[0]
    lfns = argss[1:]
    res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns, force = True )
    if not res['OK']:
      print( "Failed to reset file status: %s" % res['Message'] )
      return
    failed = res['Value'].get( 'Failed' )
    if failed:
      print( "Could not reset some files: " )
      for lfn, reason in failed.items():
        print( "%s %s" % ( lfn, reason ) )
    else:
      print( "Updated file statuses to 'Unused' for %d file(s)" % len( lfns ) )

  ####################################################################
  #
  # These are the methods for file manipulation
  #

  def do_addDirectory( self, args ):
    """Add files from the given catalog directory

    usage: addDirectory <directory> [directory]
    """
    argss = args.split()
    if not argss:
      print( "no directory supplied" )
      return
    for directory in argss:
      res = self.server.addDirectory( directory, force = True )
      if not res['OK']:
        print( 'failed to add directory %s: %s' % ( directory, res['Message'] ) )
      else:
        print( 'added %s files for %s' % ( res['Value'], directory ) )

  def do_replicas( self, args ):
    """ Get replicas for <path>

        usage: replicas <lfn> [lfn]
    """
    argss = args.split()
    if not argss:
      print( "no files supplied" )
      return
    res = self.server.getReplicas( argss )
    if not res['OK']:
      print( "failed to get any replica information: %s" % res['Message'] )
      return
    for lfn in sorted( res['Value']['Failed'] ):
      error = res['Value']['Failed'][lfn]
      print( "failed to get replica information for %s: %s" % ( lfn, error ) )
    for lfn in sorted( res['Value']['Successful'] ):
      # One line per file: the LFN followed by the names of its storage elements in columns
      outStr = "%s :" % lfn.ljust( 100 )
      for se in sorted( res['Value']['Successful'][lfn] ):
        outStr = "%s %s" % ( outStr, se.ljust( 15 ) )
      print( outStr )

  def do_addFile( self, args ):
    """Add new files to transformation DB

    usage: addFile <lfn> [lfn]
    """
    argss = args.split()
    if not argss:
      print( "no files supplied" )
      return
    lfnDict = {}
    for lfn in argss:
      lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':'IGNORED-SE', 'Size':0, 'GUID':'IGNORED-GUID',
                      'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.addFile( lfnDict, force = True )
    if not res['OK']:
      print( "failed to add any files: %s" % res['Message'] )
      return
    for lfn in sorted( res['Value']['Failed'] ):
      error = res['Value']['Failed'][lfn]
      print( "failed to add %s: %s" % ( lfn, error ) )
    for lfn in sorted( res['Value']['Successful'] ):
      print( "added %s" % lfn )

  def do_removeFile( self, args ):
    """Remove file from transformation DB

    usage: removeFile <lfn> [lfn]
    """
    argss = args.split()
    if not argss:
      print( "no files supplied" )
      return
    res = self.server.removeFile( argss )
    if not res['OK']:
      print( "failed to remove any files: %s" % res['Message'] )
      return
    for lfn in sorted( res['Value']['Failed'] ):
      error = res['Value']['Failed'][lfn]
      print( "failed to remove %s: %s" % ( lfn, error ) )
    for lfn in sorted( res['Value']['Successful'] ):
      print( "removed %s" % lfn )

  def do_addReplica( self, args ):
    """ Add new replica to the transformation DB

    usage: addReplica <lfn> <se>
    """
    argss = args.split()
    if len( argss ) != 2:
      print( "no file info supplied" )
      return
    lfn, se = argss
    lfnDict = {}
    lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.addReplica( lfnDict, force = True )
    if not res['OK']:
      print( "failed to add replica: %s" % res['Message'] )
      return
    for lfn in sorted( res['Value']['Failed'] ):
      error = res['Value']['Failed'][lfn]
      print( "failed to add replica: %s" % ( error ) )
    for lfn in sorted( res['Value']['Successful'] ):
      print( "added %s" % lfn )

  def do_removeReplica( self, args ):
    """Remove replica from the transformation DB

    usage: removeReplica <lfn> <se>
    """
    argss = args.split()
    if len( argss ) != 2:
      print( "no file info supplied" )
      return
    lfn, se = argss
    lfnDict = {}
    lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.removeReplica( lfnDict )
    if not res['OK']:
      print( "failed to remove replica: %s" % res['Message'] )
      return
    for lfn in sorted( res['Value']['Failed'] ):
      error = res['Value']['Failed'][lfn]
      print( "failed to remove replica: %s" % ( error ) )
    for lfn in sorted( res['Value']['Successful'] ):
      print( "removed %s" % lfn )

  def do_setReplicaStatus( self, args ):
    """Set replica status, usually used to mark a replica Problematic

    usage: setReplicaStatus <lfn> <status> <se>
    """
    argss = args.split()
    if len( argss ) < 3:
      print( "no file info supplied" )
      return
    lfn = argss[0]
    status = argss[1]
    se = argss[2]
    lfnDict = {}
    lfnDict[lfn] = {'Status':status, 'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.setReplicaStatus( lfnDict )
    if not res['OK']:
      print( "failed to set replica status: %s" % res['Message'] )
      return
    for lfn in sorted( res['Value']['Failed'] ):
      error = res['Value']['Failed'][lfn]
      print( "failed to set replica status: %s" % ( error ) )
    for lfn in sorted( res['Value']['Successful'] ):
      print( "updated replica status %s" % lfn )
Example #23
0
def _getInfo():
  """Print information about a production and/or about a file.

  The command line switches are collected in a _Params object:

  - with a production ID, the transformation parameters are reported together with the
    metadata of the directories (and of one file per directory) carrying that ProdID;
  - with a file name, the file metadata, ancestors and descendants are reported together
    with the production that created it. A full LFN is looked up directly in the catalog.
    For a bare file name the production and task numbers are read from the name itself
    (items separated by '_' and '.', the production number following 'gen' for stdhep
    files and 'rec', 'dst' or 'sim' for slcio files, the task number following the
    production number).

  The report is sent to gLogger.notice. The script exits with status 0 once the report is
  printed, and with status 1 when the needed information could not be obtained.
  """
  clip = _Params()
  clip.registerSwitches()
  Script.parseCommandLine()

  if not clip.prodid and not clip.filename:
    Script.showHelp()
    dexit(1)

  from DIRAC import gLogger
  import os

  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
  tc = TransformationClient()

  from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
  fc = FileCatalogClient()
  fmeta = {}
  trans = None
  info = []

  if clip.prodid:
    res = tc.getTransformation(clip.prodid)
    if not res['OK']:
      gLogger.error(res['Message'])
      dexit(1)
    trans = res['Value']
    res = tc.getTransformationInputDataQuery( clip.prodid )
    if res['OK']:
      trans['InputDataQuery'] = res['Value']
    res = tc.getAdditionalParameters ( clip.prodid )
    if res['OK']:
      trans['AddParams'] = res['Value']
    # Collect the metadata of the directories created by this production
    res1 = fc.findDirectoriesByMetadata({'ProdID':clip.prodid})
    if res1['OK'] and res1['Value']:
      gLogger.verbose("Found %i directory matching the metadata" % len(res1['Value']) )
      for dirs in res1['Value'].values():
        res = fc.getDirectoryUserMetadata(dirs)
        if res['OK']:
          fmeta.update(res['Value'])
        else:
          gLogger.error("Failed to get metadata for %s, SKIPPING" % dirs)
          continue
        res = fc.listDirectory(dirs)
        if not res['OK']:
          continue
        content = res['Value']['Successful'][dirs]
        if content["Files"]:
          # The first file whose metadata can be read stands for the whole directory
          for f_ex in content["Files"]:
            res = fc.getFileUserMetadata(f_ex)
            if res['OK']:
              fmeta.update(res['Value'])
              break

    #here we have trans and fmeta
    info.append("")
    info.append("Production %s has the following parameters:" % trans['TransformationID'])
    info.extend(_createTransfoInfo(trans))

    if fmeta:
      info.append('The files created by this production have the following metadata:')
      info.extend(_createFileInfo(fmeta))
      info.append("It's possible that some meta data was not brought back,")
      info.append("in particular file level metadata, so check some individual files")

  if clip.filename:
    pid = ""
    if clip.filename.count("/"):
      # Full LFN: everything can be asked directly to the catalog
      fpath = os.path.dirname(clip.filename)
      res = fc.getDirectoryUserMetadata(fpath)
      if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)
      fmeta.update(res['Value'])
      res = fc.getFileUserMetadata(clip.filename)
      if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)
      fmeta.update(res['Value'])
      if 'ProdID' in fmeta:
        pid = str(fmeta['ProdID'])
      res = fc.getFileAncestors([clip.filename], 1)
      if res["OK"]:
        for dummy_lfn,ancestorsDict in res['Value']['Successful'].items():
          if ancestorsDict:
            fmeta["Ancestors"] = list(ancestorsDict.keys())
      res = fc.getFileDescendents([clip.filename], 1)
      if res["OK"]:
        for dummy_lfn,descendDict in res['Value']['Successful'].items():
          if descendDict:
            fmeta['Descendants'] = list(descendDict.keys())
    else:
      # Bare file name: the production and task numbers have to be read from the name
      ext = clip.filename.split(".")[-1]
      fitems = []
      for i in clip.filename.split('.')[:-1]:
        fitems.extend(i.split('_'))
      # Items after which the production number is expected, by order of preference
      markers = {'stdhep': ('gen',), 'slcio': ('rec', 'dst', 'sim')}.get(ext, ())
      for marker in markers:
        if marker in fitems:
          position = fitems.index(marker) + 1
          if position < len(fitems):
            pid = fitems[position]
          break
      #as task follows the prod id, to get it we need
      prodNumber = None
      last_folder = None
      if pid:
        try:
          prodNumber = int(pid)
          tid = fitems[fitems.index(pid)+1]
          # The files are stored in sub folders of 1000 tasks: 000, 001, ...
          last_folder = str(int(tid)//1000).zfill(3)
        except (ValueError, IndexError):
          # Production or task number missing or not a number
          last_folder = None
      if last_folder is None:
        gLogger.error("This file does not follow the ILCDIRAC production conventions!")
        gLogger.error("Please specify a prod ID directly or check the file.")
        dexit(1)

      res = fc.findDirectoriesByMetadata({'ProdID':prodNumber})
      if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)
      directories = list(res['Value'].values())
      if not directories:
        gLogger.error("No directory found for production %s" % pid)
        dexit(1)
      dir_ex = directories[0]
      dirParts = dir_ex.split("/")
      fpath = ""
      try:
        if int(dirParts[-1]) == prodNumber:
          # dir_ex is the production folder itself: the task sub folder goes below it
          fpath = dir_ex+"/"+last_folder+"/"
        elif int(dirParts[-2]) == prodNumber:
          # dir_ex is one of the task sub folders: replace it with the one of this task
          fpath = "/".join(dirParts[:-2])+"/"+pid.zfill(8)+"/"+last_folder+"/"
        else:
          gLogger.error('Path does not follow conventions, will not get file family')
      except (ValueError, IndexError):
        # A path component that is not a number cannot be a production folder
        gLogger.error('Path does not follow conventions, will not get file family')

      if fpath:
        fpath += clip.filename
        res = fc.getFileAncestors([fpath], 1)
        if res["OK"]:
          for dummy_lfn,ancestorsDict in res['Value']['Successful'].items():
            fmeta["Ancestors"] = list(ancestorsDict.keys())
        res = fc.getFileDescendents([fpath], 1)
        if res["OK"]:
          for dummy_lfn,descendDict in res['Value']['Successful'].items():
            fmeta['Descendants'] = list(descendDict.keys())

      res = fc.getDirectoryUserMetadata(dir_ex)
      if not res['OK']:
        gLogger.error(res['Message'])
      else:
        fmeta.update(res['Value'])
    res = tc.getTransformation(pid)
    if not res['OK']:
      gLogger.error(res['Message'])
      gLogger.error('Will proceed anyway')
    else:
      trans = res['Value']
      res = tc.getTransformationInputDataQuery( pid )
      if res['OK']:
        trans['InputDataQuery'] = res['Value']
      res = tc.getAdditionalParameters ( pid )
      if res['OK']:
        trans['AddParams'] = res['Value']
    info.append("")
    info.append("Input file has the following properties:")
    info.extend(_createFileInfo(fmeta))
    info.append("")
    info.append('It was created with the production %s:' % pid)
    if trans:
      info.extend(_createTransfoInfo(trans))

  gLogger.notice("\n".join(info))

  dexit(0)
   if res['OK']:
     for transfs in res['Value']:
       prodids.append(transfs['TransformationID'])
  else:
    prodids.extend(prod)

  metadata = []
  
  gLogger.info("Will run on prods %s" % str(prodids))
  
  for prodID in prodids:
    if prodID<clip.minprod:
      continue
    meta = {}
    meta['ProdID']=prodID
    res = trc.getTransformation(str(prodID))
    if not res['OK']:
      gLogger.error("Error getting transformation %s" % prodID )
      continue
    prodtype = res['Value']['Type']
    proddetail = res['Value']['Description']
    if prodtype == 'MCReconstruction' or prodtype == 'MCReconstruction_Overlay' :
      meta['Datatype']='DST'
    elif prodtype == 'MCGeneration':
      meta['Datatype']='gen'
    elif prodtype == 'MCSimulation':
      meta['Datatype']='SIM'
    elif prodtype in ['Split','Merge']:
      gLogger.warn("Invalid query for %s productions" % prodtype)
      continue
    else:
Example #25
0
class TransformationAgent(AgentModule):
    """Agent that creates tasks from the 'Unused' files of transformations.

    Each cycle it selects transformations, looks up their unused files and
    the replicas of those files, lets a TransformationPlugin group them and
    registers the resulting tasks through the transformation client.
    """

    def initialize(self):
        """Read the agent options and instantiate the clients.

        :return: S_OK()
        """
        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')
        self.transformationStatus = self.am_getOption(
            'transformationStatus', ['Active', 'Completing', 'Flush'])
        self.maxFiles = self.am_getOption('MaxFiles', 5000)

        self.am_setOption('shifterProxy', 'ProductionManager')

        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        # transformation ID -> number of unused files left after the last pass
        self.unusedFiles = {}
        return S_OK()

    def execute(self):
        """Get the transformations to be processed and process each of them.

        Failures are logged and never propagated.

        :return: S_OK() in all cases
        """
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("execute: Failed to obtain transformations: %s" %
                         res['Message'])
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            # int() promotes to long by itself on Python 2 when needed
            transID = int(transDict['TransformationID'])
            gLogger.info("execute: Processing transformation %s." % transID)
            startTime = time.time()
            res = self.processTransformation(transDict)
            if not res['OK']:
                gLogger.info("execute: Failed to process transformation: %s" %
                             res['Message'])
            else:
                gLogger.info(
                    "execute: Processed transformation in %.1f seconds" %
                    (time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        """Obtain the transformations to be executed.

        With the 'Transformation' option left at 'All', every transformation
        whose status is in self.transformationStatus is requested; otherwise
        only the transformation named by the option is requested.

        :return: S_OK(list of transformation dicts) or the failed client result
        """
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info(
                "getTransformations: Initializing general purpose agent.")
            res = self.transDB.getTransformations(
                {'Status': self.transformationStatus}, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "getTransformations: Failed to get transformations: %s" %
                    res['Message'])
                return res
            transformations = res['Value']
            gLogger.info(
                "getTransformations: Obtained %d transformations to process" %
                len(transformations))
        else:
            gLogger.info(
                "getTransformations: Initializing for transformation %s." %
                transName)
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "getTransformations: Failed to get transformation: %s." %
                    res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def processTransformation(self, transDict):
        """Create tasks for the unused files of one transformation.

        :param transDict: transformation description; the keys
            'TransformationID', 'Status' and 'Type' are read, 'Plugin' is
            optional
        :return: S_OK() when the pass completed (even if nothing was done),
            otherwise the failed result of the step that went wrong
        """
        transID = transDict['TransformationID']
        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(condDict={
            'TransformationID': transID,
            'Status': 'Unused'
        })
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to obtain input data: %s." %
                res['Message'])
            return res
        transFiles = res['Value']
        lfns = res['LFNs']

        if not lfns:
            gLogger.info(
                "processTransformation: No 'Unused' files found for transformation."
            )
            if transDict['Status'] == 'Flush':
                res = self.transDB.setTransformationParameter(
                    transID, 'Status', 'Active')
                if not res['OK']:
                    gLogger.error(
                        "processTransformation: Failed to update transformation status to 'Active': %s."
                        % res['Message'])
                else:
                    gLogger.info(
                        "processTransformation: Updated transformation status to 'Active'."
                    )
            return S_OK()
        # Check if something new happened
        if len(lfns) == self.unusedFiles.get(
                transID, 0) and transDict['Status'] != 'Flush':
            gLogger.info(
                "processTransformation: No new 'Unused' files found for transformation."
            )
            return S_OK()

        replicateOrRemove = transDict['Type'].lower() in [
            "replication", "removal"
        ]
        # Limit the number of LFNs to be considered for replication or removal as they are treated individually
        if replicateOrRemove:
            # Keep exactly MaxFiles entries (the former "- 1" dropped one)
            lfns = lfns[:self.maxFiles]
        unusedFiles = len(lfns)
        # Check the data is available with replicas
        # NOTE(review): __getDataReplicas is not defined in the visible part
        # of this class - confirm it exists in the full source.
        res = self.__getDataReplicas(transID,
                                     lfns,
                                     active=not replicateOrRemove)
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to get data replicas: %s" %
                res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        plugin = transDict.get('Plugin') or 'Standard'
        gLogger.info(
            "processTransformation: Processing transformation with '%s' plug-in."
            % plugin)
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to generate tasks for transformation: %s"
                % res['Message'])
            return res
        tasks = res['Value']
        # Create the tasks
        allCreated = True
        created = 0
        for se, taskLfns in tasks:
            res = self.transDB.addTaskForTransformation(transID, taskLfns, se)
            if not res['OK']:
                gLogger.error(
                    "processTransformation: Failed to add task generated by plug-in: %s."
                    % res['Message'])
                allCreated = False
            else:
                created += 1
                unusedFiles -= len(taskLfns)
        if created:
            gLogger.info(
                "processTransformation: Successfully created %d tasks for transformation."
                % created)
        # Remembered so the next cycle can tell whether new files arrived
        self.unusedFiles[transID] = unusedFiles

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            res = self.transDB.setTransformationParameter(
                transID, 'Status', 'Active')
            if not res['OK']:
                gLogger.error(
                    "processTransformation: Failed to update transformation status to 'Active': %s."
                    % res['Message'])
            else:
                gLogger.info(
                    "processTransformation: Updated transformation status to 'Active'."
                )
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def __generatePluginObject(self, plugin):
        """Instantiate the TransformationPlugin class with the given plug-in name.

        :param plugin: name handed to the TransformationPlugin constructor
        :return: S_OK(plug-in object), or S_ERROR(message) when the module in
            self.pluginLocation cannot be imported or the class cannot be
            created
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except ImportError as e:
            gLogger.exception(
                "__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s"
                % (plugin, e))
            # Carry the reason so callers logging res['Message'] are not left blank
            return S_ERROR("Failed to import 'TransformationPlugin' %s: %s" %
                           (plugin, e))
        try:
            plugin_o = getattr(plugModule, 'TransformationPlugin')(
                '%s' % plugin,
                transClient=self.transDB,
                replicaManager=self.rm)
            return S_OK(plugin_o)
        except AttributeError as e:
            gLogger.exception(
                "__generatePluginObject: Failed to create %s(): %s." %
                (plugin, e))
            return S_ERROR("Failed to create %s(): %s." % (plugin, e))
Example #26
0
def _extractXsection(addinfo):
    """Return addinfo['xsection']['sum']['xsection'], or None if any level is missing."""
    if 'xsection' in addinfo:
        if 'sum' in addinfo['xsection']:
            if 'xsection' in addinfo['xsection']['sum']:
                return addinfo['xsection']['sum']['xsection']
    return None


def _getProductionSummary():
    """Write an HTML summary (./tables.html) of the selected productions.

    Productions come from the command line switches (explicit IDs, or a
    query on status/type).  For each one the catalog is queried for its
    files and directory metadata; luminosity, event count and cross section
    are taken from the file info (from the deepest ancestors when the files
    themselves carry no luminosity).  The rows are grouped per detector and
    production type and written as HTML tables.  Ends with dexit(0).
    """
    clip = _Params()
    clip.registerSwitch()
    Script.parseCommandLine()
    from ILCDIRAC.Core.Utilities.HTML import Table
    from ILCDIRAC.Core.Utilities.ProcessList import ProcessList
    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
    from DIRAC import gConfig, gLogger
    prod = clip.prod
    full_detail = clip.full_det
    fc = FileCatalogClient()

    processlist = gConfig.getValue('/LocalSite/ProcessListPath')
    prl = ProcessList(processlist)
    processesdict = prl.getProcessesDict()

    trc = TransformationClient()
    prodids = []
    if not prod:
        conddict = {'Status': clip.statuses}
        if clip.ptypes:
            conddict['Type'] = clip.ptypes
        res = trc.getTransformations(conddict)
        if res['OK']:
            for transfs in res['Value']:
                prodids.append(transfs['TransformationID'])
        else:
            # Previously ignored silently, leaving an unexplained empty summary
            gLogger.error(res['Message'])
    else:
        prodids.extend(prod)

    # Production type -> 'Datatype' metadata value used in the catalog query
    datatypes = {
        'MCReconstruction': 'DST',
        'MCReconstruction_Overlay': 'DST',
        'MCGeneration': 'gen',
        'MCSimulation': 'SIM'
    }

    metadata = []

    gLogger.info("Will run on prods %s" % str(prodids))

    for prodID in prodids:
        if prodID < clip.minprod:
            continue
        meta = {'ProdID': prodID}
        res = trc.getTransformation(str(prodID))
        if not res['OK']:
            gLogger.error("Error getting transformation %s" % prodID)
            continue
        prodtype = res['Value']['Type']
        proddetail = res['Value']['Description']
        if prodtype in ['Split', 'Merge']:
            gLogger.warn("Invalid query for %s productions" % prodtype)
            continue
        if prodtype not in datatypes:
            gLogger.error("Unknown production type %s" % prodtype)
            continue
        meta['Datatype'] = datatypes[prodtype]
        res = fc.findFilesByMetadata(meta)
        if not res['OK']:
            gLogger.error(res['Message'])
            continue
        lfns = res['Value']
        nb_files = len(lfns)
        if not lfns:
            gLogger.warn("No files found for prod %s" % prodID)
            continue
        path = os.path.dirname(lfns[0])
        res = fc.getDirectoryUserMetadata(path)
        if not res['OK']:
            gLogger.warn('No meta data found for %s' % path)
            continue
        dirmeta = {
            'proddetail': proddetail,
            'prodtype': prodtype,
            'nb_files': nb_files
        }
        dirmeta.update(res['Value'])
        lumi = 0.
        nbevts = 0
        files = 0
        xsec = 0.0
        if not full_detail:
            # Quick mode: extrapolate from the first file only
            info = _getFileInfo(lfns[0])
            nbevts = info[1] * len(lfns)
            lumi = info[0] * len(lfns)
            value = _extractXsection(info[2])
            if value is not None:
                xsec += value
                files += 1
        else:
            for lfn in lfns:
                info = _getFileInfo(lfn)
                lumi += info[0]
                nbevts += info[1]
                value = _extractXsection(info[2])
                if value is not None:
                    xsec += value
                    files += 1
        if not lumi:
            # No luminosity on the files themselves: use their deepest ancestors
            xsec = 0
            files = 0
            depthDict = {}
            seenAncestors = set()
            res = fc.getFileAncestors(lfns, [1, 2, 3, 4])
            if res['OK']:
                for ancestorsDict in res['Value']['Successful'].values():
                    for ancestor, dep in ancestorsDict.items():
                        depthDict.setdefault(dep, [])
                        if ancestor not in seenAncestors:
                            depthDict[dep].append(ancestor)
                            seenAncestors.add(ancestor)
            populatedDepths = [dep for dep in depthDict if depthDict[dep]]
            # Guard: without any ancestor there is no deepest level to read
            if populatedDepths:
                for ancestor in depthDict[max(populatedDepths)]:
                    info = _getFileInfo(ancestor)
                    lumi += info[0]
                    value = _extractXsection(info[2])
                    if value is not None:
                        xsec += value
                        files += 1
        if xsec and files:
            xsec /= files
            dirmeta['CrossSection'] = xsec
        else:
            dirmeta['CrossSection'] = 0.0

        if nbevts:
            dirmeta['NumberOfEvents'] = nbevts
        if 'NumberOfEvents' not in dirmeta:
            dirmeta['NumberOfEvents'] = 0

        # Default to the raw event type so 'detail' is always bound, also when
        # the process list knows the type but has no 'Detail' entry for it
        evtType = dirmeta['EvtType']
        detail = evtType
        if evtType in processesdict and 'Detail' in processesdict[evtType]:
            detail = processesdict[evtType]['Detail']

        if not prodtype == 'MCGeneration':
            res = trc.getTransformationInputDataQuery(str(prodID))
            if res['OK']:
                if 'ProdID' in res['Value']:
                    dirmeta['MomProdID'] = res['Value']['ProdID']
        if 'MomProdID' not in dirmeta:
            dirmeta['MomProdID'] = 0
        dirmeta['detail'] = _translate(detail)

        metadata.append(dirmeta)

    corres = {
        "MCGeneration": 'gen',
        "MCSimulation": 'SIM',
        "MCReconstruction": "REC",
        "MCReconstruction_Overlay": "REC"
    }
    detectors = {
        'ILD': {'SIM': [], 'REC': []},
        'SID': {'SIM': [], 'REC': []},
        'sid': {'SIM': [], 'REC': []},
        'gen': []
    }
    for channel in metadata:
        if 'DetectorType' not in channel:
            detectors['gen'].append(
                (channel['detail'], channel['Energy'], channel['ProdID'],
                 channel['nb_files'],
                 channel['NumberOfEvents'] / channel['nb_files'],
                 channel['NumberOfEvents'], channel['CrossSection'],
                 str(channel['proddetail'])))
        else:
            if not channel['DetectorType'] in detectors:
                gLogger.error("This is unknown detector",
                              channel['DetectorType'])
                continue
            detectors[channel['DetectorType']][corres[
                channel['prodtype']]].append(
                    (channel['detail'], channel['Energy'],
                     channel['DetectorType'], channel['ProdID'],
                     channel['nb_files'],
                     channel['NumberOfEvents'] / channel['nb_files'],
                     channel['NumberOfEvents'], channel['CrossSection'],
                     channel['MomProdID'], str(channel['proddetail'])))

    detectorHeader = ('Channel', 'Energy', 'Detector', 'ProdID',
                      'Number of Files', 'Events/File', 'Statistics',
                      'Cross Section (fb)', 'Origin ProdID', 'Comment')
    # (key in detectors, HTML heading, log label)
    detectorSections = (('ILD', 'ILD prods', "ILC CDR prods %s"),
                        ('SID', 'SID prods', "SID CDR prods %s"),
                        ('sid', 'sid dbd prods', "sid DBD prods %s"))

    with open("tables.html", "w") as of:
        of.write("""<!DOCTYPE html>
<html>
 <head>
<title> Production summary </title>
</head>
<body>
""")
        if len(detectors['gen']):
            of.write("<h1>gen prods</h1>\n")
            table = Table(header_row=('Channel', 'Energy', 'ProdID', 'Tasks',
                                      'Average Evts/task', 'Statistics',
                                      'Cross Section (fb)', 'Comment'))
            for item in detectors['gen']:
                table.rows.append(item)
            of.write(str(table))
            gLogger.info("Gen prods")
            gLogger.info(str(table))

        for detName, heading, logLabel in detectorSections:
            prodsByType = detectors[detName]
            # The per-detector dict always has its SIM/REC keys, so test the
            # row lists themselves to avoid a heading with nothing under it
            if not any(prodsByType.values()):
                continue
            of.write("<h1>%s</h1>\n" % heading)
            for ptype in prodsByType.keys():
                if len(prodsByType[ptype]):
                    of.write("<h2>%s</h2>\n" % ptype)
                    table = Table(header_row=detectorHeader)
                    for item in prodsByType[ptype]:
                        table.rows.append(item)
                    of.write(str(table))
                    gLogger.info(logLabel % ptype)
                    gLogger.info(str(table))

        of.write("""
</body>
</html>
""")
    gLogger.notice("Check ./tables.html in any browser for the results")
    dexit(0)
Example #27
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """Set up the parameter tables and optionally load a transformation.

        :param transID: when non-zero, the transformation with this ID is
            fetched through getTransformation()
        :param transClient: client to use; a TransformationClient is created
            when none is given
        :raises AttributeError: when the lookup fails with the message
            'Transformation does not exist'
        """
        super(Transformation, self).__init__()

        stringTypes = types.StringTypes
        integerTypes = [types.IntType, types.LongType]
        # Types accepted by the setters for each known parameter
        self.paramTypes = {
            'TransformationID': list(integerTypes),
            'TransformationName': stringTypes,
            'Status': stringTypes,
            'Description': stringTypes,
            'LongDescription': stringTypes,
            'Type': stringTypes,
            'Plugin': stringTypes,
            'AgentType': stringTypes,
            'FileMask': stringTypes,
            'TransformationGroup': stringTypes,
            'GroupSize': integerTypes + [types.FloatType],
            'InheritedFrom': list(integerTypes),
            'Body': stringTypes,
            'MaxNumberOfTasks': list(integerTypes),
            'EventsPerTask': list(integerTypes)
        }
        # Initial value of each known parameter
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0
        }
        self.ops = Operations()
        defaultPlugins = ['Broadcast', 'Standard', 'BySize', 'ByShare']
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins', defaultPlugins)
        self.transClient = transClient if transClient else TransformationClient()
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if not transID:
            return

        self.paramValues['TransformationID'] = transID
        lookup = self.getTransformation()
        if lookup['OK']:
            self.exists = True
            return
        if lookup['Message'] == 'Transformation does not exist':
            raise AttributeError('TransformationID %d does not exist' %
                                 transID)
        # Any other failure: forget the ID and report
        self.paramValues['TransformationID'] = 0
        gLogger.fatal("Failed to get transformation from database",
                      "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        return self.serverURL

    def reset(self, transID=0):
        """Re-run the constructor for transID and re-apply the server URL.

        :param transID: transformation ID handed to __init__
        :return: S_OK()
        """
        self.__init__(transID)
        currentURL = self.serverURL
        self.transClient.setServer(currentURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Store seList as the 'TargetSE' parameter via __setSE."""
        outcome = self.__setSE('TargetSE', seList)
        return outcome

    def setSourceSE(self, seList):
        """Store seList as the 'SourceSE' parameter via __setSE."""
        outcome = self.__setSE('SourceSE', seList)
        return outcome

    def __setSE(self, seParam, seList):
        """Normalise seList to a list, check it and store it under seParam.

        :param seParam: name of the parameter to set
        :param seList: a list/tuple/dict, or a string holding either a Python
            literal of one of those or a comma separated list
        :return: S_ERROR("Bad argument type") for unsupported types, the
            failed result of __checkSEs, or the result of __setParam
        """
        if isinstance(seList, basestring):
            try:
                # NOTE(review): eval() executes arbitrary expressions; if this
                # string can come from an untrusted source, consider
                # ast.literal_eval instead.
                parsed = eval(seList)
            except Exception:
                # Not a Python expression (e.g. "SE-A,SE-B").  Only Exception
                # is caught so KeyboardInterrupt/SystemExit still propagate.
                parsed = None
            if isinstance(parsed, (list, dict, tuple)):
                seList = list(parsed)
            elif isinstance(parsed, basestring):
                # A quoted literal such as "'SE-A,SE-B'"
                seList = parsed.split(',')
            else:
                # eval produced something that is not a collection (e.g. the
                # text matched a name in scope): treat the text as plain names
                seList = seList.split(',')
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        # __setParam reads the parameter name from item_called
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        if name.find('get') == 0:
            item = name[3:]
            self.item_called = item
            return self.__getParam
        if name.find('set') == 0:
            item = name[3:]
            self.item_called = item
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        if self.item_called == 'Available':
            return S_OK(self.paramTypes.keys())
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" %
                             self.item_called)

    def __setParam(self, value):
        change = False
        if self.item_called in self.paramTypes:
            if self.paramValues[self.item_called] != value:
                if type(value) in self.paramTypes[self.item_called]:
                    change = True
                else:
                    raise TypeError("%s %s %s expected one of %s" %
                                    (self.item_called, value, type(value),
                                     self.paramTypes[self.item_called]))
        else:
            if self.item_called not in self.paramValues:
                change = True
            else:
                if self.paramValues[self.item_called] != value:
                    change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" %
                            self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues['TransformationID']
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(
                    transID, self.item_called, value)
                if not res['OK']:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch this transformation from the client and load its parameters.

        Every returned parameter is passed to the matching 'set<Name>'
        attribute of this object.

        :param printOutput: pretty-print a failed result; on success only a
            "No printing available yet" message is logged
        :return: S_OK(parameter dict), S_ERROR() when no TransformationID is
            set, or the failed client result
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            setter = getattr(self, setterName, None)
            # Test the resolved setter itself; the name string is always truthy
            if not callable(setter):
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Fetch the logging entries of this transformation from the client.

        :param printOutput: print the entries (or the failed result)
        :return: S_OK(list of entries), S_ERROR() when no TransformationID is
            set, or the failed client result
        """
        currentID = self.paramValues['TransformationID']
        if not currentID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        answer = self.transClient.getTransformationLogging(currentID)
        if answer['OK']:
            entries = answer['Value']
            if printOutput:
                columns = ['Message', 'MessageDate', 'AuthorDN']
                self._printFormattedDictList(entries, columns, 'MessageDate',
                                             'MessageDate')
            return S_OK(entries)
        if printOutput:
            self._prettyPrint(answer)
        return answer

    def extendTransformation(self, nTasks, printOutput=False):
        return self.__executeOperation('extendTransformation',
                                       nTasks,
                                       printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        res = self.__executeOperation('cleanTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.paramValues['Status'] = 'Cleaned'
        return res

    def deleteTransformation(self, printOutput=False):
        res = self.__executeOperation('deleteTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        return self.__executeOperation('addFilesToTransformation',
                                       lfns,
                                       printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        return self.__executeOperation('setFileStatusForTransformation',
                                       status,
                                       lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        return self.__executeOperation('getTransformationTaskStats',
                                       printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        return self.__executeOperation('getTransformationStats',
                                       printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        return self.__executeOperation('deleteTasks',
                                       taskMin,
                                       taskMax,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=[],
                                 se='Unknown',
                                 printOutput=False):
        return self.__executeOperation('addTaskForTransformation',
                                       lfns,
                                       se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        return self.__executeOperation('setTaskStatus',
                                       taskID,
                                       status,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        printOutput = kwds.pop('printOutput')
        fcn = None
        if hasattr(self.transClient, operation) and callable(
                getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR(
                "Unable to invoke %s, it isn't a member funtion of TransformationClient"
            )
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        """Query the files of the current transformation.

        :param fileStatus: if non-empty, used as the 'Status' selection
        :param lfns: if non-empty, used as the 'LFN' selection
        :param outputFields: fields to print; when empty the available field
                             names are logged instead
        :param orderBy: field passed to the formatter as ordering key
        :param bool printOutput: print/log the outcome when True
        :returns: the result of ``transClient.getTransformationFiles``
        """
        # The list defaults are only read in this method, never mutated.
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list.
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        """Query the tasks of the current transformation.

        :param taskStatus: if non-empty, used as the 'ExternalStatus' selection
        :param taskIDs: if non-empty, used as the 'TaskID' selection
        :param outputFields: fields to print; when empty the available field
                             names are logged instead
        :param orderBy: field passed to the formatter as ordering key
        :param bool printOutput: print/log the outcome when True
        :returns: the result of ``transClient.getTransformationTasks``
        """
        # The list defaults are only read in this method, never mutated.
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list.
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        """Query transformations, optionally filtered by ID and status.

        :param transID: if non-empty, used as the 'TransformationID' selection
        :param transStatus: if non-empty, used as the 'Status' selection
        :param outputFields: fields to print; when empty the available field
                             names are logged instead
        :param orderBy: field passed to the formatter as ordering key
        :param bool printOutput: print/log the outcome when True
        :returns: the result of ``transClient.getTransformations``
        """
        # The list defaults are only read in this method, never mutated.
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list.
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """Read the identity and username from the result of getProxyInfo().

        :returns: S_OK({'username': ..., 'authorDN': ...}) built from the
                  'username' and 'identity' entries of the proxy info, or
                  S_ERROR carrying the getProxyInfo() message on failure
        """
        proxyInfo = getProxyInfo()
        if not proxyInfo['OK']:
            gLogger.error("Unable to get uploaded proxy Info %s " %
                          proxyInfo['Message'])
            return S_ERROR(proxyInfo['Message'])

        details = proxyInfo['Value']
        author = details['identity']
        username = details['username']
        return S_OK({'username': username, 'authorDN': author})

    #############################################################################
    def getTransformationsByUser(self,
                                 authorDN="",
                                 userName="",
                                 transID=[],
                                 transStatus=[],
                                 outputFields=[
                                     'TransformationID', 'Status', 'AgentType',
                                     'TransformationName', 'CreationDate',
                                     'AuthorDN'
                                 ],
                                 orderBy='TransformationID',
                                 printOutput=False):
        """Query the transformations created by a given author.

        When ``authorDN`` is empty it is resolved through
        ``getAuthorDNfromProxy()``; a non-empty ``userName`` must then match
        the username found in the proxy.

        :param str authorDN: DN used as the 'AuthorDN' selection
        :param str userName: expected proxy username (only checked when
                             ``authorDN`` is empty)
        :param transID: if non-empty, used as the 'TransformationID' selection
        :param transStatus: if non-empty, used as the 'Status' selection
        :param outputFields: fields to print; when empty the available field
                             names are logged instead
        :param orderBy: field passed to the formatter as ordering key
        :param bool printOutput: print/log the outcome when True
        :returns: the result of ``transClient.getTransformations`` or S_ERROR
                  when the author cannot be resolved
        """
        # The list defaults are only read in this method, never mutated.
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res['OK']:
                gLogger.error(res['Message'])
                return S_ERROR(res['Message'])
            foundUserName = res['Value']['username']
            foundAuthor = res['Value']['authorDN']
            # A user name that differs from the proxy's owner cannot be
            # resolved to a DN from that proxy: report and give up.
            if not (userName == "" or userName == foundUserName):
                message = (
                    "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                    % (userName, foundUserName))
                gLogger.error(message)
                return S_ERROR(message)

            userName = foundUserName
            authorDN = foundAuthor
            gLogger.info(
                "Will list transformations created by user '%s' with status '%s'"
                % (userName, ', '.join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ', '.join(transStatus)))

        condDict['AuthorDN'] = authorDN
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res

        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list.
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Show the summary for a list of Transformations.

        Fields starting with 'F' ('J') refer to files (jobs).
        Proc. stands for processed.

        :param list transID: transformation IDs to summarise; its length is
                             also used as the maximum number of records
        :returns: S_OK(list of dicts keyed by the short column headers), the
                  failed client result, or S_ERROR when no record could be
                  converted
        """
        condDict = {'TransformationID': transID}
        orderby = []
        start = 0
        maxitems = len(transID)
        paramShowNames = [
            'TransformationID', 'Type', 'Status', 'Files_Total',
            'Files_PercentProcessed', 'Files_Processed', 'Files_Unused',
            'Jobs_TotalCreated', 'Jobs_Waiting', 'Jobs_Running', 'Jobs_Done',
            'Jobs_Failed', 'Jobs_Stalled'
        ]
        # Below, the header used for each field in the printing: short to fit in one line
        paramShowNamesShort = [
            'TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.',
            'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail',
            'J_Stalled'
        ]
        dictList = []

        result = self.transClient.getTransformationSummaryWeb(
            condDict, orderby, start, maxitems)
        if not result['OK']:
            self._prettyPrint(result)
            return result

        if result['Value']['TotalRecords'] > 0:
            try:
                paramNames = result['Value']['ParameterNames']
                # Resolve each wanted column's position once, not per record.
                columns = [paramNames.index(pname) for pname in paramShowNames]
                for paramValues in result['Value']['Records']:
                    showValues = [paramValues[col] for col in columns]
                    dictList.append(dict(zip(paramShowNamesShort, showValues)))
            # "except ... as" and print() are valid on Python 2.6+ and 3.
            except Exception as x:
                print('Exception %s ' % str(x))

        if not dictList:
            gLogger.error(
                'No found transformations satisfying input condition')
            return S_ERROR(
                'No found transformations satisfying input condition')

        # NOTE(review): this prints the formatter's return value, as the
        # original did - confirm _printFormattedDictList returns something
        # worth printing, otherwise drop the print().
        print(self._printFormattedDictList(dictList, paramShowNamesShort,
                                           paramShowNamesShort[0],
                                           paramShowNamesShort[0]))

        return S_OK(dictList)
Example #28
0
class TransformationAgent(AgentModule):
    """Agent that turns the 'Unused' files of transformations into tasks.

    For every selected transformation the agent fetches the unused files,
    looks up their replicas, hands both to a TransformationPlugin instance
    and registers the tasks the plug-in generates.
    """

    def initialize(self):
        """Read the agent options and create the clients used by execute().

        :returns: S_OK()
        """
        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/ProductionManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'ProductionManager')

        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        return S_OK()

    def execute(self):
        """Process every selected transformation once.

        Failures are logged per transformation and never abort the cycle.

        :returns: S_OK() in all cases
        """
        # Get the transformations to process
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("%s.execute: Failed to obtain transformations: %s" %
                         (AGENT_NAME, res['Message']))
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            # int() promotes to long on Python 2 when needed and also exists
            # on Python 3, unlike long().
            transID = int(transDict['TransformationID'])
            gLogger.info("%s.execute: Processing transformation %s." %
                         (AGENT_NAME, transID))
            startTime = time.time()
            processed = self.processTransformation(transDict)
            if not processed['OK']:
                gLogger.info(
                    "%s.execute: Failed to process transformation: %s" %
                    (AGENT_NAME, processed['Message']))
            else:
                gLogger.info(
                    "%s.execute: Processed transformation in %.1f seconds" %
                    (AGENT_NAME, time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        """Obtain the transformations to be executed.

        The 'Transformation' option selects a single transformation; its
        default 'All' selects every transformation in status 'Active',
        'Completing' or 'Flush'.

        :returns: S_OK(list of transformation dicts) or the failed client result
        """
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info(
                "%s.getTransformations: Initializing general purpose agent." %
                AGENT_NAME)
            res = self.transDB.getTransformations(
                {'Status': ['Active', 'Completing', 'Flush']},
                extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "%s.getTransformations: Failed to get transformations." %
                    AGENT_NAME, res['Message'])
                return res
            transformations = res['Value']
            gLogger.info(
                "%s.getTransformations: Obtained %d transformations to process"
                % (AGENT_NAME, len(transformations)))
        else:
            gLogger.info(
                "%s.getTransformations: Initializing for transformation %s." %
                (AGENT_NAME, transName))
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "%s.getTransformations: Failed to get transformation." %
                    AGENT_NAME, res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def processTransformation(self, transDict):
        """Create tasks for the 'Unused' files of one transformation.

        :param dict transDict: transformation parameters; 'TransformationID',
                               'Status' and 'Type' are read, 'Plugin' is optional
        :returns: S_OK() when the transformation was handled (even if some
                  tasks could not be added), otherwise the failed result
        """
        transID = transDict['TransformationID']
        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(condDict={
            'TransformationID': transID,
            'Status': 'Unused'
        })
        if not res['OK']:
            gLogger.error(
                "%s.processTransformation: Failed to obtain input data." %
                AGENT_NAME, res['Message'])
            return res
        transFiles = res['Value']
        lfns = res['LFNs']
        if not lfns:
            gLogger.info(
                "%s.processTransformation: No 'Unused' files found for transformation."
                % AGENT_NAME)
            if transDict['Status'] == 'Flush':
                self.__resetFlushToActive(transID)
            return S_OK()

        # Check the data is available with replicas
        res = self.__getDataReplicas(transID,
                                     lfns,
                                     active=(transDict['Type'].lower()
                                             not in ["replication",
                                                     "removal"]))
        if not res['OK']:
            gLogger.error(
                "%s.processTransformation: Failed to get data replicas" %
                AGENT_NAME, res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object; a missing or
        # empty 'Plugin' entry falls back to 'Standard'.
        plugin = transDict.get('Plugin') or 'Standard'
        gLogger.info(
            "%s.processTransformation: Processing transformation with '%s' plug-in."
            % (AGENT_NAME, plugin))
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error(
                "%s.processTransformation: Failed to generate tasks for transformation."
                % AGENT_NAME, res['Message'])
            return res
        tasks = res['Value']
        # Create the tasks
        allCreated = True
        created = 0
        for se, taskLfns in tasks:
            res = self.transDB.addTaskForTransformation(transID, taskLfns, se)
            if not res['OK']:
                gLogger.error(
                    "%s.processTransformation: Failed to add task generated by plug-in."
                    % AGENT_NAME, res['Message'])
                allCreated = False
            else:
                created += 1
        if created:
            gLogger.info(
                "%s.processTransformation: Successfully created %d tasks for transformation."
                % (AGENT_NAME, created))

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            self.__resetFlushToActive(transID)
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def __resetFlushToActive(self, transID):
        """Set the transformation status back to 'Active' and log the outcome."""
        res = self.transDB.setTransformationParameter(
            transID, 'Status', 'Active')
        if not res['OK']:
            gLogger.error(
                "%s.execute: Failed to update transformation status to 'Active'."
                % AGENT_NAME, res['Message'])
        else:
            gLogger.info(
                "%s.execute: Updated transformation status to 'Active'." %
                AGENT_NAME)

    def __generatePluginObject(self, plugin):
        """Instantiate the TransformationPlugin class with the given plug-in name.

        :param str plugin: plug-in name handed to the TransformationPlugin constructor
        :returns: S_OK(plug-in object) or S_ERROR when the module cannot be
                  imported or the object cannot be created
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except Exception as x:
            gLogger.exception(
                "%s.__generatePluginObject: Failed to import 'TransformationPlugin'"
                % AGENT_NAME, '', x)
            return S_ERROR("Failed to import 'TransformationPlugin'")
        try:
            # Call the class directly instead of eval()-ing a source string
            # built from the plug-in name: same object for ordinary names, and
            # a name containing a quote can no longer be executed as code.
            return S_OK(plugModule.TransformationPlugin(plugin))
        except Exception as x:
            gLogger.exception(
                "%s.__generatePluginObject: Failed to create %s()." %
                (AGENT_NAME, plugin), '', x)
            return S_ERROR("Failed to create %s()" % plugin)
Example #29
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """Build the parameter tables and optionally load an existing transformation.

        :param transID: when truthy, the transformation with this ID is loaded
                        through ``getTransformation()``
        :param transClient: client object to use; a new TransformationClient
                            is created when this is falsy
        :raises AttributeError: if loading fails with the message
                                "Transformation does not exist"
        """
        super(Transformation, self).__init__()

        text = six.string_types
        integer = six.integer_types
        # Accepted type(s) for each typed parameter, checked by the setters.
        self.paramTypes = {
            "TransformationID": integer,
            "TransformationName": text,
            "Status": text,
            "Description": text,
            "LongDescription": text,
            "Type": text,
            "Plugin": text,
            "AgentType": text,
            "FileMask": text,
            "TransformationGroup": text,
            "GroupSize": integer + (float, ),
            "InheritedFrom": integer,
            "Body": text,
            "MaxNumberOfTasks": integer,
            "EventsPerTask": integer,
        }
        # Initial value of every typed parameter.
        self.paramValues = {
            "TransformationID": 0,
            "TransformationName": "",
            "Status": "New",
            "Description": "",
            "LongDescription": "",
            "Type": "",
            "Plugin": "Standard",
            "AgentType": "Manual",
            "FileMask": "",
            "TransformationGroup": "General",
            "GroupSize": 1,
            "InheritedFrom": 0,
            "Body": "",
            "MaxNumberOfTasks": 0,
            "EventsPerTask": 0,
        }

        # The meta queries belong neither to the transformation parameters nor
        # to the additional parameters, hence the dedicated attributes.
        self.inputMetaQuery = None
        self.outputMetaQuery = None

        self.ops = Operations()
        defaultPlugins = ["Broadcast", "Standard", "BySize", "ByShare"]
        self.supportedPlugins = self.ops.getValue(
            "Transformations/AllowedPlugins", defaultPlugins)
        self.transClient = transClient if transClient else TransformationClient()
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if not transID:
            return

        self.paramValues["TransformationID"] = transID
        loaded = self.getTransformation()
        if loaded["OK"]:
            self.exists = True
            return
        if loaded["Message"] == "Transformation does not exist":
            raise AttributeError("TransformationID %d does not exist" %
                                 transID)
        # Any other failure: forget the ID and report it.
        self.paramValues["TransformationID"] = 0
        gLogger.fatal("Failed to get transformation from database",
                      "%s @ %s" % (transID, self.transClient.serverURL))

    def getServer(self):
        """Return the server URL stored in ``self.serverURL``."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-run ``__init__`` for ``transID`` and re-apply the server URL.

        ``__init__`` is called without a ``transClient`` argument, so it falls
        back to its default for that parameter.

        :param transID: transformation ID handed to ``__init__``
        :returns: S_OK()
        """
        self.__init__(transID)
        # NOTE(review): __init__ has just reassigned self.serverURL from the
        # client, so this passes the client its own URL back - confirm whether
        # the pre-reset URL was meant to be preserved instead.
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Set the "TargetSE" parameter through ``__setSE``.

        :param seList: value forwarded unchanged to ``__setSE``
        :returns: whatever ``__setSE`` returns
        """
        return self.__setSE("TargetSE", seList)

    def setSourceSE(self, seList):
        """Set the "SourceSE" parameter through ``__setSE``.

        :param seList: value forwarded unchanged to ``__setSE``
        :returns: whatever ``__setSE`` returns
        """
        return self.__setSE("SourceSE", seList)

    def setBody(self, body):
        """check that the body is a string, or using the proper syntax for multiple operations,
        or is a BodyPlugin object

        :param body: transformation body, for example

          .. code :: python

            body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                     ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
                   ]

        :type body: string or list of tuples (or lists) of string and dictionaries or a Body plugin (:py:class:`DIRAC.TransformationSystem.Client.BodyPlugin.BaseBody.BaseBody`)
        :raises TypeError: If the structure is not as expected
        :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation`
                            is used
        :returns: S_OK, S_ERROR
        """
        self.item_called = "Body"

        # Simple single operation body case
        if isinstance(body, six.string_types):
            return self.__setParam(body)

        # BodyPlugin case
        if isinstance(body, BaseBody):
            return self.__setParam(encode(body))

        if not isinstance(body, (list, tuple)):
            raise TypeError("Expected list or string, but %r is %s" %
                            (body, type(body)))

        # Value types accepted for the operation attributes.
        encodableTypes = six.string_types + six.integer_types + (
            float, list, tuple, dict)

        # MultiOperation body case
        for tup in body:
            if not isinstance(tup, (tuple, list)):
                raise TypeError("Expected tuple or list, but %r is %s" %
                                (tup, type(tup)))
            if len(tup) != 2:
                raise TypeError("Expected 2-tuple, but %r is length %d" %
                                (tup, len(tup)))
            if not isinstance(tup[0], six.string_types):
                raise TypeError(
                    "Expected string, but first entry in tuple %r is %s" %
                    (tup, type(tup[0])))
            if not isinstance(tup[1], dict):
                # Report the type of the offending second entry, not the first.
                raise TypeError(
                    "Expected dictionary, but second entry in tuple %r is %s" %
                    (tup, type(tup[1])))
            for par, val in tup[1].items():
                if not isinstance(par, six.string_types):
                    raise TypeError(
                        "Expected string, but key in dictionary %r is %s" %
                        (par, type(par)))
                if par not in Operation.ATTRIBUTE_NAMES:
                    raise ValueError("Unknown attribute for Operation: %s" %
                                     par)
                if not isinstance(val, encodableTypes):
                    raise TypeError("Cannot encode %r, in json" % (val))
        return self.__setParam(json.dumps(body))

    def setInputMetaQuery(self, query):
        """Set the input meta query.

        The value is only stored on ``self.inputMetaQuery``; no validation or
        client call is made here.

        :param dict query: dictionary to use for input meta query
        :returns: S_OK()
        """
        self.inputMetaQuery = query
        return S_OK()

    def setOutputMetaQuery(self, query):
        """Set the output meta query.

        The value is only stored on ``self.outputMetaQuery``; no validation or
        client call is made here.

        :param dict query: dictionary to use for output meta query
        :returns: S_OK()
        """
        self.outputMetaQuery = query
        return S_OK()

    def __setSE(self, seParam, seList):
        """Normalise a storage-element list, check it and store it as ``seParam``.

        :param str seParam: name of the parameter to set
        :param seList: a string, list, dict or tuple; a string is first
                       evaluated as a Python expression and, if that raises,
                       split on commas; other containers go through ``list()``
        :returns: S_ERROR("Bad argument type") for any other type, the failed
                  ``__checkSEs`` result, or the result of ``__setParam``
        """
        if isinstance(seList, six.string_types):
            try:
                # NOTE(review): eval() executes arbitrary expressions, in this
                # method's own scope. Safe only while seList comes from trusted
                # callers; consider ast.literal_eval if it can carry user input.
                seList = eval(seList)
            except Exception:
                # Not a valid expression (e.g. "SE1,SE2"): treat it as a
                # comma-separated list of names.
                seList = seList.split(",")
        elif isinstance(seList, (list, dict, tuple)):
            # list() of a dict keeps only its keys.
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res["OK"]:
            return res
        # __setParam reads the target parameter name from self.item_called.
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Provide dynamic ``get<Item>`` / ``set<Item>`` accessors.

        The part of ``name`` after the three-letter prefix is remembered in
        ``self.item_called`` and the matching private accessor is returned.

        :param str name: attribute name being looked up
        :raises AttributeError: when ``name`` starts with neither "get" nor "set"
        """
        if name.startswith("get"):
            accessor = self.__getParam
        elif name.startswith("set"):
            accessor = self.__setParam
        else:
            raise AttributeError(name)
        self.item_called = name[3:]
        return accessor

    def __getParam(self):
        """Return the value selected by ``self.item_called``.

        "Available" yields the list of typed parameter names, "Parameters" the
        whole value dictionary, any other known name its stored value.

        :returns: S_OK(value)
        :raises AttributeError: when the name is not a known parameter
        """
        requested = self.item_called
        if requested == "Available":
            return S_OK(list(self.paramTypes))
        if requested == "Parameters":
            return S_OK(self.paramValues)
        if requested not in self.paramValues:
            raise AttributeError("Unknown parameter for transformation: %s" %
                                 requested)
        return S_OK(self.paramValues[requested])

    def __setParam(self, value):
        """Store ``value`` for the parameter named by ``self.item_called``.

        Typed parameters are type-checked when the value differs from the
        stored one. For a transformation that exists with a known ID the
        change is first sent to the client; the local value is updated only
        if that succeeds.

        :param value: new value of the parameter
        :returns: S_OK(), or the failed client result
        :raises TypeError: when a typed parameter receives a value of another type
        """
        item = self.item_called
        if item in self.paramTypes:
            changed = self.paramValues[item] != value
            if changed and not isinstance(value, self.paramTypes[item]):
                raise TypeError(
                    "%s %s %s expected one of %s" %
                    (item, value, type(value), self.paramTypes[item]))
        else:
            # Untyped parameter: new names and differing values are changes.
            changed = (item not in self.paramValues
                       or self.paramValues[item] != value)

        if not changed:
            gLogger.verbose("No change of parameter %s required" % item)
            return S_OK()

        gLogger.verbose("Parameter %s to be changed" % item)
        transID = self.paramValues["TransformationID"]
        if self.exists and transID:
            update = self.transClient.setTransformationParameter(
                transID, item, value)
            if not update["OK"]:
                return update
        self.paramValues[item] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch the transformation from the client and load its parameters.

        Every parameter returned by the client is applied through the
        matching ``set<ParamName>`` accessor of this object.

        :param bool printOutput: pretty-print a failed client result when True
        :returns: S_OK(dict of parameters), S_ERROR when no TransformationID
                  is set, or the failed client result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res["Value"]
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            setter = getattr(self, setterName, None)
            # Test the resolved setter itself; the setter's name is always
            # truthy, so testing it could never detect a missing setter.
            if not callable(setter):
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Retrieve the logging records of the current transformation.

        :param bool printOutput: when True, print the records (fields
                                 "Message", "MessageDate", "AuthorDN") or the
                                 failed client result
        :returns: S_OK(list of records), S_ERROR() when no TransformationID
                  is set, or the failed client result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        answer = self.transClient.getTransformationLogging(transID)
        if answer["OK"]:
            records = answer["Value"]
            if printOutput:
                shownFields = ["Message", "MessageDate", "AuthorDN"]
                self._printFormattedDictList(records, shownFields,
                                             "MessageDate", "MessageDate")
            return S_OK(records)
        if printOutput:
            self._prettyPrint(answer)
        return answer

    def extendTransformation(self, nTasks, printOutput=False):
        """Run the "extendTransformation" operation through ``__executeOperation``.

        :param nTasks: forwarded unchanged as the operation's argument
                       (presumably the number of tasks to add - confirm
                       against TransformationClient)
        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: whatever ``__executeOperation`` returns
        """
        return self.__executeOperation("extendTransformation",
                                       nTasks,
                                       printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Run the "cleanTransformation" operation and record the new status.

        On success the local "Status" value is set to "Cleaned".

        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: the result of ``__executeOperation``
        """
        outcome = self.__executeOperation("cleanTransformation",
                                          printOutput=printOutput)
        if not outcome["OK"]:
            return outcome
        self.paramValues["Status"] = "Cleaned"
        return outcome

    def deleteTransformation(self, printOutput=False):
        """Run the "deleteTransformation" operation and reset this object.

        On success ``reset()`` is called with its default arguments.

        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: the result of ``__executeOperation``
        """
        outcome = self.__executeOperation("deleteTransformation",
                                          printOutput=printOutput)
        if not outcome["OK"]:
            return outcome
        self.reset()
        return outcome

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Run the "addFilesToTransformation" operation through ``__executeOperation``.

        :param lfns: forwarded unchanged as the operation's argument
        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: whatever ``__executeOperation`` returns
        """
        return self.__executeOperation("addFilesToTransformation",
                                       lfns,
                                       printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Run the "setFileStatusForTransformation" operation through ``__executeOperation``.

        :param status: forwarded unchanged as the first operation argument
        :param lfns: forwarded unchanged as the second operation argument
        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: whatever ``__executeOperation`` returns
        """
        return self.__executeOperation("setFileStatusForTransformation",
                                       status,
                                       lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        """Run the "getTransformationTaskStats" operation through ``__executeOperation``.

        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: whatever ``__executeOperation`` returns
        """
        return self.__executeOperation("getTransformationTaskStats",
                                       printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        """Run the "getTransformationStats" operation through ``__executeOperation``.

        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: whatever ``__executeOperation`` returns
        """
        return self.__executeOperation("getTransformationStats",
                                       printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Run the "deleteTasks" operation through ``__executeOperation``.

        :param taskMin: forwarded unchanged as the first operation argument
                        (presumably the lower task ID bound - confirm against
                        TransformationClient)
        :param taskMax: forwarded unchanged as the second operation argument
                        (presumably the upper task ID bound - confirm)
        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: whatever ``__executeOperation`` returns
        """
        return self.__executeOperation("deleteTasks",
                                       taskMin,
                                       taskMax,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=[],
                                 se="Unknown",
                                 printOutput=False):
        """Run the "addTaskForTransformation" operation through ``__executeOperation``.

        :param lfns: forwarded unchanged as the first operation argument; the
                     shared list default is not modified in this method
        :param se: forwarded unchanged as the second operation argument
        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: whatever ``__executeOperation`` returns
        """
        return self.__executeOperation("addTaskForTransformation",
                                       lfns,
                                       se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Run the "setTaskStatus" operation through ``__executeOperation``.

        :param taskID: forwarded unchanged as the first operation argument
        :param status: forwarded unchanged as the second operation argument
        :param bool printOutput: forwarded to ``__executeOperation``
        :returns: whatever ``__executeOperation`` returns
        """
        return self.__executeOperation("setTaskStatus",
                                       taskID,
                                       status,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Invoke a TransformationClient method on the current transformation.

        The stored ``TransformationID`` is passed as the first positional
        argument, followed by ``parms`` and the remaining ``kwds``.

        :param str operation: name of the ``self.transClient`` method to call
        :param parms: extra positional arguments forwarded to the method
        :param kwds: extra keyword arguments forwarded to the method; the
                     ``printOutput`` flag is consumed here and not forwarded
        :returns: the result of the client call, or S_ERROR when no
                  TransformationID is set or ``operation`` is not a callable
                  attribute of the client
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        # Default to False so a caller omitting the flag does not hit KeyError.
        printOutput = kwds.pop("printOutput", False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            # The operation name is now actually interpolated into the message.
            return S_ERROR(
                "Unable to invoke %s, it isn't a member function of TransformationClient"
                % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(
        self,
        fileStatus=[],
        lfns=[],
        outputFields=[
            "FileID",
            "LFN",
            "Status",
            "TaskID",
            "TargetSE",
            "UsedSE",
            "ErrorCount",
            "InsertedTime",
            "LastUpdate",
        ],
        orderBy="FileID",
        printOutput=False,
    ):
        """Query the files of the current transformation.

        :param fileStatus: if non-empty, used as the "Status" selection
        :param lfns: if non-empty, used as the "LFN" selection
        :param outputFields: fields to print; when empty the available field
                             names are logged instead
        :param orderBy: field passed to the formatter as ordering key
        :param bool printOutput: print/log the outcome when True
        :returns: the result of ``transClient.getTransformationFiles``
        """
        # The list defaults are only read in this method, never mutated.
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if fileStatus:
            condDict["Status"] = fileStatus
        if lfns:
            condDict["LFN"] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() belongs to the separator string, not to the list.
                gLogger.info("Available fields are: %s" %
                             " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "FileID", orderBy)
        return res

    def getTransformationTasks(
        self,
        taskStatus=[],
        taskIDs=[],
        outputFields=[
            "TransformationID",
            "TaskID",
            "ExternalStatus",
            "ExternalID",
            "TargetSE",
            "CreationTime",
            "LastUpdateTime",
        ],
        orderBy="TaskID",
        printOutput=False,
    ):
        """Fetch the tasks of the current transformation, optionally filtered.

        :param list taskStatus: restrict to these external statuses (empty: no filter)
        :param list taskIDs: restrict to these task IDs (empty: no filter)
        :param list outputFields: fields printed per task when ``printOutput`` is
            set; an empty list prints the available field names instead
        :param str orderBy: field used to order the printed rows
        :param bool printOutput: if True, print the outcome
        :return: the TransformationClient result structure, unchanged
        """
        # The list defaults are shared between calls; they are only read here.
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if taskStatus:
            condDict["ExternalStatus"] = taskStatus
        if taskIDs:
            condDict["TaskID"] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() is a str method: the separator joins the field names.
                gLogger.info("Available fields are: %s" %
                             " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "TaskID", orderBy)
        return res

    #############################################################################
    def getTransformations(
        self,
        transID=[],
        transStatus=[],
        outputFields=[
            "TransformationID", "Status", "AgentType", "TransformationName",
            "CreationDate"
        ],
        orderBy="TransformationID",
        printOutput=False,
    ):
        """Fetch transformations, optionally filtered by ID and status.

        :param list transID: restrict to these transformation IDs (empty: no filter)
        :param list transStatus: restrict to these statuses (empty: no filter)
        :param list outputFields: fields printed per transformation when
            ``printOutput`` is set; an empty list prints the available field names
        :param str orderBy: field used to order the printed rows
        :param bool printOutput: if True, print the outcome
        :return: the TransformationClient result structure, unchanged
        """
        # The list defaults are shared between calls; they are only read here.
        condDict = {}
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join() is a str method: the separator joins the field names.
                gLogger.info("Available fields are: %s" %
                             " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "TransformationID", orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """Read the identity (DN) and username from the current proxy information.

        :return: S_OK with a dict holding ``username`` and ``authorDN``, or
            S_ERROR carrying the message reported by ``getProxyInfo``
        """
        proxyInfo = getProxyInfo()
        if not proxyInfo["OK"]:
            gLogger.error("Unable to get uploaded proxy Info %s " %
                          proxyInfo["Message"])
            return S_ERROR(proxyInfo["Message"])

        proxyDetails = proxyInfo["Value"]
        identity = proxyDetails["identity"]
        proxyUser = proxyDetails["username"]
        return S_OK({"username": proxyUser, "authorDN": identity})

    #############################################################################
    def getTransformationsByUser(
        self,
        authorDN="",
        userName="",
        transID=[],
        transStatus=[],
        outputFields=[
            "TransformationID", "Status", "AgentType", "TransformationName",
            "CreationDate", "AuthorDN"
        ],
        orderBy="TransformationID",
        printOutput=False,
    ):
        """Fetch the transformations created by a given author.

        When ``authorDN`` is empty it is resolved from the current proxy via
        ``getAuthorDNfromProxy``; in that case a non-empty ``userName`` must
        match the username found in the proxy.

        :param str authorDN: DN of the author (empty: take it from the proxy)
        :param str userName: expected username, only checked when the DN is
            resolved from the proxy
        :param list transID: restrict to these transformation IDs (empty: no filter)
        :param list transStatus: restrict to these statuses (empty: no filter)
        :param list outputFields: fields printed per transformation when
            ``printOutput`` is set; an empty list prints the available field names
        :param str orderBy: field used to order the printed rows
        :param bool printOutput: if True, print the outcome
        :return: S_ERROR if the author cannot be resolved, otherwise the
            TransformationClient result structure, unchanged
        """
        # The list defaults are shared between calls; they are only read here.
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res["OK"]:
                gLogger.error(res["Message"])
                return S_ERROR(res["Message"])

            foundUserName = res["Value"]["username"]
            foundAuthor = res["Value"]["authorDN"]
            # A username that differs from the one that created the proxy
            # cannot be resolved to a DN: report the error and stop.
            if not (userName == "" or userName == foundUserName):
                message = (
                    "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                    % (userName, foundUserName))
                gLogger.error(message)
                return S_ERROR(message)

            userName = foundUserName
            authorDN = foundAuthor
            gLogger.info(
                "Will list transformations created by user '%s' with status '%s'"
                % (userName, ", ".join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ", ".join(transStatus)))

        condDict["AuthorDN"] = authorDN
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res

        if printOutput:
            if not outputFields:
                # join() is a str method: the separator joins the field names.
                gLogger.info("Available fields are: %s" %
                             " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "TransformationID", orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Print and return a summary table for a list of transformations.

        Column headers starting with 'F' ('J') refer to files (jobs);
        'Proc.' stands for processed.

        :param list transID: IDs of the transformations to summarise
        :return: S_OK with the list of per-transformation summary dicts, the
            failed client result, or S_ERROR when no summary row was built
        """
        # Pairs of (field name in the web summary, short header used for printing);
        # the short headers keep each row on a single line.
        columns = [
            ("TransformationID", "TransID"),
            ("Type", "Type"),
            ("Status", "Status"),
            ("Files_Total", "F_Total"),
            ("Files_PercentProcessed", "F_Proc.(%)"),
            ("Files_Processed", "F_Proc."),
            ("Files_Unused", "F_Unused"),
            ("Jobs_TotalCreated", "J_Created"),
            ("Jobs_Waiting", "J_Wait"),
            ("Jobs_Running", "J_Run"),
            ("Jobs_Done", "J_Done"),
            ("Jobs_Failed", "J_Fail"),
            ("Jobs_Stalled", "J_Stalled"),
        ]
        longNames = [pair[0] for pair in columns]
        shortNames = [pair[1] for pair in columns]

        result = self.transClient.getTransformationSummaryWeb(
            {"TransformationID": transID}, [], 0, len(transID))
        if not result["OK"]:
            self._prettyPrint(result)
            return result

        summaryRows = []
        if result["Value"]["TotalRecords"] > 0:
            try:
                availableNames = result["Value"]["ParameterNames"]
                for record in result["Value"]["Records"]:
                    shown = [
                        record[availableNames.index(name)]
                        for name in longNames
                    ]
                    summaryRows.append(dict(zip(shortNames, shown)))
            except Exception as x:
                print("Exception %s " % str(x))

        if not summaryRows:
            gLogger.error(
                "No found transformations satisfying input condition")
            return S_ERROR(
                "No found transformations satisfying input condition")

        print(
            self._printFormattedDictList(summaryRows, shortNames,
                                         shortNames[0], shortNames[0]))
        return S_OK(summaryRows)

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Register this transformation in the transformation system.

        The core definition is sent in a single client call; every remaining
        entry of ``self.paramValues`` that is not listed in ``self.paramTypes``
        is then stored as an additional transformation parameter.

        :param bool addFiles: if True, immediately perform input data query
        :param bool printOutput: if True, print information about transformation
        :return: S_OK with the new TransformationID, or the failing result
        """
        sanity = self._checkCreation()
        if not sanity["OK"]:
            return self._errorReport(sanity,
                                     "Failed transformation sanity check")
        if printOutput:
            gLogger.info(
                "Will attempt to create transformation with the following parameters"
            )
            self._prettyPrint(self.paramValues)

        values = self.paramValues
        positionalKeys = ("TransformationName", "Description",
                          "LongDescription", "Type", "Plugin", "AgentType",
                          "FileMask")
        res = self.transClient.addTransformation(
            *[values[key] for key in positionalKeys],
            transformationGroup=values["TransformationGroup"],
            groupSize=values["GroupSize"],
            inheritedFrom=values["InheritedFrom"],
            body=values["Body"],
            maxTasks=values["MaxNumberOfTasks"],
            eventsPerTask=values["EventsPerTask"],
            addFiles=addFiles,
            inputMetaQuery=self.inputMetaQuery,
            outputMetaQuery=self.outputMetaQuery
        )
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res

        newTransID = res["Value"]
        self.exists = True
        self.setTransformationID(newTransID)
        gLogger.notice("Created transformation %d" % newTransID)

        for paramName, paramValue in self.paramValues.items():
            if paramName in self.paramTypes:
                continue
            outcome = self.transClient.setTransformationParameter(
                newTransID, paramName, paramValue)
            if outcome["OK"]:
                continue
            # A failed extra parameter is reported but does not abort creation.
            gLogger.error("Failed to add parameter",
                          "%s %s" % (paramName, outcome["Message"]))
            gLogger.notice(
                "To add this parameter later please execute the following.")
            gLogger.notice("oTransformation = Transformation(%d)" % newTransID)
            gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(newTransID)

    def _checkCreation(self):
        """Sanity-check the definition before a new transformation is created.

        Refuses to continue when a TransformationID is already set, prompts
        interactively for any missing required parameter, and asks for a
        replacement when the selected Plugin is not in ``self.supportedPlugins``.

        :return: S_OK when the definition can be submitted, S_ERROR otherwise
        """
        if self.paramValues["TransformationID"]:
            gLogger.info(
                "You are currently working with an active transformation definition."
            )
            gLogger.info(
                "If you wish to create a new transformation reset the TransformationID."
            )
            gLogger.info("oTransformation.reset()")
            # Carry a message so that the caller's error report is not empty.
            return S_ERROR(
                "TransformationID is already set, reset it to create a new transformation"
            )

        requiredParameters = [
            "TransformationName", "Description", "LongDescription", "Type"
        ]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info(
                    "%s is not defined for this transformation. This is required..."
                    % parameter)
                self.paramValues[parameter] = six.moves.input(
                    "Please enter the value of " + parameter + " ")

        plugin = self.paramValues["Plugin"]
        if plugin and plugin not in self.supportedPlugins:
            gLogger.info(
                "The selected Plugin (%s) is not known to the transformation agent."
                % plugin)
            res = self.__promptForParameter("Plugin",
                                            choices=self.supportedPlugins,
                                            default="Standard")
            if not res["OK"]:
                return res
            self.paramValues["Plugin"] = res["Value"]

        return S_OK()

    def _checkBySizePlugin(self):
        """Run the same check as ``_checkStandardPlugin`` and return its result."""
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        """Run the same check as ``_checkStandardPlugin`` and return its result."""
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        """Ensure GroupSize is positive, resetting it to 1 through the setter if not.

        :return: S_OK, or the failing result of ``setGroupSize``
        """
        if not self.paramValues["GroupSize"] <= 0:
            return S_OK()

        gLogger.info(
            "The GroupSize was found to be less than zero. It has been set to 1."
        )
        outcome = self.setGroupSize(1)
        return S_OK() if outcome["OK"] else outcome

    def _checkBroadcastPlugin(self):
        """Prompt for the SE parameters needed by the Broadcast plugin when unset.

        Each missing parameter is read interactively as a comma- or
        space-separated list and handed to the matching ``set<Param>`` method.

        :return: S_OK, S_ERROR when a setter does not exist, or the failing
            result of a setter
        """
        neededParams = ["SourceSE", "TargetSE"]
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s"
            % (", ".join(neededParams)))
        for paramName in neededParams:
            if self.paramValues.get(paramName):
                continue
            # The user is asked first; the setter is only looked up afterwards.
            answer = six.moves.input("Please enter " + paramName + " ")
            setterName = "set%s" % paramName
            setter = None
            if hasattr(self, setterName):
                candidate = getattr(self, setterName)
                if callable(candidate):
                    setter = candidate
            if not setter:
                return S_ERROR(
                    "Unable to invoke %s, this function hasn't been implemented."
                    % setterName)
            outcome = setter(answer.replace(",", " ").split())
            if not outcome["OK"]:
                return outcome
        return S_OK()

    def __checkSEs(self, seList):
        """Verify that every name in ``seList`` is a configured StorageElement.

        :param list seList: StorageElement names to validate
        :return: S_OK when all are known, otherwise an error result
        """
        sections = gConfig.getSections("/Resources/StorageElements")
        if not sections["OK"]:
            return self._errorReport(sections,
                                     "Failed to get possible StorageElements")
        unknownSEs = set(seList) - set(sections["Value"])
        if not unknownSEs:
            return S_OK()
        for seName in unknownSEs:
            gLogger.error("StorageElement %s is not known" % seName)
        return S_ERROR("%d StorageElements not known" % len(unknownSEs))

    def __promptForParameter(self,
                             parameter,
                             choices=[],
                             default="",
                             insert=True):
        """Ask the user for a parameter value and optionally store it.

        :param str parameter: name of the parameter to ask for
        :param list choices: accepted answers, passed on to ``promptUser``
        :param str default: default answer, passed on to ``promptUser``
        :param bool insert: if True, apply the answer through ``set<parameter>``
        :return: S_OK with the chosen value, or an error result
        """
        res = promptUser("Please enter %s" % parameter,
                         choices=choices,
                         default=default)
        if not res["OK"]:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"]))
        paramValue = res["Value"]
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                # The setter name is interpolated so the message says what is
                # missing (the placeholder used to be returned verbatim).
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!"
                    % setterName)
            res = setter(paramValue)
            if not res["OK"]:
                return res
        return S_OK(paramValue)
Example #30
0
class TransformationAgent(AgentModule, TransformationAgentsUtilities):
    """ Usually subclass of AgentModule
  """
    def __init__(self, *args, **kwargs):
        """ Initialise both base classes and give every attribute a placeholder value
        """
        AgentModule.__init__(self, *args, **kwargs)
        TransformationAgentsUtilities.__init__(self)

        # selection of the transformations to treat
        self.pluginLocation = ''
        self.maxFiles = 0
        self.transformationStatus = []
        self.transformationTypes = []

        # client used outside of the threads
        self.transfClient = None

        # queue feeding the threads, and the IDs currently queued
        self.transQueue = Queue.Queue()
        self.transInQueue = []

        # locations used for caching
        self.workDirectory = self.cacheFile = self.controlDirectory = ''

        self.lastFileOffset = {}

        # replica cache and its validity
        self.replicaCache = self.replicaCacheValidity = None
        self.writingCache = False
        self.removedFromCache = 0

        # bookkeeping of unused files
        self.noUnusedDelay = 0
        self.unusedFiles = {}
        self.unusedTimeStamp = {}

        # per-thread state
        self.debug = False
        self.transInThread = {}
        self.pluginTimeout = {}

    def initialize(self):
        """ Read the agent options, create the client and start the worker threads
        """
        # agent options
        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.transformationStatus = self.am_getOption(
            'transformationStatus', ['Active', 'Completing', 'Flush'])
        self.maxFiles = self.am_getOption('MaxFiles', 5000)

        # transformation types: the agent option wins, otherwise the Operations values
        configuredTypes = self.am_getOption('TransformationTypes', [])
        if not configuredTypes:
            processingTypes = Operations().getValue(
                'Transformations/DataProcessing', ['MCSimulation', 'Merge'])
            manipulationTypes = Operations().getValue(
                'Transformations/DataManipulation', ['Replication', 'Removal'])
            configuredTypes = processingTypes + manipulationTypes
        self.transformationTypes = sorted(configuredTypes)

        # client
        self.transfClient = TransformationClient()

        # the replica cache is kept in a pickle file in the work directory
        self.workDirectory = self.am_getWorkDirectory()
        self.cacheFile = os.path.join(self.workDirectory, 'ReplicaCache.pkl')
        self.controlDirectory = self.am_getControlDirectory()

        # offset in the TS, if any
        self.lastFileOffset = {}

        # replica cache and its validity
        self.replicaCache = {}
        self.replicaCacheValidity = self.am_getOption('ReplicaCacheValidity',
                                                      2)

        self.noUnusedDelay = self.am_getOption('NoUnusedDelay', 6)

        # one _execute job per thread of the pool
        nThreads = self.am_getOption('maxThreadsInPool', 1)
        pool = ThreadPool(nThreads, nThreads)
        self.log.info("Multithreaded with %d threads" % nThreads)
        for threadIndex in xrange(nThreads):
            pool.generateJobAndQueueIt(self._execute, [threadIndex])

        self.log.info("Will treat the following transformation types: %s" %
                      str(self.transformationTypes))

        return S_OK()

    def finalize(self):
        """ Wait until the threads hold no transformation, then write the cache
        """
        method = 'finalize'
        if self.transInQueue:
            # Emptying the list of queued IDs comes first, before waiting.
            self.transInQueue = []
            nbInThread = len(self.transInThread)
            self._logInfo(
                "Wait for threads to get empty before terminating the agent (%d tasks)"
                % nbInThread,
                method=method)
            remaining = ','.join(
                [str(inThreadID) for inThreadID in self.transInThread])
            self._logInfo('Remaining transformations: ' + remaining,
                          method=method)
            # transInThread is polled every 2 seconds until it is empty
            while self.transInThread:
                time.sleep(2)
            self._logInfo("Threads are empty, terminating the agent...",
                          method=method)
        self.__writeCache()
        return S_OK()

    def execute(self):
        """ Put the transformations to process in the thread queue

        For a transformation with 'InheritedFrom' set, files are first moved
        from the parent transformation. A transformation whose ID is already in
        ``self.transInQueue`` is not queued again.

        :return: S_OK(), also when the transformations could not be obtained
    """
        # Get the transformations to process
        res = self.getTransformations()
        if not res['OK']:
            self._logError("Failed to obtain transformations:", res['Message'])
            return S_OK()
        # Keep the list under its own name: results obtained inside the loop
        # must not replace it, as it is needed again for the final summary.
        transformations = res['Value']
        count = 0
        for transDict in transformations:
            transID = long(transDict['TransformationID'])
            if transDict.get('InheritedFrom'):
                # Try and move datasets from the ancestor production
                moveRes = self.transfClient.moveFilesToDerivedTransformation(
                    transDict)
                if not moveRes['OK']:
                    self._logError(
                        "Error moving files from an inherited transformation",
                        moveRes['Message'],
                        transID=transID)
                else:
                    parentProd, movedFiles = moveRes['Value']
                    if movedFiles:
                        self._logInfo(
                            "Successfully moved files from %d to %d:" %
                            (parentProd, transID),
                            transID=transID)
                        for status, val in movedFiles.items():
                            self._logInfo("\t%d files to status %s" %
                                          (val, status),
                                          transID=transID)
            if transID not in self.transInQueue:
                count += 1
                self.transInQueue.append(transID)
                self.transQueue.put(transDict)
        self._logInfo("Out of %d transformations, %d put in thread queue" %
                      (len(transformations), count))
        return S_OK()

    def getTransformations(self):
        """ Return the transformations to process, as S_OK( list of transformation dicts )

        The 'Transformation' agent option selects either a single transformation
        or, with its default value 'All', every transformation matching the
        configured statuses (and types, if any).
    """
        method = 'getTransformations'
        transName = self.am_getOption('Transformation', 'All')

        if transName != 'All':
            # a single, named transformation
            self._logInfo("Getting transformation %s." % transName,
                          method=method)
            res = self.transfClient.getTransformation(transName,
                                                      extraParams=True)
            if not res['OK']:
                self._logError("Failed to get transformation:",
                               res['Message'],
                               method=method)
                return res
            return S_OK([res['Value']])

        # every transformation matching status (and type)
        typeInfo = ''
        if self.transformationTypes:
            typeInfo = ' of type %s' % str(self.transformationTypes)
        self._logInfo("Getting all transformations%s, status %s." %
                      (typeInfo, str(self.transformationStatus)),
                      method=method)
        selection = {'Status': self.transformationStatus}
        if self.transformationTypes:
            selection['Type'] = self.transformationTypes
        res = self.transfClient.getTransformations(selection,
                                                   extraParams=True)
        if not res['OK']:
            return res
        found = res['Value']
        self._logInfo("Obtained %d transformations to process" % len(found),
                      method=method)
        return S_OK(found)

    def _getClients(self):
        """ Build a new set of clients, keyed by class name, for use in a thread
    """
        clients = {}
        clients['TransformationClient'] = TransformationClient()
        clients['DataManager'] = DataManager()
        return clients

    def _execute(self, threadID):
        """ thread - does the real job: processing the transformations to be processed
    """

        # Each thread will have its own clients
        clients = self._getClients()

        while True:
            transDict = self.transQueue.get()
            try:
                transID = long(transDict['TransformationID'])
                if transID not in self.transInQueue:
                    break
                self.transInThread[transID] = ' [Thread%d] [%s] ' % (
                    threadID, str(transID))
                self._logInfo("Processing transformation %s." % transID,
                              transID=transID)
                startTime = time.time()
                res = self.processTransformation(transDict, clients)
                if not res['OK']:
                    self._logInfo("Failed to process transformation:",
                                  res['Message'],
                                  transID=transID)
            except Exception, x:
                self._logException('%s' % x, transID=transID)
            finally:
    Script.showHelp()
    exit(1)


# File statuses to select; used unless overridden by a switch below.
status = ['Problematic']

# The 't' / 'status' switch replaces the selection with a comma-separated list.
switches = Script.getUnprocessedSwitches()
for switch in switches:
    if switch[0] == 't' or switch[0] == 'status':
        status = switch[1].split(',')
        status = [s.strip() for s in status]

tc = TransformationClient()

# NOTE(review): 'args' is defined outside this fragment; each entry is handed
# to getTransformation() - presumably a transformation name or ID, confirm.
for t in args:
    res = tc.getTransformation(t)
    if not res['OK']:
        gLogger.error('Failed to get transformation information for %s: %s' % (t, res['Message']))
        continue

    # Select the files of this transformation, filtered by status if any is set.
    selectDict = {'TransformationID': res['Value']['TransformationID']}
    if status:
        selectDict['Status'] = status
    res = tc.getTransformationFiles(condDict=selectDict)
    if not res['OK']:
        gLogger.error('Failed to get transformation files: %s' % res['Message'])
        continue
    # Nothing to do for this transformation when no file matches.
    if not res['Value']:
        gLogger.debug('No file found for transformation %s' % t)
        continue
Example #32
0
class TransformationAgent( AgentModule, TransformationAgentsUtilities ):
  """ Usually subclass of AgentModule
  """

  def __init__( self, *args, **kwargs ):
    """ Initialise both base classes, read the agent options and set up the agent state
    """
    AgentModule.__init__( self, *args, **kwargs )
    TransformationAgentsUtilities.__init__( self )

    # agent options
    self.pluginLocation = self.am_getOption( 'PluginLocation',
                                             'DIRAC.TransformationSystem.Agent.TransformationPlugin' )
    self.transformationStatus = self.am_getOption( 'transformationStatus', ['Active', 'Completing', 'Flush'] )
    self.maxFiles = self.am_getOption( 'MaxFiles', 5000 )

    # transformation types: the agent option wins, otherwise the Operations values
    configuredTypes = self.am_getOption( 'TransformationTypes', [] )
    if not configuredTypes:
      processingTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      manipulationTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      configuredTypes = processingTypes + manipulationTypes
    self.transformationTypes = sortList( configuredTypes )

    # client
    self.transfClient = TransformationClient()

    # queue feeding the threads, and the IDs currently queued
    self.transQueue = Queue.Queue()
    self.transInQueue = []

    # the replica cache is kept in a pickle file in the work directory
    self.workDirectory = self.am_getWorkDirectory()
    self.cacheFile = os.path.join( self.workDirectory, 'ReplicaCache.pkl' )
    self.dateWriteCache = datetime.datetime.utcnow()

    # replica cache and its validity
    self.replicaCache = None
    self.replicaCacheValidity = self.am_getOption( 'ReplicaCacheValidity', 2 )
    self.writingCache = False

    # bookkeeping of unused files
    self.noUnusedDelay = self.am_getOption( 'NoUnusedDelay', 6 )
    self.unusedFiles = {}
    self.unusedTimeStamp = {}

    # per-thread state
    self.debug = False
    self.transInThread = {}

  def initialize( self ):
    """ Read the cache, set the shifter proxy option and start the worker threads
    """
    self.__readCache()
    self.dateWriteCache = datetime.datetime.utcnow()

    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    # one _execute job per thread of the pool
    nThreads = self.am_getOption( 'maxThreadsInPool', 1 )
    pool = ThreadPool( nThreads, nThreads )
    self.log.info( "Multithreaded with %d threads" % nThreads )
    for threadIndex in xrange( nThreads ):
      pool.generateJobAndQueueIt( self._execute, [threadIndex] )

    typesMessage = "Will treat the following transformation types: %s" % str( self.transformationTypes )
    self.log.info( typesMessage )

    return S_OK()

  def finalize( self ):
    """ Wait until the threads hold no transformation, then force a cache write
    """
    if self.transInQueue:
      nbInThread = len( self.transInThread )
      self._logInfo( "Wait for threads to get empty before terminating the agent (%d tasks)" % nbInThread )
      # The list of queued IDs is emptied before waiting.
      self.transInQueue = []
      # transInThread is polled every 2 seconds until it is empty
      while self.transInThread:
        time.sleep( 2 )
      self.log.info( "Threads are empty, terminating the agent..." )
    self.__writeCache( force = True )
    return S_OK()

  def execute( self ):
    """ Put the transformations to process in the thread queue

        For a transformation with 'InheritedFrom' set, files are first moved
        from the parent transformation. A transformation whose ID is already in
        self.transInQueue is not queued again.

        :return: S_OK(), also when the transformations could not be obtained
    """
    # Get the transformations to process
    res = self.getTransformations()
    if not res['OK']:
      self._logError( "Failed to obtain transformations: %s" % ( res['Message'] ) )
      return S_OK()
    # Keep the list under its own name: results obtained inside the loop
    # must not replace it, as it is needed again for the final summary.
    transformations = res['Value']
    count = 0
    for transDict in transformations:
      transID = long( transDict['TransformationID'] )
      if transDict.get( 'InheritedFrom' ):
        # Try and move datasets from the ancestor production
        moveRes = self.transfClient.moveFilesToDerivedTransformation( transDict )
        if not moveRes['OK']:
          self._logError( "Error moving files from an inherited transformation", moveRes['Message'], transID = transID )
        else:
          parentProd, movedFiles = moveRes['Value']
          if movedFiles:
            self._logInfo( "Successfully moved files from %d to %d:" % ( parentProd, transID ), transID = transID )
            for status, val in movedFiles.items():
              self._logInfo( "\t%d files to status %s" % ( val, status ), transID = transID )
      if transID not in self.transInQueue:
        count += 1
        self.transInQueue.append( transID )
        self.transQueue.put( transDict )
    self._logInfo( "Out of %d transformations, %d put in thread queue" % ( len( transformations ), count ) )
    return S_OK()

  def getTransformations( self ):
    """ Return the transformations to process, as S_OK( list of transformation dicts )

        The 'Transformation' agent option selects either a single transformation
        or, with its default value 'All', every transformation matching the
        configured statuses (and types, if any).
    """
    transName = self.am_getOption( 'Transformation', 'All' )

    if transName != 'All':
      # a single, named transformation
      self._logInfo( "Initializing for transformation %s." % transName, method = "getTransformations" )
      res = self.transfClient.getTransformation( transName, extraParams = True )
      if not res['OK']:
        self._logError( "Failed to get transformation: %s." % res['Message'], method = 'getTransformations' )
        return res
      return S_OK( [res['Value']] )

    # every transformation matching status (and type)
    self._logInfo( "Initializing general purpose agent.", method = 'getTransformations' )
    selection = {'Status': self.transformationStatus }
    if self.transformationTypes:
      selection['Type'] = self.transformationTypes
    res = self.transfClient.getTransformations( selection, extraParams = True )
    if not res['OK']:
      self._logError( "Failed to get transformations: %s" % res['Message'], method = 'getTransformations' )
      return res
    found = res['Value']
    self._logInfo( "Obtained %d transformations to process" % len( found ), method = 'getTransformations' )
    return S_OK( found )

  def _getClients( self ):
    """ Build a new set of clients, keyed by class name, for use in a thread
    """
    clients = {}
    clients['TransformationClient'] = TransformationClient()
    clients['ReplicaManager'] = ReplicaManager()
    return clients

  def _execute( self, threadID ):
    """ thread - does the real job: processing the transformations to be processed
    """

    #Each thread will have its own clients
    clients = self._getClients()

    while True:
      transDict = self.transQueue.get()
      try:
        transID = long( transDict['TransformationID'] )
        if transID not in self.transInQueue:
          break
        self.transInThread[transID] = ' [Thread%d] [%s] ' % ( threadID, str( transID ) )
        self._logInfo( "Processing transformation %s." % transID, transID = transID )
        startTime = time.time()
        res = self.processTransformation( transDict, clients )
        if not res['OK']:
          self._logInfo( "Failed to process transformation: %s" % res['Message'], transID = transID )
      except Exception, x:
        self._logException( '%s' % x, transID = transID )
      finally:
Example #33
0
class TransformationCLI(cmd.Cmd, API):
    def __init__(self):
        """Initialise the command loop and the client used to talk to the server."""
        # The base-class set-up does not depend on the attributes assigned below.
        cmd.Cmd.__init__(self)
        # Column at which the separator is aligned by printPair.
        self.indentSpace = 4
        self.server = TransformationClient()

    def printPair(self, key, value, separator=":"):
        valueList = value.split("\n")
        print "%s%s%s %s" % (key, " " * (self.indentSpace - len(key)),
                             separator, valueList[0].strip())
        for valueLine in valueList[1:-1]:
            print "%s  %s" % (" " * self.indentSpace, valueLine.strip())

    def do_exit(self, args):
        """ Exits the shell.
        usage: exit
    """
        # The docstring above is shown as interactive help text.
        # Leave the interpreter with a success status; `args` is ignored.
        raise SystemExit(0)

    def do_quit(self, *args):
        """ Exits the shell.
        Usage: quit
    """
        # The docstring above is shown as interactive help text.
        # Leave the interpreter with a success status; any arguments are ignored.
        raise SystemExit(0)

    def do_help(self, args):
        """ Default version of the help command
       Usage: help <command>
       OR use helpall to see description for all commans"""
        # The docstring above is runtime help text (it is printed by helpall),
        # so it is left untouched. The call simply delegates to cmd.Cmd.
        cmd.Cmd.do_help(self, args)

    # extended help command: describes every available command
    def do_helpall(self, args):
        """
    Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
    """
        if len(args) == 0:
            print "\nAvailable commands:\n"
            attrList = dir(self)
            attrList.sort()
            for attribute in attrList:
                if attribute.find("do_") == 0:
                    self.printPair(attribute[3:],
                                   getattr(self, attribute).__doc__[1:])
                    print ""
        else:
            command = args.split()[0].strip()
            try:
                obj = getattr(self, "do_%s" % command)
            except:
                print "There's no such %s command" % command
                return
            self.printPair(command, obj.__doc__[1:])

    def do_shell(self, args):
        """Execute a shell command

       usage !<shell_command>
    """
        # The docstring above is shown as interactive help text.
        res = shellCall(0, args)
        if not res['OK']:
            # Only a failed call result carries a 'Message' entry.
            print(res['Message'])
            return
        returnCode, stdOut, stdErr = res['Value']
        print("%s\n%s" % (stdOut, stdErr))
        if returnCode != 0:
            # A command that ran but failed still has output worth showing.
            # Previously this path looked up a missing 'Message' key.
            print("Command exited with status %s" % returnCode)

    def check_params(self, args, num):
        """Check that at least `num` whitespace-separated arguments were given.

        :param args: raw argument string
        :param num: minimum number of arguments required
        :return: (list of arguments, count) when enough were supplied,
                 otherwise (False, count) after printing an error
        """
        # str.split replaces the deprecated string.split (same result).
        argss = args.split()
        length = len(argss)
        if length < num:
            print("Error: Number of arguments provided %d less that required %d, please correct." % (
                length, num))
            return (False, length)
        return (argss, length)

    def check_id_or_name(self, id_or_name):
        """Return an all-digit argument converted to `long`, anything else unchanged.

        :param id_or_name: string holding a numeric ID or a name
        """
        return long(id_or_name) if id_or_name.isdigit() else id_or_name

    def do_setServer(self, args):
        """ Set the destination server

        usage: setServer serverURL
    """
        argss = string.split(args)
        if len(argss) == 0:
            print "no server provided"
        self.serverURL = argss[0]
        self.server.setServer(self.serverURL)

    ####################################################################
    #
    # These are the methods for transformation manipulation
    #

    def do_getall(self, args):
        """Get transformation details

       usage: getall [Status] [Status]
    """
        # The docstring above is shown as interactive help text.
        oTrans = Transformation()
        # serverURL only exists after setServer has been used. Without it the
        # Transformation object keeps whatever server its own client uses.
        serverURL = getattr(self, 'serverURL', None)
        if serverURL:
            oTrans.setServer(serverURL)
        oTrans.getTransformations(transStatus=args.split(), printOutput=True)

    def do_getStatus(self, args):
        """Get transformation details

       usage: getStatus <transName|ID>
    """
        # The docstring above is shown as interactive help text.
        transNames = args.split()
        if not transNames:
            print("no transformation supplied")
            return
        for transName in transNames:
            res = self.server.getTransformation(transName)
            if res['OK']:
                print("%s: %s" % (transName, res['Value']['Status']))
            else:
                print("Getting status of %s failed: %s" % (transName,
                                                           res['Message']))

    def do_setStatus(self, args):
        """Set transformation status

       usage: setStatus  <Status> <transName|ID>
       Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if len(argss) < 2:
            print("transformation and status not supplied")
            return
        # The first token is the status; every following token is a transformation.
        status, transNames = argss[0], argss[1:]
        for transName in transNames:
            res = self.server.setTransformationParameter(
                transName, 'Status', status)
            if res['OK']:
                print("%s set to %s" % (transName, status))
            else:
                print("Setting status of %s failed: %s" % (transName,
                                                           res['Message']))

    def do_start(self, args):
        """Start transformation

       usage: start <transName|ID>
    """
        # The docstring above is shown as interactive help text.
        transNames = args.split()
        if not transNames:
            print("no transformation supplied")
            return
        for transName in transNames:
            # Starting is two parameter updates: Status first, then AgentType.
            res = self.server.setTransformationParameter(
                transName, 'Status', 'Active')
            if not res['OK']:
                print("Setting Status of %s failed: %s" % (transName,
                                                           res['Message']))
                continue
            res = self.server.setTransformationParameter(
                transName, 'AgentType', 'Automatic')
            if not res['OK']:
                print("Setting AgentType of %s failed: %s" % (
                    transName, res['Message']))
                continue
            print("%s started" % transName)

    def do_stop(self, args):
        """Stop transformation

       usage: stop <transName|ID>
    """
        # The docstring above is shown as interactive help text.
        transNames = args.split()
        if not transNames:
            print("no transformation supplied")
            return
        for transName in transNames:
            # Stopping only switches the agent type; the status is left alone.
            res = self.server.setTransformationParameter(
                transName, 'AgentType', 'Manual')
            if res['OK']:
                print("%s stopped" % transName)
            else:
                print("Stopping of %s failed: %s" % (transName, res['Message']))

    def do_flush(self, args):
        """Flush transformation

       usage: flush <transName|ID>
    """
        # The docstring above is shown as interactive help text.
        transNames = args.split()
        if not transNames:
            print("no transformation supplied")
            return
        for transName in transNames:
            res = self.server.setTransformationParameter(
                transName, 'Status', 'Flush')
            if res['OK']:
                print("%s flushing" % transName)
            else:
                print("Flushing of %s failed: %s" % (transName, res['Message']))

    def do_get(self, args):
        """Get transformation definition

    usage: get <transName|ID>
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
            return
        # The body is left out of the listing (getBody shows it).
        # Tolerate a definition that comes back without one.
        res['Value'].pop('Body', None)
        printDict(res['Value'])

    def do_getBody(self, args):
        """Get transformation body

    usage: getBody <transName|ID>
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if res['OK']:
            print(res['Value']['Body'])
        else:
            print("Failed to get %s: %s" % (transName, res['Message']))

    def do_getFileStat(self, args):
        """Get transformation file statistics

     usage: getFileStat <transName|ID>
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformationStats(transName)
        if not res['OK']:
            print("Failed to get statistics for %s: %s" % (transName,
                                                           res['Message']))
            return
        # Only the per-status counters are shown. A missing 'Total' entry is
        # tolerated instead of raising KeyError.
        res['Value'].pop('Total', None)
        printDict(res['Value'])

    def do_modMask(self, args):
        """Modify transformation input definition

       usage: modMask <mask> <transName|ID>
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        # Both a mask and at least one transformation are needed. With the mask
        # alone the update loop below would silently do nothing.
        if len(argss) < 2:
            print("mask and transformation not supplied")
            return
        mask, transNames = argss[0], argss[1:]
        for transName in transNames:
            res = self.server.setTransformationParameter(
                transName, "FileMask", mask)
            if res['OK']:
                print("Updated %s filemask" % transName)
            else:
                print("Failed to modify input file mask for %s: %s" % (
                    transName, res['Message']))

    def do_getFiles(self, args):
        """Get files for the transformation (optionally with a given status)

    usage: getFiles <transName|ID> [Status] [Status]
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName, status = argss[0], argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res[
                'Message'])
            return
        # Files are selected by the numeric ID returned by the server.
        selectDict = {'TransformationID': res['Value']['TransformationID']}
        if status:
            selectDict['Status'] = status
        res = self.server.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
        elif res['Value']:
            self._printFormattedDictList(
                res['Value'],
                ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                'LFN', 'LFN')
        else:
            print("No files found")

    def do_getFileStatus(self, args):
        """Get file(s) status for the given transformation

    usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file not supplied")
            return
        transName, lfns = argss[0], argss[1:]

        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res[
                'Message'])
            return
        selectDict = {'TransformationID': res['Value']['TransformationID']}
        res = self.server.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
            return
        if not res['Value']:
            print("No files found")
            return
        # A set gives constant-time membership tests while filtering the
        # complete file list of the transformation.
        wanted = set(lfns)
        filesList = [fileDict for fileDict in res['Value']
                     if fileDict['LFN'] in wanted]
        if filesList:
            self._printFormattedDictList(filesList, [
                'LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'
            ], 'LFN', 'LFN')
        else:
            print("Could not find any LFN in %s for transformation %s" % (
                lfns, transName))

    def do_setFileStatus(self, args):
        """Set file status for the given transformation

    usage: setFileStatus <transName|ID> <lfn> <status>
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if len(argss) != 3:
            print("transformation file and status not supplied")
            return
        transName, lfn, status = argss
        res = self.server.setFileStatusForTransformation(
            transName, status, [lfn])
        if res['OK']:
            print("Updated file status to %s" % status)
        else:
            print("Failed to update file status: %s" % res['Message'])

    def do_resetFile(self, args):
        """Reset file status for the given transformation

    usage: resetFile <transName|ID> <lfn> [<lfn>...]
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file(s) not supplied")
            return
        transName, lfns = argss[0], argss[1:]
        # Resetting means putting the files back into the 'Unused' state.
        res = self.server.setFileStatusForTransformation(
            transName, 'Unused', lfns)
        if res['OK']:
            print("Updated file statuses to 'Unused' for %d file(s)" % len(
                lfns))
        else:
            print("Failed to reset file status: %s" % res['Message'])

    ####################################################################
    #
    # These are the methods for file manipulation
    #

    def do_addDirectory(self, args):
        """Add files from the given catalog directory

    usage: addDirectory <directory> [directory]
    """
        # The docstring above is shown as interactive help text.
        directories = args.split()
        if not directories:
            print("no directory supplied")
            return
        for directory in directories:
            res = self.server.addDirectory(directory, force=True)
            if res['OK']:
                print('added %s files for %s' % (res['Value'], directory))
            else:
                print('failed to add directory %s: %s' % (directory,
                                                          res['Message']))

    def do_replicas(self, args):
        """ Get replicas for <path>

        usage: replicas <lfn> [lfn]
    """
        # The docstring above is shown as interactive help text.
        lfns = args.split()
        if not lfns:
            print("no files supplied")
            return
        res = self.server.getReplicas(lfns)
        if not res['OK']:
            print("failed to get any replica information: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sortList(failed.keys()):
            print("failed to get replica information for %s: %s" % (lfn, failed[lfn]))
        successful = res['Value']['Successful']
        for lfn in sortList(successful.keys()):
            # One row per file: the padded LFN followed by padded SE names.
            columns = ["%s :" % lfn.ljust(100)]
            columns.extend(se.ljust(15) for se in sortList(successful[lfn].keys()))
            print(" ".join(columns))

    def do_addFile(self, args):
        """Add new files to transformation DB

    usage: addFile <lfn> [lfn]
    """
        # The docstring above is shown as interactive help text.
        lfns = args.split()
        if not lfns:
            print("no files supplied")
            return
        # Only the LFN matters; the other fields carry placeholder values.
        lfnDict = dict((lfn, {
            'PFN': 'IGNORED-PFN',
            'SE': 'IGNORED-SE',
            'Size': 0,
            'GUID': 'IGNORED-GUID',
            'Checksum': 'IGNORED-CHECKSUM'
        }) for lfn in lfns)
        res = self.server.addFile(lfnDict, force=True)
        if not res['OK']:
            print("failed to add any files: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sortList(failed.keys()):
            print("failed to add %s: %s" % (lfn, failed[lfn]))
        for lfn in sortList(res['Value']['Successful'].keys()):
            print("added %s" % lfn)

    def do_removeFile(self, args):
        """Remove file from transformation DB

    usage: removeFile <lfn> [lfn]
    """
        # The docstring above is shown as interactive help text.
        lfns = args.split()
        if not lfns:
            print("no files supplied")
            return
        res = self.server.removeFile(lfns)
        if not res['OK']:
            print("failed to remove any files: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sortList(failed.keys()):
            print("failed to remove %s: %s" % (lfn, failed[lfn]))
        for lfn in sortList(res['Value']['Successful'].keys()):
            print("removed %s" % lfn)

    def do_addReplica(self, args):
        """ Add new replica to the transformation DB

    usage: addReplica <lfn> <se>
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        # Only the LFN and SE matter; the other fields carry placeholder values.
        lfnDict = {
            lfn: {
                'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        }
        res = self.server.addReplica(lfnDict, force=True)
        if not res['OK']:
            print("failed to add replica: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for failedLfn in sortList(failed.keys()):
            print("failed to add replica: %s" % (failed[failedLfn]))
        for addedLfn in sortList(res['Value']['Successful'].keys()):
            print("added %s" % addedLfn)

    def do_removeReplica(self, args):
        """Remove replica from the transformation DB

    usage: removeReplica <lfn> <se>
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        # Only the LFN and SE matter; the other fields carry placeholder values.
        lfnDict = {
            lfn: {
                'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        }
        res = self.server.removeReplica(lfnDict)
        if not res['OK']:
            print("failed to remove replica: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for failedLfn in sortList(failed.keys()):
            print("failed to remove replica: %s" % (failed[failedLfn]))
        for removedLfn in sortList(res['Value']['Successful'].keys()):
            print("removed %s" % removedLfn)

    def do_setReplicaStatus(self, args):
        """Set replica status, usually used to mark a replica Problematic

    usage: setReplicaStatus <lfn> <status> <se>
    """
        # The docstring above is shown as interactive help text.
        argss = args.split()
        if len(argss) < 3:
            print("no file info supplied")
            return
        # Tokens beyond the third are ignored, as before.
        lfn, status, se = argss[:3]
        lfnDict = {
            lfn: {
                'Status': status,
                'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        }
        res = self.server.setReplicaStatus(lfnDict)
        if not res['OK']:
            print("failed to set replica status: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for failedLfn in sortList(failed.keys()):
            print("failed to set replica status: %s" % (failed[failedLfn]))
        for updatedLfn in sortList(res['Value']['Successful'].keys()):
            print("updated replica status %s" % updatedLfn)
Example #34
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """ c'tor

        :param transID: ID of an existing transformation to load; a false
                        value creates an empty, not yet existing, definition
        :param transClient: client to use; a TransformationClient is created
                            when none is given
        :raises AttributeError: when the server answers
                                'Transformation does not exist' for transID
    """
        super(Transformation, self).__init__()

        # Accepted Python types per known parameter (checked by __setParam).
        self.paramTypes = {
            'TransformationID': [types.IntType, types.LongType],
            'TransformationName': types.StringTypes,
            'Status': types.StringTypes,
            'Description': types.StringTypes,
            'LongDescription': types.StringTypes,
            'Type': types.StringTypes,
            'Plugin': types.StringTypes,
            'AgentType': types.StringTypes,
            'FileMask': types.StringTypes,
            'TransformationGroup': types.StringTypes,
            'GroupSize': [types.IntType, types.LongType, types.FloatType],
            'InheritedFrom': [types.IntType, types.LongType],
            'Body': types.StringTypes,
            'MaxNumberOfTasks': [types.IntType, types.LongType],
            'EventsPerTask': [types.IntType, types.LongType]
        }
        # Current parameter values, initialised to the defaults.
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins',
            ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        self.transClient = transClient if transClient else TransformationClient()
        self.serverURL = self.transClient.getServer()
        # While `exists` is False, setters only update the local values.
        self.exists = False
        if transID:
            self.paramValues['TransformationID'] = transID
            res = self.getTransformation()
            if res['OK']:
                self.exists = True
            elif res['Message'] == 'Transformation does not exist':
                # Call form of raise: valid on both Python 2 and Python 3.
                raise AttributeError('TransformationID %d does not exist' % transID)
            else:
                self.paramValues['TransformationID'] = 0
                gLogger.fatal(
                    "Failed to get transformation from database",
                    "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        """Remember `server` as the server URL and point the client at it."""
        self.serverURL = server
        client = self.transClient
        client.setServer(self.serverURL)

    def getServer(self):
        """Return the server URL currently stored on this object."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-initialise this object, optionally loading transformation `transID`.

        The client and server URL in use before the reset are kept. Previously
        __init__ replaced both, which made the following setServer call a no-op
        and silently dropped a configured server or injected client.

        :param transID: ID of the transformation to load after the reset
        :return: S_OK()
        """
        serverURL = self.serverURL
        self.__init__(transID, transClient=self.transClient)
        # __init__ re-reads the URL from the client; restore the saved one.
        self.setServer(serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Check `seList` and store it as the 'TargetSE' parameter (see __setSE)."""
        return self.__setSE('TargetSE', seList)

    def setSourceSE(self, seList):
        """Check `seList` and store it as the 'SourceSE' parameter (see __setSE)."""
        return self.__setSE('SourceSE', seList)

    def __setSE(self, se, seList):
        """Check a storage-element list and store it under parameter `se`.

        :param se: name of the parameter to set
        :param seList: list of names, or a string holding either a Python
                       expression that evaluates to one or names separated by
                       commas and/or whitespace
        :return: the failed result of __checkSEs, otherwise the result of
                 __setParam
        """
        if isinstance(seList, types.StringTypes):
            try:
                # SECURITY NOTE(review): eval() runs arbitrary expressions.
                # Never pass untrusted strings here; consider
                # ast.literal_eval if only list literals are needed.
                seList = eval(seList)
            except Exception:
                # Not a Python expression: treat as a comma/space separated list.
                seList = seList.replace(',', ' ').split()
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        # __setParam stores the value under the name found in item_called.
        self.item_called = se
        return self.__setParam(seList)

    def __getattr__(self, name):
        if name.find('get') == 0:
            item = name[3:]
            self.item_called = item
            return self.__getParam
        if name.find('set') == 0:
            item = name[3:]
            self.item_called = item
            return self.__setParam
        raise AttributeError, name

    def __getParam(self):
        if self.item_called == 'Available':
            return S_OK(self.paramTypes.keys())
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError, "Unknown parameter for transformation: %s" % self.item_called

    def __setParam(self, value):
        change = False
        if self.item_called in self.paramTypes:
            oldValue = self.paramValues[self.item_called]
            if oldValue != value:
                if type(value) in self.paramTypes[self.item_called]:
                    change = True
                else:
                    raise TypeError, "%s %s %s expected one of %s" % (
                        self.item_called, value, type(value),
                        self.paramTypes[self.item_called])
        if not self.item_called in self.paramTypes.keys():
            if not self.paramValues.has_key(self.item_called):
                change = True
            else:
                oldValue = self.paramValues[self.item_called]
                if oldValue != value:
                    change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" %
                            self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues['TransformationID']
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(
                    transID, self.item_called, value)
                if not res['OK']:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Load the transformation definition from the server into this object.

        Every returned parameter is passed to the matching setXxx accessor.

        :param printOutput: pretty-print a failed server answer
        :return: S_OK(dict of parameters), S_ERROR when no TransformationID is
                 set, or the failed server result
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            # For dynamic setters this lookup also records paramName in
            # item_called, so the setter must be called right afterwards.
            setter = getattr(self, setterName, None)
            if not callable(setter):
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Fetch the logging entries of this transformation from the server.

        :param printOutput: print the entries (or the failed result)
        :return: S_OK(list of entries), S_ERROR() when no TransformationID is
                 set, or the failed server result
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if res['OK']:
            entries = res['Value']
            if printOutput:
                columns = ['Message', 'MessageDate', 'AuthorDN']
                self._printFormattedDictList(entries, columns,
                                             'MessageDate', 'MessageDate')
            return S_OK(entries)
        if printOutput:
            self._prettyPrint(res)
        return res

    def extendTransformation(self, nTasks, printOutput=False):
        """Call the client's 'extendTransformation' for this transformation.

        :param nTasks: passed through to the client operation
        :param printOutput: pretty-print the result
        :return: result of __executeOperation
        """
        return self.__executeOperation('extendTransformation',
                                       nTasks,
                                       printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Run the client's 'cleanTransformation' and record the new status.

        On success the locally stored 'Status' becomes 'Cleaned'.

        :param printOutput: pretty-print the result
        :return: result of the client operation
        """
        outcome = self.__executeOperation('cleanTransformation',
                                          printOutput=printOutput)
        if not outcome['OK']:
            return outcome
        self.paramValues['Status'] = 'Cleaned'
        return outcome

    def deleteTransformation(self, printOutput=False):
        """Run the client's 'deleteTransformation'; reset this object on success.

        :param printOutput: pretty-print the result
        :return: result of the client operation
        """
        outcome = self.__executeOperation('deleteTransformation',
                                          printOutput=printOutput)
        if not outcome['OK']:
            return outcome
        self.reset()
        return outcome

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Call the client's 'addFilesToTransformation' for this transformation.

        :param lfns: passed through to the client operation
        :param printOutput: pretty-print the result
        :return: result of __executeOperation
        """
        return self.__executeOperation('addFilesToTransformation',
                                       lfns,
                                       printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Call the client's 'setFileStatusForTransformation' for this transformation.

        :param status: passed through to the client operation
        :param lfns: passed through to the client operation
        :param printOutput: pretty-print the result
        :return: result of __executeOperation
        """
        return self.__executeOperation('setFileStatusForTransformation',
                                       status,
                                       lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        """Call the client's 'getTransformationTaskStats' for this transformation.

        :param printOutput: pretty-print the result
        :return: result of __executeOperation
        """
        return self.__executeOperation('getTransformationTaskStats',
                                       printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        """Call the client's 'getTransformationStats' for this transformation.

        :param printOutput: pretty-print the result
        :return: result of __executeOperation
        """
        return self.__executeOperation('getTransformationStats',
                                       printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Call the client's 'deleteTasks' for this transformation.

        :param taskMin: passed through to the client operation
        :param taskMax: passed through to the client operation
        :param printOutput: pretty-print the result
        :return: result of __executeOperation
        """
        return self.__executeOperation('deleteTasks',
                                       taskMin,
                                       taskMax,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=None,
                                 se='Unknown',
                                 printOutput=False):
        """Call the client's 'addTaskForTransformation' for this transformation.

        :param lfns: passed through to the client operation; defaults to a
                     fresh empty list
        :param se: passed through to the client operation
        :param printOutput: pretty-print the result
        :return: result of __executeOperation
        """
        # A new list per call: a shared mutable default would be handed to
        # code outside this class, which could modify it for later callers.
        if lfns is None:
            lfns = []
        return self.__executeOperation('addTaskForTransformation',
                                       lfns,
                                       se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Call the client's 'setTaskStatus' for this transformation.

        :param taskID: passed through to the client operation
        :param status: passed through to the client operation
        :param printOutput: pretty-print the result
        :return: result of __executeOperation
        """
        return self.__executeOperation('setTaskStatus',
                                       taskID,
                                       status,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Invoke client method `operation` with this transformation's ID.

        :param operation: name of the method to call on self.transClient
        :param parms: positional arguments placed after the transformation ID
        :param kwds: keyword arguments for the method; 'printOutput' is taken
                     out and controls pretty-printing of the result
        :return: result of the client method, or S_ERROR when no
                 TransformationID is set or the method does not exist
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        # Optional: a caller that omits printOutput gets no printing rather
        # than a KeyError.
        printOutput = kwds.pop('printOutput', False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            return S_ERROR(
                "Unable to invoke %s, it isn't a member function of TransformationClient"
                % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        """Query the files of this transformation.

        :param fileStatus: when non-empty, used as the 'Status' selection
        :param lfns: when non-empty, used as the 'LFN' selection
        :param outputFields: columns to print; an empty value lists the
                             available field names instead
        :param orderBy: field passed to the printer for ordering
        :param printOutput: print the outcome
        :return: result of the client's getTransformationFiles
        """
        # The list defaults are only read here, never modified.
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join is a method of the separator string, not of the list.
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No files found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        """Query the tasks of this transformation.

        :param taskStatus: when non-empty, used as the 'ExternalStatus' selection
        :param taskIDs: when non-empty, used as the 'TaskID' selection
        :param outputFields: columns to print; an empty value lists the
                             available field names instead
        :param orderBy: field passed to the printer for ordering
        :param printOutput: print the outcome
        :return: result of the client's getTransformationTasks
        """
        # The list defaults are only read here, never modified.
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # join is a method of the separator string, not of the list.
                gLogger.info("Available fields are: %s" %
                             ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        """Query transformations known to the transformation client.

        :param transID: when truthy, stored under ``TransformationID`` in the
            selection
        :param transStatus: when truthy, stored under ``Status`` in the selection
        :param outputFields: fields handed to the table printer; a falsy value
            makes the method list the available field names instead
        :param orderBy: ordering key handed to the table printer
        :param printOutput: when True the outcome is printed/logged as well
        :return: the result structure returned by the transformation client
        """
        # NOTE: the list defaults are shared between calls; they are only read
        # in this method, never modified in place.
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if not printOutput:
            return res
        if not outputFields:
            # join() belongs to the separator string, not to the sequence
            # (presumably a list of field names).
            gLogger.info("Available fields are: %s" %
                         ' '.join(res['ParameterNames']))
        elif not res['Value']:
            gLogger.info("No transformations found for selection")
        else:
            self._printFormattedDictList(res['Value'], outputFields,
                                         'TransformationID', orderBy)
        return res

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create the transformation described by ``self.paramValues``.

        After the sanity check the transformation is registered through the
        transformation client. Every entry of ``self.paramValues`` that has no
        entry in ``self.paramTypes`` is then stored with
        ``setTransformationParameter``; a failure there is only logged.

        :param addFiles: forwarded to the client's ``addTransformation``
        :param printOutput: when True, print the parameters before creation and
            the client result if creation fails
        :return: ``S_OK(transID)`` on success, otherwise the failing result
        """
        res = self._checkCreation()
        if not res['OK']:
            return self._errorReport(res, 'Failed transformation sanity check')
        if printOutput:
            gLogger.info(
                "Will attempt to create transformation with the following parameters"
            )
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues['TransformationName'],
            self.paramValues['Description'],
            self.paramValues['LongDescription'],
            self.paramValues['Type'],
            self.paramValues['Plugin'],
            self.paramValues['AgentType'],
            self.paramValues['FileMask'],
            transformationGroup=self.paramValues['TransformationGroup'],
            groupSize=self.paramValues['GroupSize'],
            inheritedFrom=self.paramValues['InheritedFrom'],
            body=self.paramValues['Body'],
            maxTasks=self.paramValues['MaxNumberOfTasks'],
            eventsPerTask=self.paramValues['EventsPerTask'],
            addFiles=addFiles)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res['Value']
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            # dict.has_key() is deprecated; the ``in`` operator is equivalent.
            if paramName in self.paramTypes:
                continue
            res = self.transClient.setTransformationParameter(
                transID, paramName, paramValue)
            if not res['OK']:
                # Not fatal: tell the user how to set the parameter by hand.
                gLogger.error("Failed to add parameter",
                              "%s %s" % (paramName, res['Message']))
                gLogger.notice(
                    "To add this parameter later please execute the following."
                )
                gLogger.notice("oTransformation = Transformation(%d)" %
                               transID)
                gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Check that the current definition can be used to create a transformation.

        Fails if a TransformationID is already set. Missing mandatory
        parameters are requested interactively, and an unsupported plugin
        triggers a prompt for one of ``self.supportedPlugins``.

        :return: ``S_OK()`` when the definition is usable, ``S_ERROR`` otherwise
        """
        if self.paramValues['TransformationID']:
            gLogger.info(
                "You are currently working with an active transformation definition."
            )
            gLogger.info(
                "If you wish to create a new transformation reset the TransformationID."
            )
            gLogger.info("oTransformation.reset()")
            # Give the caller a message to report instead of an empty error.
            return S_ERROR(
                "TransformationID is already set for this transformation definition"
            )

        requiredParameters = [
            'TransformationName', 'Description', 'LongDescription', 'Type'
        ]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info(
                    "%s is not defined for this transformation. This is required..."
                    % parameter)
                self.paramValues[parameter] = raw_input(
                    "Please enter the value of " + parameter + " ")

        plugin = self.paramValues['Plugin']
        if plugin not in self.supportedPlugins:
            gLogger.info(
                "The selected Plugin (%s) is not known to the transformation agent."
                % plugin)
            res = self.__promptForParameter('Plugin',
                                            choices=self.supportedPlugins,
                                            default='Standard')
            if not res['OK']:
                return res
            self.paramValues['Plugin'] = res['Value']

        # NOTE: the plugin specific ``_check<Plugin>Plugin`` methods are not
        # invoked from here.
        return S_OK()

    def _checkBySizePlugin(self):
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        """Make sure GroupSize is usable for the Standard plugin.

        A GroupSize that compares ``<= 0`` is replaced by 1 through
        ``setGroupSize``.

        :return: ``S_OK()``, or the failing result of ``setGroupSize``
        """
        if not (self.paramValues['GroupSize'] <= 0):
            return S_OK()
        gLogger.info(
            "The GroupSize was found to be less than zero. It has been set to 1."
        )
        outcome = self.setGroupSize(1)
        if outcome['OK']:
            return S_OK()
        return outcome

    def _checkBroadcastPlugin(self):
        """Make sure the parameters needed by the Broadcast plugin are set.

        For each of SourceSE and TargetSE that is missing or empty, the value
        is requested interactively, split on commas/whitespace and handed to
        the matching ``set<Param>`` method.

        :return: ``S_OK()``, ``S_ERROR`` if a setter is unavailable, or the
            failing result of a setter
        """
        requiredParams = ['SourceSE', 'TargetSE']
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s"
            % (', '.join(requiredParams)))
        for requiredParam in requiredParams:
            # .get() covers both "key missing" and "value empty"
            # (replaces the deprecated dict.has_key()).
            if self.paramValues.get(requiredParam):
                continue
            paramValue = raw_input("Please enter " + requiredParam + " ")
            setter = None
            setterName = "set%s" % requiredParam
            if hasattr(self, setterName) and callable(
                    getattr(self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                return S_ERROR(
                    "Unable to invoke %s, this function hasn't been implemented."
                    % setterName)
            # Accept comma and/or whitespace separated storage element names.
            ses = paramValue.replace(',', ' ').split()
            res = setter(ses)
            if not res['OK']:
                return res
        return S_OK()

    def __checkSEs(self, seList):
        """Check every entry of seList against the configured StorageElements.

        The known names are the sections of ``/Resources/StorageElements``.

        :param seList: iterable of storage element names
        :return: ``S_OK()`` when all names are known, an error result otherwise
        """
        res = gConfig.getSections('/Resources/StorageElements')
        if not res['OK']:
            return self._errorReport(res,
                                     'Failed to get possible StorageElements')
        knownSEs = res['Value']
        unknown = []
        for se in seList:
            if se in knownSEs:
                continue
            gLogger.error("StorageElement %s is not known" % se)
            unknown.append(se)
        if not unknown:
            return S_OK()
        return S_ERROR("%d StorageElements not known" % len(unknown))

    def __promptForParameter(self,
                             parameter,
                             choices=[],
                             default='',
                             insert=True):
        """Ask the user for the value of a parameter.

        :param parameter: parameter name, used in the prompt and to build the
            ``set<parameter>`` method name
        :param choices: forwarded to ``promptUser``
        :param default: forwarded to ``promptUser``
        :param insert: when True the chosen value is also passed to
            ``set<parameter>``
        :return: ``S_OK(value)`` or an error result
        """
        res = promptUser("Please enter %s" % parameter,
                         choices=choices,
                         default=default)
        if not res['OK']:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res['Value']))
        paramValue = res['Value']
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                # The message was previously returned with a raw '%s' in it.
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!"
                    % setterName)
            res = setter(paramValue)
            if not res['OK']:
                return res
        return S_OK(paramValue)
Example #35
0
class TarTheProdLogsAgent( AgentModule ):
  '''
  Tar the prod logs, and send them to whatever storage element you want

  One cycle (see execute) first retries the upload of tar balls left in the
  work directory, then groups the log directories found under BaseLogPath per
  production and, for every production that is not in ACTIVE_STATUS, creates
  a tar ball, uploads it and removes the local copy.
  '''


  def __init__( self, *args, **kwargs ):
    '''
    Constructor: only sets placeholder values, the real configuration is
    read in initialize()
    '''
    AgentModule.__init__( self, *args, **kwargs )
    self.name = "TarTheProdLogsAgent"
    self.log = gLogger
    self.basepath = ""
    self.baselogpath = ""
    self.transclient = None
    self.ops = None
    self.storageElement = None
    self.baselfn = ""

  def initialize(self):
    """Read the agent configuration and create the clients.

    Mandatory agent options: BasePath, BaseLogPath.
    Mandatory Operations values: Transformations/ArchiveSE and
    Transformations/BaseLogLFN.

    :return: S_OK() or S_ERROR naming the missing option
    """
    self.am_setModuleParam("shifterProxy", "ProductionManager")

    self.basepath = self.am_getOption("BasePath", "")
    if not self.basepath:
      return S_ERROR("Missing mandatory option BasePath")

    self.baselogpath = self.am_getOption("BaseLogPath", "")
    if not self.baselogpath:
      return S_ERROR("Missing mandatory option BaseLogPath")

    self.ops = Operations()

    dest_se = self.ops.getValue("Transformations/ArchiveSE", "")
    if not dest_se:
      return S_ERROR("Missing mandatory option ArchiveSE")
    self.storageElement = StorageElement( dest_se )

    baselfn = self.ops.getValue("Transformations/BaseLogLFN", "")
    if not baselfn:
      return S_ERROR("Missing mandatory option Transformations/BaseLogLFN")
    self.baselfn = baselfn

    self.transclient = TransformationClient()

    self.log.info("Running ")
    return S_OK()

  def execute(self):
    """ Run it!

    A failure on one production is logged and does not stop the processing
    of the others.

    :return: S_OK(), or the failing result of one of the preparation steps
    """
    res = self.cleanupPrevious()
    if not res["OK"]:
      self.log.error("Failed to clean up previous run:", res["Message"])
      return res

    res = self.getDirectories()
    if not res["OK"]:
      return res

    res = self.getProductionIDs(res["Value"])
    if not res["OK"]:
      return res

    prods = res['Value']
    for prod, files in prods.items():
      res = self.transIsStopped(prod)
      if not res['OK']:
        # Do not skip silently: the logs of this production stay untouched
        self.log.error("Failed to get the status of transformation %s:" % prod,
                       res['Message'])
        continue
      if not res["Value"]:
        continue

      res = self.createTarBallAndCleanTheLogs(prod, files)
      if not res["OK"]:
        self.log.error("Could not get the tar ball:", res["Message"])
        continue

      tarBall = res["Value"]
      res = self.uploadToStorage(prod, tarBall)
      if not res["OK"]:
        self.log.error("Failed putting the file to storage:", res["Message"])
      else:
        res = self.cleanTarBall(tarBall)
        if not res["OK"]:
          self.log.error("Failed removing the Tar Ball", res["Message"])

    return S_OK()

  def cleanupPrevious(self):
    """ Look for previously created tar files, and try to upload them again.
    Also, create the work dir

    The work dir is <BasePath>/LogsTars; the name of the directory holding a
    tar ball is used as the production passed to uploadToStorage.

    :return: S_OK(), or S_ERROR if the work dir cannot be created
    """
    logs_dir = os.path.join(self.basepath, "LogsTars")
    if not os.path.isdir(logs_dir):
      try:
        os.mkdir(logs_dir)
      except OSError:
        return S_ERROR("Could not produce the directory")

    for root, dummy_dirs, files in os.walk(logs_dir):
      if not files:
        continue
      prod = root.rstrip("/").split("/")[-1]
      for tfile in files:
        tarballpath = os.path.join(root, tfile)
        res = self.uploadToStorage(prod, tarballpath)
        if not res['OK']:
          # The placeholder used to be logged without any value
          self.log.error("Failed to upload again %s to the SE" % tarballpath)
          continue
        res = self.cleanTarBall(tarballpath)
        if not res["OK"]:
          self.log.error("Failed to remove the tar ball")

    return S_OK()

  def getDirectories(self):
    """ List the directories below the base

    Only directories named LOG holding more than one sub-directory are
    considered; the last sub-directory (in sorted order) is left out.

    :return: S_OK({LOG directory path: [sub-directory names]})
    """
    final_dirs = {}
    for root, dirs, dummy_files in os.walk(self.baselogpath):
      if root.split("/")[-1] == "LOG" and len(dirs) > 1:
        logDirs = sorted(dirs) ## sort them so we can remove the last one
        del logDirs[-1]
        final_dirs[root]=logDirs

    return S_OK(final_dirs)

  def getProductionIDs(self, directories_and_files):
    """ Given input directories, get the prods_dict {prod:files}, where files is sorted per taskID
    This is used for the tar ball naming.

    The production ID is the third '_'-separated field of each entry name.
    Every entry is filed under its own production ID, so productions sharing
    a directory, or spread over several directories, keep all their entries.

    :param directories_and_files: {directory path: [entry names]}
    :return: S_OK({production ID (int): [full paths sorted by task ID]})
    """
    prods_dict = {}
    for path, files in directories_and_files.items():
      for logfile in files:
        prodid = int(logfile.split("_")[2])
        prods_dict.setdefault(prodid, []).append(os.path.join(path, logfile))

    for prodid in prods_dict:
      prods_dict[prodid].sort(key = self.__sortbyJob)

    return S_OK(prods_dict)

  def __sortbyJob(self, f_name):
    """ returns the taskID given a file name. Used for the sorting above

    The task ID is the last but one '_'-separated field of the name.
    """
    return int(f_name.split("_")[-2])

  def transIsStopped(self, prod):
    """ Check from the TS if the prod is Active or not

    :return: S_OK(False) when the status is in ACTIVE_STATUS, S_OK(True)
             otherwise, or the failing result of the TS query
    """
    res = self.transclient.getTransformation(prod)
    if not res['OK']:
      return res

    trans = res["Value"]
    if trans["Status"] in ACTIVE_STATUS:
      return S_OK(False)
    #meaning the prods are neither Active nor Completing
    return S_OK(True)

  def cleanTarBall(self, tarballpath):
    """ Physically remove the tar ball that was created to free disk space

    :return: S_OK(), or S_ERROR when os.unlink fails
    """
    try:
      os.unlink(tarballpath)
    except OSError as x:  # 'as' is accepted by Python 2.6+ and Python 3
      return S_ERROR("Failed with %s" % str(x))
    if os.path.exists(tarballpath):
      self.log.error("The tar ball still exists while it should have be removed: ", tarballpath)
    return S_OK()
Example #36
0
class ConsistencyInspector(object):
  """ A class for handling some consistency checks
  """

  def __init__(self, interactive=True, transClient=None, dm=None, fc=None, dic=None):
    """ c'tor

        :param interactive: stored as self.interactive; when true, progress
                            text is written to stdout by the helper methods
        :param transClient: transformation client to use, a TransformationClient()
                            is created when None
        :param dm: data manager to use, a DataManager() is created when not given
        :param fc: file catalog to use, a FileCatalog() is created when not given
        :param dic: integrity client to use, a DataIntegrityClient() is created
                    when not given

        One object for every production/directoriesList...
    """
    self.interactive = interactive
    if transClient is None:
      transClient = TransformationClient()
    self.transClient = transClient
    self.dataManager = dm or DataManager()
    self.fileCatalog = fc or FileCatalog()
    self.dic = dic or DataIntegrityClient()
    self.dirac = Dirac()

    # Starting points of the consistency checks
    self._prod = 0
    self._bkQuery = None
    self._fileType = []
    self._fileTypesExcluded = []
    self._lfns = []
    self.noLFC = False
    self.directories = []

    # Auxiliary selection information
    self.runsList = []
    self.runStatus = None
    self.fromProd = None
    self.transType = ''
    self.cachedReplicas = {}

    # Result containers: processed / non processed files and descendants
    self.prcdWithDesc = []
    self.prcdWithoutDesc = []
    self.prcdWithMultDesc = []
    self.nonPrcdWithDesc = []
    self.nonPrcdWithoutDesc = []
    self.nonPrcdWithMultDesc = []
    self.descForPrcdLFNs = []
    self.descForNonPrcdLFNs = []
    self.removedFiles = []

    # Result containers: catalog versus storage
    self.absentLFNsInFC = []
    self.existLFNsNoSE = {}
    self.existLFNsBadReplicas = {}
    self.existLFNsBadFiles = {}
    self.existLFNsNotExisting = {}
    self.commonAncestors = {}
    self.multipleDescendants = {}
    self.ancestors = {}

    self._verbose = False

  def __logVerbose(self, msg, msg1=''):
    """ logger helper for verbose information

        When self._verbose is set the message is sent at notice level with a
        '[ConsistencyChecks] ' prefix (followed by the production number when
        one is set) on every line; otherwise it goes to the verbose level
        unchanged.
    """
    if not self._verbose:
      gLogger.verbose(msg, msg1)
      return
    # The conditional expression has to be bracketed: unbracketed, the ``if``
    # governs the whole concatenation and the prefix disappears altogether
    # when no production is set.
    prefix = '[ConsistencyChecks] ' + (('[%s] ' % str(self.prod)) if self.prod else '')
    # Add that prefix to all lines of the message
    newMsg1 = msg1.replace('\n', '\n' + prefix)
    newMsg = prefix + msg.replace('\n', '\n' + prefix)
    gLogger.notice(newMsg, newMsg1)

  ##########################################################################

  def checkFC2SE(self):
    """ check files vs SE information """
    repDict = self.compareChecksum(self.lfns)
    self.existLFNsNoSE = repDict['MissingReplica']
    self.existLFNsNotExisting = repDict['MissingAllReplicas']
    self.existLFNsBadReplicas = repDict['SomeReplicasCorrupted']
    self.existLFNsBadFiles = repDict['AllReplicasCorrupted']

  def getReplicasPresence(self, lfns):
    """ get the replicas using the standard FileCatalog.getReplicas()

        The catalog is queried in chunks of 100 LFNs, each chunk being tried
        up to 9 times. Successful entries are also stored in
        self.cachedReplicas.

        :param lfns: list of LFNs
        :return: (list of LFNs with replicas, list of LFNs without)
    """
    present = set()
    notPresent = set()

    chunkSize = 100
    printProgress = (len(lfns) > chunkSize)
    startTime = time.time()
    self.__write("Checking replicas for %d files%s" %
                 (len(lfns), (' (chunks of %d)' % chunkSize) if printProgress else '... '))
    for chunk in breakListIntoChunks(lfns, chunkSize):
      if printProgress:
        self.__write('.')
      for _ in range(1, 10):
        res = self.fileCatalog.getReplicas(chunk)
        if res['OK']:
          present.update(res['Value']['Successful'])
          self.cachedReplicas.update(res['Value']['Successful'])
          notPresent.update(res['Value']['Failed'])
          break
        time.sleep(0.1)
      else:
        # All attempts failed: these files end up in neither list, so at
        # least leave a trace of it
        gLogger.error("Failed to get replicas for a chunk of %d files" % len(chunk),
                      res['Message'])
    self.__write(' (%.1f seconds)\n' % (time.time() - startTime))

    if notPresent:
      self.__logVerbose("Files without replicas:",
                        '\n'.join([''] + sorted(notPresent)))
    return list(present), list(notPresent)

  ##########################################################################

  def getReplicasPresenceFromDirectoryScan(self, lfns):
    """ Get replicas scanning the directories. Might be faster.

        The entries are grouped per parent directory and every directory is
        listed once. If any entry is itself a directory (it ends with '/'),
        no comparison is made and everything found is reported as present.

        :param lfns: list of LFNs or of directories ending with '/'
        :return: (list of present LFNs, list of not present LFNs)
    """
    lfnsByDir = {}
    compare = True
    for lfn in lfns:
      parent = os.path.dirname(lfn)
      if lfn == parent + '/':
        compare = False
      lfnsByDir.setdefault(parent, []).append(lfn)

    nDirs = len(lfnsByDir)
    if compare:
      self.__write("Checking File Catalog for %d files from %d directories " % (
          len(lfns), nDirs))
    else:
      self.__write("Getting files from %d directories " % nDirs)
    overallStart = time.time()

    present = []
    notPresent = []
    for parent in sorted(lfnsByDir):
      dirStart = time.time()
      self.__write('.')
      found = self._getFilesFromDirectoryScan(parent)
      gLogger.verbose("Obtained %d files in %.1f seconds" %
                      (len(found), time.time() - dirStart))
      if not compare:
        present += found
        continue
      inBoth, missing = self.__compareLFNLists(lfnsByDir[parent], found)
      notPresent += missing
      present += inBoth

    self.__write(' (%.1f seconds)\n' % (time.time() - overallStart))
    gLogger.info("Found %d files with replicas and %d without" %
                 (len(present), len(notPresent)))
    return present, notPresent

  ##########################################################################

  def __compareLFNLists(self, lfns, lfnsFound):
    """ return files in both lists and files in lfns and not in lfnsFound
    """
    present = []
    notPresent = lfns
    startTime = time.time()
    self.__logVerbose("Comparing list of %d LFNs with second list of %d" % (
        len(lfns), len(lfnsFound)))
    if lfnsFound:
      setLfns = set(lfns)
      setLfnsFound = set(lfnsFound)
      present = list(setLfns & setLfnsFound)
      notPresent = list(setLfns - setLfnsFound)
    self.__logVerbose("End of comparison: %.1f seconds" %
                      (time.time() - startTime))
    return present, notPresent

  def _getFilesFromDirectoryScan(self, dirs):
    """ calls dm.getFilesFromDirectory

        The logger is set to FATAL during the call and restored afterwards.

        :param dirs: directory (or directories) handed to the data manager
        :return: the files found, or an empty list on failure or when nothing
                 is found
    """
    level = gLogger.getLevel()
    gLogger.setLevel('FATAL')
    try:
      res = self.dataManager.getFilesFromDirectory(dirs)
    finally:
      # Restore the level even if the call raises, otherwise the logger
      # would stay silenced
      gLogger.setLevel(level)
    if not res['OK']:
      if 'No such file or directory' not in res['Message']:
        gLogger.error("Error getting files from directories %s:" %
                      dirs, res['Message'])
      return []
    return res['Value'] or []

  ##########################################################################

  def _getTSFiles(self):
    """ Helper function - get files from the TS
    """

    selectDict = {'TransformationID': self.prod}
    if self._lfns:
      selectDict['LFN'] = self._lfns
    elif self.runStatus and self.fromProd:
      res = self.transClient.getTransformationRuns(
          {'TransformationID': self.fromProd, 'Status': self.runStatus})
      if not res['OK']:
        gLogger.error("Failed to get runs for transformation %d" % self.prod)
      else:
        if res['Value']:
          self.runsList.extend(
              [run['RunNumber'] for run in res['Value'] if run['RunNumber'] not in self.runsList])
          gLogger.notice("%d runs selected" % len(res['Value']))
        elif not self.runsList:
          gLogger.notice("No runs selected, check completed")
          DIRAC.exit(0)
    if not self._lfns and self.runsList:
      selectDict['RunNumber'] = self.runsList

    res = self.transClient.getTransformation(self.prod)
    if not res['OK']:
      gLogger.error("Failed to find transformation %s" % self.prod)
      return [], [], []
    status = res['Value']['Status']
    if status not in ('Active', 'Stopped', 'Completed', 'Idle'):
      gLogger.notice("Transformation %s in status %s, will not check if files are processed" % (
          self.prod, status))
      processedLFNs = []
      nonProcessedLFNs = []
      nonProcessedStatuses = []
      if self._lfns:
        processedLFNs = self._lfns
    else:
      res = self.transClient.getTransformationFiles(selectDict)
      if not res['OK']:
        gLogger.error("Failed to get files for transformation %d" %
                      self.prod, res['Message'])
        return [], [], []
      else:
        processedLFNs = [item['LFN']
                         for item in res['Value'] if item['Status'] == 'Processed']
        nonProcessedLFNs = [item['LFN']
                            for item in res['Value'] if item['Status'] != 'Processed']
        nonProcessedStatuses = list(
            set(item['Status'] for item in res['Value'] if item['Status'] != 'Processed'))

    return processedLFNs, nonProcessedLFNs, nonProcessedStatuses

  def __getDirectories(self):
    """ get the directories where to look into (they are either given, or taken from the transformation ID
    """
    if self.directories:
      directories = []
      printout = False
      for directory in self.directories:
        if not directory.endswith('...'):
          directories.append(directory)
        else:
          printout = True
          topDir = os.path.dirname(directory)
          res = self.fileCatalog.listDirectory(topDir)
          if not res['OK']:
            # DError(errno.ENOENT, res['Message'] )
            return S_ERROR(errno.ENOENT, res['Message'])
          else:
            matchDir = directory.split('...')[0]
            directories += [d for d in res['Value']['Successful'].get(topDir, {}).get('SubDirs', [])
                            if d.startswith(matchDir)]
      if printout:
        gLogger.always('Expanded list of %d directories:\n%s' %
                       (len(directories), '\n'.join(directories)))
      return directories
    else:
      return S_ERROR(errno.ENOENT, 'Need to specify the directories')
  ##########################################################################

  def __write(self, text):
    if self.interactive:
      sys.stdout.write(text)
      sys.stdout.flush()

  ##########################################################################

  def _selectByFileType(self, lfnDict, fileTypes=None, fileTypesExcluded=None):
    """ Select only those files from the values of lfnDict that have a certain type
    """
    if not lfnDict:
      return {}
    if not fileTypes:
      fileTypes = self.fileType
    if not fileTypesExcluded:
      fileTypesExcluded = self.fileTypesExcluded
    else:
      fileTypesExcluded += [
          ft for ft in self.fileTypesExcluded if ft not in fileTypesExcluded]
    # lfnDict is a dictionary of dictionaries including the metadata, create a
    # deep copy to get modified
    ancDict = dict(lfnDict)
    if fileTypes == ['']:
      fileTypes = []
    # and loop on the original dictionaries
    for ancestor in lfnDict:
      for desc in lfnDict[ancestor].keys():
        ft = lfnDict[ancestor][desc]['FileType']
        if ft in fileTypesExcluded or (fileTypes and ft not in fileTypes):
          ancDict[ancestor].pop(desc)
      if not len(ancDict[ancestor]):
        ancDict.pop(ancestor)
    return ancDict

  @staticmethod
  def _getFileTypesCount(lfnDict):
    """ return file types count
    """
    ft_dict = {}
    for ancestor in lfnDict:
      t_dict = {}
      for desc in lfnDict[ancestor]:
        ft = lfnDict[ancestor][desc]['FileType']
        t_dict[ft] = t_dict.setdefault(ft, 0) + 1
      ft_dict[ancestor] = t_dict

    return ft_dict

  def __getLFNsFromFC(self):
    """ Check if a list of LFNs is in the FC or not """
    if not self.lfns:
      directories = []
      for dirName in self.__getDirectories():
        if not dirName.endswith('/'):
          dirName += '/'
        directories.append(dirName)
      present, notPresent = self.getReplicasPresenceFromDirectoryScan(
          directories)
    else:
      present, notPresent = self.getReplicasPresence(self.lfns)
    return present, notPresent

  def compareChecksum(self, lfns):
    """ Compare the checksum of the files in the FC with the checksum of their physical replicas.

        Replicas are taken from self.cachedReplicas when available, otherwise
        from the file catalog.

        :param lfns: iterable of LFNs to check
        :return: S_OK(dict) where dict has the keys
                 'AllReplicasCorrupted': {lfn: {se: {'PFNChecksum': ...}}}, no replica has a good checksum,
                 'SomeReplicasCorrupted': same layout, at least one replica is good,
                 'MissingReplica': {lfn: [SEs where the file was not found]}, at least one replica is good,
                 'MissingAllReplicas': {lfn: 'All'},
                 'NoReplicas': the 'Failed' entries of the catalog replica lookup;
                 S_ERROR(errno.ENOENT, ...) when a file catalog query fails
    """
    retDict = {'AllReplicasCorrupted': {},
               'SomeReplicasCorrupted': {},
               'MissingReplica': {},
               'MissingAllReplicas': {},
               'NoReplicas': {}}

    chunkSize = 100
    replicas = {}
    setLfns = set(lfns)
    # Reuse the replicas cached by a previous lookup, query the FC for the rest
    cachedLfns = setLfns & set(self.cachedReplicas)
    for lfn in cachedLfns:
      replicas[lfn] = self.cachedReplicas[lfn]
    lfnsLeft = list(setLfns - cachedLfns)
    if lfnsLeft:
      self.__write("Get replicas for %d files (chunks of %d): " %
                   (len(lfnsLeft), chunkSize))
      for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize):
        self.__write('.')
        replicasRes = self.fileCatalog.getReplicas(lfnChunk)
        if not replicasRes['OK']:
          gLogger.error("error:  %s" % replicasRes['Message'])
          return S_ERROR(errno.ENOENT, "error:  %s" % replicasRes['Message'])
        replicasRes = replicasRes['Value']
        if replicasRes['Failed']:
          retDict['NoReplicas'].update(replicasRes['Failed'])
        replicas.update(replicasRes['Successful'])

    self.__write("Get FC metadata for %d files to be checked: " % len(lfns))
    metadata = {}
    # NOTE(review): replicas is a dictionary here; presumably
    # breakListIntoChunks accepts it and yields chunks of its keys - confirm
    for lfnChunk in breakListIntoChunks(replicas, chunkSize):
      self.__write('.')
      res = self.fileCatalog.getFileMetadata(lfnChunk)
      if not res['OK']:
        return S_ERROR(errno.ENOENT, "error %s" % res['Message'])
      metadata.update(res['Value']['Successful'])

    gLogger.notice("Check existence and compare checksum file by file...")
    csDict = {}
    seFiles = {}
    # Reverse the LFN->SE dictionary
    nReps = 0
    for lfn in replicas:
      # Checksum registered in the catalog, None when there is no metadata
      csDict.setdefault(lfn, {})['LFCChecksum'] = metadata.get(
          lfn, {}).get('Checksum')
      for se in replicas[lfn]:
        seFiles.setdefault(se, []).append(lfn)
        nReps += 1

    gLogger.notice('Getting checksum of %d replicas in %d SEs' %
                   (nReps, len(seFiles)))
    checkSum = {}
    lfnNotExisting = {}
    lfnNoInfo = {}
    # The logger is set to FATAL while the storage elements are queried and
    # restored after the loop
    logLevel = gLogger.getLevel()
    gLogger.setLevel('FATAL')
    for num, se in enumerate(sorted(seFiles)):
      self.__write('\n%d. At %s (%d files): ' % (num, se, len(seFiles[se])))
      oSe = StorageElement(se)
      notFound = 0
      for surlChunk in breakListIntoChunks(seFiles[se], chunkSize):
        self.__write('.')
        # From here on 'metadata' holds the storage metadata of the chunk,
        # no longer the catalog metadata
        metadata = oSe.getFileMetadata(surlChunk)
        if not metadata['OK']:
          gLogger.error("Error: getFileMetadata returns %s. Ignore those replicas" % (
              metadata['Message']))
          # Remove from list of replicas as we don't know whether it is OK or
          # not
          # NOTE(review): this flags every file at this SE, not only those
          # of the failing chunk - confirm this is intended
          for lfn in seFiles[se]:
            lfnNoInfo.setdefault(lfn, []).append(se)
        else:
          metadata = metadata['Value']
          notFound += len(metadata['Failed'])
          for lfn in metadata['Failed']:
            lfnNotExisting.setdefault(lfn, []).append(se)
          for lfn in metadata['Successful']:
            checkSum.setdefault(
                lfn, {})[se] = metadata['Successful'][lfn]['Checksum']
      if notFound:
        gLogger.error('%d files not found' % notFound)

    gLogger.setLevel(logLevel)

    gLogger.notice('Verifying checksum of %d files' % len(replicas))
    for lfn in replicas:
      # get the lfn checksum from the FC
      replicaDict = replicas[lfn]
      oneGoodReplica = False
      allGoodReplicas = True
      # After the pop, csDict[lfn] only holds the per-SE mismatches
      lfcChecksum = csDict[lfn].pop('LFCChecksum')
      for se in replicaDict:
        # If replica doesn't exist skip check
        if se in lfnNotExisting.get(lfn, []):
          allGoodReplicas = False
          continue
        if se in lfnNoInfo.get(lfn, []):
          # If there is no info, a priori it could be good
          oneGoodReplica = True
          continue
        # get the surls metadata and compare the checksum
        surlChecksum = checkSum.get(lfn, {}).get(se, '')
        if not surlChecksum or not compareAdler(lfcChecksum, surlChecksum):
          # if lfcChecksum does not match surlChecksum
          csDict[lfn][se] = {'PFNChecksum': surlChecksum}
          gLogger.info("ERROR!! checksum mismatch at %s for LFN %s:  LFC checksum: %s , PFN checksum : %s "
                       % (se, lfn, lfcChecksum, surlChecksum))
          allGoodReplicas = False
        else:
          oneGoodReplica = True
      # Classify the file; nothing is recorded when all its replicas are good
      if not oneGoodReplica:
        if lfn in lfnNotExisting:
          gLogger.info("=> All replicas are missing", lfn)
          retDict['MissingAllReplicas'][lfn] = 'All'
        else:
          gLogger.info("=> All replicas have bad checksum", lfn)
          retDict['AllReplicasCorrupted'][lfn] = csDict[lfn]
      elif not allGoodReplicas:
        if lfn in lfnNotExisting:
          gLogger.info("=> At least one replica missing", lfn)
          retDict['MissingReplica'][lfn] = lfnNotExisting[lfn]
        else:
          gLogger.info("=> At least one replica with good Checksum", lfn)
          retDict['SomeReplicasCorrupted'][lfn] = csDict[lfn]

    return S_OK(retDict)

  ##########################################################################
  # properties

  def set_prod(self, value):
    """ Setter for the production (transformation) ID

        A truthy value is cast to int and looked up through the transformation
        client so that its type can be cached in ``self.transType``.
        A falsy value resets the production to 0.

        :param value: production ID (anything accepted by ``int``) or a falsy value
    """
    if value:
      value = int(value)
      res = self.transClient.getTransformation(value, extraParams=False)
      if not res['OK']:
        # A property setter cannot hand an S_ERROR back to its caller, so the
        # failure is reported through the logger instead of being built and dropped
        gLogger.error("Couldn't find transformation %d" % value, res['Message'])
      else:
        self.transType = res['Value']['Type']
        # Only advertise the type when it was actually retrieved for this production
        if self.interactive:
          gLogger.info("Production %d has type %s" % (value, self.transType))
    else:
      value = 0
    self._prod = value

  def get_prod(self):
    """ Getter for the production ID stored by the setter """
    return self._prod
  prod = property(get_prod, set_prod)

  def set_fileType(self, value):
    """ Store the file types, each converted to upper case

        :param value: iterable of file type strings
    """
    upperCased = []
    for oneType in value:
      upperCased.append(oneType.upper())
    self._fileType = upperCased

  def get_fileType(self):
    """ Return the list of (upper-cased) file types """
    return self._fileType
  fileType = property(get_fileType, set_fileType)

  def set_fileTypesExcluded(self, value):
    """ Store the excluded file types, each converted to upper case

        :param value: iterable of file type strings
    """
    excluded = list(oneType.upper() for oneType in value)
    self._fileTypesExcluded = excluded

  def get_fileTypesExcluded(self):
    """ Return the list of (upper-cased) excluded file types """
    return self._fileTypesExcluded
  fileTypesExcluded = property(get_fileTypesExcluded, set_fileTypesExcluded)

  def set_lfns(self, value):
    """ Store the LFNs after removing blanks and collapsing '//' into '/'

        :param value: a single LFN string or an iterable of LFN strings
    """
    lfnList = [value] if isinstance(value, basestring) else value
    cleaned = []
    for lfn in lfnList:
      cleaned.append(lfn.replace(' ', '').replace('//', '/'))
    self._lfns = cleaned

  def get_lfns(self):
    """ Return the list of cleaned LFNs """
    return self._lfns
  lfns = property(get_lfns, set_lfns)

  ##########################################################################
  #
  #  This part was backported from DataIntegrityClient
  #
  #
  #  This section contains the specific methods for File Catalog->SE checks
  #

  def catalogDirectoryToSE(self, lfnDir):
    """ Get the catalog replicas and metadata of the files below the given
        directory (or directories) and check them against the storage elements.

        :param lfnDir: one directory LFN or a list of them
        :return: S_OK with 'CatalogMetadata' and 'CatalogReplicas', or the first failing result
    """
    separator = "-" * 40
    gLogger.info(separator)
    gLogger.info("Performing the FC->SE check")
    gLogger.info(separator)
    directories = [lfnDir] if isinstance(lfnDir, basestring) else lfnDir
    contents = self._getCatalogDirectoryContents(directories)
    if not contents['OK']:
      return contents
    catalogReplicas = contents['Value']['Replicas']
    metadata = contents['Value']['Metadata']
    checked = self.checkPhysicalFiles(catalogReplicas, metadata)
    if not checked['OK']:
      return checked
    return S_OK({'CatalogMetadata': metadata, 'CatalogReplicas': catalogReplicas})

  def catalogFileToSE(self, lfns):
    """ Get the catalog metadata and replicas of the given files and check them
        against the storage elements.

        :param lfns: one LFN or a list of LFNs
        :return: S_OK with 'CatalogMetadata' and 'CatalogReplicas', or the first failing result
    """
    separator = "-" * 40
    gLogger.info(separator)
    gLogger.info("Performing the FC->SE check")
    gLogger.info(separator)
    lfnList = [lfns] if isinstance(lfns, basestring) else lfns
    metadataRes = self._getCatalogMetadata(lfnList)
    if not metadataRes['OK']:
      return metadataRes
    # Only the metadata of the files found in the catalog is used further on
    metadata, _notInCatalog, _emptyFiles = metadataRes['Value']
    replicasRes = self._getCatalogReplicas(metadata.keys())
    if not replicasRes['OK']:
      return replicasRes
    catalogReplicas, _noReplicaFiles = replicasRes['Value']
    checked = self.checkPhysicalFiles(catalogReplicas, metadata)
    if not checked['OK']:
      return checked
    return S_OK({'CatalogMetadata': metadata, 'CatalogReplicas': catalogReplicas})

  def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
    """ Compare the catalog information with what the storage elements report.

        The replicas are grouped per storage element, the physical metadata is
        fetched for each group, and replicas whose size differs from the catalog
        one are reported as 'CatalogPFNSizeMismatch'.

        :param replicas: dictionary {lfn: {se: url}}
        :param catalogMetadata: dictionary {lfn: metadata}, metadata holding a 'Size'
        :param ses: optional collection of SE names; when given, other SEs are ignored
        :return: S_OK(), or the failing result of the physical metadata lookup
    """

    # FIXME: we better use the compareChecksum function instead of this one!
    # or maybe directly checkFC2SE

    separator = "-" * 40
    gLogger.info(separator)
    gLogger.info("Performing the LFC->SE check")
    gLogger.info(separator)

    # Group the LFNs by storage element, honouring the optional SE filter
    lfnsPerSE = {}
    for lfn, replicaDict in replicas.iteritems():
      for se, _pfn in replicaDict.iteritems():
        if not ses or se in ses:
          lfnsPerSE.setdefault(se, []).append(lfn)
    gLogger.info('%s %s' % ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))

    for se in sorted(lfnsPerSE):
      seFiles = lfnsPerSE[se]
      gLogger.info('%s %s' % (se.ljust(20), str(len(seFiles)).rjust(20)))

      res = self.__checkPhysicalFileMetadata(seFiles, se)
      if not res['OK']:
        gLogger.error('Failed to get physical file metadata.', res['Message'])
        return res
      mismatches = []
      for lfn, metadata in res['Value'].iteritems():
        if lfn not in catalogMetadata:
          continue
        if metadata['Size'] != catalogMetadata[lfn]['Size']:
          mismatches.append((lfn, 'deprecatedUrl', se, 'CatalogPFNSizeMismatch'))
      if mismatches:
        self.dic.reportProblematicReplicas(mismatches, se, 'CatalogPFNSizeMismatch')
    return S_OK()

  def __checkPhysicalFileMetadata(self, lfns, se):
    """ Get the physical metadata of the files at one storage element and report
        the missing, lost, unavailable and zero-size replicas.

        :param lfns: list of LFNs expected at the storage element
        :param se: storage element name
        :return: S_OK with the metadata of the files found, or the failing result
    """

    def _problematic(lfn, reason):
      """ Tuple describing one problematic replica at this storage element """
      return (lfn, 'deprecatedUrl', se, reason)

    gLogger.info('Checking the integrity of %s physical files at %s' % (len(lfns), se))

    res = StorageElement(se).getFileMetadata(lfns)
    if not res['OK']:
      gLogger.error('Failed to get metadata for lfns.', res['Message'])
      return res
    pfnMetadata = res['Value']['Successful']

    # Replicas that are not on the storage at all
    missing = [_problematic(lfn, 'PFNMissing')
               for lfn, reason in res['Value']['Failed'].iteritems()
               if re.search('File does not exist', reason)]
    if missing:
      self.dic.reportProblematicReplicas(missing, se, 'PFNMissing')

    # Replicas that exist but cannot be used as they are
    lost, unavailable, zeroSize = [], [], []
    for lfn, metadata in pfnMetadata.iteritems():
      if metadata.get('Lost'):
        lost.append(_problematic(lfn, 'PFNLost'))
      if metadata.get('Unavailable') or not metadata['Accessible']:
        unavailable.append(_problematic(lfn, 'PFNUnavailable'))
      if not metadata['Size']:
        zeroSize.append(_problematic(lfn, 'PFNZeroSize'))
    for problems, reason in ((lost, 'PFNLost'),
                             (unavailable, 'PFNUnavailable'),
                             (zeroSize, 'PFNZeroSize')):
      if problems:
        self.dic.reportProblematicReplicas(problems, se, reason)

    gLogger.info('Checking the integrity of physical files at %s complete' % se)
    return S_OK(pfnMetadata)

  ##########################################################################
  #
  # This section contains the specific methods for SE->File Catalog checks
  #

  def storageDirectoryToCatalog(self, lfnDir, storageElement):
    """ List the files found at the storage element below the given directories
        and check them against the catalog.

        :param lfnDir: one directory LFN or a list of them
        :param storageElement: storage element name
        :return: S_OK with 'CatalogMetadata' and 'StorageMetadata' (both empty
                 when nothing was found on storage), or the failing result
    """
    separator = "-" * 40
    gLogger.info(separator)
    gLogger.info("Performing the SE->FC check at %s" % storageElement)
    gLogger.info(separator)
    directories = [lfnDir] if isinstance(lfnDir, basestring) else lfnDir
    res = self.getStorageDirectoryContents(directories, storageElement)
    if not res['OK']:
      return res
    filesOnStorage = res['Value']
    if not filesOnStorage:
      return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})
    return self.__checkCatalogForSEFiles(filesOnStorage, storageElement)

  def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
    """ Check that the files found on storage are registered in the catalog at
        this storage element, and that the sizes agree.

        :param storageMetadata: dictionary {lfn: metadata} of the files found on storage
        :param storageElement: storage element name
        :return: S_OK with 'CatalogMetadata' and 'StorageMetadata', or S_ERROR
    """
    gLogger.info('Checking %s storage files exist in the catalog' % len(storageMetadata))

    res = self.fileCatalog.getReplicas(storageMetadata)
    if not res['OK']:
      gLogger.error("Failed to get replicas for LFN", res['Message'])
      return res
    failedLfns = res['Value']['Failed']
    successfulLfns = res['Value']['Successful']

    unregistered = []
    for lfn in storageMetadata:
      if lfn in failedLfns:
        # A file unknown to the catalog is a finding, not a lookup failure
        notInCatalog = 'No such file or directory' in failedLfns[lfn]
        if notInCatalog:
          failedLfns.pop(lfn)
      else:
        notInCatalog = storageElement not in successfulLfns[lfn]
      if notInCatalog:
        unregistered.append((lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))

    if unregistered:
      self.dic.reportProblematicReplicas(unregistered, storageElement, 'LFNNotRegistered')
    # Whatever is left in the failed dictionary is a genuine lookup failure
    if failedLfns:
      return S_ERROR(errno.ENOENT, 'Failed to obtain replicas')

    # For the LFNs found to be registered obtain the file metadata from the
    # catalog and verify against the storage metadata
    res = self._getCatalogMetadata(storageMetadata)
    if not res['OK']:
      return res
    catalogMetadata, _notInCatalogFiles, _emptyFiles = res['Value']
    mismatches = []
    for lfn, catalogEntry in catalogMetadata.iteritems():
      storageSize = storageMetadata[lfn]['Size']
      # A storage size of 0 is not counted as a mismatch here
      if storageSize != catalogEntry['Size'] and storageSize != 0:
        mismatches.append((lfn, 'deprecatedUrl', storageElement, 'CatalogPFNSizeMismatch'))
    if mismatches:
      self.dic.reportProblematicReplicas(mismatches, storageElement, 'CatalogPFNSizeMismatch')
    gLogger.info('Checking storage files exist in the catalog complete')
    return S_OK({'CatalogMetadata': catalogMetadata,
                 'StorageMetadata': storageMetadata})

  def getStorageDirectoryContents(self, lfnDir, storageElement):
    """ Walk the supplied directories on the given storage element and collect
        the metadata of all the files found below them.

        Directories reported as not existing are skipped. Any failure while
        checking existence, listing a directory or resolving file URLs aborts
        the walk. Files named 'dirac_directory' are dropped from the result and
        files with a zero (or empty) size are reported as 'PFNZeroSize'.

        :param lfnDir: list of directory LFNs
        :param storageElement: storage element name
        :return: S_OK with a dictionary {lfn: metadata}, or an error result
    """
    gLogger.info('Obtaining the contents for %s directories at %s' %
                 (len(lfnDir), storageElement))

    se = StorageElement(storageElement)

    # First make sure the existence of every directory could be determined
    res = se.exists(lfnDir)
    if not res['OK']:
      gLogger.error(
          "Failed to obtain existance of directories", res['Message'])
      return res
    for directory, error in res['Value']['Failed'].iteritems():
      gLogger.error('Failed to determine existance of directory',
                    '%s %s' % (directory, error))
    if res['Value']['Failed']:
      return S_ERROR(errno.ENOENT, 'Failed to determine existance of directory')
    directoryExists = res['Value']['Successful']
    # Only the directories that exist on the storage are walked
    activeDirs = []
    for directory in sorted(directoryExists):
      exists = directoryExists[directory]
      if exists:
        activeDirs.append(directory)
    allFiles = {}
    # activeDirs is used as a work queue: sub-directories are appended while walking
    while len(activeDirs) > 0:
      currentDir = activeDirs[0]
      res = se.listDirectory(currentDir)
      activeDirs.remove(currentDir)
      if not res['OK']:
        gLogger.error('Failed to get directory contents', res['Message'])
        return res
      elif currentDir in res['Value']['Failed']:
        gLogger.error('Failed to get directory contents', '%s %s' %
                      (currentDir, res['Value']['Failed'][currentDir]))
        return S_ERROR(errno.ENOENT, res['Value']['Failed'][currentDir])
      else:
        dirContents = res['Value']['Successful'][currentDir]
        # Queue the sub-directories; a failed URL->LFN resolution silently adds nothing
        # NOTE(review): extending with the 'Successful' mapping adds its keys - confirm
        # these are what listDirectory expects
        activeDirs.extend(se.getLFNFromURL(dirContents['SubDirs']).get(
            'Value', {}).get('Successful', []))
        fileURLMetadata = dirContents['Files']
        fileMetadata = {}
        res = se.getLFNFromURL(fileURLMetadata)
        if not res['OK']:
          gLogger.error('Failed to get directory content LFNs', res['Message'])
          return res

        for url, error in res['Value']['Failed'].iteritems():
          gLogger.error("Failed to get LFN for URL", "%s %s" % (url, error))
        if res['Value']['Failed']:
          return S_ERROR(errno.ENOENT, "Failed to get LFNs for PFNs")
        # Re-key the metadata of the listed files by LFN instead of by URL
        urlLfns = res['Value']['Successful']
        for urlLfn, lfn in urlLfns.iteritems():
          fileMetadata[lfn] = fileURLMetadata[urlLfn]
        allFiles.update(fileMetadata)

    zeroSizeFiles = []

    # sorted() works on a copy of the keys, so popping from allFiles in the loop is safe
    for lfn in sorted(allFiles):
      if os.path.basename(lfn) == 'dirac_directory':
        allFiles.pop(lfn)
      else:
        metadata = allFiles[lfn]
        if not metadata['Size']:
          zeroSizeFiles.append(
              (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
    if zeroSizeFiles:
      self.dic.reportProblematicReplicas(
          zeroSizeFiles, storageElement, 'PFNZeroSize')

    gLogger.info('Obtained at total of %s files for directories at %s' %
                 (len(allFiles), storageElement))
    return S_OK(allFiles)

  def _getCatalogDirectoryContents(self, lfnDirs):
    """ Recursively list the supplied catalog directories and get the replicas
        of all the files found.

        :param lfnDirs: list of directory LFNs
        :return: S_OK with 'Metadata' ({lfn: metadata} as listed by the catalog)
                 and 'Replicas' (successful replica lookups), or an error result
    """

    def _getDirectoryContent(directory):
      """ Inner function: files of a directory and of all its sub-directories

          :return: S_OK with a dictionary keyed by LFN, or an error result
      """
      gLogger.debug("Examining %s" % directory)

      listing = self.fileCatalog.listDirectory(directory)
      if not listing['OK']:
        gLogger.error('Failed to get directory contents', listing['Message'])
        return listing
      failed = listing['Value']['Failed']
      if directory in failed:
        gLogger.error('Failed to get directory content', '%s %s' % (directory, failed[directory]))
        return S_ERROR('Failed to get directory content')
      successful = listing['Value']['Successful']
      if directory not in successful:
        return S_ERROR('Directory not existing?')
      content = successful[directory]

      # Files sitting directly in this directory
      gLogger.debug("Files in %s: %d" % (directory, len(content['Files'])))
      collected = {}
      collected.update(content['Files'])

      # Then descend into each sub-directory, giving up at the first failure
      for subDir in content['SubDirs'] or ():
        subContent = _getDirectoryContent(subDir)
        if not subContent['OK']:
          return subContent
        collected.update(subContent['Value'])

      return S_OK(collected)

    gLogger.info('Obtaining the catalog contents for %d directories' % len(lfnDirs))

    allFiles = {}
    for lfnDir in lfnDirs:
      dirContent = _getDirectoryContent(lfnDir)
      if not dirContent['OK']:
        return dirContent
      gLogger.debug("Content of directory %s: %d files" % (lfnDir, len(dirContent['Value'])))
      allFiles.update(dirContent['Value'])

    gLogger.debug("Content of directories examined: %d files" % len(allFiles))

    replicasRes = self.fileCatalog.getReplicas(list(allFiles))
    if not replicasRes['OK']:
      return replicasRes
    # Any file without a successful replica lookup fails the whole request
    if replicasRes['Value']['Failed']:
      return S_ERROR("Failures in replicas discovery")

    return S_OK({'Metadata': allFiles, 'Replicas': replicasRes['Value']['Successful']})

  def _getCatalogReplicas(self, lfns):
    """ Get the replicas (in any status) of the files from the catalog and
        single out the files reported as having no replica.

        :param lfns: list of LFNs
        :return: S_OK with the tuple (replicas dictionary, list of LFNs with zero replicas)
    """
    gLogger.info('Obtaining the replicas for %s files' % len(lfns))

    res = self.fileCatalog.getReplicas(lfns, allStatus=True)
    if not res['OK']:
      gLogger.error('Failed to get catalog replicas', res['Message'])
      return res
    foundReplicas = res['Value']['Successful']
    # Other kinds of lookup failure are deliberately not collected
    withoutReplica = [lfn for lfn, error in res['Value']['Failed'].iteritems()
                      if re.search('File has zero replicas', error)]
    gLogger.info('Obtaining the replicas for files complete')
    return S_OK((foundReplicas, withoutReplica))

  def _getCatalogMetadata(self, lfns):
    """ Obtain the file metadata from the catalog while checking they exist

        :param lfns: collection of LFNs (may be empty)
        :return: S_OK with the tuple (metadata dictionary, list of LFNs missing
                 from the catalog, list of zero-size files), or the failing result
    """
    if not lfns:
      # Keep the same 3-tuple shape as the normal path: callers unpack the value
      return S_OK(({}, [], []))
    gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))

    missingCatalogFiles = []
    # NOTE(review): nothing in this method fills zeroSizeFiles, it is always
    # returned empty - confirm whether zero-size detection is expected here
    zeroSizeFiles = []
    res = self.fileCatalog.getFileMetadata(lfns)
    if not res['OK']:
      gLogger.error('Failed to get catalog metadata', res['Message'])
      return res
    allMetadata = res['Value']['Successful']
    # Only the files the catalog does not know are collected as missing
    for lfn, error in res['Value']['Failed'].iteritems():
      if re.search('No such file or directory', error):
        missingCatalogFiles.append(lfn)
    gLogger.info('Obtaining the catalog metadata complete')
    return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
Example #37
0
    dexit(1)
  
  from DIRAC import gLogger
  import os
  
  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
  tc = TransformationClient()  

  from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
  fc = FileCatalogClient()
  fmeta = {}
  trans = None
  info = []
  
  if clip.prodid:
    res = tc.getTransformation(clip.prodid)
    if not res['OK']:
      gLogger.error(res['Message'])
      dexit(1)
    trans = res['Value']
    res = tc.getTransformationInputDataQuery( clip.prodid )
    if res['OK']:
      trans['InputDataQuery'] = res['Value']
    res = tc.getAdditionalParameters ( clip.prodid )
    if res['OK']:
      trans['AddParams'] = res['Value']
    #do something with transf
    res = fc.findDirectoriesByMetadata({'ProdID':clip.prodid})
    if res['OK']:
      if len(res['Value'].values()):
        gLogger.verbose("Found some directory matching the metadata")
Example #38
0
class TransformationAgent( AgentModule ):
  """ Agent creating tasks out of the 'Unused' input files of transformations.

      For each selected transformation the unused files are fetched, their
      replicas are looked up, a TransformationPlugin groups them into tasks and
      the tasks are registered through the transformation client.
  """

  def initialize( self ):
    """ standard init: read the agent options and create the clients

        :return: S_OK()
    """
    self.pluginLocation = self.am_getOption( 'PluginLocation',
                                             'DIRAC.TransformationSystem.Agent.TransformationPlugin' )
    self.checkCatalog = self.am_getOption( 'CheckCatalog', 'yes' )
    self.transformationStatus = self.am_getOption( 'transformationStatus', ['Active', 'Completing', 'Flush'] )
    self.maxFiles = self.am_getOption( 'MaxFiles', 5000 )

    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    self.transDB = TransformationClient( 'TransformationDB' )
    self.rm = ReplicaManager()
    # Number of files left without a task per transformation after the last pass
    self.unusedFiles = {}
    return S_OK()

  def execute( self ):
    """ get and process the transformations to be processed

        A failure on one transformation is logged and does not stop the others.

        :return: S_OK() in all cases
    """
    res = self.getTransformations()
    if not res['OK']:
      gLogger.info( "execute: Failed to obtain transformations: %s" % res['Message'] )
      return S_OK()
    # Process the transformations
    for transDict in res['Value']:
      # int() promotes to long automatically on Python 2 when needed
      transID = int( transDict['TransformationID'] )
      gLogger.info( "execute: Processing transformation %s." % transID )
      startTime = time.time()
      res = self.processTransformation( transDict )
      if not res['OK']:
        gLogger.info( "execute: Failed to process transformation: %s" % res['Message'] )
      else:
        gLogger.info( "execute: Processed transformation in %.1f seconds" % ( time.time() - startTime ) )
    return S_OK()

  def getTransformations( self ):
    """ Obtain the transformations to be executed

        With the 'Transformation' option left to 'All' every transformation in
        one of the configured statuses is selected, otherwise only the named one.

        :return: S_OK with a list of transformation dictionaries, or the failing result
    """
    transName = self.am_getOption( 'Transformation', 'All' )
    if transName == 'All':
      gLogger.info( "getTransformations: Initializing general purpose agent." )
      res = self.transDB.getTransformations( {'Status':self.transformationStatus}, extraParams = True )
      if not res['OK']:
        gLogger.error( "getTransformations: Failed to get transformations: %s" % res['Message'] )
        return res
      transformations = res['Value']
      gLogger.info( "getTransformations: Obtained %d transformations to process" % len( transformations ) )
    else:
      gLogger.info( "getTransformations: Initializing for transformation %s." % transName )
      res = self.transDB.getTransformation( transName, extraParams = True )
      if not res['OK']:
        gLogger.error( "getTransformations: Failed to get transformation: %s." % res['Message'] )
        return res
      transformations = [res['Value']]
    return S_OK( transformations )

  def processTransformation( self, transDict ):
    """ Create the tasks for the 'Unused' files of one transformation

        :param transDict: transformation dictionary; 'TransformationID', 'Status'
                          and 'Type' are read, 'Plugin' is optional
        :return: S_OK(), or the failing result of one of the steps
    """
    transID = transDict['TransformationID']
    # First get the LFNs associated to the transformation
    res = self.transDB.getTransformationFiles( condDict = {'TransformationID':transID, 'Status':'Unused'} )
    if not res['OK']:
      gLogger.error( "processTransformation: Failed to obtain input data: %s." % res['Message'] )
      return res
    transFiles = res['Value']
    lfns = res['LFNs']

    if not lfns:
      gLogger.info( "processTransformation: No 'Unused' files found for transformation." )
      if transDict['Status'] == 'Flush':
        res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
        if not res['OK']:
          gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message'] )
        else:
          gLogger.info( "processTransformation: Updated transformation status to 'Active'." )
      return S_OK()
    #Check if something new happened
    if len( lfns ) == self.unusedFiles.get( transID, 0 ) and transDict['Status'] != 'Flush':
      gLogger.info( "processTransformation: No new 'Unused' files found for transformation." )
      return S_OK()

    replicateOrRemove = transDict['Type'].lower() in ["replication", "removal"]
    # Limit the number of LFNs to be considered for replication or removal as they are treated individually
    if replicateOrRemove:
      # Keep exactly maxFiles entries (the former 0:maxFiles - 1 slice kept one less)
      lfns = lfns[:self.maxFiles]
    unusedFiles = len( lfns )
    # Check the data is available with replicas
    # NOTE(review): __getDataReplicas is not defined in the part of the class visible here
    res = self.__getDataReplicas( transID, lfns, active = not replicateOrRemove )
    if not res['OK']:
      gLogger.error( "processTransformation: Failed to get data replicas: %s" % res['Message'] )
      return res
    dataReplicas = res['Value']

    # Get the plug-in type and create the plug-in object
    plugin = transDict.get( 'Plugin' ) or 'Standard'
    gLogger.info( "processTransformation: Processing transformation with '%s' plug-in." % plugin )
    res = self.__generatePluginObject( plugin )
    if not res['OK']:
      return res
    oPlugin = res['Value']

    # Get the plug-in and set the required params
    oPlugin.setParameters( transDict )
    oPlugin.setInputData( dataReplicas )
    oPlugin.setTransformationFiles( transFiles )
    res = oPlugin.generateTasks()
    if not res['OK']:
      gLogger.error( "processTransformation: Failed to generate tasks for transformation: %s" % res['Message'] )
      return res
    tasks = res['Value']
    # Create the tasks
    allCreated = True
    created = 0
    for se, taskLfns in tasks:
      res = self.transDB.addTaskForTransformation( transID, taskLfns, se )
      if not res['OK']:
        gLogger.error( "processTransformation: Failed to add task generated by plug-in: %s." % res['Message'] )
        allCreated = False
      else:
        created += 1
        unusedFiles -= len( taskLfns )
    if created:
      gLogger.info( "processTransformation: Successfully created %d tasks for transformation." % created )
    # Remembered so that the next pass can tell whether new files have arrived
    self.unusedFiles[transID] = unusedFiles

    # If this production is to Flush
    if transDict['Status'] == 'Flush' and allCreated:
      res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
      if not res['OK']:
        gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message'] )
      else:
        gLogger.info( "processTransformation: Updated transformation status to 'Active'." )
    return S_OK()

  ######################################################################
  #
  # Internal methods used by the agent
  #

  def __generatePluginObject( self, plugin ):
    """ This simply instantiates the TransformationPlugin class with the relevant plugin name

        :param plugin: name of the plug-in
        :return: S_OK with the plug-in object, or S_ERROR when the module cannot
                 be imported or does not provide 'TransformationPlugin'
    """
    try:
      plugModule = __import__( self.pluginLocation, globals(), locals(), ['TransformationPlugin'] )
    except ImportError as e:
      gLogger.exception( "__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s" % ( plugin, e ) )
      return S_ERROR( "Failed to import 'TransformationPlugin' from %s" % self.pluginLocation )
    try:
      plugin_o = getattr( plugModule, 'TransformationPlugin' )( '%s' % plugin,
                                                                transClient = self.transDB,
                                                                replicaManager = self.rm )
      return S_OK( plugin_o )
    except AttributeError as e:
      gLogger.exception( "__generatePluginObject: Failed to create %s(): %s." % ( plugin, e ) )
      return S_ERROR( "Failed to create plug-in object %s" % plugin )
Example #39
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """Set up the default parameters and, when an ID is given, load that transformation.

        :param transID: ID of an existing transformation to load (0 for a new one)
        :param transClient: client to use; a TransformationClient is created when not given
        :raises AttributeError: when the server reports that the transformation does not exist
        """
        super(Transformation, self).__init__()

        # Accepted types for each known parameter
        self.paramTypes = {
            "TransformationID": [types.IntType, types.LongType],
            "TransformationName": types.StringTypes,
            "Status": types.StringTypes,
            "Description": types.StringTypes,
            "LongDescription": types.StringTypes,
            "Type": types.StringTypes,
            "Plugin": types.StringTypes,
            "AgentType": types.StringTypes,
            "FileMask": types.StringTypes,
            "TransformationGroup": types.StringTypes,
            "GroupSize": [types.IntType, types.LongType, types.FloatType],
            "InheritedFrom": [types.IntType, types.LongType],
            "Body": types.StringTypes,
            "MaxNumberOfTasks": [types.IntType, types.LongType],
            "EventsPerTask": [types.IntType, types.LongType],
        }
        # Default value of each known parameter
        self.paramValues = {
            "TransformationID": 0,
            "TransformationName": "",
            "Status": "New",
            "Description": "",
            "LongDescription": "",
            "Type": "",
            "Plugin": "Standard",
            "AgentType": "Manual",
            "FileMask": "",
            "TransformationGroup": "General",
            "GroupSize": 1,
            "InheritedFrom": 0,
            "Body": "",
            "MaxNumberOfTasks": 0,
            "EventsPerTask": 0,
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            "Transformations/AllowedPlugins", ["Broadcast", "Standard", "BySize", "ByShare"]
        )
        self.transClient = transClient if transClient else TransformationClient()
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if not transID:
            return

        self.paramValues["TransformationID"] = transID
        res = self.getTransformation()
        if res["OK"]:
            self.exists = True
        elif res["Message"] == "Transformation does not exist":
            raise AttributeError("TransformationID %d does not exist" % transID)
        else:
            # Any other failure: forget the ID and carry on with a blank transformation
            self.paramValues["TransformationID"] = 0
            gLogger.fatal(
                "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL)
            )

    def setServer(self, server):
        """Remember the server URL and pass it on to the transformation client."""
        self.serverURL = server
        client = self.transClient
        client.setServer(self.serverURL)

    def getServer(self):
        """Return the server URL currently stored on this object."""
        currentServer = self.serverURL
        return currentServer

    def reset(self, transID=0):
        """Re-run the constructor for the given ID and re-apply the server URL.

        :param transID: ID of the transformation to load (0 for a blank one)
        :return: S_OK()
        """
        self.__init__(transID)
        client = self.transClient
        client.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Set the 'TargetSE' parameter from a list of SEs or a string describing one."""
        result = self.__setSE("TargetSE", seList)
        return result

    def setSourceSE(self, seList):
        """Set the 'SourceSE' parameter from a list of SEs or a string describing one."""
        result = self.__setSE("SourceSE", seList)
        return result

    def __setSE(self, se, seList):
        """Validate a list of storage elements and store it under the given parameter.

        :param se: name of the parameter to set (e.g. "TargetSE")
        :param seList: list of SE names, or a string holding either a Python
                       literal or a comma/blank separated list of names
        :return: result of the SE check when it fails, otherwise result of the parameter setter
        """
        if isinstance(seList, types.StringTypes):
            try:
                # SECURITY NOTE(review): eval() executes arbitrary expressions, so this
                # must never be fed untrusted strings; ast.literal_eval would be the
                # safe choice if only literals have to be supported - to be confirmed.
                seList = eval(seList)
            except Exception:
                # Not a Python expression: treat it as a comma/blank separated list.
                # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
                seList = seList.replace(",", " ").split()
        res = self.__checkSEs(seList)
        if not res["OK"]:
            return res
        # __setParam reads the parameter name from item_called
        self.item_called = se
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Provide generic getXxx/setXxx accessors for the transformation parameters.

        The parameter name (what follows the 3-letter prefix) is kept in
        ``self.item_called`` for the generic getter/setter that is returned.

        :raises AttributeError: for any name not starting with "get" or "set"
        """
        prefix, item = name[:3], name[3:]
        if prefix == "get":
            accessor = self.__getParam
        elif prefix == "set":
            accessor = self.__setParam
        else:
            raise AttributeError(name)
        self.item_called = item
        return accessor

    def __getParam(self):
        """Generic getter for the parameter named by ``self.item_called``.

        "Available" gives the names of the typed parameters and "Parameters"
        the whole dictionary of values.

        :raises AttributeError: when the parameter is unknown
        """
        item = self.item_called
        if item == "Available":
            return S_OK(self.paramTypes.keys())
        if item == "Parameters":
            return S_OK(self.paramValues)
        if item not in self.paramValues:
            raise AttributeError("Unknown parameter for transformation: %s" % item)
        return S_OK(self.paramValues[item])

    def __setParam(self, value):
        """Generic setter for the parameter named by ``self.item_called``.

        Typed parameters are checked against their accepted types. When the
        transformation exists on the server, the new value is sent there before
        being stored locally.

        :param value: new value of the parameter
        :return: S_OK(), or the failing result of the server update
        :raises TypeError: when a typed parameter gets a value of another type
        """
        item = self.item_called
        if item in self.paramTypes:
            needsChange = self.paramValues[item] != value
            if needsChange and type(value) not in self.paramTypes[item]:
                raise TypeError(
                    "%s %s %s expected one of %s"
                    % (
                        item,
                        value,
                        type(value),
                        self.paramTypes[item],
                    )
                )
        elif item not in self.paramValues:
            # Untyped parameter seen for the first time
            needsChange = True
        else:
            needsChange = self.paramValues[item] != value

        if not needsChange:
            gLogger.verbose("No change of parameter %s required" % item)
            return S_OK()

        gLogger.verbose("Parameter %s to be changed" % item)
        transID = self.paramValues["TransformationID"]
        if self.exists and transID:
            res = self.transClient.setTransformationParameter(transID, item, value)
            if not res["OK"]:
                return res
        self.paramValues[item] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Load the transformation parameters from the server into this object.

        Every parameter returned by the server is passed to the matching
        ``set<ParamName>`` setter.

        :param printOutput: when True, print a failing result
        :return: S_OK with the dictionary of parameters, or an error result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res["Value"]
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                setter = getattr(self, setterName)
            # Test the resolved setter, not its name (which is never empty)
            if not setter:
                gLogger.error("Unable to invoke setter %s, it isn't a member function" % setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Get the logging records of the transformation from the server.

        :param printOutput: when True, print the records (or the failing result)
        :return: S_OK with the list of logging records, or an error result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if res["OK"]:
            records = res["Value"]
            if printOutput:
                self._printFormattedDictList(
                    records, ["Message", "MessageDate", "AuthorDN"], "MessageDate", "MessageDate"
                )
            return S_OK(records)
        if printOutput:
            self._prettyPrint(res)
        return res

    def extendTransformation(self, nTasks, printOutput=False):
        """Forward 'extendTransformation' with nTasks to the transformation client."""
        result = self.__executeOperation("extendTransformation", nTasks, printOutput=printOutput)
        return result

    def cleanTransformation(self, printOutput=False):
        """Forward 'cleanTransformation' to the client; on success set the local status to 'Cleaned'."""
        result = self.__executeOperation("cleanTransformation", printOutput=printOutput)
        if not result["OK"]:
            return result
        self.paramValues["Status"] = "Cleaned"
        return result

    def deleteTransformation(self, printOutput=False):
        """Forward 'deleteTransformation' to the client; on success reset this object."""
        result = self.__executeOperation("deleteTransformation", printOutput=printOutput)
        if not result["OK"]:
            return result
        self.reset()
        return result

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Forward an ``addFilesToTransformation`` request for ``lfns``.

        :param lfns: value passed on to the client operation
        :param bool printOutput: passed on to ``__executeOperation``
        :return: whatever ``__executeOperation`` returns
        """
        outcome = self.__executeOperation("addFilesToTransformation", lfns, printOutput=printOutput)
        return outcome

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Forward a ``setFileStatusForTransformation`` request with ``status`` and ``lfns``.

        :param status: first value passed on to the client operation
        :param lfns: second value passed on to the client operation
        :param bool printOutput: passed on to ``__executeOperation``
        :return: whatever ``__executeOperation`` returns
        """
        outcome = self.__executeOperation(
            "setFileStatusForTransformation",
            status,
            lfns,
            printOutput=printOutput,
        )
        return outcome

    def getTransformationTaskStats(self, printOutput=False):
        """Forward a ``getTransformationTaskStats`` request.

        :param bool printOutput: passed on to ``__executeOperation``
        :return: whatever ``__executeOperation`` returns
        """
        outcome = self.__executeOperation("getTransformationTaskStats", printOutput=printOutput)
        return outcome

    def getTransformationStats(self, printOutput=False):
        """Forward a ``getTransformationStats`` request.

        :param bool printOutput: passed on to ``__executeOperation``
        :return: whatever ``__executeOperation`` returns
        """
        outcome = self.__executeOperation("getTransformationStats", printOutput=printOutput)
        return outcome

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Forward a ``deleteTasks`` request with the ``taskMin`` / ``taskMax`` pair.

        :param taskMin: first value passed on to the client operation
        :param taskMax: second value passed on to the client operation
        :param bool printOutput: passed on to ``__executeOperation``
        :return: whatever ``__executeOperation`` returns
        """
        outcome = self.__executeOperation("deleteTasks", taskMin, taskMax, printOutput=printOutput)
        return outcome

    def addTaskForTransformation(self, lfns=None, se="Unknown", printOutput=False):
        """Forward an ``addTaskForTransformation`` request with ``lfns`` and ``se``.

        :param lfns: value passed on to the client operation; when omitted a
            fresh empty list is used for every call
        :param se: second value passed on to the client operation (default ``"Unknown"``)
        :param bool printOutput: passed on to ``__executeOperation``
        :return: whatever ``__executeOperation`` returns
        """
        # A literal ``[]`` default would be one list shared by every call and
        # handed to external code, so build a new one per call instead.
        if lfns is None:
            lfns = []
        return self.__executeOperation("addTaskForTransformation", lfns, se, printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Forward a ``setTaskStatus`` request with ``taskID`` and ``status``.

        :param taskID: first value passed on to the client operation
        :param status: second value passed on to the client operation
        :param bool printOutput: passed on to ``__executeOperation``
        :return: whatever ``__executeOperation`` returns
        """
        outcome = self.__executeOperation("setTaskStatus", taskID, status, printOutput=printOutput)
        return outcome

    def __executeOperation(self, operation, *parms, **kwds):
        """Call ``self.transClient.<operation>(TransformationID, *parms, **kwds)``.

        :param str operation: name of the client method to invoke
        :param parms: positional arguments appended after the TransformationID
        :param kwds: keyword arguments for the client method; ``printOutput``
            (default False) is consumed here and never forwarded
        :return: S_ERROR if no TransformationID is set or ``operation`` is not
            a callable attribute of the client, otherwise the client's result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        # Default to False so callers that omit the keyword do not hit a KeyError.
        printOutput = kwds.pop("printOutput", False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            return S_ERROR("Unable to invoke %s, it isn't a member function of TransformationClient" % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(
        self,
        fileStatus=[],
        lfns=[],
        outputFields=[
            "FileID",
            "LFN",
            "Status",
            "TaskID",
            "TargetSE",
            "UsedSE",
            "ErrorCount",
            "InsertedTime",
            "LastUpdate",
        ],
        orderBy="FileID",
        printOutput=False,
    ):
        """Query the files of the current transformation, optionally printing them.

        The list defaults are only read here, never modified.

        :param fileStatus: if non-empty, used as the ``Status`` selection
        :param lfns: if non-empty, used as the ``LFN`` selection
        :param outputFields: columns to print; an empty value prints the
            available field names instead
        :param orderBy: ordering key handed to the table printer
        :param bool printOutput: print the outcome when true
        :return: the client's result, unchanged
        """
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if fileStatus:
            condDict["Status"] = fileStatus
        if lfns:
            condDict["LFN"] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # str.join is called on the separator, not on the list of names.
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No files found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "FileID", orderBy)
        return res

    def getTransformationTasks(
        self,
        taskStatus=[],
        taskIDs=[],
        outputFields=[
            "TransformationID",
            "TaskID",
            "ExternalStatus",
            "ExternalID",
            "TargetSE",
            "CreationTime",
            "LastUpdateTime",
        ],
        orderBy="TaskID",
        printOutput=False,
    ):
        """Query the tasks of the current transformation, optionally printing them.

        The list defaults are only read here, never modified.

        :param taskStatus: if non-empty, used as the ``ExternalStatus`` selection
        :param taskIDs: if non-empty, used as the ``TaskID`` selection
        :param outputFields: columns to print; an empty value prints the
            available field names instead
        :param orderBy: ordering key handed to the table printer
        :param bool printOutput: print the outcome when true
        :return: the client's result, unchanged
        """
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if taskStatus:
            condDict["ExternalStatus"] = taskStatus
        if taskIDs:
            condDict["TaskID"] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # str.join is called on the separator, not on the list of names.
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "TaskID", orderBy)
        return res

    #############################################################################
    def getTransformations(
        self,
        transID=[],
        transStatus=[],
        outputFields=["TransformationID", "Status", "AgentType", "TransformationName", "CreationDate"],
        orderBy="TransformationID",
        printOutput=False,
    ):
        """Query transformations by ID and/or status, optionally printing them.

        The list defaults are only read here, never modified.

        :param transID: if non-empty, used as the ``TransformationID`` selection
        :param transStatus: if non-empty, used as the ``Status`` selection
        :param outputFields: columns to print; an empty value prints the
            available field names instead
        :param orderBy: ordering key handed to the table printer
        :param bool printOutput: print the outcome when true
        :return: the client's result, unchanged
        """
        condDict = {}
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                # str.join is called on the separator, not on the list of names.
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No transformations found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "TransformationID", orderBy)
        return res

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create the transformation described by ``self.paramValues``.

        After the creation-time sanity check, the standard parameters are sent
        with ``transClient.addTransformation``. Every entry of
        ``self.paramValues`` that is not a key of ``self.paramTypes`` is then
        stored with ``setTransformationParameter``; a failure there is logged
        but does not abort the call.

        :param addFiles: forwarded as ``addFiles`` to the client call
        :param bool printOutput: print the parameters and any failed result
        :return: the failed result of the check or of the client call,
            otherwise S_OK(new transformation ID)
        """
        res = self._checkCreation()
        if not res["OK"]:
            return self._errorReport(res, "Failed transformation sanity check")
        if printOutput:
            gLogger.info("Will attempt to create transformation with the following parameters")
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues["TransformationName"],
            self.paramValues["Description"],
            self.paramValues["LongDescription"],
            self.paramValues["Type"],
            self.paramValues["Plugin"],
            self.paramValues["AgentType"],
            self.paramValues["FileMask"],
            transformationGroup=self.paramValues["TransformationGroup"],
            groupSize=self.paramValues["GroupSize"],
            inheritedFrom=self.paramValues["InheritedFrom"],
            body=self.paramValues["Body"],
            maxTasks=self.paramValues["MaxNumberOfTasks"],
            eventsPerTask=self.paramValues["EventsPerTask"],
            addFiles=addFiles,
        )
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res["Value"]
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            # ``in`` works on Python 2 and 3; dict.has_key exists only on Python 2.
            if paramName in self.paramTypes:
                continue
            res = self.transClient.setTransformationParameter(transID, paramName, paramValue)
            if not res["OK"]:
                gLogger.error("Failed to add parameter", "%s %s" % (paramName, res["Message"]))
                gLogger.notice("To add this parameter later please execute the following.")
                gLogger.notice("oTransformation = Transformation(%d)" % transID)
                gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Run the sanity checks that must pass before a transformation is created.

        Refuses to continue when a TransformationID is already set, prompts on
        the console for any required parameter that is still empty, and asks
        for a replacement when the selected plugin is not in
        ``self.supportedPlugins``.

        :return: S_ERROR() when a TransformationID is already set, the failed
            result of the plugin prompt, otherwise S_OK()
        """
        if self.paramValues["TransformationID"]:
            gLogger.info("You are currently working with an active transformation definition.")
            gLogger.info("If you wish to create a new transformation reset the TransformationID.")
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        # raw_input only exists on Python 2; Python 3 renamed it to input.
        try:
            readLine = raw_input
        except NameError:
            readLine = input

        requiredParameters = ["TransformationName", "Description", "LongDescription", "Type"]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info("%s is not defined for this transformation. This is required..." % parameter)
                self.paramValues[parameter] = readLine("Please enter the value of " + parameter + " ")

        plugin = self.paramValues["Plugin"]
        if plugin and plugin not in self.supportedPlugins:
            gLogger.info("The selected Plugin (%s) is not known to the transformation agent." % plugin)
            res = self.__promptForParameter("Plugin", choices=self.supportedPlugins, default="Standard")
            if not res["OK"]:
                return res
            self.paramValues["Plugin"] = res["Value"]

        return S_OK()

    def _checkBySizePlugin(self):
        """Check the BySize plugin; it applies exactly the Standard plugin check."""
        outcome = self._checkStandardPlugin()
        return outcome

    def _checkBySharePlugin(self):
        """Check the ByShare plugin; it applies exactly the Standard plugin check."""
        outcome = self._checkStandardPlugin()
        return outcome

    def _checkStandardPlugin(self):
        """Make sure ``GroupSize`` is positive, resetting it to 1 otherwise.

        :return: S_OK(), or the failed result of ``setGroupSize(1)``
        """
        currentSize = self.paramValues["GroupSize"]
        if not (currentSize <= 0):
            return S_OK()

        gLogger.info("The GroupSize was found to be less than zero. It has been set to 1.")
        outcome = self.setGroupSize(1)
        if outcome["OK"]:
            return S_OK()
        return outcome

    def _checkBroadcastPlugin(self):
        """Make sure ``SourceSE`` and ``TargetSE`` are set, prompting for missing ones.

        For each missing or empty parameter a line is read from the console,
        split on commas and whitespace, and handed as a list to the matching
        ``set<Param>`` method.

        :return: S_ERROR if a setter is unavailable, the failed result of a
            setter, otherwise S_OK()
        """
        requiredParams = ["SourceSE", "TargetSE"]
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s" % (", ".join(requiredParams))
        )

        # raw_input only exists on Python 2; Python 3 renamed it to input.
        try:
            readLine = raw_input
        except NameError:
            readLine = input

        for requiredParam in requiredParams:
            # .get() covers both "key absent" and "value empty" and, unlike
            # dict.has_key, is available on Python 3 as well.
            if self.paramValues.get(requiredParam):
                continue
            paramValue = readLine("Please enter " + requiredParam + " ")
            setterName = "set%s" % requiredParam
            setter = getattr(self, setterName, None)
            if not callable(setter):
                return S_ERROR("Unable to invoke %s, this function hasn't been implemented." % setterName)
            ses = paramValue.replace(",", " ").split()
            res = setter(ses)
            if not res["OK"]:
                return res
        return S_OK()

    def __checkSEs(self, seList):
        """Verify that every name in ``seList`` is a section of /Resources/StorageElements.

        Each unknown name is logged as an error.

        :param seList: iterable of storage element names
        :return: the error report if the configuration lookup failed, S_ERROR
            with the number of unknown names, otherwise S_OK()
        """
        lookup = gConfig.getSections("/Resources/StorageElements")
        if not lookup["OK"]:
            return self._errorReport(lookup, "Failed to get possible StorageElements")

        known = lookup["Value"]
        unknown = [name for name in seList if name not in known]
        for name in unknown:
            gLogger.error("StorageElement %s is not known" % name)

        if not unknown:
            return S_OK()
        return S_ERROR("%d StorageElements not known" % len(unknown))

    def __promptForParameter(self, parameter, choices=[], default="", insert=True):
        """Ask the user for the value of ``parameter`` and optionally store it.

        :param str parameter: parameter name; also selects the ``set<parameter>`` setter
        :param choices: forwarded to ``promptUser`` (only read here)
        :param default: forwarded to ``promptUser``
        :param bool insert: when true, pass the answer to the ``set<parameter>`` method
        :return: the error report if prompting failed, S_ERROR if the setter is
            unavailable, the failed result of the setter, otherwise S_OK(answer)
        """
        res = promptUser("Please enter %s" % parameter, choices=choices, default=default)
        if not res["OK"]:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"]))
        paramValue = res["Value"]
        if insert:
            setterName = "set%s" % parameter
            setter = getattr(self, setterName, None)
            if not callable(setter):
                # Name the missing setter; the placeholder used to be left unfilled.
                return S_ERROR("Unable to invoke %s, it isn't a member function of Transformation!" % setterName)
            res = setter(paramValue)
            if not res["OK"]:
                return res
        return S_OK(paramValue)
Example #40
0
class TransformationCLI( cmd.Cmd, API ):
  """ Interactive shell (``cmd.Cmd``) for talking to the transformation service.

      Every ``do_<name>`` method implements the shell command ``<name>``; its
      docstring is the help text shown by ``help`` / ``helpall``. Requests go
      through ``self.server``, the ``TransformationClient`` created in
      ``__init__``. Output uses the single-argument ``print( ... )`` form, which
      behaves the same on Python 2 and Python 3.
  """

  def __init__( self ):
    """ Create the client used by all commands and initialise both base classes. """
    self.server = TransformationClient()
    self.indentSpace = 4
    cmd.Cmd.__init__( self )
    API.__init__( self )

  def printPair( self, key, value, separator = ":" ):
    """ Print ``key``, the separator and the first line of ``value``, then the
        remaining lines of ``value`` indented below it.
    """
    # Trailing whitespace is removed before splitting, so the closing-quote
    # line of a docstring is not printed while a final line with text is kept.
    valueList = value.rstrip().split( "\n" )
    print( "%s%s%s %s" % ( key, " " * ( self.indentSpace - len( key ) ), separator, valueList[0].strip() ) )
    for valueLine in valueList[1:]:
      print( "%s  %s" % ( " " * self.indentSpace, valueLine.strip() ) )

  def do_exit( self, args ):
    """ Exits the shell.
        usage: exit
    """
    sys.exit( 0 )

  def do_quit( self, *args ):
    """ Exits the shell.
        Usage: quit
    """
    sys.exit( 0 )

  def do_help( self, args ):
    """ Default version of the help command
       Usage: help <command>
       OR use helpall to see description for all commands"""
    cmd.Cmd.do_help( self, args )

  # overriding default help command
  def do_helpall( self, args ):
    """
    Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
    """
    if len( args ) == 0:
      print( "\nAvailable commands:\n" )
      for attribute in sorted( dir( self ) ):
        if attribute.startswith( "do_" ):
          # strip() removes the leading blank/newline of the docstring without
          # cutting off its first letter; a missing docstring prints as empty.
          self.printPair( attribute[3:], ( getattr( self, attribute ).__doc__ or "" ).strip() )
          print( "" )
    else:
      command = args.split()[0].strip()
      obj = getattr( self, "do_%s" % command, None )
      if obj is None:
        print( "There's no such %s command" % command )
        return
      self.printPair( command, ( obj.__doc__ or "" ).strip() )

  def do_shell( self, args ):
    """Execute a shell command

       usage !<shell_command>
    """
    res = shellCall( 0, args )
    if not res['OK']:
      print( res['Message'] )
      return
    # An OK result carries no 'Message', so a non-zero exit status is reported
    # from the returned tuple instead.
    returnCode, stdOut, stdErr = res['Value']
    print( "%s\n%s" % ( stdOut, stdErr ) )
    if returnCode != 0:
      print( "Command exited with status %s" % returnCode )

  def check_params( self, args, num ):
    """Checks if the number of parameters correct"""
    argss = args.split()
    length = len( argss )
    if length < num:
      print( "Error: Number of arguments provided %d less that required %d, please correct." % ( length, num ) )
      return ( False, length )
    return ( argss, length )

  def check_id_or_name( self, id_or_name ):
    """resolve name or Id by converting type of argument """
    if id_or_name.isdigit():
      return long( id_or_name ) # it looks like an id
    return id_or_name

  def do_setServer( self, args ):
    """ Set the destination server

        usage: setServer serverURL
    """
    argss = args.split()
    if not argss:
      print( "no server provided" )
      return
    self.serverURL = argss[0]
    self.server.setServer( self.serverURL )

  ####################################################################
  #
  # These are the methods for transformation manipulation
  #

  def do_getall( self, args ):
    """Get transformation details

       usage: getall [Status] [Status]
    """
    oTrans = Transformation()
    # serverURL only exists once setServer has been used; without it the
    # Transformation object is left on whatever server it starts with.
    serverURL = getattr( self, 'serverURL', None )
    if serverURL:
      oTrans.setServer( serverURL )
    oTrans.getTransformations( transStatus = args.split(), printOutput = True )

  def do_getStatus( self, args ):
    """Get transformation details

       usage: getStatus <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    for transName in argss:
      res = self.server.getTransformation( transName )
      if not res['OK']:
        print( "Getting status of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s: %s" % ( transName, res['Value']['Status'] ) )

  def do_setStatus( self, args ):
    """Set transformation status

       usage: setStatus  <Status> <transName|ID>
       Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
    """
    argss = args.split()
    if len( argss ) < 2:
      print( "transformation and status not supplied" )
      return
    status = argss[0]
    transNames = argss[1:]
    for transName in transNames:
      res = self.server.setTransformationParameter( transName, 'Status', status )
      if not res['OK']:
        print( "Setting status of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s set to %s" % ( transName, status ) )

  def do_start( self, args ):
    """Start transformation

       usage: start <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'Status', 'Active' )
      if not res['OK']:
        print( "Setting Status of %s failed: %s" % ( transName, res['Message'] ) )
        continue
      res = self.server.setTransformationParameter( transName, 'AgentType', 'Automatic' )
      if not res['OK']:
        print( "Setting AgentType of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s started" % transName )

  def do_stop( self, args ):
    """Stop transformation

       usage: stop <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'AgentType', 'Manual' )
      if not res['OK']:
        print( "Stopping of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s stopped" % transName )

  def do_flush( self, args ):
    """Flush transformation

       usage: flush <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'Status', 'Flush' )
      if not res['OK']:
        print( "Flushing of %s failed: %s" % ( transName, res['Message'] ) )
      else:
        print( "%s flushing" % transName )

  def do_get( self, args ):
    """Get transformation definition

    usage: get <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print( "Failed to get %s: %s" % ( transName, res['Message'] ) )
    else:
      # The body is shown by getBody; tolerate a definition without one.
      res['Value'].pop( 'Body', None )
      printDict( res['Value'] )

  def do_getBody( self, args ):
    """Get transformation body

    usage: getBody <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print( "Failed to get %s: %s" % ( transName, res['Message'] ) )
    else:
      print( res['Value']['Body'] )

  def do_getFileStat( self, args ):
    """Get transformation file statistics

     usage: getFileStat <transName|ID>
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    res = self.server.getTransformationStats( transName )
    if not res['OK']:
      print( "Failed to get statistics for %s: %s" % ( transName, res['Message'] ) )
    else:
      # Only the per-status counters are printed; tolerate a missing total.
      res['Value'].pop( 'Total', None )
      printDict( res['Value'] )

  def do_modMask( self, args ):
    """Modify transformation input definition

       usage: modMask <mask> <transName|ID>
    """
    argss = args.split()
    # Both the mask and at least one transformation are needed; with the mask
    # alone the loop below would silently do nothing.
    if len( argss ) < 2:
      print( "no transformation supplied" )
      return
    mask = argss[0]
    transNames = argss[1:]
    for transName in transNames:
      res = self.server.setTransformationParameter( transName, "FileMask", mask )
      if not res['OK']:
        print( "Failed to modify input file mask for %s: %s" % ( transName, res['Message'] ) )
      else:
        print( "Updated %s filemask" % transName )

  def do_getFiles( self, args ):
    """Get files for the transformation (optionally with a given status)

    usage: getFiles <transName|ID> [Status] [Status]
    """
    argss = args.split()
    if not argss:
      print( "no transformation supplied" )
      return
    transName = argss[0]
    status = argss[1:]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print( "Failed to get transformation information: %s" % res['Message'] )
      return
    selectDict = {'TransformationID':res['Value']['TransformationID']}
    if status:
      selectDict['Status'] = status
    res = self.server.getTransformationFiles( condDict = selectDict )
    if not res['OK']:
      print( "Failed to get transformation files: %s" % res['Message'] )
    elif res['Value']:
      self._printFormattedDictList( res['Value'], ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN' )
    else:
      print( "No files found" )

  def do_getFileStatus( self, args ):
    """Get file(s) status for the given transformation

    usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
    """
    argss = args.split()
    if len( argss ) < 2:
      print( "transformation and file not supplied" )
      return
    transName = argss[0]
    lfns = argss[1:]

    res = self.server.getTransformation( transName )
    if not res['OK']:
      print( "Failed to get transformation information: %s" % res['Message'] )
      return
    selectDict = {'TransformationID':res['Value']['TransformationID']}
    res = self.server.getTransformationFiles( condDict = selectDict )
    if not res['OK']:
      print( "Failed to get transformation files: %s" % res['Message'] )
    elif res['Value']:
      # All files of the transformation are fetched and filtered locally.
      filesList = [fileDict for fileDict in res['Value'] if fileDict['LFN'] in lfns]
      if filesList:
        self._printFormattedDictList( filesList, ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN' )
      else:
        print( "Could not find any LFN in %s for transformation %s" % ( lfns, transName ) )
    else:
      print( "No files found" )

  def do_setFileStatus( self, args ):
    """Set file status for the given transformation

    usage: setFileStatus <transName|ID> <lfn> <status>
    """
    argss = args.split()
    if len( argss ) != 3:
      print( "transformation file and status not supplied" )
      return
    transName = argss[0]
    lfn = argss[1]
    status = argss[2]
    res = self.server.setFileStatusForTransformation( transName, status, [lfn] )
    if not res['OK']:
      print( "Failed to update file status: %s" % res['Message'] )
    else:
      print( "Updated file status to %s" % status )

  def do_resetFile( self, args ):
    """Reset file status for the given transformation

    usage: resetFile <transName|ID> <lfn> [<lfn>...]
    """
    argss = args.split()
    if len( argss ) < 2:
      print( "transformation and file(s) not supplied" )
      return
    transName = argss[0]
    lfns = argss[1:]
    res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns )
    if not res['OK']:
      print( "Failed to reset file status: %s" % res['Message'] )
    elif res['Value']['Failed']:
      print( "Could not reset some files: " )
      for lfn, reason in res['Value']['Failed'].items():
        print( "%s %s" % ( lfn, reason ) )
    else:
      print( "Updated file statuses to 'Unused' for %d file(s)" % len( lfns ) )

  def do_resetProcessedFile( self, args ):
    """ Reset file status for the given transformation, passing force = True
        usage: resetProcessedFile <transName|ID> <lfn> [<lfn>...]
    """
    argss = args.split()
    if len( argss ) < 2:
      print( "transformation and file(s) not supplied" )
      return
    transName = argss[0]
    lfns = argss[1:]
    res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns, force = True )
    if not res['OK']:
      print( "Failed to reset file status: %s" % res['Message'] )
    elif res['Value']['Failed']:
      print( "Could not reset some files: " )
      for lfn, reason in res['Value']['Failed'].items():
        print( "%s %s" % ( lfn, reason ) )
    else:
      print( "Updated file statuses to 'Unused' for %d file(s)" % len( lfns ) )

  ####################################################################
  #
  # These are the methods for file manipulation
  #

  def do_addDirectory( self, args ):
    """Add files from the given catalog directory

    usage: addDirectory <directory> [directory]
    """
    argss = args.split()
    if not argss:
      print( "no directory supplied" )
      return
    for directory in argss:
      res = self.server.addDirectory( directory, force = True )
      if not res['OK']:
        print( 'failed to add directory %s: %s' % ( directory, res['Message'] ) )
      else:
        print( 'added %s files for %s' % ( res['Value'], directory ) )

  def do_replicas( self, args ):
    """ Get replicas for <path>

        usage: replicas <lfn> [lfn]
    """
    argss = args.split()
    if not argss:
      print( "no files supplied" )
      return
    res = self.server.getReplicas( argss )
    if not res['OK']:
      print( "failed to get any replica information: %s" % res['Message'] )
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print( "failed to get replica information for %s: %s" % ( lfn, error ) )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      ses = sortList( res['Value']['Successful'][lfn].keys() )
      outStr = "%s :" % lfn.ljust( 100 )
      for se in ses:
        outStr = "%s %s" % ( outStr, se.ljust( 15 ) )
      print( outStr )

  def do_addFile( self, args ):
    """Add new files to transformation DB

    usage: addFile <lfn> [lfn]
    """
    argss = args.split()
    if not argss:
      print( "no files supplied" )
      return
    lfnDict = {}
    for lfn in argss:
      lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':'IGNORED-SE', 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.addFile( lfnDict, force = True )
    if not res['OK']:
      print( "failed to add any files: %s" % res['Message'] )
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print( "failed to add %s: %s" % ( lfn, error ) )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print( "added %s" % lfn )

  def do_removeFile( self, args ):
    """Remove file from transformation DB

    usage: removeFile <lfn> [lfn]
    """
    argss = args.split()
    if not argss:
      print( "no files supplied" )
      return
    res = self.server.removeFile( argss )
    if not res['OK']:
      print( "failed to remove any files: %s" % res['Message'] )
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print( "failed to remove %s: %s" % ( lfn, error ) )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print( "removed %s" % lfn )

  def do_addReplica( self, args ):
    """ Add new replica to the transformation DB

    usage: addReplica <lfn> <se>
    """
    argss = args.split()
    if len( argss ) != 2:
      print( "no file info supplied" )
      return
    lfn = argss[0]
    se = argss[1]
    lfnDict = {}
    lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.addReplica( lfnDict, force = True )
    if not res['OK']:
      print( "failed to add replica: %s" % res['Message'] )
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print( "failed to add replica: %s" % ( error ) )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print( "added %s" % lfn )

  def do_removeReplica( self, args ):
    """Remove replica from the transformation DB

    usage: removeReplica <lfn> <se>
    """
    argss = args.split()
    if len( argss ) != 2:
      print( "no file info supplied" )
      return
    lfn = argss[0]
    se = argss[1]
    lfnDict = {}
    lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.removeReplica( lfnDict )
    if not res['OK']:
      print( "failed to remove replica: %s" % res['Message'] )
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print( "failed to remove replica: %s" % ( error ) )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print( "removed %s" % lfn )

  def do_setReplicaStatus( self, args ):
    """Set replica status, usually used to mark a replica Problematic

    usage: setReplicaStatus <lfn> <status> <se>
    """
    argss = args.split()
    if len( argss ) < 3:
      print( "no file info supplied" )
      return
    lfn = argss[0]
    status = argss[1]
    se = argss[2]
    lfnDict = {}
    lfnDict[lfn] = {'Status':status, 'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.setReplicaStatus( lfnDict )
    if not res['OK']:
      print( "failed to set replica status: %s" % res['Message'] )
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print( "failed to set replica status: %s" % ( error ) )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print( "updated replica status %s" % lfn )
class TransformationAgent( AgentModule ):

  def initialize( self ):
    """ Read the agent options and create the clients used by the agent.

    Stores the 'PluginLocation' and 'CheckCatalog' options on the instance,
    sets the 'shifterProxy' option and instantiates the TransformationClient
    and the ReplicaManager.

    :return: S_OK()
    """
    getOption = self.am_getOption
    self.pluginLocation = getOption( 'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin' )
    self.checkCatalog = getOption( 'CheckCatalog', 'yes' )

    # Use by default the proxy defined under /Operations/Shifter/ProductionManager;
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    # Clients for the transformation database and for replica management
    self.transDB = TransformationClient( 'TransformationDB' )
    self.rm = ReplicaManager()
    return S_OK()

  def execute( self ):
    """ Process every transformation returned by getTransformations().

    A failure to obtain the transformations, or to process any single one of
    them, is only logged; the method returns S_OK() in every case.

    :return: S_OK()
    """
    # Fetch the transformations this cycle has to deal with
    transResult = self.getTransformations()
    if not transResult['OK']:
      gLogger.info( "%s.execute: Failed to obtain transformations: %s" % ( AGENT_NAME, transResult['Message'] ) )
      return S_OK()

    # Handle them one by one, timing each
    for transDict in transResult['Value']:
      transID = long( transDict['TransformationID'] )
      gLogger.info( "%s.execute: Processing transformation %s." % ( AGENT_NAME, transID ) )
      startTime = time.time()
      processed = self.processTransformation( transDict )
      if processed['OK']:
        elapsed = time.time() - startTime
        gLogger.info( "%s.execute: Processed transformation in %.1f seconds" % ( AGENT_NAME, elapsed ) )
      else:
        gLogger.info( "%s.execute: Failed to process transformation: %s" % ( AGENT_NAME, processed['Message'] ) )
    return S_OK()

  def getTransformations( self ):
    """ Obtain the transformations to be processed.

    When the 'Transformation' option is 'All' (its default), every
    transformation whose status is 'Active', 'Completing' or 'Flush' is
    requested; otherwise only the transformation named by the option.

    :return: S_OK( list of transformation dictionaries ), or the failed
             result of the TransformationClient call
    """
    transName = self.am_getOption( 'Transformation', 'All' )

    # Agent dedicated to one named transformation
    if transName != 'All':
      gLogger.info( "%s.getTransformations: Initializing for transformation %s." % ( AGENT_NAME, transName ) )
      single = self.transDB.getTransformation( transName, extraParams = True )
      if not single['OK']:
        gLogger.error( "%s.getTransformations: Failed to get transformation." % AGENT_NAME, single['Message'] )
        return single
      return S_OK( [single['Value']] )

    # General purpose agent: select by status
    gLogger.info( "%s.getTransformations: Initializing general purpose agent." % AGENT_NAME )
    selected = self.transDB.getTransformations( {'Status':['Active', 'Completing', 'Flush']}, extraParams = True )
    if not selected['OK']:
      gLogger.error( "%s.getTransformations: Failed to get transformations." % AGENT_NAME, selected['Message'] )
      return selected
    transformations = selected['Value']
    gLogger.info( "%s.getTransformations: Obtained %d transformations to process" % ( AGENT_NAME, len( transformations ) ) )
    return S_OK( transformations )

  def processTransformation( self, transDict ):
    """ Create tasks for the 'Unused' files of a single transformation.

    The 'Unused' files of the transformation are fetched, their replicas are
    looked up, the plug-in named by transDict['Plugin'] ('Standard' when the
    key is missing or empty) groups them into tasks, and each task is added
    to the transformation. A transformation in status 'Flush' is set back to
    'Active' when there was nothing to do or when every task was created.

    :param transDict: transformation dictionary; 'TransformationID', 'Status'
                      and 'Type' are read, 'Plugin' is optional
    :return: S_OK() on completion (even if some tasks could not be added),
             otherwise the failed result of the step that went wrong
    """
    transID = transDict['TransformationID']
    # First get the LFNs associated to the transformation
    res = self.transDB.getTransformationFiles( condDict = {'TransformationID':transID, 'Status':'Unused'} )
    if not res['OK']:
      gLogger.error( "%s.processTransformation: Failed to obtain input data." % AGENT_NAME, res['Message'] )
      return res
    transFiles = res['Value']
    lfns = res['LFNs']
    if not lfns:
      gLogger.info( "%s.processTransformation: No 'Unused' files found for transformation." % AGENT_NAME )
      if transDict['Status'] == 'Flush':
        self.__setFlushedTransformationActive( transID )
      return S_OK()

    # Check the data is available with replicas; the 'active' flag is switched
    # off for replication and removal transformations
    activeOnly = transDict['Type'].lower() not in ["replication", "removal"]
    res = self.__getDataReplicas( transID, lfns, active = activeOnly )
    if not res['OK']:
      gLogger.error( "%s.processTransformation: Failed to get data replicas" % AGENT_NAME, res['Message'] )
      return res
    dataReplicas = res['Value']

    # Get the plug-in type and create the plug-in object
    plugin = transDict.get( 'Plugin' ) or 'Standard'
    gLogger.info( "%s.processTransformation: Processing transformation with '%s' plug-in." % ( AGENT_NAME, plugin ) )
    res = self.__generatePluginObject( plugin )
    if not res['OK']:
      return res
    oPlugin = res['Value']

    # Get the plug-in and set the required params
    oPlugin.setParameters( transDict )
    oPlugin.setInputData( dataReplicas )
    oPlugin.setTransformationFiles( transFiles )
    res = oPlugin.generateTasks()
    if not res['OK']:
      gLogger.error( "%s.processTransformation: Failed to generate tasks for transformation." % AGENT_NAME, res['Message'] )
      return res
    tasks = res['Value']

    # Create the tasks; a failure is logged and remembered but does not stop
    # the creation of the remaining tasks
    allCreated = True
    created = 0
    for se, taskLfns in tasks:
      res = self.transDB.addTaskForTransformation( transID, taskLfns, se )
      if not res['OK']:
        gLogger.error( "%s.processTransformation: Failed to add task generated by plug-in." % AGENT_NAME, res['Message'] )
        allCreated = False
      else:
        created += 1
    if created:
      gLogger.info( "%s.processTransformation: Successfully created %d tasks for transformation." % ( AGENT_NAME, created ) )

    # If this production is to Flush
    if transDict['Status'] == 'Flush' and allCreated:
      self.__setFlushedTransformationActive( transID )
    return S_OK()

  def __setFlushedTransformationActive( self, transID ):
    """ Set the 'Status' parameter of the transformation to 'Active', logging the outcome.

    A failure is only logged; nothing is returned.
    """
    res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
    if not res['OK']:
      gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message'] )
    else:
      gLogger.info( "%s.execute: Updated transformation status to 'Active'." % AGENT_NAME )

  ######################################################################
  #
  # Internal methods used by the agent
  #

  def __generatePluginObject( self, plugin ):
    """ Import the plug-in module and instantiate its TransformationPlugin class.

    :param plugin: plug-in name; it is passed, formatted as a string, to the
                   TransformationPlugin constructor
    :return: S_OK( plug-in object ), or S_ERROR if the module named by
             self.pluginLocation cannot be imported or the object cannot
             be created
    """
    try:
      plugModule = __import__( self.pluginLocation, globals(), locals(), ['TransformationPlugin'] )
    except Exception as x:
      gLogger.exception( "%s.__generatePluginObject: Failed to import 'TransformationPlugin'" % AGENT_NAME, '', x )
      return S_ERROR( "Failed to import 'TransformationPlugin'" )
    try:
      # Call the constructor directly: building a source string from the
      # plug-in name and eval()-ing it would execute whatever the name contains
      # and would break on a name holding a quote character.
      return S_OK( plugModule.TransformationPlugin( '%s' % plugin ) )
    except Exception as x:
      gLogger.exception( "%s.__generatePluginObject: Failed to create %s()." % ( AGENT_NAME, plugin ), '', x )
      return S_ERROR( "Failed to create %s()" % plugin )