Example #1
0
class ProdValidator(object):
    """Checks applied to a transformation before it is added to a production.

    All public methods return DIRAC result dictionaries (S_OK / S_ERROR).
    """

    # Operators accepted in a meta query by checkformatQuery
    SUPPORTED_OPERATIONS = ('=', 'in')
    # Catalog metadata types (lower case) accepted by checkMatchQuery
    SUPPORTED_META_TYPES = ('varchar(128)', 'int', 'float')

    def __init__(self):
        self.transClient = TransformationClient()

    def checkTransStatus(self, transID):
        """ Check if the status of the transformation is valid for the transformation to be added to a production.
        New is the only valid status

        :param int transID: the TransformationID
        :return: S_OK() if the status is 'New', S_ERROR otherwise (or the failed client result)
        """
        res = self.transClient.getTransformationParameters(transID, 'Status')
        if not res['OK']:
            return res
        status = res['Value']
        if status != 'New':
            return S_ERROR(
                "checkTransStatus failed : Invalid transformation status: %s" %
                status)

        return S_OK()

    def checkTransDependency(self, transID, parentTransID):
        """ Check if the transformation and the parent transformation are linked

        The Input meta query of the transformation is matched against the
        Output meta query of the parent with checkMatchQuery.

        :param int transID: the TransformationID
        :param int parentTransID: the parent TransformationID
        :return: S_OK() if the queries match, S_ERROR otherwise
        """
        res = self.transClient.getTransformationMetaQuery(transID, 'Input')
        if not res['OK']:
            return res
        inputquery = res['Value']
        if not inputquery:
            return S_ERROR("No InputMetaQuery defined for transformation %s" %
                           transID)

        res = self.transClient.getTransformationMetaQuery(
            parentTransID, 'Output')
        if not res['OK']:
            return res
        parentoutputquery = res['Value']
        if not parentoutputquery:
            return S_ERROR(
                "No OutputMetaQuery defined for parent transformation %s" %
                parentTransID)

        # Check the matching between inputquery and parent outputmeta query
        # Currently very simplistic: just support expression with "=" and "in" operators
        gLogger.notice("Applying checkMatchQuery")
        res = self.checkMatchQuery(inputquery, parentoutputquery)

        if not res['OK']:
            gLogger.error("checkMatchQuery failed")
            return res
        if not res['Value']:
            return S_ERROR("checkMatchQuery result is False")

        return S_OK()

    def checkMatchQuery(self, mq, mqParent):
        """ Check the logical intersection between the two metaqueries

        Both dictionaries are normalised in place by checkformatQuery.

        :param dict mq: a dictionary of the MetaQuery to be checked against the mqParent
        :param dict mqParent: a dictionary of the parent MetaQuery to be checked against the mq
        :return: S_OK(True) if every metadata of mq matches the parent, S_OK(False) if a value
                 does not match, S_ERROR for undefined/unsupported metadata or operators
        """
        # Get the metadata types defined in the catalog
        catalog = FileCatalog()
        res = catalog.getMetadataFields()
        if not res['OK']:
            gLogger.error("Error in getMetadataFields: %s" % res['Message'])
            return res
        if not res['Value']:
            gLogger.error("Error: no metadata fields defined")
            # The (empty) successful result is handed back unchanged: callers see a false Value
            return res

        # Merge on a copy so that the structure returned by the catalog is left untouched
        metaTypeDict = dict(res['Value']['FileMetaFields'])
        metaTypeDict.update(res['Value']['DirectoryMetaFields'])

        res = self.checkformatQuery(mq)
        if not res['OK']:
            return res
        metaQueryDict = res['Value']

        res = self.checkformatQuery(mqParent)
        if not res['OK']:
            return res
        parentMetaQueryDict = res['Value']

        for meta, value in metaQueryDict.items():
            if meta not in metaTypeDict:
                return S_ERROR('Metadata %s is not defined in the Catalog' % meta)
            mtype = metaTypeDict[meta].lower()
            if mtype not in self.SUPPORTED_META_TYPES:
                return S_ERROR('Metatype %s is not supported' % mtype)
            if meta not in parentMetaQueryDict:
                return S_ERROR('Metadata %s is not in parent transformation query' % meta)
            if not self.compareValues(value, parentMetaQueryDict[meta]):
                msg = "Metadata values %s do not match with %s" % (
                    value, parentMetaQueryDict[meta])
                gLogger.error(msg)
                return S_OK(False)

        return S_OK(True)

    def checkformatQuery(self, MetaQueryDict):
        """ Check the format query and transform all dict values in dict for uniform treatment

        Scalar values and single-valued operator dictionaries are rewritten as
        ``{"in": [...]}``. The dictionary is modified in place and also returned.

        :param dict MetaQueryDict: a dictionary of the MetaQuery
        :return: S_OK(MetaQueryDict), or S_ERROR for an empty or unsupported operator dictionary
        """
        # Iterate over a snapshot: the values of the dictionary are reassigned in the loop
        for meta, value in list(MetaQueryDict.items()):
            if not isinstance(value, dict):
                MetaQueryDict[meta] = {"in": [value]}
                continue
            if not value:
                return S_ERROR('Empty query for metadata %s' % meta)
            # list(...) is needed because dictionary views cannot be indexed in Python 3
            operation = list(value)[0]
            if operation not in self.SUPPORTED_OPERATIONS:
                return S_ERROR('Operation %s is not supported' % operation)
            operands = list(value.values())
            if not isinstance(operands[0], list):
                MetaQueryDict[meta] = {"in": operands}

        return S_OK(MetaQueryDict)

    def compareValues(self, value, parentValue):
        """ Very simple comparison. To be improved

        The two value lists match when one of them is a subset of the other.

        :param dict value: a dictionary with meta data values to be compared with the parentValues
        :param dict parentValue: a dictionary with meta data parentValues be compared with values
        :return: bool
        """
        # list(...) is needed because dictionary views cannot be indexed in Python 3
        values = set(list(value.values())[0])
        parentValues = set(list(parentValue.values())[0])
        return values.issubset(parentValues) or parentValues.issubset(values)
Example #2
0
class TransformationCLI(CLI, API):

  def __init__(self):
    """Create the transformation client, set the print indentation and initialise both base classes."""
    self.transClient = TransformationClient()
    self.indentSpace = 4
    # Same initialisation order as the class declaration: CLI first, then API
    for base in (CLI, API):
      base.__init__(self)

  def printPair(self, key, value, separator=":"):
    """Print a key followed by a (possibly multi-line) value.

    The first line of the value is printed next to the key, which is padded
    up to ``self.indentSpace`` characters; the following lines are printed
    indented underneath. Trailing white space of the value is ignored, so
    the last line is printed whether or not the text ends with a newline.

    :param str key: label printed at the beginning of the first line
    :param str value: text to print, lines separated by newlines
    :param str separator: string printed between the key and the value
    """
    # Strip the tail first: slicing off the last line unconditionally would lose real text
    valueList = value.rstrip().split("\n")
    print("%s%s%s %s" % (key, " " * (self.indentSpace - len(key)), separator, valueList[0].strip()))
    for valueLine in valueList[1:]:
      print("%s  %s" % (" " * self.indentSpace, valueLine.strip()))

  def do_help(self, args):
    """ Default version of the help command
       Usage: help <command>
       OR use helpall to see description for all commands"""
    # Plain delegation to the help command of the CLI base class
    CLI.do_help(self, args)

  # overriding the default help command
  def do_helpall(self, args):
    """
    Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
    """
    argss = args.split()
    if not argss:
      print("\nAvailable commands:\n")
      for attribute in sorted(dir(self)):
        if attribute.startswith("do_"):
          # A command without docstring is listed with an empty description
          doc = getattr(self, attribute).__doc__ or ""
          # lstrip() rather than [1:]: docstrings starting with text must keep their first letter
          self.printPair(attribute[3:], doc.lstrip())
          print("")
      return

    command = argss[0]
    try:
      obj = getattr(self, "do_%s" % command)
    except AttributeError:
      print("There's no such %s command" % command)
      return
    self.printPair(command, (obj.__doc__ or "").lstrip())

  def do_shell(self, args):
    """Execute a shell command

       usage !<shell_command>
    """
    res = shellCall(0, args)
    if not res['OK']:
      print(res['Message'])
      return
    # A successful call carries (return code, stdout, stderr); there is no 'Message' to
    # print in that case, even when the command itself exited with a non-zero code
    returnCode, stdOut, stdErr = res['Value']
    print("%s\n%s" % (stdOut, stdErr))
    if returnCode != 0:
      print("Command exited with return code %s" % returnCode)

  def check_params(self, args, num):
    """Split the argument string and verify that at least `num` arguments were given.

    :param str args: white space separated arguments
    :param int num: minimum number of arguments required
    :return: (list of arguments, count) when enough arguments are present, (False, count) otherwise
    """
    tokens = args.split()
    count = len(tokens)
    if count >= num:
      return (tokens, count)
    print("Error: Number of arguments provided %d less that required %d, please correct." % (count, num))
    return (False, count)

  def check_id_or_name(self, id_or_name):
    """Return the argument as an int when it only contains digits (an ID), unchanged otherwise (a name)."""
    return int(id_or_name) if id_or_name.isdigit() else id_or_name

  ####################################################################
  #
  # These are the methods for transformation manipulation
  #

  def do_getall(self, args):
    """Get transformation details

       usage: getall [Status] [Status]
    """
    # Every white space separated token is taken as a status to select on
    Transformation().getTransformations(transStatus=args.split(), printOutput=True)

  def do_getAllByUser(self, args):
    """Get all transformations created by a given user

The first argument is the authorDN or username. The authorDN
is preferred: it need to be inside quotes because contains
white spaces. Only authorDN should be quoted.

When the username is provided instead,
the authorDN is retrieved from the uploaded proxy,
so that the retrieved transformations are those created by
the user who uploaded that proxy: that user could be different
that the username provided to the function.

       usage: getAllByUser authorDN or username [Status] [Status]
    """
    oTrans = Transformation()
    argss = args.split()
    username = ""
    author = ""
    status = []
    if not len(argss) > 0:
      print(self.do_getAllByUser.__doc__)
      return

    # if the user didnt quoted the authorDN ends
    if '=' in argss[0] and argss[0][0] not in ["'", '"']:
      print("AuthorDN need to be quoted (just quote that argument)")
      return

    if argss[0][0] in ["'", '"']:  # authorDN given
      author = argss[0]
      status_idx = 1
      for arg in argss[1:]:
        author += ' ' + arg
        status_idx += 1
        if arg[-1] in ["'", '"']:
          break
      # At this point we should have something like 'author'
      if not author[0] in ["'", '"'] or not author[-1] in ["'", '"']:
        print("AuthorDN need to be quoted (just quote that argument)")
        return
      else:
        author = author[1:-1]  # throw away the quotes
      # the rest are the requested status
      status = argss[status_idx:]
    else:  # username given
      username = argss[0]
      status = argss[1:]

    oTrans.getTransformationsByUser(authorDN=author, userName=username, transStatus=status, printOutput=True)

  def do_summaryTransformations(self, args):
    """Show the summary for a list of Transformations

    Fields starting with 'F' ('J')  refers to files (jobs).
    Proc. stand for processed.

        Usage: summaryTransformations <ProdID> [<ProdID> ...]
    """
    transIDs = args.split()
    if not transIDs:
      # Without any ID, show the help text of the command
      print(self.do_summaryTransformations.__doc__)
      return

    Transformation().getSummaryTransformations(transID=transIDs)

  def do_getStatus(self, args):
    """Get transformation details

       usage: getStatus <transName|ID>
    """
    names = args.split()
    if not names:
      print("no transformation supplied")
      return
    # One line is printed per requested transformation
    for transName in names:
      res = self.transClient.getTransformation(transName)
      if res['OK']:
        print("%s: %s" % (transName, res['Value']['Status']))
      else:
        print("Getting status of %s failed: %s" % (transName, res['Message']))

  def do_setStatus(self, args):
    """Set transformation status

       usage: setStatus  <Status> <transName|ID>
       Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
    """
    tokens = args.split()
    if len(tokens) <= 1:
      print("transformation and status not supplied")
      return
    # First token is the status, all the others are transformations
    status, transNames = tokens[0], tokens[1:]
    for transName in transNames:
      res = self.transClient.setTransformationParameter(transName, 'Status', status)
      if res['OK']:
        print("%s set to %s" % (transName, status))
      else:
        print("Setting status of %s failed: %s" % (transName, res['Message']))

  def do_start(self, args):
    """Start transformation

       usage: start <transName|ID>
    """
    names = args.split()
    if not names:
      print("no transformation supplied")
      return
    for transName in names:
      # Starting means: Status -> Active, then AgentType -> Automatic
      res = self.transClient.setTransformationParameter(transName, 'Status', 'Active')
      if not res['OK']:
        print("Setting Status of %s failed: %s" % (transName, res['Message']))
        continue
      res = self.transClient.setTransformationParameter(transName, 'AgentType', 'Automatic')
      if not res['OK']:
        print("Setting AgentType of %s failed: %s" % (transName, res['Message']))
        continue
      print("%s started" % transName)

  def do_stop(self, args):
    """Stop transformation

       usage: stop <transName|ID>
    """
    argss = args.split()
    if not argss:
      print("no transformation supplied")
      return
    for transName in argss:
      # Stopping only switches the AgentType to Manual; the Status is not modified
      res = self.transClient.setTransformationParameter(transName, 'AgentType', 'Manual')
      if not res['OK']:
        print("Stopping of %s failed: %s" % (transName, res['Message']))
      else:
        print("%s stopped" % transName)

  def do_flush(self, args):
    """Flush transformation

       usage: flush <transName|ID>
    """
    names = args.split()
    if not names:
      print("no transformation supplied")
      return
    for transName in names:
      # Flushing is requested by setting the Status parameter to 'Flush'
      res = self.transClient.setTransformationParameter(transName, 'Status', 'Flush')
      if res['OK']:
        print("%s flushing" % transName)
      else:
        print("Flushing of %s failed: %s" % (transName, res['Message']))

  def do_get(self, args):
    """Get transformation definition

    usage: get <transName|ID>
    """
    argss = args.split()
    if not argss:
      print("no transformation supplied")
      return
    transName = argss[0]
    res = self.transClient.getTransformation(transName)
    if not res['OK']:
      print("Failed to get %s: %s" % (transName, res['Message']))
      return
    # The body is not part of this printout (see getBody); tolerate its absence
    res['Value'].pop('Body', None)
    printDict(res['Value'])

  def do_getBody(self, args):
    """Get transformation body

    usage: getBody <transName|ID>
    """
    tokens = args.split()
    if not tokens:
      print("no transformation supplied")
      return
    # Only the first argument is used
    transName = tokens[0]
    res = self.transClient.getTransformation(transName)
    if res['OK']:
      print(res['Value']['Body'])
    else:
      print("Failed to get %s: %s" % (transName, res['Message']))

  def do_getFileStat(self, args):
    """Get transformation file statistics

     usage: getFileStat <transName|ID>
    """
    argss = args.split()
    if not argss:
      print("no transformation supplied")
      return
    transName = argss[0]
    res = self.transClient.getTransformationStats(transName)
    if not res['OK']:
      print("Failed to get statistics for %s: %s" % (transName, res['Message']))
      return
    # The 'Total' entry is left out of the printout; tolerate its absence
    res['Value'].pop('Total', None)
    printDict(res['Value'])

  def do_modMask(self, args):
    """Modify transformation input definition

       usage: modMask <mask> <transName|ID>
    """
    argss = args.split()
    # Both the mask and at least one transformation are needed: with the mask alone
    # there is nothing to update
    if len(argss) < 2:
      print("mask and transformation not supplied")
      return
    mask = argss[0]
    transNames = argss[1:]
    for transName in transNames:
      res = self.transClient.setTransformationParameter(transName, "FileMask", mask)
      if not res['OK']:
        print("Failed to modify input file mask for %s: %s" % (transName, res['Message']))
      else:
        print("Updated %s filemask" % transName)

  def do_getFiles(self, args):
    """Get files for the transformation (optionally with a given status)

    usage: getFiles <transName|ID> [Status] [Status]
    """
    tokens = args.split()
    if not tokens:
      print("no transformation supplied")
      return
    transName, status = tokens[0], tokens[1:]

    res = self.transClient.getTransformation(transName)
    if not res['OK']:
      print("Failed to get transformation information: %s" % res['Message'])
      return

    # Select on the transformation ID and, when given, on the requested statuses
    selectDict = {'TransformationID': res['Value']['TransformationID']}
    if status:
      selectDict['Status'] = status
    res = self.transClient.getTransformationFiles(condDict=selectDict)
    if not res['OK']:
      print("Failed to get transformation files: %s" % res['Message'])
      return
    if not res['Value']:
      print("No files found")
      return
    self._printFormattedDictList(res['Value'], ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                 'LFN', 'LFN')

  def do_getFileStatus(self, args):
    """Get file(s) status for the given transformation

    usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
    """
    tokens = args.split()
    if len(tokens) < 2:
      print("transformation and file not supplied")
      return
    transName, lfns = tokens[0], tokens[1:]

    res = self.transClient.getTransformation(transName)
    if not res['OK']:
      print("Failed to get transformation information: %s" % res['Message'])
      return

    res = self.transClient.getTransformationFiles(condDict={'TransformationID': res['Value']['TransformationID']})
    if not res['OK']:
      print("Failed to get transformation files: %s" % res['Message'])
      return
    if not res['Value']:
      print("No files found")
      return

    # All the files of the transformation are retrieved, the requested LFNs are picked here
    filesList = [fileDict for fileDict in res['Value'] if fileDict['LFN'] in lfns]
    if not filesList:
      print("Could not find any LFN in", lfns, "for transformation", transName)
      return
    self._printFormattedDictList(filesList, ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                 'LFN', 'LFN')

  def do_getOutputFiles(self, args):
    """Get output files for the transformation

    usage: getOutputFiles <transName|ID>
    """
    argss = args.split()
    if not argss:
      print("no transformation supplied")
      return
    transName = argss[0]
    # Only used to check that the transformation exists
    res = self.transClient.getTransformation(transName)
    if not res['OK']:
      print("Failed to get transformation information: %s" % res['Message'])
      return

    # The output files are looked up in the catalog through their 'ProdID' metadata
    fc = FileCatalog()
    res = fc.findFilesByMetadata({'ProdID': transName})
    if not res['OK']:
      print(res['Message'])
      return
    if not res['Value']:
      # %s and not %d: the command also accepts a transformation name
      print('No output files yet for transformation %s' % transName)
      return
    for lfn in res['Value']:
      print(lfn)

  def do_getInputDataQuery(self, args):
    """Get input data query for the transformation

    usage: getInputDataQuery <transName|ID>
    """
    tokens = args.split()
    if not tokens:
      print("no transformation supplied")
      return
    # The input data query is the meta query of type 'Input'
    res = self.transClient.getTransformationMetaQuery(tokens[0], 'Input')
    if res['OK']:
      print(res['Value'])
    else:
      print("Failed to get transformation input data query: %s" % res['Message'])

  def do_setFileStatus(self, args):
    """Set file status for the given transformation

    usage: setFileStatus <transName|ID> <lfn> <status>
    """
    tokens = args.split()
    # Exactly three arguments are expected
    if len(tokens) != 3:
      print("transformation file and status not supplied")
      return
    transName, lfn, status = tokens
    res = self.transClient.setFileStatusForTransformation(transName, status, [lfn])
    if res['OK']:
      print("Updated file status to %s" % status)
    else:
      print("Failed to update file status: %s" % res['Message'])

  def do_resetFile(self, args):
    """Reset file status for the given transformation

    usage: resetFile <transName|ID> <lfns>
    """
    argss = args.split()
    if len(argss) < 2:
      print("transformation and file(s) not supplied")
      return
    transName = argss[0]
    lfns = argss[1:]
    res = self.transClient.setFileStatusForTransformation(transName, 'Unused', lfns)
    if not res['OK']:
      print("Failed to reset file status: %s" % res['Message'])
      return
    # Same reporting as resetProcessedFile: an empty 'Failed' entry is not a failure
    failed = res['Value'].get('Failed')
    if failed:
      print("Could not reset some files: ")
      for lfn, reason in failed.items():
        print(lfn, reason)
    else:
      print("Updated file statuses to 'Unused' for %d file(s)" % len(lfns))

  def do_resetProcessedFile(self, args):
    """ Reset file status for the given transformation
        usage: resetProcessedFile <transName|ID> <lfn>
    """
    argss = args.split()
    if len(argss) < 2:
      print("transformation and file(s) not supplied")
      return
    transName = argss[0]
    lfns = argss[1:]
    # Unlike resetFile, the status change is requested with force=True
    res = self.transClient.setFileStatusForTransformation(transName, 'Unused', lfns, force=True)
    if not res['OK']:
      print("Failed to reset file status: %s" % res['Message'])
      return
    failed = res['Value'].get('Failed')
    if failed:
      print("Could not reset some files: ")
      for lfn, reason in failed.items():
        print(lfn, reason)
    else:
      print("Updated file statuses to 'Unused' for %d file(s)" % len(lfns))

  ####################################################################
  #
  # These are the methods for file manipulation
  #

  def do_addDirectory(self, args):
    """Add files from the given catalog directory

    usage: addDirectory <directory> [directory]
    """
    directories = args.split()
    if not directories:
      print("no directory supplied")
      return
    # Each directory is added with its own call; a failure does not stop the loop
    for directory in directories:
      res = self.transClient.addDirectory(directory, force=True)
      if res['OK']:
        print('added %s files for %s' % (res['Value'], directory))
      else:
        print('failed to add directory %s: %s' % (directory, res['Message']))

  def do_replicas(self, args):
    """ Get replicas for <path>

        usage: replicas <lfn> [lfn]
    """
    lfns = args.split()
    if not lfns:
      print("no files supplied")
      return
    res = self.transClient.getReplicas(lfns)
    if not res['OK']:
      print("failed to get any replica information: %s" % res['Message'])
      return

    # Failures first, then one line per file with its storage elements in columns
    failed = res['Value']['Failed']
    for lfn in sorted(failed):
      print("failed to get replica information for %s: %s" % (lfn, failed[lfn]))
    successful = res['Value']['Successful']
    for lfn in sorted(successful):
      columns = "".join(" %s" % se.ljust(15) for se in sorted(successful[lfn]))
      print("%s :" % lfn.ljust(100) + columns)

  def do_addFile(self, args):
    """Add new files to transformation DB

    usage: addFile <lfn> [lfn]
    """
    lfns = args.split()
    if not lfns:
      print("no files supplied")
      return
    # Only the LFN is meaningful here: the other fields are sent as placeholders
    lfnDict = dict((lfn, {'PFN': 'IGNORED-PFN', 'SE': 'IGNORED-SE', 'Size': 0, 'GUID': 'IGNORED-GUID',
                          'Checksum': 'IGNORED-CHECKSUM'})
                   for lfn in lfns)
    res = self.transClient.addFile(lfnDict, force=True)
    if not res['OK']:
      print("failed to add any files: %s" % res['Message'])
      return
    failed = res['Value']['Failed']
    for lfn in sorted(failed):
      print("failed to add %s: %s" % (lfn, failed[lfn]))
    for lfn in sorted(res['Value']['Successful']):
      print("added %s" % lfn)

  def do_removeFile(self, args):
    """Remove file from transformation DB

    usage: removeFile <lfn> [lfn]
    """
    lfns = args.split()
    if not lfns:
      print("no files supplied")
      return
    res = self.transClient.removeFile(lfns)
    if not res['OK']:
      print("failed to remove any files: %s" % res['Message'])
      return
    # Report the per-file outcome: failures first, then the removed files
    failed = res['Value']['Failed']
    for lfn in sorted(failed):
      print("failed to remove %s: %s" % (lfn, failed[lfn]))
    for lfn in sorted(res['Value']['Successful']):
      print("removed %s" % lfn)

  def do_addReplica(self, args):
    """ Add new replica to the transformation DB

    usage: addReplica <lfn> <se>
    """
    tokens = args.split()
    if len(tokens) != 2:
      print("no file info supplied")
      return
    lfn, se = tokens
    # Apart from the storage element, the replica fields are sent as placeholders
    lfnDict = {lfn: {'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0, 'GUID': 'IGNORED-GUID',
                     'Checksum': 'IGNORED-CHECKSUM'}}
    res = self.transClient.addReplica(lfnDict, force=True)
    if not res['OK']:
      print("failed to add replica: %s" % res['Message'])
      return
    failed = res['Value']['Failed']
    for failedLfn in sorted(failed):
      print("failed to add replica: %s" % (failed[failedLfn]))
    for addedLfn in sorted(res['Value']['Successful']):
      print("added %s" % addedLfn)

  def do_removeReplica(self, args):
    """Remove replica from the transformation DB

    usage: removeReplica <lfn> <se>
    """
    tokens = args.split()
    if len(tokens) != 2:
      print("no file info supplied")
      return
    lfn, se = tokens
    # Apart from the storage element, the replica fields are sent as placeholders
    lfnDict = {lfn: {'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0, 'GUID': 'IGNORED-GUID',
                     'Checksum': 'IGNORED-CHECKSUM'}}
    res = self.transClient.removeReplica(lfnDict)
    if not res['OK']:
      print("failed to remove replica: %s" % res['Message'])
      return
    failed = res['Value']['Failed']
    for failedLfn in sorted(failed):
      print("failed to remove replica: %s" % (failed[failedLfn]))
    for removedLfn in sorted(res['Value']['Successful']):
      print("removed %s" % removedLfn)

  def do_setReplicaStatus(self, args):
    """Set replica status, usually used to mark a replica Problematic

    usage: setReplicaStatus <lfn> <status> <se>
    """
    tokens = args.split()
    if len(tokens) <= 2:
      print("no file info supplied")
      return
    # Arguments beyond the third one are ignored
    lfn, status, se = tokens[:3]
    replicaInfo = {'Status': status,
                   'PFN': 'IGNORED-PFN',
                   'SE': se,
                   'Size': 0,
                   'GUID': 'IGNORED-GUID',
                   'Checksum': 'IGNORED-CHECKSUM'}
    res = self.transClient.setReplicaStatus({lfn: replicaInfo})
    if not res['OK']:
      print("failed to set replica status: %s" % res['Message'])
      return
    failed = res['Value']['Failed']
    for failedLfn in sorted(failed):
      print("failed to set replica status: %s" % (failed[failedLfn]))
    for updatedLfn in sorted(res['Value']['Successful']):
      print("updated replica status %s" % updatedLfn)
Example #3
0
class InputDataAgent(AgentModule):
    """Agent adding to each active transformation the files selected by its input meta query."""

    def __init__(self, *args, **kwargs):
        """c'tor: set up the bookkeeping dictionaries, read the agent options and create the clients"""
        AgentModule.__init__(self, *args, **kwargs)

        # Number of files returned by the last catalog query, per transformation ID
        self.fileLog = {}
        # Time of the last cycle, per transformation ID (RefreshOnly mode)
        self.timeLog = {}
        # Start of the current full update period, per transformation ID (RefreshOnly mode)
        self.fullTimeLog = {}

        self.pollingTime = self.am_getOption('PollingTime', 120)
        self.fullUpdatePeriod = self.am_getOption('FullUpdatePeriod', 86400)
        self.refreshonly = self.am_getOption('RefreshOnly', False)
        self.dateKey = self.am_getOption('DateKey', None)

        self.transClient = TransformationClient()
        self.metadataClient = FileCatalogClient()
        self.transformationTypes = None

    #############################################################################
    def initialize(self):
        """ Make the necessary initializations

        The transformation types handled are taken from the agent option
        'TransformationTypes' or, when it is empty, from the Operations
        values for data processing and data manipulation. The extendable
        types are removed from that list.

        :return: S_OK()
        """
        gMonitor.registerActivity("Iteration", "Agent Loops", AGENT_NAME,
                                  "Loops/min", gMonitor.OP_SUM)
        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if agentTSTypes:
            self.transformationTypes = sorted(agentTSTypes)
        else:
            dataProc = Operations().getValue('Transformations/DataProcessing',
                                             ['MCSimulation', 'Merge'])
            dataManip = Operations().getValue(
                'Transformations/DataManipulation', ['Replication', 'Removal'])
            self.transformationTypes = sorted(dataProc + dataManip)
        extendables = Operations().getValue(
            'Transformations/ExtendableTransfTypes', [])
        if extendables:
            for extendable in extendables:
                if extendable in self.transformationTypes:
                    # This is because the Extendables do not use this Agent (have no Input data query)
                    self.transformationTypes.remove(extendable)

        return S_OK()

    ##############################################################################
    def execute(self):
        """ Main execution method

        For every active transformation of the handled types: get the input
        meta query, find the matching files in the metadata catalog and add
        them to the transformation. A failure on one transformation is logged
        and the loop goes on with the next one.

        :return: S_OK() in all cases
        """
        gMonitor.addMark('Iteration', 1)
        # Get all the transformations
        result = self.transClient.getTransformations({
            'Status': 'Active',
            'Type': self.transformationTypes
        })
        if not result['OK']:
            self.log.error(
                "InputDataAgent.execute: Failed to get transformations.",
                result['Message'])
            return S_OK()
        transformations = result['Value']

        # Process each transformation
        for transDict in transformations:
            # int() and not long(): long does not exist in Python 3
            transID = int(transDict['TransformationID'])
            res = self.transClient.getTransformationMetaQuery(transID, 'Input')
            if not res['OK']:
                if cmpError(res, ENOENT):
                    self.log.info(
                        "InputDataAgent.execute: No input data query found for transformation",
                        transID)
                else:
                    self.log.error(
                        "InputDataAgent.execute: Failed to get input data query",
                        "for %d: %s" % (transID, res['Message']))
                continue
            inputDataQuery = res['Value']

            if self.refreshonly:
                self._updateQueryTimeWindow(transID, inputDataQuery)

            # Perform the query to the metadata catalog
            self.log.verbose("Using input data query for transformation",
                             "%d: %s" % (transID, str(inputDataQuery)))
            start = time.time()
            result = self.metadataClient.findFilesByMetadata(inputDataQuery)
            rtime = time.time() - start
            self.log.verbose("Metadata catalog query time",
                             ": %.2f seconds." % (rtime))
            if not result['OK']:
                self.log.error(
                    "InputDataAgent.execute: Failed to get response from the metadata catalog",
                    result['Message'])
                continue
            lfnList = result['Value']

            # Check if the number of files has changed since the last cycle
            nlfns = len(lfnList)
            self.log.info(
                "files returned for transformation from the metadata catalog: ",
                "%d -> %d" % (int(transID), nlfns))
            if nlfns == self.fileLog.get(transID):
                self.log.verbose(
                    'No new files in metadata catalog since last check')
            self.fileLog[transID] = nlfns

            # Add any new files to the transformation
            if not lfnList:
                continue
            self.log.verbose('Processing lfns for transformation:',
                             "%d -> %d" % (transID, len(lfnList)))
            self.log.verbose('Adding lfns for transformation:',
                             "%d -> %d" % (transID, len(lfnList)))
            result = self.transClient.addFilesToTransformation(
                transID, sorted(lfnList))
            if not result['OK']:
                self.log.warn(
                    "InputDataAgent.execute: failed to add lfns to transformation",
                    result['Message'])
                self.fileLog[transID] = 0
                continue

            # The failures are those of the addFilesToTransformation call ('result'),
            # not of the meta query call ('res')
            for lfn, error in result['Value']['Failed'].items():
                self.log.warn(
                    "InputDataAgent.execute: Failed to add to transformation:",
                    "%s: %s" % (lfn, error))
            if result['Value']['Successful']:
                addedLfns = [
                    lfn
                    for lfn, status in result['Value']['Successful'].items()
                    if status == 'Added'
                ]
                self.log.info(
                    "InputDataAgent.execute: Added files to transformation",
                    "(%d)" % len(addedLfns))

        return S_OK()

    def _updateQueryTimeWindow(self, transID, inputDataQuery):
        """ RefreshOnly mode: restrict the query to recent files and update the time stamps

        While the current full update period of the transformation is not over,
        the DateKey entry of the query is set to the time of the previous cycle
        (minus a margin of 10 seconds). Once the period is over the query is
        left unrestricted and a new period is started.

        :param int transID: the TransformationID
        :param dict inputDataQuery: the input meta query, modified in place
        """
        if transID in self.timeLog and transID in self.fullTimeLog:
            sinceFullQuery = datetime.datetime.utcnow() - self.fullTimeLog[transID]
            if sinceFullQuery < datetime.timedelta(seconds=self.fullUpdatePeriod):
                if self.dateKey:
                    timeStamp = self.timeLog[transID] - datetime.timedelta(seconds=10)
                    inputDataQuery[self.dateKey] = timeStamp.strftime('%Y-%m-%d %H:%M:%S')
                else:
                    self.log.error(
                        "DateKey was not set in the CS, cannot use the RefreshOnly"
                    )
            else:
                # Period over: make a full query just in case, and restart the period
                self.fullTimeLog[transID] = datetime.datetime.utcnow()
        self.timeLog[transID] = datetime.datetime.utcnow()
        if transID not in self.fullTimeLog:
            self.fullTimeLog[transID] = datetime.datetime.utcnow()
Example #4
0
class InputDataAgent(AgentModule):
    """Agent that adds files matching a transformation's input meta query to it.

    On every cycle the agent fetches the Active transformations of the
    configured types, runs each transformation's "Input" meta query against the
    file catalog and passes the resulting LFNs to ``addFilesToTransformation``.
    """

    def __init__(self, *args, **kwargs):
        """c'tor"""
        AgentModule.__init__(self, *args, **kwargs)

        # Per-transformation bookkeeping, keyed by TransformationID.
        self.fileLog = {}  # number of LFNs returned by the last catalog query
        self.timeLog = {}  # UTC time of the last cycle (RefreshOnly mode)
        self.fullTimeLog = {}  # UTC start of the current full-update period

        self.pollingTime = self.am_getOption("PollingTime", 120)
        self.fullUpdatePeriod = self.am_getOption("FullUpdatePeriod", 86400)
        self.refreshonly = self.am_getOption("RefreshOnly", False)
        self.dateKey = self.am_getOption("DateKey", None)

        self.transClient = TransformationClient()
        self.metadataClient = FileCatalogClient()
        self.transformationTypes = None

    #############################################################################
    def initialize(self):
        """Make the necessary initializations.

        Builds ``self.transformationTypes`` from the agent option
        "TransformationTypes" or, when that is empty, from the Operations
        values "Transformations/DataProcessing" and
        "Transformations/DataManipulation". Types listed under
        "Transformations/ExtendableTransfTypes" are then removed.

        :return: S_OK()
        """
        agentTSTypes = self.am_getOption("TransformationTypes", [])
        if agentTSTypes:
            self.transformationTypes = sorted(agentTSTypes)
        else:
            dataProc = Operations().getValue("Transformations/DataProcessing",
                                             ["MCSimulation", "Merge"])
            dataManip = Operations().getValue(
                "Transformations/DataManipulation", ["Replication", "Removal"])
            self.transformationTypes = sorted(dataProc + dataManip)

        extendables = Operations().getValue(
            "Transformations/ExtendableTransfTypes", [])
        if extendables:
            for extendable in extendables:
                if extendable in self.transformationTypes:
                    # The Extendables do not use this Agent (have no Input data query)
                    self.transformationTypes.remove(extendable)

        return S_OK()

    ##############################################################################
    def execute(self):
        """Main execution method.

        Failures affecting a single transformation are logged and that
        transformation is skipped for this cycle; the method returns S_OK()
        in every case visible here, including when the list of
        transformations cannot be retrieved.

        :return: S_OK()
        """
        # Get all the transformations
        transResult = self.transClient.getTransformations({
            "Status": "Active",
            "Type": self.transformationTypes,
        })
        if not transResult["OK"]:
            self.log.error(
                "InputDataAgent.execute: Failed to get transformations.",
                transResult["Message"])
            return S_OK()

        # Process each transformation
        for transDict in transResult["Value"]:
            transID = int(transDict["TransformationID"])
            queryResult = self.transClient.getTransformationMetaQuery(
                transID, "Input")
            if not queryResult["OK"]:
                if cmpError(queryResult, ENOENT):
                    self.log.info(
                        "InputDataAgent.execute: No input data query found for transformation",
                        transID)
                else:
                    self.log.error(
                        "InputDataAgent.execute: Failed to get input data query",
                        "for %d: %s" % (transID, queryResult["Message"]),
                    )
                continue
            inputDataQuery = queryResult["Value"]

            if self.refreshonly:
                self._applyRefreshOnly(transID, inputDataQuery)

            # Perform the query to the metadata catalog
            self.log.verbose("Using input data query for transformation",
                             "%d: %s" % (transID, str(inputDataQuery)))
            start = time.time()
            catalogResult = self.metadataClient.findFilesByMetadata(
                inputDataQuery)
            rtime = time.time() - start
            self.log.verbose("Metadata catalog query time",
                             ": %.2f seconds." % (rtime))
            if not catalogResult["OK"]:
                self.log.error(
                    "InputDataAgent.execute: Failed to get response from the metadata catalog",
                    catalogResult["Message"])
                continue
            lfnList = catalogResult["Value"]

            # Check if the number of files has changed since the last cycle.
            # This is informational only: the files are submitted below either way.
            nlfns = len(lfnList)
            self.log.info(
                "files returned for transformation from the metadata catalog: ",
                "%d -> %d" % (int(transID), nlfns))
            if nlfns == self.fileLog.get(transID):
                self.log.verbose(
                    "No new files in metadata catalog since last check")
            self.fileLog[transID] = nlfns

            # Add any new files to the transformation
            if lfnList:
                self._addFilesToTransformation(transID, lfnList)

        return S_OK()

    def _applyRefreshOnly(self, transID, inputDataQuery):
        """Restrict the query to recent files and update the time logs.

        When the transformation was already seen and the current full-update
        period has not elapsed, ``inputDataQuery[self.dateKey]`` is set to the
        time of the previous cycle minus 10 seconds. Once the period has
        elapsed the query is left untouched and a new period is started.
        ``inputDataQuery`` is modified in place.

        :param int transID: the TransformationID
        :param dict inputDataQuery: the input meta query of the transformation
        """
        now = datetime.datetime.utcnow
        if transID in self.timeLog and transID in self.fullTimeLog:
            fullPeriod = datetime.timedelta(seconds=self.fullUpdatePeriod)
            # If it is more than the full update period since the last full
            # query, make a full query just in case
            if now() - self.fullTimeLog[transID] < fullPeriod:
                if self.dateKey:
                    timeStamp = self.timeLog[transID]
                    inputDataQuery[self.dateKey] = (
                        timeStamp - datetime.timedelta(seconds=10)
                    ).strftime("%Y-%m-%d %H:%M:%S")
                else:
                    self.log.error(
                        "DateKey was not set in the CS, cannot use the RefreshOnly"
                    )
            else:
                self.fullTimeLog[transID] = now()
        self.timeLog[transID] = now()
        if transID not in self.fullTimeLog:
            self.fullTimeLog[transID] = now()

    def _addFilesToTransformation(self, transID, lfnList):
        """Add the LFNs to the transformation and log the outcome.

        If the call itself fails, ``self.fileLog[transID]`` is reset to 0.
        Otherwise every entry of the reply's "Failed" dict is logged as a
        warning and the entries of "Successful" with status "Added" are
        counted.

        :param int transID: the TransformationID
        :param lfnList: non-empty collection of LFNs returned by the catalog
        """
        self.log.verbose("Processing lfns for transformation:",
                         "%d -> %d" % (transID, len(lfnList)))
        # Add the files to the transformation
        self.log.verbose("Adding lfns for transformation:",
                         "%d -> %d" % (transID, len(lfnList)))
        addResult = self.transClient.addFilesToTransformation(
            transID, sorted(lfnList))
        if not addResult["OK"]:
            self.log.warn(
                "InputDataAgent.execute: failed to add lfns to transformation",
                addResult["Message"])
            self.fileLog[transID] = 0
            return

        # The failures must be read from the addFilesToTransformation reply,
        # not from the earlier meta query reply.
        failed = addResult["Value"]["Failed"]
        if failed:
            for lfn, error in failed.items():
                self.log.warn(
                    "InputDataAgent.execute: Failed to add to transformation:",
                    "%s: %s" % (lfn, error))

        successful = addResult["Value"]["Successful"]
        if successful:
            addedLfns = [
                lfn for lfn, status in successful.items() if status == "Added"
            ]
            self.log.info(
                "InputDataAgent.execute: Added files to transformation",
                "(%d)" % len(addedLfns))