Beispiel #1
0
  def putRequest( self, userName, userDN, userGroup, sourceSE, targetSE1, targetSE2 ):
    """ Build a test request for *userName*, log its content, and upload it.

    Any previously stored request with the same name is deleted first so the
    test starts from a clean slate.

    :param str userName: user name used to build the request name
    :param str userDN: DN stored as the request owner
    :param str userGroup: group stored as the request owner group
    :param str sourceSE: source storage element
    :param str targetSE1: first target storage element
    :param str targetSE2: second target storage element
    :return: S_OK/S_ERROR structure from ReqClient.putRequest (or from
             deleteRequest when the cleanup fails)
    """
    req = self.buildRequest( userName, userGroup, sourceSE, targetSE1, targetSE2 )
    req.RequestName = "test%s-%s" % ( userName, userGroup )
    req.OwnerDN = userDN
    req.OwnerGroup = userGroup

    gLogger.always( "putRequest: request '%s'" % req.RequestName )
    for operation in req:
      gLogger.always( "putRequest: => %s %s %s" % ( operation.Order, operation.Type, operation.TargetSE ) )
      for opFile in operation:
        gLogger.always( "putRequest: ===> file %s" % opFile.LFN )

    reqClient = ReqClient()

    # wipe any leftover request from a previous run before uploading
    delete = reqClient.deleteRequest( req.RequestName )
    if not delete["OK"]:
      gLogger.error( "putRequest: %s" % delete["Message"] )
      return delete

    put = reqClient.putRequest( req )
    if not put["OK"]:
      gLogger.error( "putRequest: %s" % put["Message"] )
    return put
Beispiel #2
0
def myRequest():
  """Create a file-removal request and put it in the request DB.

  Builds a Request with a single 'RemoveFile' operation for a hard-coded
  test LFN, validates it, and uploads it via ReqClient.

  :raises RuntimeError: if the assembled request fails validation
  """
  request = Request()
  request.RequestName = 'myAwesomeRemovalRequest.xml'
  request.JobID = 0
  request.SourceComponent = "myScript"

  remove = Operation()
  remove.Type = "RemoveFile"

  lfn = "/ilc/user/s/sailer/test.txt"
  rmFile = File()
  rmFile.LFN = lfn
  remove.addFile( rmFile )

  request.addOperation( remove )

  # validate locally before upload; an invalid request would be rejected anyway
  isValid = RequestValidator().validate( request )
  if not isValid['OK']:
    raise RuntimeError( "Failover request is not valid: %s" % isValid['Message'] )

  print("It is a GOGOGO")
  requestClient = ReqClient()
  result = requestClient.putRequest( request )
  print(result)
  def commitRequest( self ):
    """ Send the accumulated failover request to the Request Management Service.

    :return: S_OK() when there is nothing to send; S_ERROR when the request
             fails validation; otherwise the result of ReqClient.putRequest
    """
    # nothing accumulated -> nothing to do
    if self.request.isEmpty():
      return S_OK()

    isValid = RequestValidator().validate( self.request )
    if not isValid["OK"]:
      return S_ERROR( "Failover request is not valid: %s" % isValid["Message"] )

    return ReqClient().putRequest( self.request )
Beispiel #4
0
  def __init__(self, transClient=None, logger=None, requestClient=None,
               requestClass=None, requestValidator=None,
               ownerDN=None, ownerGroup=None):
    """ c'tor

        the requestClass is by default Request.
        If extensions want to use an extended type, they can pass it as a parameter.
        This is the same behavior as WorfkloTasks and jobClass

    :param transClient: transformation client, forwarded to the base class
    :param logger: sub-logger to use; a 'RequestTasks' sub-logger is created if None
    :param requestClient: ReqClient instance; one is created if None
    :param requestClass: class used to instantiate requests (default: Request)
    :param requestValidator: validator instance (default: RequestValidator())
    :param str ownerDN: DN whose credentials the ReqClient should use
    :param str ownerGroup: group whose credentials the ReqClient should use
    """
    if not logger:
      logger = gLogger.getSubLogger('RequestTasks')

    super(RequestTasks, self).__init__(transClient, logger)

    # delegate credentials only when both DN and group were supplied
    useCertificates = bool(ownerDN) and bool(ownerGroup)

    if not requestClient:
      self.requestClient = ReqClient(useCertificates=useCertificates,
                                     delegatedDN=ownerDN,
                                     delegatedGroup=ownerGroup)
    else:
      self.requestClient = requestClient

    if not requestClass:
      self.requestClass = Request
    else:
      self.requestClass = requestClass

    if not requestValidator:
      self.requestValidator = RequestValidator()
    else:
      self.requestValidator = requestValidator
  def __init__( self, *args, **kwargs ):
    """ c'tor: instantiate the clients used by the agent and declare
        placeholders for the options read from the CS at initialisation.
    """
    AgentModule.__init__( self, *args, **kwargs )

    # clients used by the agent
    self.replicaManager = ReplicaManager()
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    # FIXME: double client: only ReqClient will survive in the end
    self.requestClient = RequestClient()
    self.reqClient = ReqClient()
    self.metadataClient = FileCatalogClient()

    # placeholders for CS options, filled in at initialisation
    self.transformationTypes = None  # transformation types to consider
    self.directoryLocations = None   # where to look for directories
    self.transfidmeta = None         # metadata tag for the transformation ID
    self.archiveAfter = None         # archive period in days
    self.activeStorages = None       # active SEs
    self.logSE = None                # transformation log SE
    self.enableFlag = None           # enable/disable execution
Beispiel #6
0
  def __init__( self, transClient = None, logger = None, requestClient = None,
                requestClass = None, requestValidator = None ):
    """ c'tor

        the requestClass is by default Request.
        If extensions want to use an extended type, they can pass it as a parameter.
        This is the same behavior as WorfkloTasks and jobClass

    :param transClient: transformation client, forwarded to the base class
    :param logger: sub-logger; a 'RequestTasks' sub-logger is created if None
    :param requestClient: ReqClient instance; one is created if None
    :param requestClass: class used to instantiate requests (default: Request)
    :param requestValidator: validator instance (default: RequestValidator())
    """
    if not logger:
      logger = gLogger.getSubLogger( 'RequestTasks' )

    super( RequestTasks, self ).__init__( transClient, logger )

    # fall back to the default collaborators when none were injected
    self.requestClient = requestClient if requestClient else ReqClient()
    self.requestClass = requestClass if requestClass else Request
    self.requestValidator = requestValidator if requestValidator else RequestValidator()
Beispiel #7
0
  def callback(self):
    """ Trigger the callback once all the FTS interactions are done
        and update the status of the Operation to 'Finished' if successful

    :return: S_OK/S_ERROR structure returned by the concrete _callback
    """
    self.reqClient = ReqClient()

    result = self._callback()
    # only flip the status when the callback reported success
    if result['OK']:
      self.status = 'Finished'
    return result
Beispiel #8
0
  def __init__(
          self,
          requestJSON,
          handlersDict,
          csPath,
          agentName,
          standalone=False,
          requestClient=None):
    """c'tor

    :param self: self reference
    :param str requestJSON: request serialized to JSON
    :param dict handlersDict: operation handlers
    :param str csPath: CS path of the agent
    :param str agentName: name of the owning agent
    :param bool standalone: standalone-execution flag
    :param requestClient: ReqClient instance; one is created if None
    """
    self.request = Request(requestJSON)
    self.csPath = csPath
    self.agentName = agentName
    self.standalone = standalone
    self.handlersDict = handlersDict
    # handler class definitions, filled in lazily
    self.handlers = {}
    # per-request sub-logger, tagged with the worker pid
    self.log = gLogger.getSubLogger("pid_%s/%s" % (os.getpid(), self.request.RequestName))
    # shifter proxies; a failure here is logged but not fatal
    self.__managersDict = {}
    shifterProxies = self.__setupManagerProxies()
    if not shifterProxies["OK"]:
      self.log.error(shifterProxies["Message"])

    # initialize gMonitor
    gMonitor.setComponentType(gMonitor.COMPONENT_AGENT)
    gMonitor.setComponentName(self.agentName)
    gMonitor.initialize()

    # own gMonitor activities
    for activity, description in (("RequestAtt", "Requests processed"),
                                  ("RequestFail", "Requests failed"),
                                  ("RequestOK", "Requests done")):
      gMonitor.registerActivity(activity, description,
                                "RequestExecutingAgent", "Requests/min", gMonitor.OP_SUM)

    self.requestClient = requestClient if requestClient is not None else ReqClient()
  def initialize( self ):
    """ Agent initialisation: read configuration options and create clients.

    :param self: self reference
    :return: S_OK
    """
    # run with the DataManager shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )

    # transformation types: explicit CS list wins, otherwise the union of
    # data-processing and data-manipulation types
    self.dataProcTTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
    self.dataManipTTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = sorted( agentTSTypes )
    else:
      self.transformationTypes = sorted( self.dataProcTTypes + self.dataManipTTypes )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )

    # where to look for transformation directories
    self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB',
                                                                                 'MetadataCatalog' ] ) )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )

    # metadata tag holding the transformation ID
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )

    # archive period in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )  # days
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )

    # storage elements to check
    self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )

    # storage element hosting the logs
    self.logSE = Operations().getValue( '/LogStorage/LogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )

    # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )

    # clients
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.reqClient = ReqClient()
    self.metadataClient = FileCatalogClient()

    return S_OK()
  def initialize(self):
    """ Agent initialisation: read configuration options and create clients.

    :param self: self reference
    :return: S_OK
    """
    # See cleanCatalogContents method: this proxy will be used ALSO when the file catalog used
    # is the DIRAC File Catalog (DFC).
    # This is possible because of unset of the "UseServerCertificate" option
    self.shifterProxy = self.am_getOption('shifterProxy', None)

    # transformation types: explicit CS list wins, otherwise the union of
    # data-processing and data-manipulation types
    self.dataProcTTypes = Operations().getValue('Transformations/DataProcessing', self.dataProcTTypes)
    self.dataManipTTypes = Operations().getValue('Transformations/DataManipulation', self.dataManipTTypes)
    agentTSTypes = self.am_getOption('TransformationTypes', [])
    if agentTSTypes:
      self.transformationTypes = sorted(agentTSTypes)
    else:
      self.transformationTypes = sorted(self.dataProcTTypes + self.dataManipTTypes)
    self.log.info("Will consider the following transformation types: %s" % str(self.transformationTypes))

    # where to look for transformation directories
    self.directoryLocations = sorted(self.am_getOption('DirectoryLocations', self.directoryLocations))
    self.log.info("Will search for directories in the following locations: %s" % str(self.directoryLocations))

    # metadata tag holding the transformation ID
    self.transfidmeta = self.am_getOption('TransfIDMeta', self.transfidmeta)
    self.log.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta)

    # archive period in days
    self.archiveAfter = self.am_getOption('ArchiveAfter', self.archiveAfter)  # days
    self.log.info("Will archive Completed transformations after %d days" % self.archiveAfter)

    # storage elements to check, if any were configured
    self.activeStorages = sorted(self.am_getOption('ActiveSEs', self.activeStorages))
    if self.activeStorages:
      self.log.info("Will check the following storage elements: %s" % str(self.activeStorages))

    # storage element hosting the logs
    self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE)
    self.log.info("Will remove logs found on storage element: %s" % self.logSE)

    # clients
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.reqClient = ReqClient()
    self.metadataClient = FileCatalogClient()

    return S_OK()
Beispiel #11
0
    def __setRemovalRequest(self, lfn, ownerDN, ownerGroup):
        """ Set removal request with the given credentials

        :param str lfn: LFN of the file to remove
        :param str ownerDN: DN stored as the request owner
        :param str ownerGroup: group stored as the request owner group
        :return: S_OK/S_ERROR structure from ReqClient.putRequest
        """
        oRequest = Request()
        oRequest.OwnerDN = ownerDN
        oRequest.OwnerGroup = ownerGroup
        oRequest.RequestName = os.path.basename(lfn).strip() + '_removal_request.xml'
        oRequest.SourceComponent = 'JobCleaningAgent'

        removalOp = Operation()
        removalOp.Type = 'RemoveFile'

        fileToRemove = File()
        fileToRemove.LFN = lfn
        removalOp.addFile(fileToRemove)

        oRequest.addOperation(removalOp)
        return ReqClient().putRequest(oRequest)
  def __init__(self, *args, **kwargs):
    """ c'tor: set default option values and instantiate the clients used
        by the FileStatusTransformationAgent.
    """
    AgentModule.__init__(self, *args, **kwargs)
    self.name = 'FileStatusTransformationAgent'
    self.enabled = False
    self.shifterProxy = 'DataManager'

    # which transformations / files to consider by default
    self.transformationTypes = ["Replication"]
    self.transformationStatuses = ["Active"]
    self.transformationFileStatuses = ["Assigned", "Problematic", "Processed", "Unused"]

    # notification settings
    self.addressTo = ["*****@*****.**"]
    self.addressFrom = "*****@*****.**"
    self.emailSubject = "FileStatusTransformationAgent"

    # bookkeeping of actions taken and errors seen
    self.accounting = defaultdict(list)
    self.errors = []

    # clients
    self.fcClient = FileCatalogClient()
    self.tClient = TransformationClient()
    self.reqClient = ReqClient()
    self.nClient = NotificationClient()
Beispiel #13
0
    def __init__(self, requestJSON, handlersDict, csPath, agentName):
        """c'tor

        :param self: self reference
        :param str requestJSON: request serialized to JSON
        :param dict handlersDict: operation handlers
        :param str csPath: CS path of the agent
        :param str agentName: name of the owning agent
        """
        self.request = Request(requestJSON)
        self.csPath = csPath
        self.agentName = agentName
        self.handlersDict = handlersDict
        # handler class definitions, filled in lazily
        self.handlers = {}
        # per-request sub-logger
        self.log = gLogger.getSubLogger(self.request.RequestName)
        # shifter proxies; a failure here is logged but not fatal
        self.__managersDict = {}
        shifterProxies = self.__setupManagerProxies()
        if not shifterProxies["OK"]:
            self.log.error(shifterProxies["Message"])

        # initialize gMonitor
        gMonitor.setComponentType(gMonitor.COMPONENT_AGENT)
        gMonitor.setComponentName(self.agentName)
        gMonitor.initialize()

        # own gMonitor activities
        for activity, description in (("RequestAtt", "Requests processed"),
                                      ("RequestFail", "Requests failed"),
                                      ("RequestOK", "Requests done")):
            gMonitor.registerActivity(activity, description,
                                      "RequestExecutingAgent", "Requests/min",
                                      gMonitor.OP_SUM)

        self.requestClient = ReqClient()
Beispiel #14
0
    def __deleteSandboxFromExternalBackend(self, SEName, SEPFN):
        """Delete a sandbox stored on an external storage element.

        Depending on the "DelayedExternalDeletion" option, either queue an RMS
        'PhysicalRemoval' request on behalf of the sandbox owner, or remove
        the file from the storage element right away.
        """
        if self.getCSOption("DelayedExternalDeletion", True):
            gLogger.info("Setting deletion request")
            try:
                # We need the hostDN used in order to pass these credentials to the
                # SandboxStoreDB..
                hostCertLocation, _ = Locations.getHostCertificateAndKeyLocation()
                hostCert = X509Certificate.X509Certificate()
                hostCert.loadFromFile(hostCertLocation)
                hostDN = hostCert.getSubjectDN().get("Value")

                # use the host authentication to fetch the data
                result = self.sandboxDB.getSandboxOwner(SEName, SEPFN, hostDN, "hosts")
                if not result["OK"]:
                    return result
                _owner, ownerDN, ownerGroup = result["Value"]

                # build a physical-removal request owned by the sandbox owner
                deletionRequest = Request()
                deletionRequest.RequestName = "RemoteSBDeletion:%s|%s:%s" % (SEName, SEPFN, time.time())
                deletionRequest.OwnerDN = ownerDN
                deletionRequest.OwnerGroup = ownerGroup

                removalOp = Operation()
                removalOp.Type = "PhysicalRemoval"
                removalOp.TargetSE = SEName

                sbFile = File()
                sbFile.PFN = SEPFN
                removalOp.addFile(sbFile)

                deletionRequest.addOperation(removalOp)
                return ReqClient().putRequest(deletionRequest)
            except Exception as e:
                gLogger.exception("Exception while setting deletion request")
                return S_ERROR("Cannot set deletion request: %s" % str(e))

        gLogger.info("Deleting external Sandbox")
        try:
            return StorageElement(SEName).removeFile(SEPFN)
        except Exception:
            gLogger.exception("RM raised an exception while trying to delete a remote sandbox")
            return S_ERROR("RM raised an exception while trying to delete a remote sandbox")
Beispiel #15
0
    def __setRemovalRequest(self, lfn, ownerDN, ownerGroup):
        """Set removal request with the given credentials

        :param str lfn: LFN of the file to remove
        :param str ownerDN: DN the request is delegated to
        :param str ownerGroup: group the request is delegated to
        :return: S_OK/S_ERROR structure from ReqClient.putRequest
        """
        removalRequest = Request()
        removalRequest.OwnerDN = ownerDN
        removalRequest.OwnerGroup = ownerGroup
        removalRequest.RequestName = os.path.basename(lfn).strip() + "_removal_request.xml"
        removalRequest.SourceComponent = "JobCleaningAgent"

        removalOp = Operation()
        removalOp.Type = "RemoveFile"

        fileToRemove = File()
        fileToRemove.LFN = lfn
        removalOp.addFile(fileToRemove)

        removalRequest.addOperation(removalOp)

        # put the request with the owner certificate to make sure it's still a valid DN
        return ReqClient(useCertificates=True,
                         delegatedDN=ownerDN,
                         delegatedGroup=ownerGroup).putRequest(removalRequest)
Beispiel #16
0
    def setUp(self):
        """ Test case set up: build a two-file ReplicateAndRegister request
        owned by the current proxy, and keep a ReqClient handy.
        """
        gLogger.setLevel('INFO')

        def makeTestFile(lfn, checksum):
            # helper: build a File with an ADLER32 checksum
            testFile = File()
            testFile.LFN = lfn
            testFile.Checksum = checksum
            testFile.ChecksumType = "ADLER32"
            return testFile

        self.file = makeTestFile("/lhcb/user/c/cibak/testFile", "123456")
        self.file2 = makeTestFile("/lhcb/user/f/fstagni/testFile", "654321")

        self.operation = Operation()
        self.operation.Type = "ReplicateAndRegister"
        self.operation.TargetSE = "CERN-USER"
        self.operation.addFile(self.file)
        self.operation.addFile(self.file2)

        # request owned by the identity of the current proxy
        proxyInfo = getProxyInfo()['Value']
        self.request = Request()
        self.request.RequestName = "RequestManagerHandlerTests"
        self.request.OwnerDN = proxyInfo['identity']
        self.request.OwnerGroup = proxyInfo['group']
        self.request.JobID = 123
        self.request.addOperation(self.operation)

        # JSON representation of a whole request
        self.jsonStr = self.request.toJSON()['Value']
        # request client
        self.requestClient = ReqClient()

        self.stressRequests = 1000
        self.bulkRequest = 1000
def main():
    """Submit RemoveReplica/RemoveFile requests for the given LFNs.

    The first positional argument is the target SE (or "All" for full file
    removal); the remaining arguments are LFNs or files containing LFN lists.
    """
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(" SE:   StorageElement|All")
    Script.registerArgument(["LFN:  LFN or file containing a List of LFNs"])
    Script.parseCommandLine(ignoreErrors=False)

    # parseCommandLine shows help when mandatory arguments are missing or incorrect
    args = Script.getPositionalArgs()
    targetSE = args.pop(0)

    # each remaining argument is either an LFN or a file listing LFNs
    lfns = []
    for argument in args:
        if os.path.exists(argument):
            with open(argument, "r") as listFile:
                content = listFile.read()
            lfns.extend(line.strip() for line in content.splitlines())
        else:
            lfns.append(argument)

    from DIRAC.Resources.Storage.StorageElement import StorageElement
    import DIRAC

    # Check the provided SE is OK
    if targetSE != "All":
        se = StorageElement(targetSE)
        if not se.valid:
            print(se.errorReason)
            print()
            Script.showHelp()

    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.private.RequestValidator import RequestValidator
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

    reqClient = ReqClient()
    fc = FileCatalog()

    # removing from every SE means removing the file altogether
    requestOperation = "RemoveFile" if targetSE == "All" else "RemoveReplica"

    for lfnChunk in breakListIntoChunks(lfns, 100):

        oRequest = Request()
        # time-based pseudo-unique request name
        oRequest.RequestName = "%s_%s" % (
            md5(repr(time.time()).encode()).hexdigest()[:16],
            md5(repr(time.time()).encode()).hexdigest()[:16],
        )

        oOperation = Operation()
        oOperation.Type = requestOperation
        oOperation.TargetSE = targetSE

        res = fc.getFileMetadata(lfnChunk)
        if not res["OK"]:
            print("Can't get file metadata: %s" % res["Message"])
            DIRAC.exit(1)
        if res["Value"]["Failed"]:
            print(
                "Could not get the file metadata of the following, so skipping them:"
            )
            for failedLFN in res["Value"]["Failed"]:
                print(failedLFN)

        lfnMetadata = res["Value"]["Successful"]

        for lfn in lfnMetadata:
            opFile = File()
            opFile.LFN = lfn
            opFile.Size = lfnMetadata[lfn]["Size"]
            opFile.Checksum = lfnMetadata[lfn]["Checksum"]
            opFile.GUID = lfnMetadata[lfn]["GUID"]
            opFile.ChecksumType = "ADLER32"
            oOperation.addFile(opFile)

        oRequest.addOperation(oOperation)

        isValid = RequestValidator().validate(oRequest)
        if not isValid["OK"]:
            print("Request is not valid: ", isValid["Message"])
            DIRAC.exit(1)

        result = reqClient.putRequest(oRequest)
        if result["OK"]:
            print("Request %d Submitted" % result["Value"])
        else:
            print("Failed to submit Request: ", result["Message"])
Beispiel #18
0
      targetSE = set(switch[1].split(','))

  if reset and not force:
    status = 'Failed'
  if fixJob:
    status = 'Done'
  if terse:
    verbose = True
  if status:
    if not until:
      until = datetime.datetime.utcnow()
    if not since:
      since = until - datetime.timedelta(hours=24)
  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
  from DIRAC.RequestManagementSystem.Client.ReqClient import printRequest, recoverableRequest
  reqClient = ReqClient()
  if transID:
    if not taskIDs:
      gLogger.fatal("If Transformation is set, a list of Tasks should also be set")
      Script.showHelp(exitCode=2)
    # In principle, the task name is unique, so the request name should be unique as well
    # If ever this would not work anymore, we would need to use the transformationClient
    # to fetch the ExternalID
    requests = ['%08d_%08d' % (transID, task) for task in taskIDs]
    allR = True

  elif not jobs:
    requests = []
    # Get full list of arguments, with and without comma
    for arg in [x.strip() for arg in Script.getPositionalArgs() for x in arg.split(',')]:
      if os.path.exists(arg):
    def initialize(self):
        """ Agent initialisation: read configuration options and create clients.

        :param self: self reference
        :return: S_OK
        """
        # See cleanCatalogContents method: this proxy will be used ALSO when the file catalog used
        # is the DIRAC File Catalog (DFC).
        # This is possible because of unset of the "UseServerCertificate" option
        self.shifterProxy = self.am_getOption('shifterProxy', None)

        # transformation types: explicit CS list wins, otherwise the union of
        # data-processing and data-manipulation types
        self.dataProcTTypes = Operations().getValue(
            'Transformations/DataProcessing', self.dataProcTTypes)
        self.dataManipTTypes = Operations().getValue(
            'Transformations/DataManipulation', self.dataManipTTypes)
        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if agentTSTypes:
            self.transformationTypes = sorted(agentTSTypes)
        else:
            self.transformationTypes = sorted(self.dataProcTTypes + self.dataManipTTypes)
        self.log.info("Will consider the following transformation types: %s" %
                      str(self.transformationTypes))

        # where to look for transformation directories
        self.directoryLocations = sorted(
            self.am_getOption('DirectoryLocations', self.directoryLocations))
        self.log.info("Will search for directories in the following locations: %s" %
                      str(self.directoryLocations))

        # metadata tag holding the transformation ID
        self.transfidmeta = self.am_getOption('TransfIDMeta', self.transfidmeta)
        self.log.info("Will use %s as metadata tag name for TransformationID" %
                      self.transfidmeta)

        # archive period in days
        self.archiveAfter = self.am_getOption('ArchiveAfter', self.archiveAfter)  # days
        self.log.info("Will archive Completed transformations after %d days" %
                      self.archiveAfter)

        # storage elements to check, if any were configured
        self.activeStorages = sorted(self.am_getOption('ActiveSEs', self.activeStorages))
        if self.activeStorages:
            self.log.info("Will check the following storage elements: %s" %
                          str(self.activeStorages))

        # storage element hosting the logs
        self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE)
        self.log.info("Will remove logs found on storage element: %s" % self.logSE)

        # clients
        self.transClient = TransformationClient()
        self.wmsClient = WMSClient()
        self.reqClient = ReqClient()
        self.metadataClient = FileCatalogClient()

        return S_OK()
Beispiel #20
0
      since = convertDate( switch[1] )
    elif switch[0] == 'Until':
      until = convertDate( switch[1] )

  if reset:
    status = 'Failed'
  if terse:
    verbose = True
  if status:
    if not until:
      until = datetime.datetime.utcnow()
    if not since:
      since = until - datetime.timedelta( hours = 24 )
  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
  from DIRAC.RequestManagementSystem.Client.ReqClient import printRequest, recoverableRequest
  reqClient = ReqClient()
  if transID:
    if not taskIDs:
      gLogger.fatal( "If Transformation is set, a list of Tasks should also be set" )
      Script.showHelp()
      DIRAC.exit( 2 )
    # In principle, the task name is unique, so the request name should be unique as well
    # If ever this would not work anymore, we would need to use the transformationClient
    # to fetch the ExternalID
    requests = ['%08d_%08d' % ( transID, task ) for task in taskIDs]
    allR = True

  elif not jobs:
    args = Script.getPositionalArgs()
    if len( args ) == 1:
      allR = True
Beispiel #21
0
      targetSE = set(switch[1].split(','))

  if reset and not force:
    status = 'Failed'
  if fixJob:
    status = 'Done'
  if terse:
    verbose = True
  if status:
    if not until:
      until = datetime.datetime.utcnow()
    if not since:
      since = until - datetime.timedelta(hours=24)
  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
  from DIRAC.RequestManagementSystem.Client.ReqClient import printRequest, recoverableRequest
  reqClient = ReqClient()
  if transID:
    if not taskIDs:
      gLogger.fatal("If Transformation is set, a list of Tasks should also be set")
      Script.showHelp()
      DIRAC.exit(2)
    # In principle, the task name is unique, so the request name should be unique as well
    # If ever this would not work anymore, we would need to use the transformationClient
    # to fetch the ExternalID
    requests = ['%08d_%08d' % (transID, task) for task in taskIDs]
    allR = True

  elif not jobs:
    requests = []
    # Get full list of arguments, with and without comma
    for arg in [x.strip() for arg in Script.getPositionalArgs() for x in arg.split(',')]:
Beispiel #22
0
 def requestClient(cls):
     """ Return the class-wide ReqClient, creating and caching it on first use. """
     client = cls.__requestClient
     if not client:
         client = ReqClient()
         cls.__requestClient = client
     return client
Beispiel #23
0
class RequestTasks(TaskBase):
    """Task plugin turning transformation tasks into RMS Requests.

    Each task gets a Request built from the transformation body (either a JSON
    list of operations or a legacy single-operation string), validated with a
    RequestValidator, submitted via ReqClient, and later monitored through the
    same client.

    NOTE(review): this class uses ``basestring``/``iteritems``/``itervalues``,
    i.e. it targets Python 2 — confirm before running under Python 3.
    """
    def __init__(self,
                 transClient=None,
                 logger=None,
                 requestClient=None,
                 requestClass=None,
                 requestValidator=None,
                 ownerDN=None,
                 ownerGroup=None):
        """Constructor.

        :param transClient: TransformationClient, forwarded to TaskBase
        :param logger: sub-logger; a 'RequestTasks' sub-logger is created if None
        :param requestClient: ReqClient override; if None one is created, using
            delegated credentials when both ownerDN and ownerGroup are given
        :param requestClass: Request class to instantiate; extensions may pass
            an extended type (same behavior as WorkflowTasks and jobClass)
        :param requestValidator: RequestValidator override
        :param str ownerDN: certificate DN used to talk to the RMS
        :param str ownerGroup: DIRAC group used to talk to the RMS
        """

        if not logger:
            logger = gLogger.getSubLogger('RequestTasks')

        super(RequestTasks, self).__init__(transClient, logger)
        # Use certificates only when a full delegated identity (DN + group) is given
        useCertificates = True if (bool(ownerDN)
                                   and bool(ownerGroup)) else False

        if not requestClient:
            self.requestClient = ReqClient(useCertificates=useCertificates,
                                           delegatedDN=ownerDN,
                                           delegatedGroup=ownerGroup)
        else:
            self.requestClient = requestClient

        if not requestClass:
            self.requestClass = Request
        else:
            self.requestClass = requestClass

        if not requestValidator:
            self.requestValidator = RequestValidator()
        else:
            self.requestValidator = requestValidator

    def prepareTransformationTasks(self,
                                   transBody,
                                   taskDict,
                                   owner='',
                                   ownerGroup='',
                                   ownerDN='',
                                   bulkSubmissionFlag=False):
        """Attach a validated Request ('TaskObject') to every task in taskDict.

        :param transBody: transformation body; a JSON list of operation tuples,
            or a legacy plain string (possibly empty) for a single operation
        :param dict taskDict: dictionary of tasks (as created, with some
            manipulation, by the DB); modified in place
        :param str owner: request owner username; taken from the proxy if empty
        :param str ownerGroup: owner group; taken from the proxy if empty
        :param str ownerDN: owner certificate DN; resolved from owner if empty
        :param bulkSubmissionFlag: unused here, kept for interface compatibility
        """
        if not taskDict:
            return S_OK({})

        # Fall back to the identity of the current proxy when owner info is missing
        if (not owner) or (not ownerGroup):
            res = getProxyInfo(False, False)
            if not res['OK']:
                return res
            proxyInfo = res['Value']
            owner = proxyInfo['username']
            ownerGroup = proxyInfo['group']

        if not ownerDN:
            res = getDNForUsername(owner)
            if not res['OK']:
                return res
            ownerDN = res['Value'][0]

        try:
            # JSON body: one request with (possibly) several operations per task
            transJson = json.loads(transBody)
            self._multiOperationsBody(transJson, taskDict, ownerDN, ownerGroup)
        except ValueError:  # #json couldn't load
            # Legacy plain-string body: a single operation per request
            self._singleOperationsBody(transBody, taskDict, ownerDN,
                                       ownerGroup)

        return S_OK(taskDict)

    def _multiOperationsBody(self, transJson, taskDict, ownerDN, ownerGroup):
        """ deal with a Request that has multiple operations

    :param transJson: list of lists of string and dictionaries, e.g.:

      .. code :: python

        body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                 ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
               ]

    :param dict taskDict: dictionary of tasks, modified in this function
    :param str ownerDN: certificate DN used for the requests
    :param str ownerGroup: dirac group used for the requests

    :returns: None
    """
        failedTasks = []
        # NOTE(review): .items() snapshots the dict in Python 2, so pop() inside
        # the loop is safe there — confirm if ever ported to Python 3
        for taskID, task in taskDict.items():
            transID = task['TransformationID']
            if not task.get('InputData'):
                self._logError("Error creating request for task",
                               "%s, No input data" % taskID,
                               transID=transID)
                taskDict.pop(taskID)
                continue
            files = []

            oRequest = Request()
            # InputData may be either a list of LFNs or a ';'-separated string
            if isinstance(task['InputData'], list):
                files = task['InputData']
            elif isinstance(task['InputData'], basestring):
                files = task['InputData'].split(';')

            # create the operations from the json structure
            for operationTuple in transJson:
                op = Operation()
                op.Type = operationTuple[0]
                for parameter, value in operationTuple[1].iteritems():
                    setattr(op, parameter, value)

                # every operation of the request acts on the full file list
                for lfn in files:
                    opFile = File()
                    opFile.LFN = lfn
                    op.addFile(opFile)

                oRequest.addOperation(op)

            result = self._assignRequestToTask(oRequest, taskDict, transID,
                                               taskID, ownerDN, ownerGroup)
            if not result['OK']:
                failedTasks.append(taskID)
        # Remove failed tasks
        for taskID in failedTasks:
            taskDict.pop(taskID)

    def _singleOperationsBody(self, transBody, taskDict, ownerDN, ownerGroup):
        """ deal with a Request that has just one operation, as it was so far

    :param transBody: string, can be an empty string; if non-empty it is
        expected to be "<requestType>;<requestOperation>"
    :param dict taskDict: dictionary of tasks, modified in this function
    :param str ownerDN: certificate DN used for the requests
    :param str ownerGroup: dirac group used for the requests

    :returns: None
    """

        requestOperation = 'ReplicateAndRegister'
        if transBody:
            try:
                _requestType, requestOperation = transBody.split(';')
            except AttributeError:
                pass
        failedTasks = []
        # Failed tasks are only popped AFTER the loop: iteritems() is a live
        # view in Python 2 and must not see concurrent deletions
        for taskID, task in taskDict.iteritems():

            transID = task['TransformationID']

            oRequest = Request()
            transfer = Operation()
            transfer.Type = requestOperation
            transfer.TargetSE = task['TargetSE']

            # If there are input files
            if task.get('InputData'):
                if isinstance(task['InputData'], list):
                    files = task['InputData']
                elif isinstance(task['InputData'], basestring):
                    files = task['InputData'].split(';')
                for lfn in files:
                    trFile = File()
                    trFile.LFN = lfn

                    transfer.addFile(trFile)

            oRequest.addOperation(transfer)
            result = self._assignRequestToTask(oRequest, taskDict, transID,
                                               taskID, ownerDN, ownerGroup)
            if not result['OK']:
                failedTasks.append(taskID)
        # Remove failed tasks
        for taskID in failedTasks:
            taskDict.pop(taskID)

    def _assignRequestToTask(self, oRequest, taskDict, transID, taskID,
                             ownerDN, ownerGroup):
        """Set name/owner on the request and store it in the task if valid.

    On success the request is stored as taskDict[taskID]['TaskObject'];
    on validation failure the caller is expected to drop the task.

    :param oRequest: Request
    :param dict taskDict: dictionary of tasks, modified in this function
    :param int transID: Transformation ID
    :param int taskID: Task ID
    :param str ownerDN: certificate DN used for the requests
    :param str ownerGroup: dirac group used for the requests

    :returns: S_OK() on success, S_ERROR if the request does not validate
    """

        oRequest.RequestName = self._transTaskName(transID, taskID)
        oRequest.OwnerDN = ownerDN
        oRequest.OwnerGroup = ownerGroup

        isValid = self.requestValidator.validate(oRequest)
        if not isValid['OK']:
            self._logError("Error creating request for task",
                           "%s %s" % (taskID, isValid),
                           transID=transID)
            return S_ERROR('Error creating request')
        taskDict[taskID]['TaskObject'] = oRequest
        return S_OK()

    def submitTransformationTasks(self, taskDict):
        """ Submit requests one by one

        Sets 'ExternalID' and 'Success' on each task; tasks without a
        'TaskObject' are counted as failed.
    """
        submitted = 0
        failed = 0
        startTime = time.time()
        method = 'submitTransformationTasks'
        for task in taskDict.itervalues():
            # transID is the same for all tasks, so pick it up every time here
            transID = task['TransformationID']
            if not task['TaskObject']:
                task['Success'] = False
                failed += 1
                continue
            res = self.submitTaskToExternal(task['TaskObject'])
            if res['OK']:
                task['ExternalID'] = res['Value']
                task['Success'] = True
                submitted += 1
            else:
                self._logError("Failed to submit task to RMS",
                               res['Message'],
                               transID=transID)
                task['Success'] = False
                failed += 1
        if submitted:
            self._logInfo('Submitted %d tasks to RMS in %.1f seconds' %
                          (submitted, time.time() - startTime),
                          transID=transID,
                          method=method)
        if failed:
            self._logWarn('Failed to submit %d tasks to RMS.' % (failed),
                          transID=transID,
                          method=method)
        return S_OK(taskDict)

    def submitTaskToExternal(self, oRequest):
        """
    Submits a request to RMS

    :param oRequest: must be an instance of self.requestClass
    :returns: putRequest result (request ID in 'Value'), or S_ERROR on type mismatch
    """
        if isinstance(oRequest, self.requestClass):
            return self.requestClient.putRequest(oRequest,
                                                 useFailoverProxy=False,
                                                 retryMainService=2)
        return S_ERROR("Request should be a Request object")

    def updateTransformationReservedTasks(self, taskDicts):
        """Split tasks into those with an ExternalID and those without.

    :returns: S_OK with 'TaskNameIDs' (name -> request ID) and 'NoTasks'
        (names of tasks with no external request ID)
    """
        requestNameIDs = {}
        noTasks = []
        for taskDict in taskDicts:
            requestName = self._transTaskName(taskDict['TransformationID'],
                                              taskDict['TaskID'])
            reqID = taskDict['ExternalID']
            if reqID:
                requestNameIDs[requestName] = reqID
            else:
                noTasks.append(requestName)
        return S_OK({'NoTasks': noTasks, 'TaskNameIDs': requestNameIDs})

    def getSubmittedTaskStatus(self, taskDicts):
        """
    Check if tasks changed status, and return a list of tasks per new status
    """
        updateDict = {}
        badRequestID = 0
        for taskDict in taskDicts:
            oldStatus = taskDict['ExternalStatus']
            # ExternalID is normally a string
            # NOTE(review): int() will raise if ExternalID is a non-numeric
            # string — TODO confirm upstream guarantees numeric IDs
            if taskDict['ExternalID'] and int(taskDict['ExternalID']):
                newStatus = self.requestClient.getRequestStatus(
                    taskDict['ExternalID'])
                if not newStatus['OK']:
                    # A vanished request is only worth a verbose message
                    log = self._logVerbose if 'not exist' in newStatus[
                        'Message'] else self._logWarn
                    log("getSubmittedTaskStatus: Failed to get requestID for request",
                        newStatus['Message'],
                        transID=taskDict['TransformationID'])
                else:
                    newStatus = newStatus['Value']
                    # We don't care updating the tasks to Assigned while the request is being processed
                    if newStatus != oldStatus and newStatus != 'Assigned':
                        updateDict.setdefault(newStatus,
                                              []).append(taskDict['TaskID'])
            else:
                badRequestID += 1
        if badRequestID:
            self._logWarn("%d requests have identifier 0" % badRequestID)
        return S_OK(updateDict)

    def getSubmittedFileStatus(self, fileDicts):
        """
    Check if transformation files changed status, and return a list of taskIDs per new status
    """
        # Don't try and get status of not submitted tasks!
        transID = None
        taskFiles = {}
        for fileDict in fileDicts:
            # There is only one transformation involved, get however the transID in the loop
            transID = fileDict['TransformationID']
            taskID = int(fileDict['TaskID'])
            taskFiles.setdefault(taskID, []).append(fileDict['LFN'])
        # Should not happen, but just in case there are no files, return
        if transID is None:
            return S_OK({})

        res = self.transClient.getTransformationTasks({
            'TransformationID':
            transID,
            'TaskID':
            taskFiles.keys()
        })
        if not res['OK']:
            return res
        requestFiles = {}
        for taskDict in res['Value']:
            taskID = taskDict['TaskID']
            externalID = taskDict['ExternalID']
            # Only consider tasks that are submitted, ExternalID is a string
            if taskDict['ExternalStatus'] != 'Created' and externalID and int(
                    externalID):
                requestFiles[externalID] = taskFiles[taskID]

        updateDict = {}
        for requestID, lfnList in requestFiles.iteritems():
            statusDict = self.requestClient.getRequestFileStatus(
                requestID, lfnList)
            if not statusDict['OK']:
                log = self._logVerbose if 'not exist' in statusDict[
                    'Message'] else self._logWarn
                log("Failed to get files status for request",
                    statusDict['Message'],
                    transID=transID,
                    method='getSubmittedFileStatus')
            else:
                # Map RMS file status onto transformation file status
                for lfn, newStatus in statusDict['Value'].iteritems():
                    if newStatus == 'Done':
                        updateDict[lfn] = 'Processed'
                    elif newStatus == 'Failed':
                        updateDict[lfn] = 'Problematic'
        return S_OK(updateDict)
def main():
    """Create and submit 'MoveReplica' requests for a list of LFNs.

    Positional arguments: source SE, an LFN (or a file listing LFNs), and one
    or more target SEs.  LFNs are processed in chunks of 100, one Request per
    chunk.  Exits with 0 on success, -1 if any chunk failed.
    """
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(" sourceSE:   source SE")
    Script.registerArgument(" LFN:        LFN or file containing a List of LFNs")
    Script.registerArgument(["targetSE:   target SEs"])
    Script.parseCommandLine()

    import DIRAC
    from DIRAC import gLogger

    # parseCommandLine show help when mandatory arguments are not specified or incorrect argument
    args = Script.getPositionalArgs()

    sourceSE = args[0]
    lfnList = getLFNList(args[1])  # getLFNList is defined elsewhere in this module
    # Remaining args may each be a comma-separated SE list; deduplicate them
    targetSEs = list(set([se for targetSE in args[2:] for se in targetSE.split(",")]))

    gLogger.info(
        "Will create request with 'MoveReplica' "
        "operation using %s lfns and %s target SEs" % (len(lfnList), len(targetSEs))
    )

    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    from DIRAC.Core.Utilities.List import breakListIntoChunks

    # One Request per chunk of at most 100 LFNs
    lfnChunks = breakListIntoChunks(lfnList, 100)
    multiRequests = len(lfnChunks) > 1

    error = 0
    count = 0
    reqClient = ReqClient()
    fc = FileCatalog()
    for lfnChunk in lfnChunks:
        # Fetch catalog metadata (size, checksum) for the chunk
        metaDatas = fc.getFileMetadata(lfnChunk)
        if not metaDatas["OK"]:
            gLogger.error("unable to read metadata for lfns: %s" % metaDatas["Message"])
            error = -1
            continue
        metaDatas = metaDatas["Value"]
        for failedLFN, reason in metaDatas["Failed"].items():
            gLogger.error("skipping %s: %s" % (failedLFN, reason))
        lfnChunk = set(metaDatas["Successful"])

        if not lfnChunk:
            gLogger.error("LFN list is empty!!!")
            error = -1
            continue

        if len(lfnChunk) > Operation.MAX_FILES:
            gLogger.error("too many LFNs, max number of files per operation is %s" % Operation.MAX_FILES)
            error = -1
            continue

        count += 1

        request = Request()
        # Pseudo-unique name from two timestamp hashes; md5/time are expected
        # to be imported at module level — TODO confirm
        request.RequestName = "%s_%s" % (
            md5(repr(time.time()).encode()).hexdigest()[:16],
            md5(repr(time.time()).encode()).hexdigest()[:16],
        )

        moveReplica = Operation()
        moveReplica.Type = "MoveReplica"
        moveReplica.SourceSE = sourceSE
        moveReplica.TargetSE = ",".join(targetSEs)

        for lfn in lfnChunk:
            metaDict = metaDatas["Successful"][lfn]
            opFile = File()
            opFile.LFN = lfn
            opFile.Size = metaDict["Size"]

            if "Checksum" in metaDict:
                # # should check checksum type, now assuming Adler32 (metaDict["ChecksumType"] = 'AD'
                opFile.Checksum = metaDict["Checksum"]
                opFile.ChecksumType = "ADLER32"
            moveReplica.addFile(opFile)

        request.addOperation(moveReplica)

        result = reqClient.putRequest(request)
        if not result["OK"]:
            gLogger.error("Failed to submit Request: %s" % (result["Message"]))
            error = -1
            continue

        if not multiRequests:
            # result["Value"] is the ID assigned to the new request
            gLogger.always("Request %d submitted successfully" % result["Value"])

    if multiRequests:
        gLogger.always("%d requests have been submitted" % (count))
    DIRAC.exit(error)
from DIRAC.Core.Base import Script
# Usage message shown by --help; must be registered before parseCommandLine()
Script.setUsageMessage('\n'.join([
    __doc__, 'Usage:',
    ' %s [option|cfgfile] <Request list>' % Script.scriptName
]))

if __name__ == "__main__":

    from DIRAC.Core.Base.Script import parseCommandLine
    parseCommandLine()

    import DIRAC
    requests = []

    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    reqClient = ReqClient()

    # A single positional argument may carry a comma-separated list of names
    args = Script.getPositionalArgs()
    if len(args) == 1:
        requests = [reqName for reqName in args[0].split(',') if reqName]

    if not requests:
        DIRAC.gLogger.fatal("Need at least one request name")
        Script.showHelp()
        DIRAC.exit(1)

    # Cancel each request; report failures instead of silently ignoring them
    error = 0
    for reqName in requests:
        reqName = reqName.strip()
        res = reqClient.cancelRequest(reqName)
        if res['OK']:
            DIRAC.gLogger.always("Request %s canceled" % reqName)
        else:
            # BUGFIX: failed cancellations were previously swallowed silently
            DIRAC.gLogger.error("Failed to cancel request %s: %s" % (reqName, res['Message']))
            error = 2
    DIRAC.exit(error)
Beispiel #26
0
def main():
    """Inspect, reset, cancel or fix RMS requests.

    Requests can be selected by ID/name (positional arguments or a file), by
    --Job, by --Transformation/--Tasks, or by --Status within a time window.
    Depending on the switches, the selected requests are printed, reset to
    Waiting, cancelled, or their parent job status is fixed.
    """
    # Command-line switches
    Script.registerSwitch("", "Job=", "   JobID[,jobID2,...]")
    Script.registerSwitch("", "Transformation=", "   transformation ID")
    Script.registerSwitch("", "Tasks=", "      Associated to --Transformation, list of taskIDs")
    Script.registerSwitch("", "Verbose", "   Print more information")
    Script.registerSwitch("", "Terse", "   Only print request status")
    Script.registerSwitch("", "Full", "   Print full request content")
    Script.registerSwitch("", "Status=", "   Select all requests in a given status")
    Script.registerSwitch(
        "", "Since=", "      Associated to --Status, start date yyyy-mm-dd or nb of days (default= -one day"
    )
    Script.registerSwitch("", "Until=", "      Associated to --Status, end date (default= now")
    Script.registerSwitch("", "Maximum=", "      Associated to --Status, max number of requests ")
    Script.registerSwitch("", "Reset", "   Reset Failed files to Waiting if any")
    Script.registerSwitch("", "Force", "   Force reset even if not Failed")
    Script.registerSwitch(
        "", "All", "      (if --Status Failed) all requests, otherwise exclude irrecoverable failures"
    )
    Script.registerSwitch("", "FixJob", "   Set job Done if the request is Done")
    Script.registerSwitch("", "Cancel", "   Cancel the request")
    Script.registerSwitch("", "ListJobs", " List the corresponding jobs")
    Script.registerSwitch("", "TargetSE=", " Select request only if that SE is in the targetSEs")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(
        (
            "file:     a file containing a list of requests (Comma-separated on each line)",
            "request:  a request ID or a unique request name",
        ),
        mandatory=False,
    )
    Script.registerArgument(["request:  a request ID or a unique request name"], mandatory=False)
    Script.parseCommandLine()

    import DIRAC
    from DIRAC import gLogger

    # Defaults for all selection/action options
    jobs = []
    requestID = 0
    transID = None
    taskIDs = None
    requests = []
    full = False
    verbose = False
    status = None
    until = None
    since = None
    terse = False
    allR = False
    reset = False
    fixJob = False
    maxRequests = 999999999999
    cancel = False
    listJobs = False
    force = False
    targetSE = set()
    for switch in Script.getUnprocessedSwitches():
        if switch[0] == "Job":
            jobs = []
            job = "Unknown"
            try:
                for arg in switch[1].split(","):
                    if os.path.exists(arg):
                        # The argument may be a file of comma-separated job IDs
                        with open(arg, "r") as fp:
                            lines = fp.readlines()
                        for line in lines:
                            for job in line.split(","):
                                jobs += [int(job.strip())]
                        gLogger.notice("Found %d jobs in file %s" % (len(jobs), arg))
                    else:
                        jobs.append(int(arg))
            except (TypeError, ValueError):
                # BUGFIX: int() raises ValueError on malformed IDs; only
                # TypeError was trapped before, crashing on bad input
                gLogger.fatal("Invalid jobID", job)
        elif switch[0] == "Transformation":
            try:
                transID = int(switch[1])
            except Exception:
                gLogger.fatal("Invalid transID", switch[1])
        elif switch[0] == "Tasks":
            try:
                taskIDs = [int(task) for task in switch[1].split(",")]
            except Exception:
                gLogger.fatal("Invalid tasks", switch[1])
        elif switch[0] == "Full":
            full = True
        elif switch[0] == "Verbose":
            verbose = True
        elif switch[0] == "Terse":
            terse = True
        elif switch[0] == "All":
            allR = True
        elif switch[0] == "Reset":
            reset = True
        elif switch[0] == "Force":
            force = True
        elif switch[0] == "Status":
            status = switch[1].capitalize()
        elif switch[0] == "Since":
            since = convertDate(switch[1])
        elif switch[0] == "Until":
            until = convertDate(switch[1])
        elif switch[0] == "FixJob":
            fixJob = True
        elif switch[0] == "Cancel":
            cancel = True
        elif switch[0] == "ListJobs":
            listJobs = True
        elif switch[0] == "Maximum":
            try:
                maxRequests = int(switch[1])
            except Exception:
                pass
        elif switch[0] == "TargetSE":
            targetSE = set(switch[1].split(","))

    # Derived defaults: some actions imply a status filter / verbosity
    if reset and not force:
        status = "Failed"
    if fixJob:
        status = "Done"
    if terse:
        verbose = True
    if status:
        # Status selection defaults to the last 24 hours
        if not until:
            until = datetime.datetime.utcnow()
        if not since:
            since = until - datetime.timedelta(hours=24)
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.Client.ReqClient import printRequest, recoverableRequest

    reqClient = ReqClient()
    if transID:
        if not taskIDs:
            gLogger.fatal("If Transformation is set, a list of Tasks should also be set")
            Script.showHelp(exitCode=2)
        # In principle, the task name is unique, so the request name should be unique as well
        # If ever this would not work anymore, we would need to use the transformationClient
        # to fetch the ExternalID
        requests = ["%08d_%08d" % (transID, task) for task in taskIDs]
        allR = True

    elif not jobs:
        requests = []
        # Get full list of arguments, with and without comma
        for arg in [x.strip() for ar in Script.getPositionalArgs() for x in ar.split(",")]:
            if os.path.exists(arg):
                # BUGFIX: close the request-list file (was leaked before)
                with open(arg, "r") as reqFile:
                    lines = reqFile.readlines()
                requests += [reqID.strip() for line in lines for reqID in line.split(",")]
                gLogger.notice("Found %d requests in file" % len(requests))
            else:
                requests.append(arg)
            allR = True
    else:
        # Resolve requests from the given job IDs
        res = reqClient.getRequestIDsForJobs(jobs)
        if not res["OK"]:
            gLogger.fatal("Error getting request for jobs", res["Message"])
            DIRAC.exit(2)
        if res["Value"]["Failed"]:
            gLogger.error("No request found for jobs %s" % ",".join(sorted(str(job) for job in res["Value"]["Failed"])))
        requests = sorted(res["Value"]["Successful"].values())
        if requests:
            allR = True
        else:
            DIRAC.exit(0)

    if status and not requests:
        # Select by status within the [since, until] window
        allR = allR or status != "Failed"
        res = reqClient.getRequestIDsList([status], limit=maxRequests, since=since, until=until)

        if not res["OK"]:
            gLogger.error("Error getting requests:", res["Message"])
            DIRAC.exit(2)
        requests = [reqID for reqID, _st, updTime in res["Value"] if updTime > since and updTime <= until and reqID]
        gLogger.notice("Obtained %d requests %s between %s and %s" % (len(requests), status, since, until))
    if not requests:
        gLogger.notice("No request selected....")
        Script.showHelp(exitCode=2)
    okRequests = []
    jobIDList = []
    for reqID in requests:
        # We allow reqID to be the requestName if it is unique
        try:
            # PEP-515 allows for underscore in numerical literals
            # So a request name 00123_00456
            # is interpreted as a requestID 12300456
            # Using an exception here for non-string is not an option
            if isinstance(reqID, str) and not reqID.isdigit():
                raise ValueError()

            requestID = int(reqID)
        except (ValueError, TypeError):
            requestID = reqClient.getRequestIDForName(reqID)
            if not requestID["OK"]:
                gLogger.notice(requestID["Message"])
                continue
            requestID = requestID["Value"]

        request = reqClient.peekRequest(requestID)
        if not request["OK"]:
            gLogger.error(request["Message"])
            DIRAC.exit(-1)

        request = request["Value"]
        if not request:
            gLogger.error("no such request %s" % requestID)
            continue
        # If no operation as the targetSE, skip
        if targetSE:
            found = False
            for op in request:
                if op.TargetSE and targetSE.intersection(op.TargetSE.split(",")):
                    found = True
                    break
            if not found:
                continue
        # keep a list of jobIDs if requested
        if request.JobID and listJobs:
            jobIDList.append(request.JobID)

        if status and request.Status != status:
            gLogger.notice(
                "Request %s is not in requested status %s%s" % (reqID, status, " (cannot be reset)" if reset else "")
            )
            continue

        if fixJob and request.Status == "Done" and request.JobID:
            # The request is for a job and is Done, verify that the job is in the proper status
            result = reqClient.finalizeRequest(request.RequestID, request.JobID, useCertificates=False)
            if not result["OK"]:
                gLogger.error("Error finalizing job", result["Message"])
            else:
                gLogger.notice("Job %d updated to %s" % (request.JobID, result["Value"]))
            continue

        if cancel:
            if request.Status not in ("Done", "Failed"):
                ret = reqClient.cancelRequest(requestID)
                if not ret["OK"]:
                    gLogger.error("Error canceling request %s" % reqID, ret["Message"])
                else:
                    gLogger.notice("Request %s cancelled" % reqID)
            else:
                gLogger.notice("Request %s is in status %s, not cancelled" % (reqID, request.Status))

        elif allR or recoverableRequest(request):
            okRequests.append(str(requestID))
            if reset:
                gLogger.notice("============ Request %s =============" % requestID)
                ret = reqClient.resetFailedRequest(requestID, allR=allR)
                if not ret["OK"]:
                    gLogger.error("Error resetting request %s" % requestID, ret["Message"])
            else:
                if len(requests) > 1:
                    gLogger.notice("\n===================================")
                dbStatus = reqClient.getRequestStatus(requestID).get("Value", "Unknown")
                printRequest(request, status=dbStatus, full=full, verbose=verbose, terse=terse)

    if listJobs:
        gLogger.notice("List of %d jobs:\n" % len(jobIDList), ",".join(str(jobID) for jobID in jobIDList))

    if status and okRequests:
        from DIRAC.Core.Utilities.List import breakListIntoChunks

        gLogger.notice("\nList of %d selected requests:" % len(okRequests))
        for reqs in breakListIntoChunks(okRequests, 100):
            gLogger.notice(",".join(reqs))
Beispiel #27
0
class RequestTask(object):
  """
  .. class:: RequestTask

  request's processing task
  """

  def __init__(
          self,
          requestJSON,
          handlersDict,
          csPath,
          agentName,
          standalone=False,
          requestClient=None):
    """c'tor

    :param self: self reference
    :param str requestJSON: request serialized to JSON
    :param dict handlersDict: operation handlers (operation type -> plugin path)
    :param str csPath: CS path of the owning agent
    :param str agentName: name of the owning agent
    :param bool standalone: when True, honour the local UseServerCertificate setting
        instead of forcing server certificates
    :param requestClient: ReqClient instance to reuse (a new one is created when None)
    """
    self.request = Request(requestJSON)
    # # csPath
    self.csPath = csPath
    # # agent name
    self.agentName = agentName
    # # standalone flag
    self.standalone = standalone
    # # handlers dict
    self.handlersDict = handlersDict
    # # handlers class def
    self.handlers = {}
    # # own sublogger
    self.log = gLogger.getSubLogger("pid_%s/%s" % (os.getpid(), self.request.RequestName))
    # # get shifters info
    self.__managersDict = {}
    shifterProxies = self.__setupManagerProxies()
    if not shifterProxies["OK"]:
      # not fatal here: setupProxy() will retry and can fall back to the owner proxy
      self.log.error(shifterProxies["Message"])

    # # initialize gMonitor
    gMonitor.setComponentType(gMonitor.COMPONENT_AGENT)
    gMonitor.setComponentName(self.agentName)
    gMonitor.initialize()

    # # own gMonitor activities
    gMonitor.registerActivity("RequestAtt", "Requests processed",
                              "RequestExecutingAgent", "Requests/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("RequestFail", "Requests failed",
                              "RequestExecutingAgent", "Requests/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("RequestOK", "Requests done",
                              "RequestExecutingAgent", "Requests/min", gMonitor.OP_SUM)

    if requestClient is None:
      self.requestClient = ReqClient()
    else:
      self.requestClient = requestClient

  def __setupManagerProxies(self):
    """ setup grid proxy for all managers defined in the CS 'Shifter' section

    Fills :attr:`__managersDict` with DN/name/group/chain/proxy-file info
    per shifter.  A failure for an individual shifter's config is only
    logged; a failure to download a proxy aborts with S_ERROR.
    """
    oHelper = Operations()
    shifters = oHelper.getSections("Shifter")
    if not shifters["OK"]:
      self.log.error(shifters["Message"])
      return shifters
    shifters = shifters["Value"]
    for shifter in shifters:
      shifterDict = oHelper.getOptionsDict("Shifter/%s" % shifter)
      if not shifterDict["OK"]:
        self.log.error(shifterDict["Message"])
        continue
      userName = shifterDict["Value"].get("User", "")
      userGroup = shifterDict["Value"].get("Group", "")

      userDN = CS.getDNForUsername(userName)
      if not userDN["OK"]:
        self.log.error(userDN["Message"])
        continue
      # a user may have several DNs registered; use the first one
      userDN = userDN["Value"][0]
      vomsAttr = CS.getVOMSAttributeForGroup(userGroup)
      if vomsAttr:
        self.log.debug("getting VOMS [%s] proxy for shifter %s@%s (%s)" % (vomsAttr, userName,
                                                                           userGroup, userDN))
        getProxy = gProxyManager.downloadVOMSProxyToFile(userDN, userGroup,
                                                         requiredTimeLeft=1200,
                                                         cacheTime=4 * 43200)
      else:
        self.log.debug("getting proxy for shifter %s@%s (%s)" % (userName, userGroup, userDN))
        getProxy = gProxyManager.downloadProxyToFile(userDN, userGroup,
                                                     requiredTimeLeft=1200,
                                                     cacheTime=4 * 43200)
      if not getProxy["OK"]:
        self.log.error(getProxy["Message"])
        return S_ERROR("unable to setup shifter proxy for %s: %s" % (shifter, getProxy["Message"]))
      chain = getProxy["chain"]
      fileName = getProxy["Value"]
      self.log.debug("got %s: %s %s" % (shifter, userName, userGroup))
      self.__managersDict[shifter] = {"ShifterDN": userDN,
                                      "ShifterName": userName,
                                      "ShifterGroup": userGroup,
                                      "Chain": chain,
                                      "ProxyFile": fileName}
    return S_OK()

  def setupProxy(self):
    """ download and dump request owner proxy to file and env

    If the request owner matches one of the configured shifters, the
    shifter proxy is used; otherwise the owner's own proxy is downloaded.
    The chosen proxy file is exported via X509_USER_PROXY.

    :return: S_OK with name of newly created owner proxy file and shifter name if any
    """
    self.__managersDict = {}
    shifterProxies = self.__setupManagerProxies()
    if not shifterProxies["OK"]:
      self.log.error(shifterProxies["Message"])

    ownerDN = self.request.OwnerDN
    ownerGroup = self.request.OwnerGroup
    isShifter = []
    for shifter, creds in self.__managersDict.items():
      if creds["ShifterDN"] == ownerDN and creds["ShifterGroup"] == ownerGroup:
        isShifter.append(shifter)
    if isShifter:
      proxyFile = self.__managersDict[isShifter[0]]["ProxyFile"]
      os.environ["X509_USER_PROXY"] = proxyFile
      return S_OK({"Shifter": isShifter, "ProxyFile": proxyFile})

    # # if we're here owner is not a shifter at all
    ownerProxyFile = gProxyManager.downloadVOMSProxyToFile(ownerDN, ownerGroup)
    if not ownerProxyFile["OK"] or not ownerProxyFile["Value"]:
      reason = ownerProxyFile.get("Message", "No valid proxy found in ProxyManager.")
      return S_ERROR("Change proxy error for '%s'@'%s': %s" % (ownerDN, ownerGroup, reason))

    ownerProxyFile = ownerProxyFile["Value"]
    os.environ["X509_USER_PROXY"] = ownerProxyFile
    return S_OK({"Shifter": isShifter, "ProxyFile": ownerProxyFile})

  @staticmethod
  def getPluginName(pluginPath):
    """ extract the bare plugin (class) name from a dotted or slash-separated path

    :param str pluginPath: e.g. "DIRAC/Foo/Bar/Cheddar" or "DIRAC.Foo.Cheddar"
    :return: last path component ("Cheddar"), or '' for an empty/None path
    """
    if not pluginPath:
      return ''
    if "/" in pluginPath:
      pluginPath = ".".join([chunk for chunk in pluginPath.split("/") if chunk])
    return pluginPath.split(".")[-1]

  @staticmethod
  def loadHandler(pluginPath):
    """ Create an instance of requested plugin class, loading and importing it when needed.
    This function could raise ImportError when plugin cannot be find or TypeError when
    loaded class object isn't inherited from BaseOperation class.

    :param str pluginPath: dotted path to plugin, specified as in import statement, i.e.
        "DIRAC.CheesShopSystem.private.Cheddar" or alternatively in 'normal' path format
        "DIRAC/CheesShopSystem/private/Cheddar"

    :return: object instance

    This function try to load and instantiate an object from given path. It is assumed that:

      * `pluginPath` is pointing to module directory "importable" by python interpreter, i.e.: it's
        package's top level directory is in $PYTHONPATH env variable,
      * the module should consist a class definition following module name,
      *  the class itself is inherited from DIRAC.RequestManagementSystem.private.BaseOperation.BaseOperation

    If above conditions aren't meet, function is throwing exceptions:

    :raises ImportError: when class cannot be imported
    :raises TypeError: when class isn't inherited from OperationHandlerBase
    """
    if "/" in pluginPath:
      pluginPath = ".".join([chunk for chunk in pluginPath.split("/") if chunk])
    pluginName = pluginPath.split(".")[-1]
    # import only once; later lookups reuse the name already present in globals()
    if pluginName not in globals():
      mod = __import__(pluginPath, globals(), fromlist=[pluginName])
      pluginClassObj = getattr(mod, pluginName)
    else:
      pluginClassObj = globals()[pluginName]
    if not issubclass(pluginClassObj, OperationHandlerBase):
      raise TypeError(
          "operation handler '%s' isn't inherited from OperationHandlerBase class" %
          pluginName)
    for key, status in (("Att", "Attempted"), ("OK", "Successful"), ("Fail", "Failed")):
      gMonitor.registerActivity(
          "%s%s" %
          (pluginName, key), "%s operations %s" %
          (pluginName, status), "RequestExecutingAgent", "Operations/min", gMonitor.OP_SUM)
    # # return an instance
    return pluginClassObj

  def getHandler(self, operation):
    """ return instance of a handler for a given operation type on demand
        all created handlers are kept in self.handlers dict for further use

    :param ~Operation.Operation operation: Operation instance
    :return: S_OK(handler) or S_ERROR when no handler is configured / loadable
    """
    if operation.Type not in self.handlersDict:
      return S_ERROR("handler for operation '%s' not set" % operation.Type)
    handler = self.handlers.get(operation.Type, None)
    if not handler:
      try:
        handlerCls = self.loadHandler(self.handlersDict[operation.Type])
        self.handlers[operation.Type] = handlerCls(
            csPath="%s/OperationHandlers/%s" % (self.csPath, operation.Type))
        handler = self.handlers[operation.Type]
      except (ImportError, TypeError) as error:
        self.log.exception("getHandler: %s" % str(error), lException=error)
        return S_ERROR(str(error))
    # # set operation for this handler
    handler.setOperation(operation)
    # # and return
    return S_OK(handler)

  def updateRequest(self):
    """ put back request to the RequestDB

    :return: result of ReqClient.putRequest
    """
    updateRequest = self.requestClient.putRequest(
        self.request, useFailoverProxy=False, retryMainService=2)
    if not updateRequest["OK"]:
      self.log.error(updateRequest["Message"])
    return updateRequest

  def __call__(self):
    """ request processing

    Executes the request's waiting operations one by one under the owner's
    (or a matching shifter's) proxy, updates monitoring counters and, for
    'Done' requests, writes the request back and finalizes the parent job.

    :return: S_OK(self.request) in most cases; S_ERROR on an operation
        exception or a failed finalization
    """

    self.log.debug("about to execute request")
    gMonitor.addMark("RequestAtt", 1)

    # # setup proxy for request owner
    setupProxy = self.setupProxy()
    if not setupProxy["OK"]:
      self.request.Error = setupProxy["Message"]
      if 'has no proxy registered' in setupProxy["Message"]:
        self.log.error('Request set to Failed:', setupProxy["Message"])
        # If user is no longer registered, fail the request
        for operation in self.request:
          for opFile in operation:
            opFile.Status = 'Failed'
          operation.Status = 'Failed'
      else:
        self.log.error(setupProxy["Message"])
      return S_OK(self.request)
    shifter = setupProxy["Value"]["Shifter"]
    proxyFile = setupProxy["Value"]["ProxyFile"]

    error = None
    while self.request.Status == "Waiting":

      # # get waiting operation
      operation = self.request.getWaiting()
      if not operation["OK"]:
        self.log.error(operation["Message"])
        return operation
      operation = operation["Value"]
      self.log.info("executing operation #%s '%s'" % (operation.Order, operation.Type))

      # # and handler for it
      handler = self.getHandler(operation)
      if not handler["OK"]:
        self.log.error("unable to process operation %s: %s" % (operation.Type, handler["Message"]))
        # gMonitor.addMark( "%s%s" % ( operation.Type, "Fail" ), 1 )
        operation.Error = handler["Message"]
        break

      handler = handler["Value"]
      # # set shifters list in the handler
      handler.shifter = shifter
      # # and execute
      pluginName = self.getPluginName(self.handlersDict.get(operation.Type))
      if self.standalone:
        useServerCertificate = gConfig.useServerCertificate()
      else:
        # Always use server certificates if executed within an agent
        useServerCertificate = True
      try:
        if pluginName:
          gMonitor.addMark("%s%s" % (pluginName, "Att"), 1)
        # Always use request owner proxy
        if useServerCertificate:
          gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')
        exe = handler()
        if useServerCertificate:
          gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')
        if not exe["OK"]:
          self.log.error("unable to process operation %s: %s" % (operation.Type, exe["Message"]))
          if pluginName:
            gMonitor.addMark("%s%s" % (pluginName, "Fail"), 1)
          gMonitor.addMark("RequestFail", 1)
          if self.request.JobID:
            # Check if the job exists
            monitorServer = RPCClient("WorkloadManagement/JobMonitoring", useCertificates=True)
            res = monitorServer.getJobPrimarySummary(int(self.request.JobID))
            if not res["OK"]:
              self.log.error("RequestTask: Failed to get job %d status" % self.request.JobID)
            elif not res['Value']:
              self.log.warn(
                  "RequestTask: job %d does not exist (anymore): failed request" %
                  self.request.JobID)
              for opFile in operation:
                opFile.Status = 'Failed'
              if operation.Status != 'Failed':
                operation.Status = 'Failed'
              self.request.Error = 'Job no longer exists'
      except Exception as exc:
        # BUGFIX: the exception was previously bound to the outer 'error' name;
        # Python 3 deletes the 'as' target when the except block ends, which
        # left 'error' unbound and made the 'if error' check below raise
        # NameError.  Store the message instead.
        error = str(exc)
        self.log.exception("hit by exception: %s" % error)
        if pluginName:
          gMonitor.addMark("%s%s" % (pluginName, "Fail"), 1)
        gMonitor.addMark("RequestFail", 1)
        if useServerCertificate:
          gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')
        break

      # # operation status check
      if operation.Status == "Done" and pluginName:
        gMonitor.addMark("%s%s" % (pluginName, "OK"), 1)
      elif operation.Status == "Failed" and pluginName:
        gMonitor.addMark("%s%s" % (pluginName, "Fail"), 1)
      elif operation.Status in ("Waiting", "Scheduled"):
        # # no update for waiting or all files scheduled
        break

    gMonitor.flush()

    if error:
      return S_ERROR(error)

    # # request done?
    if self.request.Status == "Done":
      # # update request to the RequestDB
      self.log.info('updating request with status %s' % self.request.Status)
      update = self.updateRequest()
      if not update["OK"]:
        self.log.error(update["Message"])
        return update
      self.log.info("request '%s' is done" % self.request.RequestName)
      gMonitor.addMark("RequestOK", 1)
      # # and there is a job waiting for it? finalize!
      if self.request.JobID:
        attempts = 0
        while True:
          finalizeRequest = self.requestClient.finalizeRequest(
              self.request.RequestID, self.request.JobID)  # pylint: disable=no-member
          if not finalizeRequest["OK"]:
            if not attempts:
              self.log.error(
                  "unable to finalize request %s: %s, will retry" %
                  (self.request.RequestName, finalizeRequest["Message"]))
            self.log.verbose("Waiting 10 seconds")
            attempts += 1
            if attempts == 10:
              self.log.error("giving up finalize request after %d attempts" % attempts)
              return S_ERROR('Could not finalize request')

            time.sleep(10)

          else:
            self.log.info(
                "request '%s' is finalized%s" %
                (self.request.RequestName,
                 (' after %d attempts' %
                  attempts) if attempts else ''))
            break

    # Request will be updated by the callBack method
    self.log.verbose("RequestTasks exiting, request %s" % self.request.Status)
    return S_OK(self.request)
class TransformationCleaningAgent( AgentModule ):
  """
  .. class:: TransformationCleaningAgent

  :param DataManger dm: DataManager instance
  :param TransfromationClient transClient: TransfromationClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  """

  def __init__( self, *args, **kwargs ):
    """ c'tor

    Only declares the attributes; clients are instantiated and options
    are read in :meth:`initialize`.
    """
    AgentModule.__init__( self, *args, **kwargs )

    # clients, created in initialize()
    self.dm = None
    self.transClient = None
    self.wmsClient = None
    self.reqClient = None
    self.metadataClient = None

    # configuration options, read in initialize()
    self.transformationTypes = None
    self.directoryLocations = None
    self.transfidmeta = None
    self.archiveAfter = None
    self.activeStorages = None
    self.logSE = None
    self.enableFlag = None

  def initialize( self ):
    """ Agent initialisation: read configuration options and create the clients.

    :param self: self reference
    """
    # shifter proxy used by the agent
    self.am_setOption( 'shifterProxy', 'DataManager' )

    # transformation types, from the CS or from the agent options
    self.dataProcTTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
    self.dataManipTTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    self.transformationTypes = sorted( agentTSTypes or ( self.dataProcTTypes + self.dataManipTTypes ) )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )

    # where to look for transformation directories
    self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB',
                                                                                   'MetadataCatalog' ] ) )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )

    # metadata tag holding the transformation ID
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )

    # archive period in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )  # days
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )

    # storage elements to scan
    self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )

    # SE hosting the transformation logs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )

    # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )

    # clients
    self.dm = DataManager()
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.reqClient = ReqClient()
    self.metadataClient = FileCatalogClient()

    return S_OK()

  #############################################################################
  def execute( self ):
    """ execution in one agent's cycle

    Handles three transformation states in turn:
      * 'Cleaning'      -> clean (or just archive for data-manipulation types)
      * 'RemovingFiles' -> remove the output files
      * 'Completed'     -> archive once inactive longer than self.archiveAfter days

    :param self: self reference
    """

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in self.dataManipTTypes:
          res = self.archiveTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                         res['Message'] ) )
        else:
          res = self.cleanTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'],
                                                                        res['Message'] ) )
    else:
      # previously this failure was silently ignored
      self.log.error( "Could not get the 'Cleaning' transformations", res['Message'] )

    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        res = self.removeTransformationOutput( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )
    else:
      # previously this failure was silently ignored
      self.log.error( "Could not get the 'RemovingFiles' transformations", res['Message'] )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        res = self.archiveTransformation( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )
    else:
      # include the failure reason (was omitted before)
      self.log.error( "Could not get the transformations", res['Message'] )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ get the directories for the supplied transformation from the transformation system

    Queries the TransformationDB and/or the metadata catalog, depending on
    the configured directory locations, and keeps only directories that
    match the transformation ID.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK with a sorted list of directory paths
    """
    directories = []

    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        self.log.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      # parameter value is one path per line
      directories = self._addDirs( transID, res['Value'].splitlines(), directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      directories = self._addDirs( transID, res['Value'], directories )

    if not directories:
      self.log.info( "No output directories found" )
    return S_OK( sorted( directories ) )
  @classmethod
  def _addDirs( cls, transID, newDirs, existingDirs ):
    """ append unique :newDirs: entries to :existingDirs:

    Only directories whose path contains the zero-padded transformation ID
    are kept.

    :param cls: class reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths (extended in place)
    :return: the updated :existingDirs: list
    """
    # the transformation ID appears zero-padded to 8 digits in directory paths;
    # compute it once instead of per folder
    transStr = str( transID ).zfill( 8 )
    for folder in newDirs:
      if re.search( transStr, str( folder ) ) and folder not in existingDirs:
        existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    """ delete lfn dir from all active SE

    Stops and propagates the first failure.

    :param self: self reference
    :param str directory: folder name
    """
    for seName in self.activeStorages:
      removal = self.__removeStorageDirectory( directory, seName )
      if not removal['OK']:
        return removal
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    """ wipe out all contents from :directory: at :storageElement:

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    """
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )

    se = StorageElement( storageElement )

    # translate the LFN into a storage PFN
    pfnRes = se.getPfnForLfn( [directory] )
    if not pfnRes['OK']:
      self.log.error( "Failed to get PFN for directory", pfnRes['Message'] )
      return pfnRes
    if directory in pfnRes['Value']['Failed']:
      self.log.verbose( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, pfnRes['Value']['Failed'][directory] ) )
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = pfnRes['Value']['Successful'][directory]

    # nothing to remove if the directory is not on this SE
    existsRes = returnSingleResult( se.exists( storageDirectory ) )
    if not existsRes['OK']:
      self.log.error( "Failed to obtain existance of directory", existsRes['Message'] )
      return existsRes
    if not existsRes['Value']:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()

    removalRes = returnSingleResult( se.removeDirectory( storageDirectory, recursive = True ) )
    if not removalRes['OK']:
      self.log.error( "Failed to remove storage directory", removalRes['Message'] )
      return removalRes
    self.log.info( "Successfully removed %d files from %s at %s" % ( removalRes['Value']['FilesRemoved'],
                                                                     directory,
                                                                     storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    """ wipe out everything from catalog under folder :directory:

    :param self: self reference
    :param str directory: folder name
    """
    contents = self.__getCatalogDirectoryContents( [directory] )
    if not contents['OK']:
      return contents
    filesFound = contents['Value']
    if not filesFound:
      self.log.info( "No files are registered in the catalog directory %s" % directory )
      return S_OK()
    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )
    removal = self.dm.removeFile( filesFound, force = True )
    if not removal['OK']:
      return removal
    # a file already absent from a catalog is only a warning;
    # anything else counts as a real failure
    hardFailures = []
    for lfn, reason in removal['Value']['Failed'].items():
      if "File does not exist" in str( reason ):
        self.log.warn( "File %s not found in some catalog: " % ( lfn ) )
      else:
        self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
        hardFailures.append( lfn )
    if hardFailures:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    """ get catalog contents under paths :directories:

    Performs a breadth-first walk of the catalog tree below the given
    directories.  Missing directories are not an error.

    :param self: self reference
    :param list directories: list of paths in catalog
    :return: S_OK with the list of LFNs found
    """
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    # work on a copy: the previous code aliased the caller's list and emptied it
    activeDirs = list( directories )
    allFiles = {}
    fc = FileCatalog()
    while activeDirs:
      # pop instead of the former remove(): avoids an O(n) value scan
      currentDir = activeDirs.pop( 0 )
      res = returnSingleResult( fc.listDirectory( currentDir ) )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        if "No such file or directory" in res['Message']:
          self.log.info( "%s: %s" % ( currentDir, res['Message'] ) )
        else:
          self.log.error( "Failed to get directory %s content: %s" % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    self.log.info( "Found %d files" % len( allFiles ) )
    # return a real list: dict.keys() is a non-indexable view in Python 3
    return S_OK( list( allFiles ) )

  def cleanTransformationLogFiles( self, directory ):
    """ clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    """
    self.log.info( "Removing log files found in the directory %s" % directory )
    removal = returnSingleResult( StorageElement( self.logSE ).removeDirectory( directory ) )
    if not removal['OK']:
      self.log.error( "Failed to remove log files", removal['Message'] )
      return removal
    self.log.info( "Successfully removed transformation log directory" )
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    """ This just removes any mention of the output data from the catalog and storage """
    self.log.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    for directory in res['Value']:
      # log directories are handled by cleanTransformationLogFiles, skip them here
      if re.search( '/LOG/', directory ):
        continue
      for cleaner in ( self.cleanCatalogContents, self.cleanStorageContents ):
        cleanRes = cleaner( directory )
        if not cleanRes['OK']:
          return cleanRes
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully removed output of transformation %d" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    """ This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    """
    self.log.info( "Archiving transformation %s" % transID )
    # first clean the WMS tasks / failover requests, then the transformation DB
    for step in ( self.cleanTransformationTasks, self.transClient.cleanTransformation ):
      res = step( transID )
      if not res['OK']:
        return res
    self.log.info( "Successfully archived transformation %d" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    """ This removes what was produced by the supplied transformation,
        leaving only some info and log in the transformation DB.
    """
    self.log.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    for directory in directories:
      # log directories get their log files removed first ...
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      # ... then every directory (LOG included) is wiped from catalog and storage
      for cleaner in ( self.cleanCatalogContents, self.cleanStorageContents ):
        res = cleaner( directory )
        if not res['OK']:
          return res
    # Clean ALL the possible remnants found in the BK
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully cleaned transformation %d" % transID )
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Cleaned' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Cleaned" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Cleaned" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID ):
    """ Remove from the catalog every file registered under this transformation ID. """
    found = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not found['OK']:
      return found
    lfns = found['Value']
    if not lfns:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()
    # force = True: remove even files with missing replicas
    removal = self.dm.removeFile( lfns, force = True )
    if not removal['OK']:
      return removal
    failedLFNs = removal['Value']['Failed']
    for lfn, reason in failedLFNs.items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if failedLFNs:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    """ Remove the tasks' external remnants: WMS jobs for data-processing
        transformations, RMS requests otherwise.
    """
    res = self.__getTransformationExternalIDs( transID )
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if not externalIDs:
      # nothing submitted yet: nothing to clean
      return S_OK()
    res = self.transClient.getTransformationParameters( transID, ['Type'] )
    if not res['OK']:
      self.log.error( "Failed to determine transformation type" )
      return res
    transType = res['Value']
    # Pick the remover matching where the tasks actually live
    remover = self.__removeWMSTasks if transType in self.dataProcTTypes else self.__removeRequests
    res = remover( externalIDs )
    if not res['OK']:
      return res
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    """ Fetch the ExternalID of every task belonging to transformation *transID*.

    :param self: self reference
    :param int transID: transformation ID
    :returns: S_OK( list of ExternalIDs ) or the failing result from the client
    """
    tasks = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not tasks['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, tasks['Message'] )
      return tasks
    externalIDs = [ task['ExternalID'] for task in tasks["Value"] ]
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    """ Delete the given requests from the (new) RMS system -

        #FIXME: if the old system is still installed, it won't remove anything!!!
        (we don't want to risk removing from the new RMS what is instead in the old)
    """
    # FIXME: checking if the old system is still installed!
    from DIRAC.ConfigurationSystem.Client import PathFinder
    if PathFinder.getServiceURL( "RequestManagement/RequestManager" ):
      self.log.warn( "NOT removing requests!!" )
      return S_OK()

    # Keep only non-zero identifiers, normalised to plain int
    for requestName in [ int( long( rid ) ) for rid in requestIDs if long( rid ) ]:
      self.reqClient.deleteRequest( requestName )

    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    """ wipe out jobs and their requests from the system

    Kills and deletes the jobs from the WMS (in chunks of 500), then deletes
    any failover requests associated to those jobs, checking both the old
    (RequestClient) and the new (ReqClient) request management systems.

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list transJobIDs: job IDs (strings or ints; zeros are skipped)
    :returns: S_OK() or S_ERROR if any job/request removal failed
    """
    # Prevent 0 job IDs
    jobIDs = [ int( j ) for j in transJobIDs if int( j ) ]
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):

      # killJob reports partial failures via extra keys in the returned dict
      # ('InvalidJobIDs', 'NonauthorizedJobIDs', 'FailedJobIDs') rather than
      # only via 'Message'
      res = self.wmsClient.killJob( jobList )
      if res['OK']:
        self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        # Jobs unknown to the WMS are fine here: nothing left to kill
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

      # Same key convention as killJob above
      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    failed = 0
    # FIXME: double request client: old/new -> only the new will survive sooner or later
    # this is the old
    try:
      res = RequestClient().getRequestForJobs( jobIDs )
      if not res['OK']:
        self.log.error( "Failed to get requestID for jobs.", res['Message'] )
        return res
      failoverRequests = res['Value']
      self.log.info( "Found %d jobs with associated failover requests (in the old RMS)" % len( failoverRequests ) )
      if not failoverRequests:
        return S_OK()
      for jobID, requestName in failoverRequests.items():
        # Put this check just in case, tasks must have associated jobs
        if jobID == 0 or jobID == '0':
          continue
        res = RequestClient().deleteRequest( requestName )
        if not res['OK']:
          self.log.error( "Failed to remove request from RequestDB", res['Message'] )
          failed += 1
        else:
          self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    except RuntimeError:
      # NOTE(review): presumably raised when the old RequestClient is not
      # deployed at all — fall through to the new RMS only; confirm
      failoverRequests = {}
      pass

    # FIXME: and this is the new
    res = self.reqClient.getRequestNamesForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests.update( res['Value']['Successful'] )
    if not failoverRequests:
      return S_OK()
    for jobID, requestName in res['Value']['Successful'].items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.reqClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )


    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()
  else:
    lfns.append( inputFileName )

from DIRAC.Resources.Storage.StorageElement import StorageElement
import DIRAC
# Check is provided SE is OK
se = StorageElement( targetSE )
if not se.valid:
  print se.errorReason
  print
  Script.showHelp()

from DIRAC.RequestManagementSystem.Client.RequestContainer      import RequestContainer
from DIRAC.RequestManagementSystem.Client.ReqClient             import ReqClient

reqClient = ReqClient()
requestType = 'transfer'
requestOperation = 'replicateAndRegister'

for lfnList in breakListIntoChunks( lfns, 100 ):

  oRequest = RequestContainer()
  subRequestIndex = oRequest.initiateSubRequest( requestType )['Value']
  attributeDict = {'Operation':requestOperation, 'TargetSE':targetSE}
  oRequest.setSubRequestAttributes( subRequestIndex, requestType, attributeDict )
  files = []
  for lfn in lfnList:
    files.append( {'LFN':lfn} )
  oRequest.setSubRequestFiles( subRequestIndex, requestType, files )
  requestName = "%s_%s" % ( md5( repr( time.time() ) ).hexdigest()[:16], md5( repr( time.time() ) ).hexdigest()[:16] )
  oRequest.setRequestAttributes( {'RequestName':requestName} )
Beispiel #30
0
class RequestTasks( TaskBase ):
  """ Creates, submits and monitors RMS Requests for transformation tasks.

      Requests are built from the task dictionaries produced by the
      transformation DB, validated, and submitted through ReqClient.
  """

  def __init__( self, transClient = None, logger = None, requestClient = None,
                requestClass = None, requestValidator = None ):
    """ c'tor

        the requestClass is by default Request.
        If extensions want to use an extended type, they can pass it as a parameter.
        This is the same behavior as WorfkloTasks and jobClass
    """

    if not logger:
      logger = gLogger.getSubLogger( 'RequestTasks' )

    super( RequestTasks, self ).__init__( transClient, logger )

    # All collaborators can be injected for testing / extension purposes
    if not requestClient:
      self.requestClient = ReqClient()
    else:
      self.requestClient = requestClient

    if not requestClass:
      self.requestClass = Request
    else:
      self.requestClass = requestClass

    if not requestValidator:
      self.requestValidator = RequestValidator()
    else:
      self.requestValidator = requestValidator


  def prepareTransformationTasks( self, transBody, taskDict, owner = '', ownerGroup = '', ownerDN = '' ):
    """ Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB

    :param str transBody: transformation body; 'requestType;requestOperation' when set
    :param dict taskDict: taskID -> task parameters; modified in place ('TaskObject' is set)
    :param str owner: request owner username; taken from the proxy when empty
    :param str ownerGroup: request owner group; taken from the proxy when empty
    :param str ownerDN: request owner DN; resolved from owner when empty
    :returns: S_OK( taskDict ) or S_ERROR
    """
    if ( not owner ) or ( not ownerGroup ):
      res = getProxyInfo( False, False )
      if not res['OK']:
        return res
      proxyInfo = res['Value']
      owner = proxyInfo['username']
      ownerGroup = proxyInfo['group']

    if not ownerDN:
      res = getDNForUsername( owner )
      if not res['OK']:
        return res
      ownerDN = res['Value'][0]

    requestOperation = 'ReplicateAndRegister'
    if transBody:
      try:
        _requestType, requestOperation = transBody.split( ';' )
      except AttributeError:
        pass

    for taskID in sorted( taskDict ):
      paramDict = taskDict[taskID]
      # BUGFIX: validation and TaskObject assignment used to sit outside this
      # check, so a task without input data was given the *previous* task's
      # request (or raised NameError on the very first task). Mark such tasks
      # with an empty TaskObject so submitTransformationTasks counts them as
      # failed instead.
      if not paramDict['InputData']:
        taskDict[taskID]['TaskObject'] = ''
        continue

      transID = paramDict['TransformationID']

      oRequest = Request()
      transfer = Operation()
      transfer.Type = requestOperation
      transfer.TargetSE = paramDict['TargetSE']

      # InputData may be a list of LFNs or a ';'-separated string
      files = []
      if isinstance( paramDict['InputData'], list ):
        files = paramDict['InputData']
      elif isinstance( paramDict['InputData'], basestring ):
        files = paramDict['InputData'].split( ';' )
      for lfn in files:
        trFile = File()
        trFile.LFN = lfn

        transfer.addFile( trFile )

      oRequest.addOperation( transfer )
      oRequest.RequestName = _requestName( transID, taskID )
      oRequest.OwnerDN = ownerDN
      oRequest.OwnerGroup = ownerGroup

      isValid = self.requestValidator.validate( oRequest )
      if not isValid['OK']:
        return isValid

      taskDict[taskID]['TaskObject'] = oRequest

    return S_OK( taskDict )

  def submitTransformationTasks( self, taskDict ):
    """ Submit requests one by one

    :param dict taskDict: taskID -> task parameters with 'TaskObject' set
    :returns: S_OK( taskDict ) with 'ExternalID' and 'Success' filled per task
    """
    submitted = 0
    failed = 0
    startTime = time.time()
    for taskID in sorted( taskDict ):
      if not taskDict[taskID]['TaskObject']:
        taskDict[taskID]['Success'] = False
        failed += 1
        continue
      res = self.submitTaskToExternal( taskDict[taskID]['TaskObject'] )
      if res['OK']:
        taskDict[taskID]['ExternalID'] = res['Value']
        taskDict[taskID]['Success'] = True
        submitted += 1
      else:
        self._logError( "Failed to submit task to RMS", res['Message'] )
        taskDict[taskID]['Success'] = False
        failed += 1
    self._logInfo( 'submitTasks: Submitted %d tasks to RMS in %.1f seconds' % ( submitted, time.time() - startTime ) )
    if failed:
      self._logWarn( 'submitTasks: But at the same time failed to submit %d tasks to RMS.' % ( failed ) )
    return S_OK( taskDict )

  def submitTaskToExternal( self, oRequest ):
    """ Submits a request using ReqClient

    :param oRequest: the request to submit; must be an instance of self.requestClass
    :returns: result of ReqClient.putRequest, or S_ERROR for a wrong type
    """
    if isinstance( oRequest, self.requestClass ):
      return self.requestClient.putRequest( oRequest )
    return S_ERROR( "Request should be a Request object" )

  def updateTransformationReservedTasks( self, taskDicts ):
    """ Map the expected request name of each task to its ExternalID.

    :param list taskDicts: task dicts with 'TransformationID', 'TaskID', 'ExternalID'
    :returns: S_OK with 'TaskNameIDs' (name -> request ID) and 'NoTasks' (names without ID)
    """
    requestNameIDs = {}
    noTasks = []
    for taskDict in taskDicts:
      requestName = _requestName( taskDict['TransformationID'], taskDict['TaskID'] )

      reqID = taskDict['ExternalID']

      if reqID:
        requestNameIDs[requestName] = reqID
      else:
        noTasks.append( requestName )
    return S_OK( {'NoTasks':noTasks, 'TaskNameIDs':requestNameIDs} )


  def getSubmittedTaskStatus( self, taskDicts ):
    """ Collect the tasks whose request status changed.

    :returns: S_OK( { newStatus : [ taskIDs ] } )
    """
    updateDict = {}

    for taskDict in taskDicts:
      oldStatus = taskDict['ExternalStatus']

      newStatus = self.requestClient.getRequestStatus( taskDict['ExternalID'] )
      if not newStatus['OK']:
        # A request that no longer exists is routine (e.g. after cleaning): log quietly.
        # CONSISTENCY FIX: use the _logWarn helper (as elsewhere in this class)
        # instead of self.log.warn
        log = self._logVerbose if 'not exist' in newStatus['Message'] else self._logWarn
        log( "getSubmittedTaskStatus: Failed to get requestID for request", '%s' % newStatus['Message'] )
      else:
        newStatus = newStatus['Value']
        if newStatus != oldStatus:
          updateDict.setdefault( newStatus, [] ).append( taskDict['TaskID'] )
    return S_OK( updateDict )

  def getSubmittedFileStatus( self, fileDicts ):
    """ Collect new transformation-file statuses from the requests' file statuses.

    :returns: S_OK( { lfn : 'Processed' | 'Problematic' } )
    """
    taskFiles = {}
    submittedTasks = {}
    externalIds = {}
    # Don't try and get status of not submitted tasks!
    for fileDict in fileDicts:
      submittedTasks.setdefault( fileDict['TransformationID'], set() ).add( int( fileDict['TaskID'] ) )
    for transID in submittedTasks:
      res = self.transClient.getTransformationTasks( { 'TransformationID':transID, 'TaskID': list( submittedTasks[transID] )} )
      if not res['OK']:
        return res
      for taskDict in res['Value']:
        taskID = taskDict['TaskID']
        externalIds[taskID] = taskDict['ExternalID']
        if taskDict['ExternalStatus'] == 'Created':
          # still only 'Created' means not submitted: drop it from the check
          submittedTasks[transID].remove( taskID )

    for fileDict in fileDicts:
      transID = fileDict['TransformationID']
      taskID = int( fileDict['TaskID'] )
      if taskID in submittedTasks[transID]:
        requestID = externalIds[taskID]
        taskFiles.setdefault( requestID, {} )[fileDict['LFN']] = fileDict['Status']

    updateDict = {}
    for requestID in sorted( taskFiles ):
      lfnDict = taskFiles[requestID]
      statusDict = self.requestClient.getRequestFileStatus( requestID, lfnDict.keys() )
      if not statusDict['OK']:
        # CONSISTENCY FIX: use the _logWarn helper instead of self.log.warn
        log = self._logVerbose if 'not exist' in statusDict['Message'] else self._logWarn
        log( "getSubmittedFileStatus: Failed to get files status for request", '%s' % statusDict['Message'] )
        continue

      statusDict = statusDict['Value']
      for lfn, newStatus in statusDict.items():
        if newStatus == lfnDict[lfn]:
          # unchanged: nothing to report
          pass
        elif newStatus == 'Done':
          updateDict[lfn] = 'Processed'
        elif newStatus == 'Failed':
          updateDict[lfn] = 'Problematic'
    return S_OK( updateDict )
                 "operation using %s lfns and %s target SEs" %
                 (requestName, len(lfnList), len(targetSEs)))

    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    from DIRAC.Core.Utilities.List import breakListIntoChunks

    lfnChunks = breakListIntoChunks(lfnList, 100)
    multiRequests = len(lfnChunks) > 1

    error = 0
    count = 0
    reqClient = ReqClient()
    fc = FileCatalog()
    requestIDs = []
    for lfnChunk in lfnChunks:
        metaDatas = fc.getFileMetadata(lfnChunk)
        if not metaDatas["OK"]:
            gLogger.error("unable to read metadata for lfns: %s" %
                          metaDatas["Message"])
            error = -1
            continue
        metaDatas = metaDatas["Value"]
        for failedLFN, reason in metaDatas["Failed"].items():
            gLogger.error("skipping %s: %s" % (failedLFN, reason))
        lfnChunk = set(metaDatas["Successful"])

        if not lfnChunk:
Beispiel #32
0
class RequestTasks(TaskBase):
  """
  Class for handling tasks for the RMS: builds Request objects from
  transformation task dictionaries, submits them via ReqClient, and
  monitors request and file statuses.
  """

  def __init__(self, transClient=None, logger=None, requestClient=None,
               requestClass=None, requestValidator=None,
               ownerDN=None, ownerGroup=None):
    """ c'tor

        the requestClass is by default Request.
        If extensions want to use an extended type, they can pass it as a parameter.
        This is the same behavior as WorfkloTasks and jobClass
    """

    if not logger:
      logger = gLogger.getSubLogger('RequestTasks')

    super(RequestTasks, self).__init__(transClient, logger)
    # Delegate with certificates only when both a DN and a group were supplied
    useCertificates = True if (bool(ownerDN) and bool(ownerGroup)) else False

    if not requestClient:
      self.requestClient = ReqClient(useCertificates=useCertificates,
                                     delegatedDN=ownerDN,
                                     delegatedGroup=ownerGroup)
    else:
      self.requestClient = requestClient

    if not requestClass:
      self.requestClass = Request
    else:
      self.requestClass = requestClass

    if not requestValidator:
      self.requestValidator = RequestValidator()
    else:
      self.requestValidator = requestValidator

  def prepareTransformationTasks(self, transBody, taskDict, owner='', ownerGroup='', ownerDN='',
                                 bulkSubmissionFlag=False):
    """ Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB

    :param transBody: transformation body: either a JSON list of operations, or a
                      'requestType;requestOperation' string (possibly empty)
    :param dict taskDict: taskID -> task parameters, modified in place
    :param str owner: request owner username; taken from the proxy when empty
    :param str ownerGroup: request owner group; taken from the proxy when empty
    :param str ownerDN: request owner DN; resolved from owner when empty
    :param bool bulkSubmissionFlag: not used in this method
    :returns: S_OK( taskDict )
    """
    if not taskDict:
      return S_OK({})

    if (not owner) or (not ownerGroup):
      res = getProxyInfo(False, False)
      if not res['OK']:
        return res
      proxyInfo = res['Value']
      owner = proxyInfo['username']
      ownerGroup = proxyInfo['group']

    if not ownerDN:
      res = getDNForUsername(owner)
      if not res['OK']:
        return res
      ownerDN = res['Value'][0]

    try:
      # A JSON body describes one request with multiple operations per task
      transJson = json.loads(transBody)
      self._multiOperationsBody(transJson, taskDict, ownerDN, ownerGroup)
    except ValueError:  # #json couldn't load
      # Plain-string (or empty) body: one single-operation request per task
      self._singleOperationsBody(transBody, taskDict, ownerDN, ownerGroup)

    return S_OK(taskDict)

  def _multiOperationsBody(self, transJson, taskDict, ownerDN, ownerGroup):
    """ deal with a Request that has multiple operations

    :param transJson: list of lists of string and dictionaries, e.g.:

      .. code :: python

        body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                 ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
               ]

    :param dict taskDict: dictionary of tasks, modified in this function
    :param str ownerDN: certificate DN used for the requests
    :param str ownerGroup: dirac group used for the requests
    :returns: None
    """
    failedTasks = []
    # .items() returns a list copy in Python 2, so popping inside the loop is safe
    for taskID, task in taskDict.items():
      transID = task['TransformationID']
      if not task.get('InputData'):
        self._logError("Error creating request for task", "%s, No input data" % taskID, transID=transID)
        taskDict.pop(taskID)
        continue
      files = []

      oRequest = Request()
      # InputData may be a list of LFNs or a ';'-separated string
      if isinstance(task['InputData'], list):
        files = task['InputData']
      elif isinstance(task['InputData'], basestring):
        files = task['InputData'].split(';')

      # create the operations from the json structure
      for operationTuple in transJson:
        op = Operation()
        op.Type = operationTuple[0]
        for parameter, value in operationTuple[1].iteritems():
          setattr(op, parameter, value)

        # every operation acts on the full set of the task's input files
        for lfn in files:
          opFile = File()
          opFile.LFN = lfn
          op.addFile(opFile)

        oRequest.addOperation(op)

      result = self._assignRequestToTask(oRequest, taskDict, transID, taskID, ownerDN, ownerGroup)
      if not result['OK']:
        failedTasks.append(taskID)
    # Remove failed tasks
    for taskID in failedTasks:
      taskDict.pop(taskID)

  def _singleOperationsBody(self, transBody, taskDict, ownerDN, ownerGroup):
    """ deal with a Request that has just one operation, as it was sofar

    :param transBody: string, can be an empty string
    :param dict taskDict: dictionary of tasks, modified in this function
    :param str ownerDN: certificate DN used for the requests
    :param str ownerGroup: dirac group used for the requests
    :returns: None
    """

    # Default operation when the body is empty or not 'type;operation'
    requestOperation = 'ReplicateAndRegister'
    if transBody:
      try:
        _requestType, requestOperation = transBody.split(';')
      except AttributeError:
        pass
    failedTasks = []
    # iteritems() is safe here: tasks are only popped after the loop, via failedTasks
    for taskID, task in taskDict.iteritems():

      transID = task['TransformationID']

      oRequest = Request()
      transfer = Operation()
      transfer.Type = requestOperation
      transfer.TargetSE = task['TargetSE']

      # If there are input files
      if task.get('InputData'):
        # InputData may be a list of LFNs or a ';'-separated string
        if isinstance(task['InputData'], list):
          files = task['InputData']
        elif isinstance(task['InputData'], basestring):
          files = task['InputData'].split(';')
        for lfn in files:
          trFile = File()
          trFile.LFN = lfn

          transfer.addFile(trFile)

      oRequest.addOperation(transfer)
      result = self._assignRequestToTask(oRequest, taskDict, transID, taskID, ownerDN, ownerGroup)
      if not result['OK']:
        failedTasks.append(taskID)
    # Remove failed tasks
    for taskID in failedTasks:
      taskDict.pop(taskID)

  def _assignRequestToTask(self, oRequest, taskDict, transID, taskID, ownerDN, ownerGroup):
    """set name, ownerDN and group on the request, validate it, and store it as
    the task's 'TaskObject'; on validation failure the caller removes the task
    from taskDict

    :param oRequest: Request
    :param dict taskDict: dictionary of tasks, modified in this function
    :param int transID: Transformation ID
    :param int taskID: Task ID
    :param str ownerDN: certificate DN used for the requests
    :param str ownerGroup: dirac group used for the requests
    :returns: S_OK when the request was assigned, S_ERROR when validation failed
    """

    oRequest.RequestName = self._transTaskName(transID, taskID)
    oRequest.OwnerDN = ownerDN
    oRequest.OwnerGroup = ownerGroup

    isValid = self.requestValidator.validate(oRequest)
    if not isValid['OK']:
      self._logError("Error creating request for task", "%s %s" % (taskID, isValid),
                     transID=transID)
      return S_ERROR('Error creating request')
    taskDict[taskID]['TaskObject'] = oRequest
    return S_OK()

  def submitTransformationTasks(self, taskDict):
    """ Submit requests one by one

    :param dict taskDict: taskID -> task parameters with 'TaskObject' set;
                          'ExternalID' and 'Success' are filled in per task
    :returns: S_OK( taskDict )
    """
    submitted = 0
    failed = 0
    startTime = time.time()
    method = 'submitTransformationTasks'
    for task in taskDict.itervalues():
      # transID is the same for all tasks, so pick it up every time here
      transID = task['TransformationID']
      if not task['TaskObject']:
        task['Success'] = False
        failed += 1
        continue
      res = self.submitTaskToExternal(task['TaskObject'])
      if res['OK']:
        task['ExternalID'] = res['Value']
        task['Success'] = True
        submitted += 1
      else:
        self._logError("Failed to submit task to RMS", res['Message'], transID=transID)
        task['Success'] = False
        failed += 1
    if submitted:
      self._logInfo('Submitted %d tasks to RMS in %.1f seconds' % (submitted, time.time() - startTime),
                    transID=transID, method=method)
    if failed:
      self._logWarn('Failed to submit %d tasks to RMS.' % (failed),
                    transID=transID, method=method)
    return S_OK(taskDict)

  def submitTaskToExternal(self, oRequest):
    """
    Submits a request to RMS

    :param oRequest: the request to submit; must be an instance of self.requestClass
    :returns: result of ReqClient.putRequest, or S_ERROR for a wrong type
    """
    if isinstance(oRequest, self.requestClass):
      return self.requestClient.putRequest(oRequest, useFailoverProxy=False, retryMainService=2)
    return S_ERROR("Request should be a Request object")

  def updateTransformationReservedTasks(self, taskDicts):
    """ Map the expected request name of each task to its ExternalID.

    :param list taskDicts: task dicts with 'TransformationID', 'TaskID', 'ExternalID'
    :returns: S_OK with 'TaskNameIDs' (name -> request ID) and 'NoTasks' (names without ID)
    """
    requestNameIDs = {}
    noTasks = []
    for taskDict in taskDicts:
      requestName = self._transTaskName(taskDict['TransformationID'], taskDict['TaskID'])
      reqID = taskDict['ExternalID']
      if reqID:
        requestNameIDs[requestName] = reqID
      else:
        noTasks.append(requestName)
    return S_OK({'NoTasks': noTasks, 'TaskNameIDs': requestNameIDs})

  def getSubmittedTaskStatus(self, taskDicts):
    """
    Check if tasks changed status, and return a list of tasks per new status

    :returns: S_OK( { newStatus : [ taskIDs ] } )
    """
    updateDict = {}
    badRequestID = 0
    for taskDict in taskDicts:
      oldStatus = taskDict['ExternalStatus']
      # ExternalID is normally a string
      if taskDict['ExternalID'] and int(taskDict['ExternalID']):
        newStatus = self.requestClient.getRequestStatus(taskDict['ExternalID'])
        if not newStatus['OK']:
          # A vanished request is routine (e.g. after cleaning): log quietly
          log = self._logVerbose if 'not exist' in newStatus['Message'] else self._logWarn
          log("getSubmittedTaskStatus: Failed to get requestID for request", newStatus['Message'],
              transID=taskDict['TransformationID'])
        else:
          newStatus = newStatus['Value']
          # We don't care updating the tasks to Assigned while the request is being processed
          if newStatus != oldStatus and newStatus != 'Assigned':
            updateDict.setdefault(newStatus, []).append(taskDict['TaskID'])
      else:
        badRequestID += 1
    if badRequestID:
      self._logWarn("%d requests have identifier 0" % badRequestID)
    return S_OK(updateDict)

  def getSubmittedFileStatus(self, fileDicts):
    """
    Check if transformation files changed status, and return a list of taskIDs per new status

    :returns: S_OK( { lfn : 'Processed' | 'Problematic' } )
    """
    # Don't try and get status of not submitted tasks!
    transID = None
    taskFiles = {}
    for fileDict in fileDicts:
      # There is only one transformation involved, get however the transID in the loop
      transID = fileDict['TransformationID']
      taskID = int(fileDict['TaskID'])
      taskFiles.setdefault(taskID, []).append(fileDict['LFN'])
    # Should not happen, but just in case there are no files, return
    if transID is None:
      return S_OK({})

    res = self.transClient.getTransformationTasks({'TransformationID': transID, 'TaskID': taskFiles.keys()})
    if not res['OK']:
      return res
    requestFiles = {}
    for taskDict in res['Value']:
      taskID = taskDict['TaskID']
      externalID = taskDict['ExternalID']
      # Only consider tasks that are submitted, ExternalID is a string
      if taskDict['ExternalStatus'] != 'Created' and externalID and int(externalID):
        requestFiles[externalID] = taskFiles[taskID]

    updateDict = {}
    for requestID, lfnList in requestFiles.iteritems():
      statusDict = self.requestClient.getRequestFileStatus(requestID, lfnList)
      if not statusDict['OK']:
        # A vanished request is routine (e.g. after cleaning): log quietly
        log = self._logVerbose if 'not exist' in statusDict['Message'] else self._logWarn
        log("Failed to get files status for request", statusDict['Message'],
            transID=transID, method='getSubmittedFileStatus')
      else:
        for lfn, newStatus in statusDict['Value'].iteritems():
          if newStatus == 'Done':
            updateDict[lfn] = 'Processed'
          elif newStatus == 'Failed':
            updateDict[lfn] = 'Problematic'
    return S_OK(updateDict)
Beispiel #33
0
from DIRAC.Core.Base import Script

# Build the standard DIRAC usage banner from this module's docstring
Script.setUsageMessage('\n'.join(
    [__doc__, 'Usage:',
     ' %s [option|cfgfile]' % Script.scriptName]))

if __name__ == "__main__":

    from DIRAC.Core.Base.Script import parseCommandLine
    parseCommandLine()  # parse CLI switches and initialise the DIRAC configuration

    import DIRAC

    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    reqClient = ReqClient()

    # Fetch a summary of the Request DB content; bail out on service error
    dbSummary = reqClient.getDBSummary()
    if not dbSummary["OK"]:
        DIRAC.gLogger.error(dbSummary["Message"])
        DIRAC.exit(-1)

    dbSummary = dbSummary["Value"]
    if not dbSummary:
        DIRAC.gLogger.info("ReqDB is empty!")
        DIRAC.exit(0)

    # Per-table summaries (Requests, Operations, Files);
    # presumably printed/formatted further below -- TODO confirm against full script
    reqs = dbSummary.get("Request", {})
    ops = dbSummary.get("Operation", {})
    fs = dbSummary.get("File", {})
Beispiel #34
0
    def __init__(self, *args, **kwargs):
        """Initialise the DataRecoveryAgent.

        Sets default options, instantiates the clients used to talk to the
        job-monitoring, file-catalog, transformation and request systems, and
        builds the ``self.todo`` rule table: an ORDERED list of
        (Message, Check, Actions) entries applied to each job, split by
        whether the transformation has input files or not. The order of the
        'InputFiles' rules is significant (first matching rule wins per the
        surrounding comments), so do not reorder entries.
        """
        AgentModule.__init__(self, *args, **kwargs)
        self.name = 'DataRecoveryAgent'
        self.enabled = False
        self.getJobInfoFromJDLOnly = False

        self.__getCSOptions()

        self.jobStatus = [
            'Failed', 'Done'
        ]  # This needs to be both otherwise we cannot account for all cases

        self.jobMon = JobMonitoringClient()
        self.fcClient = FileCatalogClient()
        self.tClient = TransformationClient()
        self.reqClient = ReqClient()
        self.diracAPI = Dirac()
        # LFNs already claimed as processed by an earlier task in this cycle
        self.inputFilesProcessed = set()
        # Rule table: each entry carries a human-readable Message/ShortMessage,
        # a hit Counter, a Check predicate over the job, and the Actions to run
        # when the Check matches
        self.todo = {'NoInputFiles':
                     [dict(Message="NoInputFiles: OutputExists: Job 'Done'",
                           ShortMessage="NoInputFiles: job 'Done' ",
                           Counter=0,
                           Check=lambda job: job.allFilesExist() and job.status == 'Failed',
                           Actions=lambda job, tInfo: [job.setJobDone(tInfo)],
                           ),
                      dict(Message="NoInputFiles: OutputMissing: Job 'Failed'",
                           ShortMessage="NoInputFiles: job 'Failed' ",
                           Counter=0,
                           Check=lambda job: job.allFilesMissing() and job.status == 'Done',
                           Actions=lambda job, tInfo: [job.setJobFailed(tInfo)],
                           ),
                      ],
                     'InputFiles':
                     [ \
                     # must always be first!

                         dict(Message="One of many Successful: clean others",
                              ShortMessage="Other Tasks --> Keep",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and job.otherTasks and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed),
                              Actions=lambda job, tInfo: [self.inputFilesProcessed.update(job.inputFiles),
                                                          job.setJobDone(tInfo),
                                                          job.setInputProcessed(tInfo)]
                              ),
                         dict(Message="Other Task processed Input, no Output: Fail",
                              ShortMessage="Other Tasks --> Fail",
                              Counter=0,
                              Check=lambda job: set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allFilesMissing() and job.status != 'Failed',
                              Actions=lambda job, tInfo: [job.setJobFailed(tInfo)]
                              ),
                         dict(Message="Other Task processed Input: Fail and clean",
                              ShortMessage="Other Tasks --> Cleanup",
                              Counter=0,
                              Check=lambda job: set(job.inputFiles).issubset(
                                  self.inputFilesProcessed) and not job.allFilesMissing(),
                              Actions=lambda job, tInfo: [job.setJobFailed(tInfo), job.cleanOutputs(tInfo)]
                              ),
                         dict(Message="InputFile(s) missing: mark job 'Failed', mark input 'Deleted', clean",
                              ShortMessage="Input Missing --> Job 'Failed, Input 'Deleted', Cleanup",
                              Counter=0,
                              Check=lambda job: job.inputFiles and job.allInputFilesMissing() and \
                              not job.allTransFilesDeleted(),
                              Actions=lambda job, tInfo: [job.cleanOutputs(tInfo), job.setJobFailed(tInfo),
                                                          job.setInputDeleted(tInfo)],
                              ),
                         dict(Message="InputFile(s) Deleted, output Exists: mark job 'Failed', clean",
                              ShortMessage="Input Deleted --> Job 'Failed, Cleanup",
                              Counter=0,
                              Check=lambda job: job.inputFiles and job.allInputFilesMissing() and \
                              job.allTransFilesDeleted() and not job.allFilesMissing(),
                              Actions=lambda job, tInfo: [job.cleanOutputs(tInfo), job.setJobFailed(tInfo)],
                              ),
                         # All Output Exists
                         dict(Message="Output Exists, job Failed, input not Processed --> Job Done, Input Processed",
                              ShortMessage="Output Exists --> Job Done, Input Processed",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              not job.allFilesProcessed() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setJobDone(tInfo), job.setInputProcessed(tInfo)]
                              ),
                         dict(Message="Output Exists, job Failed, input Processed --> Job Done",
                              ShortMessage="Output Exists --> Job Done",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesProcessed() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setJobDone(tInfo)]
                              ),
                         dict(Message="Output Exists, job Done, input not Processed --> Input Processed",
                              ShortMessage="Output Exists --> Input Processed",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and \
                              not job.otherTasks and \
                              job.status == 'Done' and \
                              not job.allFilesProcessed() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setInputProcessed(tInfo)]
                              ),
                         # outputmissing
                         dict(Message="Output Missing, job Failed, input Assigned, MaxError --> Input MaxReset",
                              ShortMessage="Max ErrorCount --> Input MaxReset",
                              Counter=0,
                              Check=lambda job: job.allFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesAssigned() and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allInputFilesExist() and \
                              job.checkErrorCount(),
                              Actions=lambda job, tInfo: [job.setInputMaxReset(tInfo)]
                              ),
                         dict(Message="Output Missing, job Failed, input Assigned --> Input Unused",
                              ShortMessage="Output Missing --> Input Unused",
                              Counter=0,
                              Check=lambda job: job.allFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesAssigned() and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setInputUnused(tInfo)]
                              ),
                         dict(Message="Output Missing, job Done, input Assigned --> Job Failed, Input Unused",
                              ShortMessage="Output Missing --> Job Failed, Input Unused",
                              Counter=0,
                              Check=lambda job: job.allFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Done' and \
                              job.allFilesAssigned() and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setInputUnused(tInfo), job.setJobFailed(tInfo)]
                              ),
                         # some files missing, needing cleanup. Only checking for
                         # assigned, because processed could mean an earlier job was
                         # succesful and this one is just the duplicate that needed
                         # to be removed! But we check for other tasks earlier, so
                         # this should not happen
                         dict(Message="Some missing, job Failed, input Assigned --> cleanup, Input 'Unused'",
                              ShortMessage="Output Missing --> Cleanup, Input Unused",
                              Counter=0,
                              Check=lambda job: job.someFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesAssigned() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.cleanOutputs(tInfo), job.setInputUnused(tInfo)]
                              ),
                         dict(Message="Some missing, job Done, input Assigned --> cleanup, job Failed, Input 'Unused'",
                              ShortMessage="Output Missing --> Cleanup, Job Failed, Input Unused",
                              Counter=0,
                              Check=lambda job: job.someFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Done' and \
                              job.allFilesAssigned() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [
                                  job.cleanOutputs(tInfo), job.setInputUnused(tInfo), job.setJobFailed(tInfo)]
                              ),
                         dict(Message="Some missing, job Done --> job Failed",
                              ShortMessage="Output Missing, Done --> Job Failed",
                              Counter=0,
                              Check=lambda job: not job.allFilesExist() and job.status == 'Done',
                              Actions=lambda job, tInfo: [job.setJobFailed(tInfo)]
                              ),
                         dict(Message="Something Strange",
                              ShortMessage="Strange",
                              Counter=0,
                              Check=lambda job: job.status not in ("Failed", "Done"),
                              Actions=lambda job, tInfo: []
                              ),
                         # should always be the last one!
                         dict(Message="Failed Hard",
                              ShortMessage="Failed Hard",
                              Counter=0,
                              Check=lambda job: False,  # never
                              Actions=lambda job, tInfo: []
                              ),
                     ]
                     }
        # per-key pair of counters, defaulting to (0, 0);
        # exact semantics are defined where the cache is used -- TODO confirm
        self.jobCache = defaultdict(lambda: (0, 0))
        # Notification options
        self.notesToSend = ""
        self.subject = "DataRecoveryAgent"
        self.startTime = time.time()
  gLogger.info( "Will create request '%s' with 'ReplicateAndRegister' "\
                "operation using %s lfns and %s target SEs" % ( requestName, len( lfnList ), len( targetSEs ) ) )

  from DIRAC.RequestManagementSystem.Client.Request import Request
  from DIRAC.RequestManagementSystem.Client.Operation import Operation
  from DIRAC.RequestManagementSystem.Client.File import File
  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
  from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
  from DIRAC.Core.Utilities.List import breakListIntoChunks

  lfnChunks = breakListIntoChunks( lfnList, 100 )
  multiRequests = len( lfnChunks ) > 1

  error = 0
  count = 0
  reqClient = ReqClient()
  fc = FileCatalog()
  requestIDs = []
  for lfnChunk in lfnChunks:
    metaDatas = fc.getFileMetadata( lfnChunk )
    if not metaDatas["OK"]:
      gLogger.error( "unable to read metadata for lfns: %s" % metaDatas["Message"] )
      error = -1
      continue
    metaDatas = metaDatas["Value"]
    for failedLFN, reason in metaDatas["Failed"].items():
      gLogger.error( "skipping %s: %s" % ( failedLFN, reason ) )
    lfnChunk = set( metaDatas["Successful"] )

    if not lfnChunk:
      gLogger.error( "LFN list is empty!!!" )
class TransformationCleaningAgent(AgentModule):
  """
  .. class:: TransformationCleaningAgent

  :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance
  :param ~TransformationClient.TransformationClient transClient: TransformationClient instance
  :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance

  """

  def __init__(self, *args, **kwargs):
    """c'tor: set default option values; the actual clients are created in initialize()."""
    AgentModule.__init__(self, *args, **kwargs)

    self.shifterProxy = None

    # Clients -- instantiated in initialize(), kept as None until then
    self.transClient = None     # transformation client
    self.wmsClient = None       # wms client
    self.reqClient = None       # request client
    self.metadataClient = None  # file catalog client

    # Configuration defaults (may be overridden from the CS in initialize())
    self.transformationTypes = None                                    # transformation types to treat
    self.directoryLocations = ['TransformationDB', 'MetadataCatalog']  # where to look for directories
    self.transfidmeta = 'TransformationID'                             # metadata tag for the transformation ID
    self.archiveAfter = 7                                              # archive period, in days
    self.logSE = 'LogSE'                                               # transformation log SE
    self.enableFlag = 'True'                                           # enable/disable execution

    # Default transformation-type groupings
    self.dataProcTTypes = ['MCSimulation', 'Merge']
    self.dataManipTTypes = ['Replication', 'Removal']

  def initialize(self):
    """ agent initialisation

    reading and setting config options

    :param self: self reference
    """
    # # shifter proxy
    # See cleanContent method: this proxy will be used ALSO when the file catalog used
    # is the DIRAC File Catalog (DFC).
    # This is possible because of unset of the "UseServerCertificate" option
    self.shifterProxy = self.am_getOption('shifterProxy', self.shifterProxy)

    # # transformation types
    self.dataProcTTypes = Operations().getValue('Transformations/DataProcessing', self.dataProcTTypes)
    self.dataManipTTypes = Operations().getValue('Transformations/DataManipulation', self.dataManipTTypes)
    # an explicit agent option takes precedence over the Operations defaults
    agentTSTypes = self.am_getOption('TransformationTypes', [])
    if agentTSTypes:
      self.transformationTypes = sorted(agentTSTypes)
    else:
      self.transformationTypes = sorted(self.dataProcTTypes + self.dataManipTTypes)
    self.log.info("Will consider the following transformation types: %s" % str(self.transformationTypes))
    # # directory locations
    self.directoryLocations = sorted(self.am_getOption('DirectoryLocations', self.directoryLocations))
    self.log.info("Will search for directories in the following locations: %s" % str(self.directoryLocations))
    # # transformation metadata
    self.transfidmeta = self.am_getOption('TransfIDMeta', self.transfidmeta)
    self.log.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta)
    # # archive period in days
    self.archiveAfter = self.am_getOption('ArchiveAfter', self.archiveAfter)  # days
    self.log.info("Will archive Completed transformations after %d days" % self.archiveAfter)
    # # transformation log SEs
    self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE)
    self.log.info("Will remove logs found on storage element: %s" % self.logSE)

    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.reqClient = ReqClient()
    # # file catalog client
    self.metadataClient = FileCatalogClient()

    return S_OK()

  #############################################################################
  def execute(self):
    """ execution in one agent's cycle

    Processes, in order: 'Cleaning' transformations (full cleanup),
    'RemovingFiles' transformations (output removal), and 'Completed'
    transformations older than ``self.archiveAfter`` days (archival).

    :param self: self reference
    :return: S_OK always (per-transformation failures are only logged)
    """

    self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
    if self.enableFlag != 'True':
      self.log.info('TransformationCleaningAgent is disabled by configuration option EnableFlag')
      return S_OK('Disabled via CS flag')

    # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    self._processTransformations({'Status': 'Cleaning', 'Type': self.transformationTypes},
                                 self._executeClean, "Cleaning transformation")

    # Obtain the transformations in RemovingFiles status and remove the output files
    self._processTransformations({'Status': 'RemovingFiles', 'Type': self.transformationTypes},
                                 self._executeRemoval, "Removing files for transformation")

    # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
    self._processTransformations({'Status': 'Completed', 'Type': self.transformationTypes},
                                 self._executeArchive, "Archiving files for transformation",
                                 older=olderThanTime, timeStamp='LastUpdate')
    return S_OK()

  def _processTransformations(self, condDict, action, logVerb, **kwargs):
    """ run *action* for every transformation matching *condDict*

    Factored out of execute(): the three status stanzas were identical except
    for the selection, the handler and the log text.

    :param dict condDict: selection passed to getTransformations
    :param action: bound method taking a transformation dict
    :param str logVerb: leading text of the per-transformation info log
    :param kwargs: extra keyword arguments for getTransformations (e.g. older, timeStamp)
    """
    res = self.transClient.getTransformations(condDict, **kwargs)
    if not res['OK']:
      self.log.error("Failed to get transformations", res['Message'])
      return
    for transDict in res['Value']:
      if self.shifterProxy:
        # a configured shifter proxy is already in place: call directly
        action(transDict)
      else:
        # otherwise run with the proxy of the transformation author
        self.log.info("%s %s with %s, %s" % (logVerb, transDict['TransformationID'],
                                             transDict['AuthorDN'], transDict['AuthorGroup']))
        executeWithUserProxy(action)(transDict,
                                     proxyUserDN=transDict['AuthorDN'],
                                     proxyUserGroup=transDict['AuthorGroup'])

  def _executeClean(self, transDict):
    """Clean transformation (or only archive it, for data-manipulation types)."""
    transID = transDict['TransformationID']
    # Replication/Removal transformations produce no data to clean: just archive
    if transDict['Type'] in self.dataManipTTypes:
      res = self.archiveTransformation(transID)
      if not res['OK']:
        self.log.error("Problems archiving transformation %s: %s" % (transID,
                                                                     res['Message']))
    else:
      res = self.cleanTransformation(transID)
      if not res['OK']:
        self.log.error("Problems cleaning transformation %s: %s" % (transID,
                                                                    res['Message']))

  def _executeRemoval(self, transDict):
    """Remove files from given transformation."""
    transID = transDict['TransformationID']
    res = self.removeTransformationOutput(transID)
    if res['OK']:
      return
    # failure is logged only; the agent carries on with other transformations
    self.log.error("Problems removing transformation %s: %s" % (transID,
                                                                res['Message']))

  def _executeArchive(self, transDict):
    """Archive the given transformation."""
    transID = transDict['TransformationID']
    res = self.archiveTransformation(transID)
    if not res['OK']:
      # failure is logged only; an S_OK is returned regardless
      self.log.error("Problems archiving transformation %s: %s" % (transID,
                                                                   res['Message']))
    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories(self, transID):
    """ get the directories for the supplied transformation from the transformation system.
        These directories are used by removeTransformationOutput and cleanTransformation for removing output.

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK with a sorted list of directory paths, or S_ERROR
    """
    self.log.verbose("Cleaning Transformation directories of transformation %d" % transID)
    directories = []
    # 1) directories registered in the TransformationDB itself
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters(transID, ['OutputDirectories'])
      if not res['OK']:
        self.log.error("Failed to obtain transformation directories", res['Message'])
        return res
      transDirectories = []
      if res['Value']:
        if not isinstance(res['Value'], list):
          try:
            # the parameter may come back as a stringified list
            transDirectories = ast.literal_eval(res['Value'])
          except BaseException:
            # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]'
            transDirectories.append(res['Value'])
        else:
          transDirectories = res['Value']
      directories = self._addDirs(transID, transDirectories, directories)

    # 2) directories found through the metadata catalog
    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata({self.transfidmeta: transID})
      if not res['OK']:
        self.log.error("Failed to obtain metadata catalog directories", res['Message'])
        return res
      transDirectories = res['Value']
      directories = self._addDirs(transID, transDirectories, directories)

    if not directories:
      self.log.info("No output directories found")
    directories = sorted(directories)
    return S_OK(directories)

  @classmethod
  def _addDirs(cls, transID, newDirs, existingDirs):
    """ append unique, normalised entries of :newDirs: to :existingDirs:

    Only paths containing the zero-padded transformation ID are kept.
    Each path is normalised *before* the duplicate check: the previous code
    checked membership of the raw path but appended the normalised one, so
    two spellings of the same directory (e.g. with a trailing slash) could
    both end up in the list.

    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths (modified in place and returned)
    :return: existingDirs
    """
    transStr = str(transID).zfill(8)  # loop-invariant, hoisted
    for folder in newDirs:
      if not re.search(transStr, str(folder)):
        continue
      normDir = os.path.normpath(folder)
      # compare the normalised form: that is what the list stores
      if normDir not in existingDirs:
        existingDirs.append(normDir)
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanContent(self, directory):
    """ wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    """
    self.log.verbose("Cleaning Catalog contents")
    res = self.__getCatalogDirectoryContents([directory])
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      self.log.info("No files are registered in the catalog directory %s" % directory)
      return S_OK()
    self.log.info("Attempting to remove %d possible remnants from the catalog and storage" % len(filesFound))

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')
    res = DataManager().removeFile(filesFound, force=True)
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')

    if not res['OK']:
      return res
    # Files that were simply not registered are tolerated; anything else
    # counts as a real removal failure
    removalFailed = False
    for lfn, reason in res['Value']['Failed'].items():
      if "File does not exist" not in str(reason):
        self.log.error("Failed to remove file found in the catalog", "%s %s" % (lfn, reason))
        removalFailed = True
      else:
        self.log.warn("File %s not found in some catalog: " % (lfn))
    if removalFailed:
      return S_ERROR("Failed to remove all files found in the catalog")
    return S_OK()

  def __getCatalogDirectoryContents(self, directories):
    """ get catalog contents under paths :directories:

    Walks the catalog tree starting from *directories* and collects every
    registered file.

    :param self: self reference
    :param list directories: list of paths in catalog
    :return: S_OK with the list of LFNs found
    """
    self.log.info('Obtaining the catalog contents for %d directories:' % len(directories))
    for directory in directories:
      self.log.info(directory)
    # work on a copy: the previous code aliased the caller's list and
    # destructively emptied it while iterating
    activeDirs = list(directories)
    allFiles = {}
    fc = FileCatalog()
    while activeDirs:
      # pop the head of the queue instead of a list(...).remove() scan
      currentDir = activeDirs.pop(0)
      res = returnSingleResult(fc.listDirectory(currentDir))
      if not res['OK'] and 'Directory does not exist' in res['Message']:  # FIXME: DFC should return errno
        self.log.info("The supplied directory %s does not exist" % currentDir)
      elif not res['OK']:
        if "No such file or directory" in res['Message']:
          self.log.info("%s: %s" % (currentDir, res['Message']))
        else:
          self.log.error("Failed to get directory %s content: %s" % (currentDir, res['Message']))
      else:
        dirContents = res['Value']
        activeDirs.extend(dirContents['SubDirs'])
        allFiles.update(dirContents['Files'])
    self.log.info("Found %d files" % len(allFiles))
    return S_OK(allFiles.keys())

  def cleanTransformationLogFiles(self, directory):
    """ clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    """
    self.log.verbose("Removing log files found in the directory %s" % directory)
    res = returnSingleResult(StorageElement(self.logSE).removeDirectory(directory, recursive=True))
    if res['OK']:
      self.log.info("Successfully removed transformation log directory")
      return S_OK()
    # A missing directory is not a failure: there is simply nothing to remove
    if cmpError(res, errno.ENOENT):
      self.log.warn("Transformation log directory does not exist", directory)
      return S_OK()
    self.log.error("Failed to remove log files", res['Message'])
    return res

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput(self, transID):
    """ This just removes any mention of the output data from the catalog and storage """
    self.log.info("Removing output data for transformation %s" % transID)
    result = self.getTransformationDirectories(transID)
    if not result['OK']:
      self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, result))
      return S_OK()
    directories = result['Value']
    # Log directories are handled elsewhere: only data directories are wiped
    for directory in directories:
      if re.search('/LOG/', directory):
        continue
      result = self.cleanContent(directory)
      if not result['OK']:
        return result

    self.log.info("Removed %d directories from the catalog \
      and its files from the storage for transformation %s" % (len(directories), transID))
    # Clean ALL the possible remnants found in the metadata catalog
    result = self.cleanMetadataCatalogFiles(transID)
    if not result['OK']:
      return result
    self.log.info("Successfully removed output of transformation %d" % transID)
    # Change the status of the transformation to RemovedFiles
    result = self.transClient.setTransformationParameter(transID, 'Status', 'RemovedFiles')
    if not result['OK']:
      self.log.error("Failed to update status of transformation %s to RemovedFiles" % (transID), result['Message'])
      return result
    self.log.info("Updated status of transformation %s to RemovedFiles" % (transID))
    return S_OK()

  def archiveTransformation(self, transID):
    """ This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    """
    self.log.info("Archiving transformation %s" % transID)
    # Clean the jobs in the WMS and any failover requests found
    result = self.cleanTransformationTasks(transID)
    if not result['OK']:
      return result
    # Clean the transformation DB of the files and job information
    result = self.transClient.cleanTransformation(transID)
    if not result['OK']:
      return result
    self.log.info("Successfully archived transformation %d" % transID)
    # Finally flag the transformation as Archived
    result = self.transClient.setTransformationParameter(transID, 'Status', 'Archived')
    if not result['OK']:
      self.log.error("Failed to update status of transformation %s to Archived" % (transID), result['Message'])
      return result
    self.log.info("Updated status of transformation %s to Archived" % (transID))
    return S_OK()

  def cleanTransformation(self, transID):
    """ This removes what was produced by the supplied transformation,
        leaving only some info and log in the transformation DB.

    :param int transID: transformation ID
    """
    self.log.info("Cleaning transformation %s" % transID)
    res = self.getTransformationDirectories(transID)
    if not res['OK']:
      self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, res))
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks(transID)
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search('/LOG/', directory):
        res = self.cleanTransformationLogFiles(directory)
        if not res['OK']:
          return res
      # NOTE: unlike removeTransformationOutput, the catalog content is wiped
      # for EVERY directory here, including the log directories
      res = self.cleanContent(directory)
      if not res['OK']:
        return res

    # Clean ALL the possible remnants found
    res = self.cleanMetadataCatalogFiles(transID)
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation(transID)
    if not res['OK']:
      return res
    self.log.info("Successfully cleaned transformation %d" % transID)
    res = self.transClient.setTransformationParameter(transID, 'Status', 'Cleaned')
    if not res['OK']:
      self.log.error("Failed to update status of transformation %s to Cleaned" % (transID), res['Message'])
      return res
    self.log.info("Updated status of transformation %s to Cleaned" % (transID))
    return S_OK()

  def cleanMetadataCatalogFiles(self, transID):
    """ Remove from the catalog(s) every file tagged with this transformation ID.

    :param transID: transformation ID, matched against the ``self.transfidmeta``
                    metadata key
    :return: S_OK() / S_ERROR
    """
    res = self.metadataClient.findFilesByMetadata({self.transfidmeta: transID})
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info('No files found for transID %s' % transID)
      return S_OK()

    # Executing with shifter proxy: temporarily disable the server certificate.
    # try/finally guarantees the flag is restored even if removeFile raises,
    # otherwise the whole agent would keep running without the certificate.
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')
    try:
      res = DataManager().removeFile(fileToRemove, force=True)
    finally:
      gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')

    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error("Failed to remove file found in metadata catalog", "%s %s" % (lfn, reason))
    if res['Value']['Failed']:
      return S_ERROR("Failed to remove all files found in the metadata catalog")
    self.log.info("Successfully removed all files found in the BK")
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks(self, transID):
    """ Clean tasks from WMS, or from the RMS if it is a DataManipulation transformation.

    :param transID: transformation ID (int, but may arrive as a string)
    :return: S_OK() / S_ERROR
    """
    # %s (not %d): transID may be a string, %d would raise TypeError
    self.log.verbose("Cleaning Transformation tasks of transformation %s" % transID)
    res = self.__getTransformationExternalIDs(transID)
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters(transID, ['Type'])
      if not res['OK']:
        # include the reason in the log, as done elsewhere in this agent
        self.log.error("Failed to determine transformation type", res['Message'])
        return res
      transType = res['Value']
      # data-processing transformations have WMS jobs as tasks,
      # everything else has RMS requests
      if transType in self.dataProcTTypes:
        res = self.__removeWMSTasks(externalIDs)
      else:
        res = self.__removeRequests(externalIDs)
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs(self, transID):
    """ Collect the ExternalID of every task of transformation :transID:

    :param self: self reference
    :param int transID: transformation ID
    :return: S_OK( list of ExternalIDs ) / S_ERROR
    """
    res = self.transClient.getTransformationTasks(condDict={'TransformationID': transID})
    if not res['OK']:
      self.log.error("Failed to get externalIDs for transformation %d" % transID, res['Message'])
      return res
    taskIDs = []
    for taskDict in res["Value"]:
      taskIDs.append(taskDict['ExternalID'])
    self.log.info("Found %d tasks for transformation" % len(taskIDs))
    return S_OK(taskIDs)

  def __removeRequests(self, requestIDs):
    """ Cancel the given requests in the RMS.

    :param requestIDs: iterable of request IDs (possibly strings); zero/empty
                       IDs are filtered out
    :return: S_OK()
    """
    # plain int(): the Python 2 ``long`` builtin does not exist on Python 3,
    # and this mirrors the jobID filtering done in __removeWMSTasks
    rIDs = [int(j) for j in requestIDs if int(j)]
    for reqID in rIDs:
      self.reqClient.cancelRequest(reqID)

    return S_OK()

  def __removeWMSTasks(self, transJobIDs):
    """ Wipe out jobs and their failover requests from the system.

    First kills, then deletes the jobs from the WMS (in chunks of 500),
    then cancels any RMS request associated to those jobs.

    :param self: self reference
    :param list transJobIDs: job IDs (possibly strings)
    :return: S_OK() / S_ERROR
    """
    # Prevent 0 job IDs
    jobIDs = [int(j) for j in transJobIDs if int(j)]
    allRemove = True
    for jobList in breakListIntoChunks(jobIDs, 500):

      res = self.wmsClient.killJob(jobList)
      if res['OK']:
        self.log.info("Successfully killed %d jobs from WMS" % len(jobList))
      # NOTE(review): the Invalid/Nonauthorized/Failed job ID lists are looked
      # up as extra top-level keys of the returned S_ERROR dict — confirm this
      # matches what WMSClient.killJob actually returns
      elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res):
        self.log.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs']))
      elif "NonauthorizedJobIDs" in res:
        self.log.error("Failed to kill %s jobs because not authorized" % len(res['NonauthorizedJobIDs']))
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error("Failed to kill %s jobs" % len(res['FailedJobIDs']))
        allRemove = False

      # same key-inspection pattern as for killJob above
      res = self.wmsClient.deleteJob(jobList)
      if res['OK']:
        self.log.info("Successfully removed %d jobs from WMS" % len(jobList))
      elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res):
        self.log.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs']))
      elif "NonauthorizedJobIDs" in res:
        self.log.error("Failed to remove %s jobs because not authorized" % len(res['NonauthorizedJobIDs']))
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error("Failed to remove %s jobs" % len(res['FailedJobIDs']))
        allRemove = False

    if not allRemove:
      return S_ERROR("Failed to remove all remnants from WMS")
    self.log.info("Successfully removed all tasks from the WMS")

    if not jobIDs:
      self.log.info("JobIDs not present, unable to remove asociated requests.")
      return S_OK()

    failed = 0
    failoverRequests = {}
    res = self.reqClient.getRequestIDsForJobs(jobIDs)
    if not res['OK']:
      self.log.error("Failed to get requestID for jobs.", res['Message'])
      return res
    failoverRequests.update(res['Value']['Successful'])
    if not failoverRequests:
      return S_OK()
    for jobID, requestID in res['Value']['Successful'].items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.reqClient.cancelRequest(requestID)
      if not res['OK']:
        self.log.error("Failed to remove request from RequestDB", res['Message'])
        failed += 1
      else:
        self.log.verbose("Removed request %s associated to job %d." % (requestID, jobID))

    if failed:
      self.log.info("Successfully removed %s requests" % (len(failoverRequests) - failed))
      self.log.info("Failed to remove %s requests" % failed)
      return S_ERROR("Failed to remove all the request from RequestDB")
    self.log.info("Successfully removed all the associated failover requests")
    return S_OK()
      since = convertDate( switch[1] )
    elif switch[0] == 'Until':
      until = convertDate( switch[1] )

  if reset:
    status = 'Failed'
  if terse:
    verbose = True
  if status:
    if not until:
      until = datetime.datetime.utcnow()
    if not since:
      since = until - datetime.timedelta( hours = 24 )
  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
  from DIRAC.RequestManagementSystem.Client.ReqClient import printRequest, recoverableRequest
  reqClient = ReqClient()
  if transID:
    if not taskIDs:
      gLogger.fatal( "If Transformation is set, a list of Tasks should also be set" )
      Script.showHelp()
      DIRAC.exit( 2 )
    requests = ['%08d_%08d' % ( transID, task ) for task in taskIDs]

  elif not jobs:
    args = Script.getPositionalArgs()
    if len( args ) == 1:
      all = True
      requests = [reqName for reqName in args[0].split( ',' ) if reqName]
  else:
    res = reqClient.getRequestNamesForJobs( jobs )
    if not res['OK']:
Beispiel #38
0
def main():
    """Inspect and act on RMS requests selected by job ID, transformation/task,
    request name/ID or status: print, reset, cancel or finalize them.

    Exits via DIRAC.exit / Script.showHelp on fatal conditions.
    """
    from DIRAC.Core.Base import Script
    Script.registerSwitch('', 'Job=', '   JobID[,jobID2,...]')
    Script.registerSwitch('', 'Transformation=', '   transformation ID')
    Script.registerSwitch(
        '', 'Tasks=', '      Associated to --Transformation, list of taskIDs')
    Script.registerSwitch('', 'Verbose', '   Print more information')
    Script.registerSwitch('', 'Terse', '   Only print request status')
    Script.registerSwitch('', 'Full', '   Print full request content')
    Script.registerSwitch('', 'Status=',
                          '   Select all requests in a given status')
    Script.registerSwitch(
        '', 'Since=',
        '      Associated to --Status, start date yyyy-mm-dd or nb of days (default= -one day'
    )
    Script.registerSwitch(
        '', 'Until=', '      Associated to --Status, end date (default= now')
    Script.registerSwitch(
        '', 'Maximum=',
        '      Associated to --Status, max number of requests ')
    Script.registerSwitch('', 'Reset',
                          '   Reset Failed files to Waiting if any')
    Script.registerSwitch('', 'Force', '   Force reset even if not Failed')
    Script.registerSwitch(
        '', 'All',
        '      (if --Status Failed) all requests, otherwise exclude irrecoverable failures'
    )
    Script.registerSwitch('', 'FixJob',
                          '   Set job Done if the request is Done')
    Script.registerSwitch('', 'Cancel', '   Cancel the request')
    Script.registerSwitch('', 'ListJobs', ' List the corresponding jobs')
    Script.registerSwitch(
        '', 'TargetSE=', ' Select request only if that SE is in the targetSEs')
    from DIRAC.Core.Base.Script import parseCommandLine
    parseCommandLine()

    import DIRAC
    from DIRAC import gLogger

    # option defaults
    jobs = []
    requestID = 0
    transID = None
    taskIDs = None
    tasks = None
    requests = []
    full = False
    verbose = False
    status = None
    until = None
    since = None
    terse = False
    allR = False
    reset = False
    fixJob = False
    maxRequests = 999999999999
    cancel = False
    listJobs = False
    force = False
    targetSE = set()
    for switch in Script.getUnprocessedSwitches():
        if switch[0] == 'Job':
            jobs = []
            job = "Unknown"
            try:
                for arg in switch[1].split(','):
                    if os.path.exists(arg):
                        # argument is a file containing comma-separated job IDs
                        with open(arg, 'r') as fp:
                            lines = fp.readlines()
                        for line in lines:
                            for job in line.split(','):
                                jobs += [int(job.strip())]
                        gLogger.notice("Found %d jobs in file %s" %
                                       (len(jobs), arg))
                    else:
                        jobs.append(int(arg))
            # int() on a malformed ID raises ValueError (TypeError would never
            # trigger here, letting bad input crash the script)
            except ValueError:
                gLogger.fatal("Invalid jobID", job)
        elif switch[0] == 'Transformation':
            try:
                transID = int(switch[1])
            except Exception:
                gLogger.fatal('Invalid transID', switch[1])
        elif switch[0] == 'Tasks':
            try:
                taskIDs = [int(task) for task in switch[1].split(',')]
            except Exception:
                gLogger.fatal('Invalid tasks', switch[1])
        elif switch[0] == 'Full':
            full = True
        elif switch[0] == 'Verbose':
            verbose = True
        elif switch[0] == 'Terse':
            terse = True
        elif switch[0] == 'All':
            allR = True
        elif switch[0] == 'Reset':
            reset = True
        elif switch[0] == 'Force':
            force = True
        elif switch[0] == 'Status':
            status = switch[1].capitalize()
        elif switch[0] == 'Since':
            since = convertDate(switch[1])
        elif switch[0] == 'Until':
            until = convertDate(switch[1])
        elif switch[0] == 'FixJob':
            fixJob = True
        elif switch[0] == 'Cancel':
            cancel = True
        elif switch[0] == 'ListJobs':
            listJobs = True
        elif switch[0] == 'Maximum':
            try:
                maxRequests = int(switch[1])
            except Exception:
                pass
        elif switch[0] == 'TargetSE':
            targetSE = set(switch[1].split(','))

    # option post-processing: --Reset implies Failed status, --FixJob implies Done
    if reset and not force:
        status = 'Failed'
    if fixJob:
        status = 'Done'
    if terse:
        verbose = True
    if status:
        # default time window: the last 24 hours
        if not until:
            until = datetime.datetime.utcnow()
        if not since:
            since = until - datetime.timedelta(hours=24)
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.Client.ReqClient import printRequest, recoverableRequest
    reqClient = ReqClient()
    if transID:
        if not taskIDs:
            gLogger.fatal(
                "If Transformation is set, a list of Tasks should also be set")
            Script.showHelp(exitCode=2)
        # In principle, the task name is unique, so the request name should be unique as well
        # If ever this would not work anymore, we would need to use the transformationClient
        # to fetch the ExternalID
        requests = ['%08d_%08d' % (transID, task) for task in taskIDs]
        allR = True

    elif not jobs:
        requests = []
        # Get full list of arguments, with and without comma
        for arg in [
                x.strip() for arg in Script.getPositionalArgs()
                for x in arg.split(',')
        ]:
            if os.path.exists(arg):
                # argument is a file containing request IDs; close it promptly
                with open(arg, 'r') as reqFile:
                    lines = reqFile.readlines()
                requests += [
                    reqID.strip() for line in lines
                    for reqID in line.split(',')
                ]
                gLogger.notice("Found %d requests in file" % len(requests))
            else:
                requests.append(arg)
            allR = True
    else:
        res = reqClient.getRequestIDsForJobs(jobs)
        if not res['OK']:
            gLogger.fatal("Error getting request for jobs", res['Message'])
            DIRAC.exit(2)
        if res['Value']['Failed']:
            gLogger.error(
                "No request found for jobs %s" %
                ','.join(sorted(str(job) for job in res['Value']['Failed'])))
        requests = sorted(res['Value']['Successful'].values())
        if requests:
            allR = True
        else:
            DIRAC.exit(0)

    if status and not requests:
        allR = allR or status != 'Failed'
        res = reqClient.getRequestIDsList([status],
                                          limit=maxRequests,
                                          since=since,
                                          until=until)

        if not res['OK']:
            gLogger.error("Error getting requests:", res['Message'])
            DIRAC.exit(2)
        requests = [
            reqID for reqID, _st, updTime in res['Value']
            if updTime > since and updTime <= until and reqID
        ]
        gLogger.notice('Obtained %d requests %s between %s and %s' %
                       (len(requests), status, since, until))
    if not requests:
        gLogger.notice('No request selected....')
        Script.showHelp(exitCode=2)
    okRequests = []
    warningPrinted = False
    jobIDList = []
    for reqID in requests:
        # We allow reqID to be the requestName if it is unique
        try:
            requestID = int(reqID)
        except ValueError:
            requestID = reqClient.getRequestIDForName(reqID)
            if not requestID['OK']:
                gLogger.notice(requestID['Message'])
                continue
            requestID = requestID['Value']

        request = reqClient.peekRequest(requestID)
        if not request["OK"]:
            gLogger.error(request["Message"])
            DIRAC.exit(-1)

        request = request["Value"]
        if not request:
            gLogger.error("no such request %s" % requestID)
            continue
        # If no operation as the targetSE, skip
        if targetSE:
            found = False
            for op in request:
                if op.TargetSE and targetSE.intersection(
                        op.TargetSE.split(',')):
                    found = True
                    break
            if not found:
                continue
        # keep a list of jobIDs if requested
        if request.JobID and listJobs:
            jobIDList.append(request.JobID)

        if status and request.Status != status:
            gLogger.notice(
                "Request %s is not in requested status %s%s" %
                (reqID, status, ' (cannot be reset)' if reset else ''))
            continue

        if fixJob and request.Status == 'Done' and request.JobID:
            # The request is for a job and is Done, verify that the job is in the proper status
            result = reqClient.finalizeRequest(request.RequestID,
                                               request.JobID,
                                               useCertificates=False)
            if not result['OK']:
                gLogger.error("Error finalizing job", result['Message'])
            else:
                gLogger.notice("Job %d updated to %s" %
                               (request.JobID, result['Value']))
            continue

        if cancel:
            if request.Status not in ('Done', 'Failed'):
                ret = reqClient.cancelRequest(requestID)
                if not ret['OK']:
                    gLogger.error("Error canceling request %s" % reqID,
                                  ret['Message'])
                else:
                    gLogger.notice("Request %s cancelled" % reqID)
            else:
                gLogger.notice("Request %s is in status %s, not cancelled" %
                               (reqID, request.Status))

        elif allR or recoverableRequest(request):
            okRequests.append(str(requestID))
            if reset:
                gLogger.notice('============ Request %s =============' %
                               requestID)
                ret = reqClient.resetFailedRequest(requestID, allR=allR)
                if not ret['OK']:
                    gLogger.error("Error resetting request %s" % requestID,
                                  ret['Message'])
            else:
                if len(requests) > 1:
                    gLogger.notice('\n===================================')
                dbStatus = reqClient.getRequestStatus(requestID).get(
                    'Value', 'Unknown')
                printRequest(request,
                             status=dbStatus,
                             full=full,
                             verbose=verbose,
                             terse=terse)

    if listJobs:
        gLogger.notice("List of %d jobs:\n" % len(jobIDList),
                       ','.join(str(jobID) for jobID in jobIDList))

    if status and okRequests:
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        gLogger.notice('\nList of %d selected requests:' % len(okRequests))
        for reqs in breakListIntoChunks(okRequests, 100):
            gLogger.notice(','.join(reqs))
Beispiel #39
0
class FTS3Operation(FTS3Serializable):
  """ Abstract class to represent an operation to be executed by FTS. It is a
      container for FTSFiles, as well as for FTSJobs.

      There can be a mapping between one FTS3Operation and one RMS Operation.

      The FTS3Operation takes care of generating the appropriate FTSJobs,
      and to perform a callback when the work with FTS is over. The actual
      generation and callback depends on the subclass.

      This class should not be instantiated directly, but rather one of its
      subclass
  """

  # all states an operation can be in; only the first three are reached
  # through the normal life cycle
  ALL_STATES = ['Active',  # Default state until FTS has done everything
                'Processed',  # Interactions with FTS done, but callback not done
                'Finished',  # Everything was done
                'Canceled',  # Canceled by the user
                'Failed',  # I don't know yet
                ]
  FINAL_STATES = ['Finished', 'Canceled', 'Failed']
  INIT_STATE = 'Active'

  # attributes persisted when (de)serializing through FTS3Serializable
  _attrToSerialize = ['operationID', 'username', 'userGroup', 'rmsReqID', 'rmsOpID',
                      'sourceSEs', 'ftsFiles', 'activity', 'priority',
                      'ftsJobs', 'creationTime', 'lastUpdate', 'error', 'status']

  def __init__(self, ftsFiles=None, username=None, userGroup=None, rmsReqID=-1,
               rmsOpID=0, sourceSEs=None, activity=None, priority=None):
    """
        :param ftsFiles: list of FTS3Files object that belongs to the operation
        :param username: username whose proxy should be used
        :param userGroup: group that should be used with username
        :param rmsReqID: ID of the Request in the RMS system
        :param rmsOpID: ID of the Operation in the RMS system
        :param sourceSEs: list of SE to be used as source (if applicable);
                          stored internally as a comma-joined string
        :param activity: FTS activity to use
        :param priority: FTS priority to use

    """
    ############################
    # persistent attributes

    self.username = username
    self.userGroup = userGroup

    self.rmsReqID = rmsReqID
    self.rmsOpID = rmsOpID

    # normalize to the comma-joined string form used in the DB
    if isinstance(sourceSEs, list):
      sourceSEs = ','.join(sourceSEs)

    self.sourceSEs = sourceSEs

    self.ftsFiles = ftsFiles if ftsFiles else []

    self.activity = activity
    self.priority = priority

    self.ftsJobs = []

    # truncate to whole seconds so DB round-trips compare equal
    now = datetime.datetime.utcnow().replace(microsecond=0)

    self.creationTime = now
    self.lastUpdate = now
    self.error = None
    self.status = FTS3Operation.INIT_STATE

    ########################
    # transient attributes (re-created by init_on_load after an ORM load)

    self.reqClient = None
    self.dManager = None
    self._log = None
    self.init_on_load()

  @orm.reconstructor
  def init_on_load(self):
    """ This method initializes some attributes.
        It is called by sqlalchemy (which does not call __init__)
    """
    self._vo = None

    self.dManager = DataManager()
    self.rssClient = ResourceStatus()

    # operationID only exists once the object has been persisted
    opID = getattr(self, 'operationID', None)
    loggerName = '%s/' % opID if opID else ''
    loggerName += 'req_%s/op_%s' % (self.rmsReqID, self.rmsOpID)

    self._log = gLogger.getSubLogger(loggerName, True)

  @property
  def vo(self):
    """:returns: return vo of the usergroup (cached after the first lookup) """
    if self._vo:
      return self._vo

    if self.userGroup:
      self._vo = getVOForGroup(self.userGroup)

    return self._vo

  def isTotallyProcessed(self):
    """ Returns True if and only if there is nothing
        else to be done by FTS for this operation.
        All files are successful or definitely failed

        Side effect: promotes self.status to 'Processed' when all files
        are in a final state.
    """

    if self.status == 'Processed':
      return True

    fileStatuses = set([f.status for f in self.ftsFiles])

    # If all the files are in a final state
    if fileStatuses <= set(FTS3File.FINAL_STATES):
      self.status = 'Processed'
      return True

    return False

  def _getFilesToSubmit(self, maxAttemptsPerFile=10):
    """ Return the list of FTS3files that can be submitted
        Either because they never were submitted, or because
        we can make more attempts

        Side effect: files past maxAttemptsPerFile are flagged 'Defunct'.

        :param maxAttemptsPerFile: the maximum number of attempts to be tried for a file

        :return List of FTS3File to submit
    """

    toSubmit = []

    for ftsFile in self.ftsFiles:
      if ftsFile.attempt >= maxAttemptsPerFile:
        ftsFile.status = 'Defunct'
      # The file was never submitted or
      # The file failed from the point of view of FTS
      # but no more than the maxAttemptsPerFile
      elif ftsFile.status in ('New', 'Failed'):
        toSubmit.append(ftsFile)

    return toSubmit

  @staticmethod
  def _checkSEAccess(seName, accessType, vo=None):
    """Check the Status of a storage element

        :param seName: name of the StorageElement
        :param accessType: ReadAccess, WriteAccess, CheckAccess, RemoveAccess
        :param vo: VO to pass to the StorageElement

        :return S_ERROR if not allowed or error, S_OK() otherwise
    """
    # Check that the target is writable
    # access = self.rssClient.getStorageElementStatus( seName, accessType )
    # if not access["OK"]:
    #   return access
    # if access["Value"][seName][accessType] not in ( "Active", "Degraded" ):
    #   return S_ERROR( "%s does not have %s in Active or Degraded" % ( seName, accessType ) )

    status = StorageElement(seName, vo=vo).getStatus()
    if not status['OK']:
      return status

    status = status['Value']

    # getStatus keys are 'Read', 'Write', ... so drop the 'Access' suffix
    accessType = accessType.replace('Access', '')
    if not status[accessType]:
      return S_ERROR("%s does not have %s in Active or Degraded" % (seName, accessType))

    return S_OK()

  def _createNewJob(self, jobType, ftsFiles, targetSE, sourceSE=None):
    """ Create a new FTS3Job object
        :param jobType: type of job to create (Transfer, Staging, Removal)
        :param ftsFiles: list of FTS3File objects the job has to work on
        :param targetSE: SE on which to operate
        :param sourceSE: source SE, only useful for Transfer jobs

        :return FTS3Job object
     """

    newJob = FTS3Job()
    newJob.type = jobType
    newJob.sourceSE = sourceSE
    newJob.targetSE = targetSE
    newJob.activity = self.activity
    newJob.priority = self.priority
    newJob.username = self.username
    newJob.userGroup = self.userGroup
    newJob.vo = self.vo
    newJob.filesToSubmit = ftsFiles
    # NOTE(review): getattr without a default — raises AttributeError if the
    # operation was never persisted; presumably jobs are only created for
    # persisted operations, confirm with callers
    newJob.operationID = getattr(self, 'operationID')

    return newJob

  def _callback(self):
    """Actually performs the callback
    """
    raise NotImplementedError("You should not be using the base class")

  def callback(self):
    """ Trigger the callback once all the FTS interactions are done
        and update the status of the Operation to 'Finished' if successful
    """
    self.reqClient = ReqClient()

    res = self._callback()

    if res['OK']:
      self.status = 'Finished'

    return res

  def prepareNewJobs(self, maxFilesPerJob=100, maxAttemptsPerFile=10):
    """ Prepare the new jobs that have to be submitted

        :param maxFilesPerJob: maximum number of files assigned to a job
        :param maxAttemptsPerFile: maximum number of retry after an fts failure

        :return list of jobs
    """
    raise NotImplementedError("You should not be using the base class")

  def _updateRmsOperationStatus(self):
    """ Update the status of the Files in the rms operation

          :return: S_OK with a dict:
                        * request: rms Request object
                        * operation: rms Operation object
                        * ftsFilesByTarget: dict {SE: [ftsFiles that were successful]}
    """

    log = self._log.getSubLogger("_updateRmsOperationStatus/%s/%s" %
                                 (getattr(self, 'operationID'), self.rmsReqID), child=True)

    res = self.reqClient.getRequest(self.rmsReqID)
    if not res['OK']:
      return res

    request = res['Value']

    res = request.getWaiting()

    if not res["OK"]:
      log.error("Unable to find 'Scheduled' operation in request")
      # put the request back so it is not lost in the assigned state
      res = self.reqClient.putRequest(request, useFailoverProxy=False, retryMainService=3)
      if not res['OK']:
        log.error("Could not put back the request !", res['Message'])
      return S_ERROR("Could not find scheduled operation")

    operation = res['Value']

    # We index the files of the operation by their IDs
    rmsFileIDs = {}

    for opFile in operation:
      rmsFileIDs[opFile.FileID] = opFile

    # Files that failed to transfer
    defunctRmsFileIDs = set()

    # { SE : [FTS3Files] }
    ftsFilesByTarget = {}
    for ftsFile in self.ftsFiles:

      if ftsFile.status == 'Defunct':
        log.info(
            "File failed to transfer, setting it to failed in RMS", "%s %s" %
            (ftsFile.lfn, ftsFile.targetSE))
        defunctRmsFileIDs.add(ftsFile.rmsFileID)
        continue

      if ftsFile.status == 'Canceled':
        log.info(
            "File canceled, setting it Failed in RMS", "%s %s" %
            (ftsFile.lfn, ftsFile.targetSE))
        defunctRmsFileIDs.add(ftsFile.rmsFileID)
        continue

      # SHOULD NEVER HAPPEN !
      if ftsFile.status != 'Finished':
        log.error(
            "Callback called with file in non terminal state", "%s %s" %
            (ftsFile.lfn, ftsFile.targetSE))
        res = self.reqClient.putRequest(request, useFailoverProxy=False, retryMainService=3)
        if not res['OK']:
          log.error("Could not put back the request !", res['Message'])
        return S_ERROR("Callback called with file in non terminal state")

      ftsFilesByTarget.setdefault(ftsFile.targetSE, []).append(ftsFile)

    # Now, we set the rmsFile as done in the operation, providing
    # that they are not in the defunctFiles.
    # We cannot do this in the previous list because in the FTS system,
    # each destination is a separate line in the DB but not in the RMS

    for ftsFile in self.ftsFiles:
      opFile = rmsFileIDs[ftsFile.rmsFileID]

      opFile.Status = 'Failed' if ftsFile.rmsFileID in defunctRmsFileIDs else 'Done'

    return S_OK({'request': request, 'operation': operation, 'ftsFilesByTarget': ftsFilesByTarget})

  @classmethod
  def fromRMSObjects(cls, rmsReq, rmsOp, username):
    """ Construct an FTS3Operation object from the RMS Request and Operation corresponding.
        The attributes taken are the OwnerGroup, Request and Operation IDS, sourceSE,
        and activity and priority if they are defined in the Argument field of the operation

        :param rmsReq: RMS Request object
        :param rmsOp: RMS Operation object
        :param username: username to which associate the FTS3Operation (normally comes from the Req OwnerDN)

        :returns: FTS3Operation object
    """

    ftsOp = cls()
    ftsOp.username = username
    ftsOp.userGroup = rmsReq.OwnerGroup

    ftsOp.rmsReqID = rmsReq.RequestID
    ftsOp.rmsOpID = rmsOp.OperationID

    ftsOp.sourceSEs = rmsOp.SourceSE

    # activity/priority are optional: best-effort parse of the JSON Arguments,
    # swallow any error (missing field, malformed JSON) on purpose
    try:
      argumentDic = json.loads(rmsOp.Arguments)

      ftsOp.activity = argumentDic['activity']
      ftsOp.priority = argumentDic['priority']
    except Exception as _e:
      pass

    return ftsOp
import DIRAC
# Check is provided SE is OK
se = StorageElement(targetSE)
if not se.valid:
    print se.errorReason
    print
    Script.showHelp()

from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
from DIRAC.RequestManagementSystem.Client.Request import Request
from DIRAC.RequestManagementSystem.Client.Operation import Operation
from DIRAC.RequestManagementSystem.Client.File import File
from DIRAC.RequestManagementSystem.private.RequestValidator import gRequestValidator
from DIRAC.DataManagementSystem.Client.ReplicaManager import ReplicaManager

reqClient = ReqClient()
rm = ReplicaManager()

for lfnList in breakListIntoChunks(lfns, 100):

    oRequest = Request()
    oRequest.RequestName = "%s_%s" % (md5(repr(time.time())).hexdigest()[:16],
                                      md5(repr(time.time())).hexdigest()[:16])

    replicateAndRegister = Operation()
    replicateAndRegister.Type = 'ReplicateAndRegister'
    replicateAndRegister.TargetSE = targetSE

    res = rm.getCatalogFileMetadata(lfnList)
    if not res['OK']:
        print "Can't get file metadata: %s" % res['Message']
class TransformationCleaningAgent(AgentModule):
    """
  .. class:: TransformationCleaningAgent

  Cleans, removes output of, and archives transformations depending on their
  status ('Cleaning', 'RemovingFiles', 'Completed').

  :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance
  :param ~TransformationClient.TransformationClient transClient: TransformationClient instance
  :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance

  """
    def __init__(self, *args, **kwargs):
        """ c'tor
    """
        AgentModule.__init__(self, *args, **kwargs)

        self.shifterProxy = None

        # # transformation client
        self.transClient = None
        # # wms client
        self.wmsClient = None
        # # request client
        self.reqClient = None
        # # file catalog client
        self.metadataClient = None

        # # transformations types
        self.transformationTypes = None
        # # directory locations
        self.directoryLocations = ['TransformationDB', 'MetadataCatalog']
        # # transformation metadata
        self.transfidmeta = 'TransformationID'
        # # archive period in days
        self.archiveAfter = 7
        # # transformation log SEs
        self.logSE = 'LogSE'
        # # enable/disable execution
        self.enableFlag = 'True'

        self.dataProcTTypes = ['MCSimulation', 'Merge']
        self.dataManipTTypes = ['Replication', 'Removal']

    def initialize(self):
        """ agent initialisation

    reading and setting config opts

    :param self: self reference
    """
        # # shifter proxy
        # See cleanContent method: this proxy will be used ALSO when the file catalog used
        # is the DIRAC File Catalog (DFC).
        # This is possible because of unset of the "UseServerCertificate" option
        self.shifterProxy = self.am_getOption('shifterProxy',
                                              self.shifterProxy)

        # # transformations types
        self.dataProcTTypes = Operations().getValue(
            'Transformations/DataProcessing', self.dataProcTTypes)
        self.dataManipTTypes = Operations().getValue(
            'Transformations/DataManipulation', self.dataManipTTypes)
        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if agentTSTypes:
            self.transformationTypes = sorted(agentTSTypes)
        else:
            self.transformationTypes = sorted(self.dataProcTTypes +
                                              self.dataManipTTypes)
        self.log.info("Will consider the following transformation types: %s" %
                      str(self.transformationTypes))
        # # directory locations
        self.directoryLocations = sorted(
            self.am_getOption('DirectoryLocations', self.directoryLocations))
        self.log.info(
            "Will search for directories in the following locations: %s" %
            str(self.directoryLocations))
        # # transformation metadata
        self.transfidmeta = self.am_getOption('TransfIDMeta',
                                              self.transfidmeta)
        self.log.info("Will use %s as metadata tag name for TransformationID" %
                      self.transfidmeta)
        # # archive period in days
        self.archiveAfter = self.am_getOption('ArchiveAfter',
                                              self.archiveAfter)  # days
        self.log.info("Will archive Completed transformations after %d days" %
                      self.archiveAfter)
        # # transformation log SEs
        self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE)
        self.log.info("Will remove logs found on storage element: %s" %
                      self.logSE)

        # # transformation client
        self.transClient = TransformationClient()
        # # wms client
        self.wmsClient = WMSClient()
        # # request client
        self.reqClient = ReqClient()
        # # file catalog client
        self.metadataClient = FileCatalogClient()

        return S_OK()

    #############################################################################
    def execute(self):
        """ execution in one agent's cycle

    :param self: self reference
    """

        self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
        if self.enableFlag != 'True':
            self.log.info(
                'TransformationCleaningAgent is disabled by configuration option EnableFlag'
            )
            return S_OK('Disabled via CS flag')

        # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
        res = self.transClient.getTransformations({
            'Status':
            'Cleaning',
            'Type':
            self.transformationTypes
        })
        if res['OK']:
            for transDict in res['Value']:
                if self.shifterProxy:
                    self._executeClean(transDict)
                else:
                    self.log.info(
                        "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeClean)(
                        transDict,
                        proxyUserDN=transDict['AuthorDN'],
                        proxyUserGroup=transDict['AuthorGroup'])
        else:
            self.log.error("Failed to get transformations", res['Message'])

        # Obtain the transformations in RemovingFiles status and removes the output files
        res = self.transClient.getTransformations({
            'Status':
            'RemovingFiles',
            'Type':
            self.transformationTypes
        })
        if res['OK']:
            for transDict in res['Value']:
                if self.shifterProxy:
                    self._executeRemoval(transDict)
                else:
                    self.log.info(
                        "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeRemoval)(
                        transDict,
                        proxyUserDN=transDict['AuthorDN'],
                        proxyUserGroup=transDict['AuthorGroup'])
        else:
            self.log.error("Could not get the transformations", res['Message'])

        # Obtain the transformations in Completed status and archive if inactive for X days
        olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
        res = self.transClient.getTransformations(
            {
                'Status': 'Completed',
                'Type': self.transformationTypes
            },
            older=olderThanTime,
            timeStamp='LastUpdate')
        if res['OK']:
            for transDict in res['Value']:
                if self.shifterProxy:
                    self._executeArchive(transDict)
                else:
                    self.log.info(
                        "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeArchive)(
                        transDict,
                        proxyUserDN=transDict['AuthorDN'],
                        proxyUserGroup=transDict['AuthorGroup'])
        else:
            self.log.error("Could not get the transformations", res['Message'])
        return S_OK()

    def _executeClean(self, transDict):
        """Clean transformation."""
        # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # We just archive
        if transDict['Type'] in self.dataManipTTypes:
            res = self.archiveTransformation(transDict['TransformationID'])
            if not res['OK']:
                self.log.error("Problems archiving transformation %s: %s" %
                               (transDict['TransformationID'], res['Message']))
        else:
            res = self.cleanTransformation(transDict['TransformationID'])
            if not res['OK']:
                self.log.error("Problems cleaning transformation %s: %s" %
                               (transDict['TransformationID'], res['Message']))

    def _executeRemoval(self, transDict):
        """Remove files from given transformation."""
        res = self.removeTransformationOutput(transDict['TransformationID'])
        if not res['OK']:
            self.log.error("Problems removing transformation %s: %s" %
                           (transDict['TransformationID'], res['Message']))

    def _executeArchive(self, transDict):
        """Archive the given transformation."""
        res = self.archiveTransformation(transDict['TransformationID'])
        if not res['OK']:
            self.log.error("Problems archiving transformation %s: %s" %
                           (transDict['TransformationID'], res['Message']))

        return S_OK()

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """ get the directories for the supplied transformation from the transformation system.
        These directories are used by removeTransformationOutput and cleanTransformation for removing output.

    :param self: self reference
    :param int transID: transformation ID
    """
        self.log.verbose(
            "Cleaning Transformation directories of transformation %s" %
            transID)
        directories = []
        if 'TransformationDB' in self.directoryLocations:
            res = self.transClient.getTransformationParameters(
                transID, ['OutputDirectories'])
            if not res['OK']:
                self.log.error("Failed to obtain transformation directories",
                               res['Message'])
                return res
            transDirectories = []
            if res['Value']:
                if not isinstance(res['Value'], list):
                    try:
                        transDirectories = ast.literal_eval(res['Value'])
                    except BaseException:
                        # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]'
                        transDirectories.append(res['Value'])
                else:
                    transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if 'MetadataCatalog' in self.directoryLocations:
            res = self.metadataClient.findDirectoriesByMetadata(
                {self.transfidmeta: transID})
            if not res['OK']:
                self.log.error("Failed to obtain metadata catalog directories",
                               res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if not directories:
            self.log.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)

    @classmethod
    def _addDirs(cls, transID, newDirs, existingDirs):
        """ append unique :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    """
        for folder in newDirs:
            transStr = str(transID).zfill(8)
            # only keep directories that actually belong to this transformation
            if re.search(transStr, str(folder)):
                if folder not in existingDirs:
                    existingDirs.append(os.path.normpath(folder))
        return existingDirs

    #############################################################################
    #
    # These are the methods for performing the cleaning of catalogs and storage
    #

    def cleanContent(self, directory):
        """ wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    """
        self.log.verbose("Cleaning Catalog contents")
        res = self.__getCatalogDirectoryContents([directory])
        if not res['OK']:
            return res
        filesFound = res['Value']
        if not filesFound:
            self.log.info(
                "No files are registered in the catalog directory %s" %
                directory)
            return S_OK()
        self.log.info(
            "Attempting to remove %d possible remnants from the catalog and storage"
            % len(filesFound))

        # Executing with shifter proxy
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'false')
        res = DataManager().removeFile(filesFound, force=True)
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'true')

        if not res['OK']:
            return res
        realFailure = False
        for lfn, reason in res['Value']['Failed'].items():
            if "File does not exist" in str(reason):
                # already gone: not a real failure, only worth a warning
                self.log.warn("File %s not found in some catalog: " % (lfn))
            else:
                self.log.error("Failed to remove file found in the catalog",
                               "%s %s" % (lfn, reason))
                realFailure = True
        if realFailure:
            return S_ERROR("Failed to remove all files found in the catalog")
        return S_OK()

    def __getCatalogDirectoryContents(self, directories):
        """ get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    :returns: S_OK( list of LFNs found under the directories, recursively )
    """
        self.log.info('Obtaining the catalog contents for %d directories:' %
                      len(directories))
        for directory in directories:
            self.log.info(directory)
        # work on a copy: the loop below consumes activeDirs and must not
        # mutate the caller's list
        activeDirs = list(directories)
        allFiles = {}
        fc = FileCatalog()
        while activeDirs:
            currentDir = activeDirs[0]
            res = returnSingleResult(fc.listDirectory(currentDir))
            activeDirs.remove(currentDir)
            if not res['OK'] and 'Directory does not exist' in res[
                    'Message']:  # FIXME: DFC should return errno
                self.log.info("The supplied directory %s does not exist" %
                              currentDir)
            elif not res['OK']:
                if "No such file or directory" in res['Message']:
                    self.log.info("%s: %s" % (currentDir, res['Message']))
                else:
                    self.log.error("Failed to get directory %s content: %s" %
                                   (currentDir, res['Message']))
            else:
                dirContents = res['Value']
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])
        self.log.info("Found %d files" % len(allFiles))
        # return a real list, not a Python 3 dict_keys view
        return S_OK(list(allFiles))

    def cleanTransformationLogFiles(self, directory):
        """ clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    """
        self.log.verbose("Removing log files found in the directory %s" %
                         directory)
        res = returnSingleResult(
            StorageElement(self.logSE).removeDirectory(directory,
                                                       recursive=True))
        if not res['OK']:
            if cmpError(res, errno.ENOENT):  # No such file or directory
                self.log.warn("Transformation log directory does not exist",
                              directory)
                return S_OK()
            self.log.error("Failed to remove log files", res['Message'])
            return res
        self.log.info("Successfully removed transformation log directory")
        return S_OK()

    #############################################################################
    #
    # These are the functional methods for archiving and cleaning transformations
    #

    def removeTransformationOutput(self, transID):
        """ This just removes any mention of the output data from the catalog and storage """
        self.log.info("Removing output data for transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            self.log.error(
                'Problem obtaining directories for transformation %s with result "%s"'
                % (transID, res))
            return S_OK()
        directories = res['Value']
        for directory in directories:
            # log directories are handled separately by cleanTransformation
            if not re.search('/LOG/', directory):
                res = self.cleanContent(directory)
                if not res['OK']:
                    return res

        self.log.info("Removed %d directories from the catalog \
      and its files from the storage for transformation %s" %
                      (len(directories), transID))
        # Clean ALL the possible remnants found in the metadata catalog
        res = self.cleanMetadataCatalogFiles(transID)
        if not res['OK']:
            return res
        self.log.info("Successfully removed output of transformation %s" %
                      transID)
        # Change the status of the transformation to RemovedFiles
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'RemovedFiles')
        if not res['OK']:
            self.log.error(
                "Failed to update status of transformation %s to RemovedFiles"
                % (transID), res['Message'])
            return res
        self.log.info("Updated status of transformation %s to RemovedFiles" %
                      (transID))
        return S_OK()

    def archiveTransformation(self, transID):
        """ This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    """
        self.log.info("Archiving transformation %s" % transID)
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res['OK']:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res['OK']:
            return res
        self.log.info("Successfully archived transformation %s" % transID)
        # Change the status of the transformation to archived
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'Archived')
        if not res['OK']:
            self.log.error(
                "Failed to update status of transformation %s to Archived" %
                (transID), res['Message'])
            return res
        self.log.info("Updated status of transformation %s to Archived" %
                      (transID))
        return S_OK()

    def cleanTransformation(self, transID):
        """ This removes what was produced by the supplied transformation,
        leaving only some info and log in the transformation DB.
    """
        self.log.info("Cleaning transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            self.log.error(
                'Problem obtaining directories for transformation %s with result "%s"'
                % (transID, res))
            return S_OK()
        directories = res['Value']
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res['OK']:
            return res
        # Clean the log files for the jobs
        for directory in directories:
            if re.search('/LOG/', directory):
                res = self.cleanTransformationLogFiles(directory)
                if not res['OK']:
                    return res
            res = self.cleanContent(directory)
            if not res['OK']:
                return res

        # Clean ALL the possible remnants found
        res = self.cleanMetadataCatalogFiles(transID)
        if not res['OK']:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res['OK']:
            return res
        self.log.info("Successfully cleaned transformation %s" % transID)
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'Cleaned')
        if not res['OK']:
            self.log.error(
                "Failed to update status of transformation %s to Cleaned" %
                (transID), res['Message'])
            return res
        self.log.info("Updated status of transformation %s to Cleaned" %
                      (transID))
        return S_OK()

    def cleanMetadataCatalogFiles(self, transID):
        """ wipe out files from catalog """
        res = self.metadataClient.findFilesByMetadata(
            {self.transfidmeta: transID})
        if not res['OK']:
            return res
        fileToRemove = res['Value']
        if not fileToRemove:
            self.log.info('No files found for transID %s' % transID)
            return S_OK()

        # Executing with shifter proxy
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'false')
        res = DataManager().removeFile(fileToRemove, force=True)
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'true')

        if not res['OK']:
            return res
        for lfn, reason in res['Value']['Failed'].items():
            self.log.error("Failed to remove file found in metadata catalog",
                           "%s %s" % (lfn, reason))
        if res['Value']['Failed']:
            return S_ERROR(
                "Failed to remove all files found in the metadata catalog")
        self.log.info("Successfully removed all files found in the BK")
        return S_OK()

    #############################################################################
    #
    # These are the methods for removing the jobs from the WMS and transformation DB
    #

    def cleanTransformationTasks(self, transID):
        """ clean tasks from WMS, or from the RMS if it is a DataManipulation transformation
    """
        self.log.verbose("Cleaning Transformation tasks of transformation %s" %
                         transID)
        res = self.__getTransformationExternalIDs(transID)
        if not res['OK']:
            return res
        externalIDs = res['Value']
        if externalIDs:
            res = self.transClient.getTransformationParameters(
                transID, ['Type'])
            if not res['OK']:
                self.log.error("Failed to determine transformation type")
                return res
            transType = res['Value']
            # data-processing tasks live in the WMS, all others in the RMS
            if transType in self.dataProcTTypes:
                res = self.__removeWMSTasks(externalIDs)
            else:
                res = self.__removeRequests(externalIDs)
            if not res['OK']:
                return res
        return S_OK()

    def __getTransformationExternalIDs(self, transID):
        """ collect all ExternalIDs for transformation :transID:

    :param self: self reference
    :param int transID: transformation ID
    """
        res = self.transClient.getTransformationTasks(
            condDict={'TransformationID': transID})
        if not res['OK']:
            self.log.error(
                "Failed to get externalIDs for transformation %s" % transID,
                res['Message'])
            return res
        externalIDs = [taskDict['ExternalID'] for taskDict in res["Value"]]
        self.log.info("Found %d tasks for transformation" % len(externalIDs))
        return S_OK(externalIDs)

    def __removeRequests(self, requestIDs):
        """ Cancel the given requests in the RMS.

    :param list requestIDs: request IDs (as ints or numeric strings)
    """
        # Filter out 0/empty IDs. int() accepts both numeric strings and
        # numbers; the Python 2-only ``long`` builtin was removed in Python 3.
        rIDs = [int(j) for j in requestIDs if int(j)]
        for reqID in rIDs:
            self.reqClient.cancelRequest(reqID)

        return S_OK()

    def __removeWMSTasks(self, transJobIDs):
        """ wipe out jobs and their requests from the system

    :param self: self reference
    :param list transJobIDs: job IDs
    """
        # Prevent 0 job IDs
        jobIDs = [int(j) for j in transJobIDs if int(j)]
        allRemove = True
        for jobList in breakListIntoChunks(jobIDs, 500):

            res = self.wmsClient.killJob(jobList)
            if res['OK']:
                self.log.info("Successfully killed %d jobs from WMS" %
                              len(jobList))
            elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs"
                                               not in res) and ("FailedJobIDs"
                                                                not in res):
                self.log.info("Found %s jobs which did not exist in the WMS" %
                              len(res['InvalidJobIDs']))
            elif "NonauthorizedJobIDs" in res:
                self.log.error(
                    "Failed to kill %s jobs because not authorized" %
                    len(res['NonauthorizedJobIDs']))
                allRemove = False
            elif "FailedJobIDs" in res:
                self.log.error("Failed to kill %s jobs" %
                               len(res['FailedJobIDs']))
                allRemove = False

            res = self.wmsClient.deleteJob(jobList)
            if res['OK']:
                self.log.info("Successfully removed %d jobs from WMS" %
                              len(jobList))
            elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs"
                                               not in res) and ("FailedJobIDs"
                                                                not in res):
                self.log.info("Found %s jobs which did not exist in the WMS" %
                              len(res['InvalidJobIDs']))
            elif "NonauthorizedJobIDs" in res:
                self.log.error(
                    "Failed to remove %s jobs because not authorized" %
                    len(res['NonauthorizedJobIDs']))
                allRemove = False
            elif "FailedJobIDs" in res:
                self.log.error("Failed to remove %s jobs" %
                               len(res['FailedJobIDs']))
                allRemove = False

        if not allRemove:
            return S_ERROR("Failed to remove all remnants from WMS")
        self.log.info("Successfully removed all tasks from the WMS")

        if not jobIDs:
            self.log.info(
                "JobIDs not present, unable to remove asociated requests.")
            return S_OK()

        failed = 0
        failoverRequests = {}
        res = self.reqClient.getRequestIDsForJobs(jobIDs)
        if not res['OK']:
            self.log.error("Failed to get requestID for jobs.", res['Message'])
            return res
        failoverRequests.update(res['Value']['Successful'])
        if not failoverRequests:
            return S_OK()
        for jobID, requestID in res['Value']['Successful'].items():
            # Put this check just in case, tasks must have associated jobs
            if jobID == 0 or jobID == '0':
                continue
            res = self.reqClient.cancelRequest(requestID)
            if not res['OK']:
                self.log.error("Failed to remove request from RequestDB",
                               res['Message'])
                failed += 1
            else:
                self.log.verbose("Removed request %s associated to job %d." %
                                 (requestID, jobID))

        if failed:
            self.log.info("Successfully removed %s requests" %
                          (len(failoverRequests) - failed))
            self.log.info("Failed to remove %s requests" % failed)
            return S_ERROR("Failed to remove all the request from RequestDB")
        self.log.info(
            "Successfully removed all the associated failover requests")
        return S_OK()
Beispiel #42
0
    def removeDeletedJobs(self):
        """Fully remove jobs that are already in status "DELETED", unless there are still requests.

        :returns: S_OK/S_ERROR
        """

        res = self._getJobsList({"Status": JobStatus.DELETED})
        if not res["OK"]:
            return res
        jobs = res["Value"]
        if not jobs:
            self.log.info("No jobs to remove")
            return S_OK()

        # Sandboxes must be detached before the jobs disappear
        self.log.info("Unassigning sandboxes from soon to be deleted jobs",
                      "(%d)" % len(jobs))
        unassign = SandboxStoreClient(useCertificates=True).unassignJobs(jobs)
        if not unassign["OK"]:
            self.log.error("Cannot unassign jobs to sandboxes",
                           unassign["Message"])
            return unassign

        self.log.info("Attempting to remove deleted jobs",
                      "(%d)" % len(jobs))

        # Keep any job whose RMS request is not final (or could not be deleted)
        reqClient = ReqClient()
        res = reqClient.getRequestIDsForJobs(jobs)
        if not res["OK"]:
            return res
        jobsWithRequest = res["Value"]["Successful"]
        if jobsWithRequest:
            keep = set()
            for job, reqID in jobsWithRequest.items():
                reqStatus = reqClient.getRequestStatus(reqID).get("Value")
                if reqStatus not in Request.FINAL_STATES:
                    # Request still active: keep that job
                    keep.add(job)
                elif not reqClient.deleteRequest(reqID)["OK"]:
                    # Request deletion failed: keep the job as well
                    keep.add(job)
            if keep:
                self.log.info(
                    "Some jobs won't be removed, as still having Requests not in final status",
                    "(n=%d)" % len(keep))
                jobs = list(set(jobs) - keep)
        if not jobs:
            return S_OK()

        # Removal is done per owner, with that owner's delegated credentials
        anyFailure = False
        for owner, ownedJobs in self._getOwnerJobsDict(jobs).items():
            ownerParts = owner.split(";")
            ownerDN = ownerParts[0]
            ownerGroup = ownerParts[1]
            self.log.verbose(
                "Attempting to remove jobs",
                "(n=%d) for %s : %s" % (len(ownedJobs), ownerDN, ownerGroup))
            wmsClient = WMSClient(useCertificates=True,
                                  delegatedDN=ownerDN,
                                  delegatedGroup=ownerGroup)
            removal = wmsClient.removeJob(ownedJobs)
            if not removal["OK"]:
                self.log.error(
                    "Could not remove jobs",
                    "for %s : %s (n=%d) : %s" %
                    (ownerDN, ownerGroup, len(ownedJobs), removal["Message"]),
                )
                anyFailure = True

        return S_ERROR() if anyFailure else S_OK()
            since = convertDate(switch[1])
        elif switch[0] == 'Until':
            until = convertDate(switch[1])

    if reset:
        status = 'Failed'
    if terse:
        verbose = True
    if status:
        if not until:
            until = datetime.datetime.utcnow()
        if not since:
            since = until - datetime.timedelta(hours=24)
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.Client.ReqClient import printRequest, recoverableRequest
    reqClient = ReqClient()
    if transID:
        if not taskIDs:
            gLogger.fatal(
                "If Transformation is set, a list of Tasks should also be set")
            Script.showHelp()
            DIRAC.exit(2)
        # In principle, the task name is unique, so the request name should be unique as well
        # If ever this would not work anymore, we would need to use the transformationClient
        # to fetch the ExternalID
        requests = ['%08d_%08d' % (transID, task) for task in taskIDs]
        allR = True

    elif not jobs:
        args = Script.getPositionalArgs()
        if len(args) == 1:
import DIRAC
# Check is provided SE is OK
se = StorageElement( targetSE )
if not se.valid:
  print se.errorReason
  print
  Script.showHelp()

from DIRAC.RequestManagementSystem.Client.ReqClient         import ReqClient
from DIRAC.RequestManagementSystem.Client.Request           import Request
from DIRAC.RequestManagementSystem.Client.Operation         import Operation
from DIRAC.RequestManagementSystem.Client.File              import File
from DIRAC.RequestManagementSystem.private.RequestValidator import RequestValidator
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

reqClient = ReqClient()
fc = FileCatalog()

for lfnList in breakListIntoChunks( lfns, 100 ):

  oRequest = Request()
  oRequest.RequestName = "%s_%s" % ( md5( repr( time.time() ) ).hexdigest()[:16], md5( repr( time.time() ) ).hexdigest()[:16] )

  replicateAndRegister = Operation()
  replicateAndRegister.Type = 'ReplicateAndRegister'
  replicateAndRegister.TargetSE = targetSE

  res = fc.getFileMetadata( lfnList )
  if not res['OK']:
    print "Can't get file metadata: %s" % res['Message']
    DIRAC.exit( 1 )
Beispiel #45
0
 def reqClient(self):
   """Lazily instantiate and cache a ReqClient, then return it."""
   if self._reqClient:
     return self._reqClient
   # Import deferred so that merely loading this module does not pull in DIRAC
   from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
   self._reqClient = ReqClient()
   return self._reqClient
Beispiel #46
0
class RequestTasks( TaskBase ):
  """Transformation task handler that materialises tasks as RMS Requests.

  Each task becomes a Request carrying a single Operation (by default
  'ReplicateAndRegister') with one File per input LFN; the requests are
  submitted and monitored through a ReqClient.
  """

  def __init__( self, transClient = None, logger = None, requestClient = None, requestClass = None, ):
    """ c'tor

        the requestClass is by default Request.
        If extensions want to use an extended type, they can pass it as a parameter.
        This is the same behavior as WorkflowTasks and jobClass
    """

    if not logger:
      logger = gLogger.getSubLogger( 'RequestTasks' )

    super( RequestTasks, self ).__init__( transClient, logger )

    # Allow dependency injection for tests and extensions
    self.requestClient = requestClient if requestClient else ReqClient()
    self.requestClass = requestClass if requestClass else Request

  @staticmethod
  def _taskName( transID, taskID ):
    """Return the canonical request name '<transID>_<taskID>' (8-digit zero-padded).

    In principle the task name is unique, so the request name is unique as well.
    """
    return str( transID ).zfill( 8 ) + '_' + str( taskID ).zfill( 8 )

  def prepareTransformationTasks( self, transBody, taskDict, owner = '', ownerGroup = '' ):
    """ Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB

        :param transBody: transformation body, optionally of the form '<type>;<operation>'
        :param taskDict: dict of taskID -> task parameters; on success each entry
                         gets a 'TaskObject' key holding a validated Request
                         (or '' when the task could not be turned into one)
        :param owner: DN set as OwnerDN on each created request
        :param ownerGroup: group set as OwnerGroup on each created request
        :return: S_OK( taskDict ) or the validation error
    """
    requestOperation = 'ReplicateAndRegister'
    if transBody:
      try:
        _requestType, requestOperation = transBody.split( ';' )
      except AttributeError:
        pass

    for taskID in sorted( taskDict ):
      paramDict = taskDict[taskID]

      # BUG FIX: the original code built the request only when InputData was
      # set, but validated and stored 'oRequest' OUTSIDE that guard: a task
      # without InputData would raise a NameError on the first iteration or
      # silently reuse (and re-store) the previous task's request object.
      if not paramDict['InputData']:
        self.log.error( "prepareTransformationTasks: no input data for task", str( taskID ) )
        taskDict[taskID]['TaskObject'] = ''
        continue

      transID = paramDict['TransformationID']

      oRequest = Request()
      transfer = Operation()
      transfer.Type = requestOperation
      transfer.TargetSE = paramDict['TargetSE']

      # InputData may come as a list of LFNs or a ';'-separated string
      if isinstance( paramDict['InputData'], list ):
        files = paramDict['InputData']
      elif isinstance( paramDict['InputData'], str ):
        files = paramDict['InputData'].split( ';' )
      else:
        # Previously 'files' stayed unbound here, crashing the loop below
        self.log.error( "prepareTransformationTasks: unexpected InputData type", repr( type( paramDict['InputData'] ) ) )
        taskDict[taskID]['TaskObject'] = ''
        continue

      for lfn in files:
        trFile = File()
        trFile.LFN = lfn
        transfer.addFile( trFile )

      oRequest.addOperation( transfer )
      oRequest.RequestName = self._taskName( transID, taskID )
      oRequest.OwnerDN = owner
      oRequest.OwnerGroup = ownerGroup

      isValid = gRequestValidator.validate( oRequest )
      if not isValid['OK']:
        return isValid

      taskDict[taskID]['TaskObject'] = oRequest

    return S_OK( taskDict )

  def submitTransformationTasks( self, taskDict ):
    """ Submit requests one by one

        Each entry gets 'Success' set and, on success, 'ExternalID' (the
        request ID returned by the RMS).
    """
    submitted = 0
    failed = 0
    startTime = time.time()
    for taskID in sorted( taskDict ):
      # Tasks without a prepared request are counted as failed submissions
      if not taskDict[taskID]['TaskObject']:
        taskDict[taskID]['Success'] = False
        failed += 1
        continue
      res = self.submitTaskToExternal( taskDict[taskID]['TaskObject'] )
      if res['OK']:
        taskDict[taskID]['ExternalID'] = res['Value']
        taskDict[taskID]['Success'] = True
        submitted += 1
      else:
        self.log.error( "Failed to submit task to RMS", res['Message'] )
        taskDict[taskID]['Success'] = False
        failed += 1
    self.log.info( 'submitTasks: Submitted %d tasks to RMS in %.1f seconds' % ( submitted, time.time() - startTime ) )
    if failed:
      self.log.info( 'submitTasks: Failed to submit %d tasks to RMS.' % ( failed ) )
    return S_OK( taskDict )

  def submitTaskToExternal( self, oRequest ):
    """ Submits a request using ReqClient

        :param oRequest: an instance of self.requestClass (Request by default)
        :return: result of ReqClient.putRequest, or S_ERROR for a wrong type
    """
    if isinstance( oRequest, self.requestClass ):
      return self.requestClient.putRequest( oRequest )
    return S_ERROR( "Request should be a Request object" )

  def updateTransformationReservedTasks( self, taskDicts ):
    """ Resolve the external (request) ID for each reserved task.

        :return: S_OK with {'NoTasks': [names with no request in the RMS],
                            'TaskNameIDs': {taskName: requestID}}
    """
    taskNameIDs = {}
    noTasks = []
    for taskDict in taskDicts:
      taskName = self._taskName( taskDict['TransformationID'], taskDict['TaskID'] )
      res = self.requestClient.getRequestInfo( taskName )
      if res['OK']:
        taskNameIDs[taskName] = res['Value'][0]
      elif re.search( "Failed to retrieve RequestID for Request", res['Message'] ):
        # The request simply does not exist (yet) in the RMS
        noTasks.append( taskName )
      else:
        self.log.warn( "Failed to get requestID for request", res['Message'] )
    return S_OK( {'NoTasks':noTasks, 'TaskNameIDs':taskNameIDs} )

  def getSubmittedTaskStatus( self, taskDicts ):
    """ Map the RMS request status of each task to lists of task IDs whose
        status changed: S_OK( {newStatus: [taskID, ...]} ).
    """
    updateDict = {}
    for taskDict in taskDicts:
      taskID = taskDict['TaskID']
      oldStatus = taskDict['ExternalStatus']
      taskName = self._taskName( taskDict['TransformationID'], taskID )
      res = self.requestClient.getRequestStatus( taskName )
      newStatus = ''
      if res['OK']:
        # FIXME: for compatibility between old and new RMS
        try:
          # old
          newStatus = res['Value']['RequestStatus']
        except TypeError:
          # new
          newStatus = res['Value']
      elif re.search( "Failed to retrieve RequestID for Request", res['Message'] ):
        newStatus = 'Failed'
      else:
        self.log.info( "getSubmittedTaskStatus: Failed to get requestID for request", res['Message'] )
      if newStatus and ( newStatus != oldStatus ):
        updateDict.setdefault( newStatus, [] ).append( taskID )
    return S_OK( updateDict )

  def getSubmittedFileStatus( self, fileDicts ):
    """ Map the RMS per-file status to transformation file status updates:
        S_OK( {lfn: 'Processed'|'Problematic'} ) for files whose status changed.
    """
    # Group the LFNs (with their current status) by task/request name
    taskFiles = {}
    for fileDict in fileDicts:
      taskName = self._taskName( fileDict['TransformationID'], fileDict['TaskID'] )
      taskFiles.setdefault( taskName, {} )[fileDict['LFN']] = fileDict['Status']

    updateDict = {}
    for taskName in sorted( taskFiles ):
      lfnDict = taskFiles[taskName]
      res = self.requestClient.getRequestFileStatus( taskName, lfnDict.keys() )
      if not res['OK']:
        self.log.warn( "getSubmittedFileStatus: Failed to get files status for request", res['Message'] )
        continue
      for lfn, newStatus in res['Value'].items():
        if newStatus == lfnDict[lfn]:
          continue
        if newStatus == 'Done':
          updateDict[lfn] = 'Processed'
        elif newStatus == 'Failed':
          updateDict[lfn] = 'Problematic'
    return S_OK( updateDict )
Beispiel #47
0
    try:
      shutil.copy(appTar,"%s%s" % (final_path, os.path.basename(appTar)))
    except EnvironmentError, x:
      gLogger.error("Could not copy because %s" % x)
      return S_ERROR("Could not copy because %s" % x)
  elif path.find("http://") > -1:
    gLogger.error("Path %s was not foreseen!" % path)
    gLogger.error("Location not known, upload to location yourself, and publish in CS manually")
    return S_ERROR()
  else:
    lfnpath = "%s%s" % (path, os.path.basename(appTar))
    res = datMan.putAndRegister(lfnpath, appTar, ops.getValue('Software/BaseStorageElement', "CERN-SRM"))
    if not res['OK']:
      return res
    request = Request()
    requestClient = ReqClient()
    request.RequestName = 'copy_%s' % os.path.basename(appTar).replace(".tgz", "").replace(".tar.gz", "")
    request.SourceComponent = 'ReplicateILCSoft'
    copies_at = ops.getValue('Software/CopiesAt', [])
    for copies in copies_at:
      transfer = Operation()
      transfer.Type = "ReplicateAndRegister"
      transfer.TargetSE = copies
      trFile = File()
      trFile.LFN = lfnpath
      trFile.GUID = ""
      transfer.addFile(trFile)
      request.addOperation(transfer)

    res = RequestValidator().validate(request)
    if not res['OK']:
Beispiel #48
0
    def _treatOperation(self, operation):
        """Treat one FTS3Operation:

          * performs the callback if the operation is totally processed
          * otherwise generates new FTS3 jobs and submits them (unless the
            matching RMS Request was canceled)
          * in either case, persists the operation back to the FTS3 DB

        :param operation: the FTS3Operation to treat

        :return: tuple (operation, S_OK()/S_ERROR())
        """
        try:
            # Thread name is used to key the per-thread FTS3 context cache
            threadID = current_process().name
            log = gLogger.getSubLogger("treatOperation/%s" %
                                       operation.operationID,
                                       child=True)

            # If the operation is totally processed
            # we perform the callback
            if operation.isTotallyProcessed():
                log.debug("FTS3Operation %s is totally processed" %
                          operation.operationID)
                res = operation.callback()

                if not res['OK']:
                    log.error("Error performing the callback", res)
                    log.info("Putting back the operation")
                    # Persist so the callback can be retried on a later cycle
                    dbRes = self.fts3db.persistOperation(operation)

                    if not dbRes['OK']:
                        log.error("Could not persist operation", dbRes)

                    return operation, res

            else:
                log.debug("FTS3Operation %s is not totally processed yet" %
                          operation.operationID)

                # This flag is set to False if we want to stop the ongoing processing
                # of an operation, typically when the matching RMS Request has been
                # canceled (see below)
                continueOperationProcessing = True

                # Check the status of the associated RMS Request.
                # If it is canceled then we will not create new FTS3Jobs, and mark
                # this as FTS3Operation canceled.

                if operation.rmsReqID:
                    res = ReqClient().getRequestStatus(operation.rmsReqID)
                    if not res['OK']:
                        log.error("Could not get request status", res)
                        return operation, res
                    rmsReqStatus = res['Value']

                    if rmsReqStatus == 'Canceled':
                        log.info(
                            "The RMS Request is canceled, canceling the FTS3Operation",
                            "rmsReqID: %s, FTS3OperationID: %s" %
                            (operation.rmsReqID, operation.operationID))
                        operation.status = 'Canceled'
                        continueOperationProcessing = False

                if continueOperationProcessing:
                    res = operation.prepareNewJobs(
                        maxFilesPerJob=self.maxFilesPerJob,
                        maxAttemptsPerFile=self.maxAttemptsPerFile)

                    if not res['OK']:
                        log.error(
                            "Cannot prepare new Jobs",
                            "FTS3Operation %s : %s" %
                            (operation.operationID, res))
                        return operation, res

                    newJobs = res['Value']

                    log.debug("FTS3Operation %s: %s new jobs to be submitted" %
                              (operation.operationID, len(newJobs)))

                    # Submit each job to a (policy-chosen) FTS server; a
                    # failure for one job does not abort the others
                    for ftsJob in newJobs:
                        res = self._serverPolicy.chooseFTS3Server()
                        if not res['OK']:
                            log.error(res)
                            continue

                        ftsServer = res['Value']
                        log.debug("Use %s server" % ftsServer)

                        ftsJob.ftsServer = ftsServer

                        # Context (credentials/session) for this user/group on
                        # the chosen server, cached per thread
                        res = self.getFTS3Context(ftsJob.username,
                                                  ftsJob.userGroup,
                                                  ftsServer,
                                                  threadID=threadID)

                        if not res['OK']:
                            log.error("Could not get context", res)
                            continue

                        context = res['Value']
                        res = ftsJob.submit(context=context,
                                            protocols=self.thirdPartyProtocols)

                        if not res['OK']:
                            log.error(
                                "Could not submit FTS3Job",
                                "FTS3Operation %s : %s" %
                                (operation.operationID, res))
                            continue

                        # Only successfully submitted jobs are attached to the
                        # operation (and hence persisted below)
                        operation.ftsJobs.append(ftsJob)

                        submittedFileIds = res['Value']
                        log.info(
                            "FTS3Operation %s: Submitted job for %s transfers"
                            % (operation.operationID, len(submittedFileIds)))

                # new jobs are put in the DB at the same time
            res = self.fts3db.persistOperation(operation)

            if not res['OK']:
                log.error("Could not persist operation", res)

            return operation, res

        except Exception as e:
            # NOTE(review): 'log' is assigned inside the try; if getSubLogger
            # itself raised, this line would hit a NameError — confirm/guard
            log.exception('Exception in the thread', repr(e))
            return operation, S_ERROR("Exception %s" % repr(e))
Beispiel #49
0
            since = convertDate(switch[1])
        elif switch[0] == 'Until':
            until = convertDate(switch[1])

    if reset:
        status = 'Failed'
    if terse:
        verbose = True
    if status:
        if not until:
            until = datetime.datetime.utcnow()
        if not since:
            since = until - datetime.timedelta(hours=24)
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.Client.ReqClient import printRequest, recoverableRequest
    reqClient = ReqClient()
    if transID:
        if not taskIDs:
            gLogger.fatal(
                "If Transformation is set, a list of Tasks should also be set")
            Script.showHelp()
            DIRAC.exit(2)
        requests = ['%08d_%08d' % (transID, task) for task in taskIDs]
        all = True

    elif not jobs:
        args = Script.getPositionalArgs()
        if len(args) == 1:
            all = True
            requests = [reqName for reqName in args[0].split(',') if reqName]
    else:
Beispiel #50
0
class DataRecoveryAgent(AgentModule):
    """Data Recovery Agent"""
    def __init__(self, *args, **kwargs):
        """Initialise clients, caches and the table of recovery rules.

        ``self.todo`` maps a transformation category ('NoInputFiles' /
        'InputFiles') to an ORDERED list of rules.  Each rule is a dict with
        a human-readable Message/ShortMessage, a hit Counter, a Check
        predicate evaluated on a job, and the Actions run when the Check
        matches.  Rule order matters: the first matching rule wins
        (see checkJob), so some entries must stay first/last in their list.
        """
        AgentModule.__init__(self, *args, **kwargs)
        self.name = 'DataRecoveryAgent'
        self.enabled = False
        self.getJobInfoFromJDLOnly = False

        self.__getCSOptions()

        self.jobStatus = [
            'Failed', 'Done'
        ]  # This needs to be both otherwise we cannot account for all cases

        # Clients used to query jobs, catalogs, transformations and requests
        self.jobMon = JobMonitoringClient()
        self.fcClient = FileCatalogClient()
        self.tClient = TransformationClient()
        self.reqClient = ReqClient()
        self.diracAPI = Dirac()
        # LFNs already accounted as processed by some other (successful) task
        self.inputFilesProcessed = set()
        # Ordered rule table: first Check that matches decides the Actions
        self.todo = {'NoInputFiles':
                     [dict(Message="NoInputFiles: OutputExists: Job 'Done'",
                           ShortMessage="NoInputFiles: job 'Done' ",
                           Counter=0,
                           Check=lambda job: job.allFilesExist() and job.status == 'Failed',
                           Actions=lambda job, tInfo: [job.setJobDone(tInfo)],
                           ),
                      dict(Message="NoInputFiles: OutputMissing: Job 'Failed'",
                           ShortMessage="NoInputFiles: job 'Failed' ",
                           Counter=0,
                           Check=lambda job: job.allFilesMissing() and job.status == 'Done',
                           Actions=lambda job, tInfo: [job.setJobFailed(tInfo)],
                           ),
                      ],
                     'InputFiles':
                     [ \
                     # must always be first!

                         dict(Message="One of many Successful: clean others",
                              ShortMessage="Other Tasks --> Keep",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and job.otherTasks and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed),
                              Actions=lambda job, tInfo: [self.inputFilesProcessed.update(job.inputFiles),
                                                          job.setJobDone(tInfo),
                                                          job.setInputProcessed(tInfo)]
                              ),
                         dict(Message="Other Task processed Input, no Output: Fail",
                              ShortMessage="Other Tasks --> Fail",
                              Counter=0,
                              Check=lambda job: set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allFilesMissing() and job.status != 'Failed',
                              Actions=lambda job, tInfo: [job.setJobFailed(tInfo)]
                              ),
                         dict(Message="Other Task processed Input: Fail and clean",
                              ShortMessage="Other Tasks --> Cleanup",
                              Counter=0,
                              Check=lambda job: set(job.inputFiles).issubset(
                                  self.inputFilesProcessed) and not job.allFilesMissing(),
                              Actions=lambda job, tInfo: [job.setJobFailed(tInfo), job.cleanOutputs(tInfo)]
                              ),
                         dict(Message="InputFile(s) missing: mark job 'Failed', mark input 'Deleted', clean",
                              ShortMessage="Input Missing --> Job 'Failed, Input 'Deleted', Cleanup",
                              Counter=0,
                              Check=lambda job: job.inputFiles and job.allInputFilesMissing() and \
                              not job.allTransFilesDeleted(),
                              Actions=lambda job, tInfo: [job.cleanOutputs(tInfo), job.setJobFailed(tInfo),
                                                          job.setInputDeleted(tInfo)],
                              ),
                         dict(Message="InputFile(s) Deleted, output Exists: mark job 'Failed', clean",
                              ShortMessage="Input Deleted --> Job 'Failed, Cleanup",
                              Counter=0,
                              Check=lambda job: job.inputFiles and job.allInputFilesMissing() and \
                              job.allTransFilesDeleted() and not job.allFilesMissing(),
                              Actions=lambda job, tInfo: [job.cleanOutputs(tInfo), job.setJobFailed(tInfo)],
                              ),
                         # All Output Exists
                         dict(Message="Output Exists, job Failed, input not Processed --> Job Done, Input Processed",
                              ShortMessage="Output Exists --> Job Done, Input Processed",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              not job.allFilesProcessed() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setJobDone(tInfo), job.setInputProcessed(tInfo)]
                              ),
                         dict(Message="Output Exists, job Failed, input Processed --> Job Done",
                              ShortMessage="Output Exists --> Job Done",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesProcessed() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setJobDone(tInfo)]
                              ),
                         dict(Message="Output Exists, job Done, input not Processed --> Input Processed",
                              ShortMessage="Output Exists --> Input Processed",
                              Counter=0,
                              Check=lambda job: job.allFilesExist() and \
                              not job.otherTasks and \
                              job.status == 'Done' and \
                              not job.allFilesProcessed() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setInputProcessed(tInfo)]
                              ),
                         # outputmissing
                         dict(Message="Output Missing, job Failed, input Assigned, MaxError --> Input MaxReset",
                              ShortMessage="Max ErrorCount --> Input MaxReset",
                              Counter=0,
                              Check=lambda job: job.allFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesAssigned() and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allInputFilesExist() and \
                              job.checkErrorCount(),
                              Actions=lambda job, tInfo: [job.setInputMaxReset(tInfo)]
                              ),
                         dict(Message="Output Missing, job Failed, input Assigned --> Input Unused",
                              ShortMessage="Output Missing --> Input Unused",
                              Counter=0,
                              Check=lambda job: job.allFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesAssigned() and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setInputUnused(tInfo)]
                              ),
                         dict(Message="Output Missing, job Done, input Assigned --> Job Failed, Input Unused",
                              ShortMessage="Output Missing --> Job Failed, Input Unused",
                              Counter=0,
                              Check=lambda job: job.allFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Done' and \
                              job.allFilesAssigned() and \
                              not set(job.inputFiles).issubset(self.inputFilesProcessed) and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.setInputUnused(tInfo), job.setJobFailed(tInfo)]
                              ),
                         # some files missing, needing cleanup. Only checking for
                         # assigned, because processed could mean an earlier job was
                         # succesful and this one is just the duplicate that needed
                         # to be removed! But we check for other tasks earlier, so
                         # this should not happen
                         dict(Message="Some missing, job Failed, input Assigned --> cleanup, Input 'Unused'",
                              ShortMessage="Output Missing --> Cleanup, Input Unused",
                              Counter=0,
                              Check=lambda job: job.someFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Failed' and \
                              job.allFilesAssigned() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [job.cleanOutputs(tInfo), job.setInputUnused(tInfo)]
                              ),
                         dict(Message="Some missing, job Done, input Assigned --> cleanup, job Failed, Input 'Unused'",
                              ShortMessage="Output Missing --> Cleanup, Job Failed, Input Unused",
                              Counter=0,
                              Check=lambda job: job.someFilesMissing() and \
                              not job.otherTasks and \
                              job.status == 'Done' and \
                              job.allFilesAssigned() and \
                              job.allInputFilesExist(),
                              Actions=lambda job, tInfo: [
                                  job.cleanOutputs(tInfo), job.setInputUnused(tInfo), job.setJobFailed(tInfo)]
                              ),
                         dict(Message="Some missing, job Done --> job Failed",
                              ShortMessage="Output Missing, Done --> Job Failed",
                              Counter=0,
                              Check=lambda job: not job.allFilesExist() and job.status == 'Done',
                              Actions=lambda job, tInfo: [job.setJobFailed(tInfo)]
                              ),
                         dict(Message="Something Strange",
                              ShortMessage="Strange",
                              Counter=0,
                              Check=lambda job: job.status not in ("Failed", "Done"),
                              Actions=lambda job, tInfo: []
                              ),
                         # should always be the last one!
                         dict(Message="Failed Hard",
                              ShortMessage="Failed Hard",
                              Counter=0,
                              Check=lambda job: False,  # never
                              Actions=lambda job, tInfo: []
                              ),
                     ]
                     }
        # Cache of (nDone, nFailed) per transformation, used to skip
        # transformations where nothing changed since the last cycle
        self.jobCache = defaultdict(lambda: (0, 0))
        # Notification options
        self.notesToSend = ""
        self.subject = "DataRecoveryAgent"
        self.startTime = time.time()

        #############################################################################

    def beginExecution(self):
        """Re-read the agent configuration from the CS before each cycle."""
        self.__getCSOptions()
        return S_OK()

    def __getCSOptions(self):
        """(Re)load every configurable option of the agent from the CS."""
        self.enabled = self.am_getOption('EnableFlag', False)
        self.transformationsToIgnore = self.am_getOption('TransformationsToIgnore', [])
        self.getJobInfoFromJDLOnly = self.am_getOption('JobInfoFromJDLOnly', False)
        self.transformationStatus = self.am_getOption('TransformationStatus', ['Active', 'Completing'])
        # Derive the default with/without-input type lists from the Operations
        # section, then let the agent options override them
        opsHelper = Operations()
        extendable = set(opsHelper.getValue('Transformations/ExtendableTransfTypes', ['MCSimulation']))
        processing = set(opsHelper.getValue('Transformations/DataProcessing', []))
        self.transNoInput = self.am_getOption('TransformationsNoInput', list(extendable))
        self.transWithInput = self.am_getOption('TransformationsWithInput', list(processing - extendable))
        self.transformationTypes = self.transWithInput + self.transNoInput
        self.log.notice('Will treat transformations without input files', self.transNoInput)
        self.log.notice('Will treat transformations with input files', self.transWithInput)
        # Notification / verbosity knobs
        self.addressTo = self.am_getOption('MailTo', [])
        self.addressFrom = self.am_getOption('MailFrom', '')
        self.printEveryNJobs = self.am_getOption('PrintEvery', 200)

    def execute(self):
        """Run one agent cycle: treat every eligible transformation.

        :return: S_OK, or S_ERROR when the transformations cannot be fetched
        """
        self.log.notice("Will ignore the following transformations: %s" %
                        self.transformationsToIgnore)
        self.log.notice(" Job Cache: %s " % self.jobCache)
        transformations = self.getEligibleTransformations(
            self.transformationStatus, self.transformationTypes)
        if not transformations['OK']:
            self.log.error("Failure to get transformations",
                           transformations['Message'])
            return S_ERROR("Failure to get transformations")
        # items() instead of the Python-2-only iteritems(): works on 2 and 3
        for transID, transInfoDict in transformations['Value'].items():
            # NOTE(review): transID is a *str* key here while
            # TransformationsToIgnore from the CS may hold ints — confirm the
            # CS value type, otherwise this check can never match
            if transID in self.transformationsToIgnore:
                self.log.notice('Ignoring Transformation: %s' % transID)
                continue
            self.__resetCounters()
            self.inputFilesProcessed = set()
            self.log.notice('Running over Transformation: %s' % transID)
            self.treatTransformation(int(transID), transInfoDict)
            self.sendNotification(transID, transInfoDict)

        return S_OK()

    def getEligibleTransformations(self, status, typeList):
        """Select transformations of the given statuses and types.

        :return: S_OK with a dict keyed by the (string) transformation ID,
                 or the error from the TransformationClient
        """
        res = self.tClient.getTransformations(condDict={
            'Status': status,
            'Type': typeList
        })
        if not res['OK']:
            return res
        return S_OK({str(prod['TransformationID']): prod for prod in res['Value']})

    def treatTransformation(self, transID, transInfoDict):
        """Run the recovery checks for a single transformation.

        :param int transID: transformation ID
        :param dict transInfoDict: transformation parameters from the TS
        """
        tInfo = TransformationInfo(transID, transInfoDict, self.enabled,
                                   self.tClient, self.fcClient, self.jobMon)
        jobs, nDone, nFailed = tInfo.getJobs(statusList=self.jobStatus)

        # Skip transformations whose Done/Failed counts did not change since
        # the last cycle: nothing new to recover (tuple compare instead of
        # two indexed comparisons)
        if self.jobCache[transID] == (nDone, nFailed):
            self.log.notice(
                'Skipping transformation %s because nothing changed' % transID)
            return

        self.jobCache[transID] = (nDone, nFailed)

        tasksDict = None
        lfnTaskDict = None

        self.startTime = time.time()
        if transInfoDict['Type'] in self.transWithInput:
            self.log.notice('Getting tasks...')
            tasksDict = tInfo.checkTasksStatus()
            # Map each input LFN to its task ID (dict comprehension instead
            # of dict() over a list comprehension)
            lfnTaskDict = {taskDict['LFN']: taskID
                           for taskID, taskDicts in tasksDict.items()
                           for taskDict in taskDicts}

        self.checkAllJobs(jobs, tInfo, tasksDict, lfnTaskDict)
        self.printSummary()

    def checkJob(self, job, tInfo):
        """Apply the first matching recovery rule to this job.

        Rules are taken from self.todo according to whether the job's
        transformation type takes input files; the first rule whose Check
        passes has its Counter bumped, is logged/recorded for the
        notification mail, and its Actions are executed.
        """
        if job.tType in self.transNoInput:
            rules = self.todo['NoInputFiles']
        else:
            rules = self.todo['InputFiles']
        for rule in rules:
            self.log.verbose('Testing: ', rule['Message'])
            if not rule['Check'](job):
                continue
            rule['Counter'] += 1
            self.log.notice(rule['Message'])
            self.log.notice(job)
            self.notesToSend += rule['Message'] + '\n'
            self.notesToSend += str(job) + '\n'
            rule['Actions'](job, tInfo)
            return

    def getLFNStatus(self, jobs):
        """Collect all input/output LFNs of the given jobs and check their existence.

        :param dict jobs: mapping of jobID to JobInfo-like objects
        :returns: dict mapping LFN to the existence flag reported by the FileCatalog
        """
        self.log.notice('Collecting LFNs...')
        lfnExistence = {}
        lfnCache = []
        counter = 0
        jobInfoStart = time.time()
        for counter, job in enumerate(jobs.values()):
            if counter % self.printEveryNJobs == 0:
                self.log.notice(
                    'Getting JobInfo: %d/%d: %3.1fs' %
                    (counter, len(jobs), float(time.time() - jobInfoStart)))
            # NOTE(review): this retries forever if getJobInformation keeps raising
            while True:
                try:
                    job.getJobInformation(self.diracAPI,
                                          self.jobMon,
                                          jdlOnly=self.getJobInfoFromJDLOnly)
                    lfnCache.extend(job.inputFiles)
                    lfnCache.extend(job.outputFiles)
                    break
                except RuntimeError as e:  # try again
                    self.log.error('+++++ Failure for job:', job.jobID)
                    self.log.error('+++++ Exception: ', str(e))

        timeSpent = float(time.time() - jobInfoStart)
        # enumerate leaves counter at len(jobs) - 1, so dividing by counter
        # raised ZeroDivisionError for zero or one job and was off by one
        # otherwise; divide by the actual number of processed jobs instead
        nProcessed = max(counter + 1, 1)
        self.log.notice('Getting JobInfo Done: %3.1fs (%3.3fs per job)' %
                        (timeSpent, timeSpent / nProcessed))

        counter = 0
        fileInfoStart = time.time()
        for lfnChunk in breakListIntoChunks(list(lfnCache), 200):
            counter += 200
            if counter % 1000 == 0:
                self.log.notice('Getting FileInfo: %d/%d: %3.1fs' %
                                (counter, len(lfnCache),
                                 float(time.time() - fileInfoStart)))
            # NOTE(review): same unbounded-retry pattern as above
            while True:
                try:
                    reps = self.fcClient.exists(lfnChunk)
                    if not reps['OK']:
                        self.log.error(
                            'Failed to check file existence, try again...',
                            reps['Message'])
                        raise RuntimeError('Try again')
                    statuses = reps['Value']
                    lfnExistence.update(statuses['Successful'])
                    break
                except RuntimeError:  # try again
                    pass
        self.log.notice('Getting FileInfo Done: %3.1fs' %
                        (float(time.time() - fileInfoStart)))

        return lfnExistence

    def setPendingRequests(self, jobs):
        """Loop over all the jobs and get requests, if any.

        Reads the RMS requests created by the given jobs (in chunks of 1000)
        and sets ``job.pendingRequest`` on every job whose request is not in a
        final ('Done'/'Canceled') state.

        :param dict jobs: mapping of jobID to JobInfo-like objects
        """
        for jobChunk in breakListIntoChunks(jobs.values(), 1000):
            jobIDs = [job.jobID for job in jobChunk]
            # NOTE(review): retries forever if readRequestsForJobs keeps failing
            while True:
                result = self.reqClient.readRequestsForJobs(jobIDs)
                if result['OK']:
                    break
                self.log.error('Failed to read requests', result['Message'])
                # repeat
            for jobID in result['Value']['Successful']:
                request = result['Value']['Successful'][jobID]
                requestID = request.RequestID
                # falls back to 'Unknown' when the status lookup fails
                dbStatus = self.reqClient.getRequestStatus(requestID).get(
                    'Value', 'Unknown')
                # find the matching job in the chunk and flag it
                for job in jobChunk:
                    if job.jobID == jobID:
                        job.pendingRequest = dbStatus not in ('Done',
                                                              'Canceled')
                        self.log.notice(
                            'Found %s request for job %d' %
                            ('pending' if job.pendingRequest else 'finished',
                             jobID))
                        break

    def checkAllJobs(self, jobs, tInfo, tasksDict=None, lfnTaskDict=None):
        """Run over all jobs and do checks.

        Resolves pending requests and LFN existence first, then checks each
        job; jobs with a pending request are skipped.

        :param dict jobs: mapping of jobID to JobInfo-like objects
        :param tInfo: TransformationInfo-like object for the transformation
        :param tasksDict: optional mapping of taskID to task dicts
        :param lfnTaskDict: optional mapping of LFN to taskID
        """
        fileJobDict = defaultdict(list)
        counter = 0
        nJobs = len(jobs)
        self.setPendingRequests(jobs)
        lfnExistence = self.getLFNStatus(jobs)
        self.log.notice('Running over all the jobs')
        jobCheckStart = time.time()
        for counter, job in enumerate(jobs.values()):
            if counter % self.printEveryNJobs == 0:
                self.log.notice(
                    'Checking Jobs %d/%d: %3.1fs' %
                    (counter, nJobs, float(time.time() - jobCheckStart)))
            # NOTE(review): retries forever if checkJob keeps raising RuntimeError
            while True:
                try:
                    # leave jobs with an unfinished RMS request alone
                    if job.pendingRequest:
                        self.log.warn('Job has Pending requests:\n%s' % job)
                        break
                    job.checkFileExistence(lfnExistence)
                    if tasksDict and lfnTaskDict:
                        try:
                            job.getTaskInfo(tasksDict, lfnTaskDict,
                                            self.transWithInput)
                        except TaskInfoException as e:
                            self.log.error(
                                " Skip Task, due to TaskInfoException: %s" % e)
                            # a job of an input transformation without input files
                            # cannot be recovered: fail it hard
                            if not job.inputFiles and job.tType in self.transWithInput:
                                self.__failJobHard(job, tInfo)
                            break
                        for inputFile in job.inputFiles:
                            fileJobDict[inputFile].append(job.jobID)
                    self.checkJob(job, tInfo)
                    break  # get out of the while loop
                except RuntimeError as e:
                    self.log.error("+++++ Failure for job: %d " % job.jobID)
                    self.log.error("+++++ Exception: ", str(e))
                    # run these again because of RuntimeError
        self.log.notice('Checking Jobs Done: %d/%d: %3.1fs' %
                        (counter, nJobs, float(time.time() - jobCheckStart)))

    def printSummary(self):
        """Log one summary line per configured check with its counter.

        When notification notes are pending, each summary line is also
        prepended to the notes so the email starts with the totals.
        """
        self.log.notice("Summary:")
        for check in itertools.chain.from_iterable(self.todo.values()):
            line = "{0}: {1}".format(check['ShortMessage'].ljust(56),
                                     str(check['Counter']).rjust(5))
            self.log.notice(line)
            if self.notesToSend:
                self.notesToSend = str(line) + '\n' + self.notesToSend

    def __resetCounters(self):
        """Reset the counters of all configured checks before a new cycle."""
        # iterate the check lists directly: the dict keys were unused, and
        # dict.values() works on both Python 2 and 3 (iteritems is py2-only)
        for checks in self.todo.values():
            for do in checks:
                do['Counter'] = 0

    def __failJobHard(self, job, tInfo):
        """Set the job to Failed and remove its output files, if any.

        Jobs that still have input files, or that are already Failed with all
        of their files missing, are left untouched.
        """
        if job.inputFiles:
            return
        alreadyHandled = job.status in ("Failed",) and job.allFilesMissing()
        if alreadyHandled:
            return
        self.log.notice("Failing job hard %s" % job)
        note = "Failing job %s: no input file?\n" % job.jobID
        self.notesToSend += note + str(job) + '\n'
        # the last InputFiles check is the 'fail hard' counter
        self.todo['InputFiles'][-1]['Counter'] += 1
        job.cleanOutputs(tInfo)
        job.setJobFailed(tInfo)

    def __notOnlyKeepers(self, transType):
        """Check whether anything beyond 'Keep' messages was recorded.

        If only the first (keeper) counter moved, neither a report email nor a
        re-run next cycle is needed.
        """
        if transType in self.transNoInput:
            return True

        # sum every counter except the first ('Keep') entry
        return sum(check['Counter'] for check in self.todo['InputFiles'][1:]) > 0

    def sendNotification(self, transID, transInfoDict):
        """Send notification email if something was modified for a transformation.

        :param int transID: ID of given transformation
        :param transInfoDict: transformation information dict (needs 'Type')
        """
        if not (self.addressTo and self.addressFrom and self.notesToSend):
            return
        if not self.__notOnlyKeepers(transInfoDict['Type']):
            # only 'Keep' entries were recorded: drop the notes without mailing
            self.notesToSend = ""
            return

        # something happened: forget the cached job counters for this transformation
        self.jobCache.pop(int(transID), None)

        subject = "%s: %s" % (self.subject, transID)
        for recipient in self.addressTo:
            sent = NotificationClient().sendMail(recipient,
                                                 subject,
                                                 self.notesToSend,
                                                 self.addressFrom,
                                                 localAttempt=False)
            if not sent['OK']:
                self.log.error('Cannot send notification mail', sent['Message'])
        # purge notes
        self.notesToSend = ""
from DIRAC.Core.Base import Script
# Build the --help text from the module docstring.
Script.setUsageMessage( '\n'.join( [ __doc__,
                                     'Usage:',
                                     ' %s [option|cfgfile] <Request list>' % Script.scriptName ] ) )

if __name__ == "__main__":

  from DIRAC.Core.Base.Script import parseCommandLine
  parseCommandLine()

  import DIRAC
  requests = []

  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
  reqClient = ReqClient()


  # Single positional argument: a comma-separated list of request names.
  args = Script.getPositionalArgs()
  if len( args ) == 1:
    requests = [reqName for reqName in args[0].split( ',' ) if reqName]

  if not requests:
    DIRAC.gLogger.fatal( "Need at least one request name" )
    Script.showHelp()
    DIRAC.exit( 1 )

  # Cancel each request by name.
  # NOTE(review): this snippet appears truncated here - the body of the
  # following 'if' statement is missing in this file.
  for reqName in requests:
    reqName = reqName.strip()
    res = reqClient.cancelRequest( reqName )
    if res['OK']:
class FileStatusTransformationAgent(AgentModule):
  """ FileStatusTransformationAgent """

  def __init__(self, *args, **kwargs):
    """Constructor: set configuration defaults and instantiate the clients.

    The values set here are overwritten from the configuration in
    beginExecution before every cycle.
    """
    AgentModule.__init__(self, *args, **kwargs)
    self.name = 'FileStatusTransformationAgent'
    # the agent only performs changes when explicitly enabled
    self.enabled = False
    self.shifterProxy = 'DataManager'
    self.transformationTypes = ["Replication"]
    self.transformationStatuses = ["Active"]
    self.transformationFileStatuses = ["Assigned", "Problematic", "Processed", "Unused"]

    # notification email settings
    self.addressTo = ["*****@*****.**"]
    self.addressFrom = "*****@*****.**"
    self.emailSubject = "FileStatusTransformationAgent"

    # per-action accounting and collected error strings for the report email
    self.accounting = defaultdict(list)
    self.errors = []

    self.fcClient = FileCatalogClient()
    self.tClient = TransformationClient()
    self.reqClient = ReqClient()
    self.nClient = NotificationClient()

  def checkFileStatusFuncExists(self, status):
    """Return True if a ``check_<status>_files`` handler is implemented, else False."""
    handlerName = "check_%s_files" % (status.lower())
    handler = getattr(self, handlerName, None)
    if handler is None or not callable(handler):
      self.log.warn("Unable to process transformation files with status ", status)
      return False

    return True

  def beginExecution(self):
    """ Reload the configurations before every cycle """
    self.enabled = self.am_getOption('EnableFlag', False)
    # NOTE(review): am_setOption (not am_getOption) is used here - presumably to
    # force the shifterProxy to 'DataManager'; confirm this is intentional
    self.shifterProxy = self.am_setOption('shifterProxy', 'DataManager')
    self.transformationTypes = self.am_getOption('TransformationTypes', ["Replication"])
    self.transformationStatuses = self.am_getOption('TransformationStatuses', ["Active"])
    self.transformationFileStatuses = self.am_getOption(
        'TransformationFileStatuses', ["Assigned", "Problematic", "Processed", "Unused"])

    self.addressTo = self.am_getOption('MailTo', ["*****@*****.**"])
    self.addressFrom = self.am_getOption('MailFrom', "*****@*****.**")

    # keep only the statuses for which a check_<status>_files handler exists
    # (Python 2 filter returns a list)
    self.transformationFileStatuses = filter(self.checkFileStatusFuncExists, self.transformationFileStatuses)
    self.accounting.clear()

    return S_OK()

  def sendNotification(self, transID, transType=None, sourceSEs=None, targetSEs=None):
    """Email the accounting information and accumulated errors of a transformation.

    :param transID: transformation ID
    :param transType: optional transformation type for the report header
    :param sourceSEs: optional list of source storage elements
    :param targetSEs: optional list of target storage elements
    :returns: S_OK in all cases (individual mail failures are only logged)
    """
    if not (self.errors or self.accounting):
      # nothing happened, nothing to report
      return S_OK()

    body = "Transformation ID: %s\n" % transID
    if transType:
      body += "Transformation Type: %s\n" % transType

    if sourceSEs:
      body += "Source SE: %s\n" % (" ".join(str(source) for source in sourceSEs))

    if targetSEs:
      body += "Target SE: %s\n\n" % (" ".join(str(target) for target in targetSEs))

    # one table row per accounted file, grouped by the applied action
    tableRows = []
    for action, transFiles in self.accounting.items():
      body += "Total number of files with action %s: %s\n" % (action, len(transFiles))
      for transFile in transFiles:
        tableRows.append([[transFile['LFN']],
                          [str(transFile['AvailableOnSource'])],
                          [str(transFile['AvailableOnTarget'])],
                          [transFile['Status']],
                          [action]])

    if tableRows:
      header = ["LFN", "Source", "Target", "Old Status", "Action"]
      body += printTable(header, tableRows, printOut=False, numbering=False, columnSeparator=' | ')

    if self.errors:
      body += "\n\nErrors:"
      body += "\n".join(self.errors)

    self.log.notice(body)
    subject = "%s: %s" % (self.emailSubject, transID)
    for address in self.addressTo:
      res = self.nClient.sendMail(address, subject, body, self.addressFrom, localAttempt=False)
      if not res['OK']:
        self.log.error("Failure to send Email notification to ", address)
        continue

    # start the next accounting period with a clean slate
    self.errors = []
    self.accounting.clear()
    return S_OK()

  def logError(self, errStr, varMsg=''):
    """Log an error and collect it for the notification email."""
    self.log.error(errStr, varMsg)
    self.errors.append(errStr + varMsg)

  def execute(self):
    """ main execution loop of Agent

    Fetches the eligible transformations and processes each one; a
    transformation missing SourceSE, TargetSE or its data transformation type
    is skipped with a notification.

    :returns: S_OK, or S_ERROR when the transformations cannot be fetched
    """

    res = self.getTransformations()
    if not res['OK']:
      self.log.error('Failure to get transformations', res['Message'])
      return S_ERROR("Failure to get transformations")

    transformations = res['Value']
    if not transformations:
      self.log.notice('No transformations found with Status %s and Type %s ' %
                      (self.transformationStatuses, self.transformationTypes))
      return S_OK()

    self.log.notice('Will treat %d transformations' % len(transformations))
    self.log.notice('Transformations: %s' % ",".join([str(transformation['TransformationID'])
                                                      for transformation in transformations]))

    for trans in transformations:
      transID = trans['TransformationID']
      # validate the required parameters; on failure, report what is known so far
      if 'SourceSE' not in trans or not trans['SourceSE']:
        self.logError("SourceSE not set for transformation, skip processing, transID: ", "%s" % transID)
        self.sendNotification(transID)
        continue

      if 'TargetSE' not in trans or not trans['TargetSE']:
        self.logError("TargetSE not set for transformation, skip processing, transID: ", "%s" % transID)
        self.sendNotification(transID, sourceSEs=trans['SourceSE'])
        continue

      if 'DataTransType' not in trans:
        self.logError("Transformation Type not set for transformation, skip processing, transID: ", "%s" % transID)
        self.sendNotification(transID, sourceSEs=trans['SourceSE'], targetSEs=trans['TargetSE'])
        continue

      res = self.processTransformation(transID, trans['SourceSE'], trans['TargetSE'], trans['DataTransType'])
      if not res['OK']:
        self.log.error('Failure to process transformation with ID:', transID)
        continue

    return S_OK()

  def getTransformations(self, transID=None):
    """ returns transformations of a given type and status

    :param transID: optional transformation ID; when given, the query is
                    restricted to that transformation
    :returns: S_OK with a list of transformation dicts, each enriched with
              'SourceSE', 'TargetSE' and 'DataTransType' where determinable
    """
    res = None
    if transID:
      res = self.tClient.getTransformations(
          condDict={'TransformationID': transID,
                    'Status': self.transformationStatuses,
                    'Type': self.transformationTypes})
    else:
      res = self.tClient.getTransformations(
          condDict={'Status': self.transformationStatuses, 'Type': self.transformationTypes})

    if not res['OK']:
      return res

    result = res['Value']
    for trans in result:
      res = self.tClient.getTransformationParameters(trans['TransformationID'], ['SourceSE', 'TargetSE'])
      if not res['OK']:
        self.log.error('Failure to get SourceSE and TargetSE parameters for Transformation ID:',
                       trans['TransformationID'])
        continue

      # WARNING(review): eval() on values read from the database is unsafe;
      # ast.literal_eval would be the safe equivalent for list/string literals -
      # confirm the stored parameter format before changing
      trans['SourceSE'] = eval(res['Value']['SourceSE'])
      trans['TargetSE'] = eval(res['Value']['TargetSE'])

      res = self.getDataTransformationType(trans['TransformationID'])
      if not res['OK']:
        self.log.error('Failure to determine Data Transformation Type', "%s: %s"
                       % (trans['TransformationID'], res['Message']))
        continue

      trans['DataTransType'] = res['Value']

    return S_OK(result)

  def getRequestStatus(self, transID, taskIDs):
    """Return the RMS request status and ID for each given task of a transformation.

    :param transID: transformation ID
    :param taskIDs: list of task IDs
    :returns: S_OK({taskID: {'RequestStatus': ..., 'RequestID': ...}})
    """
    res = self.tClient.getTransformationTasks(condDict={'TransformationID': transID, 'TaskID': taskIDs})
    if not res['OK']:
      self.log.error('Failure to get Transformation Tasks for Transformation ID:', transID)
      return res

    requestStatus = dict((task['TaskID'],
                          {'RequestStatus': task['ExternalStatus'],
                           'RequestID': long(task['ExternalID'])})
                         for task in res['Value'])
    return S_OK(requestStatus)

  def getDataTransformationType(self, transID):
    """ returns transformation types Replication/Moving/Unknown for a given transformation

    The transformation body is inspected for ReplicateAndRegister and
    RemoveReplica operations: both present means Moving, replication alone
    means Replication, anything else is an error.

    :param transID: transformation ID
    :returns: S_OK(REPLICATION_TRANS), S_OK(MOVING_TRANS) or S_ERROR
    """
    res = self.tClient.getTransformationParameters(transID, 'Body')
    if not res['OK']:
      return res

    # if body is empty then we assume that it is a replication transformation
    if not res['Value']:
      return S_OK(REPLICATION_TRANS)

    replication = False
    rmReplica = False
    try:
      body = json.loads(res['Value'])
      for operation in body:
        if 'ReplicateAndRegister' in operation:
          replication = True
        if 'RemoveReplica' in operation:
          rmReplica = True
    except ValueError:
      # legacy (non-JSON) body: fall back to substring checks; the two
      # operations are now checked independently, consistent with the JSON
      # branch above (previously the RemoveReplica check was nested)
      if 'ReplicateAndRegister' in res['Value']:
        replication = True
      if 'RemoveReplica' in res['Value']:
        rmReplica = True

    if rmReplica and replication:
      return S_OK(MOVING_TRANS)

    if replication:
      return S_OK(REPLICATION_TRANS)

    return S_ERROR("Unknown Transformation Type '%r'" % res['Value'])

  def setFileStatus(self, transID, transFiles, status):
    """Set the transformation file status and record the change for accounting.

    :param transID: transformation ID
    :param transFiles: list of transformation file dicts
    :param status: new status to apply
    :returns: S_OK, or the error from the TransformationClient
    """
    newStatuses = dict((transFile['LFN'], status) for transFile in transFiles)
    if not newStatuses:
      return S_OK()

    # only touch the database when the agent is enabled
    if self.enabled:
      res = self.tClient.setFileStatusForTransformation(transID, newLFNsStatus=newStatuses, force=True)
      if not res['OK']:
        self.logError('Failed to set statuses for LFNs ', "%s" % res['Message'])
        return res

    for transFile in transFiles:
      self.accounting[status].append({'LFN': transFile['LFN'],
                                      'Status': transFile['Status'],
                                      'AvailableOnSource': transFile['AvailableOnSource'],
                                      'AvailableOnTarget': transFile['AvailableOnTarget']})
    return S_OK()

  def selectFailedRequests(self, transFile):
    """Return True if the transformation file's task has a Failed request, else False."""
    res = self.getRequestStatus(transFile['TransformationID'], transFile['TaskID'])
    if not res['OK']:
      self.log.error('Failure to get Request Status for Assigned File')
      return False

    return res['Value'][transFile['TaskID']]['RequestStatus'] == 'Failed'

  def retryStrategyForFiles(self, transID, transFiles):
    """Decide the retry strategy for each task of the given transformation files.

    :param transID: transformation ID
    :param transFiles: list of transformation file dicts
    :returns: S_OK({taskID: {'Strategy': ..., 'RequestID': ...}})
    """
    taskIDs = [transFile['TaskID'] for transFile in transFiles]
    res = self.getRequestStatus(transID, taskIDs)
    if not res['OK']:
      return res
    taskStatus = res['Value']

    retryStrategy = defaultdict(dict)
    for taskID in taskIDs:
      if taskID is None:
        self.log.error("Task ID is None", "Transformation: %s\n Files: %r " % (transID, transFiles))
        retryStrategy[None]['Strategy'] = SET_UNUSED
        continue
      reqRes = self.reqClient.getRequest(requestID=taskStatus[taskID]['RequestID'])
      if not reqRes['OK']:
        self.log.notice('Request %s does not exist setting file status to unused' % taskStatus[taskID]['RequestID'])
        retryStrategy[taskID]['Strategy'] = SET_UNUSED
      else:
        # request found in the RMS; still mapped to Unused (request reset disabled)
        retryStrategy[taskID]['Strategy'] = SET_UNUSED  # RESET_REQUEST
        retryStrategy[taskID]['RequestID'] = taskStatus[taskID]['RequestID']

    return S_OK(retryStrategy)

  def check_assigned_files(self, actions, transFiles, transType):
    """Decide the action for each transformation file in Assigned status."""
    for transFile in transFiles:
      onSource = transFile['AvailableOnSource']
      onTarget = transFile['AvailableOnTarget']
      if onSource and onTarget:
        # present on both sides: done for replication, retried for moving
        # (the source replica still has to be removed)
        if transType == REPLICATION_TRANS:
          actions[SET_PROCESSED].append(transFile)
        if transType == MOVING_TRANS:
          actions[RETRY].append(transFile)
      elif onSource:
        # only on the source: the transfer needs another attempt
        actions[RETRY].append(transFile)
      elif onTarget:
        # already at the destination: consider it processed
        actions[SET_PROCESSED].append(transFile)
      else:
        # not on src and target
        actions[SET_DELETED].append(transFile)

  def check_unused_files(self, actions, transFiles, transType):
    """Decide the action for each transformation file in Unused status."""
    for transFile in transFiles:
      onSource = transFile['AvailableOnSource']
      onTarget = transFile['AvailableOnTarget']
      # already at the destination only: processed; on neither side: deleted
      if not onSource and onTarget:
        actions[SET_PROCESSED].append(transFile)

      if not onSource and not onTarget:
        actions[SET_DELETED].append(transFile)

  def check_processed_files(self, actions, transFiles, transType):
    """Decide the action for each transformation file in Processed status."""
    for transFile in transFiles:
      onSource = transFile['AvailableOnSource']
      onTarget = transFile['AvailableOnTarget']
      # a moving transformation should not leave the source replica behind
      if onSource and onTarget and transType == MOVING_TRANS:
        actions[RETRY].append(transFile)

      if onSource and not onTarget:
        actions[RETRY].append(transFile)

      if not onSource and not onTarget:
        actions[SET_DELETED].append(transFile)

  def check_problematic_files(self, actions, transFiles, transType):
    """ treatment for transformation files with problematic status

    The decision table is line-for-line identical to the one for Assigned
    files, so the logic is delegated to avoid maintaining two copies.
    """
    self.check_assigned_files(actions, transFiles, transType)

  def retryFiles(self, transID, transFiles):
    """ resubmits request or sets file status to unused based on the retry strategy of transformation file

    :param transID: transformation ID
    :param transFiles: list of transformation file dicts to retry
    :returns: S_OK, or the error from retryStrategyForFiles
    """
    setFilesUnused = []
    setFilesAssigned = []
    res = self.retryStrategyForFiles(transID, transFiles)
    if not res['OK']:
      self.logError('Failure to determine retry strategy (unused / reset request) for files ', "%s" % res['Message'])
      return res

    retryStrategy = res['Value']
    for transFile in transFiles:
      if retryStrategy[transFile['TaskID']]['Strategy'] != RESET_REQUEST:
        setFilesUnused.append(transFile)
        continue

      # strategy is RESET_REQUEST: reset the RMS request and put the task back to Waiting
      requestID = retryStrategy[transFile['TaskID']]['RequestID']
      if self.enabled:
        res = self.reqClient.resetFailedRequest(requestID, allR=True)
        if not res['OK']:
          self.logError('Failed to reset request ', 'ReqID: %s Error: %s' % (requestID, res['Message']))
          continue

        if res['Value'] == "Not reset":
          self.logError('Failed to reset request ', 'ReqID: %s is non-recoverable' % requestID)
          continue

        setFilesAssigned.append(transFile)

        res = self.tClient.setTaskStatus(transID, transFile['TaskID'], 'Waiting')
        if not res['OK']:
          self.logError('Failure to set Waiting status for Task ID: ', "%s %s" % (transFile['TaskID'], res['Message']))
          continue

      # NOTE(review): this accounting entry is also added when the agent is
      # disabled - confirm this is intended
      self.accounting[RESET_REQUEST].append({'LFN': transFile['LFN'],
                                             'Status': transFile['Status'],
                                             'AvailableOnSource': transFile['AvailableOnSource'],
                                             'AvailableOnTarget': transFile['AvailableOnTarget']})

    if setFilesUnused:
      self.setFileStatus(transID, setFilesUnused, 'Unused')

    if setFilesAssigned:
      self.setFileStatus(transID, setFilesAssigned, 'Assigned')

    return S_OK()

  def applyActions(self, transID, actions):
    """Apply the collected actions: set new file statuses and retry failed transfers."""
    for action, transFiles in actions.iteritems():
      if not transFiles:
        continue
      if action == SET_PROCESSED:
        self.setFileStatus(transID, transFiles, 'Processed')
      if action == SET_DELETED:
        self.setFileStatus(transID, transFiles, 'Deleted')
      if action == RETRY:
        # a request still present in the RMS is reset, otherwise the files
        # are set back to Unused
        self.retryFiles(transID, transFiles)

  def existsInFC(self, storageElements, lfns):
    """Check that each LFN has replicas registered in the FileCatalog on ALL given SEs.

    :param storageElements: list of storage element names
    :param lfns: list of LFNs to check
    :returns: S_OK({'Successful': {lfn: bool}, 'Failed': {lfn: errorMessage}})
    """
    res = self.fcClient.getReplicas(lfns)
    if not res['OK']:
      return res

    successful = {}
    failed = {}
    requiredSEs = set(storageElements)

    # files unknown to the catalog count as non-existing;
    # any other failure is a real error
    for lfn, msg in res['Value']['Failed'].iteritems():
      if msg == 'No such file or directory':
        successful[lfn] = False
      else:
        failed[lfn] = msg

    # a file only "exists" when every requested SE holds a registered replica
    for lfn, replicas in res['Value']['Successful'].iteritems():
      successful[lfn] = requiredSEs.issubset(replicas.keys())

    return S_OK({'Successful': successful, 'Failed': failed})

  def existsOnSE(self, storageElements, lfns):
    """ checks if the given files exist physically on a list of storage elements

    :param storageElements: list of storage element names
    :param lfns: list of LFNs to check
    :returns: S_OK({'Successful': {lfn: bool}, 'Failed': {se: failedDict}});
              a file is flagged True only if it exists on every SE
    """

    result = {}
    result['Failed'] = {}
    result['Successful'] = {}

    if not lfns:
      return S_OK(result)

    # the VO name is taken from the first LFN component ('/<vo>/...')
    voName = lfns[0].split('/')[1]
    for se in storageElements:
      res = StorageElement(se, vo=voName).exists(lfns)
      if not res['OK']:
        return res
      for lfn, status in res['Value']['Successful'].iteritems():
        # the first SE seen sets the initial flag ...
        if lfn not in result['Successful']:
          result['Successful'][lfn] = status

        # ... and any SE missing the file forces it to False
        # (logical AND over all SEs)
        if not status:
          result['Successful'][lfn] = False

      result['Failed'][se] = res['Value']['Failed']

    return S_OK(result)

  def exists(self, storageElements, lfns):
    """ checks if files exists on both file catalog and storage elements

    A file counts as existing only when it is registered in the FileCatalog
    for all given SEs AND physically present on all of them.

    :param storageElements: list of storage element names
    :param lfns: list of LFNs to check
    :returns: S_OK({'Successful': {lfn: bool}, 'Failed': {...}}) or S_ERROR
    """

    fcRes = self.existsInFC(storageElements, lfns)
    if not fcRes['OK']:
      self.logError('Failure to determine if files exists in File Catalog ', "%s" % fcRes['Message'])
      return fcRes

    if fcRes['Value']['Failed']:
      self.logError("Failed FileCatalog Response ", "%s" % fcRes['Value']['Failed'])

    # check if files found in file catalog also exist on SE
    checkLFNsOnStorage = [lfn for lfn in fcRes['Value']['Successful'] if fcRes['Value']['Successful'][lfn]]

    # no files were found in FC, return the result instead of verifying them on SE
    if not checkLFNsOnStorage:
      return fcRes

    seRes = self.existsOnSE(storageElements, checkLFNsOnStorage)
    if not seRes['OK']:
      self.logError('Failure to determine if files exist on SE ', "%s" % seRes['Message'])
      return seRes

    # any per-SE failure makes the overall check unreliable: bail out
    for se in storageElements:
      if seRes['Value']['Failed'][se]:
        self.logError('Failed to determine if files exist on SE ', "%s %s" % (se, seRes['Value']['Failed'][se]))
        return S_ERROR()

    # demote files that are registered in the FC but physically missing
    fcResult = fcRes['Value']['Successful']
    seResult = seRes['Value']['Successful']
    for lfn in fcResult:
      if fcResult[lfn] and not seResult[lfn]:
        fcRes['Value']['Successful'][lfn] = False

    return fcRes

  def processTransformation(self, transID, sourceSE, targetSEs, transType):
    """ process transformation for a given transformation ID

    For every configured file status: fetch the files, determine their
    availability on the source and target SEs, collect the resulting actions
    and apply them, then send the notification email.

    :param transID: transformation ID
    :param sourceSE: source storage element(s)
    :param targetSEs: target storage element(s)
    :param transType: data transformation type (Replication/Moving)
    :returns: S_OK
    """

    actions = {}
    actions[SET_PROCESSED] = []
    actions[RETRY] = []
    actions[SET_DELETED] = []

    for status in self.transformationFileStatuses:
      res = self.tClient.getTransformationFiles(condDict={'TransformationID': transID, 'Status': status})
      if not res['OK']:
        errStr = 'Failure to get Transformation Files, Status: %s Transformation ID: %s Message: %s' % (status,
                                                                                                        transID,
                                                                                                        res['Message'])
        self.logError(errStr)
        continue

      transFiles = res['Value']
      if not transFiles:
        self.log.notice("No Transformation Files found with status %s for Transformation ID %d" % (status, transID))
        continue

      self.log.notice("Processing Transformation Files with status %s for TransformationID %d " % (status, transID))

      # Assigned files are only treated when their request failed
      # (Python 2 filter returns a list)
      if status == 'Assigned':
        transFiles = filter(self.selectFailedRequests, transFiles)

      lfns = [transFile['LFN'] for transFile in transFiles]

      if not lfns:
        continue

      res = self.exists(sourceSE, lfns)
      if not res['OK']:
        continue

      resultSourceSe = res['Value']['Successful']

      res = self.exists(targetSEs, lfns)
      if not res['OK']:
        continue
      resultTargetSEs = res['Value']['Successful']

      # annotate each file with its availability before running the checks
      for transFile in transFiles:
        lfn = transFile['LFN']
        transFile['AvailableOnSource'] = resultSourceSe[lfn]
        transFile['AvailableOnTarget'] = resultTargetSEs[lfn]

      # dispatch to the check_<status>_files handler for this status
      checkFilesFuncName = "check_%s_files" % status.lower()
      checkFiles = getattr(self, checkFilesFuncName)
      checkFiles(actions, transFiles, transType)

    self.applyActions(transID, actions)
    self.sendNotification(transID, transType, sourceSE, targetSEs)

    return S_OK()
Beispiel #53
0
 def requestClient(self):
     """Lazily create and return the cached ReqClient instance.

     :returns: the shared ReqClient, instantiated on first access
     """
     if not self.__requestClient:
         self.__requestClient = ReqClient()
     return self.__requestClient
# NOTE(review): 'parseCommandLine' and 'DIRAC' are used below but not imported
# in this snippet - presumably provided by the surrounding context; verify.
Script.registerSwitch('', 'Full', '   Print full list of requests')
Script.setUsageMessage('\n'.join([__doc__, 'Usage:', ' %s [option|cfgfile]' % Script.scriptName]))
parseCommandLine()
from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient


if __name__ == "__main__":

  fullPrint = False

  # honour the --Full switch
  for switch in Script.getUnprocessedSwitches():
    if switch[0] == 'Full':
      fullPrint = True

  reqClient = ReqClient()

  # inspect the request cache of every configured RequestProxy server
  for server, rpcClient in reqClient.requestProxies().iteritems():
    DIRAC.gLogger.always("Checking request cache at %s" % server)
    reqCache = rpcClient.listCacheDir()
    if not reqCache['OK']:
      DIRAC.gLogger.error("Cannot list request cache", reqCache)
      continue
    reqCache = reqCache['Value']

    if fullPrint:
      DIRAC.gLogger.always("List of requests", reqCache)
    else:
      DIRAC.gLogger.always("Number of requests in the cache", len(reqCache))

  DIRAC.exit(0)
Beispiel #55
0
class RequestTasks(TaskBase):
    """ Transformation task handling based on the Request Management System:
        builds, validates, submits and monitors one RMS Request per
        transformation task.
    """

    def __init__(self,
                 transClient=None,
                 logger=None,
                 requestClient=None,
                 requestClass=None,
                 requestValidator=None):
        """ c'tor

        the requestClass is by default Request.
        If extensions want to use an extended type, they can pass it as a parameter.
        This is the same behavior as WorkflowTasks and jobClass
    """

        if not logger:
            logger = gLogger.getSubLogger('RequestTasks')

        super(RequestTasks, self).__init__(transClient, logger)

        # Allow injection for tests / extensions; fall back to the standard
        # implementations otherwise.
        self.requestClient = requestClient if requestClient else ReqClient()
        self.requestClass = requestClass if requestClass else Request
        self.requestValidator = requestValidator if requestValidator else RequestValidator()

    def prepareTransformationTasks(self,
                                   transBody,
                                   taskDict,
                                   owner='',
                                   ownerGroup='',
                                   ownerDN=''):
        """ Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB

        :param transBody: transformation body, possibly "requestType;operationType"
        :param dict taskDict: tasks indexed by taskID; each needs 'InputData' and 'TargetSE'
        :param str owner: request owner; taken from the current proxy if empty
        :param str ownerGroup: owner group; taken from the current proxy if empty
        :param str ownerDN: owner DN; resolved from the username if empty

        :return: S_OK(taskDict) with a 'TaskObject' (a Request) added to each
                 task that has input data
    """
        if (not owner) or (not ownerGroup):
            res = getProxyInfo(False, False)
            if not res['OK']:
                return res
            proxyInfo = res['Value']
            owner = proxyInfo['username']
            ownerGroup = proxyInfo['group']

        if not ownerDN:
            res = getDNForUsername(owner)
            if not res['OK']:
                return res
            ownerDN = res['Value'][0]

        requestOperation = 'ReplicateAndRegister'
        if transBody:
            try:
                _requestType, requestOperation = transBody.split(';')
            except AttributeError:
                # transBody is not a plain string: keep the default operation
                pass

        for taskID in sorted(taskDict):
            paramDict = taskDict[taskID]
            # Only tasks with input data can produce a Request
            if not paramDict['InputData']:
                continue

            transID = paramDict['TransformationID']

            oRequest = Request()
            transfer = Operation()
            transfer.Type = requestOperation
            transfer.TargetSE = paramDict['TargetSE']

            # InputData may come either as a list of LFNs or as a
            # ';'-separated string
            if isinstance(paramDict['InputData'], list):
                files = paramDict['InputData']
            elif isinstance(paramDict['InputData'], basestring):
                files = paramDict['InputData'].split(';')
            else:
                # BUGFIX: 'files' used to be left unbound for unexpected types
                files = []
            for lfn in files:
                trFile = File()
                trFile.LFN = lfn

                transfer.addFile(trFile)

            oRequest.addOperation(transfer)
            oRequest.RequestName = _requestName(transID, taskID)
            oRequest.OwnerDN = ownerDN
            oRequest.OwnerGroup = ownerGroup

            # BUGFIX: validation and TaskObject assignment now happen only for
            # tasks that actually built a request; previously they ran for every
            # task and could reference an undefined or stale oRequest when
            # InputData was empty.
            isValid = self.requestValidator.validate(oRequest)
            if not isValid['OK']:
                return isValid

            taskDict[taskID]['TaskObject'] = oRequest

        return S_OK(taskDict)

    def submitTransformationTasks(self, taskDict):
        """ Submit requests one by one

        :param dict taskDict: tasks indexed by taskID; those with a 'TaskObject'
                              are submitted to the RMS
        :return: S_OK(taskDict) with 'Success' and, on success, 'ExternalID' set
    """
        submitted = 0
        failed = 0
        startTime = time.time()
        for taskID in sorted(taskDict):
            # Tasks without a prepared request cannot be submitted
            if not taskDict[taskID].get('TaskObject'):
                taskDict[taskID]['Success'] = False
                failed += 1
                continue
            res = self.submitTaskToExternal(taskDict[taskID]['TaskObject'])
            if res['OK']:
                taskDict[taskID]['ExternalID'] = res['Value']
                taskDict[taskID]['Success'] = True
                submitted += 1
            else:
                self._logError("Failed to submit task to RMS", res['Message'])
                taskDict[taskID]['Success'] = False
                failed += 1
        self._logInfo(
            'submitTasks: Submitted %d tasks to RMS in %.1f seconds' %
            (submitted, time.time() - startTime))
        if failed:
            self._logWarn(
                'submitTasks: But at the same time failed to submit %d tasks to RMS.'
                % (failed))
        return S_OK(taskDict)

    def submitTaskToExternal(self, oRequest):
        """ Submits a request using ReqClient

        :param oRequest: the Request to submit (must be an instance of
                         self.requestClass)
        :return: S_OK(requestID) or S_ERROR
    """
        if isinstance(oRequest, self.requestClass):
            return self.requestClient.putRequest(oRequest)
        else:
            return S_ERROR("Request should be a Request object")

    def updateTransformationReservedTasks(self, taskDicts):
        """ Map task dictionaries to their request names and external request IDs.

        :return: S_OK({'NoTasks': [...], 'TaskNameIDs': {requestName: reqID}})
    """
        requestNameIDs = {}
        noTasks = []
        for taskDict in taskDicts:
            requestName = _requestName(taskDict['TransformationID'],
                                       taskDict['TaskID'])

            reqID = taskDict['ExternalID']

            if reqID:
                requestNameIDs[requestName] = reqID
            else:
                noTasks.append(requestName)
        return S_OK({'NoTasks': noTasks, 'TaskNameIDs': requestNameIDs})

    def getSubmittedTaskStatus(self, taskDicts):
        """ Check the status of submitted requests and report status changes.

        :return: S_OK({newStatus: [taskIDs]}) for tasks whose status changed
    """
        updateDict = {}

        for taskDict in taskDicts:
            oldStatus = taskDict['ExternalStatus']

            newStatus = self.requestClient.getRequestStatus(
                taskDict['ExternalID'])
            if not newStatus['OK']:
                # A missing request is only worth a verbose message
                log = self._logVerbose if 'not exist' in newStatus[
                    'Message'] else self.log.warn
                log(
                    "getSubmittedTaskStatus: Failed to get requestID for request",
                    '%s' % newStatus['Message'])
            else:
                newStatus = newStatus['Value']
                if newStatus != oldStatus:
                    updateDict.setdefault(newStatus,
                                          []).append(taskDict['TaskID'])
        return S_OK(updateDict)

    def getSubmittedFileStatus(self, fileDicts):
        """ Check the status of files in submitted requests.

        :return: S_OK({lfn: 'Processed'|'Problematic'}) for files whose RMS
                 status became final
    """
        taskFiles = {}
        submittedTasks = {}
        externalIds = {}
        # Don't try and get status of not submitted tasks!
        for fileDict in fileDicts:
            submittedTasks.setdefault(fileDict['TransformationID'],
                                      set()).add(int(fileDict['TaskID']))
        for transID in submittedTasks:
            res = self.transClient.getTransformationTasks({
                'TransformationID':
                transID,
                'TaskID':
                list(submittedTasks[transID])
            })
            if not res['OK']:
                return res
            for taskDict in res['Value']:
                taskID = taskDict['TaskID']
                externalIds[taskID] = taskDict['ExternalID']
                # 'Created' means not yet submitted: drop it from the check list
                if taskDict['ExternalStatus'] == 'Created':
                    submittedTasks[transID].remove(taskID)

        for fileDict in fileDicts:
            transID = fileDict['TransformationID']
            taskID = int(fileDict['TaskID'])
            if taskID in submittedTasks[transID]:
                requestID = externalIds[taskID]
                taskFiles.setdefault(requestID,
                                     {})[fileDict['LFN']] = fileDict['Status']

        updateDict = {}
        for requestID in sorted(taskFiles):
            lfnDict = taskFiles[requestID]
            statusDict = self.requestClient.getRequestFileStatus(
                requestID, lfnDict.keys())
            if not statusDict['OK']:
                log = self._logVerbose if 'not exist' in statusDict[
                    'Message'] else self.log.warn
                log(
                    "getSubmittedFileStatus: Failed to get files status for request",
                    '%s' % statusDict['Message'])
                continue

            statusDict = statusDict['Value']
            for lfn, newStatus in statusDict.items():
                if newStatus == lfnDict[lfn]:
                    pass
                elif newStatus == 'Done':
                    updateDict[lfn] = 'Processed'
                elif newStatus == 'Failed':
                    updateDict[lfn] = 'Problematic'
        return S_OK(updateDict)
Beispiel #56
0
class FTS3Operation(FTS3Serializable):
    """ Abstract class to represent an operation to be executed by FTS. It is a
      container for FTSFiles, as well as for FTSJobs.

      There can be a mapping between one FTS3Operation and one RMS Operation.

      The FTS3Operation takes care of generating the appropriate FTSJobs,
      and to perform a callback when the work with FTS is over. The actual
      generation and callback depends on the subclass.

      This class should not be instantiated directly, but rather one of its
      subclass
  """

    # Lifecycle states of an operation
    ALL_STATES = [
        'Active',  # Default state until FTS has done everything
        'Processed',  # Interactions with FTS done, but callback not done
        'Finished',  # Everything was done
        'Canceled',  # Canceled by the user
        'Failed',  # I don't know yet
    ]
    FINAL_STATES = ['Finished', 'Canceled', 'Failed']
    INIT_STATE = 'Active'

    # Attributes persisted by the FTS3Serializable JSON machinery
    _attrToSerialize = [
        'operationID', 'username', 'userGroup', 'rmsReqID', 'rmsOpID',
        'sourceSEs', 'ftsFiles', 'activity', 'priority', 'ftsJobs',
        'creationTime', 'lastUpdate', 'error', 'status'
    ]

    def __init__(self,
                 ftsFiles=None,
                 username=None,
                 userGroup=None,
                 rmsReqID=-1,
                 rmsOpID=0,
                 sourceSEs=None,
                 activity=None,
                 priority=None):
        """
        :param ftsFiles: list of FTS3Files object that belongs to the operation
        :param username: username whose proxy should be used
        :param userGroup: group that should be used with username
        :param rmsReqID: ID of the Request in the RMS system
        :param rmsOpID: ID of the Operation in the RMS system
        :param sourceSEs: list of SE to be used as source (if applicable)
        :param activity: FTS activity to use
        :param priority: FTS priority to use

    """
        ############################
        # persistent attributes

        self.username = username
        self.userGroup = userGroup

        self.rmsReqID = rmsReqID
        self.rmsOpID = rmsOpID

        # stored as a comma-separated string, not a list
        if isinstance(sourceSEs, list):
            sourceSEs = ','.join(sourceSEs)

        self.sourceSEs = sourceSEs

        self.ftsFiles = ftsFiles if ftsFiles else []

        self.activity = activity
        self.priority = priority

        self.ftsJobs = []

        # truncate to whole seconds for DB-friendly timestamps
        now = datetime.datetime.utcnow().replace(microsecond=0)

        self.creationTime = now
        self.lastUpdate = now
        self.error = None
        self.status = FTS3Operation.INIT_STATE

        ########################

        # transient attributes, re-created by init_on_load when loaded by sqlalchemy
        self.reqClient = None
        self.dManager = None
        self._log = None
        self.init_on_load()

    @orm.reconstructor
    def init_on_load(self):
        """ This method initializes some attributes.
        It is called by sqlalchemy (which does not call __init__)
    """
        self._vo = None

        # NOTE(review): self.reqClient is NOT set here; it is only created in
        # callback(), on which _updateRmsOperationStatus relies.
        self.dManager = DataManager()
        self.rssClient = ResourceStatus()

        # operationID only exists once the object has been persisted
        opID = getattr(self, 'operationID', None)
        loggerName = '%s/' % opID if opID else ''
        loggerName += 'req_%s/op_%s' % (self.rmsReqID, self.rmsOpID)

        self._log = gLogger.getSubLogger(loggerName, True)

    @property
    def vo(self):
        """:returns: return vo of the usergroup """
        # lazily resolved and cached from the user group
        if self._vo:
            return self._vo

        if self.userGroup:
            self._vo = getVOForGroup(self.userGroup)

        return self._vo

    def isTotallyProcessed(self):
        """ Returns True if and only if there is nothing
        else to be done by FTS for this operation.
        All files are successful or definitely failed
    """

        if self.status == 'Processed':
            return True

        fileStatuses = set([f.status for f in self.ftsFiles])

        # If all the files are in a final state
        if fileStatuses <= set(FTS3File.FINAL_STATES):
            self.status = 'Processed'
            return True

        return False

    def _getFilesToSubmit(self, maxAttemptsPerFile=10):
        """ Return the list of FTS3files that can be submitted
        Either because they never were submitted, or because
        we can make more attempts

        :param maxAttemptsPerFile: the maximum number of attempts to be tried for a file

        :return: List of FTS3File to submit
    """

        toSubmit = []

        for ftsFile in self.ftsFiles:
            # give up on files that exhausted their attempts
            if ftsFile.attempt >= maxAttemptsPerFile:
                ftsFile.status = 'Defunct'
            # The file was never submitted or
            # The file failed from the point of view of FTS
            # but no more than the maxAttemptsPerFile
            elif ftsFile.status in [FTS3File.INIT_STATE
                                    ] + FTS3File.FTS_FAILED_STATES:
                toSubmit.append(ftsFile)

        return toSubmit

    @staticmethod
    def _checkSEAccess(seName, accessType, vo=None):
        """Check the Status of a storage element

        :param seName: name of the StorageElement
        :param accessType ReadAccess, WriteAccess,CheckAccess,RemoveAccess
        :param vo: VO used to instantiate the StorageElement

        :return: S_ERROR if not allowed or error, S_OK() otherwise
    """
        # Check that the target is writable
        # access = self.rssClient.getStorageElementStatus( seName, accessType )
        # if not access["OK"]:
        #   return access
        # if access["Value"][seName][accessType] not in ( "Active", "Degraded" ):
        #   return S_ERROR( "%s does not have %s in Active or Degraded" % ( seName, accessType ) )

        status = StorageElement(seName, vo=vo).getStatus()
        if not status['OK']:
            return status

        status = status['Value']

        # getStatus keys are 'Read', 'Write', ... without the 'Access' suffix
        accessType = accessType.replace('Access', '')
        if not status[accessType]:
            return S_ERROR(
                errno.EACCES, "%s does not have %s in Active or Degraded" %
                (seName, accessType))

        return S_OK()

    def _createNewJob(self, jobType, ftsFiles, targetSE, sourceSE=None):
        """ Create a new FTS3Job object

        :param jobType: type of job to create (Transfer, Staging, Removal)
        :param ftsFiles: list of FTS3File objects the job has to work on
        :param targetSE: SE on which to operate
        :param sourceSE: source SE, only useful for Transfer jobs

        :return: FTS3Job object
     """

        newJob = FTS3Job()
        newJob.type = jobType
        newJob.sourceSE = sourceSE
        newJob.targetSE = targetSE
        newJob.activity = self.activity
        newJob.priority = self.priority
        newJob.username = self.username
        newJob.userGroup = self.userGroup
        newJob.vo = self.vo
        newJob.filesToSubmit = ftsFiles
        # NOTE(review): no default here (unlike init_on_load), so this raises
        # AttributeError if the operation was never persisted — confirm intended
        newJob.operationID = getattr(self, 'operationID')

        return newJob

    def _callback(self):
        """Actually performs the callback
    """
        raise NotImplementedError("You should not be using the base class")

    def callback(self):
        """ Trigger the callback once all the FTS interactions are done
        and update the status of the Operation to 'Finished' if successful
    """
        # fresh client; subclasses' _callback (e.g. _updateRmsOperationStatus)
        # depend on self.reqClient being set here
        self.reqClient = ReqClient()

        res = self._callback()

        if res['OK']:
            self.status = 'Finished'

        return res

    def prepareNewJobs(self, maxFilesPerJob=100, maxAttemptsPerFile=10):
        """ Prepare the new jobs that have to be submitted

        :param maxFilesPerJob: maximum number of files assigned to a job
        :param maxAttemptsPerFile: maximum number of retry after an fts failure

        :return: list of jobs
    """
        raise NotImplementedError("You should not be using the base class")

    def _updateRmsOperationStatus(self):
        """ Update the status of the Files in the rms operation
          :return: S_OK with a dict:
                        * request: rms Request object
                        * operation: rms Operation object
                        * ftsFilesByTarget: dict {SE: [ftsFiles that were successful]}
    """

        log = self._log.getSubLogger(
            "_updateRmsOperationStatus/%s/%s" %
            (getattr(self, 'operationID'), self.rmsReqID),
            child=True)

        # getRequest also checks the request out (ownership), hence the
        # putRequest calls below on every error path
        res = self.reqClient.getRequest(self.rmsReqID)
        if not res['OK']:
            return res

        request = res['Value']

        res = request.getWaiting()

        if not res["OK"]:
            log.error("Unable to find 'Scheduled' operation in request")
            res = self.reqClient.putRequest(request,
                                            useFailoverProxy=False,
                                            retryMainService=3)
            if not res['OK']:
                log.error("Could not put back the request !", res['Message'])
            return S_ERROR("Could not find scheduled operation")

        operation = res['Value']

        # We index the files of the operation by their IDs
        rmsFileIDs = {}

        for opFile in operation:
            rmsFileIDs[opFile.FileID] = opFile

        # Files that failed to transfer
        defunctRmsFileIDs = set()

        # { SE : [FTS3Files] }
        ftsFilesByTarget = {}
        for ftsFile in self.ftsFiles:

            if ftsFile.status == 'Defunct':
                log.info(
                    "File failed to transfer, setting it to failed in RMS",
                    "%s %s" % (ftsFile.lfn, ftsFile.targetSE))
                defunctRmsFileIDs.add(ftsFile.rmsFileID)
                continue

            if ftsFile.status == 'Canceled':
                log.info("File canceled, setting it Failed in RMS",
                         "%s %s" % (ftsFile.lfn, ftsFile.targetSE))
                defunctRmsFileIDs.add(ftsFile.rmsFileID)
                continue

            # SHOULD NEVER HAPPEN !
            if ftsFile.status != 'Finished':
                log.error("Callback called with file in non terminal state",
                          "%s %s" % (ftsFile.lfn, ftsFile.targetSE))
                res = self.reqClient.putRequest(request,
                                                useFailoverProxy=False,
                                                retryMainService=3)
                if not res['OK']:
                    log.error("Could not put back the request !",
                              res['Message'])
                return S_ERROR(
                    "Callback called with file in non terminal state")

            ftsFilesByTarget.setdefault(ftsFile.targetSE, []).append(ftsFile)

        # Now, we set the rmsFile as done in the operation, providing
        # that they are not in the defunctFiles.
        # We cannot do this in the previous list because in the FTS system,
        # each destination is a separate line in the DB but not in the RMS

        for ftsFile in self.ftsFiles:
            opFile = rmsFileIDs[ftsFile.rmsFileID]

            opFile.Status = 'Failed' if ftsFile.rmsFileID in defunctRmsFileIDs else 'Done'

        return S_OK({
            'request': request,
            'operation': operation,
            'ftsFilesByTarget': ftsFilesByTarget
        })

    @classmethod
    def fromRMSObjects(cls, rmsReq, rmsOp, username):
        """ Construct an FTS3Operation object from the RMS Request and Operation corresponding.
        The attributes taken are the OwnerGroup, Request and Operation IDS, sourceSE,
        and activity and priority if they are defined in the Argument field of the operation

        :param rmsReq: RMS Request object
        :param rmsOp: RMS Operation object
        :param username: username to which associate the FTS3Operation (normally comes from the Req OwnerDN)

        :returns: FTS3Operation object
    """

        ftsOp = cls()
        ftsOp.username = username
        ftsOp.userGroup = rmsReq.OwnerGroup

        ftsOp.rmsReqID = rmsReq.RequestID
        ftsOp.rmsOpID = rmsOp.OperationID

        ftsOp.sourceSEs = rmsOp.SourceSE

        try:
            argumentDic = json.loads(rmsOp.Arguments)

            ftsOp.activity = argumentDic['activity']
            ftsOp.priority = argumentDic['priority']
        except Exception as _e:
            # best-effort: Arguments may be empty, not JSON, or missing keys;
            # keep the default activity/priority in that case
            pass

        return ftsOp
                                     'Arguments:',
                                     ' requestName: a request name' ] ) )
# # execution
if __name__ == "__main__":

  from DIRAC.Core.Base.Script import parseCommandLine
  parseCommandLine()

  import DIRAC
  from DIRAC import gLogger
  resetFailed = False
  requests = []
  jobs = []
  all = False
  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
  reqClient = ReqClient()
  for switch in Script.getUnprocessedSwitches():
    if switch[0] == 'Failed':
      resetFailed = True
    elif switch[0] == 'All':
      all = True
    elif switch[0] == 'Maximum':
      try:
        maxReset = int( switch[1] )
      except:
        pass
    elif switch[0] == 'Job':
      try:
        jobs = [int( job ) for job in switch[1].split( ',' )]
      except:
        print "Invalid jobID", switch[1]
Beispiel #58
0
class RequestTask(object):
    """
  .. class:: RequestTask

  request's processing task
  """
    def __init__(self,
                 requestJSON,
                 handlersDict,
                 csPath,
                 agentName,
                 standalone=False,
                 requestClient=None):
        """c'tor

    :param self: self reference
    :param str requestJSON: request serialized to JSON
    :param dict handlersDict: mapping of operation type to handler plugin path
    :param str csPath: CS path of the parent agent, used to configure handlers
    :param str agentName: agent name, used for gMonitor reporting
    :param bool standalone: flag controlling certificate usage during execution
    :param requestClient: ReqClient instance; a new one is created if not given
    """
        self.request = Request(requestJSON)
        # # csPath
        self.csPath = csPath
        # # agent name
        self.agentName = agentName
        # # standalone flag
        self.standalone = standalone
        # # handlers dict
        self.handlersDict = handlersDict
        # # handlers class def
        self.handlers = {}
        # # own sublogger
        self.log = gLogger.getSubLogger(
            "pid_%s/%s" % (os.getpid(), self.request.RequestName))
        # # get shifters info
        self.__managersDict = {}
        shifterProxies = self.__setupManagerProxies()
        if not shifterProxies["OK"]:
            # best-effort here: only log; setupProxy() retries per request
            self.log.error(shifterProxies["Message"])

        # # initialize gMonitor
        gMonitor.setComponentType(gMonitor.COMPONENT_AGENT)
        gMonitor.setComponentName(self.agentName)
        gMonitor.initialize()

        # # own gMonitor activities
        gMonitor.registerActivity("RequestAtt", "Requests processed",
                                  "RequestExecutingAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RequestFail", "Requests failed",
                                  "RequestExecutingAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RequestOK", "Requests done",
                                  "RequestExecutingAgent", "Requests/min",
                                  gMonitor.OP_SUM)

        # # request client: injected (for tests) or freshly created
        if requestClient is None:
            self.requestClient = ReqClient()
        else:
            self.requestClient = requestClient

    def __setupManagerProxies(self):
        """ Download a grid proxy for every shifter defined under CS Operations/Shifter
        and record the credentials in self.__managersDict.

    :return: S_OK() on success, S_ERROR when the CS cannot be read or a proxy
             download fails
    """
        opsHelper = Operations()
        shifters = opsHelper.getSections("Shifter")
        if not shifters["OK"]:
            self.log.error(shifters["Message"])
            return shifters
        for shifter in shifters["Value"]:
            shifterDict = opsHelper.getOptionsDict("Shifter/%s" % shifter)
            if not shifterDict["OK"]:
                self.log.error(shifterDict["Message"])
                continue
            userName = shifterDict["Value"].get("User", "")
            userGroup = shifterDict["Value"].get("Group", "")

            userDN = CS.getDNForUsername(userName)
            if not userDN["OK"]:
                self.log.error(userDN["Message"])
                continue
            userDN = userDN["Value"][0]

            # Use a VOMS proxy when the group carries a VOMS attribute
            vomsAttr = CS.getVOMSAttributeForGroup(userGroup)
            if vomsAttr:
                self.log.debug(
                    "getting VOMS [%s] proxy for shifter %s@%s (%s)" %
                    (vomsAttr, userName, userGroup, userDN))
                getProxy = gProxyManager.downloadVOMSProxyToFile(
                    userDN, userGroup,
                    requiredTimeLeft=1200, cacheTime=4 * 43200)
            else:
                self.log.debug("getting proxy for shifter %s@%s (%s)" %
                               (userName, userGroup, userDN))
                getProxy = gProxyManager.downloadProxyToFile(
                    userDN, userGroup,
                    requiredTimeLeft=1200, cacheTime=4 * 43200)
            if not getProxy["OK"]:
                self.log.error(getProxy["Message"])
                return S_ERROR("unable to setup shifter proxy for %s: %s" %
                               (shifter, getProxy["Message"]))
            self.log.debug("got %s: %s %s" % (shifter, userName, userGroup))
            self.__managersDict[shifter] = {
                "ShifterDN": userDN,
                "ShifterName": userName,
                "ShifterGroup": userGroup,
                "Chain": getProxy["chain"],
                "ProxyFile": getProxy["Value"],
            }
        return S_OK()

    def setupProxy(self):
        """ Download and dump the request owner's proxy to a file, exporting it
        via X509_USER_PROXY.

    :return: S_OK with the proxy file name and the matching shifter names (if any)
    """
        self.__managersDict = {}
        shifterProxies = self.__setupManagerProxies()
        if not shifterProxies["OK"]:
            self.log.error(shifterProxies["Message"])

        ownerDN = self.request.OwnerDN
        ownerGroup = self.request.OwnerGroup

        # Shifters whose credentials match the request owner
        isShifter = [
            shifter for shifter, creds in self.__managersDict.items()
            if creds["ShifterDN"] == ownerDN
            and creds["ShifterGroup"] == ownerGroup
        ]
        if isShifter:
            proxyFile = self.__managersDict[isShifter[0]]["ProxyFile"]
            os.environ["X509_USER_PROXY"] = proxyFile
            return S_OK({"Shifter": isShifter, "ProxyFile": proxyFile})

        # owner is not a shifter at all: fetch the owner's own VOMS proxy
        ownerProxyFile = gProxyManager.downloadVOMSProxyToFile(
            ownerDN, ownerGroup)
        if not ownerProxyFile["OK"] or not ownerProxyFile["Value"]:
            reason = ownerProxyFile.get(
                "Message", "No valid proxy found in ProxyManager.")
            return S_ERROR("Change proxy error for '%s'@'%s': %s" %
                           (ownerDN, ownerGroup, reason))

        ownerProxyFile = ownerProxyFile["Value"]
        os.environ["X509_USER_PROXY"] = ownerProxyFile
        return S_OK({"Shifter": isShifter, "ProxyFile": ownerProxyFile})

    @staticmethod
    def getPluginName(pluginPath):
        if not pluginPath:
            return ''
        if "/" in pluginPath:
            pluginPath = ".".join(
                [chunk for chunk in pluginPath.split("/") if chunk])
        return pluginPath.split(".")[-1]

    @staticmethod
    def loadHandler(pluginPath):
        """ Create an instance of requested plugin class, loading and importing it when needed.
    This function could raise ImportError when plugin cannot be find or TypeError when
    loaded class object isn't inherited from BaseOperation class.

    :param str pluginName: dotted path to plugin, specified as in import statement, i.e.
        "DIRAC.CheesShopSystem.private.Cheddar" or alternatively in 'normal' path format
        "DIRAC/CheesShopSystem/private/Cheddar"

    :return: object instance

    This function try to load and instantiate an object from given path. It is assumed that:

      * `pluginPath` is pointing to module directory "importable" by python interpreter, i.e.: it's
        package's top level directory is in $PYTHONPATH env variable,
      * the module should consist a class definition following module name,
      *  the class itself is inherited from DIRAC.RequestManagementSystem.private.BaseOperation.BaseOperation

    If above conditions aren't meet, function is throwing exceptions:

    :raises ImportError: when class cannot be imported
    :raises TypeError: when class isn't inherited from OperationHandlerBase
    """
        # normalise "A/B/C" paths to dotted import form
        if "/" in pluginPath:
            pluginPath = ".".join(
                [chunk for chunk in pluginPath.split("/") if chunk])
        pluginName = pluginPath.split(".")[-1]
        # reuse a class already present in the module namespace,
        # otherwise import the module and pull the class out of it
        if pluginName not in globals():
            mod = __import__(pluginPath, globals(), fromlist=[pluginName])
            pluginClassObj = getattr(mod, pluginName)
        else:
            pluginClassObj = globals()[pluginName]
        if not issubclass(pluginClassObj, OperationHandlerBase):
            raise TypeError(
                "operation handler '%s' isn't inherited from OperationHandlerBase class"
                % pluginName)
        # register per-plugin monitoring activities (attempted/successful/failed)
        for key, status in (("Att", "Attempted"), ("OK", "Successful"),
                            ("Fail", "Failed")):
            gMonitor.registerActivity(
                "%s%s" % (pluginName, key),
                "%s operations %s" % (pluginName, status),
                "RequestExecutingAgent", "Operations/min", gMonitor.OP_SUM)
        # # return an instance
        # NOTE(review): despite the docstring, this returns the CLASS object,
        # not an instance; instantiation happens in getHandler
        return pluginClassObj

    def getHandler(self, operation):
        """ Return an instance of the handler for a given operation type, on demand.
        Created handlers are cached in self.handlers for further use.

    :param ~Operation.Operation operation: Operation instance
    :return: S_OK(handler) or S_ERROR when the type is unknown or loading fails
    """
        opType = operation.Type
        if opType not in self.handlersDict:
            return S_ERROR("handler for operation '%s' not set" % opType)

        handler = self.handlers.get(opType, None)
        if not handler:
            # first use of this operation type: load and instantiate the plugin
            try:
                handlerCls = self.loadHandler(self.handlersDict[opType])
                handler = handlerCls(csPath="%s/OperationHandlers/%s" %
                                     (self.csPath, opType))
                self.handlers[opType] = handler
            except (ImportError, TypeError) as error:
                self.log.exception("getHandler: %s" % str(error),
                                   lException=error)
                return S_ERROR(str(error))

        # # set operation for this handler
        handler.setOperation(operation)
        # # and return
        return S_OK(handler)

    def updateRequest(self):
        """ Put the request back into the RequestDB, logging on failure.

    :return: the S_OK/S_ERROR structure returned by ReqClient.putRequest
    """
        res = self.requestClient.putRequest(self.request,
                                            useFailoverProxy=False,
                                            retryMainService=2)
        if not res["OK"]:
            self.log.error(res["Message"])
        return res

    def __call__(self):
        """ request processing

        Executes the waiting operations of self.request one by one under the
        request owner's proxy, updates monitoring counters and, when the
        request reaches 'Done', puts it back to the RequestDB and finalizes
        the attached job (if any).

        :returns: S_OK( self.request ) on normal termination,
                  S_ERROR when an operation handler raised an exception or
                  the request could not be updated / finalized
        """

        self.log.debug("about to execute request")
        gMonitor.addMark("RequestAtt", 1)

        # # setup proxy for request owner
        setupProxy = self.setupProxy()
        if not setupProxy["OK"]:
            self.request.Error = setupProxy["Message"]
            if 'has no proxy registered' in setupProxy["Message"]:
                self.log.error('Request set to Failed:', setupProxy["Message"])
                # If user is no longer registered, fail the request
                for operation in self.request:
                    for opFile in operation:
                        opFile.Status = 'Failed'
                    operation.Status = 'Failed'
            else:
                self.log.error(setupProxy["Message"])
            return S_OK(self.request)
        shifter = setupProxy["Value"]["Shifter"]

        error = None
        while self.request.Status == "Waiting":

            # # get waiting operation
            operation = self.request.getWaiting()
            if not operation["OK"]:
                self.log.error(operation["Message"])
                return operation
            operation = operation["Value"]
            self.log.info("executing operation #%s '%s'" %
                          (operation.Order, operation.Type))

            # # and handler for it
            handler = self.getHandler(operation)
            if not handler["OK"]:
                self.log.error("unable to process operation %s: %s" %
                               (operation.Type, handler["Message"]))
                # gMonitor.addMark( "%s%s" % ( operation.Type, "Fail" ), 1 )
                operation.Error = handler["Message"]
                break

            handler = handler["Value"]
            # # set shifters list in the handler
            handler.shifter = shifter
            # # and execute
            pluginName = self.getPluginName(
                self.handlersDict.get(operation.Type))
            if self.standalone:
                useServerCertificate = gConfig.useServerCertificate()
            else:
                # Always use server certificates if executed within an agent
                useServerCertificate = True
            try:
                if pluginName:
                    gMonitor.addMark("%s%s" % (pluginName, "Att"), 1)
                # Always use request owner proxy
                if useServerCertificate:
                    gConfigurationData.setOptionInCFG(
                        '/DIRAC/Security/UseServerCertificate', 'false')
                exe = handler()
                if useServerCertificate:
                    gConfigurationData.setOptionInCFG(
                        '/DIRAC/Security/UseServerCertificate', 'true')
                if not exe["OK"]:
                    self.log.error("unable to process operation %s: %s" %
                                   (operation.Type, exe["Message"]))
                    if pluginName:
                        gMonitor.addMark("%s%s" % (pluginName, "Fail"), 1)
                    gMonitor.addMark("RequestFail", 1)
                    if self.request.JobID:
                        # Check if the job exists
                        monitorServer = RPCClient(
                            "WorkloadManagement/JobMonitoring",
                            useCertificates=True)
                        res = monitorServer.getJobPrimarySummary(
                            int(self.request.JobID))
                        if not res["OK"]:
                            self.log.error(
                                "RequestTask: Failed to get job %d status" %
                                self.request.JobID)
                        elif not res['Value']:
                            self.log.warn(
                                "RequestTask: job %d does not exist (anymore): failed request"
                                % self.request.JobID)
                            for opFile in operation:
                                opFile.Status = 'Failed'
                            if operation.Status != 'Failed':
                                operation.Status = 'Failed'
                            self.request.Error = 'Job no longer exists'
            except Exception as exc:
                # BUGFIX: keep the message in 'error' instead of binding the
                # exception itself to it: in Python 3 the 'except ... as'
                # target is deleted when the handler exits (even via 'break'),
                # so 'if error' below would raise NameError and the intended
                # S_ERROR return was unreachable.
                error = str(exc)
                self.log.exception("hit by exception: %s" % error)
                if pluginName:
                    gMonitor.addMark("%s%s" % (pluginName, "Fail"), 1)
                gMonitor.addMark("RequestFail", 1)
                if useServerCertificate:
                    gConfigurationData.setOptionInCFG(
                        '/DIRAC/Security/UseServerCertificate', 'true')
                break

            # # operation status check
            if operation.Status == "Done" and pluginName:
                gMonitor.addMark("%s%s" % (pluginName, "OK"), 1)
            elif operation.Status == "Failed" and pluginName:
                gMonitor.addMark("%s%s" % (pluginName, "Fail"), 1)
            elif operation.Status in ("Waiting", "Scheduled"):
                # # no update for waiting or all files scheduled
                break

        gMonitor.flush()

        if error:
            return S_ERROR(error)

        # # request done?
        if self.request.Status == "Done":
            # # update request to the RequestDB
            self.log.info('updating request with status %s' %
                          self.request.Status)
            update = self.updateRequest()
            if not update["OK"]:
                self.log.error(update["Message"])
                return update
            self.log.info("request '%s' is done" % self.request.RequestName)
            gMonitor.addMark("RequestOK", 1)
            # # and there is a job waiting for it? finalize!
            if self.request.JobID:
                attempts = 0
                while True:
                    finalizeRequest = self.requestClient.finalizeRequest(
                        self.request.RequestID, self.request.JobID)  # pylint: disable=no-member
                    if not finalizeRequest["OK"]:
                        if not attempts:
                            self.log.error(
                                "unable to finalize request %s: %s, will retry"
                                % (self.request.RequestName,
                                   finalizeRequest["Message"]))
                        self.log.verbose("Waiting 10 seconds")
                        attempts += 1
                        if attempts == 10:
                            self.log.error(
                                "giving up finalize request after %d attempts"
                                % attempts)
                            return S_ERROR('Could not finalize request')

                        time.sleep(10)

                    else:
                        self.log.info("request '%s' is finalized%s" %
                                      (self.request.RequestName,
                                       (' after %d attempts' %
                                        attempts) if attempts else ''))
                        break

        # Request will be updated by the callBack method
        self.log.verbose("RequestTasks exiting, request %s" %
                         self.request.Status)
        return S_OK(self.request)
Beispiel #59
0
class RequestTasks(TaskBase):
    """
    Class for handling tasks for the RMS
    """
    def __init__(
        self,
        transClient=None,
        logger=None,
        requestClient=None,
        requestClass=None,
        requestValidator=None,
        ownerDN=None,
        ownerGroup=None,
    ):
        """c'tor

        the requestClass is by default Request.
        If extensions want to use an extended type, they can pass it as a parameter.
        This is the same behavior as WorkflowTasks and jobClass
        """

        if not logger:
            logger = gLogger.getSubLogger(self.__class__.__name__)

        super(RequestTasks, self).__init__(transClient, logger)
        # delegate credentials only when both the DN and the group are given
        useCertificates = bool(ownerDN) and bool(ownerGroup)

        if not requestClient:
            self.requestClient = ReqClient(useCertificates=useCertificates,
                                           delegatedDN=ownerDN,
                                           delegatedGroup=ownerGroup)
        else:
            self.requestClient = requestClient

        if not requestClass:
            self.requestClass = Request
        else:
            self.requestClass = requestClass

        if not requestValidator:
            self.requestValidator = RequestValidator()
        else:
            self.requestValidator = requestValidator

    def prepareTransformationTasks(self,
                                   transBody,
                                   taskDict,
                                   owner="",
                                   ownerGroup="",
                                   ownerDN="",
                                   bulkSubmissionFlag=False):
        """Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB"""
        if not taskDict:
            return S_OK({})

        # fall back to the credentials of the current proxy when not supplied
        if (not owner) or (not ownerGroup):
            res = getProxyInfo(False, False)
            if not res["OK"]:
                return res
            proxyInfo = res["Value"]
            owner = proxyInfo["username"]
            ownerGroup = proxyInfo["group"]

        if not ownerDN:
            res = getDNForUsername(owner)
            if not res["OK"]:
                return res
            ownerDN = res["Value"][0]

        try:
            transJson, _decLen = decode(transBody)

            if isinstance(transJson, BaseBody):
                self._bodyPlugins(transJson, taskDict, ownerDN, ownerGroup)
            else:
                self._multiOperationsBody(transJson, taskDict, ownerDN,
                                          ownerGroup)
        except ValueError:  # #json couldn't load
            self._singleOperationsBody(transBody, taskDict, ownerDN,
                                       ownerGroup)

        return S_OK(taskDict)

    def _multiOperationsBody(self, transJson, taskDict, ownerDN, ownerGroup):
        """Deal with a Request that has multiple operations

        :param transJson: list of lists of string and dictionaries, e.g.:

          .. code :: python

            body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"TASK:TargetSE" }),
                     ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
                   ]

            If a value of an operation parameter in the body starts with ``TASK:``,
            we take it from the taskDict.
            For example ``TASK:TargetSE`` is replaced with ``task['TargetSE']``

        :param dict taskDict: dictionary of tasks, modified in this function
        :param str ownerDN: certificate DN used for the requests
        :param str ownerGroup: dirac group used for the requests

        :returns: None
        """
        for taskID, task in list(taskDict.items()):
            try:
                transID = task["TransformationID"]
                if not task.get("InputData"):
                    raise StopTaskIteration("No input data")
                files = []

                oRequest = Request()
                if isinstance(task["InputData"], list):
                    files = task["InputData"]
                elif isinstance(task["InputData"], six.string_types):
                    files = task["InputData"].split(";")

                # create the operations from the json structure
                for operationTuple in transJson:
                    op = Operation()
                    op.Type = operationTuple[0]
                    for parameter, value in operationTuple[1].items():
                        # Here we massage a bit the body to replace some parameters
                        # with what we have in the task.
                        try:
                            taskKey = value.split("TASK:")[1]
                            value = task[taskKey]
                        # Either the attribute is not a string (AttributeError)
                        # or it does not start with 'TASK:' (IndexError)
                        except (AttributeError, IndexError):
                            pass
                        # That happens when the requested substitution is not
                        # a key in the task, and that's a problem
                        except KeyError:
                            raise StopTaskIteration(
                                "Parameter %s does not exist in taskDict" %
                                taskKey)

                        setattr(op, parameter, value)

                    for lfn in files:
                        opFile = File()
                        opFile.LFN = lfn
                        op.addFile(opFile)

                    oRequest.addOperation(op)

                result = self._assignRequestToTask(oRequest, taskDict, transID,
                                                   taskID, ownerDN, ownerGroup)
                if not result["OK"]:
                    raise StopTaskIteration(
                        "Could not assign request to task: %s" %
                        result["Message"])
            except StopTaskIteration as e:
                self._logError("Error creating request for task",
                               "%s, %s" % (taskID, e),
                               transID=transID)
                taskDict.pop(taskID)

    def _singleOperationsBody(self, transBody, taskDict, ownerDN, ownerGroup):
        """deal with a Request that has just one operation, as it was so far

        :param transBody: string, can be an empty string
        :param dict taskDict: dictionary of tasks, modified in this function
        :param str ownerDN: certificate DN used for the requests
        :param str ownerGroup: dirac group used for the requests

        :returns: None
        """

        requestOperation = "ReplicateAndRegister"
        if transBody:
            try:
                _requestType, requestOperation = transBody.split(";")
            # AttributeError: transBody is not a string;
            # ValueError: no ';' separator in the body --
            # in both cases keep the default operation
            except (AttributeError, ValueError):
                pass
        failedTasks = []
        # Failed tasks are collected and popped after the loop,
        # so iterating the dict directly here is safe
        for taskID, task in taskDict.items():

            transID = task["TransformationID"]

            oRequest = Request()
            transfer = Operation()
            transfer.Type = requestOperation
            transfer.TargetSE = task["TargetSE"]

            # If there are input files
            if task.get("InputData"):
                if isinstance(task["InputData"], list):
                    files = task["InputData"]
                elif isinstance(task["InputData"], six.string_types):
                    files = task["InputData"].split(";")
                for lfn in files:
                    trFile = File()
                    trFile.LFN = lfn

                    transfer.addFile(trFile)

            oRequest.addOperation(transfer)
            result = self._assignRequestToTask(oRequest, taskDict, transID,
                                               taskID, ownerDN, ownerGroup)
            if not result["OK"]:
                failedTasks.append(taskID)
        # Remove failed tasks
        for taskID in failedTasks:
            taskDict.pop(taskID)

    def _bodyPlugins(self, bodyObj, taskDict, ownerDN, ownerGroup):
        """Deal with complex body object"""
        for taskID, task in list(taskDict.items()):
            try:
                transID = task["TransformationID"]
                if not task.get("InputData"):
                    raise StopTaskIteration("No input data")

                oRequest = bodyObj.taskToRequest(taskID, task, transID)
                result = self._assignRequestToTask(oRequest, taskDict, transID,
                                                   taskID, ownerDN, ownerGroup)
                if not result["OK"]:
                    raise StopTaskIteration(
                        "Could not assign request to task: %s" %
                        result["Message"])
            except StopTaskIteration as e:
                self._logError("Error creating request for task",
                               "%s, %s" % (taskID, e),
                               transID=transID)
                taskDict.pop(taskID)

    def _assignRequestToTask(self, oRequest, taskDict, transID, taskID,
                             ownerDN, ownerGroup):
        """set ownerDN and group to request, and add the request to taskDict if it is
        valid, otherwise remove the task from the taskDict

        :param oRequest: Request
        :param dict taskDict: dictionary of tasks, modified in this function
        :param int transID: Transformation ID
        :param int taskID: Task ID
        :param str ownerDN: certificate DN used for the requests
        :param str ownerGroup: dirac group used for the requests

        :returns: S_OK() when the request was attached to the task,
                  S_ERROR when validation failed
        """

        oRequest.RequestName = self._transTaskName(transID, taskID)
        oRequest.OwnerDN = ownerDN
        oRequest.OwnerGroup = ownerGroup

        isValid = self.requestValidator.validate(oRequest)
        if not isValid["OK"]:
            self._logError("Error creating request for task",
                           "%s %s" % (taskID, isValid),
                           transID=transID)
            return S_ERROR("Error creating request")
        taskDict[taskID]["TaskObject"] = oRequest
        return S_OK()

    def submitTransformationTasks(self, taskDict):
        """Submit requests one by one"""
        submitted = 0
        failed = 0
        startTime = time.time()
        method = "submitTransformationTasks"
        for task in taskDict.values():
            # transID is the same for all tasks, so pick it up every time here
            transID = task["TransformationID"]
            if not task["TaskObject"]:
                task["Success"] = False
                failed += 1
                continue
            res = self.submitTaskToExternal(task["TaskObject"])
            if res["OK"]:
                task["ExternalID"] = res["Value"]
                task["Success"] = True
                submitted += 1
            else:
                self._logError("Failed to submit task to RMS",
                               res["Message"],
                               transID=transID)
                task["Success"] = False
                failed += 1
        if submitted:
            self._logInfo(
                "Submitted %d tasks to RMS in %.1f seconds" %
                (submitted, time.time() - startTime),
                transID=transID,
                method=method,
            )
        if failed:
            self._logWarn("Failed to submit %d tasks to RMS." % (failed),
                          transID=transID,
                          method=method)
        return S_OK(taskDict)

    def submitTaskToExternal(self, oRequest):
        """
        Submits a request to RMS
        """
        if isinstance(oRequest, self.requestClass):
            return self.requestClient.putRequest(oRequest,
                                                 useFailoverProxy=False,
                                                 retryMainService=2)
        return S_ERROR("Request should be a Request object")

    def updateTransformationReservedTasks(self, taskDicts):
        """Split tasks into those with a valid request ID and those without.

        :returns: S_OK with a dict holding 'TaskNameIDs' (name -> request ID)
                  and 'NoTasks' (names without a valid request ID)
        """
        requestNameIDs = {}
        noTasks = []
        for taskDict in taskDicts:
            requestName = self._transTaskName(taskDict["TransformationID"],
                                              taskDict["TaskID"])
            reqID = taskDict["ExternalID"]
            if reqID and int(reqID):
                requestNameIDs[requestName] = reqID
            else:
                noTasks.append(requestName)
        return S_OK({"NoTasks": noTasks, "TaskNameIDs": requestNameIDs})

    def getSubmittedTaskStatus(self, taskDicts):
        """
        Check if tasks changed status, and return a list of tasks per new status
        """
        updateDict = {}
        badRequestID = 0
        for taskDict in taskDicts:
            oldStatus = taskDict["ExternalStatus"]
            # ExternalID is normally a string
            if taskDict["ExternalID"] and int(taskDict["ExternalID"]):
                newStatus = self.requestClient.getRequestStatus(
                    taskDict["ExternalID"])
                if not newStatus["OK"]:
                    log = self._logVerbose if "not exist" in newStatus[
                        "Message"] else self._logWarn
                    log(
                        "getSubmittedTaskStatus: Failed to get requestID for request",
                        newStatus["Message"],
                        transID=taskDict["TransformationID"],
                    )
                else:
                    newStatus = newStatus["Value"]
                    # We don't care updating the tasks to Assigned while the request is being processed
                    if newStatus != oldStatus and newStatus != "Assigned":
                        updateDict.setdefault(newStatus,
                                              []).append(taskDict["TaskID"])
            else:
                badRequestID += 1
        if badRequestID:
            self._logWarn("%d requests have identifier 0" % badRequestID)
        return S_OK(updateDict)

    def getSubmittedFileStatus(self, fileDicts):
        """
        Check if transformation files changed status, and return a list of taskIDs per new status
        """
        # Don't try and get status of not submitted tasks!
        transID = None
        taskFiles = {}
        for fileDict in fileDicts:
            # There is only one transformation involved, get however the transID in the loop
            transID = fileDict["TransformationID"]
            taskID = int(fileDict["TaskID"])
            taskFiles.setdefault(taskID, []).append(fileDict["LFN"])
        # Should not happen, but just in case there are no files, return
        if transID is None:
            return S_OK({})

        res = self.transClient.getTransformationTasks({
            "TransformationID": transID,
            "TaskID": list(taskFiles)
        })
        if not res["OK"]:
            return res
        requestFiles = {}
        for taskDict in res["Value"]:
            taskID = taskDict["TaskID"]
            externalID = taskDict["ExternalID"]
            # Only consider tasks that are submitted, ExternalID is a string
            if taskDict["ExternalStatus"] != "Created" and externalID and int(
                    externalID):
                requestFiles[externalID] = taskFiles[taskID]

        updateDict = {}
        for requestID, lfnList in requestFiles.items():
            statusDict = self.requestClient.getRequestFileStatus(
                requestID, lfnList)
            if not statusDict["OK"]:
                log = self._logVerbose if "not exist" in statusDict[
                    "Message"] else self._logWarn
                log(
                    "Failed to get files status for request",
                    statusDict["Message"],
                    transID=transID,
                    method="getSubmittedFileStatus",
                )
            else:
                for lfn, newStatus in statusDict["Value"].items():
                    if newStatus == "Done":
                        updateDict[lfn] = TransformationFilesStatus.PROCESSED
                    elif newStatus == "Failed":
                        updateDict[lfn] = TransformationFilesStatus.PROBLEMATIC
        return S_OK(updateDict)
                                     'Usage:',
                                     ' %s [option|cfgfile] [requestName|requestID]' % Script.scriptName,
                                     'Arguments:',
                                     ' requestName: a request name' ] ) )
# # execution
if __name__ == "__main__":

  from DIRAC.Core.Base.Script import parseCommandLine
  parseCommandLine()

  import DIRAC
  resetFailed = False
  requestName = ''
  job = None
  from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
  reqClient = ReqClient()
  for switch in Script.getUnprocessedSwitches():
    if switch[0] == 'Failed':
      resetFailed = True
    elif switch[0] == 'Maximum':
      try:
        maxReset = int( switch[1] )
      except:
        pass
    elif switch[0] == 'Job':
      try:
        job = int( switch[1] )
      except:
        print "Invalid jobID", switch[1]

  if not job: