Example #1
    def _tryFailoverTransfer(self, tarFileName, tarFileDir):
        """tries to upload the log tarBall to the failoverSE and creates moving request"""
        failoverTransfer = FailoverTransfer(self._getRequestContainer())
        # Determine the experiment
        self.failoverSEs = self.ops.getValue(
            "Production/%s/FailOverSE" % self.experiment, self.failoverSEs)
        catalogs = self.ops.getValue(
            'Production/%s/Catalogs' % self.experiment,
            ['FileCatalog', 'LcgFileCatalog'])

        random.shuffle(self.failoverSEs)
        self.log.info(
            "Attempting to store file %s to the following SE(s):\n%s" %
            (tarFileName, ', '.join(self.failoverSEs)))
        result = failoverTransfer.transferAndRegisterFile(
            tarFileName,
            '%s/%s' % (tarFileDir, tarFileName),
            self.logLFNPath,
            self.failoverSEs,
            fileMetaDict={"GUID": None},
            fileCatalog=catalogs)
        if not result['OK']:
            self.log.error('Failed to upload logs to all destinations')
            self.setApplicationStatus('Failed To Upload Logs')
            return S_OK()  # because if the logs are lost, it's not the end of the world

        #Now after all operations, return potentially modified request object
        return S_OK({
            'Request': failoverTransfer.request,
            'uploadedSE': result['Value']['uploadedSE']
        })
Example #2

  def _tryFailoverTransfer(self, tarFileName, tarFileDir):
    """Tries to upload the log tarball to the failover SE and creates a moving request."""
    failoverTransfer = FailoverTransfer(self._getRequestContainer())
    # Determine the experiment
    self.failoverSEs = self.ops.getValue("Production/%s/FailOverSE" % self.experiment, self.failoverSEs)
    catalogs = self.ops.getValue('Production/%s/Catalogs' % self.experiment, ['FileCatalog', 'LcgFileCatalog'])

    random.shuffle(self.failoverSEs)
    self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (tarFileName,
                                                                               ', '.join(self.failoverSEs )))
    result = failoverTransfer.transferAndRegisterFile(tarFileName, '%s/%s' % (tarFileDir, tarFileName), self.logLFNPath,
                                                      self.failoverSEs, fileMetaDict = { "GUID": None },
                                                      fileCatalog = catalogs )
    if not result['OK']:
      self.log.error('Failed to upload logs to all destinations')
      self.setApplicationStatus('Failed To Upload Logs')
      return S_OK()  # because if the logs are lost, it's not the end of the world

    #Now after all operations, return potentially modified request object
    return S_OK( {'Request': failoverTransfer.request, 'uploadedSE': result['Value']['uploadedSE']})
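Note that both variants above swallow upload failures: on failure they return S_OK() with an empty payload, and only on success do they return the modified request and the SE actually used. A minimal caller sketch (hypothetical, not part of the module) that distinguishes the two outcomes:

# Hypothetical caller sketch: _tryFailoverTransfer never returns S_ERROR, so the
# payload must be inspected to tell a successful failover upload from a swallowed failure.
res = self._tryFailoverTransfer(tarFileName, tarFileDir)
if res['OK'] and res['Value']:
    # Payload is present only when the upload succeeded somewhere
    self.request = res['Value']['Request']
    uploadedSE = res['Value']['uploadedSE']
else:
    pass  # the logs were lost; the job carries on regardless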
Example #3
    def execute(self,
                production_id=None,
                prod_job_id=None,
                wms_job_id=None,
                workflowStatus=None,
                stepStatus=None,
                wf_commons=None,
                step_commons=None,
                step_number=None,
                step_id=None,
                orderedSEs=None):
        """ Main execution function.
    """

        try:

            super(UserJobFinalization,
                  self).execute(self.version, production_id, prod_job_id,
                                wms_job_id, workflowStatus, stepStatus,
                                wf_commons, step_commons, step_number, step_id)

            self._resolveInputVariables()

            # Earlier modules may have populated the report objects
            self.request.RequestName = 'job_%d_request.xml' % self.jobID
            self.request.JobID = self.jobID
            self.request.SourceComponent = "Job_%d" % self.jobID

            if not self._checkWFAndStepStatus():
                return S_OK()

            if not self.userOutputData:
                self.log.info(
                    "No user output data is specified for this job, nothing to do"
                )
                return S_OK("No output data to upload")

            self.log.info("User specified output file list is: %s" %
                          (', '.join(self.userOutputData)))

            globList = []
            for i in self.userOutputData:
                if re.search(r'\*', i):
                    globList.append(i)

            # Check whether list of userOutputData is a globbable pattern
            if globList:
                for i in globList:
                    self.userOutputData.remove(i)

                globbedOutputList = list(set(getGlobbedFiles(globList)))
                if globbedOutputList:
                    self.log.info(
                        'Found a pattern in the output data file list, '
                        'extra files to upload are: %s' % (', '.join(globbedOutputList)))
                    self.userOutputData += globbedOutputList
                else:
                    self.log.info(
                        "No files were found on the local disk for the following patterns: %s"
                        % (', '.join(globList)))

            self.log.info("Final list of files to upload are: %s" %
                          (', '.join(self.userOutputData)))

            # Determine the final list of possible output files for the workflow and all the parameters needed to upload them.
            outputList = []
            for i in self.userOutputData:
                outputList.append({
                    'outputDataType': (i.split('.')[-1]).upper(),
                    'outputDataName': os.path.basename(i)
                })

            userOutputLFNs = []
            if self.userOutputData:
                self.log.info("Constructing user output LFN(s) for %s" %
                              (', '.join(self.userOutputData)))

                userOutputLFNs = constructUserLFNs(self.jobID,
                                                   self._getCurrentOwner(),
                                                   self.userOutputData,
                                                   self.userOutputPath,
                                                   self.userPrependString)

            self.log.verbose(
                "Calling getCandidateFiles( %s, %s, %s)" %
                (outputList, userOutputLFNs, self.outputDataFileMask))
            try:
                fileDict = self.getCandidateFiles(outputList, userOutputLFNs,
                                                  self.outputDataFileMask)
            except os.error as e:
                self.setApplicationStatus(e)
                return S_OK()

            try:
                fileMetadata = self.getFileMetadata(fileDict)
            except RuntimeError as e:
                self.setApplicationStatus(e)
                return S_OK()

            if not fileMetadata:
                self.log.info(
                    "No output data files were determined to be uploaded for this workflow"
                )
                self.setApplicationStatus('No Output Data Files To Upload')
                return S_OK()

            if not orderedSEs:
                orderedSEs = self._getOrderedSEsList()

            self.log.info("Ordered list of output SEs is: %s" %
                          (', '.join(orderedSEs)))
            final = {}
            for fileName, metadata in fileMetadata.items():
                final[fileName] = metadata
                final[fileName]['resolvedSE'] = orderedSEs

            # At this point can exit and see exactly what the module will upload
            if not self._enableModule():
                self.log.info(
                    "Module is disabled; would have attempted to upload the files %s"
                    % ', '.join(final.keys()))
                for fileName, metadata in final.items():
                    self.log.info('--------%s--------' % fileName)
                    for n, v in metadata.items():
                        self.log.info('%s = %s' % (n, v))

                return S_OK("Module is disabled by control flag")

            # Disable the watchdog check in case the file uploading takes a long time
            self._disableWatchdogCPUCheck()

            # Instantiate the failover transfer client with the global request object
            if not self.failoverTransfer:
                self.failoverTransfer = FailoverTransfer(self.request)

            # One by one upload the files with failover if necessary
            replication = {}
            failover = {}
            uploaded = []
            for fileName, metadata in final.items():
                self.log.info(
                    "Attempting to store %s to the following SE(s): %s" %
                    (fileName, ', '.join(metadata['resolvedSE'])))
                fileMetaDict = {
                    'Size': metadata['filedict']['Size'],
                    'LFN': metadata['filedict']['LFN'],
                    'GUID': metadata['filedict']['GUID'],
                    'Checksum': metadata['filedict']['Checksum'],
                    'ChecksumType': metadata['filedict']['ChecksumType']
                }
                result = self.failoverTransfer.transferAndRegisterFile(
                    fileName=fileName,
                    localPath=metadata['localpath'],
                    lfn=metadata['filedict']['LFN'],
                    destinationSEList=metadata['resolvedSE'],
                    fileMetaDict=fileMetaDict,
                    masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error(
                        "Could not transfer and register %s with metadata:\n %s"
                        % (fileName, metadata))
                    failover[fileName] = metadata
                else:
                    # Only attempt replication after successful upload
                    lfn = metadata['lfn']
                    uploaded.append(lfn)
                    seList = metadata['resolvedSE']
                    replicateSE = ''
                    uploadedSE = result['Value'].get('uploadedSE', '')
                    if uploadedSE:
                        for se in seList:
                            if not se == uploadedSE:
                                replicateSE = se
                                break

                    if replicateSE and lfn and self.replicateUserOutputData:
                        self.log.info("Will attempt to replicate %s to %s" %
                                      (lfn, replicateSE))
                        replication[lfn] = (uploadedSE, replicateSE,
                                            fileMetaDict)

            cleanUp = False
            for fileName, metadata in failover.items():
                random.shuffle(self.failoverSEs)
                targetSE = metadata['resolvedSE'][0]
                if len(metadata['resolvedSE']) > 1:
                    replicateSE = metadata['resolvedSE'][1]
                else:
                    replicateSE = ''
                metadata['resolvedSE'] = self.failoverSEs
                fileMetaDict = {
                    'Size': metadata['filedict']['Size'],
                    'LFN': metadata['filedict']['LFN'],
                    'GUID': metadata['filedict']['GUID']
                }
                result = self.failoverTransfer.transferAndRegisterFileFailover(
                    fileName,
                    metadata['localpath'],
                    metadata['lfn'],
                    targetSE,
                    metadata['resolvedSE'],
                    fileMetaDict=fileMetaDict,
                    masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error(
                        "Could not transfer and register %s with metadata:\n %s"
                        % (fileName, metadata))
                    cleanUp = True
                    continue  # for user jobs we can continue even if one file completely fails
                else:
                    lfn = metadata['lfn']
                    uploaded.append(lfn)
                    # Even when using Failover, one needs to replicate to a second SE
                    if replicateSE and self.replicateUserOutputData:
                        replication[lfn] = (targetSE, replicateSE,
                                            fileMetaDict)

            # For files correctly uploaded must report LFNs to job parameters
            if uploaded:
                report = ', '.join(uploaded)
                self.setJobParameter('UploadedOutputData', report)

            # Now after all operations, retrieve potentially modified request object
            self.request = self.failoverTransfer.request

            # If some or all of the files failed to be saved to failover
            if cleanUp:
                self.workflow_commons['Request'] = self.request
                # Leave any uploaded files just in case it is useful for the user
                # do not try to replicate any files.
                return S_ERROR("Failed To Upload Output Data")

            for lfn, (uploadedSE, repSE,
                      fileMetaDictItem) in replication.items():
                self.failoverTransfer._setFileReplicationRequest(
                    lfn, repSE, fileMetaDictItem, uploadedSE)

            self.workflow_commons['Request'] = self.failoverTransfer.request

            self.generateFailoverFile()

            self.setApplicationStatus("Job Finished Successfully")

            return S_OK('Output data uploaded')

        except Exception as e:  # pylint:disable=broad-except
            self.log.exception("Failure in UserJobFinalization execute module",
                               lException=e)
            self.setApplicationStatus(repr(e))
            return S_ERROR(str(e))

        finally:
            super(UserJobFinalization, self).finalize(self.version)
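Every call in execute() above branches on the DIRAC result-dictionary convention rather than on exceptions. For readers unfamiliar with it, here is a simplified re-implementation of that convention (the real helpers live in DIRAC.Core.Utilities.ReturnValues; this is a sketch, not the DIRAC source):

# Simplified sketch of the DIRAC return convention used throughout execute().
def S_OK(value=None):
    return {'OK': True, 'Value': value}

def S_ERROR(message=''):
    return {'OK': False, 'Message': message}

result = S_ERROR('Failed To Upload Output Data')
if not result['OK']:
    print(result['Message'])  # callers branch on result['OK'], exactly as above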
Example #4
class UserJobFinalization(ModuleBase):
    """ Finalization of user jobs
  """

    #############################################################################
    def __init__(self, bkClient=None, dm=None):
        """Module initialization.
    """

        self.log = gLogger.getSubLogger("UserJobFinalization")
        super(UserJobFinalization, self).__init__(self.log,
                                                  bkClientIn=bkClient,
                                                  dm=dm)

        self.version = __RCSID__
        self.enable = True
        self.defaultOutputSE = resolveSEGroup(
            gConfig.getValue('/Resources/StorageElementGroups/Tier1-USER', []))
        self.failoverSEs = resolveSEGroup(
            gConfig.getValue('/Resources/StorageElementGroups/Tier1-Failover',
                             []))
        # List all parameters here
        self.request = None
        # Always allow any files specified by users
        self.outputDataFileMask = ''
        self.userOutputData = []
        self.userOutputSE = ''
        self.userOutputPath = ''
        self.failoverTransfer = None
        self.replicateUserOutputData = False
        self.userPrependString = ''

    #############################################################################
    def _resolveInputVariables(self):
        """ By convention the module parameters are resolved here.
    """
        super(UserJobFinalization, self)._resolveInputVariables()

        # Use LHCb utility for local running via dirac-jobexec
        if 'UserOutputData' in self.workflow_commons:
            userOutputData = self.workflow_commons['UserOutputData']
            if not isinstance(userOutputData, list):
                userOutputData = [i.strip() for i in userOutputData.split(';')]
            self.userOutputData = userOutputData

        if 'UserOutputSE' in self.workflow_commons:
            specifiedSE = self.workflow_commons['UserOutputSE']
            if not isinstance(specifiedSE, list):
                self.userOutputSE = [i.strip() for i in specifiedSE.split(';')]
        else:
            self.log.verbose(
                'No UserOutputSE specified, using default value: %s' %
                (', '.join(self.defaultOutputSE)))
            self.userOutputSE = []

        if 'UserOutputPath' in self.workflow_commons:
            self.userOutputPath = self.workflow_commons['UserOutputPath']

        if 'ReplicateUserOutputData' in self.workflow_commons and self.workflow_commons[
                'ReplicateUserOutputData']:
            self.replicateUserOutputData = True

        if 'UserOutputLFNPrepend' in self.workflow_commons:
            self.userPrependString = self.workflow_commons[
                'UserOutputLFNPrepend']

    #############################################################################

    def execute(self,
                production_id=None,
                prod_job_id=None,
                wms_job_id=None,
                workflowStatus=None,
                stepStatus=None,
                wf_commons=None,
                step_commons=None,
                step_number=None,
                step_id=None,
                orderedSEs=None):
        """ Main execution function.
    """

        try:

            super(UserJobFinalization,
                  self).execute(self.version, production_id, prod_job_id,
                                wms_job_id, workflowStatus, stepStatus,
                                wf_commons, step_commons, step_number, step_id)

            self._resolveInputVariables()

            # Earlier modules may have populated the report objects
            self.request.RequestName = 'job_%d_request.xml' % self.jobID
            self.request.JobID = self.jobID
            self.request.SourceComponent = "Job_%d" % self.jobID

            if not self._checkWFAndStepStatus():
                return S_OK()

            if not self.userOutputData:
                self.log.info(
                    "No user output data is specified for this job, nothing to do"
                )
                return S_OK("No output data to upload")

            self.log.info("User specified output file list is: %s" %
                          (', '.join(self.userOutputData)))

            globList = []
            for i in self.userOutputData:
                if re.search(r'\*', i):
                    globList.append(i)

            # Check whether list of userOutputData is a globbable pattern
            if globList:
                for i in globList:
                    self.userOutputData.remove(i)

                globbedOutputList = list(set(getGlobbedFiles(globList)))
                if globbedOutputList:
                    self.log.info(
                        'Found a pattern in the output data file list, '
                        'extra files to upload are: %s' % (', '.join(globbedOutputList)))
                    self.userOutputData += globbedOutputList
                else:
                    self.log.info(
                        "No files were found on the local disk for the following patterns: %s"
                        % (', '.join(globList)))

            self.log.info("Final list of files to upload are: %s" %
                          (', '.join(self.userOutputData)))

            # Determine the final list of possible output files for the workflow and all the parameters needed to upload them.
            outputList = []
            for i in self.userOutputData:
                outputList.append({
                    'outputDataType': (i.split('.')[-1]).upper(),
                    'outputDataName': os.path.basename(i)
                })

            userOutputLFNs = []
            if self.userOutputData:
                self.log.info("Constructing user output LFN(s) for %s" %
                              (', '.join(self.userOutputData)))

                userOutputLFNs = constructUserLFNs(self.jobID,
                                                   self._getCurrentOwner(),
                                                   self.userOutputData,
                                                   self.userOutputPath,
                                                   self.userPrependString)

            self.log.verbose(
                "Calling getCandidateFiles( %s, %s, %s)" %
                (outputList, userOutputLFNs, self.outputDataFileMask))
            try:
                fileDict = self.getCandidateFiles(outputList, userOutputLFNs,
                                                  self.outputDataFileMask)
            except os.error as e:
                self.setApplicationStatus(e)
                return S_OK()

            try:
                fileMetadata = self.getFileMetadata(fileDict)
            except RuntimeError as e:
                self.setApplicationStatus(e)
                return S_OK()

            if not fileMetadata:
                self.log.info(
                    "No output data files were determined to be uploaded for this workflow"
                )
                self.setApplicationStatus('No Output Data Files To Upload')
                return S_OK()

            if not orderedSEs:
                orderedSEs = self._getOrderedSEsList()

            self.log.info("Ordered list of output SEs is: %s" %
                          (', '.join(orderedSEs)))
            final = {}
            for fileName, metadata in fileMetadata.items():
                final[fileName] = metadata
                final[fileName]['resolvedSE'] = orderedSEs

            # At this point can exit and see exactly what the module will upload
            if not self._enableModule():
                self.log.info(
                    "Module is disabled; would have attempted to upload the files %s"
                    % ', '.join(final.keys()))
                for fileName, metadata in final.items():
                    self.log.info('--------%s--------' % fileName)
                    for n, v in metadata.items():
                        self.log.info('%s = %s' % (n, v))

                return S_OK("Module is disabled by control flag")

            # Disable the watchdog check in case the file uploading takes a long time
            self._disableWatchdogCPUCheck()

            # Instantiate the failover transfer client with the global request object
            if not self.failoverTransfer:
                self.failoverTransfer = FailoverTransfer(self.request)

            # One by one upload the files with failover if necessary
            replication = {}
            failover = {}
            uploaded = []
            for fileName, metadata in final.items():
                self.log.info(
                    "Attempting to store %s to the following SE(s): %s" %
                    (fileName, ', '.join(metadata['resolvedSE'])))
                fileMetaDict = {
                    'Size': metadata['filedict']['Size'],
                    'LFN': metadata['filedict']['LFN'],
                    'GUID': metadata['filedict']['GUID'],
                    'Checksum': metadata['filedict']['Checksum'],
                    'ChecksumType': metadata['filedict']['ChecksumType']
                }
                result = self.failoverTransfer.transferAndRegisterFile(
                    fileName=fileName,
                    localPath=metadata['localpath'],
                    lfn=metadata['filedict']['LFN'],
                    destinationSEList=metadata['resolvedSE'],
                    fileMetaDict=fileMetaDict,
                    masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error(
                        "Could not transfer and register %s with metadata:\n %s"
                        % (fileName, metadata))
                    failover[fileName] = metadata
                else:
                    # Only attempt replication after successful upload
                    lfn = metadata['lfn']
                    uploaded.append(lfn)
                    seList = metadata['resolvedSE']
                    replicateSE = ''
                    uploadedSE = result['Value'].get('uploadedSE', '')
                    if uploadedSE:
                        for se in seList:
                            if not se == uploadedSE:
                                replicateSE = se
                                break

                    if replicateSE and lfn and self.replicateUserOutputData:
                        self.log.info("Will attempt to replicate %s to %s" %
                                      (lfn, replicateSE))
                        replication[lfn] = (uploadedSE, replicateSE,
                                            fileMetaDict)

            cleanUp = False
            for fileName, metadata in failover.items():
                random.shuffle(self.failoverSEs)
                targetSE = metadata['resolvedSE'][0]
                if len(metadata['resolvedSE']) > 1:
                    replicateSE = metadata['resolvedSE'][1]
                else:
                    replicateSE = ''
                metadata['resolvedSE'] = self.failoverSEs
                fileMetaDict = {
                    'Size': metadata['filedict']['Size'],
                    'LFN': metadata['filedict']['LFN'],
                    'GUID': metadata['filedict']['GUID']
                }
                result = self.failoverTransfer.transferAndRegisterFileFailover(
                    fileName,
                    metadata['localpath'],
                    metadata['lfn'],
                    targetSE,
                    metadata['resolvedSE'],
                    fileMetaDict=fileMetaDict,
                    masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error(
                        "Could not transfer and register %s with metadata:\n %s"
                        % (fileName, metadata))
                    cleanUp = True
                    continue  # for user jobs we can continue even if one file completely fails
                else:
                    lfn = metadata['lfn']
                    uploaded.append(lfn)
                    # Even when using Failover, one needs to replicate to a second SE
                    if replicateSE and self.replicateUserOutputData:
                        replication[lfn] = (targetSE, replicateSE,
                                            fileMetaDict)

            # For files correctly uploaded must report LFNs to job parameters
            if uploaded:
                report = ', '.join(uploaded)
                self.setJobParameter('UploadedOutputData', report)

            # Now after all operations, retrieve potentially modified request object
            self.request = self.failoverTransfer.request

            # If some or all of the files failed to be saved to failover
            if cleanUp:
                self.workflow_commons['Request'] = self.request
                # Leave any uploaded files just in case it is useful for the user
                # do not try to replicate any files.
                return S_ERROR("Failed To Upload Output Data")

            for lfn, (uploadedSE, repSE,
                      fileMetaDictItem) in replication.items():
                self.failoverTransfer._setFileReplicationRequest(
                    lfn, repSE, fileMetaDictItem, uploadedSE)

            self.workflow_commons['Request'] = self.failoverTransfer.request

            self.generateFailoverFile()

            self.setApplicationStatus("Job Finished Successfully")

            return S_OK('Output data uploaded')

        except Exception as e:  # pylint:disable=broad-except
            self.log.exception("Failure in UserJobFinalization execute module",
                               lException=e)
            self.setApplicationStatus(repr(e))
            return S_ERROR(str(e))

        finally:
            super(UserJobFinalization, self).finalize(self.version)

    #############################################################################

    def _getOrderedSEsList(self):
        """ Returns list of ordered SEs to which trying to upload
    """
        # FIXME: remove all banned SEs (not the force ones)
        # First get the local (or assigned) SE to try first for upload and others in random fashion
        localSEs = set(
            getDestinationSEList('Tier1-USER',
                                 self.siteName,
                                 outputmode='local'))
        self.log.verbose("Site Local SE for user outputs is: %s" %
                         (list(localSEs)))
        userSEs = set(self.userOutputSE)
        otherSEs = set(self.defaultOutputSE) - localSEs - userSEs
        # If a user SE is local, set it first
        topSEs = userSEs & localSEs
        # reordered user SEs, setting local first
        userSEs = list(topSEs) + list(userSEs - topSEs)
        localSEs = list(localSEs - topSEs)
        if len(userSEs) < 2 and localSEs:
            # Set a local SE first
            orderedSEs = localSEs[0:1] + userSEs + localSEs[1:]
        else:
            orderedSEs = userSEs + localSEs
        otherSEs = list(otherSEs)
        random.shuffle(otherSEs)
        orderedSEs += otherSEs

        return orderedSEs

    def _getCurrentOwner(self):
        """Simple function to return current DIRAC username.
    """
        if 'OwnerName' in self.workflow_commons:
            return self.workflow_commons['OwnerName']

        result = getProxyInfo()

        if not result['OK']:
            if not self._enableModule():
                return 'testUser'
            raise RuntimeError('Could not obtain proxy information')

        if 'username' not in result['Value']:
            raise RuntimeError('Could not get username from proxy')

        return result['Value']['username']
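To make the ordering in _getOrderedSEsList concrete, here is a standalone walk-through of the same set algebra with made-up SE names (illustration only; the real SE lists come from the configuration):

# Standalone illustration of the SE ordering, with made-up SE names.
import random

localSEs = {'CERN-USER'}                          # site-local SEs
userSEs = {'CERN-USER', 'GRIDKA-USER'}            # SEs requested by the user
defaultSEs = {'CERN-USER', 'CNAF-USER', 'PIC-USER'}

otherSEs = defaultSEs - localSEs - userSEs        # {'CNAF-USER', 'PIC-USER'}
topSEs = userSEs & localSEs                       # user SEs that are also local go first
orderedUser = list(topSEs) + list(userSEs - topSEs)
remainingLocal = list(localSEs - topSEs)
if len(orderedUser) < 2 and remainingLocal:
    ordered = remainingLocal[0:1] + orderedUser + remainingLocal[1:]
else:
    ordered = orderedUser + remainingLocal        # two user SEs here, so this branch
others = list(otherSEs)
random.shuffle(others)                            # remaining defaults in random order
print(ordered + others)  # e.g. ['CERN-USER', 'GRIDKA-USER', 'PIC-USER', 'CNAF-USER']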
Example #5
  def execute(self, production_id=None, prod_job_id=None, wms_job_id=None,
              workflowStatus=None, stepStatus=None,
              wf_commons=None, step_commons=None,
              step_number=None, step_id=None):
    """ Main executon method
    """

    try:

      super(UploadLogFile, self).execute(self.version, production_id, prod_job_id, wms_job_id,
                                         workflowStatus, stepStatus,
                                         wf_commons, step_commons, step_number, step_id)

      self._resolveInputVariables()

      self.request.RequestName = 'job_%d_request.xml' % self.jobID
      self.request.JobID = self.jobID
      self.request.SourceComponent = "Job_%d" % self.jobID

      res = systemCall(0, shlex.split('ls -al'))
      if res['OK'] and res['Value'][0] == 0:
        self.log.info('The contents of the working directory...')
        self.log.info(str(res['Value'][1]))
      else:
        self.log.error('Failed to list the working directory', str(res['Value'][2]))

      self.log.info('PRODUCTION_ID = %s, JOB_ID = %s ' % (self.production_id, self.prod_job_id))
      self.logdir = os.path.realpath('./job/log/%s/%s' % (self.production_id, self.prod_job_id))
      self.log.info('Selected log files will be temporarily stored in %s' % self.logdir)

      ##########################################
      # First determine the files which should be saved
      self.log.info('Determining the files to be saved in the logs.')
      res = self._determineRelevantFiles()
      if not res['OK']:
        self.log.error('Completely failed to select relevant log files.', res['Message'])
        return S_OK()
      selectedFiles = res['Value']
      self.log.info('The following %s files were selected to be saved:\n%s' % (len(selectedFiles),
                                                                               '\n'.join(selectedFiles)))

      #########################################
      # Create a temporary directory containing these files
      self.log.info('Populating a temporary directory for selected files.')
      res = self.__populateLogDirectory(selectedFiles)
      if not res['OK']:
        self.log.error('Completely failed to populate temporary log file directory.', res['Message'])
        self.setApplicationStatus('Failed To Populate Log Dir')
        return S_OK()
      self.log.info('%s populated with log files.' % self.logdir)

      #########################################
      # Make sure all the files in the log directory have the correct permissions
      result = self.__setLogFilePermissions(self.logdir)
      if not result['OK']:
        self.log.error('Could not set permissions of log files to 0755 with message:\n%s' % (result['Message']))

      # Instantiate the failover transfer client with the global request object
      if not self.failoverTransfer:
        self.failoverTransfer = FailoverTransfer(self.request)

      #########################################
      if not self._enableModule():
        self.log.info("Would have attempted to upload log files, but there's not JobID")
        return S_OK()

      # Attempt to upload logs to the LogSE
      self.log.info('Transferring log files to the %s' % self.logSE)

      res = returnSingleResult(StorageElement(self.logSE).getURL(self.logFilePath, protocol='https'))
      if not res['OK']:
        self.log.warn("Could not get dynamic URL for log", res)
        logHttpsURL = "http://lhcb-logs.cern.ch/storage%s" % self.logFilePath
      else:
        logHttpsURL = res['Value']

      logURL = '<a href="%s">Log file directory</a>' % logHttpsURL
      self.log.info('Logs for this job may be retrieved from %s' % logURL)
      self.log.info('putDirectory %s %s %s' % (self.logFilePath, os.path.realpath(self.logdir), self.logSE))

      res = returnSingleResult(StorageElement(self.logSE).putDirectory(
          {self.logFilePath: os.path.realpath(self.logdir)}))
      self.log.verbose(res)
      self.setJobParameter('Log URL', logURL)
      if res['OK']:
        self.log.info('Successfully uploaded log directory to %s' % self.logSE)
      else:
        self.log.error("Failed to upload log files with message '%s', uploading to failover SE" % res['Message'])
        # make a tar file
        tarFileName = os.path.basename(self.logLFNPath)
        try:
          res = tarFiles(tarFileName, selectedFiles, compression='gz')
          if not res['OK']:
            self.log.error('Failed to create tar of log files: %s' % res['Message'])
            self.setApplicationStatus('Failed to create tar of log files')
            # We do not fail the job for this case
            return S_OK()
        except IOError as e:
          self.log.error('Failed to create tar of log files: %s' % repr(e))
          self.setApplicationStatus('Failed to create tar of log files')
          # We do not fail the job for this case
          return S_OK()
        self._uploadLogToFailoverSE(tarFileName)

      self.workflow_commons['Request'] = self.request

      return S_OK("Log Files uploaded")

    except Exception as e:  # pylint:disable=broad-except
      self.log.exception("Failure in UploadLogFile execute module", lException=e)
      return S_ERROR(str(e))

    finally:
      super(UploadLogFile, self).finalize(self.version)
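The LogSE upload above hinges on one call shape: StorageElement.putDirectory takes a {LFN directory: local directory} mapping, wrapped in returnSingleResult. A minimal sketch of that step in isolation (the paths are placeholders; this assumes the DIRAC StorageElement client API used above):

# Sketch of the putDirectory step in isolation (placeholder paths).
from DIRAC.Resources.Storage.StorageElement import StorageElement
from DIRAC.Core.Utilities.ReturnValues import returnSingleResult

logSE = 'LogSE'
logFilePath = '/lhcb/MC/2018/LOG/00001234/0000'   # hypothetical LFN directory
localLogDir = '/path/to/job/log/00001234/0000'    # hypothetical local directory

res = returnSingleResult(StorageElement(logSE).putDirectory({logFilePath: localLogDir}))
if not res['OK']:
    # This is the branch where the module tars the logs and goes to the failover SE
    print('LogSE upload failed: %s' % res['Message'])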
Example #6
class UploadLogFile(ModuleBase):
  """ Upload to LogSE
  """

  #############################################################################

  def __init__(self, bkClient=None, dm=None):
    """Module initialization.
    """

    self.log = gLogger.getSubLogger("UploadLogFile")
    super(UploadLogFile, self).__init__(self.log, bkClientIn=bkClient, dm=dm)

    self.version = __RCSID__

    self.logSE = self.opsH.getValue('LogStorage/LogSE', 'LogSE')
    self.logSizeLimit = self.opsH.getValue('LogFiles/SizeLimit', 1 * 1024 * 1024)
    self.logExtensions = self.opsH.getValue('LogFiles/Extensions', [])
    self.logFilePath = ''
    self.logLFNPath = ''
    self.logdir = ''
    self.failoverTransfer = None
    self.failoverSEs = []

######################################################################

  def _resolveInputVariables(self):

    super(UploadLogFile, self)._resolveInputVariables()

    if 'LogTargetPath' in self.workflow_commons:
      self.logLFNPath = self.workflow_commons['LogTargetPath']
    else:
      self.log.info('LogTargetPath parameter not found, creating on the fly')
      result = getLogPath(self.workflow_commons, self.bkClient)
      if not result['OK']:
        self.log.error('Could not create LogFilePath', result['Message'])
        return result
      self.logLFNPath = result['Value']['LogTargetPath'][0]

    if not isinstance(self.logLFNPath, str):
      self.logLFNPath = self.logLFNPath[0]

######################################################################

  def execute(self, production_id=None, prod_job_id=None, wms_job_id=None,
              workflowStatus=None, stepStatus=None,
              wf_commons=None, step_commons=None,
              step_number=None, step_id=None):
    """ Main executon method
    """

    try:

      super(UploadLogFile, self).execute(self.version, production_id, prod_job_id, wms_job_id,
                                         workflowStatus, stepStatus,
                                         wf_commons, step_commons, step_number, step_id)

      self._resolveInputVariables()

      self.request.RequestName = 'job_%d_request.xml' % self.jobID
      self.request.JobID = self.jobID
      self.request.SourceComponent = "Job_%d" % self.jobID

      res = systemCall(0, shlex.split('ls -al'))
      if res['OK'] and res['Value'][0] == 0:
        self.log.info('The contents of the working directory...')
        self.log.info(str(res['Value'][1]))
      else:
        self.log.error('Failed to list the working directory', str(res['Value'][2]))

      self.log.info('PRODUCTION_ID = %s, JOB_ID = %s ' % (self.production_id, self.prod_job_id))
      self.logdir = os.path.realpath('./job/log/%s/%s' % (self.production_id, self.prod_job_id))
      self.log.info('Selected log files will be temporarily stored in %s' % self.logdir)

      ##########################################
      # First determine the files which should be saved
      self.log.info('Determining the files to be saved in the logs.')
      res = self._determineRelevantFiles()
      if not res['OK']:
        self.log.error('Completely failed to select relevant log files.', res['Message'])
        return S_OK()
      selectedFiles = res['Value']
      self.log.info('The following %s files were selected to be saved:\n%s' % (len(selectedFiles),
                                                                               '\n'.join(selectedFiles)))

      #########################################
      # Create a temporary directory containing these files
      self.log.info('Populating a temporary directory for selected files.')
      res = self.__populateLogDirectory(selectedFiles)
      if not res['OK']:
        self.log.error('Completely failed to populate temporary log file directory.', res['Message'])
        self.setApplicationStatus('Failed To Populate Log Dir')
        return S_OK()
      self.log.info('%s populated with log files.' % self.logdir)

      #########################################
      # Make sure all the files in the log directory have the correct permissions
      result = self.__setLogFilePermissions(self.logdir)
      if not result['OK']:
        self.log.error('Could not set permissions of log files to 0755 with message:\n%s' % (result['Message']))

      # Instantiate the failover transfer client with the global request object
      if not self.failoverTransfer:
        self.failoverTransfer = FailoverTransfer(self.request)

      #########################################
      if not self._enableModule():
        self.log.info("Would have attempted to upload log files, but there's not JobID")
        return S_OK()

      # Attempt to upload logs to the LogSE
      self.log.info('Transferring log files to the %s' % self.logSE)

      res = returnSingleResult(StorageElement(self.logSE).getURL(self.logFilePath, protocol='https'))
      if not res['OK']:
        self.log.warn("Could not get dynamic URL for log", res)
        logHttpsURL = "http://lhcb-logs.cern.ch/storage%s" % self.logFilePath
      else:
        logHttpsURL = res['Value']

      logURL = '<a href="%s">Log file directory</a>' % logHttpsURL
      self.log.info('Logs for this job may be retrieved from %s' % logURL)
      self.log.info('putDirectory %s %s %s' % (self.logFilePath, os.path.realpath(self.logdir), self.logSE))

      res = returnSingleResult(StorageElement(self.logSE).putDirectory(
          {self.logFilePath: os.path.realpath(self.logdir)}))
      self.log.verbose(res)
      self.setJobParameter('Log URL', logURL)
      if res['OK']:
        self.log.info('Successfully uploaded log directory to %s' % self.logSE)
      else:
        self.log.error("Failed to upload log files with message '%s', uploading to failover SE" % res['Message'])
        # make a tar file
        tarFileName = os.path.basename(self.logLFNPath)
        try:
          res = tarFiles(tarFileName, selectedFiles, compression='gz')
          if not res['OK']:
            self.log.error('Failed to create tar of log files: %s' % res['Message'])
            self.setApplicationStatus('Failed to create tar of log files')
            # We do not fail the job for this case
            return S_OK()
        except IOError as e:
          self.log.error('Failed to create tar of log files: %s' % repr(e))
          self.setApplicationStatus('Failed to create tar of log files')
          # We do not fail the job for this case
          return S_OK()
        self._uploadLogToFailoverSE(tarFileName)

      self.workflow_commons['Request'] = self.request

      return S_OK("Log Files uploaded")

    except Exception as e:  # pylint:disable=broad-except
      self.log.exception("Failure in UploadLogFile execute module", lException=e)
      return S_ERROR(str(e))

    finally:
      super(UploadLogFile, self).finalize(self.version)

  #############################################################################

  def _uploadLogToFailoverSE(self, tarFileName):
    """  Recover the logs to a failover storage element
    """

    # here because self.siteName is not known until execute() is invoked
    self.failoverSEs = getDestinationSEList('Tier1-Failover', self.siteName, outputmode='Any')
    random.shuffle(self.failoverSEs)
    self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (tarFileName,
                                                                               ', '.join(self.failoverSEs)))

    fileDict = {tarFileName: {'lfn': self.logLFNPath,
                              'workflowSE': self.failoverSEs}}
    metadata = self.getFileMetadata(fileDict)
    fileMetaDict = {'Size': metadata[tarFileName]['filedict']['Size'],
                    'LFN': metadata[tarFileName]['filedict']['LFN'],
                    'GUID': metadata[tarFileName]['filedict']['GUID'],
                    'Checksum': metadata[tarFileName]['filedict']['Checksum'],
                    'ChecksumType': metadata[tarFileName]['filedict']['ChecksumType']}

    result = self.failoverTransfer.transferAndRegisterFile(fileName=tarFileName,
                                                           localPath='%s/%s' % (os.getcwd(), tarFileName),
                                                           lfn=self.logLFNPath,
                                                           destinationSEList=self.failoverSEs,
                                                           fileMetaDict=fileMetaDict,
                                                           masterCatalogOnly=True)

    if not result['OK']:
      self.log.error("Failed to upload logs to all failover destinations (the job will not fail for this reason")
      self.setApplicationStatus('Failed To Upload Logs')
    else:
      uploadedSE = result['Value']['uploadedSE']
      self.log.info("Uploaded logs to failover SE %s" % uploadedSE)

      self.request = self.failoverTransfer.request

      self.__createLogUploadRequest(self.logSE, self.logLFNPath, uploadedSE)
      self.log.info("Successfully created failover request")

  def _determineRelevantFiles(self):
    """ The files which are below a configurable size will be stored in the logs.
        This will typically pick up everything in the working directory minus the output data files.
    """
    logFileExtensions = ['*.txt', '*.log', '*.out', '*.output',
                         '*.xml', '*.sh', '*.info', '*.err', 'prodConf*.py']  # '*.root',
    if self.logExtensions:
      self.log.info('Using list of log extensions from CS:\n%s' % (', '.join(self.logExtensions)))
      logFileExtensions = self.logExtensions
    else:
      self.log.info('Using default list of log extensions:\n%s' % (', '.join(logFileExtensions)))

    candidateFiles = []
    for ext in logFileExtensions:
      self.log.debug('Looking at log file wildcard: %s' % ext)
      globList = glob.glob(ext)
      for check in globList:
        if os.path.isfile(check):
          self.log.debug('Found locally existing log file: %s' % check)
          candidateFiles.append(check)

    selectedFiles = []
    try:
      for candidate in candidateFiles:
        fileSize = os.stat(candidate)[6]  # index 6 is st_size
        if fileSize < self.logSizeLimit:
          selectedFiles.append(candidate)
        else:
          self.log.info('Log file found to be greater than maximum of %s bytes, compressing' % self.logSizeLimit)
          tarFileName = os.path.basename(candidate) + '.gz'
          tarFiles(tarFileName, [candidate], compression='gz')
          selectedFiles.append(tarFileName)
      return S_OK(selectedFiles)
    except OSError as x:
      self.log.exception('Exception while determining files to save.', '', str(x))
      return S_ERROR('Could not determine log files')

  #############################################################################

  def __populateLogDirectory(self, selectedFiles):
    """ A temporary directory is created for all the selected files.
        These files are then copied into this directory before being uploaded
    """
    # Create the temporary directory
    mkDir(self.logdir)
    # Set proper permissions
    self.log.info('Changing log directory permissions to 0755')
    try:
      os.chmod(self.logdir, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
    except OSError as x:
      self.log.error('Could not set logdir permissions to 0755:', '%s (%s)' % (self.logdir, str(x)))
    # Populate the temporary directory
    try:
      for fileS in selectedFiles:
        destinationFile = '%s/%s' % (self.logdir, os.path.basename(fileS))
        shutil.copy(fileS, destinationFile)
    except shutil.Error:
      self.log.warn('src and dst are the same')
    except IOError as x:
      self.log.exception('Exception while trying to copy file.', fileS, str(x))
      self.log.info('File %s will be skipped and can be considered lost.' % fileS)

    # Now verify the contents of our target log dir
    successfulFiles = os.listdir(self.logdir)
    if not successfulFiles:
      self.log.info('Failed to copy any files to the target directory.')
      return S_ERROR()

    self.log.info('Prepared files in the temporary directory %s.' % self.logdir)
    return S_OK()

  #############################################################################

  def __createLogUploadRequest(self, targetSE, logFileLFN, uploadedSE):
    """ Set a request to upload job log files from the output sandbox
    """
    self.log.info('Setting log upload request for %s at %s' % (logFileLFN, targetSE))

    logUpload = Operation()
    logUpload.Type = 'LogUpload'
    logUpload.TargetSE = targetSE

    logFile = File()
    logFile.LFN = logFileLFN

    logUpload.addFile(logFile)
    self.request.addOperation(logUpload)

    logRemoval = Operation()
    logRemoval.Type = 'RemoveFile'
    logRemoval.TargetSE = uploadedSE

    logRemoval.addFile(logFile)
    self.request.addOperation(logRemoval)

  #############################################################################

  def __setLogFilePermissions(self, logDir):
    """ Sets the permissions of all the files in the log directory to ensure
        they are readable.
    """
    try:
      for toChange in os.listdir(logDir):
        if not os.path.islink('%s/%s' % (logDir, toChange)):
          self.log.debug('Changing permissions of %s/%s to 0755' % (logDir, toChange))
          os.chmod('%s/%s' % (logDir, toChange), stat.S_IRWXU |
                   stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
    except OSError as x:
      self.log.error('Problem changing shared area permissions', str(x))
      return S_ERROR(x)

    return S_OK()
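The two operations chained by __createLogUploadRequest amount to a deferred move: upload the tarball from the failover SE to the LogSE, then delete the failover copy. Here is a standalone sketch of that request structure, using only the RequestManagementSystem calls that appear above (the LFN and SE names are made up):

# Sketch of the deferred "move" request built by __createLogUploadRequest.
from DIRAC.RequestManagementSystem.Client.Request import Request
from DIRAC.RequestManagementSystem.Client.Operation import Operation
from DIRAC.RequestManagementSystem.Client.File import File

request = Request()

logFile = File()
logFile.LFN = '/lhcb/user/s/someuser/12345/job.log.tar.gz'  # hypothetical LFN

logUpload = Operation()
logUpload.Type = 'LogUpload'
logUpload.TargetSE = 'LogSE'           # final destination
logUpload.addFile(logFile)
request.addOperation(logUpload)        # executed first: copy to the LogSE

logRemoval = Operation()
logRemoval.Type = 'RemoveFile'
logRemoval.TargetSE = 'CERN-Failover'  # hypothetical SE the tarball landed on
logRemoval.addFile(logFile)
request.addOperation(logRemoval)       # then the failover copy is dropped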
Example #7

    def execute(self):
        """ Main execution function.
        """
        # Have to work out if the module is part of the last step, i.e.
        # user jobs can have any number of steps and we only want
        # to run the finalization once.
        currentStep = int(self.step_commons['STEP_NUMBER'])
        totalSteps = int(self.workflow_commons['TotalSteps'])
        if currentStep == totalSteps:
            self.lastStep = True
        else:
            self.log.verbose('Current step = %s, total steps of workflow = %s, UserJobFinalization '
                             'will enable itself only at the last workflow step.' % (currentStep, totalSteps))
            
        if not self.lastStep:
            #Not last step, do nothing, proceed happily.
            return S_OK()
        
        result = self.resolveInputVariables()
        if not result['OK']:
            self.log.error("Failed to resolve input parameters:", result['Message'])
            return result
        
        self.log.info('Initializing %s' % self.version)
        if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
            # Something went wrong in the step or the workflow, do nothing.
            self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'], 
                                                                       self.stepStatus['OK']))
            return S_OK('No output data upload attempted')
        
        self.request.RequestName = 'job_%d_request.xml' % int(self.jobID)
        self.request.JobID = self.jobID
        self.request.SourceComponent = "Job_%d" % int(self.jobID)
        
        if not self.userOutputData:
            self.log.info('No user output data is specified for this job, nothing to do')
            return S_OK('No output data to upload')
            
        #Determine the final list of possible output files for the
        #workflow and all the parameters needed to upload them.
        outputList = []
        possible_files= []
        for i in self.userOutputData:
            files = getGlobbedFiles(i)
            for possible_file in files:
                if possible_file in possible_files:
                    #Don't have twice the same file
                    continue
                outputList.append({'outputDataType': i.split('.')[-1].upper(),  # used to sort the files into different dirs
                                   'outputDataSE': self.userOutputSE,
                                   'outputFile': os.path.basename(possible_file)})
                possible_files.append(os.path.basename(possible_file))
                
        self.log.info('Constructing user output LFN(s) for %s' % (', '.join(self.userOutputData)))
        if not self.jobID:
            self.jobID = 12345
        owner = ''
        if 'Owner' in self.workflow_commons:
            owner = self.workflow_commons['Owner']
        else:
            res = getCurrentOwner()
            if not res['OK']:
                self.log.error('Could not find proxy')
                return S_ERROR('Could not obtain owner from proxy')
            owner = res['Value']
        vo = ''
        if 'VO' in self.workflow_commons:
            vo = self.workflow_commons['VO']
        else:
            res = getVOfromProxyGroup()
            if not res['OK']:
                self.log.error('Failed finding the VO')
                return S_ERROR('Could not obtain VO from proxy')
            vo = res['Value']
        result = constructUserLFNs(int(self.jobID), vo, owner, 
                                   possible_files, self.userOutputPath)
        if not result['OK']:
            self.log.error('Could not create user LFNs', result['Message'])
            return result
        userOutputLFNs = result['Value']
        
        self.log.verbose('Calling getCandidateFiles( %s, %s)' % (outputList, userOutputLFNs))
        result = self.getCandidateFiles(outputList, userOutputLFNs)
        if not result['OK']:
            if not self.ignoreapperrors:
                self.log.error(result['Message'])
                self.setApplicationStatus(result['Message'])
                return S_OK()
        
        fileDict = result['Value']
        result = self.getFileMetadata(fileDict)
        if not result['OK']:
            if not self.ignoreapperrors:
                self.log.error(result['Message'])
                self.setApplicationStatus(result['Message'])
                return S_OK()
        
        if not result['Value']:
            if not self.ignoreapperrors:
                self.log.info('No output data files were determined to be uploaded for this workflow')
                self.setApplicationStatus('No Output Data Files To Upload')
                return S_OK()
        
        fileMetadata = result['Value']

        orderedSEs = self.userOutputSE
        
        
        self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))    
        final = {}
        for fileName, metadata in fileMetadata.items():
            final[fileName] = metadata
            final[fileName]['resolvedSE'] = orderedSEs
        
        #At this point can exit and see exactly what the module will upload
        if not self.enable:
            self.log.info('Module is disabled by control flag, would have attempted \
to upload the following files %s' % ', '.join(final.keys()))
            for fileName, metadata in final.items():
                self.log.info('--------%s--------' % fileName)
                for n, v in metadata.items():
                    self.log.info('%s = %s' %(n, v))
            
            return S_OK('Module is disabled by control flag')
        
        #Instantiate the failover transfer client with the global request object
        failoverTransfer = FailoverTransfer(self.request)
        
        #One by one upload the files with failover if necessary
        replication = {}
        failover = {}
        uploaded = []
        if not self.failoverTest:
            for fileName, metadata in final.items():
                self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName, 
                                                                                           ', '.join(metadata['resolvedSE'])))
                replicateSE = ''
                result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'], metadata['lfn'],
                                                                  metadata['resolvedSE'], fileMetaDict = metadata, 
                                                                  fileCatalog = self.userFileCatalog)
                if not result['OK']:
                    self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
                    failover[fileName] = metadata
                else:
                    #Only attempt replication after successful upload
                    lfn = metadata['lfn']
                    uploaded.append(lfn)          
                    seList = metadata['resolvedSE']
                    
                    if 'uploadedSE' in result['Value']:
                        uploadedSE = result['Value']['uploadedSE']
                        for se in seList:
                            if not se == uploadedSE:
                                replicateSE = se
                                break

                    if replicateSE and lfn:
                        self.log.info('Will attempt to replicate %s to %s' % (lfn, replicateSE))
                        replication[lfn] = replicateSE
        else:
            failover = final
        
        cleanUp = False
        for fileName, metadata in failover.items():
            random.shuffle(self.failoverSEs)
            targetSE = metadata['resolvedSE'][0]
            metadata['resolvedSE'] = self.failoverSEs
            result = failoverTransfer.transferAndRegisterFileFailover(fileName,
                                                                      metadata['localpath'],
                                                                      metadata['lfn'],
                                                                      targetSE,
                                                                      self.failoverSEs,
                                                                      fileMetaDict = metadata,
                                                                      fileCatalog = self.userFileCatalog)
            if not result['OK']:
                self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
                cleanUp = True
                continue  # for user jobs we can continue even if one file completely fails
            else:
                lfn = metadata['lfn']
                uploaded.append(lfn)
        
        #For files correctly uploaded must report LFNs to job parameters
        if uploaded:
            report = ', '.join( uploaded )
            self.jobReport.setJobParameter( 'UploadedOutputData', report )
        
        self.request = failoverTransfer.request
        
        #If some or all of the files failed to be saved to failover
        if cleanUp:
            self.workflow_commons['Request'] = self.request
            #Leave any uploaded files just in case it is useful for the user
            #do not try to replicate any files.
            return S_ERROR('Failed To Upload Output Data')
        
        #If there is now at least one replica for uploaded files can trigger replication
        rm = ReplicaManager()
        self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
        time.sleep(10)
        for lfn, repSE in replication.items():
            result = rm.replicateAndRegister(lfn, repSE, catalog = self.userFileCatalog)
            if not result['OK']:
                self.log.info('Replication failed with below error but file already exists in Grid storage '
                              'with at least one replica:\n%s' % (result))
        
        self.workflow_commons['Request'] = self.request
        self.generateFailoverFile()    
        
        self.setApplicationStatus('Job Finished Successfully')
        return S_OK('Output data uploaded')
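A note on the replication-target choice used in the upload loop above: after a successful upload, the first SE in the ordered list that differs from the SE actually used becomes the replication destination. A minimal sketch of that selection (pickReplicationSE and the SE names are illustrative, not part of the DIRAC API):

def pickReplicationSE(orderedSEs, uploadedSE):
  """Return the first SE that does not already hold the file, or '' if none."""
  for se in orderedSEs:
    if se != uploadedSE:
      return se
  return ''  # no alternative SE available, replication is skipped

assert pickReplicationSE(['CERN-SRM', 'RAL-SRM'], 'CERN-SRM') == 'RAL-SRM'
assert pickReplicationSE(['CERN-SRM'], 'CERN-SRM') == ''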
  def execute(self):
    #Have to work out if the module is part of the last step i.e. 
    #user jobs can have any number of steps and we only want 
    #to run the finalization once.
    currentStep = int(self.step_commons['STEP_NUMBER'])
    totalSteps = int(self.workflow_commons['TotalSteps'])
    if currentStep==totalSteps:
      self.lastStep=True
    else:
      self.log.verbose('Current step = %s, total steps of workflow = %s, HandleProdOutputData will enable itself only at the last workflow step.' %(currentStep,totalSteps))            
        
    if not self.lastStep:
      return S_OK()
       
    self.result = self.resolveInputVariables()
    if not self.result['OK']:
      self.log.error(self.result['Message'])
      return self.result
    
    ###Instantiate object that will ensure that the files are registered properly
    failoverTransfer = FailoverTransfer(self.request)
    datatohandle = {}
    if self.generatorfile:
      if not os.path.exists(self.generatorfile):
        return S_ERROR("File %s does not exist, something went wrong before !"%(self.generatorfile))
      self.attributesdict['DataType'] = 'gen'
      lfnpath = string.join([self.basepath,self.attributesdict['Machine'],self.attributesdict['Energy'],
                                  self.attributesdict['DataType'],self.attributesdict['EvtType'],self.attributesdict['ProdID'],
                                  self.generatorfile],"/")
      datatohandle[self.generatorfile]={'lfn':lfnpath,'type':'gen','workflowSE':self.destination}
    if self.mokkafile or self.slicfile:
      #Keep whichever of the two files is defined; the slic file wins when both are present
      recofile = ''
      if self.mokkafile:
        if not os.path.exists(self.mokkafile):
          return S_ERROR("File %s does not exist, something went wrong before!" % (self.mokkafile))
        recofile = self.mokkafile
      if self.slicfile:
        if not os.path.exists(self.slicfile):
          return S_ERROR("File %s does not exist, something went wrong before!" % (self.slicfile))
        recofile = self.slicfile
      self.attributesdict['DataType'] = 'SIM'
      lfnpath = string.join([self.basepath,self.attributesdict['Machine'],self.attributesdict['Energy'],
                                  self.attributesdict['DetectorModel'],self.attributesdict['DataType'],self.attributesdict['EvtType'],
                                  self.attributesdict['ProdID'],recofile],"/")
      datatohandle[recofile]={'lfn':lfnpath,'type':'gen','workflowSE':self.destination}


    ##Below, check whether the file name contains REC or DST to determine the data type.
    if self.marlinfiles:
      for file in self.marlinfiles:
        if file.find("REC")>-1:
          self.attributesdict['DataType'] = 'REC'
        if file.find("DST")>-1:
          self.attributesdict['DataType'] = 'DST'
        lfnpath = string.join([self.basepath,self.attributesdict['Machine'],self.attributesdict['Energy'],
                                    self.attributesdict['DetectorModel'],self.attributesdict['DataType'],self.attributesdict['EvtType'],
                                    self.attributesdict['ProdID'],file],"/")
        datatohandle[file]={'lfn':lfnpath,'type':'gen','workflowSE':self.destination}

        
    if self.lcsimfiles:
      for file in self.lcsimfiles:
        if file.find("DST")>-1:
          self.attributesdict['DataType'] = 'DST'
        lfnpath = string.join([self.basepath,self.attributesdict['Machine'],self.attributesdict['Energy'],
                                    self.attributesdict['DetectorModel'],self.attributesdict['DataType'],self.attributesdict['EvtType'],
                                    self.attributesdict['ProdID'],file],"/")
        datatohandle[file]={'lfn':lfnpath,'type':'gen','workflowSE':self.destination}
        
    result = self.getFileMetadata(datatohandle)
    if not result['OK']:
      self.setApplicationStatus(result['Message'])
      return S_OK()
    fileMetadata = result['Value']

    final = {}
    for fileName,metadata in fileMetadata.items():
      final[fileName]=metadata
      final[fileName]['resolvedSE']=self.destination
    #One by one upload the files with failover if necessary
    replication = {}
    failover = {}
    uploaded = []
    if not self.failoverTest:
      for fileName,metadata in final.items():
        self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName, string.join(metadata['resolvedSE'],', ')))
        result = failoverTransfer.transferAndRegisterFile(fileName,metadata['localpath'],metadata['lfn'],metadata['resolvedSE'],fileGUID=metadata['guid'],fileCatalog=self.userFileCatalog)
        if not result['OK']:
          self.log.error('Could not transfer and register %s with metadata:\n %s' %(fileName,metadata))
          failover[fileName]=metadata
        else:
          #Only attempt replication after successful upload
          lfn = metadata['lfn']
          uploaded.append(lfn)          
          seList = metadata['resolvedSE']
          replicateSE = ''
          if result['Value'].has_key('uploadedSE'):
            uploadedSE = result['Value']['uploadedSE']            
            for se in seList:
              if not se == uploadedSE:
                replicateSE = se
                break
          
          if replicateSE and lfn:
            self.log.info('Will attempt to replicate %s to %s' %(lfn,replicateSE))    
            replication[lfn]=replicateSE            
    else:
      failover = final

    cleanUp = False
    for fileName,metadata in failover.items():
      random.shuffle(self.failoverSEs)
      targetSE = metadata['resolvedSE'][0]
      metadata['resolvedSE']=self.failoverSEs
      result = failoverTransfer.transferAndRegisterFileFailover(fileName,metadata['localpath'],metadata['lfn'],targetSE,metadata['resolvedSE'],fileGUID=metadata['guid'],fileCatalog=self.userFileCatalog)
      if not result['OK']:
        self.log.error('Could not transfer and register %s with metadata:\n %s' %(fileName,metadata))
        cleanUp = True
        continue  # for user jobs we can continue even if one file completely fails
      else:
        lfn = metadata['lfn']
        uploaded.append(lfn)

    #For files correctly uploaded must report LFNs to job parameters
    if uploaded:
      report = string.join( uploaded, ', ' )
      self.jobReport.setJobParameter( 'UploadedOutputData', report )

    #Now after all operations, retrieve potentially modified request object
    result = failoverTransfer.getRequestObject()
    if not result['OK']:
      self.log.error(result)
      return S_ERROR('Could Not Retrieve Modified Request')

    self.request = result['Value']

    #If some or all of the files failed to be saved to failover
    if cleanUp:
      self.workflow_commons['Request']=self.request
      #Leave any uploaded files just in case it is useful for the user
      #do not try to replicate any files.
      return S_ERROR('Failed To Upload Output Data')

    
    return S_OK()
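The LFN construction above repeats the same string.join pattern for each data type. A minimal sketch of that pattern as a standalone helper, with illustrative attribute values (buildLFN is not a DIRAC API):

import string

def buildLFN(basepath, attributes, keys, fileName):
  """Join the base path, the selected attribute values and the file name with '/'."""
  parts = [basepath] + [attributes[key] for key in keys] + [fileName]
  return string.join(parts, "/")

lfn = buildLFN('/ilc/prod',
               {'Machine': 'ilc', 'Energy': '250', 'DataType': 'gen',
                'EvtType': 'Zuds', 'ProdID': '1234'},
               ['Machine', 'Energy', 'DataType', 'EvtType', 'ProdID'],
               'sample_gen.stdhep')
assert lfn == '/ilc/prod/ilc/250/gen/Zuds/1234/sample_gen.stdhep'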
  def execute(self):
    """ Main execution function.
    """
    #Have to work out if the module is part of the last step i.e. 
    #user jobs can have any number of steps and we only want 
    #to run the finalization once.
    currentStep = int(self.step_commons['STEP_NUMBER'])
    totalSteps = int(self.workflow_commons['TotalSteps'])
    if currentStep == totalSteps:
      self.lastStep = True
    else:
      self.log.verbose('Current step = %s, total steps of workflow = %s, UserJobFinalization will enable itself '
                       'only at the last workflow step.' % (currentStep, totalSteps))
        
    if not self.lastStep:
      return S_OK()    
    
    result = self.resolveInputVariables()
    if not result['OK']:
      self.log.error(result['Message'])
      return result

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
      self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'], 
                                                                   self.stepStatus['OK']))
      return S_OK('No output data upload attempted')
    
    if not self.userOutputData:
      self.log.info('No user output data is specified for this job, nothing to do')
      return S_OK('No output data to upload')
        
    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = []
    for i in self.userOutputData:
      outputList.append({'outputPath' : string.upper(string.split(i, '.')[-1]),
                         'outputDataSE' : self.userOutputSE,
                         'outputFile' : os.path.basename(i)})

    userOutputLFNs = []
    if self.userOutputData:
      self.log.info('Constructing user output LFN(s) for %s' % (string.join(self.userOutputData, ', ')))
      if not self.jobID:
        self.jobID = 12345
      owner = ''
      if self.workflow_commons.has_key('Owner'):
        owner = self.workflow_commons['Owner']
      else:
        res = self.getCurrentOwner()
        if not res['OK']:
          return S_ERROR('Could not obtain owner from proxy')
        owner = res['Value']
      vo = ''
      if self.workflow_commons.has_key('VO'):
        vo = self.workflow_commons['VO']
      else:
        res = self.getCurrentVO()
        if not res['OK']:
          return S_ERROR('Could not obtain VO from proxy')
        vo = res['Value']
      
      result = constructUserLFNs(int(self.jobID), vo, owner, self.userOutputData, self.userOutputPath)
      if not result['OK']:
        self.log.error('Could not create user LFNs', result['Message'])
        return result
      userOutputLFNs = result['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' % (outputList, userOutputLFNs, self.outputDataFileMask))
    result = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not result['OK']:
      if not self.ignoreapperrors:
        self.setApplicationStatus(result['Message'])
        return S_OK()
    
    fileDict = result['Value']      
    result = self.getFileMetadata(fileDict)
    if not result['OK']:
      if not self.ignoreapperrors:
        self.setApplicationStatus(result['Message'])
        return S_OK()

    if not result['Value']:
      if not self.ignoreapperrors:
        self.log.info('No output data files were determined to be uploaded for this workflow')
        self.setApplicationStatus('No Output Data Files To Upload')
        return S_OK()

    fileMetadata = result['Value']
    
    #First get the local (or assigned) SE to try first for upload and others in random fashion
    result = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not result['OK']:
      self.log.error('Could not resolve output data SE', result['Message'])
      self.setApplicationStatus('Failed To Resolve OutputSE')
      return result      
    
    localSE = result['Value']
    self.log.verbose('Site Local SE for user outputs is: %s' % (localSE))
    orderedSEs = self.defaultOutputSE  
    for se in localSE:
      if se in orderedSEs:
        orderedSEs.remove(se)
    for se in self.userOutputSE:
      if se in orderedSEs:
        orderedSEs.remove(se)  

    orderedSEs = localSE + List.randomize(orderedSEs)    
    if self.userOutputSE:
      prependSEs = []
      for userSE in self.userOutputSE:
        if not userSE in orderedSEs:
          prependSEs.append(userSE)
      orderedSEs = prependSEs + orderedSEs
    
    self.log.info('Ordered list of output SEs is: %s' % (string.join(orderedSEs, ', ')))    
    final = {}
    for fileName, metadata in fileMetadata.items():
      final[fileName] = metadata
      final[fileName]['resolvedSE'] = orderedSEs

    #At this point can exit and see exactly what the module will upload
    if not self.enable:
      self.log.info('Module is disabled by control flag, would have attempted '
                    'to upload the following files %s' % string.join(final.keys(), ', '))
      for fileName, metadata in final.items():
        self.log.info('--------%s--------' % fileName)
        for n, v in metadata.items():
          self.log.info('%s = %s' %(n, v))

      return S_OK('Module is disabled by control flag')

    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self.request)

    #One by one upload the files with failover if necessary
    replication = {}
    failover = {}
    uploaded = []
    if not self.failoverTest:
      for fileName, metadata in final.items():
        self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName, 
                                                                                   string.join(metadata['resolvedSE'], 
                                                                                               ', ')))
        result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'], metadata['lfn'],
                                                          metadata['resolvedSE'], fileGUID = metadata['guid'], 
                                                          fileCatalog = self.userFileCatalog)
        if not result['OK']:
          self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
          failover[fileName] = metadata
        else:
          #Only attempt replication after successful upload
          lfn = metadata['lfn']
          uploaded.append(lfn)          
          seList = metadata['resolvedSE']
          replicateSE = ''
          if result['Value'].has_key('uploadedSE'):
            uploadedSE = result['Value']['uploadedSE']            
            for se in seList:
              if not se == uploadedSE:
                replicateSE = se
                break
          
          if replicateSE and lfn:
            self.log.info('Will attempt to replicate %s to %s' % (lfn, replicateSE))    
            replication[lfn] = replicateSE            
    else:
      failover = final

    cleanUp = False
    for fileName, metadata in failover.items():
      random.shuffle(self.failoverSEs)
      targetSE = metadata['resolvedSE'][0]
      metadata['resolvedSE'] = self.failoverSEs
      result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'], metadata['lfn'],
                                                                targetSE, metadata['resolvedSE'], 
                                                                fileGUID = metadata['guid'], 
                                                                fileCatalog = self.userFileCatalog)
      if not result['OK']:
        self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
        cleanUp = True
        continue  # for user jobs we can continue even if one file completely fails
      else:
        lfn = metadata['lfn']
        uploaded.append(lfn)

    #For files correctly uploaded must report LFNs to job parameters
    if uploaded:
      report = string.join( uploaded, ', ' )
      self.jobReport.setJobParameter( 'UploadedOutputData', report )

    #Now after all operations, retrieve potentially modified request object
    result = failoverTransfer.getRequestObject()
    if not result['OK']:
      self.log.error(result)
      return S_ERROR('Could Not Retrieve Modified Request')

    self.request = result['Value']

    #If some or all of the files failed to be saved to failover
    if cleanUp:
      self.workflow_commons['Request'] = self.request
      #Leave any uploaded files just in case it is useful for the user
      #do not try to replicate any files.
      return S_ERROR('Failed To Upload Output Data')
    
    #If there is now at least one replica for uploaded files can trigger replication
    rm = ReplicaManager()
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in replication.items():
      result = rm.replicateAndRegister(lfn, repSE, catalog = self.userFileCatalog)
      if not result['OK']:
        self.log.info('Replication failed with below error but file already exists in Grid storage '
                      'with at least one replica:\n%s' % (result))

    self.workflow_commons['Request'] = self.request
    
    #Now must ensure if any pending requests are generated that these are propagated to the job wrapper
    reportRequest = None
    if self.jobReport:
      result = self.jobReport.generateRequest()
      if not result['OK']:
        self.log.warn('Could not generate request for job report with result:\n%s' % (result))
      else:
        reportRequest = result['Value']
    if reportRequest:
      self.log.info('Populating request with job report information')
      self.request.update(reportRequest)
    
    if not self.request.isEmpty()['Value']:
      request_string = self.request.toXML()['Value']
      # Write out the request string
      fname = 'user_job_%s_request.xml' % (self.jobID)
      xmlfile = open(fname, 'w')
      xmlfile.write(request_string)
      xmlfile.close()
      self.log.info('Creating failover request for deferred operations for job %s:' % self.jobID)
      result = self.request.getDigest()
      if result['OK']:
        digest = result['Value']
        self.log.info(digest)
    
    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
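The SE ordering above (site-local SEs first, the remaining defaults randomized, user-requested SEs prepended) condenses to a few lines. A sketch, using random.sample where the module uses DIRAC's List.randomize (orderOutputSEs is an illustrative name):

import random

def orderOutputSEs(localSEs, defaultSEs, userSEs):
  """Local SEs first, then randomized remaining defaults, with user SEs prepended."""
  rest = [se for se in defaultSEs if se not in localSEs and se not in userSEs]
  ordered = localSEs + random.sample(rest, len(rest))
  return [se for se in userSEs if se not in ordered] + ordered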
Exemple #10
0
    def execute(self,
                production_id=None,
                prod_job_id=None,
                wms_job_id=None,
                workflowStatus=None,
                stepStatus=None,
                wf_commons=None,
                step_commons=None,
                step_number=None,
                step_id=None,
                SEs=None,
                fileDescendants=None):
        """ Main execution function.

        1. Determine the final list of possible output files for the workflow
           and all the parameters needed to upload them.
        2. Verifying that the input files have no descendants (and exiting with error, otherwise)
        3. Sending the BK records for the steps of the job
        4. Transfer output files in their destination, register in the FC (with failover)
        5. Registering the output files in the Bookkeeping
    """

        try:

            super(UploadOutputData,
                  self).execute(self.version, production_id, prod_job_id,
                                wms_job_id, workflowStatus, stepStatus,
                                wf_commons, step_commons, step_number, step_id)

            # This returns all Tier1-Failover unless a specific one is defined for the site
            self.failoverSEs = getDestinationSEList('Tier1-Failover',
                                                    self.siteName,
                                                    outputmode='Any')
            random.shuffle(self.failoverSEs)

            self._resolveInputVariables()

            if not self._checkWFAndStepStatus():
                return S_OK(
                    "Failures detected in previous steps: no output data upload attempted"
                )

            # ## 1. Determine the final list of possible output files
            # ##    for the workflow and all the parameters needed to upload them.
            # ##

            self.log.verbose("Getting the list of candidate files")
            fileDict = self.getCandidateFiles(self.outputList,
                                              self.prodOutputLFNs,
                                              self.outputDataFileMask,
                                              self.outputDataStep)

            fileMetadata = self.getFileMetadata(fileDict)

            if not fileMetadata:
                self.log.info(
                    "No output data files were determined to be uploaded for this workflow"
                )
                return S_OK()

            # Get final, resolved SE list for files
            final = {}

            for fileName, metadata in fileMetadata.iteritems():
                if not SEs:
                    resolvedSE = getDestinationSEList(
                        metadata['workflowSE'], self.siteName, self.outputMode,
                        self.workflow_commons.get('runNumber'))
                else:
                    resolvedSE = SEs
                final[fileName] = metadata
                final[fileName]['resolvedSE'] = resolvedSE

            self.log.info("The following files will be uploaded",
                          ": %s" % (', '.join(final.keys())))
            for fileName, metadata in final.items():
                self.log.info('--------%s--------' % fileName)
                for name, val in metadata.iteritems():
                    self.log.info('%s = %s' % (name, val))

            if not self._enableModule():
                # At this point can exit and see exactly what the module would have uploaded
                self.log.info(
                    "Module disabled",
                    "would have attempted to upload the files %s" %
                    ', '.join(final.keys()))

            # ## 2. Prior to uploading any files must check (for productions with input data) that no descendant files
            # ##    already exist with replica flag in the BK.
            # ##

            if self.inputDataList:
                if fileDescendants is not None:
                    lfnsWithDescendants = fileDescendants
                else:
                    if not self._enableModule():
                        self.log.info(
                            "Module disabled",
                            "would have attempted to check the files %s" %
                            ', '.join(self.inputDataList))
                        lfnsWithDescendants = []
                    else:
                        lfnsWithDescendants = getFileDescendants(
                            self.production_id,
                            self.inputDataList,
                            dm=self.dataManager,
                            bkClient=self.bkClient)
                if not lfnsWithDescendants:
                    self.log.info(
                        "No descendants found, outputs can be uploaded")
                else:
                    self.log.error(
                        "Found descendants!!! Outputs won't be uploaded")
                    self.log.info("Files with descendants", ": %s"
                                  ' % '.join(lfnsWithDescendants))
                    self.log.info(
                        "The files above will be set as 'Processed', other lfns in input will be later reset as Unused"
                    )
                    self.fileReport.setFileStatus(int(self.production_id),
                                                  lfnsWithDescendants,
                                                  'Processed')
                    return S_ERROR("Input Data Already Processed")

            # ## 3. Sending the BK records for the steps of the job
            # ##

            bkFileExtensions = ['bookkeeping*.xml']
            bkFiles = []
            for ext in bkFileExtensions:
                self.log.debug("Looking at BK record wildcard: %s" % ext)
                globList = glob.glob(ext)
                for check in globList:
                    if os.path.isfile(check):
                        self.log.verbose(
                            "Found locally existing BK file record",
                            ": %s" % check)
                        bkFiles.append(check)

            # Unfortunately we depend on the file names to order the BK records
            bkFilesListTuples = []
            for bk in bkFiles:
                bkFilesListTuples.append(
                    (bk, int(bk.split('_')[-1].split('.')[0])))
            bkFiles = [
                bk[0] for bk in sorted(bkFilesListTuples, key=itemgetter(1))
            ]

            self.log.info("The following BK records will be sent",
                          ": %s" % (', '.join(bkFiles)))
            if self._enableModule():
                for bkFile in bkFiles:
                    with open(bkFile, 'r') as fd:
                        bkXML = fd.read()
                    self.log.info("Sending BK record", ":\n%s" % (bkXML))
                    result = self.bkClient.sendXMLBookkeepingReport(bkXML)
                    self.log.verbose(result)
                    if result['OK']:
                        self.log.info("Bookkeeping report sent",
                                      "for %s" % bkFile)
                    else:
                        self.log.error(
                            "Could not send Bookkeeping XML file to server",
                            ": %s" % result['Message'])
                        self.log.info("Preparing DISET request",
                                      "for %s" % bkFile)
                        bkDISETReq = Operation()
                        bkDISETReq.Type = 'ForwardDISET'
                        bkDISETReq.Arguments = DEncode.encode(
                            result['rpcStub'])
                        self.request.addOperation(bkDISETReq)
                        self.workflow_commons[
                            'Request'] = self.request  # update each time, just in case
            else:
                self.log.info(
                    "Would have attempted to send bk records, but module is disabled"
                )

            # ## 4. Transfer output files in their destination, register in the FC (with failover)
            # ##

            # Disable the watchdog check in case the file uploading takes a long time
            self._disableWatchdogCPUCheck()

            # Instantiate the failover transfer client with the global request object
            if not self.failoverTransfer:
                self.failoverTransfer = FailoverTransfer(self.request)

            # Track which files are successfully uploaded (not to failover) via performBKRegistration
            performBKRegistration = []
            # Failover replicas are always added to the BK when they become available (actually, added to all the catalogs)

            failover = {}
            for fileName, metadata in final.items():
                targetSE = metadata['resolvedSE']
                self.log.info(
                    "Attempting to store file to SE",
                    "%s to the following SE(s):\n%s" %
                    (fileName, ', '.join(targetSE)))
                fileMetaDict = {
                    'Size': metadata['filedict']['Size'],
                    'LFN': metadata['filedict']['LFN'],
                    'GUID': metadata['filedict']['GUID'],
                    'Checksum': metadata['filedict']['Checksum'],
                    'ChecksumType': metadata['filedict']['ChecksumType']
                }

                if not self._enableModule():
                    # At this point can exit and see exactly what the module would have uploaded
                    self.log.info(
                        "Module disabled",
                        "would have attempted to upload file %s" % fileName)
                    continue

                result = self.failoverTransfer.transferAndRegisterFile(
                    fileName=fileName,
                    localPath=metadata['localpath'],
                    lfn=metadata['filedict']['LFN'],
                    destinationSEList=targetSE,
                    fileMetaDict=fileMetaDict,
                    masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error(
                        "Could not transfer and register",
                        " %s with metadata:\n %s" % (fileName, metadata))
                    failover[fileName] = metadata
                else:
                    self.log.info(
                        "File uploaded, will be registered in BK if all files uploaded for job",
                        "(%s)" % fileName)

                    # if the files are uploaded in the SE, independently if the registration in the FC is done,
                    # then we have to register all of them in the BKK
                    performBKRegistration.append(metadata)

            cleanUp = False
            for fileName, metadata in failover.items():
                self.log.info(
                    "Setting default catalog for failover transfer registration to master catalog"
                )
                random.shuffle(self.failoverSEs)
                targetSE = metadata['resolvedSE'][0]
                metadata['resolvedSE'] = self.failoverSEs

                fileMetaDict = {
                    'Size': metadata['filedict']['Size'],
                    'LFN': metadata['filedict']['LFN'],
                    'GUID': metadata['filedict']['GUID'],
                    'Checksum': metadata['filedict']['Checksum'],
                    'ChecksumType': metadata['filedict']['ChecksumType']
                }

                if not self._enableModule():
                    # At this point can exit and see exactly what the module would have uploaded
                    self.log.info(
                        "Module disabled",
                        "would have attempted to upload with failover file %s"
                        % fileName)
                    continue

                result = self.failoverTransfer.transferAndRegisterFileFailover(
                    fileName=fileName,
                    localPath=metadata['localpath'],
                    lfn=metadata['filedict']['LFN'],
                    targetSE=targetSE,
                    failoverSEList=metadata['resolvedSE'],
                    fileMetaDict=fileMetaDict,
                    masterCatalogOnly=True)
                if not result['OK']:
                    self.log.error(
                        "Could not transfer and register",
                        "%s in failover with metadata:\n %s" %
                        (fileName, metadata))
                    cleanUp = True
                    break  # no point continuing if one completely fails

            # Now after all operations, retrieve potentially modified request object
            self.request = self.failoverTransfer.request

            # If some or all of the files failed to be saved even to failover
            if cleanUp and self._enableModule():
                self._cleanUp(final)
                self.workflow_commons['Request'] = self.request
                return S_ERROR('Failed to upload output data')

            # For files correctly uploaded must report LFNs to job parameters
            if final and self._enableModule():
                report = ', '.join(final.keys())
                self.setJobParameter('UploadedOutputData', report)

            # ## 5. Can now register the successfully uploaded files in the BK i.e. set the BK replica flags
            # ##

            if not performBKRegistration:
                self.log.info(
                    "There are no files to perform the BK registration for, all are in failover"
                )
            elif self._enableModule():
                # performing BK registration

                # Getting what should be registered immediately, and what later
                lfnsToRegisterInBK = set([
                    metadata['filedict']['LFN']
                    for metadata in performBKRegistration
                ])
                lfnsToRegisterInBKNow = self._getLFNsForBKRegistration(
                    lfnsToRegisterInBK)
                lfnsToRegisterInBKLater = list(lfnsToRegisterInBK -
                                               set(lfnsToRegisterInBKNow))

                # Register what should be registered immediately, and handle failures
                result = FileCatalog(
                    catalogs=['BookkeepingDB']).addFile(lfnsToRegisterInBKNow)
                self.log.verbose("BookkeepingDB.addFile: %s" % result)
                if not result['OK']:
                    self.log.error(result)
                    return S_ERROR("Could Not Perform BK Registration")
                if 'Failed' in result['Value'] and result['Value']['Failed']:
                    for lfn, error in result['Value']['Failed'].iteritems():
                        lfnMetadata = {}
                        for lfnMD in performBKRegistration:
                            # the lfn is stored both at lfnMD['lfn'] and at lfnMD['filedict']['LFN']
                            if lfnMD['lfn'] == lfn:
                                lfnMetadata = lfnMD['filedict']
                                break
                        self.setBKRegistrationRequest(lfn,
                                                      error=error,
                                                      metaData=lfnMetadata)

                # Add a registration request for what should be registered later
                if lfnsToRegisterInBKLater:
                    for lfnMD in performBKRegistration:
                        if lfnMD['lfn'] in lfnsToRegisterInBKLater:
                            lfnMetadata = lfnMD['filedict']
                            self.setBKRegistrationRequest(lfnMD['lfn'],
                                                          metaData=lfnMetadata)

            self.workflow_commons['Request'] = self.request

            return S_OK("Output data uploaded")

        except Exception as e:  # pylint:disable=broad-except
            self.log.exception('Exception in UploadOutputData', lException=e)
            self.setApplicationStatus(repr(e))
            return S_ERROR(str(e))

        finally:
            super(UploadOutputData, self).finalize(self.version)
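Step 3 above relies on the bookkeeping file names to recover the step order: the step number is the last '_'-separated token before the extension. A minimal standalone version of that sort (orderBKFiles is an illustrative name):

from operator import itemgetter

def orderBKFiles(bkFiles):
  """Sort bookkeeping*.xml records by the integer step number encoded in the name."""
  withStep = [(bk, int(bk.split('_')[-1].split('.')[0])) for bk in bkFiles]
  return [bk for bk, _step in sorted(withStep, key=itemgetter(1))]

assert orderBKFiles(['bookkeeping_Gauss_10.xml', 'bookkeeping_Gauss_2.xml']) == \
    ['bookkeeping_Gauss_2.xml', 'bookkeeping_Gauss_10.xml']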
Exemple #11
0
    def finalize(self):
        """ finalize method performs final operations after all the job
        steps were executed. Only production jobs are treated.
    """

        self.log.verbose("Starting UploadLogFile finalize")
        ##########################################
        # First determine the files which should be saved
        self.log.info("Determining the files to be saved in the logs.")
        res = self.determineRelevantFiles()
        if not res["OK"]:
            self.log.error("Completely failed to select relevant log files.", res["Message"])
            return S_OK()  # because if the logs are lost, it's not the end of the world.
        selectedFiles = res["Value"]
        self.log.info(
            "The following %s files were selected to be saved:\n%s"
            % (len(selectedFiles), string.join(selectedFiles, "\n"))
        )

        #########################################
        # Create a temporary directory containing these files
        self.log.info("Populating a temporary directory for selected files.")
        res = self.populateLogDirectory(selectedFiles)
        if not res["OK"]:
            self.log.error("Completely failed to populate temporary log file directory.", res["Message"])
            self.setApplicationStatus("Failed To Populate Log Dir")
            return S_OK()  # because if the logs are lost, it's not the end of the world.
        self.log.info("%s populated with log files." % self.logdir)

        #########################################
        # Create a tailored index page
        # self.log.info('Creating an index page for the logs')
        # result = self.__createLogIndex(selectedFiles)
        # if not result['OK']:
        #  self.log.error('Failed to create index page for logs', res['Message'])

        if not self.enable:
            self.log.info("Module is disabled by control flag")
            return S_OK("Module is disabled by control flag")

        #########################################
        # Make sure all the files in the log directory have the correct permissions
        result = self.__setLogFilePermissions(self.logdir)
        if not result["OK"]:
            self.log.error("Could not set permissions of log files to 0755 with message:\n%s" % (result["Message"]))

        #########################################
        # Attempt to upload logs to the LogSE
        self.log.info("Transferring log files to the %s" % self.logSE)
        res = S_ERROR()
        if not self.failoverTest:
            self.log.info("PutDirectory %s %s %s" % (self.logFilePath, os.path.realpath(self.logdir), self.logSE))
            res = self.rm.putStorageDirectory(
                {self.logFilePath: os.path.realpath(self.logdir)}, self.logSE, singleDirectory=True
            )
            self.log.verbose(res)
            if res["OK"]:
                self.log.info("Successfully upload log directory to %s" % self.logSE)
                # TODO: The logURL should be constructed using the LogSE and StorageElement()
                # storageElement = StorageElement(self.logSE)
                # pfn = storageElement.getPfnForLfn(self.logFilePath)['Value']
                # logURL = getPfnForProtocol(res['Value'],'http')['Value']
                logURL = "%s" % self.logFilePath
                self.setJobParameter("Log LFN", logURL)
                self.log.info("Logs for this job may be retrieved with dirac-ilc-get-prod-log -F %s" % logURL)
                return S_OK()

        #########################################
        # Recover the logs to a failover storage element
        self.log.error(
            "Completely failed to upload log files to %s, will attempt upload to failover SE" % self.logSE,
            res["Message"],
        )

        tarFileDir = os.path.dirname(self.logdir)
        self.logLFNPath = "%s.gz" % self.logLFNPath
        tarFileName = os.path.basename(self.logLFNPath)
        start = os.getcwd()
        os.chdir(self.logdir)
        logTarFiles = os.listdir(self.logdir)
        # comm = 'tar czvf %s %s' % (tarFileName,string.join(logTarFiles,' '))
        tfile = tarfile.open(tarFileName, "w:gz")
        for item in logTarFiles:
            tfile.add(item)
        tfile.close()
        # res = shellCall(0,comm)
        res = S_OK()  # reset, so the check below reflects the tar creation and not the earlier upload failure
        if not os.path.exists(tarFileName):
            res = S_ERROR("File was not created")
        os.chdir(start)
        if not res["OK"]:
            self.log.error("Failed to create tar file from directory", "%s %s" % (self.logdir, res["Message"]))
            self.setApplicationStatus("Failed To Create Log Tar Dir")
            return S_OK()  # because if the logs are lost, it's not the end of the world.

        # if res['Value'][0]: #i.e. non-zero status
        #  self.log.error('Failed to create tar file from directory','%s %s' % (self.logdir,res['Value']))
        #  self.setApplicationStatus('Failed To Create Log Tar Dir')
        #  return S_OK()#because if the logs are lost, it's not the end of the world.

        ############################################################
        # Instantiate the failover transfer client with the global request object
        failoverTransfer = FailoverTransfer(self.request)
        ##determine the experiment
        self.failoverSEs = self.ops.getValue("Production/%s/FailOverSE" % self.experiment, self.failoverSEs)

        random.shuffle(self.failoverSEs)
        self.log.info(
            "Attempting to store file %s to the following SE(s):\n%s"
            % (tarFileName, string.join(self.failoverSEs, ", "))
        )
        result = failoverTransfer.transferAndRegisterFile(
            tarFileName,
            "%s/%s" % (tarFileDir, tarFileName),
            self.logLFNPath,
            self.failoverSEs,
            fileGUID=None,
            fileCatalog=["FileCatalog", "LcgFileCatalog"],
        )
        if not result["OK"]:
            self.log.error("Failed to upload logs to all destinations")
            self.setApplicationStatus("Failed To Upload Logs")
            return S_OK()  # because if the logs are lost, it's not the end of the world.

        # Now after all operations, retrieve potentially modified request object
        result = failoverTransfer.getRequestObject()
        if not result["OK"]:
            self.log.error(result)
            return S_ERROR("Could not retrieve modified request")

        self.request = result["Value"]
        res = self.createLogUploadRequest(self.logSE, self.logLFNPath)
        if not res["OK"]:
            self.log.error("Failed to create failover request", res["Message"])
            self.setApplicationStatus("Failed To Upload Logs To Failover")
        else:
            self.log.info("Successfully created failover request")

        self.workflow_commons["Request"] = self.request
        return S_OK()
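The tarball step in finalize() above has one subtlety worth isolating: the directory listing is taken before the tar file is opened, so the archive does not try to include itself. A minimal sketch of just that step (tarLogDirectory is an illustrative name):

import os
import tarfile

def tarLogDirectory(logdir, tarFileName):
  """Create a gzipped tar of every file in logdir, written inside logdir itself."""
  start = os.getcwd()
  os.chdir(logdir)
  try:
    logTarFiles = os.listdir('.')  # list the files before the tar exists
    tfile = tarfile.open(tarFileName, "w:gz")
    for item in logTarFiles:
      tfile.add(item)
    tfile.close()
  finally:
    os.chdir(start)
  return os.path.join(logdir, tarFileName)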
  def execute(self):
    """ Main execution function.
    """
    self.log.info('Initializing %s' % self.version)
    result = self.resolveInputVariables()
    if not result['OK']:
      self.log.error("Failed to resolve input parameters:", result['Message'])
      return result

    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
      self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'], self.stepStatus['OK']))
      return S_OK('No output data upload attempted')

    ##determine the experiment
    example_file = self.prodOutputLFNs[0]
    if "/ilc/prod/clic" in example_file:
      self.experiment = "CLIC"
    elif "/ilc/prod/ilc/sid" in example_file:
      self.experiment = 'ILC_SID'
    elif "/ilc/prod/ilc/mc-dbd" in example_file:
      self.experiment = 'ILC_ILD' 
    else:
      self.log.warn("Failed to determine experiment, reverting to default")
      
    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    result = self.getCandidateFiles(self.outputList, self.prodOutputLFNs, self.outputDataFileMask)
    if not result['OK']:
      self.log.error(result['Message'])
      self.setApplicationStatus(result['Message'])
      return result
    
    fileDict = result['Value']      
    result = self.getFileMetadata(fileDict)
    if not result['OK']:
      self.log.error(result['Message'])
      self.setApplicationStatus(result['Message'])
      return result

    if not result['Value']:
      self.log.info('No output data files were determined to be uploaded for this workflow')
      return S_OK()

    fileMetadata = result['Value']

    #Get final, resolved SE list for files
    final = {}
    for fileName, metadata in fileMetadata.items():
      result = getDestinationSEList(metadata['workflowSE'], DIRAC.siteName(), self.outputMode)
      if not result['OK']:
        self.log.error('Could not resolve output data SE', result['Message'])
        self.setApplicationStatus('Failed To Resolve OutputSE')
        return result
      
      resolvedSE = result['Value']
      final[fileName] = metadata
      final[fileName]['resolvedSE'] = resolvedSE

    self.log.info('The following files will be uploaded: %s' % (', '.join(final.keys() )))
    for fileName, metadata in final.items():
      self.log.info('--------%s--------' % fileName)
      for metaName, metaValue in metadata.items():
        self.log.info('%s = %s' % (metaName, metaValue))

    #At this point can exit and see exactly what the module would have uploaded
    if not self.enable:
      self.log.info('Module is disabled by control flag, would have attempted to upload the '
                    'following files %s' % ', '.join(final.keys()))
      return S_OK('Module is disabled by control flag')

    #Disable the watchdog check in case the file uploading takes a long time
    self.log.info('Creating DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK in order to disable the Watchdog prior to upload')
    fopen = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK','w')
    fopen.write('%s' % time.asctime())
    fopen.close()
    
    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self._getRequestContainer())

    catalogs = self.ops.getValue('Production/%s/Catalogs' % self.experiment,
                                 ['FileCatalog', 'LcgFileCatalog'])



    #One by one upload the files with failover if necessary
    failover = {}
    if not self.failoverTest:
      for fileName, metadata in final.iteritems():
        self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName, 
                                                                                   ', '.join(metadata['resolvedSE'])))
        result = failoverTransfer.transferAndRegisterFile(fileName, 
                                                          metadata['localpath'], 
                                                          metadata['lfn'], 
                                                          metadata['resolvedSE'], 
                                                          fileMetaDict = metadata['filedict'],
                                                          fileCatalog = catalogs)
        if not result['OK']:
          self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata['filedict']))
          failover[fileName] = metadata
        else:
          #lfn = metadata['lfn']
          pass
    else:
      failover = final

    self.failoverSEs = self.ops.getValue("Production/%s/FailOverSE" % self.experiment, self.failoverSEs)  

    cleanUp = False
    for fileName, metadata in failover.iteritems():
      self.log.info('Setting default catalog for failover transfer to FileCatalog')
      failovers = list(self.failoverSEs) #copy, so the shared failover SE list is not mutated across iterations
      targetSE = metadata['resolvedSE'][0]
      try: #remove duplicate site, otherwise it will do nasty things when processing the request
        failovers.remove(targetSE)
      except ValueError:
        pass
      random.shuffle(failovers)
      metadata['resolvedSE'] = failovers
      result = failoverTransfer.transferAndRegisterFileFailover(fileName, 
                                                                metadata['localpath'],
                                                                metadata['lfn'], 
                                                                targetSE, 
                                                                metadata['resolvedSE'],
                                                                fileMetaDict = metadata['filedict'],
                                                                fileCatalog = catalogs)
      if not result['OK']:
        self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata['filedict']))
        cleanUp = True
        break #no point continuing if one completely fails

    os.remove("DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK") #cleanup the mess

    self.workflow_commons['Request'] = failoverTransfer.request

    #If some or all of the files failed to be saved to failover
    if cleanUp:
      lfns = []
      for fileName, metadata in final.items():
        lfns.append(metadata['lfn'])

      result = self._cleanUp(lfns)
      return S_ERROR('Failed to upload output data')

    return S_OK('Output data uploaded')
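The experiment detection above keys off the LFN namespace prefix. A table-driven sketch of the same branches (experimentFromLFN is an illustrative name; the module itself just warns and keeps its previous self.experiment when nothing matches):

def experimentFromLFN(lfn, default=None):
  """Infer the experiment from the LFN namespace prefix."""
  prefixes = [("/ilc/prod/clic", "CLIC"),
              ("/ilc/prod/ilc/sid", "ILC_SID"),
              ("/ilc/prod/ilc/mc-dbd", "ILC_ILD")]
  for prefix, experiment in prefixes:
    if prefix in lfn:
      return experiment
  return default  # caller decides what to do when no prefix matches

assert experimentFromLFN('/ilc/prod/clic/1.4tev/sample.slcio') == 'CLIC'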
Exemple #13
0
  def execute(self):
    """ Main execution function.
    """
    self.log.info('Initializing %s' % self.version)
    result = self.resolveInputVariables()
    if not result['OK']:
      self.log.error(result['Message'])
      return result

    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
      self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'], self.stepStatus['OK']))
      return S_OK('No output data upload attempted')

    ##determine the experiment
    example_file = self.prodOutputLFNs[0]
    if "/ilc/prod/clic" in example_file:
      self.experiment = "CLIC"
    elif "/ilc/prod/ilc/sid" in example_file:
      self.experiment = 'ILC_SID'
    elif "/ilc/prod/ilc/mc-dbd" in example_file:
      self.experiment = 'ILC_ILD' 
    else:
      self.log.warn("Failed to determine experiment, reverting to default")
      
    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    result = self.getCandidateFiles(self.outputList, self.prodOutputLFNs, self.outputDataFileMask)
    if not result['OK']:
      self.setApplicationStatus(result['Message'])
      return result
    
    fileDict = result['Value']      
    result = self.getFileMetadata(fileDict)
    if not result['OK']:
      self.setApplicationStatus(result['Message'])
      return result

    if not result['Value']:
      self.log.info('No output data files were determined to be uploaded for this workflow')
      return S_OK()

    fileMetadata = result['Value']

    #Get final, resolved SE list for files
    final = {}
    for fileName, metadata in fileMetadata.items():
      result = getDestinationSEList(metadata['workflowSE'], DIRAC.siteName(), self.outputMode)
      if not result['OK']:
        self.log.error('Could not resolve output data SE', result['Message'])
        self.setApplicationStatus('Failed To Resolve OutputSE')
        return result
      
      resolvedSE = result['Value']
      final[fileName] = metadata
      final[fileName]['resolvedSE'] = resolvedSE

    self.log.info('The following files will be uploaded: %s' % (string.join(final.keys(), ', ')))
    for fileName, metadata in final.items():
      self.log.info('--------%s--------' % fileName)
      for n, v in metadata.items():
        self.log.info('%s = %s' % (n, v))

    #At this point can exit and see exactly what the module would have uploaded
    if not self.enable:
      self.log.info('Module is disabled by control flag, would have attempted to upload the '
                    'following files %s' % string.join(final.keys(), ', '))
      return S_OK('Module is disabled by control flag')

    #Disable the watchdog check in case the file uploading takes a long time
    self.log.info('Creating DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK in order to disable the Watchdog prior to upload')
    fopen = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK','w')
    fopen.write('%s' % time.asctime())
    fopen.close()
    
    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self.request)

    catalogs = ['FileCatalog', 'LcgFileCatalog']


    #One by one upload the files with failover if necessary
    failover = {}
    if not self.failoverTest:
      for fileName, metadata in final.items():
        self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName, 
                                                                                   string.join(metadata['resolvedSE'], 
                                                                                               ', ')))
        result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'], 
                                                          metadata['lfn'], metadata['resolvedSE'], 
                                                          fileGUID = metadata['guid'], fileCatalog = catalogs)
        if not result['OK']:
          self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
          failover[fileName] = metadata
        else:
          lfn = metadata['lfn']
    else:
      failover = final

    self.failoverSEs = self.ops.getValue("Production/%s/FailOverSE" % self.experiment, self.failoverSEs)  

    cleanUp = False
    for fileName, metadata in failover.items():
      self.log.info('Setting default catalog for failover transfer to FileCatalog')
      random.shuffle(self.failoverSEs)
      targetSE = metadata['resolvedSE'][0]
      metadata['resolvedSE'] = self.failoverSEs
      result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'],
                                                                metadata['lfn'], targetSE, metadata['resolvedSE'],
                                                                fileGUID = metadata['guid'], fileCatalog = catalogs)
      if not result['OK']:
        self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
        cleanUp = True
        break #no point continuing if one completely fails

    os.remove("DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK") #cleanup the mess

    #Now after all operations, retrieve potentially modified request object
    result = failoverTransfer.getRequestObject()
    if not result['OK']:
      self.log.error(result)
      return S_ERROR('Could not retrieve modified request')

    self.request = result['Value']

    #If some or all of the files failed to be saved to failover
    if cleanUp:
      lfns = []
      for fileName, metadata in final.items():
        lfns.append(metadata['lfn'])

      result = self.__cleanUp(lfns)
      self.workflow_commons['Request'] = self.request
      return S_ERROR('Failed to upload output data')

#    #Can now register the successfully uploaded files in the BK
#    if not performBKRegistration:
#      self.log.info('There are no files to perform the BK registration for, all could be saved to failover')
#    else:
#      rm = ReplicaManager()
#      result = rm.addCatalogFile(performBKRegistration,catalogs=['BookkeepingDB'])
#      self.log.verbose(result)
#      if not result['OK']:
#        self.log.error(result)
#        return S_ERROR('Could Not Perform BK Registration')
#      if result['Value']['Failed']:
#        for lfn,error in result['Value']['Failed'].items():
#          self.log.info('BK registration for %s failed with message: "%s" setting failover request' %(lfn,error))
#          result = self.request.addSubRequest({'Attributes':{'Operation':'registerFile','ExecutionOrder':0, 'Catalogue':'BookkeepingDB'}},'register')
#          if not result['OK']:
#            self.log.error('Could not set registerFile request:\n%s' %result)
#            return S_ERROR('Could Not Set BK Registration Request')
#          fileDict = {'LFN':lfn,'Status':'Waiting'}
#          index = result['Value']
#          self.request.setSubRequestFiles(index,'register',[fileDict])

    self.workflow_commons['Request'] = self.request
    return S_OK('Output data uploaded')
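One difference between the failover loops of the recent examples: an earlier execute() removes the SE that already failed as primary target from the failover candidates before shuffling, while the loop in Exemple #13 only shuffles. A sketch of the safer variant, copying the list first so a shared failover SE list is not mutated across iterations (prepareFailoverSEs is an illustrative name):

import random

def prepareFailoverSEs(failoverSEs, targetSE):
  """Shuffled failover candidates, excluding the SE that already failed as target."""
  failovers = list(failoverSEs)  # copy: do not mutate the caller's list
  try:
    failovers.remove(targetSE)
  except ValueError:
    pass  # the target SE was not among the failover candidates
  random.shuffle(failovers)
  return failovers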
Exemple #14
0
  def execute(self):
    """ Main execution function.
    """
    #Have to work out if the module is part of the last step i.e.
    #user jobs can have any number of steps and we only want
    #to run the finalization once. Not a problem if this is not the last step so return S_OK()
    resultLS = self.isLastStep()
    if not resultLS['OK']:
      return S_OK()

    self.logWorkingDirectory()

    resultIV = self.resolveInputVariables()
    if not resultIV['OK']:
      self.log.error("Failed to resolve input parameters:", resultIV['Message'])
      return resultIV

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
      self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                   self.stepStatus['OK']))
      return S_OK('No output data upload attempted')

    if not self.userOutputData:
      self.log.info('No user output data is specified for this job, nothing to do')
      return S_OK('No output data to upload')

    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = self.getOutputList()

    #self.userOutputData is guaranteed to be non-empty at this point
    resultOLfn = self.constructOutputLFNs()
    if not resultOLfn['OK']:
      self.log.error('Could not create user LFNs', resultOLfn['Message'])
      return resultOLfn
    userOutputLFNs = resultOLfn['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' % (outputList, userOutputLFNs, self.outputDataFileMask))
    self.log.debug("IgnoreAppErrors? '%s' " % self.ignoreapperrors)
    resultCF = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not resultCF['OK']:
      if not self.ignoreapperrors:
        self.log.error(resultCF['Message'])
        self.setApplicationStatus(resultCF['Message'])
        return S_OK()
    fileDict = resultCF.get('Value', {})  #empty when application errors are ignored

    resultFMD = self.getFileMetadata(fileDict)
    if not resultFMD['OK']:
      if not self.ignoreapperrors:
        self.log.error(resultFMD['Message'])
        self.setApplicationStatus(resultFMD['Message'])
        return S_OK()

    if not resultFMD.get('Value'):
      if not self.ignoreapperrors:
        self.log.info('No output data files were determined to be uploaded for this workflow')
        self.setApplicationStatus('No Output Data Files To Upload')
        return S_OK()

    fileMetadata = resultFMD.get('Value', {})

    #Get the local (or assigned) SE to try first for the upload; the other SEs follow in random order
    resultSEL = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not resultSEL['OK']:
      self.log.error('Could not resolve output data SE', resultSEL['Message'])
      self.setApplicationStatus('Failed To Resolve OutputSE')
      return resultSEL
    localSE = resultSEL['Value']

    orderedSEs = [ se for se in self.defaultOutputSE if se not in localSE and se not in self.userOutputSE]

    orderedSEs = localSE + List.randomize(orderedSEs)
    if self.userOutputSE:
      prependSEs = []
      for userSE in self.userOutputSE:
        if userSE not in orderedSEs:
          prependSEs.append(userSE)
      orderedSEs = prependSEs + orderedSEs

    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))
    final = {}
    for fileName, metadata in fileMetadata.items():
      final[fileName] = metadata
      final[fileName]['resolvedSE'] = orderedSEs

    #At this point can exit and see exactly what the module will upload
    self.printOutputInfo(final)
    if not self.enable:
      return S_OK('Module is disabled by control flag')

    self.injectJobIndex( final )

    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self._getRequestContainer())

    #One by one upload the files with failover if necessary
    filesToReplicate = {}
    filesToFailover = {}
    filesUploaded = []
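    #failoverTest forces every file straight to the failover path, skipping the primary SEs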
    if not self.failoverTest:
      self.transferAndRegisterFiles(final, failoverTransfer, filesToFailover, filesUploaded, filesToReplicate)
    else:
      filesToFailover = final

    ##If any files could not be uploaded, send them to failover storage now
    resultTRFF = self.transferRegisterAndFailoverFiles(failoverTransfer, filesToFailover, filesUploaded)
    cleanUp = resultTRFF['Value']['cleanUp']

    #For files correctly uploaded must report LFNs to job parameters
    if filesUploaded:
      report = ', '.join( filesUploaded )
      self.jobReport.setJobParameter( 'UploadedOutputData', report )

    self.workflow_commons['Request'] = failoverTransfer.request

    #If some or all of the files failed to be saved to failover
    if cleanUp:
      #Leave any uploaded files just in case it is useful for the user
      #do not try to replicate any files.
      return S_ERROR('Failed To Upload Output Data')

    #If there is now at least one replica for uploaded files can trigger replication
    datMan = DataManager( catalogs = self.userFileCatalog )
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in filesToReplicate.items():
      resultRAR = datMan.replicateAndRegister(lfn, repSE)
      if not resultRAR['OK']:
        self.log.info('Replication failed with below error but file already exists in Grid storage '
                      'with at least one replica:\n%s' % (resultRAR))

    self.generateFailoverFile()

    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
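
The SE ordering logic above puts the local (or assigned) SEs first, appends the remaining default SEs in random order, and prepends any user-requested SEs that are not already present. A self-contained sketch of that ordering (the SE names below are illustrative); random.shuffle stands in for DIRAC's List.randomize, which is assumed to return a randomized copy of its argument:

import random

def order_output_ses(local_ses, default_ses, user_ses):
    #Local SEs first, remaining defaults in random order, user SEs prepended
    remaining = [se for se in default_ses if se not in local_ses and se not in user_ses]
    random.shuffle(remaining)
    ordered = local_ses + remaining
    return [se for se in user_ses if se not in ordered] + ordered

print(order_output_ses(['GRIF-USER'], ['CERN-USER', 'GRIF-USER', 'RAL-USER'], ['DESY-USER']))
#e.g. ['DESY-USER', 'GRIF-USER', 'RAL-USER', 'CERN-USER'] (the middle order is random)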
Example #15
    def execute(self):
        """ Main execution function.
        """
        #Have to work out if the module is part of the last step i.e.
        #user jobs can have any number of steps and we only want
        #to run the finalization once.
        currentStep = int(self.step_commons['STEP_NUMBER'])
        totalSteps = int(self.workflow_commons['TotalSteps'])
        if currentStep == totalSteps:
            self.lastStep = True
        else:
            self.log.verbose('Current step = %s, total steps of workflow = %s; UserJobFinalization '
                             'enables itself only at the last workflow step.' % (currentStep, totalSteps))

        if not self.lastStep:
            #Not last step, do nothing, proceed happily.
            return S_OK()

        result = self.resolveInputVariables()
        if not result['OK']:
            self.log.error("Failed to resolve input parameters:",
                           result['Message'])
            return result

        self.log.info('Initializing %s' % self.version)
        if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
            ##Something went wrong in the step or the workflow, do nothing.
            self.log.verbose(
                'Workflow status = %s, step status = %s' %
                (self.workflowStatus['OK'], self.stepStatus['OK']))
            return S_OK('No output data upload attempted')

        self.request.RequestName = 'job_%d_request.xml' % int(self.jobID)
        self.request.JobID = self.jobID
        self.request.SourceComponent = "Job_%d" % int(self.jobID)

        if not self.userOutputData:
            self.log.info(
                'No user output data is specified for this job, nothing to do')
            return S_OK('No output data to upload')

        #Determine the final list of possible output files for the
        #workflow and all the parameters needed to upload them.
        outputList = []
        possible_files = []
        for i in self.userOutputData:
            files = getGlobbedFiles(i)
            for possible_file in files:
                if os.path.basename(possible_file) in possible_files:
                    #Don't add the same file twice
                    continue
                outputList.append({
                    #the extension is used to sort the files into different directories
                    'outputDataType': i.split('.')[-1].upper(),
                    'outputDataSE': self.userOutputSE,
                    'outputFile': os.path.basename(possible_file)
                })
                possible_files.append(os.path.basename(possible_file))

        self.log.info('Constructing user output LFN(s) for %s' %
                      (', '.join(self.userOutputData)))
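        #Fall back to a dummy job ID when no WMS job ID is available (e.g. a local test run)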
        if not self.jobID:
            self.jobID = 12345
        owner = ''
        if 'Owner' in self.workflow_commons:
            owner = self.workflow_commons['Owner']
        else:
            res = getCurrentOwner()
            if not res['OK']:
                self.log.error('Could not find proxy')
                return S_ERROR('Could not obtain owner from proxy')
            owner = res['Value']
        vo = ''
        if 'VO' in self.workflow_commons:
            vo = self.workflow_commons['VO']
        else:
            res = getVOfromProxyGroup()
            if not res['OK']:
                self.log.error('Failed finding the VO')
                return S_ERROR('Could not obtain VO from proxy')
            vo = res['Value']
        result = constructUserLFNs(int(self.jobID), vo, owner, possible_files,
                                   self.userOutputPath)
        if not result['OK']:
            self.log.error('Could not create user LFNs', result['Message'])
            return result
        userOutputLFNs = result['Value']

        self.log.verbose('Calling getCandidateFiles( %s, %s)' %
                         (outputList, userOutputLFNs))
        result = self.getCandidateFiles(outputList, userOutputLFNs)
        if not result['OK']:
            if not self.ignoreapperrors:
                self.log.error(result['Message'])
                self.setApplicationStatus(result['Message'])
                return S_OK()

        fileDict = result.get('Value', {})  #empty when application errors are ignored
        result = self.getFileMetadata(fileDict)
        if not result['OK']:
            if not self.ignoreapperrors:
                self.log.error(result['Message'])
                self.setApplicationStatus(result['Message'])
                return S_OK()

        if not result.get('Value'):
            if not self.ignoreapperrors:
                self.log.info(
                    'No output data files were determined to be uploaded for this workflow'
                )
                self.setApplicationStatus('No Output Data Files To Upload')
                return S_OK()

        fileMetadata = result.get('Value', {})

        orderedSEs = self.userOutputSE

        self.log.info('Ordered list of output SEs is: %s' %
                      (', '.join(orderedSEs)))
        final = {}
        for fileName, metadata in fileMetadata.items():
            final[fileName] = metadata
            final[fileName]['resolvedSE'] = orderedSEs

        #At this point can exit and see exactly what the module will upload
        if not self.enable:
            self.log.info('Module is disabled by control flag, would have attempted '
                          'to upload the following files %s' % ', '.join(final.keys()))
            for fileName, metadata in final.items():
                self.log.info('--------%s--------' % fileName)
                for n, v in metadata.items():
                    self.log.info('%s = %s' % (n, v))

            return S_OK('Module is disabled by control flag')

        #Instantiate the failover transfer client with the global request object
        failoverTransfer = FailoverTransfer(self.request)

        #One by one upload the files with failover if necessary
        replication = {}
        failover = {}
        uploaded = []
        if not self.failoverTest:
            for fileName, metadata in final.items():
                self.log.info(
                    "Attempting to store file %s to the following SE(s):\n%s" %
                    (fileName, ', '.join(metadata['resolvedSE'])))
                replicateSE = ''
                lfn = ''
                result = failoverTransfer.transferAndRegisterFile(
                    fileName,
                    metadata['localpath'],
                    metadata['lfn'],
                    metadata['resolvedSE'],
                    fileMetaDict=metadata,
                    fileCatalog=self.userFileCatalog)
                if not result['OK']:
                    self.log.error(
                        'Could not transfer and register %s with metadata:\n %s'
                        % (fileName, metadata))
                    failover[fileName] = metadata
                else:
                    #Only attempt replication after successful upload
                    lfn = metadata['lfn']
                    uploaded.append(lfn)
                    seList = metadata['resolvedSE']

                    if 'uploadedSE' in result['Value']:
                        uploadedSE = result['Value']['uploadedSE']
                        for se in seList:
                            if not se == uploadedSE:
                                replicateSE = se
                                break

                if replicateSE and lfn:
                    self.log.info('Will attempt to replicate %s to %s' %
                                  (lfn, replicateSE))
                    replication[lfn] = replicateSE
        else:
            failover = final

        cleanUp = False
        for fileName, metadata in failover.items():
            random.shuffle(self.failoverSEs)
            targetSE = metadata['resolvedSE'][0]
            metadata['resolvedSE'] = self.failoverSEs
            result = failoverTransfer.transferAndRegisterFileFailover(
                fileName,
                metadata['localpath'],
                metadata['lfn'],
                targetSE,
                self.failoverSEs,
                fileMetaDict=metadata,
                fileCatalog=self.userFileCatalog)
            if not result['OK']:
                self.log.error(
                    'Could not transfer and register %s with metadata:\n %s' %
                    (fileName, metadata))
                cleanUp = True
                continue  #for user jobs we can continue even if one file completely fails
            else:
                lfn = metadata['lfn']
                uploaded.append(lfn)

        #For files correctly uploaded must report LFNs to job parameters
        if uploaded:
            report = ', '.join(uploaded)
            self.jobReport.setJobParameter('UploadedOutputData', report)

        self.request = failoverTransfer.request

        #If some or all of the files failed to be saved to failover
        if cleanUp:
            self.workflow_commons['Request'] = self.request
            #Leave any uploaded files just in case it is useful for the user
            #do not try to replicate any files.
            return S_ERROR('Failed To Upload Output Data')

        #If there is now at least one replica for uploaded files can trigger replication
        rm = ReplicaManager()
        self.log.info(
            'Sleeping for 10 seconds before attempting replication of recently uploaded files'
        )
        time.sleep(10)
        for lfn, repSE in replication.items():
            result = rm.replicateAndRegister(lfn,
                                             repSE,
                                             catalog=self.userFileCatalog)
            if not result['OK']:
                self.log.info('Replication failed with below error but file already exists in Grid '
                              'storage with at least one replica:\n%s' % (result))

        self.workflow_commons['Request'] = self.request
        self.generateFailoverFile()

        self.setApplicationStatus('Job Finished Successfully')
        return S_OK('Output data uploaded')
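
Stripped of the DIRAC plumbing, the upload phase shared by these examples reduces to: try each file against its ordered SE list, remember one alternative SE for later replication, and queue failures for the failover pass. A minimal sketch under those assumptions, with a hypothetical transfer callable in place of transferAndRegisterFile:

def upload_outputs(final, transfer):
    #final maps file names to metadata dicts with 'lfn' and 'resolvedSE';
    #transfer is a hypothetical callable returning the SE that accepted the
    #file, or None on failure (a stand-in for transferAndRegisterFile)
    replication, failover, uploaded = {}, {}, []
    for file_name, metadata in final.items():
        uploaded_se = transfer(file_name, metadata['resolvedSE'])
        if uploaded_se is None:
            failover[file_name] = metadata  #retry on the failover SEs later
            continue
        uploaded.append(metadata['lfn'])
        #Only attempt replication after a successful upload: pick the first
        #requested SE that did not receive the original copy
        for se in metadata['resolvedSE']:
            if se != uploaded_se:
                replication[metadata['lfn']] = se
                break
    return replication, failover, uploaded

The returned failover dict feeds the failover pass, uploaded is reported to the job parameters, and replication drives replicateAndRegister once the uploads succeed.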