Example #1
0
  def __submitJob( self, jobID, jobParams, resourceParams, optimizerParams,
                   proxyChain, processors, wholeNode = False ):
    """ Submit job to the Computing Element instance after creating a custom
        Job Wrapper with the available job parameters.
    """
    logLevel = self.am_getOption( 'DefaultLogLevel', 'INFO' )
    defaultWrapperLocation = self.am_getOption( 'JobWrapperTemplate',
                                                'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py' )
    jobDesc = { "jobID": jobID,
                "jobParams": jobParams,
                "resourceParams": resourceParams,
                "optimizerParams": optimizerParams,
                "extraOptions": self.extraOptions,
                "defaultWrapperLocation": defaultWrapperLocation }
    result = createJobWrapper( log = self.log, logLevel = logLevel, **jobDesc )
    if not result['OK']:
      return result

    wrapperFile = result['Value']
    self.__report( jobID, 'Matched', 'Submitted To CE' )

    self.log.info( 'Submitting JobWrapper %s to %sCE' % ( os.path.basename( wrapperFile ), self.ceName ) )

    # Pass proxy to the CE
    proxy = proxyChain.dumpAllToString()
    if not proxy['OK']:
      self.log.error( 'Invalid proxy', proxy )
      return S_ERROR( 'Payload Proxy Not Found' )

    payloadProxy = proxy['Value']
    submission = self.computingElement.submitJob( wrapperFile, payloadProxy,
                                                  numberOfProcessors = processors,
                                                  wholeNode = wholeNode,
                                                  jobDesc = jobDesc,
                                                  log = self.log,
                                                  logLevel = logLevel )
    ret = S_OK( 'Job submitted' )

    if submission['OK']:
      batchID = submission['Value']
      self.log.info( 'Job %s submitted as %s' % ( jobID, batchID ) )
      self.log.verbose( 'Set JobParameter: Local batch ID %s' % ( batchID ) )
      self.__setJobParam( jobID, 'LocalBatchID', str( batchID ) )
      if 'PayloadFailed' in submission:
        ret['PayloadFailed'] = submission['PayloadFailed']
        return ret
      time.sleep( self.jobSubmissionDelay )
    else:
      self.log.error( 'Job submission failed', jobID )
      self.__setJobParam( jobID, 'ErrorMessage', '%s CE Submission Error' % ( self.ceName ) )
      if 'ReschedulePayload' in submission:
        rescheduleFailedJob( jobID, submission['Message'] )
        return S_OK()  # Without this job is marked as failed at line 265 above
      else:
        if 'Value' in submission:
          self.log.error( 'Error in DIRAC JobWrapper:', 'exit code = %s' % ( str( submission['Value'] ) ) )
      return S_ERROR( '%s CE Error: %s' % ( self.ceName, submission['Message'] ) )

    return ret
Example #2
0
    def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams,
                    jobJDL, proxyChain):
        """Submit job to the Computing Element instance after creating a custom
       Job Wrapper with the available job parameters.
    """
        result = self.__createJobWrapper(jobID, jobParams, resourceParams,
                                         optimizerParams)

        if not result['OK']:
            return result

        wrapperData = result['Value']
        wrapperFile = wrapperData['execFile']
        self.__report(jobID, 'Matched', 'Submitted To CE')

        wrapperName = os.path.basename(wrapperFile)
        self.log.info('Submitting %s to %sCE' % (wrapperName, self.ceName))

        #Pass proxy to the CE
        proxy = proxyChain.dumpAllToString()
        if not proxy['OK']:
            self.log.error(proxy)
            return S_ERROR('Payload Proxy Not Found')

        payloadProxy = proxy['Value']
        # FIXME: how can we set the batchID before we submit, this makes no sense
        batchID = 'dc%s' % (jobID)
        submission = self.computingElement.submitJob(wrapperFile, payloadProxy,
                                                     wrapperData)

        ret = S_OK('Job submitted')

        if submission['OK']:
            batchID = submission['Value']
            self.log.info('Job %s submitted as %s' % (jobID, batchID))
            self.log.verbose('Set JobParameter: Local batch ID %s' % (batchID))
            self.__setJobParam(jobID, 'LocalBatchID', str(batchID))
            if 'PayloadFailed' in submission:
                ret['PayloadFailed'] = submission['PayloadFailed']
                return ret
            time.sleep(self.jobSubmissionDelay)
        else:
            self.log.error('Job submission failed', jobID)
            self.__setJobParam(jobID, 'ErrorMessage',
                               '%s CE Submission Error' % (self.ceName))
            if 'ReschedulePayload' in submission:
                rescheduleFailedJob(jobID, submission['Message'],
                                    self.__report)
            else:
                if 'Value' in submission:
                    self.log.error(
                        'Error in DIRAC JobWrapper:',
                        'exit code = %s' % (str(submission['Value'])))
                # make sure the Job is declared Failed
                self.__report(jobID, 'Failed', submission['Message'])
            return S_ERROR('%s CE Submission Error: %s' %
                           (self.ceName, submission['Message']))

        return ret
Example #3
0
  def __submitJob( self, jobID, jobParams, resourceParams, optimizerParams, proxyChain ):
    """Submit job to the Computing Element instance after creating a custom
       Job Wrapper with the available job parameters.
    """
    logLevel = self.am_getOption( 'DefaultLogLevel', 'INFO' )
    defaultWrapperLocation = self.am_getOption( 'JobWrapperTemplate',
                                                'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py' )
    result = createJobWrapper( jobID, jobParams, resourceParams, optimizerParams,
                               extraOptions = self.extraOptions, defaultWrapperLocation = defaultWrapperLocation,
                               log = self.log, logLevel = logLevel )
    if not result['OK']:
      return result

    wrapperFile = result['Value']
    self.__report( jobID, 'Matched', 'Submitted To CE' )

    self.log.info( 'Submitting %s to %sCE' % ( os.path.basename( wrapperFile ), self.ceName ) )

    #Pass proxy to the CE
    proxy = proxyChain.dumpAllToString()
    if not proxy['OK']:
      self.log.error( proxy )
      return S_ERROR( 'Payload Proxy Not Found' )

    payloadProxy = proxy['Value']
    # FIXME: how can we set the batchID before we submit, this makes no sense
    batchID = 'dc%s' % ( jobID )
    submission = self.computingElement.submitJob( wrapperFile, payloadProxy )

    ret = S_OK( 'Job submitted' )

    if submission['OK']:
      batchID = submission['Value']
      self.log.info( 'Job %s submitted as %s' % ( jobID, batchID ) )
      self.log.verbose( 'Set JobParameter: Local batch ID %s' % ( batchID ) )
      self.__setJobParam( jobID, 'LocalBatchID', str( batchID ) )
      if 'PayloadFailed' in submission:
        ret['PayloadFailed'] = submission['PayloadFailed']
        return ret
      time.sleep( self.jobSubmissionDelay )
    else:
      self.log.error( 'Job submission failed', jobID )
      self.__setJobParam( jobID, 'ErrorMessage', '%s CE Submission Error' % ( self.ceName ) )
      if 'ReschedulePayload' in submission:
        rescheduleFailedJob( jobID, submission['Message'], self.__report )
      else:
        if 'Value' in submission:
          self.log.error( 'Error in DIRAC JobWrapper:', 'exit code = %s' % ( str( submission['Value'] ) ) )
        # make sure the Job is declared Failed
        self.__report( jobID, 'Failed', submission['Message'] )
      return S_ERROR( '%s CE Submission Error: %s' % ( self.ceName, submission['Message'] ) )

    return ret
Example #4
0
    def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams, jobJDL, proxyChain):
        """Submit job to the Computing Element instance after creating a custom
       Job Wrapper with the available job parameters.
    """
        result = self.__createJobWrapper(jobID, jobParams, resourceParams, optimizerParams)

        if not result["OK"]:
            return result

        wrapperData = result["Value"]
        wrapperFile = wrapperData["execFile"]
        self.__report(jobID, "Matched", "Submitted To CE")

        wrapperName = os.path.basename(wrapperFile)
        self.log.info("Submitting %s to %sCE" % (wrapperName, self.ceName))

        # Pass proxy to the CE
        proxy = proxyChain.dumpAllToString()
        if not proxy["OK"]:
            self.log.error(proxy)
            return S_ERROR("Payload Proxy Not Found")

        payloadProxy = proxy["Value"]
        # FIXME: how can we set the batchID before we submit, this makes no sense
        batchID = "dc%s" % (jobID)
        submission = self.computingElement.submitJob(wrapperFile, payloadProxy, wrapperData)

        ret = S_OK("Job submitted")

        if submission["OK"]:
            batchID = submission["Value"]
            self.log.info("Job %s submitted as %s" % (jobID, batchID))
            self.log.verbose("Set JobParameter: Local batch ID %s" % (batchID))
            self.__setJobParam(jobID, "LocalBatchID", str(batchID))
            if "PayloadFailed" in submission:
                ret["PayloadFailed"] = submission["PayloadFailed"]
                return ret
            time.sleep(self.jobSubmissionDelay)
        else:
            self.log.error("Job submission failed", jobID)
            self.__setJobParam(jobID, "ErrorMessage", "%s CE Submission Error" % (self.ceName))
            if "ReschedulePayload" in submission:
                rescheduleFailedJob(jobID, submission["Message"], self.__report)
            else:
                if "Value" in submission:
                    self.log.error("Error in DIRAC JobWrapper:", "exit code = %s" % (str(submission["Value"])))
                # make sure the Job is declared Failed
                self.__report(jobID, "Failed", submission["Message"])
            return S_ERROR("%s CE Submission Error: %s" % (self.ceName, submission["Message"]))

        return ret
Example #5
0
  def __submitJob( self, jobID, jobParams, resourceParams, optimizerParams, jobJDL, proxyChain ):
    """Submit job to the Computing Element instance after creating a custom
       Job Wrapper with the available job parameters.
    """
    result = self.__createJobWrapper( jobID, jobParams, resourceParams, optimizerParams )

    if not result['OK']:
      return result

    wrapperFile = result['Value']
    self.__report( jobID, 'Matched', 'Submitted To CE' )

    wrapperName = os.path.basename( wrapperFile )
    self.log.info( 'Submitting %s to %sCE' % ( wrapperName, self.ceName ) )

    #Pass proxy to the CE
    proxy = proxyChain.dumpAllToString()
    if not proxy['OK']:
      self.log.error( proxy )
      return S_ERROR( 'Payload Proxy Not Found' )

    payloadProxy = proxy['Value']
    # FIXME: how can we set the batchID before we submit, this makes no sense
    batchID = 'dc%s' % ( jobID )
    submission = self.computingElement.submitJob( wrapperFile, payloadProxy )

    ret = S_OK( 'Job submitted' )

    if submission['OK']:
      batchID = submission['Value']
      self.log.info( 'Job %s submitted as %s' % ( jobID, batchID ) )
      self.log.verbose( 'Set JobParameter: Local batch ID %s' % ( batchID ) )
      self.__setJobParam( jobID, 'LocalBatchID', str( batchID ) )
      if 'PayloadFailed' in submission:
        ret['PayloadFailed'] = submission['PayloadFailed']
        return ret
      time.sleep( self.jobSubmissionDelay )
    else:
      self.log.error( 'Job submission failed', jobID )
      self.__setJobParam( jobID, 'ErrorMessage', '%s CE Submission Error' % ( self.ceName ) )
      if 'ReschedulePayload' in submission:
        rescheduleFailedJob( jobID, submission['Message'], self.__report )
      return S_ERROR( '%s CE Submission Error: %s' % ( self.ceName, submission['Message'] ) )

    return ret
Example #6
0
    def __rescheduleFailedJob(self, jobID, message, jobParams, stop=True):
        """
    Set Job Status to "Rescheduled" and issue a reschedule command to the Job Manager
    """
        jobReport = JobReport(int(jobID), "JobAgent@%s" % self.siteName)

        rescheduleResult = rescheduleFailedJob(jobID, message, jobReport)

        self.__sendAccountingRecord(rescheduleResult, message, jobParams)

        if rescheduleResult == "Rescheduled":
            self.log.info("Job Rescheduled %s" % (jobID))
            return self.__finish("Job Rescheduled", stop)

        return self.__finish("Problem Rescheduling Job", stop)
Example #7
0
  def __rescheduleFailedJob( self, jobID, message, jobParams, stop = True ):
    """
    Set Job Status to "Rescheduled" and issue a reschedule command to the Job Manager
    """
    jobReport = JobReport( int( jobID ), 'JobAgent@%s' % self.siteName )

    rescheduleResult = rescheduleFailedJob( jobID, message, jobReport )

    self.__sendAccountingRecord( rescheduleResult, message, jobParams )

    if rescheduleResult == 'Rescheduled':
      self.log.info( 'Job Rescheduled %s' % ( jobID ) )
      return self.__finish( 'Job Rescheduled', stop )

    return self.__finish( 'Problem Rescheduling Job', stop )
Example #8
0
def execute(arguments):
    """The only real function executed here"""

    global gJobReport

    jobID = arguments["Job"].get("JobID", 0)
    os.environ["JOBID"] = str(jobID)
    jobID = int(jobID)

    if "WorkingDirectory" in arguments:
        wdir = os.path.expandvars(arguments["WorkingDirectory"])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(
                    wdir
                )  # this will raise an exception if wdir already exists (which is ~OK)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except OSError as osError:
                if osError.errno == errno.EEXIST and os.path.isdir(wdir):
                    gLogger.exception(
                        "JobWrapperTemplate found that the working directory already exists"
                    )
                    rescheduleResult = rescheduleFailedJob(
                        jobID, "Working Directory already exists")
                else:
                    gLogger.exception(
                        "JobWrapperTemplate could not create working directory"
                    )
                    rescheduleResult = rescheduleFailedJob(
                        jobID, "Could Not Create Working Directory")
                return 1

    gJobReport = JobReport(jobID, "JobWrapper")

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)  # initialize doesn't return S_OK/S_ERROR
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception("JobWrapper failed the initialization phase",
                          lException=exc)
        rescheduleResult = rescheduleFailedJob(
            jobID=jobID,
            minorStatus=JobMinorStatus.JOB_WRAPPER_INITIALIZATION,
            jobReport=gJobReport)
        job.sendJobAccounting(
            status=rescheduleResult,
            minorStatus=JobMinorStatus.JOB_WRAPPER_INITIALIZATION)
        return 1

    if "InputSandbox" in arguments["Job"]:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments["Job"]["InputSandbox"])
            if not result["OK"]:
                gLogger.warn(result["Message"])
                raise JobWrapperError(result["Message"])
        except JobWrapperError:
            gLogger.exception("JobWrapper failed to download input sandbox")
            rescheduleResult = rescheduleFailedJob(
                jobID=jobID,
                minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX,
                jobReport=gJobReport)
            job.sendJobAccounting(
                status=rescheduleResult,
                minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX)
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception(
                "JobWrapper raised exception while downloading input sandbox",
                lException=exc)
            rescheduleResult = rescheduleFailedJob(
                jobID=jobID,
                minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX,
                jobReport=gJobReport)
            job.sendJobAccounting(
                status=rescheduleResult,
                minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX)
            return 1
    else:
        gLogger.verbose("Job has no InputSandbox requirement")

    gJobReport.commit()

    if "InputData" in arguments["Job"]:
        if arguments["Job"]["InputData"]:
            try:
                result = job.resolveInputData()
                if not result["OK"]:
                    gLogger.warn(result["Message"])
                    raise JobWrapperError(result["Message"])
            except JobWrapperError:
                gLogger.exception("JobWrapper failed to resolve input data")
                rescheduleResult = rescheduleFailedJob(
                    jobID=jobID,
                    minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION,
                    jobReport=gJobReport)
                job.sendJobAccounting(
                    status=rescheduleResult,
                    minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION)
                return 1
            except Exception as exc:  # pylint: disable=broad-except
                gLogger.exception(
                    "JobWrapper raised exception while resolving input data",
                    lException=exc)
                rescheduleResult = rescheduleFailedJob(
                    jobID=jobID,
                    minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION,
                    jobReport=gJobReport)
                job.sendJobAccounting(
                    status=rescheduleResult,
                    minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION)
                return 1
        else:
            gLogger.verbose("Job has a null InputData requirement:")
            gLogger.verbose(arguments)
    else:
        gLogger.verbose("Job has no InputData requirement")

    gJobReport.commit()

    try:
        result = job.execute()
        if not result["OK"]:
            gLogger.error("Failed to execute job", result["Message"])
            raise JobWrapperError((result["Message"], result["Errno"]))
    except JobWrapperError as exc:
        if exc.value[1] == 0 or str(exc.value[0]) == "0":
            gLogger.verbose("JobWrapper exited with status=0 after execution")
        if exc.value[1] == DErrno.EWMSRESC:
            gLogger.warn("Asked to reschedule job")
            rescheduleResult = rescheduleFailedJob(
                jobID=jobID,
                minorStatus=JobMinorStatus.JOB_WRAPPER_EXECUTION,
                jobReport=gJobReport)
            job.sendJobAccounting(
                status=rescheduleResult,
                minorStatus=JobMinorStatus.JOB_WRAPPER_EXECUTION)
            return 1
        gLogger.exception("Job failed in execution phase")
        gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False)
        gJobReport.setJobStatus(
            status=JobStatus.FAILED,
            minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC,
            sendFlag=False)
        job.sendFailoverRequest()
        job.sendJobAccounting(status=JobStatus.FAILED,
                              minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC)
        return 1
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception("Job raised exception during execution phase",
                          lException=exc)
        gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False)
        gJobReport.setJobStatus(
            status=JobStatus.FAILED,
            minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC,
            sendFlag=False)
        job.sendFailoverRequest()
        job.sendJobAccounting(status=JobStatus.FAILED,
                              minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC)
        return 1

    if "OutputSandbox" in arguments["Job"] or "OutputData" in arguments["Job"]:
        try:
            result = job.processJobOutputs()
            if not result["OK"]:
                gLogger.warn(result["Message"])
                raise JobWrapperError(result["Message"])
        except JobWrapperError as exc:
            gLogger.exception("JobWrapper failed to process output files")
            gJobReport.setJobParameter("Error Message",
                                       repr(exc),
                                       sendFlag=False)
            gJobReport.setJobStatus(
                status=JobStatus.FAILED,
                minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS,
                sendFlag=False)
            job.sendFailoverRequest()
            job.sendJobAccounting(
                status=JobStatus.FAILED,
                minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS)

            return 2
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception(
                "JobWrapper raised exception while processing output files",
                lException=exc)
            gJobReport.setJobParameter("Error Message",
                                       repr(exc),
                                       sendFlag=False)
            gJobReport.setJobStatus(
                status=JobStatus.FAILED,
                minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS,
                sendFlag=False)
            job.sendFailoverRequest()
            job.sendJobAccounting(
                status=JobStatus.FAILED,
                minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS)
            return 2
    else:
        gLogger.verbose("Job has no OutputData or OutputSandbox requirement")

    try:
        # Failed jobs will return !=0 / successful jobs will return 0
        return job.finalize()
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception(
            "JobWrapper raised exception during the finalization phase",
            lException=exc)
        return 2
Example #9
0
def execute(arguments):
    """ The only real function executed here
  """

    global gJobReport

    jobID = arguments['Job']['JobID']
    os.environ['JOBID'] = jobID
    jobID = int(jobID)

    if 'WorkingDirectory' in arguments:
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(
                    wdir
                )  # this will raise an exception if wdir already exists (which is ~OK)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except OSError as osError:
                if osError.errno == errno.EEXIST and os.path.isdir(wdir):
                    gLogger.exception(
                        'JobWrapperTemplate found that the working directory already exists'
                    )
                    rescheduleResult = rescheduleFailedJob(
                        jobID, 'Working Directory already exists')
                else:
                    gLogger.exception(
                        'JobWrapperTemplate could not create working directory'
                    )
                    rescheduleResult = rescheduleFailedJob(
                        jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)  # initialize doesn't return S_OK/S_ERROR
    except Exception as exc:  #pylint: disable=broad-except
        gLogger.exception('JobWrapper failed the initialization phase',
                          lException=exc)
        rescheduleResult = rescheduleFailedJob(jobID,
                                               'Job Wrapper Initialization',
                                               gJobReport)
        try:
            job.sendJobAccounting(rescheduleResult,
                                  'Job Wrapper Initialization')
        except Exception as exc:  #pylint: disable=broad-except
            gLogger.exception('JobWrapper failed sending job accounting',
                              lException=exc)
        return 1

    if 'InputSandbox' in arguments['Job']:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except JobWrapperError:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleResult = rescheduleFailedJob(jobID,
                                                   'Input Sandbox Download',
                                                   gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
        except Exception as exc:  #pylint: disable=broad-except
            gLogger.exception(
                'JobWrapper raised exception while downloading input sandbox',
                lException=exc)
            rescheduleResult = rescheduleFailedJob(jobID,
                                                   'Input Sandbox Download',
                                                   gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if 'InputData' in arguments['Job']:
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except JobWrapperError:
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleResult = rescheduleFailedJob(
                    jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult,
                                      'Input Data Resolution')
                return 1
            except Exception as exc:  #pylint: disable=broad-except
                gLogger.exception(
                    'JobWrapper raised exception while resolving input data',
                    lException=exc)
                rescheduleResult = rescheduleFailedJob(
                    jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult,
                                      'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
    else:
        gLogger.verbose('Job has no InputData requirement')

    gJobReport.commit()

    try:
        result = job.execute(arguments)
        if not result['OK']:
            gLogger.error('Failed to execute job', result['Message'])
            raise JobWrapperError((result['Message'], result['Errno']))
    except JobWrapperError as exc:
        if exc.value[1] == 0 or str(exc.value[0]) == '0':
            gLogger.verbose('JobWrapper exited with status=0 after execution')
        if exc.value[1] == DErrno.EWMSRESC:
            gLogger.warn("Asked to reschedule job")
            rescheduleResult = rescheduleFailedJob(jobID,
                                                   'JobWrapper execution',
                                                   gJobReport)
            job.sendJobAccounting(rescheduleResult, 'JobWrapper execution')
            return 1
        gLogger.exception('Job failed in execution phase')
        gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
        gJobReport.setJobStatus('Failed',
                                'Exception During Execution',
                                sendFlag=False)
        job.sendFailoverRequest('Failed', 'Exception During Execution')
        return 1
    except Exception as exc:  #pylint: disable=broad-except
        gLogger.exception('Job raised exception during execution phase',
                          lException=exc)
        gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
        gJobReport.setJobStatus('Failed',
                                'Exception During Execution',
                                sendFlag=False)
        job.sendFailoverRequest('Failed', 'Exception During Execution')
        return 1

    if 'OutputSandbox' in arguments['Job'] or 'OutputData' in arguments['Job']:
        try:
            result = job.processJobOutputs(arguments)
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except JobWrapperError as exc:
            gLogger.exception('JobWrapper failed to process output files')
            gJobReport.setJobParameter('Error Message',
                                       str(exc),
                                       sendFlag=False)
            gJobReport.setJobStatus('Failed',
                                    'Uploading Job Outputs',
                                    sendFlag=False)
            job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
            return 2
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception(
                'JobWrapper raised exception while processing output files',
                lException=exc)
            gJobReport.setJobParameter('Error Message',
                                       str(exc),
                                       sendFlag=False)
            gJobReport.setJobStatus('Failed',
                                    'Uploading Job Outputs',
                                    sendFlag=False)
            job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
            return 2
    else:
        gLogger.verbose('Job has no OutputData or OutputSandbox requirement')

    try:
        # Failed jobs will return 1 / successful jobs will return 0
        return job.finalize()
    except Exception as exc:  #pylint: disable=broad-except
        gLogger.exception(
            'JobWrapper raised exception during the finalization phase',
            lException=exc)
        return 2
Example #10
0
def execute(arguments):
  """ The only real function executed here
  """

  global gJobReport

  jobID = arguments['Job']['JobID']
  os.environ['JOBID'] = jobID
  jobID = int(jobID)

  if 'WorkingDirectory' in arguments:
    wdir = os.path.expandvars(arguments['WorkingDirectory'])
    if os.path.isdir(wdir):
      os.chdir(wdir)
    else:
      try:
        os.makedirs(wdir)  # this will raise an exception if wdir already exists (which is ~OK)
        if os.path.isdir(wdir):
          os.chdir(wdir)
      except OSError as osError:
        if osError.errno == errno.EEXIST and os.path.isdir(wdir):
          gLogger.exception('JobWrapperTemplate found that the working directory already exists')
          rescheduleResult = rescheduleFailedJob(jobID, 'Working Directory already exists')
        else:
          gLogger.exception('JobWrapperTemplate could not create working directory')
          rescheduleResult = rescheduleFailedJob(jobID, 'Could Not Create Working Directory')
        return 1

  gJobReport = JobReport(jobID, 'JobWrapper')

  try:
    job = JobWrapper(jobID, gJobReport)
    job.initialize(arguments)  # initialize doesn't return S_OK/S_ERROR
  except Exception as exc:  # pylint: disable=broad-except
    gLogger.exception('JobWrapper failed the initialization phase', lException=exc)
    rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
    try:
      job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization')
    except Exception as exc:  # pylint: disable=broad-except
      gLogger.exception('JobWrapper failed sending job accounting', lException=exc)
    return 1

  if 'InputSandbox' in arguments['Job']:
    gJobReport.commit()
    try:
      result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
      if not result['OK']:
        gLogger.warn(result['Message'])
        raise JobWrapperError(result['Message'])
    except JobWrapperError:
      gLogger.exception('JobWrapper failed to download input sandbox')
      rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
      job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
      return 1
    except Exception as exc:  # pylint: disable=broad-except
      gLogger.exception('JobWrapper raised exception while downloading input sandbox', lException=exc)
      rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
      job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
      return 1
  else:
    gLogger.verbose('Job has no InputSandbox requirement')

  gJobReport.commit()

  if 'InputData' in arguments['Job']:
    if arguments['Job']['InputData']:
      try:
        result = job.resolveInputData()
        if not result['OK']:
          gLogger.warn(result['Message'])
          raise JobWrapperError(result['Message'])
      except JobWrapperError:
        gLogger.exception('JobWrapper failed to resolve input data')
        rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
        job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
        return 1
      except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception('JobWrapper raised exception while resolving input data', lException=exc)
        rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
        job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
        return 1
    else:
      gLogger.verbose('Job has a null InputData requirement:')
      gLogger.verbose(arguments)
  else:
    gLogger.verbose('Job has no InputData requirement')

  gJobReport.commit()

  try:
    result = job.execute(arguments)
    if not result['OK']:
      gLogger.error('Failed to execute job', result['Message'])
      raise JobWrapperError((result['Message'], result['Errno']))
  except JobWrapperError as exc:
    if exc.value[1] == 0 or str(exc.value[0]) == '0':
      gLogger.verbose('JobWrapper exited with status=0 after execution')
    if exc.value[1] == DErrno.EWMSRESC:
      gLogger.warn("Asked to reschedule job")
      rescheduleResult = rescheduleFailedJob(jobID, 'JobWrapper execution', gJobReport)
      job.sendJobAccounting(rescheduleResult, 'JobWrapper execution')
      return 1
    gLogger.exception('Job failed in execution phase')
    gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
    gJobReport.setJobStatus(
        'Failed', 'Exception During Execution', sendFlag=False)
    job.sendFailoverRequest('Failed', 'Exception During Execution')
    return 1
  except Exception as exc:  # pylint: disable=broad-except
    gLogger.exception('Job raised exception during execution phase', lException=exc)
    gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
    gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False)
    job.sendFailoverRequest('Failed', 'Exception During Execution')
    return 1

  if 'OutputSandbox' in arguments['Job'] or 'OutputData' in arguments['Job']:
    try:
      result = job.processJobOutputs()
      if not result['OK']:
        gLogger.warn(result['Message'])
        raise JobWrapperError(result['Message'])
    except JobWrapperError as exc:
      gLogger.exception('JobWrapper failed to process output files')
      gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
      gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
      job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
      return 2
    except Exception as exc:  # pylint: disable=broad-except
      gLogger.exception('JobWrapper raised exception while processing output files', lException=exc)
      gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
      gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
      job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
      return 2
  else:
    gLogger.verbose('Job has no OutputData or OutputSandbox requirement')

  try:
    # Failed jobs will return 1 / successful jobs will return 0
    return job.finalize()
  except Exception as exc:  # pylint: disable=broad-except
    gLogger.exception('JobWrapper raised exception during the finalization phase', lException=exc)
    return 2
Example #11
0
def execute ( arguments ):

  global gJobReport

  jobID = arguments['Job']['JobID']
  os.environ['JOBID'] = jobID
  jobID = int( jobID )

  if arguments.has_key( 'WorkingDirectory' ):
    wdir = os.path.expandvars( arguments['WorkingDirectory'] )
    if os.path.isdir( wdir ):
      os.chdir( wdir )
    else:
      try:
        os.makedirs( wdir )
        if os.path.isdir( wdir ):
          os.chdir( wdir )
      except Exception:
        gLogger.exception( 'JobWrapperTemplate could not create working directory' )
        rescheduleResult = rescheduleFailedJob( jobID, 'Could Not Create Working Directory' )
        return 1

  gJobReport = JobReport( jobID, 'JobWrapper' )

  try:
    job = JobWrapper( jobID, gJobReport )
    job.initialize( arguments )
  except Exception:
    gLogger.exception( 'JobWrapper failed the initialization phase' )
    rescheduleResult = rescheduleFailedJob( jobID, 'Job Wrapper Initialization', gJobReport )
    job.sendJobAccounting( rescheduleResult, 'Job Wrapper Initialization' )
    return 1

  if arguments['Job'].has_key( 'InputSandbox' ):
    gJobReport.commit()
    try:
      result = job.transferInputSandbox( arguments['Job']['InputSandbox'] )
      if not result['OK']:
        gLogger.warn( result['Message'] )
        raise JobWrapperError( result['Message'] )
    except Exception:
      gLogger.exception( 'JobWrapper failed to download input sandbox' )
      rescheduleResult = rescheduleFailedJob( jobID, 'Input Sandbox Download', gJobReport )
      job.sendJobAccounting( rescheduleResult, 'Input Sandbox Download' )
      return 1
  else:
    gLogger.verbose( 'Job has no InputSandbox requirement' )

  gJobReport.commit()

  if arguments['Job'].has_key( 'InputData' ):
    if arguments['Job']['InputData']:
      try:
        result = job.resolveInputData()
        if not result['OK']:
          gLogger.warn( result['Message'] )
          raise JobWrapperError( result['Message'] )
      except Exception, x:
        gLogger.exception( 'JobWrapper failed to resolve input data' )
        rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport )
        job.sendJobAccounting( rescheduleResult, 'Input Data Resolution' )
        return 1
    else:
      gLogger.verbose( 'Job has a null InputData requirement:' )
      gLogger.verbose( arguments )
Example #12
0
    def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams,
                    proxyChain):
        """Submit job to the Computing Element instance after creating a custom
       Job Wrapper with the available job parameters.
    """
        logLevel = self.am_getOption('DefaultLogLevel', 'INFO')
        defaultWrapperLocation = self.am_getOption(
            'JobWrapperTemplate',
            'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py')
        result = createJobWrapper(
            jobID,
            jobParams,
            resourceParams,
            optimizerParams,
            extraOptions=self.extraOptions,
            defaultWrapperLocation=defaultWrapperLocation,
            log=self.log,
            logLevel=logLevel)
        if not result['OK']:
            return result

        wrapperFile = result['Value']
        self.__report(jobID, 'Matched', 'Submitted To CE')

        self.log.info('Submitting %s to %sCE' %
                      (os.path.basename(wrapperFile), self.ceName))

        # Pass proxy to the CE
        proxy = proxyChain.dumpAllToString()
        if not proxy['OK']:
            self.log.error('Invalid proxy', proxy)
            return S_ERROR('Payload Proxy Not Found')

        payloadProxy = proxy['Value']
        # FIXME: how can we set the batchID before we submit, this makes no sense
        batchID = 'dc%s' % (jobID)
        submission = self.computingElement.submitJob(wrapperFile, payloadProxy)

        ret = S_OK('Job submitted')

        if submission['OK']:
            batchID = submission['Value']
            self.log.info('Job %s submitted as %s' % (jobID, batchID))
            self.log.verbose('Set JobParameter: Local batch ID %s' % (batchID))
            self.__setJobParam(jobID, 'LocalBatchID', str(batchID))
            if 'PayloadFailed' in submission:
                ret['PayloadFailed'] = submission['PayloadFailed']
                return ret
            time.sleep(self.jobSubmissionDelay)
        else:
            self.log.error('Job submission failed', jobID)
            self.__setJobParam(jobID, 'ErrorMessage',
                               '%s CE Submission Error' % (self.ceName))
            if 'ReschedulePayload' in submission:
                rescheduleFailedJob(jobID, submission['Message'])
                return S_OK(
                )  # Without this job is marked as failed at line 265 above
            else:
                if 'Value' in submission:
                    self.log.error(
                        'Error in DIRAC JobWrapper:',
                        'exit code = %s' % (str(submission['Value'])))
                # make sure the Job is declared Failed
                self.__report(jobID, 'Failed', submission['Message'])
            return S_ERROR('%s CE Submission Error: %s' %
                           (self.ceName, submission['Message']))

        return ret
Example #13
0
def execute ( arguments ):

  global gJobReport

  jobID = arguments['Job']['JobID']
  os.environ['JOBID'] = jobID
  jobID = int( jobID )
  # Fix in the environment to get a reasonable performance from dCache,
  # until we move to a new version of root
#  os.environ['DCACHE_RAHEAD'] = str(1)
#  os.environ['DCACHE_RA_BUFFER'] = str(50*1024)

  if arguments.has_key( 'WorkingDirectory' ):
    wdir = os.path.expandvars( arguments['WorkingDirectory'] )
    if os.path.isdir( wdir ):
      os.chdir( wdir )
    else:
      try:
        os.makedirs( wdir )
        if os.path.isdir( wdir ):
          os.chdir( wdir )
      except Exception:
        gLogger.exception( 'JobWrapperTemplate could not create working directory' )
        rescheduleFailedJob( jobID, 'Could Not Create Working Directory' )
        return 1

  #root = arguments['CE']['Root']
  gJobReport = JobReport( jobID, 'JobWrapper' )

  try:
    job = JobWrapper( jobID, gJobReport )
    job.initialize( arguments )
  except Exception:
    gLogger.exception( 'JobWrapper failed the initialization phase' )
    rescheduleFailedJob( jobID, 'Job Wrapper Initialization', gJobReport )
    job.sendWMSAccounting( 'Failed', 'Job Wrapper Initialization' )
    return 1

  if arguments['Job'].has_key( 'InputSandbox' ):
    gJobReport.commit()
    try:
      result = job.transferInputSandbox( arguments['Job']['InputSandbox'] )
      if not result['OK']:
        gLogger.warn( result['Message'] )
        raise JobWrapperError( result['Message'] )
    except Exception:
      gLogger.exception( 'JobWrapper failed to download input sandbox' )
      rescheduleFailedJob( jobID, 'Input Sandbox Download' )
      job.sendWMSAccounting( 'Failed', 'Input Sandbox Download' )
      return 1
  else:
    gLogger.verbose( 'Job has no InputSandbox requirement' )

  gJobReport.commit()

  if arguments['Job'].has_key( 'InputData' ):
    if arguments['Job']['InputData']:
      try:
        result = job.resolveInputData()
        if not result['OK']:
          gLogger.warn( result['Message'] )
          raise JobWrapperError( result['Message'] )
      except Exception, x:
        gLogger.exception( 'JobWrapper failed to resolve input data' )
        rescheduleFailedJob( jobID, 'Input Data Resolution' )
        job.sendWMSAccounting( 'Failed', 'Input Data Resolution' )
        return 1
    else:
      gLogger.verbose( 'Job has a null InputData requirement:' )
      gLogger.verbose( arguments )
Example #14
0
def execute(arguments):

    global gJobReport

    jobID = arguments['Job']['JobID']
    os.environ['JOBID'] = jobID
    jobID = int(jobID)

    if arguments.has_key('WorkingDirectory'):
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except Exception:
                gLogger.exception(
                    'JobWrapperTemplate could not create working directory')
                rescheduleResult = rescheduleFailedJob(
                    jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)
    except Exception:
        gLogger.exception('JobWrapper failed the initialization phase')
        rescheduleResult = rescheduleFailedJob(jobID,
                                               'Job Wrapper Initialization',
                                               gJobReport)
        job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization')
        return 1

    if arguments['Job'].has_key('InputSandbox'):
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleResult = rescheduleFailedJob(jobID,
                                                   'Input Sandbox Download',
                                                   gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if arguments['Job'].has_key('InputData'):
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except Exception, x:
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleResult = rescheduleFailedJob(
                    jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult,
                                      'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
Example #15
0
    def _submitJob(self,
                   jobID,
                   jobParams,
                   resourceParams,
                   optimizerParams,
                   proxyChain,
                   processors=1,
                   wholeNode=False,
                   maxNumberOfProcessors=0,
                   mpTag=False):
        """ Submit job to the Computing Element instance after creating a custom
        Job Wrapper with the available job parameters.
    """
        logLevel = self.am_getOption('DefaultLogLevel', 'INFO')
        defaultWrapperLocation = self.am_getOption(
            'JobWrapperTemplate',
            'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py')
        jobDesc = {
            "jobID": jobID,
            "jobParams": jobParams,
            "resourceParams": resourceParams,
            "optimizerParams": optimizerParams,
            "extraOptions": self.extraOptions,
            "defaultWrapperLocation": defaultWrapperLocation
        }
        result = createJobWrapper(log=self.log, logLevel=logLevel, **jobDesc)
        if not result['OK']:
            return result

        wrapperFile = result['Value']
        self.__report(jobID, 'Matched', 'Submitted To CE')

        self.log.info(
            'Submitting JobWrapper',
            '%s to %sCE' % (os.path.basename(wrapperFile), self.ceName))

        # Pass proxy to the CE
        proxy = proxyChain.dumpAllToString()
        if not proxy['OK']:
            self.log.error('Invalid proxy', proxy)
            return S_ERROR('Payload Proxy Not Found')

        payloadProxy = proxy['Value']
        submission = self.computingElement.submitJob(
            wrapperFile,
            payloadProxy,
            numberOfProcessors=processors,
            maxNumberOfProcessors=maxNumberOfProcessors,
            wholeNode=wholeNode,
            mpTag=mpTag,
            jobDesc=jobDesc,
            log=self.log,
            logLevel=logLevel)
        ret = S_OK('Job submitted')

        if submission['OK']:
            batchID = submission['Value']
            self.log.info('Job submitted', '%s as %s' % (jobID, batchID))
            if 'PayloadFailed' in submission:
                ret['PayloadFailed'] = submission['PayloadFailed']
                return ret
            time.sleep(self.jobSubmissionDelay)
        else:
            self.log.error('Job submission failed', jobID)
            self.__setJobParam(jobID, 'ErrorMessage',
                               '%s CE Submission Error' % (self.ceName))
            if 'ReschedulePayload' in submission:
                rescheduleFailedJob(jobID, submission['Message'])
                return S_OK()  # Without this, the job is marked as failed
            else:
                if 'Value' in submission:
                    self.log.error(
                        'Error in DIRAC JobWrapper:',
                        'exit code = %s' % (str(submission['Value'])))
            return S_ERROR('%s CE Error: %s' %
                           (self.ceName, submission['Message']))

        return ret
Example #16
0
def execute(arguments):

    global gJobReport

    jobID = arguments['Job']['JobID']
    os.environ['JOBID'] = jobID
    jobID = int(jobID)

    if arguments.has_key('WorkingDirectory'):
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except Exception:
                gLogger.exception(
                    'JobWrapperTemplate could not create working directory')
                rescheduleResult = rescheduleFailedJob(
                    jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)
    except Exception as e:
        gLogger.exception('JobWrapper failed the initialization phase',
                          lException=e)
        rescheduleResult = rescheduleFailedJob(jobID,
                                               'Job Wrapper Initialization',
                                               gJobReport)
        try:
            job.sendJobAccounting(rescheduleResult,
                                  'Job Wrapper Initialization')
        except Exception as e:
            gLogger.exception('JobWrapper failed sending job accounting',
                              lException=e)
        return 1

    if arguments['Job'].has_key('InputSandbox'):
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleResult = rescheduleFailedJob(jobID,
                                                   'Input Sandbox Download',
                                                   gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if arguments['Job'].has_key('InputData'):
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except Exception as x:
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleResult = rescheduleFailedJob(
                    jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult,
                                      'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
    else:
        gLogger.verbose('Job has no InputData requirement')

    gJobReport.commit()

    try:
        result = job.execute(arguments)
        if not result['OK']:
            gLogger.error('Failed to execute job', result['Message'])
            raise JobWrapperError(result['Message'])
    except Exception as x:
        if str(x) == '0':
            gLogger.verbose('JobWrapper exited with status=0 after execution')
        else:
            gLogger.exception('Job failed in execution phase')
            gJobReport.setJobParameter('Error Message', str(x), sendFlag=False)
            gJobReport.setJobStatus('Failed',
                                    'Exception During Execution',
                                    sendFlag=False)
            job.sendFailoverRequest('Failed', 'Exception During Execution')
            return 1

    if arguments['Job'].has_key('OutputSandbox') or arguments['Job'].has_key(
            'OutputData'):
        try:
            result = job.processJobOutputs(arguments)
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception as x:
            gLogger.exception('JobWrapper failed to process output files')
            gJobReport.setJobParameter('Error Message', str(x), sendFlag=False)
            gJobReport.setJobStatus('Failed',
                                    'Uploading Job Outputs',
                                    sendFlag=False)
            job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
            return 2
    else:
        gLogger.verbose('Job has no OutputData or OutputSandbox requirement')

    try:
        # Failed jobs will return 1 / successful jobs will return 0
        return job.finalize(arguments)
    except Exception:
        gLogger.exception('JobWrapper failed the finalization phase')
        return 2
def execute( arguments ):

  global gJobReport

  jobID = arguments['Job']['JobID']
  os.environ['JOBID'] = jobID
  jobID = int( jobID )

  if arguments.has_key( 'WorkingDirectory' ):
    wdir = os.path.expandvars( arguments['WorkingDirectory'] )
    if os.path.isdir( wdir ):
      os.chdir( wdir )
    else:
      try:
        os.makedirs( wdir )
        if os.path.isdir( wdir ):
          os.chdir( wdir )
      except Exception:
        gLogger.exception( 'JobWrapperTemplate could not create working directory' )
        rescheduleResult = rescheduleFailedJob( jobID, 'Could Not Create Working Directory' )
        return 1

  gJobReport = JobReport( jobID, 'JobWrapper' )

  try:
    job = JobWrapper( jobID, gJobReport )
    job.initialize( arguments )
  except Exception as e:
    gLogger.exception( 'JobWrapper failed the initialization phase', lException = e )
    rescheduleResult = rescheduleFailedJob( jobID, 'Job Wrapper Initialization', gJobReport )
    try:
      job.sendJobAccounting( rescheduleResult, 'Job Wrapper Initialization' )
    except Exception as e:
      gLogger.exception( 'JobWrapper failed sending job accounting', lException = e )
    return 1

  if arguments['Job'].has_key( 'InputSandbox' ):
    gJobReport.commit()
    try:
      result = job.transferInputSandbox( arguments['Job']['InputSandbox'] )
      if not result['OK']:
        gLogger.warn( result['Message'] )
        raise JobWrapperError( result['Message'] )
    except Exception:
      gLogger.exception( 'JobWrapper failed to download input sandbox' )
      rescheduleResult = rescheduleFailedJob( jobID, 'Input Sandbox Download', gJobReport )
      job.sendJobAccounting( rescheduleResult, 'Input Sandbox Download' )
      return 1
  else:
    gLogger.verbose( 'Job has no InputSandbox requirement' )

  gJobReport.commit()

  if arguments['Job'].has_key( 'InputData' ):
    if arguments['Job']['InputData']:
      try:
        result = job.resolveInputData()
        if not result['OK']:
          gLogger.warn( result['Message'] )
          raise JobWrapperError( result['Message'] )
      except Exception as x:
        gLogger.exception( 'JobWrapper failed to resolve input data' )
        rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport )
        job.sendJobAccounting( rescheduleResult, 'Input Data Resolution' )
        return 1
    else:
      gLogger.verbose( 'Job has a null InputData requirement:' )
      gLogger.verbose( arguments )
  else:
    gLogger.verbose( 'Job has no InputData requirement' )

  gJobReport.commit()

  try:
    result = job.execute( arguments )
    if not result['OK']:
      gLogger.error( 'Failed to execute job', result['Message'] )
      raise JobWrapperError( result['Message'] )
  except Exception as x:
    if str( x ) == '0':
      gLogger.verbose( 'JobWrapper exited with status=0 after execution' )
    else:
      gLogger.exception( 'Job failed in execution phase' )
      gJobReport.setJobParameter( 'Error Message', str( x ), sendFlag = False )
      gJobReport.setJobStatus( 'Failed', 'Exception During Execution', sendFlag = False )
      job.sendFailoverRequest( 'Failed', 'Exception During Execution' )
      return 1

  if arguments['Job'].has_key( 'OutputSandbox' ) or arguments['Job'].has_key( 'OutputData' ):
    try:
      result = job.processJobOutputs( arguments )
      if not result['OK']:
        gLogger.warn( result['Message'] )
        raise JobWrapperError( result['Message'] )
    except Exception as x:
      gLogger.exception( 'JobWrapper failed to process output files' )
      gJobReport.setJobParameter( 'Error Message', str( x ), sendFlag = False )
      gJobReport.setJobStatus( 'Failed', 'Uploading Job Outputs', sendFlag = False )
      job.sendFailoverRequest( 'Failed', 'Uploading Job Outputs' )
      return 2
  else:
    gLogger.verbose( 'Job has no OutputData or OutputSandbox requirement' )

  try:
    # Failed jobs will return 1 / successful jobs will return 0
    return job.finalize( arguments )
  except Exception:
    gLogger.exception( 'JobWrapper failed the finalization phase' )
    return 2
Example #18
0
def execute(arguments):

    global gJobReport

    jobID = arguments['Job']['JobID']
    os.environ['JOBID'] = jobID
    jobID = int(jobID)
    # Fix in the environment to get a reasonable performance from dCache,
    # until we move to a new version of root
    #  os.environ['DCACHE_RAHEAD'] = str(1)
    #  os.environ['DCACHE_RA_BUFFER'] = str(50*1024)

    if arguments.has_key('WorkingDirectory'):
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except Exception:
                gLogger.exception(
                    'JobWrapperTemplate could not create working directory')
                rescheduleFailedJob(jobID,
                                    'Could Not Create Working Directory')
                return 1

    #root = arguments['CE']['Root']
    gJobReport = JobReport(jobID, 'JobWrapper')

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)
    except Exception:
        gLogger.exception('JobWrapper failed the initialization phase')
        rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
        job.sendWMSAccounting('Failed', 'Job Wrapper Initialization')
        return 1

    if arguments['Job'].has_key('InputSandbox'):
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleFailedJob(jobID, 'Input Sandbox Download')
            job.sendWMSAccounting('Failed', 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if arguments['Job'].has_key('InputData'):
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except Exception, x:
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleFailedJob(jobID, 'Input Data Resolution')
                job.sendWMSAccounting('Failed', 'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
Example #19
0
    def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams, proxyChain):
        """Submit job to the Computing Element instance after creating a custom
       Job Wrapper with the available job parameters.
    """
        logLevel = self.am_getOption("DefaultLogLevel", "INFO")
        defaultWrapperLocation = self.am_getOption(
            "JobWrapperTemplate", "DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py"
        )
        result = createJobWrapper(
            jobID,
            jobParams,
            resourceParams,
            optimizerParams,
            extraOptions=self.extraOptions,
            defaultWrapperLocation=defaultWrapperLocation,
            log=self.log,
            logLevel=logLevel,
        )
        if not result["OK"]:
            return result

        wrapperFile = result["Value"]
        self.__report(jobID, "Matched", "Submitted To CE")

        self.log.info("Submitting %s to %sCE" % (os.path.basename(wrapperFile), self.ceName))

        # Pass proxy to the CE
        proxy = proxyChain.dumpAllToString()
        if not proxy["OK"]:
            self.log.error("Invalid proxy", proxy)
            return S_ERROR("Payload Proxy Not Found")

        payloadProxy = proxy["Value"]
        # FIXME: how can we set the batchID before we submit, this makes no sense
        batchID = "dc%s" % (jobID)
        submission = self.computingElement.submitJob(wrapperFile, payloadProxy)

        ret = S_OK("Job submitted")

        if submission["OK"]:
            batchID = submission["Value"]
            self.log.info("Job %s submitted as %s" % (jobID, batchID))
            self.log.verbose("Set JobParameter: Local batch ID %s" % (batchID))
            self.__setJobParam(jobID, "LocalBatchID", str(batchID))
            if "PayloadFailed" in submission:
                ret["PayloadFailed"] = submission["PayloadFailed"]
                return ret
            time.sleep(self.jobSubmissionDelay)
        else:
            self.log.error("Job submission failed", jobID)
            self.__setJobParam(jobID, "ErrorMessage", "%s CE Submission Error" % (self.ceName))
            if "ReschedulePayload" in submission:
                rescheduleFailedJob(jobID, submission["Message"])
                return S_OK()  # Without this job is marked as failed at line 265 above
            else:
                if "Value" in submission:
                    self.log.error("Error in DIRAC JobWrapper:", "exit code = %s" % (str(submission["Value"])))
                # make sure the Job is declared Failed
                self.__report(jobID, "Failed", submission["Message"])
            return S_ERROR("%s CE Submission Error: %s" % (self.ceName, submission["Message"]))

        return ret