Ejemplo n.º 1
0
    def testB(self):
        """change state test"""
        try:
         JobState.register("jobClassID2","Processing",2,1,"myWorkflowID")
         JobState.create("jobClassID2","cacheDir/location/2somewhere")
         JobState.inProgress("jobClassID2")

         # retries=racers=0
         self.assertEqual(JobState.general("jobClassID2"), {'Retries': 0, 'CacheDirLocation': 'cacheDir/location/2somewhere', 'MaxRacers': 1, 'Racers': 0, 'State': 'inProgress', 'MaxRetries': 2, 'JobType': 'Processing'})

         JobState.submit("jobClassID2")

         # retries0,=racers=1
         self.assertEqual(JobState.general("jobClassID2"),{'Retries': 0, 'CacheDirLocation': 'cacheDir/location/2somewhere', 'MaxRacers': 1, 'Racers': 1, 'State': 'inProgress', 'MaxRetries': 2, 'JobType': 'Processing'})

         JobState.runFailure("jobClassID2","jobInstanceID2.1",
              "some.location2.1","job/Report/Location2.1.xml")

         # retries= 1, racers=0
         self.assertEqual(JobState.general("jobClassID2"),
              {'CacheDirLocation': 'cacheDir/location/2somewhere', 
               'MaxRacers': 1, 'Racers': 0, 'State': 'inProgress', 
               'MaxRetries': 2, 'Retries': 1, 'JobType': 'Processing'})

         JobState.submit("jobClassID2")

         # retries= 1, racers=1
         self.assertEqual(JobState.general("jobClassID2"),{'Retries': 1L, 'CacheDirLocation': 'cacheDir/location/2somewhere', 'MaxRacers': 1L, 'Racers': 1L, 'State': 'inProgress', 'MaxRetries': 2L, 'JobType': 'Processing'})

        except StandardError, ex:
            msg = "Failed State Change TestB:\n"
            msg += str(ex)
            self.fail(msg)
Ejemplo n.º 2
0
    def testE(self):
        try:
         JobState.register("jobClassID5","Processing",2,2,"myWorkflowID")
         JobState.create("jobClassID5","cacheDir/location/5somewhere")
         JobState.inProgress("jobClassID5")
         JobState.submit("jobClassID5")

        # now introduce some failures until we have more failures
        # then retries (this raises an error)

         JobState.runFailure("jobClassID5","jobInstanceID5.1",
              "some.location5.1","job/Report/Location5.1.xml")
         try:
              JobState.runFailure("jobClassID5","jobInstanceID5.2",
                   "some.location5.1","job/Report/Location5.1.xml")
         except ProdException, ex:
              print('>>>Test succeeded for exception 1/1 in testE of JobState_t.py\n')
         JobState.finished("jobClassID5")
Ejemplo n.º 3
0
            JobState.inProgress("jobClassID1")
         except ProdException, ex:
            print('>>>Test succeeded for exception 3/3 in testA of JobState_t.py\n')
         JobState.create("jobClassID1","cacheDir/location/1somewhere")
         JobState.inProgress("jobClassID1")
         # retries=racers=0;
         self.assertEqual(JobState.general("jobClassID1"), {'Retries': 0, 'CacheDirLocation': 'cacheDir/location/1somewhere', 'MaxRacers': 1, 'Racers': 0, 'State': 'inProgress', 'MaxRetries': 3, 'JobType': 'Processing'}
)


         JobState.submit("jobClassID1")

         # retries=0, racers=1;
         self.assertEqual(JobState.general("jobClassID1"), {'Retries': 0L, 'CacheDirLocation': 'cacheDir/location/1somewhere', 'MaxRacers': 1L, 'Racers': 1L, 'State': 'inProgress', 'MaxRetries': 3L, 'JobType': 'Processing'})

         JobState.runFailure("jobClassID1","jobInstanceID1.1",
              "some.location1.1","job/Report/Location1.1.xml")
         JobState.submit("jobClassID1")
        except StandardError, ex:
            msg = "Failed State Change TestA:\n"
            msg += str(ex)
            self.fail(msg)
        Session.commit_all()
        Session.close_all()



    def testB(self):
        """change state test"""
        try:
         JobState.register("jobClassID2","Processing",2,1,"myWorkflowID")
         JobState.create("jobClassID2","cacheDir/location/2somewhere")
Ejemplo n.º 4
0
    def handleError(self,payload):
         """
         The payload of a job failure is a url to the job report
         """
         jobReportUrl= payload
         # prepare to retrieve the job report file.
         # NOTE: we assume that the report file has a relative unique name
         # NOTE: if that is not the case we need to add a unique identifier to it.
         slash = jobReportUrl.rfind('/')
         fileName = jobReportUrl[slash+1:]
         urllib.urlretrieve(jobReportUrl, \
              self.args['jobReportLocation']+'/'+fileName)
         logging.debug(">RunFailureHandler<:Retrieving job report from %s " % jobReportUrl)

         jobReport=readJobReport(self.args['jobReportLocation']+'/'+fileName)
         #NOTE: is this the right way to extract the job id.
         jobId=jobReport[0].jobSpecId
         logging.debug(">RunFailureHandler<:Retrieving jobId from job report "+\
                       "(used to dynamically load error handler) " \
                       "jobId="+str(jobId))

         # create the jobReportLocation jobId hierarchy if not exists.
         pipe=os.popen("mkdir -p "+self.args['jobReportLocation']+'/'+jobId)
         pipe.close()
         # move the report file to this new location.
         pipe=os.popen("mv "+self.args['jobReportLocation']+'/'+fileName+" "+ \
                       self.args['jobReportLocation']+'/'+jobId)
         logging.debug(">RunFailureHandler<:Moving job report to permanent storage: " \
                       +self.args['jobReportLocation']+'/'+jobId)
         pipe.close()


         reportLocation=self.args['jobReportLocation']+'/'+ \
                           jobId+'/'+fileName
         generalInfo=JobState.general(jobId)
         # a submit event with delay
         delay=int(self.args['DelayFactor'])*(int(generalInfo['Retries']+1))
         delay=convertSeconds(delay) 
         logging.debug(">RunFailureHandler<: re-submitting with delay (h:m:s) "+\
                      str(delay))

         if self.args['ReportAction'] == 'move' :
            # count how many files are in the dir (to generate unique ids
            # when moving files
            try:
               lastID = len(os.listdir(os.path.dirname(payload)))
               target = os.path.join(os.path.dirname(payload),\
               os.path.basename(payload).split('.')[0] +\
               str(lastID) +\
               '.xml')
               logging.debug('Moving file: '+ payload + ' to: ' + target)
               shutil.move(payload,target)
            except:
               pass
         try:
              JobState.runFailure(jobId,jobReportLocation= reportLocation)

              # check the cache dir size. If it is beyond the threshold, purge it.
              dirSizeBytes=dirSize(generalInfo['CacheDirLocation'],0,0,0)
              dirSizeMegaBytes=convertSize(dirSizeBytes,'m')
              logging.debug(">RunFailureHandler<:Cache dir. size is "+\
                            str(dirSizeMegaBytes)+" MB. Maximum allowed is "+\
                            str(self.maxCacheDirSizeMB)+" MB ")
              jobspecfile="%s/%s-JobSpec.xml" % (generalInfo['CacheDirLocation'],jobId)
              # if necessary first a partial cleanup is done, which after it
              # is finished publishes the proper event.

              # retrieve the number of retries and publish
              if(float(dirSizeMegaBytes)>float(self.maxCacheDirSizeMB)):
                  newPayload=jobId+",SubmitJob,"+jobId+","+str(delay)
                  logging.debug(">RunFailureHandler<: Reached maximum cache size. "+\
                      "Performing partial cache cleanup first.")
                  self.publishEvent("PartialJobCleanup",newPayload,delay)
              else:
                  logging.debug(">RunFailureHandler<:Registered "+\
                                "a job run failure,"\
                                "publishing a submit job event")
                  if self.args['QueueFailures']:
                      JobQueueAPI.reQueueJob(jobId)
                  else:
                      self.publishEvent("SubmitJob",jobspecfile,delay)

         except ProdException,ex:
              if(ex["ErrorNr"]==3013): 
                  logging.debug(">RunFailureHandler<:Registered "+\
                  "a job run failure "+ \
                  "Maximum number of retries reached!" +\
                  " Submitting a failure job and cleanup event ")
                  JobState.failed(jobId)
                  self.publishEvent("FailureCleanup",(jobId))
                  self.publishEvent("GeneralJobFailure",(jobId))