Example #1
0
 def get_queryset(self, request):
     """Restrict the changelist to jobs that have not reached a terminal state."""
     terminal_states = [
         Failed.instance().name,
         Completed.instance().name,
         CompletedWithWarning.instance().name,
     ]
     base_qs = super(RunningJobAdmin, self).get_queryset(request)
     return base_qs.exclude(state__in=terminal_states)
Example #2
0
    def run_all_jobs(first_run=True):
        """Run every unfinished job sequentially and tally the outcomes.

        Iterates over all jobs that are neither failed nor completed
        (ordered by id) and drives each through the job state machine.

        Args:
            first_run: passed straight through to ``JobStatemachine.run``.

        Returns:
            Tuple ``(succeed_jobs, failed_jobs, ignored_jobs, error_jobs)``.
        """
        succeed_jobs = 0
        failed_jobs = 0
        ignored_jobs = 0
        error_jobs = 0
        for j in Job.objects.exclude(
                state__in=[Failed.instance().name,
                           Completed.instance().name]).order_by('id'):
            try:
                JobStatemachine.run(j, first_run)
                if j.state == "Completed":
                    # completed but never launched means there was nothing to do
                    if j.launched is None:
                        ignored_jobs += 1
                    else:
                        succeed_jobs += 1
                elif j.state == "Failed":
                    failed_jobs += 1
                else:
                    error_jobs += 1
            except Exception:
                # was a bare "except:"; narrowed so KeyboardInterrupt and
                # SystemExit can still abort the whole batch run
                logger.error(
                    "job(id={0},name={1}) runs into a exception{2}".format(
                        j.id, j.publish.name,
                        JobState.get_exception_message()))
                error_jobs += 1

        return (succeed_jobs, failed_jobs, ignored_jobs, error_jobs)
    def run_all_jobs(first_run=True):
        """Run every unfinished job sequentially and tally the outcomes.

        Iterates over all jobs that are neither failed nor completed
        (ordered by id) and drives each through the job state machine.

        Args:
            first_run: passed straight through to ``JobStatemachine.run``.

        Returns:
            Tuple ``(succeed_jobs, failed_jobs, ignored_jobs, error_jobs)``.
        """
        succeed_jobs = 0
        failed_jobs = 0
        ignored_jobs = 0
        error_jobs = 0
        for j in Job.objects.exclude(
                state__in=[Failed.instance().name,
                           Completed.instance().name]).order_by('id'):
            try:
                JobStatemachine.run(j, first_run)
                if j.state == "Completed":
                    # completed but never launched means there was nothing to do
                    if j.launched is None:
                        ignored_jobs += 1
                    else:
                        succeed_jobs += 1
                elif j.state == "Failed":
                    failed_jobs += 1
                else:
                    error_jobs += 1
            except Exception:
                # was a bare "except:"; narrowed so KeyboardInterrupt and
                # SystemExit can still abort the whole batch run
                logger.error(
                    "job(id={0},name={1}) runs into a exception{2}".format(
                        j.id, j.publish.name,
                        JobState.get_exception_message()))
                error_jobs += 1

        return (succeed_jobs, failed_jobs, ignored_jobs, error_jobs)
Example #4
0
 def get_queryset(self, request):
     """Restrict the changelist to launched jobs not yet in a terminal state."""
     finished_states = [
         Failed.instance().name,
         Completed.instance().name,
         CompletedWithWarning.instance().name,
     ]
     base_qs = super(EffectiveJobAdmin, self).get_queryset(request)
     return base_qs.exclude(state__in=finished_states, launched=None)
Example #5
0
 def sync_status(self, o):
     """Render a link to the publish sync status page for finished, launched jobs."""
     finished_states = (Completed.instance().name,
                        CompletedWithWarning.instance().name)
     if o.launched and o.state in finished_states:
         return "<a href='/monitor/publishsyncstatus/?q={0}'>Sync status</a>".format(
             o.id)
     return ""
 def run_all_jobs(first_run=True):
     """Run every unfinished job sequentially.

     Per-job errors are logged rather than raised so one broken job does not
     stop the batch.

     Args:
         first_run: passed straight through to ``JobStatemachine.run``.
     """
     for j in Job.objects.exclude(
             state__in=[Failed.instance().name,
                        Completed.instance().name]).order_by('id'):
         try:
             JobStatemachine.run(j, first_run)
         except Exception:
             # was a bare "except:"; narrowed so KeyboardInterrupt and
             # SystemExit can still abort the whole batch run
             logger.error(
                 "job(id={0},name={1}) runs into a exception{2}".format(
                     j.id, j.publish.name,
                     JobState.get_exception_message()))
Example #7
0
    def clean(self):
        """Delete outdated jobs.

        Keeps at least ``self.min_jobs`` latest successful (completed and
        launched) jobs per publish; when ``self.expire_days`` is set, only
        jobs finished before that cutoff are deleted.  Jobs still referenced
        by an Input or Normalise row are never removed.
        """
        #find all publishes which has published at least one time
        outdated_date = None
        if self.expire_days:
            outdated_date = timezone.now() - timedelta(self.expire_days)
            self.logger.info(
                "Begin to clean the jobs finished before {0}, but at least {1} latest successful jobs for each publish will be preserved."
                .format(outdated_date, self.min_jobs))
        else:
            self.logger.info(
                "Begin to clean all jobs, except {1} latest successful jobs for each publish"
                .format(outdated_date, self.min_jobs))

        deleted_jobs = 0
        for p in Publish.objects.filter(job_id__isnull=False):
            #get the earliest job which should be kept.
            earliest_job = None
            try:
                earliest_job = p.job_set.filter(
                    state=Completed.instance().name,
                    launched__isnull=False).order_by('-finished')[self.min_jobs
                                                                  - 1]
            except IndexError:
                #the number of existing jobs is less than min_jobs, no need to clean jobs.
                continue
            jobs = p.job_set.filter(pk__lt=earliest_job.pk)
            if self.expire_days:
                #if specified expire days, only expired jobs will be deleted
                jobs = jobs.filter(finished__lt=outdated_date)

            #find all the publish's jobs and delete it.
            for j in jobs:
                #check whether this job is referenced by Input or Normalise
                if Input.objects.filter(
                        job_id=j.pk).exists() or Normalise.objects.filter(
                            job_id=j.pk).exists():
                    #still referenced by input or normalise, can not delete
                    continue
                j.delete()
                deleted_jobs += 1
                self.logger.debug("Delete outdated job({0})".format(j.pk))

        if deleted_jobs == 1:
            self.logger.info(
                "{0} outdated job has been deleted.".format(deleted_jobs))
        elif deleted_jobs > 1:
            #fixed plural form: "jobs have", not "job have"
            self.logger.info(
                "{0} outdated jobs have been deleted.".format(deleted_jobs))
        else:
            self.logger.info("Not find any outdated jobs.")
 def run_all_jobs(first_run=True):
     """Run every unfinished job sequentially.

     Per-job errors are logged rather than raised so one broken job does not
     stop the batch.

     Args:
         first_run: passed straight through to ``JobStatemachine.run``.
     """
     for j in Job.objects.exclude(
             state__in=[Failed.instance().name,
                        Completed.instance().name]).order_by('id'):
         try:
             JobStatemachine.run(j, first_run)
         except Exception:
             # was a bare "except:"; narrowed so KeyboardInterrupt and
             # SystemExit can still abort the whole batch run
             logger.error(
                 "job(id={0},name={1}) runs into a exception{2}".format(
                     j.id, j.publish.name,
                     JobState.get_exception_message()))
    def clean(self):
        """Delete outdated jobs and return the number removed.

        Keeps at least ``self.min_jobs`` latest successful (completed and
        launched) jobs per publish; when ``self.expire_days`` is set, only
        jobs finished before that cutoff are deleted.  Jobs still referenced
        by an Input or Normalise row are never removed.  Jobs that were never
        attached to a publish are purged after a fixed 7 days.

        Returns:
            int: number of deleted jobs.
        """
        #find all publishes which has published at least one time
        outdated_date = None
        if self.expire_days:
            outdated_date = timezone.localtime(timezone.now()) - timedelta(self.expire_days)
            self.logger.info("Begin to clean the jobs finished before {0}, but at least {1} latest successful jobs for each publish will be preserved.".format(outdated_date,self.min_jobs))
        else:
            self.logger.info("Begin to clean all jobs, except {1} latest successful jobs for each publish".format(outdated_date,self.min_jobs))

        deleted_jobs = 0
        for p in Publish.objects.filter(job_id__isnull = False):
            #get the earliest job which should be kept.
            earliest_job = None
            try:
                earliest_job = p.job_set.filter(state=Completed.instance().name,launched__isnull=False).order_by('-finished')[self.min_jobs - 1]
            except IndexError:
                #the number of existing jobs is less than min_jobs, no need to clean jobs.
                continue
            jobs = p.job_set.filter(pk__lt = earliest_job.pk)
            if self.expire_days:
                #if specified expire days, only expired jobs will be deleted
                jobs = jobs.filter(finished__lt = outdated_date)

            #find all the publish's jobs and delete it.
            for j in jobs:
                #check whether this job is referenced by Input or Normalise
                if Input.objects.filter(job_id=j.pk).exists() or Normalise.objects.filter(job_id=j.pk).exists():
                    #still referenced by input or normalise, can not delete
                    continue
                j.delete()
                deleted_jobs += 1
                self.logger.debug("Delete outdated job({0})".format(j.pk))

        #publish-less jobs are purged after a fixed 7 days, regardless of expire_days
        outdated_date = timezone.now() - timedelta(7)
        for j in Job.objects.filter(publish__isnull = True,created__lt = outdated_date):
            j.delete()
            deleted_jobs += 1
            self.logger.debug("Delete outdated job({0})".format(j.pk))

        if deleted_jobs == 1:
            self.logger.info("{0} outdated job has been deleted.".format(deleted_jobs))
        elif deleted_jobs > 1:
            #fixed plural form: "jobs have", not "job have"
            self.logger.info("{0} outdated jobs have been deleted.".format(deleted_jobs))
        else:
            self.logger.info("Not find any outdated jobs.")

        return deleted_jobs
Example #10
0
 def sync_status(self, o):
     """Render a link to the publish sync status page for completed, launched jobs."""
     is_synced = o.launched and o.state == Completed.instance().name
     if not is_synced:
         return ""
     return "<a href='/monitor/publishsyncstatus/?q={0}'>Sync status</a>".format(o.id)
Example #11
0
 def get_queryset(self, request):
     """Restrict the changelist to launched jobs that are neither failed nor completed."""
     finished_states = [Failed.instance().name, Completed.instance().name]
     base_qs = super(EffectiveJobAdmin, self).get_queryset(request)
     return base_qs.exclude(state__in=finished_states, launched=None)
Example #12
0
 def get_queryset(self, request):
     """Restrict the changelist to jobs that are neither failed nor completed."""
     finished_states = [Failed.instance().name, Completed.instance().name]
     base_qs = super(RunningJobAdmin, self).get_queryset(request)
     return base_qs.exclude(state__in=finished_states)
    def execute(self, job, previous_state):
        """
        Decide whether the harvest job may proceed, and return an
        (outcome, message) tuple.

        The job will continue to wait, if
        1. If the publish is still in a running harvest
        2. some dependent input is failed with the same batch id
        3. some dependent normalise is failed with the same batch id
        4. some dependent input is harvested by other jobs with different batch_id and still dependent by other jobs.
        """
        if job.publish.running > 0:
            #harvest job for the same publish is still running.
            return (HarvestStateOutcome.failed,
                    "Harvest job for the same publish is still running.")
        else:
            result = None
            #if some inputs already failed, then the job will continue to wait
            for o in job.inputs:
                if o.job_batch_id and o.job_batch_id == job.batch_id:
                    #input is already executed by the job belonging to the same job batch
                    if o.job_status:
                        #execute successful
                        pass
                    else:
                        #execute failed
                        try:
                            j = Job.objects.get(pk=o.job_id)
                            if j.state in [
                                    Failed.instance().name,
                                    Completed.instance().name,
                                    CompletedWithWarning.instance().name
                            ]:
                                #failed job already finished. current job can execute
                                pass
                            else:
                                #failed job is still running, current job must wait until the failed job execute successfully or cancelled
                                result = (HarvestStateOutcome.failed,
                                          o.job_message)
                                break
                        except Exception:
                            #was a bare "except:"; narrowed so Ctrl-C/SystemExit
                            #still propagate.  Missing job means current job can execute.
                            pass
                elif o.job_batch_id:
                    #input is already executed by the job belonging to different job batch,
                    dependent_jobs = []
                    for j in Job.objects.filter(
                            batch_id=o.job_batch_id).exclude(state__in=[
                                Failed.instance().name,
                                Completed.instance().name,
                                CompletedWithWarning.instance().name
                            ]):
                        for i in j.inputs:
                            if i.id == o.id:
                                #input is used by other running jobs, the current job will continue to wait
                                dependent_jobs.append({
                                    "id": j.id,
                                    "batch_id": j.batch_id,
                                    "publish": j.publish.table_name,
                                    "state": j.state
                                })

                    if dependent_jobs:
                        #still have some running harvest job dependents on the inputed data. the current job must wait until all dependent job finished.
                        result = (
                            HarvestStateOutcome.failed,
                            "The dependent input {0} is still used by running jobs {1}"
                            .format(o.name, dependent_jobs))
                        break
                else:
                    #input is not executed before or no job is dependent on it.
                    pass

            if result:
                #already failed
                return result

            #if some normalise already failed, then the job will continue to wait
            for o in job.normalises:
                if o.job_batch_id and o.job_batch_id == job.batch_id:
                    #normalise is already executed
                    if o.job_status:
                        #executed successful
                        pass
                    else:
                        #executed failed
                        try:
                            j = Job.objects.get(pk=o.job_id)
                            if j.state in [
                                    Failed.instance().name,
                                    Completed.instance().name,
                                    CompletedWithWarning.instance().name
                            ]:
                                #failed job already cancelled. current job can execute
                                pass
                            else:
                                #failed job is still running, current job must wait until the failed job execute successfully or cancelled
                                result = (HarvestStateOutcome.failed,
                                          o.job_message)
                                break
                        except Exception:
                            #was a bare "except:"; narrowed so Ctrl-C/SystemExit
                            #still propagate.  Missing job means current job can execute.
                            pass
                else:
                    #normalise is not executed before
                    pass

            if not result:
                result = (HarvestStateOutcome.succeed, None)

                if job.publish.is_up_to_date(job):
                    #publish is up to date, no need to run.
                    if (job.is_manually_created):
                        result = (
                            HarvestStateOutcome.succeed,
                            "Publish is up to date, but forced by custodian")
                    else:
                        return (
                            HarvestStateOutcome.up_to_date,
                            "Publish is up to date, no need to publish again.")

            return result
    def execute(self, job, previous_state):
        """
        Import all outdated input tables for the job, returning an
        (outcome, message) tuple.

        Ignore an input if it is already imported with the same batchid;
        fail if some input is failed with the same batchid.  Every
        processed input records its job state/status so other jobs do not
        repeat the work.
        """
        self._pre_execute(job, previous_state)
        result = None
        job_state = None
        #go through all outdated input tables to import.
        for o in self._input_tables(job, previous_state):
            if o.job_batch_id and o.job_batch_id == job.batch_id:
                #input table already executed by a job belonging to the same batch
                job_state = HarvestState.get_jobstate(o.job_state)
                if job_state == self:
                    #this input table is on the same state.
                    if o.job_status:
                        #already executed successfully
                        continue
                    elif o.job_id == job.id:
                        #failed by the same job. execute it again.
                        pass
                    else:
                        #failed by other job, check whether the failed job is still running or finished.
                        try:
                            j = Job.objects.get(pk=o.job_id)
                            if j.state in [
                                    Failed.instance().name,
                                    Completed.instance().name,
                                    CompletedWithWarning.instance().name
                            ]:
                                #failed job has been failed or completed, current job can execute again
                                pass
                            else:
                                #failed job is still running, current job must wait until the failed job cancelled or execute successfully.
                                result = (HarvestStateOutcome.failed,
                                          o.job_message)
                                break
                        except Exception:
                            #was a bare "except:"; narrowed so Ctrl-C/SystemExit
                            #still propagate.  Missing job means current job can execute again.
                            pass
                elif self.is_upstate(job_state):
                    #this input table is on a state after the current state, the current state should have been executed successfully.
                    continue
                else:
                    #this input table is on a state before the current state
                    if o.job_status:
                        #execute the current state
                        pass
                    else:
                        #In general, it is impossible to reach here.
                        #because the logic can go here only when the previous state has been executed successfully.
                        result = (HarvestStateOutcome.failed, o.job_message)
                        break
            #execute
            try:
                result = self._execute(job, previous_state, o)
                if result and result[0] != JobStateOutcome.succeed:
                    #failed
                    o.job_status = False
                    o.job_message = result[1]
                    break
                else:
                    #update the status in input table to prevent other job execute it again
                    o.job_status = True
                    o.job_message = result[
                        1] if result and result[1] else 'Succeed'
            except KeyboardInterrupt:
                result = (HarvestStateOutcome.shutdown,
                          self.get_exception_message())
                #update the status in input table to prevent other job execute it again
                o.job_status = False
                o.job_message = result[1]
                break
            except SystemExit:
                result = (HarvestStateOutcome.shutdown,
                          self.get_exception_message())
                #update the status in input table to prevent other job execute it again
                o.job_status = False
                o.job_message = result[1]
                break
            except Exception:
                #was a bare "except:"; KeyboardInterrupt/SystemExit are handled
                #explicitly above, so only real errors are converted to a failure
                result = (HarvestStateOutcome.failed,
                          self.get_exception_message())
                #update the status in input table to prevent other job execute it again
                o.job_status = False
                o.job_message = result[1]
                break
            finally:
                #always persist the per-input bookkeeping, even on error/shutdown
                o.job_state = self.name
                o.job_batch_id = job.batch_id
                o.job_id = job.id
                o.save(update_fields=[
                    'job_state', 'job_status', 'job_message', 'job_batch_id',
                    'job_id'
                ])

        if not result:
            result = (HarvestStateOutcome.succeed, None)

        return result
    def execute(self, job, previous_state):
        """
        Decide whether the harvest job may proceed, and return an
        (outcome, message) tuple.

        The job will continue to wait, if
        1. If the publish is still in a running harvest
        2. some dependent input is failed with the same batch id
        3. some dependent normalise is failed with the same batch id
        4. some dependent input is harvested by other jobs with different batch_id and still dependent by other jobs.
        """
        if job.publish.running > 0:
            #harvest job for the same publish is still running.
            return (HarvestStateOutcome.failed, "Harvest job for the same publish is still running.")
        else:
            result = None
            #if some inputs already failed, then the job will continue to wait
            for o in job.inputs:
                if o.job_batch_id and o.job_batch_id == job.batch_id:
                    #input is already executed by the job belonging to the same job batch
                    if o.job_status:
                        #execute successful
                        pass
                    else:
                        #execute failed
                        try:
                            j = Job.objects.get(pk = o.job_id)
                            if j.state in [Failed.instance().name,Completed.instance().name]:
                                #failed job already cancelled. current job can execute
                                pass
                            else:
                                #failed job is still running, current job must wait until the failed job execute successfully or cancelled
                                result = (HarvestStateOutcome.failed,o.job_message)
                                break
                        except Exception:
                            #was a bare "except:"; narrowed so Ctrl-C/SystemExit
                            #still propagate.  Missing job means current job can execute.
                            pass
                elif o.job_batch_id:
                    #input is already executed by the job belonging to different job batch,
                    dependent_jobs = []
                    for j in Job.objects.filter(batch_id = o.job_batch_id).exclude(state__in = [Failed.instance().name,Completed.instance().name]):
                        for i in j.inputs:
                            if i.id == o.id:
                                #input is used by other running jobs, the current job will continue to wait
                                dependent_jobs.append({"id":i.id, "publish":i.name, "state": i.state})

                    if dependent_jobs:
                        #still have some running harvest job dependents on the inputed data. the current job must wait until all dependent job finished.
                        result = (HarvestStateOutcome.failed,"The dependent input {0} is still used by running jobs {1}".format(o.name, dependent_jobs))
                        break
                else:
                    #input is not executed before or no job is dependent on it.
                    pass

            if result:
                #already failed
                return result

            #if some normalise already failed, then the job will continue to wait
            for o in job.normalises:
                if o.job_batch_id and o.job_batch_id == job.batch_id:
                    #normalise is already executed
                    if o.job_status:
                        #executed successful
                        pass
                    else:
                        #executed failed
                        try:
                            j = Job.objects.get(pk = o.job_id)
                            if j.state in [Failed.instance().name,Completed.instance().name]:
                                #failed job already cancelled. current job can execute
                                pass
                            else:
                                #failed job is still running, current job must wait until the failed job execute successfully or cancelled
                                result = (HarvestStateOutcome.failed,o.job_message)
                                break
                        except Exception:
                            #was a bare "except:"; narrowed so Ctrl-C/SystemExit
                            #still propagate.  Missing job means current job can execute.
                            pass
                else:
                    #normalise is not executed before
                    pass

            if not result:
                result = (HarvestStateOutcome.succeed,None)

                if job.publish.is_up_to_date(job):
                    #publish is up to date, no need to run.
                    if (job.is_manually_created):
                        result = (HarvestStateOutcome.succeed, "Publish is up to date, but forced by custodian")
                    else:
                        return (HarvestStateOutcome.up_to_date,"Publish is up to date, no need to publish again.")

            return result
    def execute(self, job, previous_state):
        """
        Import all outdated input tables for the job, returning an
        (outcome, message) tuple.

        Ignore an input if it is already imported with the same batchid;
        fail if some input is failed with the same batchid.  Every
        processed input records its job state/status so other jobs do not
        repeat the work.
        """
        self._pre_execute(job,previous_state)
        result = None
        job_state = None
        #go through all outdated input tables to import.
        for o in self._input_tables(job,previous_state):
            if o.job_batch_id and o.job_batch_id == job.batch_id:
                #input table already executed by a job belonging to the same batch
                job_state = HarvestState.get_jobstate(o.job_state)
                if job_state == self:
                    #this input table is on the same state.
                    if o.job_status:
                        #already executed successfully
                        continue
                    elif o.job_id == job.id:
                        #failed by the same job. execute it again.
                        pass
                    else:
                        #failed by other job, check whether the failed job is still running or finished.
                        try:
                            j = Job.objects.get(pk=o.job_id)
                            if j.state in [Failed.instance().name,Completed.instance().name]:
                                #failed job has been failed or completed, current job can execute again
                                pass
                            else:
                                #failed job is still running, current job must wait until the failed job cancelled or execute successfully.
                                result = (HarvestStateOutcome.failed,o.job_message)
                                break
                        except Exception:
                            #was a bare "except:"; narrowed so Ctrl-C/SystemExit
                            #still propagate.  Missing job means current job can execute again.
                            pass
                elif self.is_upstate(job_state):
                    #this input table is on a state after the current state, the current state should have been executed successfully.
                    continue
                else:
                    #this input table is on a state before the current state
                    if o.job_status:
                        #execute the current state
                        pass
                    else:
                        #In general, it is impossible to reach here.
                        #because the logic can go here only when the previous state has been executed successfully.
                        result = (HarvestStateOutcome.failed,o.job_message)
                        break
            #execute
            try:
                result = self._execute(job,previous_state,o)
                if result and result[0] != JobStateOutcome.succeed:
                    #failed
                    o.job_status = False
                    o.job_message = result[1]
                    break
                else:
                    #update the status in input table to prevent other job execute it again
                    o.job_status = True
                    o.job_message = 'Succeed'
            except Exception:
                #was a bare "except:"; narrowed so Ctrl-C/SystemExit abort the run
                #(the finally clause below still persists the bookkeeping)
                result = (HarvestStateOutcome.failed, self.get_exception_message())
                #update the status in input table to prevent other job execute it again
                o.job_status = False
                o.job_message = result[1]
                break
            finally:
                #always persist the per-input bookkeeping, even on error
                o.job_state = self.name
                o.job_batch_id = job.batch_id
                o.job_id = job.id
                o.save(update_fields=['job_state','job_status','job_message','job_batch_id','job_id'])

        if not result:
            result = (HarvestStateOutcome.succeed,None)

        return result