def dispatch(self, job, run_times):
    """Dispatch *job* on this host for the given due run times.

    Returns a DispatchCode:
      * IGNORED          -- this host is not in the job's allow-list, or
                            (host_strategy == 1) another host still holds
                            the ZooKeeper run lock.
      * FAIL_TO_DISPATCH -- ZooKeeper is not connected, or the ephemeral
                            lock node could not be created.

    NOTE(review): on the success path this method falls through without an
    explicit return (the caller compares against DispatchCode.DONE) --
    confirm the remainder of the dispatch logic lives elsewhere.
    """
    # No sense in running repeatedly within one dispatch: keep only the
    # latest run time.
    if len(run_times) > 1:
        run_times = run_times[-1:]
    run_time = run_times[0]

    # Hosts allowed to run this job. Per the original author's note the
    # separator class includes full-width (Chinese) comma/semicolon
    # variants as well -- verify they survived file encoding.
    hosts = re.split('[,;,; \\r\\n]', job.conf.hosts)
    if self._local_hostname not in hosts:
        logging.error('job ignored. local ip %s not in hosts %s' %
                      (self._local_hostname, job.conf.hosts))
        return DispatchCode.IGNORED

    # host_strategy == 1: run on exactly one of the allowed hosts, using an
    # ephemeral ZooKeeper node as a distributed run lock.
    if job.conf.host_strategy == 1:
        if not self._zk.client_state == KazooState.CONNECTED:
            msg = 'zk state is %s at host %s' % (self._zk.client_state, self.ip)
            logging.error(msg)
            add_log(job.conf, run_time=datetime_to_utc_timestamp(run_time), output=msg)
            return DispatchCode.FAIL_TO_DISPATCH
        else:
            running_path = self._get_job_running_path(job)
            if self._zk.exists(running_path):
                # Another run is still in progress somewhere: skip this one.
                data, stat = self._zk.get(running_path)
                logging.info('job ignored cause of exist_strategy=1 and the last running still going on. id=%s name=%s run_time=%s host=%s zk data:%s' %
                             (job.id, job.conf.name, run_time, self.ip, data.decode("utf-8")))
                return DispatchCode.IGNORED
            else:
                try:
                    self._zk.ensure_path(self._zk_root)
                    # default=str: run_time is a datetime and json.dumps
                    # would otherwise raise TypeError, sending every
                    # dispatch into the failure branch below.
                    self._zk.create(running_path,
                                    json.dumps({'job': job.id,
                                                'host': self._local_hostname,
                                                'run_time': run_time},
                                               default=str),
                                    ephemeral=True)
                except Exception:
                    # exception() (vs error()) records the traceback.
                    logging.exception('job ignored cause of fail to create zk ephemeral node. id=%s name=%s run_time=%s host=%s zk path:%s' %
                                      (job.id, job.conf.name, run_time, self.ip, running_path))
                    return DispatchCode.FAIL_TO_DISPATCH
def _get_jobs(self, *conditions):
    """Load active jobs (status == 1) matching *conditions* from wm_jobs.

    Rows that fail to reconstitute are logically deleted (status set to
    '2') and a history record is written; they are not returned.

    :param conditions: optional SQLAlchemy filter clauses; when empty,
        every row is selected (including non-active ones -- the status
        filter only applies together with *conditions*).
    :return: list of reconstituted job objects
    """
    jobs = []
    selectable = select([x for x in self.wm_jobs_t.c])
    if conditions:
        selectable = selectable.where(*conditions).where(self.wm_jobs_t.c.status == 1)
    failed_job_ids = set()
    for row in self.engine.execute(selectable):
        try:
            jobs.append(self._reconstitute_job(row))
        # Narrowed from a bare except: do not swallow SystemExit /
        # KeyboardInterrupt.
        except Exception:
            logging.exception('Unable to restore job "%s" -- removing it', row.id)
            failed_job_ids.add(row.id)

    # Logically delete all the jobs we failed to restore.
    if failed_job_ids:
        # str() guards against non-string ids -- ','.join raises
        # TypeError on ints.
        msg = 'job %s update status to 2 cause of failing to _reconstitute_job' % \
              ','.join(str(job_id) for job_id in failed_job_ids)
        logging.error(msg)
        update = self.wm_jobs_t.update().where(
            self.wm_jobs_t.c.id.in_(failed_job_ids)).values(status='2')
        self.engine.execute(update)
        # Record the failure in job history under a dummy conf (no single
        # job conf applies to the batch).
        from apscheduler.history import add_log
        conf = JobConf()
        conf.id = 0
        conf.cmd = ' '
        add_log(conf, output=msg)
    return jobs
def _get_jobs(self, *conditions):
    """Load active jobs (status == 1) matching *conditions* from wm_jobs.

    Rows that fail to reconstitute are logically deleted (status set to
    '2') and a history record is written; they are not returned.

    :param conditions: optional SQLAlchemy filter clauses; when empty,
        every row is selected (including non-active ones -- the status
        filter only applies together with *conditions*).
    :return: list of reconstituted job objects
    """
    jobs = []
    selectable = select([x for x in self.wm_jobs_t.c])
    if conditions:
        selectable = selectable.where(*conditions).where(
            self.wm_jobs_t.c.status == 1)
    failed_job_ids = set()
    for row in self.engine.execute(selectable):
        try:
            jobs.append(self._reconstitute_job(row))
        # Narrowed from a bare except: do not swallow SystemExit /
        # KeyboardInterrupt.
        except Exception:
            logging.exception('Unable to restore job "%s" -- removing it', row.id)
            failed_job_ids.add(row.id)

    # Logically delete all the jobs we failed to restore.
    if failed_job_ids:
        # str() guards against non-string ids -- ','.join raises
        # TypeError on ints.
        msg = 'job %s update status to 2 cause of failing to _reconstitute_job' % ','.join(
            str(job_id) for job_id in failed_job_ids)
        logging.error(msg)
        update = self.wm_jobs_t.update().where(
            self.wm_jobs_t.c.id.in_(failed_job_ids)).values(status='2')
        self.engine.execute(update)
        # Record the failure in job history under a dummy conf (no single
        # job conf applies to the batch).
        from apscheduler.history import add_log
        conf = JobConf()
        conf.id = 0
        conf.cmd = ' '
        add_log(conf, output=msg)
    return jobs
def dispatch(self, job, run_times):
    """Dispatch *job* on this host for the given due run times.

    Returns a DispatchCode:
      * IGNORED          -- this host is not in the job's allow-list, or
                            (host_strategy == 1) another host still holds
                            the ZooKeeper run lock.
      * FAIL_TO_DISPATCH -- ZooKeeper is not connected, or the ephemeral
                            lock node could not be created.

    NOTE(review): on the success path this method falls through without an
    explicit return (the caller compares against DispatchCode.DONE) --
    confirm the remainder of the dispatch logic lives elsewhere.
    """
    # No sense in running repeatedly within one dispatch: keep only the
    # latest run time.
    if len(run_times) > 1:
        run_times = run_times[-1:]
    run_time = run_times[0]

    # Hosts allowed to run this job. Per the original author's note the
    # separator class includes full-width (Chinese) comma/semicolon
    # variants as well -- verify they survived file encoding.
    hosts = re.split('[,;,; \\r\\n]', job.conf.hosts)
    if self._local_hostname not in hosts:
        logging.error('job ignored. local ip %s not in hosts %s' %
                      (self._local_hostname, job.conf.hosts))
        return DispatchCode.IGNORED

    # host_strategy == 1: run on exactly one of the allowed hosts, using an
    # ephemeral ZooKeeper node as a distributed run lock.
    if job.conf.host_strategy == 1:
        if not self._zk.client_state == KazooState.CONNECTED:
            msg = 'zk state is %s at host %s' % (self._zk.client_state, self.ip)
            logging.error(msg)
            add_log(job.conf, run_time=datetime_to_utc_timestamp(run_time), output=msg)
            return DispatchCode.FAIL_TO_DISPATCH
        else:
            running_path = self._get_job_running_path(job)
            if self._zk.exists(running_path):
                # Another run is still in progress somewhere: skip this one.
                data, stat = self._zk.get(running_path)
                logging.info(
                    'job ignored cause of exist_strategy=1 and the last running still going on. id=%s name=%s run_time=%s host=%s zk data:%s'
                    % (job.id, job.conf.name, run_time, self.ip, data.decode("utf-8")))
                return DispatchCode.IGNORED
            else:
                try:
                    self._zk.ensure_path(self._zk_root)
                    # default=str: run_time is a datetime and json.dumps
                    # would otherwise raise TypeError, sending every
                    # dispatch into the failure branch below.
                    self._zk.create(running_path,
                                    json.dumps({
                                        'job': job.id,
                                        'host': self._local_hostname,
                                        'run_time': run_time
                                    }, default=str),
                                    ephemeral=True)
                except Exception:
                    # exception() (vs error()) records the traceback.
                    logging.exception(
                        'job ignored cause of fail to create zk ephemeral node. id=%s name=%s run_time=%s host=%s zk path:%s'
                        % (job.id, job.conf.name, run_time, self.ip, running_path))
                    return DispatchCode.FAIL_TO_DISPATCH
def _process_jobs(self):
    """
    Iterates through jobs in every jobstore, starts jobs that are due and
    figures out how long to wait for the next round.
    """
    # Fail fast if the scheduler was not fully initialized.
    if not self._executor:
        logging.error('_executor init error')
        raise SchedulerInitError()
    if not self._dispatcher:
        logging.error('_dispatcher init error')
        raise SchedulerInitError()
    if not self._jobstore:
        logging.error('_jobstore init error')
        raise SchedulerInitError()

    logging.debug('Looking for jobs to run')
    now = datetime.now(self.timezone)
    next_wakeup_time = None

    with self._jobstore_lock:
        for job in self._jobstore.get_due_jobs(now):
            run_times = job._get_run_times(now)
            # restore_strategy: 0 -> catch up only once (latest run time);
            # otherwise catch up for every missed run.
            run_times = run_times[-1:] if len(
                run_times) > 1 and job.conf.restore_strategy == 0 else run_times
            if run_times:
                code = None
                try:
                    logging.info("dispatch_job jobs %s cmd=%s run_time=%s" %
                                 (job.id, job.conf.cmd, run_times[-1]))
                    code = self._dispatcher.dispatch(job, run_times)
                except MaxInstancesReachedError:
                    logging.warning(
                        'Execution of job "%s" skipped: maximum number of running instances reached (%d)',
                        job, job.max_instances)
                # py3-compatible form (was: except Exception, e).
                except Exception as e:
                    logging.exception('Error dispatch job "%s" to dispatcher "%s"',
                                      job.id, str(e))

                if code == DispatchCode.DONE:
                    # Update the job if it has a next execution time,
                    # otherwise remove it from the job store.
                    job_next_run = job.trigger.get_next_fire_time(run_times[-1], now)
                    if job_next_run:
                        job._modify(next_run_time=job_next_run)
                        self._jobstore.update_job(job)
                    else:
                        self._jobstore.remove_job(job.id)
                        from apscheduler.history import add_log
                        add_log(job.conf,
                                output='job will NOT be run any more, so remove it.')
                        # warning() is the non-deprecated spelling of warn().
                        logging.warning(
                            'job will NOT be run any more, so remove it. job_id=%s'
                            % job.conf.id)

        # Set a new next wakeup time if there isn't one yet or the jobstore
        # has an even earlier one.
        # NOTE(review): next_wakeup_time is not consumed in this visible
        # span -- presumably used further down; confirm against the full file.
        jobstore_next_run_time = self._jobstore.get_next_run_time()
        logging.debug("jobstore_next_run_time %s " % jobstore_next_run_time)
        if jobstore_next_run_time and (next_wakeup_time is None
                                       or jobstore_next_run_time < next_wakeup_time):
            next_wakeup_time = jobstore_next_run_time
def _process_jobs(self):
    """
    Iterates through jobs in every jobstore, starts jobs that are due and
    figures out how long to wait for the next round.
    """
    # Fail fast if the scheduler was not fully initialized.
    if not self._executor:
        logging.error('_executor init error')
        raise SchedulerInitError()
    if not self._dispatcher:
        logging.error('_dispatcher init error')
        raise SchedulerInitError()
    if not self._jobstore:
        logging.error('_jobstore init error')
        raise SchedulerInitError()

    logging.debug('Looking for jobs to run')
    now = datetime.now(self.timezone)
    next_wakeup_time = None

    with self._jobstore_lock:
        for job in self._jobstore.get_due_jobs(now):
            run_times = job._get_run_times(now)
            # restore_strategy: 0 -> catch up only once (latest run time);
            # otherwise catch up for every missed run.
            run_times = run_times[-1:] if len(run_times) > 1 and job.conf.restore_strategy == 0 else run_times
            if run_times:
                code = None
                try:
                    logging.info("dispatch_job jobs %s cmd=%s run_time=%s" %
                                 (job.id, job.conf.cmd, run_times[-1]))
                    code = self._dispatcher.dispatch(job, run_times)
                except MaxInstancesReachedError:
                    logging.warning(
                        'Execution of job "%s" skipped: maximum number of running instances reached (%d)',
                        job, job.max_instances)
                # py3-compatible form (was: except Exception, e).
                except Exception as e:
                    logging.exception('Error dispatch job "%s" to dispatcher "%s"', job.id, str(e))

                if code == DispatchCode.DONE:
                    # Update the job if it has a next execution time,
                    # otherwise remove it from the job store.
                    job_next_run = job.trigger.get_next_fire_time(run_times[-1], now)
                    if job_next_run:
                        job._modify(next_run_time=job_next_run)
                        self._jobstore.update_job(job)
                    else:
                        self._jobstore.remove_job(job.id)
                        from apscheduler.history import add_log
                        add_log(job.conf, output='job will NOT be run any more, so remove it.')
                        # warning() is the non-deprecated spelling of warn().
                        logging.warning('job will NOT be run any more, so remove it. job_id=%s' % job.conf.id)

        # Set a new next wakeup time if there isn't one yet or the jobstore
        # has an even earlier one.
        # NOTE(review): next_wakeup_time is not consumed in this visible
        # span -- presumably used further down; confirm against the full file.
        jobstore_next_run_time = self._jobstore.get_next_run_time()
        logging.debug("jobstore_next_run_time %s " % jobstore_next_run_time)
        if jobstore_next_run_time and (next_wakeup_time is None or jobstore_next_run_time < next_wakeup_time):
            next_wakeup_time = jobstore_next_run_time