class Scheduler(Plugin):
    """Plugin that registers cron and interval jobs on a background scheduler.

    Jobs are requested through the ``schedule.cron``, ``schedule.interval``
    and ``schedule.remove`` events. Every job is tracked by its identifier,
    so scheduling an identifier again replaces the previous job.
    """

    # Class-level registries (shared by every instance):
    # identifier -> job description, with the scheduler's job object stored
    # under the "job" key.
    crons = {}
    intervals = {}
    started = False

    def __init__(self):
        """Hook up the scheduling events and start the scheduler."""
        addEvent("schedule.cron", self.cron)
        addEvent("schedule.interval", self.interval)
        addEvent("schedule.remove", self.remove)

        self.sched = Sched(misfire_grace_time=60)
        self.sched.start()
        self.started = True

    def remove(self, identifier):
        """Unschedule the job(s) registered under ``identifier``, if any.

        Best effort: an identifier normally lives in only one of the two
        registries and its job may already be gone, so lookup and
        unscheduling failures are ignored.
        """
        for cron_type in ["intervals", "crons"]:
            try:
                self.sched.unschedule_job(getattr(self, cron_type)[identifier]["job"])
                log.debug("%s unscheduled %s", (cron_type.capitalize(), identifier))
            except Exception:
                # Deliberately ignored, but no longer a bare ``except`` so
                # SystemExit / KeyboardInterrupt still propagate.
                pass

    def doShutdown(self):
        """Stop the scheduler, then run the base-class shutdown."""
        self.stop()
        return super(Scheduler, self).doShutdown()

    def stop(self):
        """Shut the scheduler down if it is running."""
        if self.started:
            log.debug("Stopping scheduler")
            self.sched.shutdown()
            log.debug("Scheduler stopped")
        self.started = False

    def cron(self, identifier="", handle=None, day="*", hour="*", minute="*"):
        """Schedule ``handle`` as a cron job, replacing any job with the same identifier."""
        log.info('Scheduling "%s", cron: day = %s, hour = %s, minute = %s', (identifier, day, hour, minute))

        self.remove(identifier)
        self.crons[identifier] = {
            "handle": handle,
            "day": day,
            "hour": hour,
            "minute": minute,
            "job": self.sched.add_cron_job(handle, day=day, hour=hour, minute=minute),
        }

    def interval(self, identifier="", handle=None, hours=0, minutes=0, seconds=0):
        """Schedule ``handle`` at a fixed interval, replacing any job with the same identifier."""
        log.info(
            "Scheduling %s, interval: hours = %s, minutes = %s, seconds = %s", (identifier, hours, minutes, seconds)
        )

        self.remove(identifier)
        self.intervals[identifier] = {
            "handle": handle,
            "hours": hours,
            "minutes": minutes,
            "seconds": seconds,
            "job": self.sched.add_interval_job(handle, hours=hours, minutes=minutes, seconds=seconds),
        }
class Scheduler(Plugin):
    """Plugin that (re)creates the registered cron and interval jobs on start.

    ``self.cron``, ``self.interval`` and ``self.stop`` are referenced here
    but not defined in this block; they are expected to be provided
    elsewhere (for example by the rest of the class or its base).
    """

    # Class-level registries (shared by every instance):
    # identifier -> job description dict.
    crons = {}
    intervals = {}
    started = False

    def __init__(self):
        """Register the scheduler-related events and create the scheduler."""
        addEvent('schedule.cron', self.cron)
        addEvent('schedule.interval', self.interval)
        addEvent('schedule.start', self.start)
        addEvent('schedule.restart', self.start)
        addEvent('app.load', self.start)
        addEvent('app.shutdown', self.stop)

        self.sched = Sched(misfire_grace_time=60)

    def remove(self, identifier):
        """Unschedule the job(s) registered under ``identifier``, if any.

        The registries are named ``intervals`` and ``crons``. Looking up
        ``interval`` / ``cron`` (as this method used to) never found a
        registry, so the failure was swallowed and nothing was unscheduled.
        """
        for job_type in ['intervals', 'crons']:
            try:
                self.sched.unschedule_job(
                    getattr(self, job_type)[identifier]['job'])
                log.debug('%s unscheduled %s' % (job_type.capitalize(), identifier))
            except Exception:
                # Best effort: the identifier may be missing from this
                # registry or may not have a scheduled job yet.
                pass

    def start(self):
        """Stop the scheduler and re-add every registered cron and interval job."""
        # Stop all running
        self.stop()

        # Crons
        for identifier in self.crons:
            try:
                self.remove(identifier)
                cron = self.crons[identifier]
                job = self.sched.add_cron_job(cron['handle'],
                                              day=cron['day'],
                                              hour=cron['hour'],
                                              minute=cron['minute'])
                cron['job'] = job
            except ValueError as e:
                log.error("Failed adding cronjob: %s" % e)

        # Intervals
        for identifier in self.intervals:
            try:
                self.remove(identifier)
                interval = self.intervals[identifier]
                job = self.sched.add_interval_job(interval['handle'],
                                                  hours=interval['hours'],
                                                  minutes=interval['minutes'],
                                                  seconds=interval['seconds'],
                                                  repeat=interval['repeat'])
                interval['job'] = job
            except ValueError as e:
                log.error("Failed adding interval cronjob: %s" % e)
class Scheduler(Plugin):
    """Plugin that registers cron and interval jobs on a background scheduler.

    Jobs are requested through the ``schedule.cron``, ``schedule.interval``
    and ``schedule.remove`` events. Every job is tracked by its identifier,
    so scheduling an identifier again replaces the previous job.
    """

    # Class-level registries (shared by every instance):
    # identifier -> job description, with the scheduler's job object stored
    # under the 'job' key.
    crons = {}
    intervals = {}
    started = False

    def __init__(self):
        """Hook up the scheduling events and start the scheduler."""
        addEvent('schedule.cron', self.cron)
        addEvent('schedule.interval', self.interval)
        addEvent('schedule.remove', self.remove)

        self.sched = Sched(misfire_grace_time = 60)
        self.sched.start()
        self.started = True

    def remove(self, identifier):
        """Unschedule the job(s) registered under ``identifier``, if any.

        Best effort: an identifier normally lives in only one of the two
        registries and its job may already be gone, so lookup and
        unscheduling failures are ignored.
        """
        for cron_type in ['intervals', 'crons']:
            try:
                self.sched.unschedule_job(getattr(self, cron_type)[identifier]['job'])
                log.debug('%s unscheduled %s', (cron_type.capitalize(), identifier))
            except Exception:
                # Deliberately ignored, but no longer a bare ``except`` so
                # SystemExit / KeyboardInterrupt still propagate.
                pass

    def doShutdown(self):
        """Stop the scheduler, then run the base-class shutdown.

        The scheduler is stopped first and the base-class result is
        returned to the caller instead of being dropped.
        """
        self.stop()
        return super(Scheduler, self).doShutdown()

    def stop(self):
        """Shut the scheduler down if it is running."""
        if self.started:
            log.debug('Stopping scheduler')
            self.sched.shutdown()
            log.debug('Scheduler stopped')
        self.started = False

    def cron(self, identifier = '', handle = None, day = '*', hour = '*', minute = '*'):
        """Schedule ``handle`` as a cron job, replacing any job with the same identifier."""
        log.info('Scheduling "%s", cron: day = %s, hour = %s, minute = %s', (identifier, day, hour, minute))

        self.remove(identifier)
        self.crons[identifier] = {
            'handle': handle,
            'day': day,
            'hour': hour,
            'minute': minute,
            'job': self.sched.add_cron_job(handle, day = day, hour = hour, minute = minute)
        }

    def interval(self, identifier = '', handle = None, hours = 0, minutes = 0, seconds = 0):
        """Schedule ``handle`` at a fixed interval, replacing any job with the same identifier."""
        log.info('Scheduling %s, interval: hours = %s, minutes = %s, seconds = %s', (identifier, hours, minutes, seconds))

        self.remove(identifier)
        self.intervals[identifier] = {
            'handle': handle,
            'hours': hours,
            'minutes': minutes,
            'seconds': seconds,
            'job': self.sched.add_interval_job(handle, hours = hours, minutes = minutes, seconds = seconds)
        }
class Scheduler(Plugin):
    """Plugin that (re)creates the registered cron and interval jobs on start.

    ``self.cron``, ``self.interval`` and ``self.stop`` are referenced here
    but not defined in this block; they are expected to be provided
    elsewhere (for example by the rest of the class or its base).
    """

    # Class-level registries (shared by every instance):
    # identifier -> job description dict.
    crons = {}
    intervals = {}
    started = False

    def __init__(self):
        """Register the scheduler-related events and create the scheduler."""
        addEvent("schedule.cron", self.cron)
        addEvent("schedule.interval", self.interval)
        addEvent("schedule.start", self.start)
        addEvent("schedule.restart", self.start)
        addEvent("app.load", self.start)

        self.sched = Sched(misfire_grace_time=60)

    def remove(self, identifier):
        """Unschedule the job(s) registered under ``identifier``, if any.

        The registries are named ``intervals`` and ``crons``. Looking up
        ``interval`` / ``cron`` (as this method used to) never found a
        registry, so the failure was swallowed and nothing was unscheduled.
        """
        for job_type in ["intervals", "crons"]:
            try:
                self.sched.unschedule_job(getattr(self, job_type)[identifier]["job"])
                log.debug("%s unscheduled %s", (job_type.capitalize(), identifier))
            except Exception:
                # Best effort: the identifier may be missing from this
                # registry or may not have a scheduled job yet.
                pass

    def start(self):
        """Stop the scheduler and re-add every registered cron and interval job."""
        # Stop all running
        self.stop()

        # Crons
        for identifier in self.crons:
            try:
                self.remove(identifier)
                cron = self.crons[identifier]
                job = self.sched.add_cron_job(cron["handle"], day=cron["day"], hour=cron["hour"], minute=cron["minute"])
                cron["job"] = job
            except ValueError as e:
                log.error("Failed adding cronjob: %s", e)

        # Intervals
        for identifier in self.intervals:
            try:
                self.remove(identifier)
                interval = self.intervals[identifier]
                job = self.sched.add_interval_job(
                    interval["handle"],
                    hours=interval["hours"],
                    minutes=interval["minutes"],
                    seconds=interval["seconds"],
                )
                interval["job"] = job
            except ValueError as e:
                log.error("Failed adding interval cronjob: %s", e)
class Scheduler(Plugin):
    """Plugin that (re)creates the registered cron and interval jobs on start.

    ``self.cron``, ``self.interval`` and ``self.stop`` are referenced here
    but not defined in this block; they are expected to be provided
    elsewhere (for example by the rest of the class or its base).
    """

    # Class-level registries (shared by every instance):
    # identifier -> job description dict.
    crons = {}
    intervals = {}
    started = False

    def __init__(self):
        """Silence the scheduler's own logger, register events, create the scheduler."""
        sl = logging.getLogger('apscheduler.scheduler')
        sl.disabled = True

        addEvent('schedule.cron', self.cron)
        addEvent('schedule.interval', self.interval)
        addEvent('schedule.start', self.start)
        addEvent('schedule.restart', self.start)
        addEvent('app.load', self.start)
        addEvent('app.shutdown', self.stop)

        self.sched = Sched(misfire_grace_time = 60)

    def remove(self, identifier):
        """Unschedule the job(s) registered under ``identifier``, if any.

        The registries are named ``intervals`` and ``crons``. Looking up
        ``interval`` / ``cron`` (as this method used to) never found a
        registry, so the failure was swallowed and nothing was unscheduled.
        """
        for job_type in ['intervals', 'crons']:
            try:
                self.sched.unschedule_job(getattr(self, job_type)[identifier]['job'])
                log.debug('%s unscheduled %s' % (job_type.capitalize(), identifier))
            except Exception:
                # Best effort: the identifier may be missing from this
                # registry or may not have a scheduled job yet.
                pass

    def start(self):
        """Stop the scheduler and re-add every registered cron and interval job."""
        # Stop all running
        self.stop()

        # Crons
        for identifier in self.crons:
            try:
                self.remove(identifier)
                cron = self.crons[identifier]
                job = self.sched.add_cron_job(cron['handle'], day = cron['day'], hour = cron['hour'], minute = cron['minute'])
                cron['job'] = job
            except ValueError as e:
                log.error("Failed adding cronjob: %s" % e)

        # Intervals
        for identifier in self.intervals:
            try:
                self.remove(identifier)
                interval = self.intervals[identifier]
                job = self.sched.add_interval_job(interval['handle'], hours = interval['hours'], minutes = interval['minutes'], seconds = interval['seconds'], repeat = interval['repeat'])
                interval['job'] = job
            except ValueError as e:
                log.error("Failed adding interval cronjob: %s" % e)
def recordAndRegulateTemp(number_of_hours,temperature,csvWriter):
    """Run ``my_job`` every five minutes for ``number_of_hours`` hours.

    While the job is scheduled, the remaining time is written to stdout
    once a minute. ``my_job`` and ``temps`` are module-level names defined
    outside this function.

    @param number_of_hours: run duration in hours; anything ``int()`` accepts
    @param temperature: passed through to ``my_job``
    @param csvWriter: passed through to ``my_job``
    """
    # Convert once, before anything is started, so bad input fails fast
    # instead of leaving a running scheduler behind.
    duration = 3600 * int(number_of_hours)

    sched = Scheduler()
    sched.start()
    job = sched.add_interval_job(my_job, minutes=5, args=[temperature, csvWriter, temps])

    start_time = time.time()
    try:
        while time.time() - start_time < duration:
            remaining = duration - (time.time() - start_time)
            text = "time left: " + str(round(remaining, 0)) + " seconds\n"
            sys.stdout.write(text)
            sys.stdout.flush()
            time.sleep(60)
    finally:
        # Always remove the job, even if the wait loop is interrupted.
        sched.unschedule_job(job)
class TimeScheduler:
    """Process-wide wrapper around a scheduler for handler objects.

    Handlers are objects exposing an ``execute`` method; that bound method
    is what gets scheduled. Call ``init()`` before registering anything and
    ``start()`` to begin running jobs.
    """

    # Lazily created shared instance, see getInstance().
    instance = None

    def __init__(self):
        """Create an unconfigured wrapper; the scheduler is built by ``init()``."""

    @staticmethod
    def getInstance():
        """Return the shared ``TimeScheduler``, creating it on first use."""
        if TimeScheduler.instance is None:
            TimeScheduler.instance = TimeScheduler()
        return TimeScheduler.instance

    def init(self, threadpool=None):
        """Create the underlying scheduler.

        @param threadpool: optional thread pool handed to the scheduler; when
            omitted a single-threaded pool is configured.
        """
        if threadpool is None:
            self.sched = Scheduler({'apscheduler.threadpool.core_threads': 1,
                                    'apscheduler.threadpool.max_threads': 1,
                                    'apscheduler.threadpool.keepalive': 1})
        else:
            self.sched = Scheduler({'apscheduler.threadpool': threadpool})
        # NOTE(review): applied to both branches; the original indentation
        # was lost, confirm this was not meant for the custom-pool case only.
        self.sched.daemonic = False

    def registerCronExp(self, handler, year=None, month=None, day=None, hour=None, minute=None, second=None, start_date=None):
        """Register ``handler.execute`` as a cron job (no week / day-of-week fields).

        ``start_date`` is now forwarded; it used to be replaced by ``None``.

        @return: whatever the scheduler's ``add_cron_job`` returns
        """
        return self.sched.add_cron_job(handler.execute,
                                       year=year, month=month, day=day,
                                       hour=hour, minute=minute, second=second,
                                       start_date=start_date)

    def registerCron(self, handler, year=None, month=None, day=None, week=None, day_of_week=None, hour=None, minute=None, second=None, start_date=None):
        """Register ``handler.execute`` as a cron job.

        Every field is forwarded to the scheduler; previously all of them
        were replaced by literal ``None`` and the arguments were ignored.

        @return: whatever the scheduler's ``add_cron_job`` returns
        """
        return self.sched.add_cron_job(handler.execute,
                                       year=year, month=month, day=day,
                                       week=week, day_of_week=day_of_week,
                                       hour=hour, minute=minute, second=second,
                                       start_date=start_date)

    def registerInterval(self, handler, weeks=0, days=0, hours=0, minutes=0, seconds=0, start_date=None):
        """Register ``handler.execute`` as an interval task.

        @return: whatever the scheduler's ``add_interval_job`` returns
        """
        return self.sched.add_interval_job(handler.execute,
                                           weeks=weeks, days=days, hours=hours,
                                           minutes=minutes, seconds=seconds,
                                           start_date=start_date)

    def registerDate(self, handler, date):
        """Register ``handler.execute`` to run once at ``date``."""
        return self.sched.add_date_job(handler.execute, date)

    def unregister(self, job):
        """Remove a job previously returned by one of the register methods."""
        self.sched.unschedule_job(job)

    def start(self):
        """Start the underlying scheduler."""
        self.sched.start()
def recordAndRegulateTemp(number_of_hours, temperature, csvWriter):
    """Run ``my_job`` every five minutes for ``number_of_hours`` hours.

    While the job is scheduled, the remaining time is written to stdout
    once a minute. ``my_job`` and ``temps`` are module-level names defined
    outside this function.

    @param number_of_hours: run duration in hours; anything ``int()`` accepts
    @param temperature: passed through to ``my_job``
    @param csvWriter: passed through to ``my_job``
    """
    # Convert once, before anything is started, so bad input fails fast
    # instead of leaving a running scheduler behind.
    duration = 3600 * int(number_of_hours)

    sched = Scheduler()
    sched.start()
    job = sched.add_interval_job(my_job, minutes=5,
                                 args=[temperature, csvWriter, temps])

    start_time = time.time()
    try:
        while time.time() - start_time < duration:
            remaining = duration - (time.time() - start_time)
            text = "time left: " + str(round(remaining, 0)) + " seconds\n"
            sys.stdout.write(text)
            sys.stdout.flush()
            time.sleep(60)
    finally:
        # Always remove the job, even if the wait loop is interrupted.
        sched.unschedule_job(job)
class Job_Manager(object):
    """Adds and removes named interval jobs on a scheduler."""

    def __init__(self, config):
        """Build the scheduler from ``config["SCHEDULER"]`` and start it."""
        scheduler = Scheduler(config["SCHEDULER"])
        self.scheduler = scheduler
        if scheduler is not None:
            scheduler.start()

    def add_job(self, task, interval, name, *args):
        """Schedule ``task`` every ``interval`` seconds under ``name``.

        Extra positional arguments are handed to the scheduler as the
        job's ``args``.
        """
        # ``args`` is always a tuple here, so it is passed through as-is.
        self.scheduler.add_interval_job(
            task,
            seconds=interval,
            args=args,
            name=name,
            max_instances=50,
        )

    def remove_job(self, name):
        """Unschedule every job whose name equals ``name``."""
        self.__remove_jobs(self.__get_jobs(name))

    def __get_jobs(self, name):
        """Return the scheduled jobs named ``name`` as a list."""
        matching = []
        for candidate in self.scheduler.get_jobs():
            if candidate.name == name:
                matching.append(candidate)
        return matching

    def __remove_jobs(self, matchedJobs):
        """Unschedule each job in ``matchedJobs``."""
        unschedule = self.scheduler.unschedule_job
        for entry in matchedJobs:
            unschedule(entry)
class MyScheduler:
    """Thin wrapper around a scheduler with a SQLAlchemy-backed job store."""

    # Scheduler event code (as a string) -> event constant name.
    # The codes are bit flags (1, 2, 4, 8, ...); the former keys '3', '4'
    # and '5' did not match the values of the constants they named.
    EVENTS = {
        '1': 'EVENT_SCHEDULER_START',
        '2': 'EVENT_SCHEDULER_SHUTDOWN',
        '4': 'EVENT_JOBSTORE_ADDED',
        '8': 'EVENT_JOBSTORE_REMOVED',
        '16': 'EVENT_JOBSTORE_JOB_ADDED',
        '32': 'EVENT_JOBSTORE_JOB_REMOVED',
        '64': 'EVENT_JOB_EXECUTED',
        '128': 'EVENT_JOB_ERROR',
        '256': 'EVENT_JOB_MISSED'
    }

    def __init__(self, db_path='sqlite:///scheduler.db'):
        """Create the scheduler and attach the job store at ``db_path`` as 'default'."""
        self.scheduler = Scheduler()
        self.scheduler.add_jobstore(SQLAlchemyJobStore(url=db_path), 'default')

    def start(self):
        """Start the scheduler."""
        self.scheduler.start()

    def add_job(self, job, date, args):
        """Schedule callable ``job`` to run once at ``date`` with ``args``.

        The scheduled job is printed (as before) and also returned.
        """
        scheduled = self.scheduler.add_date_job(job, date, args)
        # Function-call form prints the same text on Python 2 and 3.
        print(scheduled)
        return scheduled

    def jobs(self):
        """Return the jobs currently known to the scheduler."""
        return self.scheduler.get_jobs()

    def remove_job(self, notfication_id):
        """Unschedule the first job whose first argument equals ``notfication_id``.

        Both values are compared as integers.

        @return: True if a job was removed, False otherwise
        """
        wanted = int(notfication_id)
        for job in self.jobs():
            if int(job.args[0]) == wanted:
                self.scheduler.unschedule_job(job)
                return True
        return False

    def shutdown(self):
        """Shut the scheduler down."""
        self.scheduler.shutdown()
class SchedulerService(object):
    """Runs a task on an interval that is recomputed after every execution."""

    def __init__(self, config, task):
        """Store the configuration and task and create the scheduler."""
        self.config, self.task = config, task
        self.task_thread = Scheduler()
        self.job = None

    def run(self):
        """Validate the task, attach the listeners, start and schedule the first run."""
        self.task.validate()
        # Re-evaluate the interval after a successful run and after a failed one.
        for event_mask in (EVENT_JOB_EXECUTED, EVENT_JOB_ERROR):
            self.task_thread.add_listener(self.reconfigure_interval, event_mask)
        self.task_thread.start()
        self.reconfigure_interval(None)

    def reconfigure_interval(self, event):
        """Replace the current job with one using the task's new interval.

        @param event: the scheduler event that triggered the call, or a
            falsy value for the initial scheduling (nothing to unschedule).
        """
        if event:
            self.task_thread.unschedule_job(event.job)

        seconds = self.task.get_new_interval()
        message = "".join([
            "=== interval for job:'",
            str(self.task),
            "' set to :'",
            str(seconds),
            "'===",
        ])
        log.debug(message)
        self.job = self.task_thread.add_interval_job(self.task.do, seconds=seconds)
class SchedulerDaemon(Daemon):
    """Daemon that schedules ``Schedule`` records and submits them as framework jobs.

    ``sendFrameworkCommand`` is used below but not defined in this class; it
    is expected to come from the base class.
    """

    def __init__(self, pid, config):
        """Read the SFTP and scheduler settings from ``config`` and create the scheduler.

        @param pid: passed to the base ``Daemon`` constructor
        @param config: configuration object exposing ``get(section, option)``
        """
        super(SchedulerDaemon, self).__init__(pid)
        self.config = config

        # set DaemonArgs for CommandDispatcher
        daemonArgs = DaemonArgs(config)

        # setup logger
        # The logger is always created, even without a logging config file;
        # it used to stay None in that case and every later self.logger.*
        # call raised AttributeError.
        if os.path.exists(daemonArgs.log_file):
            logging.config.fileConfig(daemonArgs.log_file)
        self.logger = logging.getLogger('framework')

        # sftp settings
        self.sftpHost = self.config.get("sftp", "host")
        self.sftpPort = int(self.config.get("sftp", "port"))
        self.sftpRemotePath = self.config.get("sftp", "remote_path")
        self.sftpUsername = self.config.get("sftp", "username")
        self.sftpPassword = self.config.get("sftp", "password") or None
        self.sftpPrivateKey = self.config.get("sftp", "pkey") or None
        self.sftpPrivateKeyPassword = self.config.get("sftp", "pkey_password") or None
        self.sftpPrivateKeyType = self.config.get("sftp", "pkey_type") or None
        # Only 'rsa' and 'dss' are supported. Guard against None first:
        # calling .lower() on an unset key type used to raise AttributeError.
        if self.sftpPrivateKeyType is not None \
                and self.sftpPrivateKeyType.lower() not in ('rsa', 'dss'):
            self.sftpPrivateKeyType = None

        self.jobSubmitInterval = int(self.config.get("scheduler", "jobsubmit_interval")) or 10
        self.jobCleanupInterval = int(self.config.get("scheduler", "jobcleanup_interval")) or 30

        self.scheduler = Scheduler(daemonic=True)
        # Order of the whitespace-separated fields in a stored cron expression.
        self.cronScheduleSequence = ('minute', 'hour', 'day', 'month', 'day_of_week')

    @transaction.commit_on_success
    def saveJob(self, status, frameworkJobId, scheduledJob):
        """Create or update the ``Job`` record for one run of ``scheduledJob``.

        @param status: status to store on the job
        @param frameworkJobId: id returned by the framework, or None when the
            submission did not produce one (a new record is always created then)
        @param scheduledJob: the ``Schedule`` the job belongs to
        """
        now = datetime.now()
        newJob = None

        # create new job
        if frameworkJobId is not None:
            newJob, created = Job.objects.get_or_create(
                frameworkid=frameworkJobId
            )
            newJob.name = scheduledJob.job_name
            newJob.started = now
            newJob.workflow = scheduledJob.workflow
            newJob.is_public = scheduledJob.is_public
            newJob.owner = scheduledJob.created_by
            newJob.schedule = scheduledJob
            newJob.status = status
        else:
            newJob = Job(
                name=scheduledJob.job_name,
                started=now,
                workflow=scheduledJob.workflow,
                is_public=scheduledJob.is_public,
                owner=scheduledJob.created_by,
                schedule=scheduledJob,
                status=status
            )
        newJob.save()

    @transaction.commit_on_success
    def submitJobToFramework(self, **kwargs):
        """Submit one scheduled job to the framework (called by the scheduler).

        Expects ``kwargs['unScheduledJob']`` to be the ``Schedule`` to run.
        If one of its parameters belongs to the file-feeder service, the
        referenced file is uploaded over SFTP first.
        """
        jobCommand = 'job'
        daemonArgs = DaemonArgs(self.config)
        daemonArgs.command = jobCommand
        unScheduledJob = kwargs['unScheduledJob']
        is_fileFeeder = False
        fileFeederUploadedFile = None
        del daemonArgs.param[:]

        # go through all parameters
        for parameter in unScheduledJob.parameters.all():

            # add parameter to daemonArgs.param
            if parameter.service and parameter.param_key and parameter.param_value:

                # check if a file feeder is used
                if parameter.service == settings.FILE_FEEDER_ID:
                    is_fileFeeder = True
                    fileFeederUploadedFile = parameter.param_value

                    remoteFeederFile = os.path.join(self.sftpRemotePath, parameter.param_value)
                    parameterString = '%s.%s=%s' % (
                        parameter.service, parameter.param_key, remoteFeederFile
                    )
                else:
                    parameterString = '%s.%s=%s' % (
                        parameter.service, parameter.param_key, parameter.param_value
                    )

                self.logger.debug("add parameter string: %s" % parameterString)
                daemonArgs.param.append([parameterString])

        # in case of a filefeeder upload file to framework server
        # NOTE(review): after a failed upload a FAILED job is saved but the
        # job is still submitted below; confirm whether submission should be
        # skipped in that case.
        if is_fileFeeder:
            self.logger.debug("is file feeder")
            sftp = None
            transport = None
            try:
                transport = Transport((self.sftpHost, self.sftpPort))
                if self.sftpPassword:
                    transport.connect(username=self.sftpUsername, password=self.sftpPassword)
                else:
                    privateKey = None
                    if self.sftpPrivateKeyType and self.sftpPrivateKeyType.lower() == 'rsa':
                        privateKey = RSAKey.from_private_key_file(
                            self.sftpPrivateKey, password=self.sftpPrivateKeyPassword
                        )
                    if self.sftpPrivateKeyType and self.sftpPrivateKeyType.lower() == 'dss':
                        privateKey = DSSKey.from_private_key_file(
                            self.sftpPrivateKey, password=self.sftpPrivateKeyPassword
                        )

                    transport.connect(username=self.sftpUsername, pkey=privateKey)

                sftp = SFTPClient.from_transport(transport)

                filePath = os.path.join(settings.MEDIA_ROOT, fileFeederUploadedFile)
                remotePath = os.path.join(self.sftpRemotePath, fileFeederUploadedFile)

                self.logger.debug("uploading file from %s to %s on remote machine" % (filePath, remotePath))

                sftp.put(filePath, remotePath)
                # sftp.put(filePath, remotePath, confirm=False)
                # 0o644 is the same mode as the old 0644 literal and is valid
                # on Python 2.6+ and Python 3.
                sftp.chmod(remotePath, 0o644)

                self.logger.debug("put OK")

            except IOError as e:
                self.logger.error("IOError: %s. Will continue with next scheduled job." % e)
                self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
            except PasswordRequiredException as e:
                self.logger.error("PasswordRequiredException: %s. Will continue with next scheduled job." % e)
                self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
            except SSHException as e:
                self.logger.error("SSH Exception: %s. Will continue with next scheduled job." % e)
                self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
            except Exception as e:
                self.logger.error("Unkown SFTP problem. Will continue with next scheduled job. %s" % e)
                self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
            finally:
                if sftp is not None:
                    sftp.close()
                if transport is not None:
                    transport.close()

        # set job workflow
        daemonArgs.jd_workflow = unScheduledJob.workflow.name

        frameworkJobId = None

        try:
            setattr(daemonArgs, jobCommand, 'submit')
            frameworkJobId = self.sendFrameworkCommand(jobCommand, daemonArgs)
            self.saveJob(Job.PROCESSING_STATUS, frameworkJobId, unScheduledJob)
        except WorkflowNotDeployedException:
            # The workflow is not deployed in the framework. To prevent the
            # scheduler retrying continuously we disable this job
            unScheduledJob.status = Schedule.DEACTIVATE_STATUS
            unScheduledJob.save()
        except Exception:
            self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
        finally:
            daemonArgs.clean(jobCommand)

        # A schedule with a fixed start date is deactivated once it has been
        # handled.
        if unScheduledJob.scheduled_start is not None:
            unScheduledJob.status = Schedule.DEACTIVATED_STATUS
            unScheduledJob.save()

    def updateProcessingJobs(self):
        """Refresh the status of every job still marked as processing.

        Asks the framework for its job list. Jobs whose framework status
        differs from the stored one are updated (with their end time); jobs
        the framework no longer lists are marked completed. If the job list
        cannot be fetched nothing is changed.
        """
        jobCommand = 'job'
        processingJobs = Job.objects.filter(status=Job.PROCESSING_STATUS)
        daemonArgs = DaemonArgs(self.config)

        if not list(processingJobs):
            return

        jobs_dict = {}
        try:
            setattr(daemonArgs, jobCommand, 'list')
            jobs_dict = self.sendFrameworkCommand(jobCommand, daemonArgs)
        except Exception:
            return
        finally:
            daemonArgs.clean(jobCommand)

        for processingJob in processingJobs:
            if processingJob.frameworkid in jobs_dict \
                    and int(processingJob.status) != int(jobs_dict[processingJob.frameworkid]):
                try:
                    setattr(daemonArgs, jobCommand, 'details')
                    setattr(daemonArgs, 'gjd_id', processingJob.frameworkid)
                    job_details = self.sendFrameworkCommand(jobCommand, daemonArgs)
                except Exception:
                    continue
                finally:
                    daemonArgs.clean(jobCommand)
                    daemonArgs.clean('gjd_id')

                processingJob.status = jobs_dict[processingJob.frameworkid]
                processingJob.finished = job_details['job_end_time']
                processingJob.save()
            elif processingJob.frameworkid not in jobs_dict:
                processingJob.status = Job.COMPLETED_STATUS
                processingJob.finished = None
                processingJob.save()

    def checkJobs(self):
        """Synchronise the scheduler with the ``Schedule`` table.

        Unschedules schedules flagged for deletion/deactivation and
        (re)schedules schedules flagged as new or updated, updating each
        record's status accordingly.
        """
        scheduledJobs = self.scheduler.get_jobs()

        # remove scheduled jobs which are set to be deleted or deactivated
        deleteAndDeactivateJobs = Schedule.objects.filter(
            Q(status=Schedule.DELETE_STATUS) | Q(status=Schedule.DEACTIVATE_STATUS)
        )
        for deleteAndDeactivateJob in deleteAndDeactivateJobs:
            for scheduledJob in scheduledJobs:
                if scheduledJob.name == deleteAndDeactivateJob.job_name:
                    self.scheduler.unschedule_job(scheduledJob)
            deleteAndDeactivateJob.status = Schedule.DEACTIVATED_STATUS \
                if deleteAndDeactivateJob.status == Schedule.DEACTIVATE_STATUS \
                else Schedule.DELETED_STATUS
            deleteAndDeactivateJob.save()

        # add/update unscheduled jobs
        split_re = re.compile(r"\s+")
        unScheduledJobs = Schedule.objects.filter(
            Q(status=Schedule.NEW_STATUS) | Q(status=Schedule.UPDATE_STATUS)
        )
        for unScheduledJob in unScheduledJobs:
            if unScheduledJob.status == Schedule.UPDATE_STATUS:
                for scheduledJob in scheduledJobs:
                    if scheduledJob.name == unScheduledJob.job_name:
                        self.scheduler.unschedule_job(scheduledJob)

            if unScheduledJob.scheduled_start is not None:
                schedule = {
                    'kwargs': {'unScheduledJob': unScheduledJob},
                    'name': unScheduledJob.job_name
                }

                try:
                    newJob = self.scheduler.add_date_job(
                        self.submitJobToFramework, unScheduledJob.scheduled_start, **schedule
                    )
                    self.logger.debug('Job will run on %s' % newJob.next_run_time)
                except Exception as e:
                    self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e))
                    raise Exception
                else:
                    unScheduledJob.status = Schedule.ACTIVE_STATUS
                    unScheduledJob.save()
            else:
                cronList = split_re.split(unScheduledJob.cron_expression)
                # zip() builds the same dict as itertools.izip() did and also
                # works on Python 3.
                schedule = dict(zip(self.cronScheduleSequence, cronList))

                schedule['kwargs'] = {'unScheduledJob': unScheduledJob}
                schedule['name'] = unScheduledJob.job_name

                try:
                    newJob = self.scheduler.add_cron_job(self.submitJobToFramework, **schedule)
                    self.logger.debug('First run of job will be on %s' % newJob.next_run_time)
                except Exception as e:
                    self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e))
                    raise Exception
                else:
                    unScheduledJob.status = Schedule.ACTIVE_STATUS
                    unScheduledJob.save()

    def cleanup(self):
        """Periodic task: refresh processing jobs, logging and re-raising failures."""
        try:
            self.updateProcessingJobs()
        except Exception as e:
            self.logger.error("Unknown error while updating processing jobs: %s" % str(e))
            raise Exception

    def onNotification(self, eventType, body):
        """Handle a bus notification; ``JobFinished`` events update the stored job.

        @param eventType: notification type name
        @param body: serialized event payload
        @return: always True
        """
        if eventType == 'JobFinished':
            # sleep is added, because a failing job can be quicker than
            # Django save the frameworkid of that job
            time.sleep(1)

            event = JobFinished()
            event.ParseFromString(body)

            self.logger.debug('Job with ID %s is finished with status %s', str(event.job), str(event.status))

            Job.objects.update()
            finishedJob = Job.objects.get(frameworkid=event.job)
            finishedJob.status = event.status
            finishedJob.finished = datetime.now()
            finishedJob.save()

        return True

    def run(self):
        """Schedule all active schedules, start the scheduler and attach to the bus."""
        self.logger.info('Started scheduler')

        # add active schedules to scheduler
        split_re = re.compile(r"\s+")
        scheduledJobs = Schedule.objects.filter(status=Schedule.ACTIVE_STATUS)
        for scheduledJob in scheduledJobs:
            if scheduledJob.scheduled_start is not None:
                schedule = {
                    'kwargs': {'unScheduledJob': scheduledJob},
                    'name': scheduledJob.job_name
                }

                try:
                    newJob = self.scheduler.add_date_job(
                        self.submitJobToFramework, scheduledJob.scheduled_start, **schedule
                    )
                except Exception as e:
                    self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e))
                    raise Exception
            else:
                cronList = split_re.split(scheduledJob.cron_expression)
                schedule = dict(zip(self.cronScheduleSequence, cronList))

                schedule['kwargs'] = {'unScheduledJob': scheduledJob}
                schedule['name'] = scheduledJob.job_name

                try:
                    newJob = self.scheduler.add_cron_job(self.submitJobToFramework, **schedule)
                except Exception as e:
                    self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e))
                    raise Exception

        # add job scheduling mechanism and cleanup to scheduler and start scheduler
        try:
            self.scheduler.add_interval_job(self.checkJobs, seconds=self.jobSubmitInterval)
            self.scheduler.add_interval_job(self.cleanup, minutes=self.jobCleanupInterval)
            self.scheduler.start()
        except Exception as e:
            self.logger.error("Unknown error while initializing scheduler: %s" % str(e))
            raise Exception

        # initialize bus instance for receiving job notifications
        try:
            notificationBus = Bus.createConfigurableBus(self.logger, self.config, 'notifications')
            notificationBus.openFwChannel()
            notificationBus.attachToMonitoring(self.onNotification)
            notificationBus.close()
        except BusException as e:
            self.logger.error("Cannot connect to HSN2 Bus because '%s'" % e)
            raise Exception
        except BusTimeoutException as e:
            self.logger.error("Response timeout")
            raise Exception
class LocalScheduler(object):
    """Named scheduler that remembers its jobs across start/stop cycles.

    Every instance registers itself in ``scheduler_registry`` under its
    name. Jobs added while the scheduler is stopped are kept and scheduled
    on the next ``start()``.
    """

    # name -> LocalScheduler, shared by the whole process.
    scheduler_registry = {}
    # When True, start() refuses to create the underlying scheduler.
    _lockdown = False

    @classmethod
    def get(cls, name):
        """Return the scheduler registered under ``name`` (KeyError if unknown)."""
        return cls.scheduler_registry[name]

    @classmethod
    def get_all(cls):
        """Return all registered schedulers."""
        return cls.scheduler_registry.values()

    @classmethod
    def shutdown_all(cls):
        """Stop every registered scheduler."""
        for scheduler in list(cls.scheduler_registry.values()):
            scheduler.stop()

    @classmethod
    def lockdown(cls):
        """Prevent any scheduler from being started from now on."""
        cls._lockdown = True

    @classmethod
    def clear_all(cls):
        """Remove all jobs from every registered scheduler."""
        for scheduler in list(cls.scheduler_registry.values()):
            scheduler.clear()

    def __init__(self, name, label=None):
        """Create a stopped scheduler and register it under ``name``."""
        self.scheduled_jobs = {}
        self._scheduler = None
        self.name = name
        self.label = label
        self.__class__.scheduler_registry[self.name] = self

    def start(self):
        """Create the underlying scheduler, schedule the known jobs and start it.

        Does nothing except log when lockdown is in effect.
        """
        logger.info('Starting scheduler: %s' % self.name)
        if not self.__class__._lockdown:
            self._scheduler = OriginalScheduler()
            for job in list(self.scheduled_jobs.values()):
                self._schedule_job(job)

            self._scheduler.start()
        else:
            logger.debug('lockdown in effect')

    def stop(self):
        """Shut down and discard the underlying scheduler, if any."""
        if self._scheduler:
            self._scheduler.shutdown()
            self._scheduler = None

    @property
    def running(self):
        """True when an underlying scheduler exists and reports itself running."""
        if self._scheduler:
            return self._scheduler.running
        else:
            return False

    def clear(self):
        """Stop and forget every job of this scheduler."""
        # stop_job() deletes from scheduled_jobs, so iterate over a snapshot;
        # iterating the live view raises RuntimeError on Python 3.
        for job in list(self.scheduled_jobs.values()):
            self.stop_job(job)

    def stop_job(self, job):
        """Unschedule ``job`` (when running) and remove it from this scheduler."""
        if self.running:
            self._scheduler.unschedule_job(job._job)

        del self.scheduled_jobs[job.name]
        job.scheduler = None

    def _schedule_job(self, job):
        """Add ``job`` to the underlying scheduler according to its class."""
        if isinstance(job, IntervalJob):
            job._job = self._scheduler.add_interval_job(job.function, *job.args, **job.kwargs)
        elif isinstance(job, DateJob):
            job._job = self._scheduler.add_date_job(job.function, *job.args, **job.kwargs)
        elif isinstance(job, CronJob):
            job._job = self._scheduler.add_cron_job(job.function, *job.args, **job.kwargs)
        else:
            raise UnknownJobClass

    def add_job(self, job):
        """Attach ``job`` to this scheduler.

        Raises ``AlreadyScheduled`` if the job already belongs to a
        scheduler or its name is taken here.
        """
        logger.debug('adding job')
        if job.scheduler or job.name in self.scheduled_jobs:
            raise AlreadyScheduled

        # Only schedule right away when the underlying scheduler exists;
        # otherwise start() will do it later.
        if self._scheduler:
            self._schedule_job(job)

        job.scheduler = self
        self.scheduled_jobs[job.name] = job

    def add_interval_job(self, name, label, function, *args, **kwargs):
        """Create an ``IntervalJob``, add it to this scheduler and return it."""
        job = IntervalJob(name=name, label=label, function=function, *args, **kwargs)
        self.add_job(job)
        return job

    def add_date_job(self, name, label, function, *args, **kwargs):
        """Create a ``DateJob``, add it to this scheduler and return it."""
        job = DateJob(name=name, label=label, function=function, *args, **kwargs)
        self.add_job(job)
        return job

    def add_cron_job(self, name, label, function, *args, **kwargs):
        """Create a ``CronJob``, add it to this scheduler and return it."""
        job = CronJob(name=name, label=label, function=function, *args, **kwargs)
        self.add_job(job)
        return job

    def get_job_list(self):
        """Return the jobs attached to this scheduler."""
        return self.scheduled_jobs.values()

    def get_job_by_name(self, name):
        """Return the job called ``name`` or raise ``UnknownJob``."""
        try:
            return self.scheduled_jobs[name]
        except KeyError:
            raise UnknownJob

    def __unicode__(self):
        return unicode(self.label or self.name)
class Controller:
    """Interactive console menu that manages display turn-on / turn-off cron jobs.

    ``dayLabels`` and ``dayOptions`` are module-level names defined outside
    this class: ``dayLabels`` is indexed by position and ``dayOptions`` maps
    a label to the ``day_of_week`` value passed to the scheduler.
    """

    # NOTE(review): the original line breaks of the menu text were lost in
    # this copy of the source; this layout is a reconstruction.
    _MENU = (
        "\n"
        "Timer Test Menu\n"
        "1. Set Turn On/Off Days\n"
        "2. Set Turn On Time\n"
        "3. Set Turn Off Time\n"
        "4. Get On-Off Times\n"
        "5. Quit/Exit\n"
    )

    def __init__(self):
        """Start the scheduler, install the default on/off jobs and show the menu.

        The call blocks in the menu loop until the user quits.
        """
        # Start the scheduler
        self.sched = Scheduler()
        self.sched.start()

        # set default turn on and turn off times
        # default to everyday
        self.daysLabel = dayLabels[0]
        self.days = dayOptions[self.daysLabel]

        # turn on at 7am
        self.turnOnHour = 7
        self.turnOnMin = 0
        self.DisplayOnJob = self.sched.add_cron_job(self.displayPowerOn, day_of_week=self.days,
                                                    hour=self.turnOnHour, minute=self.turnOnMin)

        # turn off at 7pm
        self.turnOffHour = 19
        self.turnOffMin = 0
        self.DisplayOffJob = self.sched.add_cron_job(self.displayPowerOff, day_of_week=self.days,
                                                     hour=self.turnOffHour, minute=self.turnOffMin)

        # print the menu
        self.printMenu()

    @staticmethod
    def _parseInt(text, low, high):
        """Return ``int(text)`` if it lies in ``[low, high]``, otherwise None.

        Non-numeric input yields None instead of raising ``ValueError``.
        """
        try:
            value = int(text)
        except (TypeError, ValueError):
            return None
        if low <= value <= high:
            return value
        return None

    def printMenu(self):
        """Show the menu and process selections until the user quits.

        Implemented as a loop; the method used to call itself after every
        selection, so the call stack grew with each menu action.
        """
        while True:
            print(self._MENU)

            # get the selection
            self.main_selection = input("Please select: ")
            print("\n")
            self._handleSelection(self.main_selection)

    def _handleSelection(self, selection):
        """Dispatch one menu selection (the raw string typed by the user)."""
        if selection == '1':
            self._setDays()
        elif selection == '2':
            self._setTurnOnTime()
        elif selection == '3':
            self._setTurnOffTime()
        elif selection == '4':
            print('Turn On ', self.daysLabel, ' at ', str(self.turnOnHour), ':',
                  str(self.turnOnMin).zfill(2), sep='')
            print('Turn Off ', self.daysLabel, ' at ', str(self.turnOffHour), ':',
                  str(self.turnOffMin).zfill(2), sep='')
            self.sched.print_jobs()
        elif selection == '5':
            sys.exit()
        else:
            print("Invalid selection.\n")

    def _setDays(self):
        """Ask which days to use and reschedule both jobs on valid input."""
        print('Current Turn On/Off days:', self.daysLabel)
        print('1. Daily')
        print('2. WeekDays')
        self.newDays = input("Select which days to use: ")

        # validate entry
        choice = self._parseInt(self.newDays, 1, 2)
        if choice is not None:
            self.daysLabel = dayLabels[choice - 1]
            self.days = dayOptions[self.daysLabel]
            # cancel old jobs and start new ones
            self.schedDisplayOn()
            self.schedDisplayOff()
            print('New Turn On/Off days:', self.daysLabel)
        else:
            print('Invalid entry')

    def _setTurnOnTime(self):
        """Ask for a new turn-on time and reschedule the turn-on job on valid input."""
        print('Current Turn On time ', str(self.turnOnHour), ':',
              str(self.turnOnMin).zfill(2), sep='')
        self.newTurnOnHour = input("Enter new turn on hour (in 24 hour clock): ")

        # validate hour entry
        hour = self._parseInt(self.newTurnOnHour, 0, 23)
        if hour is None:
            print('Invalid Turn On Hour')
            return

        self.newTurnOnMin = input("Enter new turn on minute: ")

        # validate min entry
        minute = self._parseInt(self.newTurnOnMin, 0, 59)
        if minute is None:
            print('Invalid Turn On Min')
            return

        self.turnOnHour = hour
        self.turnOnMin = minute
        # cancel old job and start new one
        self.schedDisplayOn()
        # print new turn on time
        print('New Turn On time ', str(self.turnOnHour), ':',
              str(self.turnOnMin).zfill(2), sep='')

    def _setTurnOffTime(self):
        """Ask for a new turn-off time and reschedule the turn-off job on valid input."""
        print('Current Turn Off time ', str(self.turnOffHour), ':',
              str(self.turnOffMin).zfill(2), sep='')
        self.newTurnOffHour = input("Enter new turn off hour (in 24 hour clock): ")

        # validate hour entry
        hour = self._parseInt(self.newTurnOffHour, 0, 23)
        if hour is None:
            print('Invalid Turn Off Hour')
            return

        self.newTurnOffMin = input("Enter new turn off minute: ")

        # validate min entry
        minute = self._parseInt(self.newTurnOffMin, 0, 59)
        if minute is None:
            print('Invalid Turn Off Min')
            return

        self.turnOffHour = hour
        self.turnOffMin = minute
        # cancel old job and start new one
        self.schedDisplayOff()
        # print new turn off time
        print('New Turn Off time ', str(self.turnOffHour), ':',
              str(self.turnOffMin).zfill(2), sep='')

    def displayPowerOn(self):
        """Job callback for the turn-on time."""
        print("Display On")

    def displayPowerOff(self):
        """Job callback for the turn-off time."""
        print("Display Off")

    def schedDisplayOn(self):
        """Replace the turn-on job with one using the current days and time."""
        # cancel the old job
        self.sched.unschedule_job(self.DisplayOnJob)
        # schedule the new job
        self.DisplayOnJob = self.sched.add_cron_job(self.displayPowerOn, day_of_week=self.days,
                                                    hour=self.turnOnHour, minute=self.turnOnMin)

    def schedDisplayOff(self):
        """Replace the turn-off job with one using the current days and time."""
        # cancel the old job
        self.sched.unschedule_job(self.DisplayOffJob)
        # schedule the new job
        self.DisplayOffJob = self.sched.add_cron_job(self.displayPowerOff, day_of_week=self.days,
                                                     hour=self.turnOffHour, minute=self.turnOffMin)
class TNActionScheduler (TNArchipelPlugin):
    """
    This plugin allows to create scheduled actions.

    Every job is registered in the scheduler and mirrored in a sqlite table,
    from which the jobs of the owning entity are re-registered by
    L{restore_jobs} when the plugin is instantiated.
    """

    def __init__(self, configuration, entity, entry_point_group):
        """
        Initialize the plugin.
        @type configuration: Configuration object
        @param configuration: the configuration
        @type entity: L{TNArchipelEntity}
        @param entity: the entity that owns the plugin
        @type entry_point_group: string
        @param entry_point_group: the group name of plugin entry_point
        """
        TNArchipelPlugin.__init__(self, configuration=configuration, entity=entity, entry_point_group=entry_point_group)
        self.scheduler = Scheduler()
        self.scheduler.start()
        # NOTE(review): check_same_thread=False is presumably needed because the
        # job callbacks (which call delete_job) run on scheduler threads - confirm.
        self.database = sqlite3.connect(self.configuration.get("SCHEDULER", "database"), check_same_thread=False)
        self.database.execute("create table if not exists scheduler (entity_uuid text, job_uuid text, action text, year text, month text, day text, hour text, minute text, second text, comment text, params text)")
        self.database.commit()
        self.cursor = self.database.cursor()
        self.restore_jobs()
        self.supported_actions_for_vm = ("create", "shutdown", "destroy", "suspend", "resume", "reboot", "migrate", "pause")
        self.supported_actions_for_hypervisor = ("alloc", "free")
        # permissions
        self.entity.permission_center.create_permission("scheduler_jobs", "Authorizes user to get the list of task", False)
        self.entity.permission_center.create_permission("scheduler_schedule", "Authorizes user to schedule a task", False)
        self.entity.permission_center.create_permission("scheduler_unschedule", "Authorizes user to unschedule a task", False)
        self.entity.permission_center.create_permission("scheduler_actions", "Authorizes user to get available actions", False)
        # hooks
        if self.entity.__class__.__name__ == "TNArchipelVirtualMachine":
            self.entity.register_hook("HOOK_VM_TERMINATE", method=self.vm_terminate)

    ### Plugin interface

    def register_handlers(self):
        """
        This method will be called by the plugin user when it will be
        necessary to register module for listening to stanza.
        """
        self.entity.xmppclient.RegisterHandler('iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER)

    def unregister_handlers(self):
        """
        Unregister the handlers.
        """
        self.entity.xmppclient.UnregisterHandler('iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER)

    @staticmethod
    def plugin_info():
        """
        Return informations about the plugin.
        @rtype: dict
        @return: dictionary contaning plugin informations
        """
        plugin_friendly_name = "Action Scheduler"
        plugin_identifier = "action_scheduler"
        plugin_configuration_section = "SCHEDULER"
        plugin_configuration_tokens = ["database"]
        return {"common-name": plugin_friendly_name,
                "identifier": plugin_identifier,
                "configuration-section": plugin_configuration_section,
                "configuration-tokens": plugin_configuration_tokens}

    ### Persistance

    def delete_job(self, uid):
        """
        Remove a job from the database.
        @type uid: string
        @param uid: the uid of the job to remove
        """
        self.cursor.execute("DELETE FROM scheduler WHERE job_uuid=?", (uid, ))
        self.database.commit()

    def save_jobs(self, uid, action, year, month, day, hour, minute, second, comment, params=None):
        """
        Save a job in the database.
        @type uid: string
        @param uid: the uid of the job
        @type action: string
        @param action: the action
        @type year: string
        @param year: year of execution
        @type month: string
        @param month: month of execution
        @type day: string
        @param day: day of execution
        @type hour: string
        @param hour: hour of execution
        @type minute: string
        @param minute: minute of execution
        @type second: string
        @param second: second of execution
        @type comment: string
        @param comment: comment about the job
        @type params: string
        @param params: random parameter of the job
        """
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            entity_uid = self.entity.uuid
        elif entityClass == "TNArchipelHypervisor":
            entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID
        self.cursor.execute("INSERT INTO scheduler VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (entity_uid, uid, action, year, month, day, hour, minute, second, comment, params, ))
        self.database.commit()

    def restore_jobs(self):
        """
        Restore the jobs from the database.

        Each stored row of the owning entity is registered again as a cron
        job. The callback matches the entity class and receives the same
        five arguments as a job created by L{iq_schedule}.
        """
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            entity_uid = self.entity.uuid
            func = self.do_job_for_vm
        elif entityClass == "TNArchipelHypervisor":
            entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID
            func = self.do_job_for_hypervisor
        self.cursor.execute("SELECT * FROM scheduler WHERE entity_uuid=?", (entity_uid, ))
        # Read every row up front: the scheduler is already started and the
        # job callbacks reuse self.cursor (see delete_job), which would reset
        # a result set that is still being iterated.
        for values in self.cursor.fetchall():
            try:
                entity_uuid, job_uuid, action, year, month, day, hour, minute, second, comment, params = values
                str_date = "%s/%s/%s %s:%s:%s" % (year, month, day, hour, minute, second)
                # The stored params must be passed along: the callbacks take
                # (action, uid, str_date, comment, param).
                self.scheduler.add_cron_job(func, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[action, job_uuid, str_date, comment, params])
            except Exception as ex:
                self.entity.log.error("unable to restore a job: %s" % str(ex))

    def vm_terminate(self, origin, user_info, arguments):
        """
        Close the database connection.
        @type origin: TNArchipelEntity
        @param origin: the origin of the hook
        @type user_info: object
        @param user_info: random user information
        @type arguments: object
        @param arguments: runtime argument
        """
        self.database.close()

    ### Jobs

    def get_jod_with_uid(self, uid):
        """
        Get a job with given uid.
        @type uid: string
        @param uid: the uid of the job
        @return: the first scheduled job whose second argument equals uid, or None
        """
        # Depending on the scheduler object, jobs are exposed either through
        # get_jobs() or through a plain "jobs" attribute.
        if hasattr(self.scheduler, "get_jobs"):
            jobs = self.scheduler.get_jobs()
        else:
            jobs = self.scheduler.jobs
        for job in jobs:
            if str(job.args[1]) == uid:
                return job
        return None

    def do_job_for_vm(self, action, uid, str_date, comment, param=None):
        """
        Perform the job.
        @type action: string
        @param action: the action to execute
        @type uid: string
        @param uid: the uid of the job
        @type str_date: string
        @param str_date: the date of the job
        @type comment: string
        @param comment: comment about the job
        @type param: string
        @param param: a random parameter to give to job
        """
        # NOTE(review): "reboot" is advertised in supported_actions_for_vm but
        # has no branch here, so such a job currently does nothing - confirm
        # the intended entity call before adding one.
        if action == "create":
            self.entity.create()
        elif action == "shutdown":
            self.entity.shutdown()
        elif action == "destroy":
            self.entity.destroy()
        elif action == "suspend":
            self.entity.suspend()
        elif action == "resume":
            self.entity.resume()
        elif action == "pause":
            # toggle between suspended and running according to libvirt_status
            if self.entity.libvirt_status == 1:
                self.entity.suspend()
            elif self.entity.libvirt_status == 3:
                self.entity.resume()
        elif action == "migrate":
            pass
        # forget the job in the database once the scheduler no longer holds it
        job = self.get_jod_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        self.entity.push_change("scheduler", "jobexecuted")

    def do_job_for_hypervisor(self, action, uid, str_date, comment, param=None):
        """
        Perform the job.
        @type action: string
        @param action: the action to execute
        @type uid: string
        @param uid: the uid of the job
        @type str_date: string
        @param str_date: the date of the job
        @type comment: string
        @param comment: comment about the job
        @type param: string
        @param param: a random parameter to give to job
        """
        if action == "alloc":
            self.entity.alloc()
        elif action == "free":
            pass  # self.entity.free()
        # forget the job in the database once the scheduler no longer holds it
        job = self.get_jod_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        self.entity.push_change("scheduler", "jobexecuted")

    ### Process IQ

    def process_iq(self, conn, iq):
        """
        This method is invoked when a ARCHIPEL_NS_ENTITY_SCHEDULER IQ is received.
        It understands IQ of type:
            - jobs
            - schedule
            - unschedule
            - actions
        @type conn: xmpp.Dispatcher
        @param conn: ths instance of the current connection that send the stanza
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        """
        reply = None
        action = self.entity.check_acp(conn, iq)
        self.entity.check_perm(conn, iq, action, -1, prefix="scheduler_")
        if action == "schedule":
            reply = self.iq_schedule(iq)
        elif action == "unschedule":
            reply = self.iq_unschedule(iq)
        elif action == "jobs":
            reply = self.iq_jobs(iq)
        elif action == "actions":
            reply = self.iq_actions(iq)
        if reply:
            conn.send(reply)
            raise xmpp.protocol.NodeProcessed

    def iq_schedule(self, iq):
        """
        Schedule a task.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            archipel = iq.getTag("query").getTag("archipel")
            job = archipel.getAttr("job")
            entityClass = self.entity.__class__.__name__
            param = None
            if entityClass == "TNArchipelVirtualMachine" and job not in self.supported_actions_for_vm:
                raise Exception("action %s is not valid" % job)
            elif entityClass == "TNArchipelHypervisor" and job not in self.supported_actions_for_hypervisor:
                raise Exception("action %s is not valid" % job)
            year = archipel.getAttr("year")
            month = archipel.getAttr("month")
            day = archipel.getAttr("day")
            hour = archipel.getAttr("hour")
            minute = archipel.getAttr("minute")
            second = archipel.getAttr("second")
            comment = archipel.getAttr("comment")
            if archipel.has_attr("param"):
                param = archipel.getAttr("param")
            uid = str(uuid.uuid1())
            str_date = "%s-%s-%s @ %s : %02d : %02d" % (year, month, day, hour, int(minute), int(second))
            if entityClass == "TNArchipelVirtualMachine":
                func = self.do_job_for_vm
            elif entityClass == "TNArchipelHypervisor":
                func = self.do_job_for_hypervisor
            self.scheduler.add_cron_job(func, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[job, uid, str_date, comment, param])
            self.save_jobs(uid, job, year, month, day, hour, minute, second, comment, param)
            self.entity.push_change("scheduler", "scheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_jobs(self, iq):
        """
        Get jobs.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            nodes = []
            if hasattr(self.scheduler, "get_jobs"):
                jobs = self.scheduler.get_jobs()
            else:
                jobs = self.scheduler.jobs
            for job in jobs:
                # job.args is [action, uid, str_date, comment, ...]
                job_node = xmpp.Node(tag="job", attrs={"action": str(job.args[0]), "uid": str(job.args[1]), "date": str(job.args[2]), "comment": job.args[3]})
                nodes.append(job_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_unschedule(self, iq):
        """
        Unschedule a job.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            uid = iq.getTag("query").getTag("archipel").getAttr("uid")
            the_job = self.get_jod_with_uid(uid)
            if not the_job:
                raise Exception("job with uid %s doesn't exists" % uid)
            self.delete_job(uid)
            self.scheduler.unschedule_job(the_job)
            self.entity.push_change("scheduler", "unscheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_actions(self, iq):
        """
        Get available actions.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            entityClass = self.entity.__class__.__name__
            if entityClass == "TNArchipelVirtualMachine":
                actions = self.supported_actions_for_vm
            elif entityClass == "TNArchipelHypervisor":
                actions = self.supported_actions_for_hypervisor
            nodes = []
            for action in actions:
                action_node = xmpp.Node(tag="action")
                action_node.setData(action)
                nodes.append(action_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply
# this is the heart of the program:
# send email to let me know I'm brewing
send_email("starting brew log.", password, graph=False)

# Command line layout: argv[2], argv[4], ... are the target temperatures and
# argv[3], argv[5], ... are the number of hours to hold each of them.
list_of_temps = [int(temp) for temp in sys.argv[2::2]]
list_of_times = [int(hours) for hours in sys.argv[3::2]]
print("list of temps")
print(list_of_temps)
print("list of times")
print(list_of_times)

try:
    # argv[2::2] is never shorter than argv[3::2], so zip() visits exactly
    # one (temperature, duration) pair per requested duration.
    for target_temp, hours in zip(list_of_temps, list_of_times):
        send_email("changing temperature to " + str(target_temp) + " for " + str(hours) + " hours.", password, graph=False)
        recordAndRegulateTemp(hours, target_temp, writer)

    print("program done. fermenter shutting down.")
    send_email("ending. fermenter is shutting off", password, graph=True)
    # NOTE(review): assumes email_sched is an APScheduler 2.x Scheduler.
    # unschedule_job() expects a Job object and raises KeyError when handed a
    # function; unschedule_func() removes the jobs that run send_email.
    email_sched.unschedule_func(send_email)
finally:
    # Always cut the power, even when a step above fails.
    io.output(power_pin, False)
class HypeScheduler(object):
    """Convenience layer on top of an APScheduler scheduler."""

    def __init__(self, local_tz: str = None):
        """Creates the wrapped scheduler and starts it.

        Args:
          local_tz: The local timezone the scheduler is running in.
        """
        self._scheduler = Scheduler()
        self._local_tz = local_tz
        self.StartScheduler()

    def StartScheduler(self):
        """Starts the wrapped scheduler unless it is unset or already running."""
        scheduler = self._scheduler
        if not scheduler:
            return
        if scheduler.running:
            return
        scheduler.start()

    def InSeconds(self, seconds: int, fn: Callable, *args, **kwargs) -> Job:
        """Runs fn once, the given number of seconds from now.

        Args:
          seconds: How many seconds to wait before running fn.
          fn: Function to call.
          *args: Arguments to pass to fn.
          **kwargs: Keyworded arguments to pass to fn.

        Returns:
          APScheduler Job.
        """
        # APScheduler 2.1.2 doesn't understand timezones, hence the naive datetime.
        run_at = arrow.now().shift(seconds=seconds).naive
        return self._scheduler.add_date_job(fn, run_at, args=args, kwargs=kwargs)

    def DailyCallback(self,
                      schedule_time: arrow.Arrow,
                      fn: Callable,
                      *args,
                      **kwargs) -> Job:
        """Runs fn once a day, starting at schedule_time.

        The start time is moved by a random number of seconds within
        +/- '_jitter' unless that kwarg is falsy.

        Args:
          schedule_time: An Arrow object specifying when to run fn.
          fn: The function to be run.
          *args: Arguments to pass to fn.
          **kwargs: Keyworded arguments to pass to fn. Special kwargs listed
            below:
              _jitter - {int} How many seconds to perturb scheduling time by,
                in both directions. Defaults to 30s. It is consumed here and
                never forwarded to fn.

        Returns:
          APScheduler Job.
        """
        jitter = kwargs.pop('_jitter', 30)
        first_run = schedule_time
        if self._local_tz:
            first_run = first_run.to(self._local_tz)
        if jitter:
            first_run = first_run.shift(seconds=random.randint(-jitter, jitter))
        # APScheduler 2.1.2 doesn't understand timezones, hence the naive datetime.
        return self._scheduler.add_interval_job(
            fn,
            args=args,
            kwargs=kwargs,
            start_date=first_run.naive,
            days=1)

    def FixedRate(self,
                  initial_delay: int,
                  period: int,
                  fn: Callable,
                  *args,
                  **kwargs) -> Job:
        """Runs fn repeatedly at a fixed rate.

        Args:
          initial_delay: Seconds to wait before scheduling first instance.
          period: Interval in seconds between subsequent instances.
          fn: The function to run.
          *args: Arguments to pass to fn.
          **kwargs: Keyworded arguments to pass to fn.

        Returns:
          APScheduler Job.
        """
        # APScheduler 2.1.2 doesn't understand timezones, hence the naive datetime.
        first_run = arrow.now().shift(seconds=initial_delay).naive
        return self._scheduler.add_interval_job(
            fn,
            args=args,
            kwargs=kwargs,
            start_date=first_run,
            seconds=period)

    def UnscheduleJob(self, job: Job) -> None:
        """Stops job from running in the future.

        A job the scheduler does not know (KeyError) is only logged.

        Args:
          job: Job to unschedule.
        """
        try:
            self._scheduler.unschedule_job(job)
        except KeyError:
            logging.info('Job %s not scheduled.', job)
class SchedulerContainer(DaemonContainer):
    """Daemon container that owns a Scheduler and forwards its job API.

    The scheduler is built from the "gconfig" and "options" entries of the
    environment mapping; every other method delegates to it unchanged.
    """

    def __init__(self, environment):
        """Initialise the container and build its scheduler.

        :param environment: mapping that may hold "gconfig" (passed to the
            scheduler positionally) and "options" (passed as keyword
            arguments); both default to empty dicts.
        """
        # super() must name this class: super(Scheduler, self) raises
        # TypeError because a container is not a Scheduler instance.
        super(SchedulerContainer, self).__init__(environment)
        gconfig = environment.get("gconfig", {})
        options = environment.get("options", {})
        self.scheduler = Scheduler(gconfig, **options)

    def on_start(self):
        """Start the wrapped scheduler."""
        self.scheduler.start()

    def on_stop(self):
        """Stop the wrapped scheduler."""
        # NOTE(review): APScheduler 2.x names this method shutdown(); confirm
        # that the Scheduler class used here really provides stop().
        self.scheduler.stop()

    def unschedule_func(self, func):
        """Forward to the scheduler's unschedule_func(func)."""
        self.scheduler.unschedule_func(func)

    def unschedule_job(self, job):
        """Forward to the scheduler's unschedule_job(job)."""
        self.scheduler.unschedule_job(job)

    def add_interval_job(self, func, weeks=0, days=0, hours=0, minutes=0, seconds=0,
                         start_date=None, args=None, kwargs=None, **options):
        """Forward to the scheduler's add_interval_job and return its result."""
        return self.scheduler.add_interval_job(
            func=func, weeks=weeks, days=days, hours=hours, minutes=minutes,
            seconds=seconds, start_date=start_date, args=args, kwargs=kwargs,
            **options)

    def add_cron_job(self, func, year=None, month=None, day=None, week=None,
                     day_of_week=None, hour=None, minute=None, second=None,
                     start_date=None, args=None, kwargs=None, **options):
        """Forward to the scheduler's add_cron_job and return its result."""
        return self.scheduler.add_cron_job(
            func=func, year=year, month=month, day=day, week=week,
            day_of_week=day_of_week, hour=hour, minute=minute, second=second,
            start_date=start_date, args=args, kwargs=kwargs, **options)

    def add_date_job(self, func, date, args=None, kwargs=None, **options):
        """Forward to the scheduler's add_date_job and return its result."""
        return self.scheduler.add_date_job(
            func=func, date=date, args=args, kwargs=kwargs, **options)

    def get_jobs(self):
        """Return whatever the scheduler's get_jobs() returns."""
        return self.scheduler.get_jobs()

    def add_job(self, trigger, func, args, kwargs, jobstore='default', **options):
        """Forward to the scheduler's add_job and return its result."""
        return self.scheduler.add_job(
            trigger=trigger, func=func, args=args, kwargs=kwargs,
            jobstore=jobstore, **options)

    def add_listener(self, callback, mask):
        """Forward to the scheduler's add_listener(callback, mask)."""
        self.scheduler.add_listener(callback, mask)

    def remove_listener(self, callback):
        """Forward to the scheduler's remove_listener(callback)."""
        self.scheduler.remove_listener(callback)
class ProgramHandler:
    """Schedules today's programs of one radio station.

    The programs are loaded from the database and registered as date jobs in
    a scheduler that is rebuilt once a day. A background thread listens on a
    TCP socket for add / update / delete events and adjusts the jobs.
    """

    def __init__(self, db, radio_station):
        """Store the collaborators and start the event listener thread.

        :param db: database session used to query ScheduledProgram rows
        :param radio_station: station object; its logger, id and station
            attributes are used by this class
        """
        self.__db = db
        self.__radio_station = radio_station
        # Both stay None until run_today_schedule() has prepared a schedule.
        self.__scheduler = None
        self.__scheduled_jobs = None
        self.__start_listeners()
        self.__radio_station.logger.info("Done initialising ProgramHandler for {0}".format(radio_station.station.name))

    def run(self):
        """Build and start the schedule for the current day."""
        self.run_today_schedule()

    def __prepare_schedule(self):
        """Load today's programs and create a fresh scheduler and job map."""
        self.__load_programs()
        self.__scheduler = Scheduler()
        self.__scheduled_jobs = dict()

    def run_today_schedule(self):
        """(Re)build the schedule, start it and arm tomorrow's rebuild."""
        self.__prepare_schedule()
        self.__scheduler.start()
        self.__schedule_programs()
        self.__schedule_next_day_scheduler()
        print(self.__scheduler.get_jobs())

    def stop(self):
        """Stop the running program."""
        self.__stop_program()
        # any clean up goes here
        # unschedule stuff

    def __schedule_next_day_scheduler(self):
        """Schedule run_today_schedule() to run again at the next midnight."""
        # TODO: make this safe for different timezones!
        # NOTE(review): timedelta.seconds is never negative, so a station
        # timezone west of UTC shifts the reload by 24h minus the real
        # offset - confirm the intended behaviour before changing it.
        base_date = date.today() + timedelta(1, 0)
        tomorrow_date = datetime.combine(base_date, time())
        # add the timezone offset
        tomorrow_date = tomorrow_date + timedelta(0, timezone(self.__radio_station.station.timezone).utcoffset(datetime.now()).seconds)
        # schedule the scheduler to reload at midnight
        self.__scheduler.add_date_job(getattr(self, 'run_today_schedule'), tomorrow_date)

    def __schedule_programs(self):
        """Register a job for every loaded program that has not expired."""
        for scheduled_program in self.__scheduled_programs:
            if not self.__is_program_expired(scheduled_program):
                self.__add_scheduled_job(scheduled_program)
                self.__radio_station.logger.info(
                    "Scheduled program {0} for station {1} starting at {2}".format(scheduled_program.program.name,
                                                                                    self.__radio_station.station.name,
                                                                                    scheduled_program.start))
        return

    def __add_scheduled_job(self, scheduled_program):
        """Create the date job that starts scheduled_program and remember it by id."""
        program = RadioProgram(self.__db, scheduled_program, self.__radio_station)
        scheduled_job = self.__scheduler.add_date_job(getattr(program, 'start'),
                                                      self.__get_program_start_time(scheduled_program).replace(
                                                          tzinfo=None))
        self.__scheduled_jobs[scheduled_program.id] = scheduled_job

    def __delete_scheduled_job(self, index):
        """Unschedule and forget the job stored under index, if there is one."""
        # The listener thread may call this before any schedule was prepared.
        if not self.__scheduled_jobs or index not in self.__scheduled_jobs:
            return
        job = self.__scheduled_jobs.pop(index)
        try:
            self.__scheduler.unschedule_job(job)
        except KeyError:
            # The scheduler no longer knows the job (a date job that already
            # fired); dropping the map entry is all that is left to do.
            pass

    def __stop_program(self):
        """Placeholder: stopping the running program is not implemented."""
        # self.__running_program.stop()
        return

    def __run_program(self):
        """Placeholder: running a program directly is not implemented."""
        # self.__running_program.run()
        return

    def __load_programs(self):
        """Load the non-deleted programs of this station that start today (UTC)."""
        self.__scheduled_programs = self.__db.query(ScheduledProgram).filter(
            ScheduledProgram.station_id == self.__radio_station.id).filter(text("date(start at TIME ZONE 'UTC') = current_date at TIME ZONE 'UTC'")).filter(
            ScheduledProgram.deleted == False).all()
        self.__radio_station.logger.info("Loaded programs for {0}".format(self.__radio_station.station.name))

    def __load_program(self, id):
        """Return the ScheduledProgram with the given id, or None."""
        return self.__db.query(ScheduledProgram).filter(ScheduledProgram.id == id).first()

    def __start_listeners(self):
        """Start the thread that listens for scheduling change events."""
        t = threading.Thread(target=self.__listen_for_scheduling_changes,
                             args=(DefaultConfig.SCHEDULE_EVENTS_SERVER_IP, DefaultConfig.SCHEDULE_EVENTS_SERVER_PORT))
        t.start()

    def __listen_for_scheduling_changes(self, ip, port):
        """Register with the events server and apply the events it sends.

        Understood actions are "delete", "add" and "update"; each event is a
        JSON object with "action" and "id" keys.
        """
        sck = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        addr = (ip, port)
        # It may not be possible to connect after restart, TIME_WAIT could
        # come into play etc. Anyway, keep trying
        connected = False
        while not connected:
            try:
                sck.connect(addr)
                connected = True
            except socket.error:
                self.__radio_station.logger.error("Could not connect to server, retrying in 30 ...")
                sleep(30)
        sck.send(json.dumps({'station': self.__radio_station.id, 'action': 'register'}))
        while True:
            data = sck.recv(1024)
            if not data:
                # An empty read means the server closed the connection;
                # looping on it would spin forever without receiving anything.
                self.__radio_station.logger.error("Connection to the scheduling events server was closed")
                break
            try:
                event = json.loads(data)
                if event["action"] == "delete":
                    self.__delete_scheduled_job(event["id"])
                    self.__radio_station.logger.info("Scheduled program with id {0} has been deleted".format(event["id"]))
                elif event["action"] == "add":
                    scheduled_program = self.__load_program(event["id"])
                    if not self.__is_program_expired(scheduled_program):
                        self.__add_scheduled_job(scheduled_program)
                        self.__radio_station.logger.info(
                            "Scheduled program with id {0} has been added at time {1}".format(event["id"],
                                                                                              scheduled_program.start))
                elif event["action"] == "update":
                    self.__delete_scheduled_job(event["id"])
                    scheduled_program = self.__load_program(event["id"])
                    # __is_program_expired() takes the program only; passing
                    # the duration as well raised a TypeError that used to be
                    # swallowed, so updated programs were never re-added.
                    if not self.__is_program_expired(scheduled_program):
                        self.__add_scheduled_job(scheduled_program)
                        self.__radio_station.logger.info(
                            "Scheduled program with id {0} has been moved to start at time {1}".format(event["id"],
                                                                                                       scheduled_program.start))
            except ValueError:
                # Most probably a JSON parse error; ignore the message
                pass
            except Exception as e:
                # Keep listening, but do not hide the failure
                self.__radio_station.logger.error("Could not handle scheduling event: {0}".format(e))

    def __get_current_program(self):
        """
        Gets the program to run from the current list of programs that are lined up for the day
        """
        for program in self.__scheduled_programs:
            if not self.__is_program_expired(program):
                return program

    def __is_program_expired(self, scheduled_program):
        """
        Returns whether or not the time for a particular program has passed
        """
        now = pytz.utc.localize(datetime.utcnow())
        # expired when the program ends less than one minute from now
        return (scheduled_program.start + scheduled_program.program.duration) < (now + timedelta(minutes=1))

    def __get_program_start_time(self, scheduled_program):
        """Return when the program's job should fire, 5 seconds after its start (or after now if already started)."""
        now = datetime.now(dateutil.tz.tzlocal())
        if scheduled_program.start < now:  # Time at which program begins is already past
            return now + timedelta(seconds=5)  # 5 second scheduling allowance
        else:
            return scheduled_program.start + timedelta(seconds=5)  # 5 second scheduling allowance
除此之外,也可以使用 Decorator 的方式来调度任务,如下:

from apscheduler.scheduler import Scheduler

sched = Scheduler()
sched.daemonic = False
sched.start()

@sched.interval_schedule(hours=2, start_date='2012-04-12 09:54:59')
def job_function():
    print "Hello World"

如果想取消通过 Decorator 方式调度的任务,可以通过如下方式:

sched.unschedule_job(job_function.job)

3. 如果我们想实现类似 Linux 下 crontab 的功能,可以通过 Cron-style scheduling 方式来实现,如下:

from apscheduler.scheduler import Scheduler

sched = Scheduler()
sched.daemonic = False

def job_function():
    print "Hello World"

# job_function 将会在六月、七月、八月、十一月、十二月的第三个星期五的 0 点、1 点、2 点和 3 点整执行
sched.add_cron_job(job_function, month='6-8,11-12', day='3rd fri', hour='0-3')
class AlertSchedulerHandler():
    """Keeps the scheduled alert jobs in sync with the cached alert definitions."""

    make_cachedir = True

    FILENAME = 'definitions.json'
    TYPE_PORT = 'PORT'
    TYPE_METRIC = 'METRIC'
    TYPE_SCRIPT = 'SCRIPT'

    APS_CONFIG = {
        'threadpool.core_threads': 3,
        'coalesce': True,
        'standalone': False
    }

    def __init__(self, cachedir, stacks_dir, in_minutes=True):
        """
        Remembers the directories, creates the cache directory when allowed
        and missing, and builds the collector and the scheduler.
        """
        self.cachedir = cachedir
        self.stacks_dir = stacks_dir

        must_create = not os.path.exists(cachedir) and AlertSchedulerHandler.make_cachedir
        if must_create:
            try:
                os.makedirs(cachedir)
            except:
                logger.critical(
                    "Could not create the cache directory {0}".format(cachedir))

        self._collector = AlertCollector()
        self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)
        self.__in_minutes = in_minutes
        self.__config_maps = {}

    def update_definitions(self, alert_commands, reschedule_jobs=False):
        ''' writes the definitions to the cache file and optionally reschedules '''
        definitions_path = os.path.join(self.cachedir, self.FILENAME)
        with open(definitions_path, 'w') as f:
            json.dump(alert_commands, f, indent=2)

        if reschedule_jobs:
            self.reschedule()

    def __make_function(self, alert_def):
        ''' wraps the definition in a no-argument callable for the scheduler '''
        return lambda: alert_def.collect()

    def start(self):
        ''' loads definitions from file and starts the scheduler '''
        if self.__scheduler is None:
            return

        # a running scheduler is shut down and replaced by a fresh one
        if self.__scheduler.running:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)

        # schedule each definition
        for alert_callable in self.__load_definitions():
            self.schedule_definition(alert_callable)

        logger.debug("Starting scheduler {0}; currently running: {1}".format(
            str(self.__scheduler), str(self.__scheduler.running)))

        self.__scheduler.start()

    def stop(self):
        ''' shuts the scheduler down and replaces it with a fresh, unstarted one '''
        if self.__scheduler is None:
            return

        self.__scheduler.shutdown(wait=False)
        self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)

    def reschedule(self):
        '''
        Removes jobs that are scheduled where their UUID no longer is valid.
        Schedules jobs where the definition UUID is not currently scheduled.
        '''
        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # jobs whose name matches no definition UUID should be unscheduled
        known_uuids = set(definition.get_uuid() for definition in definitions)
        stale_jobs = [job for job in scheduled_jobs if job.name not in known_uuids]
        for stale_job in stale_jobs:
            logger.info("Unscheduling {0}".format(stale_job.name))
            self._collector.remove_by_uuid(stale_job.name)
            self.__scheduler.unschedule_job(stale_job)

        # definitions whose UUID names no scheduled job should be scheduled
        scheduled_names = set(job.name for job in scheduled_jobs)
        unscheduled = [definition for definition in definitions
                       if definition.get_uuid() not in scheduled_names]
        for definition in unscheduled:
            self.schedule_definition(definition)

        logger.info(
            "Alert Reschedule Summary: {0} rescheduled, {1} unscheduled".
            format(str(len(unscheduled)), str(len(stale_jobs))))

    def collector(self):
        ''' gets the collector for reporting to the server '''
        return self._collector

    def __load_definitions(self):
        '''
        loads all alert commands from the file.  all clusters are stored in
        one file
        '''
        definitions = []

        try:
            with open(os.path.join(self.cachedir, self.FILENAME)) as fp:
                all_commands = json.load(fp)
        except:
            if logger.isEnabledFor(logging.DEBUG):
                traceback.print_exc()
            return definitions

        for command_json in all_commands:
            clusterName = command_json.get('clusterName', '')
            hostName = command_json.get('hostName', '')
            configmap = command_json.get('configurations')

            # each cluster gets a map of key/value pairs of substitution values
            cluster_values = {}
            self.__config_maps[clusterName] = cluster_values

            for definition in command_json['alertDefinitions']:
                alert = self.__json_to_callable(clusterName, hostName, definition)
                if alert is None:
                    continue

                # get the config values for the alerts 'lookup keys',
                # eg: hdfs-site/dfs.namenode.http-address : host_and_port
                cluster_values.update(
                    self.__find_config_values(configmap, alert.get_lookup_keys()))

                alert.set_helpers(self._collector, cluster_values)
                definitions.append(alert)

        return definitions

    def __json_to_callable(self, clusterName, hostName, json_definition):
        '''
        builds the alert object matching the definition's source type and
        assigns it the cluster and host; returns None for an unknown type
        '''
        source = json_definition['source']
        source_type = source.get('type', '')

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Creating job type {0} with {1}".format(
                source_type, str(json_definition)))

        if source_type == AlertSchedulerHandler.TYPE_METRIC:
            alert = MetricAlert(json_definition, source)
        elif source_type == AlertSchedulerHandler.TYPE_PORT:
            alert = PortAlert(json_definition, source)
        elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
            source['stacks_dir'] = self.stacks_dir
            alert = ScriptAlert(json_definition, source)
        else:
            return None

        if alert is not None:
            alert.set_cluster(clusterName, hostName)

        return alert

    def __find_config_values(self, configmap, obj_keylist):
        '''
        finds templated values in the configuration map provided by the server
        '''
        found = {}
        if configmap is None:
            return found

        for key in obj_keylist:
            node = configmap
            try:
                for layer in key.split('/'):
                    node = node[layer]
            except KeyError:
                # the nested key is missing somewhere
                continue
            found[key] = node

        return found

    def update_configurations(self, commands):
        '''
        when an execution command comes in, update any necessary values.
        status commands do not contain useful configurations
        '''
        for command in commands:
            clusterName = command['clusterName']
            if clusterName not in self.__config_maps:
                continue

            if 'configurations' not in command:
                continue

            cluster_values = self.__config_maps[clusterName]
            refreshed = self.__find_config_values(
                command['configurations'], cluster_values.keys())
            cluster_values.update(refreshed)

    def schedule_definition(self, definition):
        '''
        Schedule a definition (callable). Scheduled jobs are given the UUID
        as their name so that they can be identified later on.
        <p/>
        This function can be called with a definition that is disabled; it
        will simply NOOP.
        '''
        # NOOP if the definition is disabled; don't schedule it
        if definition.is_enabled() == False:
            logger.info(
                "The alert {0} with UUID {1} is disabled and will not be scheduled"
                .format(definition.get_name(), definition.get_uuid()))
            return

        # the definition's interval is read as minutes or as seconds
        unit = 'minutes' if self.__in_minutes else 'seconds'
        job = self.__scheduler.add_interval_job(
            self.__make_function(definition), **{unit: definition.interval()})

        # although the documentation states that Job(kwargs) takes a name
        # key/value pair, it does not actually set the name; do it manually
        if job is not None:
            job.name = definition.get_uuid()

        logger.info("Scheduling {0} with UUID {1}".format(
            definition.get_name(), definition.get_uuid()))

    def get_job_count(self):
        '''
        Gets the number of jobs currently scheduled. This is mainly used for
        test verification of scheduling
        '''
        scheduler = self.__scheduler
        return 0 if scheduler is None else len(scheduler.get_jobs())

    def execute_alert(self, execution_commands):
        '''
        Executes an alert immediately, ignoring any scheduled jobs. The
        existing jobs remain untouched. The result of this is stored in the
        alert collector for tranmission during the next heartbeat
        '''
        if self.__scheduler is None or execution_commands is None:
            return

        for execution_command in execution_commands:
            try:
                alert_definition = execution_command['alertDefinition']
                clusterName = execution_command.get('clusterName', '')
                hostName = execution_command.get('hostName', '')

                alert = self.__json_to_callable(clusterName, hostName,
                                                alert_definition)
                if alert is None:
                    continue

                logger.info("Executing on-demand alert {0} ({1})".format(
                    alert.get_name(), alert.get_uuid()))

                alert.set_helpers(self._collector,
                                  self.__config_maps[clusterName])
                alert.collect()
            except:
                logger.exception(
                    "Unable to execute the alert outside of the job scheduler")
class Scheduler(Plugin):
    """Plugin that exposes cron and interval scheduling through events.

    Handlers are registered under an identifier; registering the same
    identifier again replaces the previous job.
    """

    # identifier -> {'handle', 'day', 'hour', 'minute', 'job'}
    # NOTE: class-level dicts, hence shared by every instance of the plugin.
    crons = {}
    # identifier -> {'handle', 'hours', 'minutes', 'seconds', 'job'}
    intervals = {}
    started = False

    def __init__(self):
        """Register the schedule.* events and start the scheduler."""
        addEvent('schedule.cron', self.cron)
        addEvent('schedule.interval', self.interval)
        addEvent('schedule.remove', self.remove)

        self.sched = Sched(misfire_grace_time=60)
        self.sched.start()
        self.started = True

    def remove(self, identifier):
        """Unschedule the interval and/or cron job registered as identifier.

        Unknown identifiers and jobs the scheduler no longer holds are
        ignored on purpose: removal is best effort.
        """
        for cron_type in ['intervals', 'crons']:
            try:
                self.sched.unschedule_job(getattr(self, cron_type)[identifier]['job'])
                # NOTE(review): the values are passed as one tuple, matching the
                # other log calls in this class; presumably the project's logger
                # formats the message itself - confirm.
                log.debug('%s unscheduled %s', (cron_type.capitalize(), identifier))
            except Exception:
                pass

    def doShutdown(self):
        """Stop the scheduler, then run the parent's shutdown and return its result."""
        self.stop()
        return super(Scheduler, self).doShutdown()

    def stop(self):
        """Shut the scheduler down if it is running."""
        if self.started:
            log.debug('Stopping scheduler')
            self.sched.shutdown()
            log.debug('Scheduler stopped')
        self.started = False

    def cron(self, identifier='', handle=None, day='*', hour='*', minute='*'):
        """Schedule handle as a cron job, replacing any job with the same identifier.

        :param identifier: name under which the job is stored
        :param handle: callable to run
        :param day: cron expression for the day field
        :param hour: cron expression for the hour field
        :param minute: cron expression for the minute field
        """
        log.info('Scheduling "%s", cron: day = %s, hour = %s, minute = %s', (identifier, day, hour, minute))

        self.remove(identifier)
        self.crons[identifier] = {
            'handle': handle,
            'day': day,
            'hour': hour,
            'minute': minute,
            'job': self.sched.add_cron_job(handle, day=day, hour=hour, minute=minute),
        }

    def interval(self, identifier='', handle=None, hours=0, minutes=0, seconds=0):
        """Schedule handle as an interval job, replacing any job with the same identifier.

        :param identifier: name under which the job is stored
        :param handle: callable to run
        :param hours: hours between runs
        :param minutes: minutes between runs
        :param seconds: seconds between runs
        """
        log.info('Scheduling %s, interval: hours = %s, minutes = %s, seconds = %s', (identifier, hours, minutes, seconds))

        self.remove(identifier)
        self.intervals[identifier] = {
            'handle': handle,
            'hours': hours,
            'minutes': minutes,
            'seconds': seconds,
            'job': self.sched.add_interval_job(handle, hours=hours, minutes=minutes, seconds=seconds),
        }
class DS_Scheduler:
    """ The main guts and logic for the scheduler.

    run() polls the event and job tables and keeps the scheduler in line
    with the update flag of every job. In clustering mode only the node that
    currently considers itself master schedules jobs.
    """

    def __init__(self):
        """Start the scheduler and reset the state handled by Util."""
        self.util = Util()
        self.sched = Scheduler(conf.config)
        self.sched.start()
        # job name -> scheduled job object
        self.queue = {}
        self.util.init_path()
        self.util.init_DB()
        self.util.reset_all()

    def run(self):
        """Main loop: elect the master, then process events and jobs forever.

        The scheduler is shut down when the loop is left through an exception.
        """
        i = 0
        master = False
        idle = False
        try:
            while True:
                if conf.clustering == True:
                    heartbeats.getActive()
                    # Here is where we elect our current master node. If the
                    # conf.preferred_master is not us and is not up yet
                    # (neither True nor False), then we will wait 10 check
                    # cycles and if the master is not up we will assume control.
                    if conf.preferred_master == myself:
                        # I am the master node and I should always be in charge
                        master = True
                        i = 0
                    elif conf.preferred_master != myself and conf.peers[conf.preferred_master] == 'True':
                        # The master will take control now
                        master = False
                    elif conf.peers[conf.preferred_master] == False:
                        # Assume the master server is dead
                        master = True
                    elif conf.peers[conf.preferred_master] == '':
                        # Master has not been started yet
                        i += 1
                        if i >= 10:
                            master = True
                            i = 10
                    else:
                        # I am not the master node, but I am available just in case
                        master = False

                if master == True or conf.clustering == False:
                    idle = False
                    self._process_events()
                    jobs = self.util.refresh_jobs()
                    for job in jobs.values():
                        self._process_job(job)
                elif idle == False:
                    # Unscheduling all jobs so that the peer node can take over.
                    # Every queued job is dropped; the loop used to reuse a
                    # job_name left over from (or never set by) the master branch.
                    for job_name in list(self.queue):
                        self._dequeue(job_name)
                        Log("Failed over job %s" % (job_name))
                    self.util.reset_all_jobs()
                    idle = True

                # Sleep on every pass so that a standby node does not spin.
                time.sleep(conf.CHECK_PERIOD)
        finally:
            self.sched.shutdown()

    def _process_events(self):
        """Send every event flagged as new to its host and clear the flag."""
        events = self.util.refresh_events()
        for event in events.values():
            e_id = event[0]
            e_type = event[1]
            e_target = event[2]
            e_host = event[3]
            e_update_flag = event[4]
            if e_update_flag == 1:
                self.util.remote_command(e_id, e_host, e_type, e_target, 'NEW EVENT')
                self.util.runQuery("update event_table set update_flag=0 where id=%s" % (e_id))

    def _dequeue(self, job_name):
        """Unschedule and forget the queued job job_name; return True if it was queued."""
        job = self.queue.pop(job_name, None)
        if job is None:
            return False
        try:
            self.sched.unschedule_job(job)
        except KeyError:
            # the scheduler no longer holds the job; nothing left to remove
            pass
        return True

    def _process_job(self, job):
        """Apply the update flag of one job row (1 = (re)schedule, 2 = remove, 3 = disable, 4 = kill)."""
        # TODO: Change these in the code below
        j_id = job[0]
        j_name = job[1]
        j_host = job[2]
        j_user = job[3]
        j_dep = job[6]
        j_etrigger = job[8]
        j_cron = job[9]
        j_command = job[12]
        j_update_flag = job[13]
        j_pid = job[16]
        j_dep_mode = job[17]
        # Job names need to be unique, so the same 'job' being run on
        # different hosts can work.
        job_name = '%s_%i' % (j_name, j_id)
        run_args = (j_id, j_host, j_command, j_user, job_name)
        now = dt.datetime.now() + dt.timedelta(seconds=5)

        # If the job was updated then the update_flag will be set, so reschedule
        if j_update_flag == 1:
            # Lets make sure the job does not already exist. If it does it
            # means the user had edited an existing job so let's requeue
            if self._dequeue(job_name):
                Log("Refreshing job %s" % (job_name))

            # DEP SECTION
            if j_etrigger > 0:
                # Event trigger: run once right away. The flag is left at 1
                # with the trigger cleared, so the next pass handles the job
                # again without the trigger.
                current_job = self.sched.add_date_job(self.util.remote_command, now, name=job_name, args=run_args)
                Log(current_job)
                self.util.runQuery("UPDATE jobs set update_flag=1, event_trigger=0 where id='%s'" % (j_id))

            if j_dep > 0:
                # If job depends on another...
                now = dt.datetime.now() + dt.timedelta(seconds=10)
                parent = self.util.job_status(j_dep)
                # modes: 0 = ON_SUCCESS, 1 = ON_FAIL, 2 = ON_KILL; run when the
                # parent's status field equals the mode
                if j_dep_mode in (0, 1, 2) and j_dep_mode == parent[11]:
                    current_job = self.sched.add_date_job(self.util.remote_command, now, name=job_name, args=run_args)
                    Log(current_job)
                self.util.runQuery("update jobs set update_flag=0 where id='%s'" % (j_id))

            if j_etrigger == 0 and j_dep == 0:
                # Standard "cron" type scheduling
                crontab = j_cron.split()
                current_job = self.sched.add_cron_job(self.util.remote_command, minute=crontab[0], hour=crontab[1], day=crontab[2], month=crontab[3], day_of_week=crontab[4], name=job_name, args=run_args, max_instances=2)
                # Add the job to the queue
                Log(current_job)
                self.queue[job_name] = current_job
                self.util.runQuery("update jobs set update_flag=0 where id='%s'" % (j_id))

        # Job was marked for removal
        if j_update_flag == 2:
            self._dequeue(job_name)
            self.util.remove_job(j_id)
            Log("Unscheduled job %s" % (job_name))
            self.util.runQuery("update jobs set update_flag=0 where id='%s'" % (j_id))

        if j_update_flag == 3:
            Log("Disabling Job: %s" % (job_name))
            self._dequeue(job_name)
            self.util.disable_job(j_id)

        if j_update_flag == 4:
            Log("Killing PID: %s Job: %s" % (j_pid, job_name))
            kill_command = 'kill -9 %s' % (j_pid)
            current_job = self.sched.add_date_job(self.util.remote_command, now, name='ON_KILL %s' % (j_pid), args=(j_id, j_host, kill_command, j_user, job_name))
            Log(current_job)
            # NOTE(review): this replaces the queue entry of the job itself
            # with the one-off kill job - confirm that this is intended.
            self.queue[job_name] = current_job
            self.util.runQuery("update jobs set update_flag=0, status=99996 where id='%s'" % (j_id))
class PhoneManager:
    """Apply SMS-forwarding, call-divert and auto-reply settings to one modem.

    The settings are read from an INI-style configuration file.  Sections
    whose name starts with ``kScheduleSetting`` describe cron-like jobs that
    switch one of those features on or off at the configured times; they are
    registered on ``self.sched``.
    """

    # Default sleep time between checks.
    kDefaultLoopSleep = 10

    # SMS forwarding information.
    bForwardSMS = False
    sForwardSMSNumber = ""
    bCallForwardNotify = False

    # Call forwarding information.
    bEnableCallForward = False
    sCallForwardNumber = ""
    bSMSForwardNotify = False

    # SMS auto-reply information.
    bAutoReply = False
    sAutoReplyText = ""
    bDeleteAfterResponse = False

    # Disable call forwarding.
    bDisableCallForward = False

    # Determines if all SMSs should be processed, or just unread messages.
    bCheckAllSMS = True

    # The last result of a "send SMS".
    bLastResult = True

    # MSISDN of the last received SMS.
    sLastMessageFrom = ""

    # Numbers that have already received an auto-reply.  Kept at class level
    # for backward compatibility; __init__ gives every instance its own list
    # so that instances do not share (and mutate) this one.
    listAutoReply = []

    # Thread used for processing.
    _thread = None
    _bActive = False
    _lastRunTime = None

    # IMSI read from the config file.
    _IMSI = None

    # Message queue.
    messageQueue = None

    # Config file.
    config = None
    configFile = ''

    # Section / option names used in the config file.
    kModemSection = 'modem'
    kIMSIOption = 'IMSI'
    kSettingsSection = 'settings'
    kForwardOption = 'forwardsms'
    kDivertOption = 'divert'
    kAutoReplyOption = 'autoreply'
    kScheduleSetting = 'schedule'
    kCommandOption = 'command'
    kOptionOption = 'option'
    kDayOption = 'day'
    kWeekOption = 'week'
    kDayOfWeekOption = 'day_of_week'
    kHourOption = 'hour'
    kMinuteOption = 'minute'

    # Scheduler.
    sched = None

    # Last modification time of the config file (set by loadConfig).
    configFileLastChanged = 0

    def __init__(self, modems, configFile):
        """Load ``configFile`` and bind to the modem matching its IMSI.

        Args:
            modems: mapping from IMSI to modem object.
            configFile: path of the configuration file.

        Raises:
            Exception: if ``modems`` or ``configFile`` is None, or if
                ``modems`` has no entry for the IMSI found in the config.
        """
        # Validate before logging: building the log message used to
        # concatenate configFile into a string, which raised TypeError for
        # None and made the explicit check below unreachable.
        if modems is None:
            raise Exception('A Modem Must be Provided')
        if configFile is None:
            raise Exception('A config file must be provided.')
        logger.debug('Init Modem, modems=%s configFileconfigFile=%s',
                     modems, configFile)

        self.messageQueue = []
        # Per-instance list instead of the shared class-level one.
        self.listAutoReply = []
        self.sched = Scheduler()
        self.loadConfig(configFile)
        if self._IMSI in modems:
            self.modem = modems[self._IMSI]
        else:
            raise Exception('No modem for selected IMSI')

    def loadConfig(self, configFile):
        """(Re)load the configuration and rebuild all scheduled jobs.

        The file is copied to ``configFile + '~'`` and the copy is parsed.
        Every job currently registered on ``self.sched`` is unscheduled
        first.  Errors raised while parsing or applying the configuration
        are logged and swallowed (best effort), so the caller must check
        the resulting state (e.g. ``self._IMSI``) itself.

        Args:
            configFile: path of the configuration file.
        """
        logger.info('Loading config:' + configFile)
        self.configFile = configFile
        workConfigFile = configFile + '~'
        logger.debug('Renaming to ' + workConfigFile)
        shutil.copyfile(configFile, workConfigFile)

        logger.debug('Clearing Scheduled Tasks')
        for oldJob in self.sched.get_jobs():
            logger.debug('Removing ' + str(oldJob))
            self.sched.unschedule_job(oldJob)

        try:
            config = ConfigParser.ConfigParser()
            config.read(workConfigFile)
            self.config = config
            self._loadSettings(config)
            self._loadSchedules(config)
            self.configFileLastChanged = time.ctime(
                os.path.getmtime(self.configFile))
            logger.debug('Config file last changed: ' +
                         self.configFileLastChanged)
        except Exception as e:
            # Deliberate best effort: a broken config must not kill the caller.
            logger.critical('Error loading config file')
            logger.exception(e)

    def _loadSettings(self, config):
        """Apply the modem IMSI and the immediate (non-scheduled) settings."""
        imsi = config.get(self.kModemSection, self.kIMSIOption)
        logger.debug('modem/IMSI-' + imsi)
        self._IMSI = imsi

        settings = self.kSettingsSection

        # SMS forwarding: a missing option is treated like an empty one,
        # mirroring the auto-reply handling below.
        number = ''
        if config.has_option(settings, self.kForwardOption):
            number = config.get(settings, self.kForwardOption)
        if len(number) > 0:
            logger.debug('Forward SMS Number-' + number)
            self.enableSMSForwarding(number)
        else:
            self.disableSMSForwarding()

        # Call divert.
        if config.has_option(settings, self.kDivertOption):
            number = config.get(settings, self.kDivertOption)
            if len(number) > 0:
                logger.debug('Divert Number-' + number)
                self.enableCallForward(number)
        else:
            # Make sure the option exists.  Disabling call forward has to be
            # written into the main thread, so it is not done here.
            config.set(settings, self.kDivertOption, '')

        # SMS auto-reply.
        autoReplyText = ''
        if config.has_option(settings, self.kAutoReplyOption):
            autoReplyText = config.get(settings, self.kAutoReplyOption)
        if len(autoReplyText) > 0:
            logger.debug('Auto Reply Text-' + autoReplyText)
            self.enableSMSAutoReply(autoReplyText)
        else:
            self.disableSMSAutoReply()

    def _loadSchedules(self, config):
        """Register one cron job per ``schedule*`` section of ``config``."""
        # command -> (method used when an option is given,
        #             method used when the option is empty).
        # Names are resolved lazily so only the handler that is actually
        # needed has to exist.
        handlerNames = {
            self.kForwardOption:
                ('enableSMSForwarding', 'disableSMSForwarding'),
            self.kDivertOption:
                ('enableCallForward', 'disableCallForward'),
            self.kAutoReplyOption:
                ('enableSMSAutoReply', 'disableSMSAutoReply'),
        }

        for section in config.sections():
            if not section.startswith(self.kScheduleSetting):
                continue
            logger.debug('Adding schedule ' + section)

            # Without a command there is nothing to schedule; previously
            # this fell through with an unbound (or stale) `command`.
            if not config.has_option(section, self.kCommandOption):
                logger.warning('Schedule ' + section +
                               ' has no command, skipping')
                continue
            command = config.get(section, self.kCommandOption)

            option = ''
            optionArgs = []
            if config.has_option(section, self.kOptionOption):
                option = config.get(section, self.kOptionOption)
                for opt in option.split(','):
                    logger.debug('Option: ' + opt)
                    optionArgs.append(self.getStringAsType(opt))
                logger.debug('Options: ' + str(optionArgs))

            _day = self.getSetConfigOption(config, section, self.kDayOption)
            _week = self.getSetConfigOption(config, section, self.kWeekOption)
            _dayOfWeek = self.getSetConfigOption(
                config, section, self.kDayOfWeekOption)
            _hour = self.getSetConfigOption(config, section, self.kHourOption)
            _minute = self.getSetConfigOption(
                config, section, self.kMinuteOption)
            logger.debug('Add Schdule. Comamnd=[' + command +
                         '] options [' + option +
                         '] day=' + _day +
                         ' week=' + _week +
                         ' day_of_week=' + _dayOfWeek +
                         ' hour=' + _hour +
                         ' minutes=' + _minute)
            cron = {
                'day': _day,
                'week': _week,
                'day_of_week': _dayOfWeek,
                'hour': _hour,
                'minute': _minute,
            }

            # Reset per section so a job from a previous section (or from
            # the unschedule loop) is never reported for this one.
            job = None
            if command in handlerNames:
                enableName, disableName = handlerNames[command]
                if len(option) > 0:
                    job = self.sched.add_cron_job(
                        getattr(self, enableName), args=optionArgs, **cron)
                else:
                    job = self.sched.add_cron_job(
                        getattr(self, disableName), **cron)
            if job is not None:
                logger.info(str(job))
class PyFlowScheduler(object):
    """
    This object schedules the submission of the tasks in a :class:`Flow`.
    There are two types of errors that might occur during the execution of the jobs:

        #. Python exceptions
        #. Errors in the ab-initio code

    Python exceptions are easy to detect and are usually due to a bug in the python code
    or random errors such as IOError. The set of errors in the ab-initio is much much broader.
    It includes wrong input data, segmentation faults, problems with the resource manager, etc.
    The flow tries to handle the most common cases but there's still a lot of room for improvement.
    Note, in particular, that `PyFlowScheduler` will shutdown automatically in the following cases:

        #. The number of python exceptions is > max_num_pyexcs

        #. The number of task errors (i.e. the number of tasks whose status is S_ERROR) is > max_num_abierrs

        #. The number of jobs launched becomes greater than (`safety_ratio` * total_number_of_tasks).

        #. The scheduler will send an email to the user (specified by `mailto`) every `remindme_s` seconds.
           If the mail cannot be sent, the scheduler will shutdown automatically.
           This check prevents the scheduler from being trapped in an infinite loop.
    """
    # Configuration file.
    YAML_FILE = "scheduler.yml"
    USER_CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".abinit", "abipy")

    Error = PyFlowSchedulerError

    @classmethod
    def autodoc(cls):
        """Return the part of the `__init__` docstring that follows "Args:"."""
        i = cls.__init__.__doc__.index("Args:")
        return cls.__init__.__doc__[i + 5:]

    def __init__(self, **kwargs):
        """
        Args:
            weeks: number of weeks to wait (DEFAULT: 0).
            days: number of days to wait (DEFAULT: 0).
            hours: number of hours to wait (DEFAULT: 0).
            minutes: number of minutes to wait (DEFAULT: 0).
            seconds: number of seconds to wait (DEFAULT: 0).
            mailto: The scheduler will send an email to `mailto` every `remindme_s` seconds.
                (DEFAULT: None i.e. not used).
            verbose: (int) verbosity level. (DEFAULT: 0)
            use_dynamic_manager: "yes" if the :class:`TaskManager` must be re-initialized from
                file before launching the jobs. (DEFAULT: "no")
            max_njobs_inqueue: Limit on the number of jobs that can be present in the queue. (DEFAULT: 200)
            max_ncores_used: If not None, no new task is submitted in an iteration in which
                the number of cores allocated by the flow exceeds this value. (DEFAULT: None)
            contact_resource_manager: "yes" if the number of jobs in the queue should be
                requested from the flow before launching (expensive call). (DEFAULT: "no")
            remindme_s: The scheduler will send an email to the user specified by `mailto` every `remindme_s` seconds.
                (int, DEFAULT: 1 day).
            max_num_pyexcs: The scheduler will exit if the number of python exceptions is > max_num_pyexcs
                (int, DEFAULT: 0)
            max_num_abierrs: The scheduler will exit if the number of errored tasks is > max_num_abierrs
                (int, DEFAULT: 0)
            safety_ratio: The scheduler will exits if the number of jobs launched becomes greater than
                `safety_ratio` * total_number_of_tasks_in_flow. (int, DEFAULT: 5)
            max_nlaunches: Maximum number of tasks launched in a single iteration of the scheduler.
                (DEFAULT: -1 i.e. no limit)
            debug: Debug level. Use 0 for production (int, DEFAULT: 0)
            fix_qcritical: "yes" if the launcher should try to fix QCritical Errors (DEFAULT: "yes")
            rmflow: If "yes", the scheduler will remove the flow directory if the calculation
                completed successfully. (DEFAULT: "no")
            killjobs_if_errors: "yes" if the scheduler should try to kill all the runnnig jobs
                before exiting due to an error. (DEFAULT: "yes")
            customer_service_dir: Directory in which a light tarball of the flow is created
                at shutdown. Requires `mailto`. (DEFAULT: None i.e. not used)

        Raises:
            `PyFlowScheduler.Error` if no time interval is given or if unknown arguments are passed.
            `RuntimeError` if apscheduler is not available.
        """
        # Options passed to the scheduler.
        self.sched_options = AttrDict(
            weeks=kwargs.pop("weeks", 0),
            days=kwargs.pop("days", 0),
            hours=kwargs.pop("hours", 0),
            minutes=kwargs.pop("minutes", 0),
            seconds=kwargs.pop("seconds", 0),
        )

        # At least one component of the interval must be non-zero.
        if all(not v for v in self.sched_options.values()):
            raise self.Error("Wrong set of options passed to the scheduler.")

        self.mailto = kwargs.pop("mailto", None)
        self.verbose = int(kwargs.pop("verbose", 0))
        self.use_dynamic_manager = as_bool(kwargs.pop("use_dynamic_manager", False))
        self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200)
        self.max_ncores_used = kwargs.pop("max_ncores_used", None)
        self.contact_resource_manager = as_bool(kwargs.pop("contact_resource_manager", False))

        self.remindme_s = float(kwargs.pop("remindme_s", 1 * 24 * 3600))
        self.max_num_pyexcs = int(kwargs.pop("max_num_pyexcs", 0))
        self.max_num_abierrs = int(kwargs.pop("max_num_abierrs", 0))
        self.safety_ratio = int(kwargs.pop("safety_ratio", 5))
        self.max_nlaunches = kwargs.pop("max_nlaunches", -1)
        self.debug = kwargs.pop("debug", 0)
        self.fix_qcritical = as_bool(kwargs.pop("fix_qcritical", True))
        self.rmflow = as_bool(kwargs.pop("rmflow", False))
        self.killjobs_if_errors = as_bool(kwargs.pop("killjobs_if_errors", True))

        self.customer_service_dir = kwargs.pop("customer_service_dir", None)
        if self.customer_service_dir is not None:
            self.customer_service_dir = Directory(self.customer_service_dir)
            self._validate_customer_service()

        # Everything known has been popped: what is left is a user mistake.
        if kwargs:
            raise self.Error("Unknown arguments %s" % kwargs)

        if not has_apscheduler:
            raise RuntimeError("Install apscheduler with pip")

        if has_sched_v3:
            logger.warning("Using scheduler v>=3.0.0")
            from apscheduler.schedulers.blocking import BlockingScheduler
            self.sched = BlockingScheduler()
        else:
            from apscheduler.scheduler import Scheduler
            self.sched = Scheduler(standalone=True)

        self.nlaunch = 0
        self.num_reminders = 1

        # Used to keep track of the exceptions raised while the scheduler is running
        self.exceptions = deque(maxlen=self.max_num_pyexcs + 10)

        # Used to push additional info during the execution.
        self.history = deque(maxlen=100)

    @classmethod
    def from_file(cls, filepath):
        """Read the configuration parameters from a Yaml file."""
        with open(filepath, "rt") as fh:
            return cls(**yaml.safe_load(fh))

    @classmethod
    def from_string(cls, s):
        """Create an istance from string s containing a YAML dictionary."""
        stream = cStringIO(s)
        stream.seek(0)
        return cls(**yaml.safe_load(stream))

    @classmethod
    def from_user_config(cls):
        """
        Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'.
        Search first in the working directory and then in the configuration directory of abipy.

        Raises:
            `cls.Error` if file is not found.
        """
        # Try in the current directory.
        path = os.path.join(os.getcwd(), cls.YAML_FILE)
        if os.path.exists(path):
            return cls.from_file(path)

        # Try in the configuration directory.
        path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE)
        if os.path.exists(path):
            return cls.from_file(path)

        raise cls.Error(
            "Cannot locate %s neither in current directory nor in %s" % (cls.YAML_FILE, path))

    def __str__(self):
        """String representation."""
        lines = [self.__class__.__name__ + ", Pid: %d" % self.pid]
        app = lines.append
        app("Scheduler options: %s" % str(self.sched_options))

        if self.flow is not None:
            app(80 * "=")
            app(str(self.flow))

        return "\n".join(lines)

    @property
    def pid(self):
        """The pid of the process associated to the scheduler."""
        try:
            return self._pid
        except AttributeError:
            self._pid = os.getpid()
            return self._pid

    @property
    def pid_file(self):
        """
        Absolute path of the file with the pid.
        The file is located in the workdir of the flow
        """
        return self._pid_file

    @property
    def flow(self):
        """`Flow` or None if no flow has been added yet."""
        try:
            return self._flow
        except AttributeError:
            return None

    @property
    def num_excs(self):
        """Number of exceptions raised so far."""
        return len(self.exceptions)

    def get_delta_etime(self):
        """Returns a `timedelta` object representing with the elapsed time."""
        return timedelta(seconds=(time.time() - self.start_time))

    def add_flow(self, flow):
        """
        Add an :class:`Flow` flow to the scheduler.

        Raises:
            `self.Error` if a flow has already been added.
        """
        if hasattr(self, "_flow"):
            raise self.Error("Only one flow can be added to the scheduler.")

        # Check if we are already using a scheduler to run this flow
        flow.check_pid_file()
        flow.set_spectator_mode(False)

        # Build dirs and files (if not yet done)
        flow.build()

        with open(flow.pid_file, "wt") as fh:
            fh.write(str(self.pid))

        self._pid_file = flow.pid_file
        self._flow = flow

    def _validate_customer_service(self):
        """
        Validate input parameters if customer service is on then
        create directory for tarball files with correct premissions for user and group.

        Raises:
            `RuntimeError` if `mailto` is not set.
        """
        direc = self.customer_service_dir
        if not direc.exists:
            mode = 0o750
            print("Creating customer_service_dir %s with mode %s" % (direc, mode))
            direc.makedirs()
            os.chmod(direc.path, mode)

        if self.mailto is None:
            raise RuntimeError(
                "customer_service_dir requires mailto option in scheduler.yml")

    def _do_customer_service(self):
        """
        This method is called before the shutdown of the scheduler.
        If customer_service is on and the flow didn't completed successfully,
        a lightweight tarball file with inputs and the most important output files
        is created in customer_servide_dir.
        """
        if self.customer_service_dir is None:
            return

        # Only produce the tarball when something went wrong. (A leftover
        # `doit = True` used to force the tarball on every shutdown.)
        if not (self.exceptions or not self.flow.all_ok):
            return

        import datetime
        import tempfile

        prefix = os.path.basename(self.flow.workdir) + "_"

        # Build the timestamp without the fractional part. Dropping the
        # microseconds up front avoids searching for "." in the string, which
        # raised ValueError whenever the microseconds happened to be 0.
        now = datetime.datetime.now().replace(microsecond=0)
        suffix = str(now).replace(" ", "-") + ".tar.gz"

        fd, tmpname = tempfile.mkstemp(suffix="_" + suffix, prefix=prefix,
                                       dir=self.customer_service_dir.path, text=False)
        # mkstemp returns an open descriptor: only the name is needed here.
        os.close(fd)

        print("Dear customer,\n We are about to generate a tarball in\n  %s" % tmpname)
        self.flow.make_light_tarfile(name=tmpname)

    def start(self):
        """
        Starts the scheduler in a new thread. Returns 0 if success.
        In standalone mode, this method will block until there are no more scheduled jobs.

        Returns 1 if the preliminary checks or the first run fail,
        -1 if the user interrupts the scheduler from the keyboard.
        """
        self.history.append("Started on %s" % time.asctime())
        self.start_time = time.time()

        if not has_apscheduler:
            raise RuntimeError("Install apscheduler with pip")

        if has_sched_v3:
            self.sched.add_job(self.callback, "interval", **self.sched_options)
        else:
            self.sched.add_interval_job(self.callback, **self.sched_options)

        errors = self.flow.look_before_you_leap()
        if errors:
            self.exceptions.append(errors)
            return 1

        # Try to run the job immediately. If something goes wrong return without initializing the scheduler.
        self._runem_all()

        if self.exceptions:
            self.cleanup()
            self.send_email(
                msg="Error while trying to run the flow for the first time!\n %s" % self.exceptions)
            return 1

        try:
            self.sched.start()
            return 0

        except KeyboardInterrupt:
            self.shutdown(msg="KeyboardInterrupt from user")
            if ask_yesno("Do you want to cancel all the jobs in the queue? [Y/n]"):
                print("Number of jobs cancelled:", self.flow.cancel())

            self.flow.pickle_dump()
            return -1

    def _runem_all(self):
        """
        This function checks the status of all tasks,
        tries to fix tasks that went unconverged, abicritical, or queuecritical
        and tries to run all the tasks that can be submitted.

        Exceptions raised while restarting or launching tasks are not
        propagated: their tracebacks are added to `self.exceptions`.
        """
        excs = []
        flow = self.flow

        # Allow to change the manager at run-time
        if self.use_dynamic_manager:
            from pymatgen.io.abinit.tasks import TaskManager
            new_manager = TaskManager.from_user_config()
            for work in flow:
                work.set_manager(new_manager)

        nqjobs = 0
        if self.contact_resource_manager:
            # This call is expensive and therefore it's optional
            nqjobs = flow.get_njobs_in_queue()
            if nqjobs is None:
                nqjobs = 0
                if flow.manager.has_queue:
                    logger.warning('Cannot get njobs_inqueue')

        if nqjobs >= self.max_njobs_inqueue:
            print("Too many jobs in the queue: %s, returning" % nqjobs)
            return

        # Number of tasks that can still be launched in this iteration.
        if self.max_nlaunches == -1:
            max_nlaunch = self.max_njobs_inqueue - nqjobs
        else:
            max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches)

        # check status.
        flow.check_status(show=False)

        # This check is not perfect, we should make a list of tasks to sumbit
        # and select only the subset so that we don't exceeed mac_ncores_used
        # Many sections of this code should be rewritten.
        if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used:
            print("Cannot exceed max_ncores_used: %s" % self.max_ncores_used)
            return

        # Try to restart the unconverged tasks
        # TODO: do not fire here but prepare for fireing in rapidfire
        for task in self.flow.unconverged_tasks:
            try:
                logger.info("Flow will try restart task %s" % task)
                fired = task.restart()
                if fired:
                    self.nlaunch += 1
                    max_nlaunch -= 1
                    if max_nlaunch == 0:
                        logger.info("Restart: too many jobs in the queue, returning")
                        flow.pickle_dump()
                        return

            except task.RestartError:
                excs.append(straceback())

        # Temporarily disable by MG because I don't know if fix_critical works after the
        # introduction of the new qadapters
        # reenabled by MsS disable things that do not work at low level
        # fix only prepares for restarting, and sets to ready
        if self.fix_qcritical:
            nfixed = flow.fix_queue_critical()
            if nfixed:
                print("Fixed %d QCritical error(s)" % nfixed)

        nfixed = flow.fix_abicritical()
        if nfixed:
            print("Fixed %d AbiCritical error(s)" % nfixed)

        # update database
        flow.pickle_dump()

        # Submit the tasks that are ready.
        try:
            nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10)
            self.nlaunch += nlaunch
            if nlaunch:
                print("[%s] Number of launches: %d" % (time.asctime(), nlaunch))

        except Exception:
            excs.append(straceback())

        # check status.
        flow.show_status()

        if excs:
            logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs))
            self.exceptions.extend(excs)

    def callback(self):
        """The function that will be executed by the scheduler."""
        try:
            return self._callback()
        except:
            # All exceptions raised here will trigger the shutdown!
            # (bare except on purpose: nothing must escape into the scheduler)
            s = straceback()
            self.exceptions.append(s)
            self.shutdown(msg="Exception raised in callback!\n" + s)

    def _callback(self):
        """
        The actual callback.

        Run one iteration, then shut the scheduler down if the flow is
        completed or if one of the stop conditions is met.
        Returns the number of exceptions recorded so far.
        """
        if self.debug:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds())

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if all_ok:
            return self.shutdown(
                msg="All tasks have reached S_OK. Will shutdown the scheduler and exit")

        # Handle failures.
        err_lines = []

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.remindme_s:
            self.num_reminders += 1
            msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " %
                   (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            # NOTE(review): send_email returns -1 when `mailto` is None, which
            # is truthy, so a scheduler without `mailto` is stopped at the
            # first reminder -- confirm this is intended.
            if retcode:
                # Cannot send mail, shutdown now!
                msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" +
                        " but send_email returned %d. Aborting now" % retcode)
                err_lines.append(msg)

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.max_num_pyexcs:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.max_num_pyexcs)
            err_lines.append(boxed(msg))

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons
        # such as race conditions between difference callbacks!)
        if self.nlaunch > self.safety_ratio * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_lines.append(boxed(msg))

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.max_num_abierrs:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.max_num_abierrs)
            err_lines.append(boxed(msg))

        # Test on the presence of deadlocks.
        g = self.flow.find_deadlocks()
        if g.deadlocked:
            # Check the flow again so that status are updated.
            self.flow.check_status()

            g = self.flow.find_deadlocks()
            print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables, "\nrunning\n", g.running)
            if g.deadlocked and not g.runnables and not g.running:
                err_lines.append(
                    "No runnable job with deadlocked tasks:\n%s." % str(g.deadlocked))

        if not g.runnables and not g.running:
            # Check the flow again so that status are updated.
            self.flow.check_status()
            g = self.flow.find_deadlocks()
            if not g.runnables and not g.running:
                err_lines.append(
                    "No task is running and cannot find other tasks to submit.")

        # Something wrong. Quit
        if err_lines:
            # Cancel all jobs.
            if self.killjobs_if_errors:
                cprint(
                    "killjobs_if_errors set to 'yes' in scheduler file. Will kill jobs before exiting.",
                    "yellow")
                try:
                    num_cancelled = 0
                    for task in self.flow.iflat_tasks():
                        num_cancelled += task.cancel()
                    cprint("Killed %d tasks" % num_cancelled, "yellow")
                except Exception as exc:
                    cprint("Exception while trying to kill jobs:\n%s" % str(exc), "red")

            self.shutdown("\n".join(err_lines))

        return len(self.exceptions)

    def cleanup(self):
        """Cleanup routine: remove the pid file and save the pickle database"""
        try:
            os.remove(self.pid_file)
        except OSError as exc:
            logger.critical("Could not remove pid_file: %s", exc)

        # Save the final status of the flow.
        self.flow.pickle_dump()

    def shutdown(self, msg):
        """
        Shutdown the scheduler.

        Args:
            msg: Message sent by e-mail and reported in the final summary.
        """
        try:
            self.cleanup()

            self.history.append("Completed on: %s" % time.asctime())
            self.history.append("Elapsed time: %s" % self.get_delta_etime())

            if self.debug:
                print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds())

            retcode = self.send_email(msg)
            if self.debug:
                print("send_mail retcode", retcode)

            # Write file with the list of exceptions:
            if self.exceptions:
                dump_file = os.path.join(self.flow.workdir, "_exceptions")
                with open(dump_file, "wt") as fh:
                    fh.writelines(self.exceptions)
                    fh.write("Shutdown message:\n%s" % msg)

            lines = []
            app = lines.append
            app("Submitted on: %s" % time.ctime(self.start_time))
            app("Completed on: %s" % time.asctime())
            app("Elapsed time: %s" % str(self.get_delta_etime()))

            if self.flow.all_ok:
                app("Flow completed successfully")
            else:
                app("Flow %s didn't complete successfully" % repr(self.flow.workdir))
                app("use `abirun.py FLOWDIR debug` to analyze the problem.")
                app("Shutdown message:\n%s" % msg)

            print("")
            print("\n".join(lines))
            print("")

            self._do_customer_service()

            if self.flow.all_ok:
                print("Calling flow.finalize()...")
                self.flow.finalize()
                if self.rmflow:
                    # Printed directly: the summary lines above have already
                    # been written, so appending to them would show nothing.
                    print("Flow directory will be removed...")
                    try:
                        self.flow.rmtree()
                    except Exception:
                        logger.warning("Ignoring exception while trying to remove flow dir.")

        finally:
            # Shutdown the scheduler thus allowing the process to exit.
            logger.debug('This should be the shutdown of the scheduler')

            # Unschedule all the jobs before calling shutdown
            if not has_sched_v3:
                for job in self.sched.get_jobs():
                    self.sched.unschedule_job(job)

            self.sched.shutdown()
            # Uncomment the line below if shutdown does not work!
            #os.system("kill -9 %d" % os.getpid())

    def send_email(self, msg, tag=None):
        """
        Send an e-mail before completing the shutdown.
        Returns 0 if success.

        Never raises: if the mail cannot be built or sent, the traceback is
        added to `self.exceptions` and -2 is returned.
        """
        try:
            return self._send_email(msg, tag)
        except:
            self.exceptions.append(straceback())
            return -2

    def _send_email(self, msg, tag):
        """
        Build the report and send it to `mailto`.
        Returns -1 if `mailto` is None, else the value returned by `sendmail`.
        """
        if self.mailto is None:
            return -1

        header = msg.splitlines()
        app = header.append

        app("Submitted on: %s" % time.ctime(self.start_time))
        app("Completed on: %s" % time.asctime())
        app("Elapsed time: %s" % str(self.get_delta_etime()))
        app("Number of errored tasks: %d" % self.flow.num_errored_tasks)
        app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks)

        strio = cStringIO()
        strio.write("\n".join(header) + 4 * "\n")

        # Add the status of the flow.
        self.flow.show_status(stream=strio)

        if self.exceptions:
            # Report the list of exceptions.
            strio.writelines(self.exceptions)

        if tag is None:
            tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]"

        return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
class GPCAlgGlobFSM():
    """Action and guard provider for the ``GPCAlgGlobProc_sm`` state machine.

    The instance owns the state machine (``self._fsm``), a job scheduler
    (``self.sched``), a deque of pending events (``self.eventDeque``) and the
    OPC data objects created in ``doInit``. Methods named ``do*`` perform
    work, ``is*``/``check*`` return booleans or status values, ``job*`` are
    scheduled callbacks, and ``eventHandler`` maps event dicts to state
    machine transitions.

    This is Python 2 code (``xrange``, ``iteritems``, ``itervalues``,
    ``has_key``).

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    listing. Places where the nesting could not be determined with certainty
    are marked with NOTE(review) and should be confirmed.
    NOTE(review): ``self.configJob`` and ``self.config`` are read but never
    assigned in ``__init__``; presumably they are set by a collaborator or by
    ``doUpdateConfig`` -- confirm.
    """

    def __init__(self, debugFlag=False, configFile = None):
        """Create the state machine, its loggers, the event deque and the scheduler.

        :param debugFlag: forwarded to ``self._fsm.setDebugFlag``.
        :param configFile: stored as ``self.configFile``; not used further here.
        """
        #SM FSM specific initialization
        self._fsm = GPCAlgGlobProc_sm(self)
        self._fsm.setDebugFlag(debugFlag)
        self.MPCAlgo = {'Active':None}
        self.logger = logging.getLogger("GPCAlgGlobProc")
        # Temporarily switch the logger class so that the FSM debug stream is
        # created as an sm_Logger, then restore the previous logger class.
        tmpLogger = logging.getLoggerClass()
        logging.setLoggerClass(sm_Logger)
        self._fsm.setDebugStream(logging.getLogger("GPCAlgGlobProc.fsm"))
        logging.setLoggerClass(tmpLogger)
        self.eventDeque = deque()
        #APScheduler
        self.sched = Scheduler()
        self.configFile = configFile

    def __del__(self):
        """Shut the scheduler down without waiting when the object is collected."""
        self.sched.shutdown(wait=False)

    def start(self):
        """Enter the state machine's start state and start the scheduler."""
        self._fsm.enterStartState()
        self.sched.start()

    def initInit(self):
        """Reset the bookkeeping dict used by ``doInit`` (call counter plus one state per step)."""
        self.doInitMemory = {"Count":0, "S0_tUpdate":{'State':None}, "MPCData":{'State':None}, "Config":{'State':None}}

    def doInit(self):
        """Run one pass of the three initialization steps.

        Each step stores its progress under ``self.doInitMemory[<step>]['State']``
        (None -> 'Running' -> 'Done'); ``isInitDone`` reports when all are 'Done'.
        """
        mem = self.doInitMemory
        mem["Count"] += 1
        # Get the GPC Config file
        if mem["Config"]['State'] != 'Done':
            conf = mem["Config"]
            try:
                if self.configJob.isConfigRead():
                    conf['State'] = 'Done'
            except AttributeError:
                conf['State'] = 'Running'
        # Get the main "S0_tUpdate" from the SCADA system.
        # NOTE(review): unlike the block above, this access to self.configJob
        # is not guarded against AttributeError.
        if mem["S0_tUpdate"]['State'] == None \
           and self.configJob.isConfigRead():
            AlgConfVars = AlgData_OPC(variables=["S0_tUpdate",], opcserver = self.config["Tree"]["Global"]["OPCServer"])
            AlgConfVars.logger = self.logger
            mem["S0_tUpdate"]["Data"] = AlgConfVars
            mem["S0_tUpdate"]['State'] = 'Running'
        if mem["S0_tUpdate"]['State'] in ['Running']:
            S0 = mem["S0_tUpdate"]
            # Up to three read attempts, 0.2 s apart, until a value other
            # than None/0 is obtained.
            for i in xrange(3):
                sleep(0.2)
                S0['Data'].readOPC()
                if S0['Data'].opcVarsDict["S0_tUpdate"].value not in [None, 0]:
                    self.S0_tUpdate = S0['Data'].opcVarsDict["S0_tUpdate"].value
                    S0['State'] = 'Done'
                    break
            else:
                # only if the for loop is not stopped with break (no usable value found)
                if S0['Data'].opcVarsDict["S0_tUpdate"].isProblem():
                    S0['Data'].opcVarsDict["S0_tUpdate"]._reset()
                    # NOTE(review): may sit one level up in the original; the
                    # state is already 'Running' here either way.
                    S0['State'] = 'Running'
        # Initialize the MPC OPC data objects.
        if mem["MPCData"]['State'] == None \
           and self.configJob.isConfigRead():
            # Basic dynamic System variables needed for MPC as input variables
            self.MPCData = AlgData_OPC(opcserver = self.config["Tree"]["Global"]["OPCServer"])
            self.MPCData.logger = self.logger
            # All MPC related output variables
            Variables = {'OPC_Group':'MPCOutVariables' }
            Variables.update(GPC_OutVars)
            self.MPCOutData = AlgData_OPC(opcserver = self.config["Tree"]["Global"]["OPCServer"], variables = Variables)
            self.MPCOutData.logger = self.logger
            self.MPCOutData.readOPC() # Needs to be read once because otherwise it will not be usable for writing.
            # All GPC/MPC State related variables
            Variables = {'OPC_Group':'GPCStateVariables' }
            Variables.update(GPC_StateVars)
            self.MPCStateData = AlgData_OPC(opcserver = self.config["Tree"]["Global"]["OPCServer"], variables = Variables)
            self.MPCStateData.logger = self.logger
            mem["MPCData"]['State'] = 'Done'

    def doInitRTrigParam(self):
        """Create ``self.RTrig`` (a ReadTrigger) from ``S0_tUpdate`` and the configured OPC server."""
        self.RTrig = ReadTrigger( S0_tUpdate=self.S0_tUpdate, opcserver=self.config["Tree"]["Global"]["OPCServer"], test=True)
        self.RTrig.setLogger(self.logger)
        self.RTrig.CTimeperiod = self.config["Tree"]["MPC_Opti"]["ControlTimeperiod"]

    def doUpdateRTrigParam(self,dt):
        """Update trigger detection parameter to get a more precise identification
        of the positive slope instance.

        :param dt: not used by the body.
        """
        self.RTrig.updateTrigParam()
        self.logger.debug( "Trigger: lastDT=%s,DT=%s" % (self.RTrig.lastDT,self.RTrig.DT) )

    def doUpdateConfig(self,conf):
        """Store ``conf`` as ``self.config`` and forward it to the active algorithm.

        :param conf: two-item sequence; its items are stored under the keys
            "Tree" and "Valid".
        """
        self.config = dict(zip(("Tree","Valid"),conf))
        MPCmode = self.config["Tree"]["MPC"]["mode"]
        # Only an active algorithm of the configured mode's class is updated.
        if isinstance(self.MPCAlgo['Active'], MPCAlgos.__dict__[MPCmode]):
            self.MPCAlgo['Active'].updateConf(self.config['Tree']["MPC_"+MPCmode])

    def doRTrigInit(self):
        """Schedule ``jobRTrig`` as an interval job and reset its bookkeeping dict."""
        self.RTrig.jobRuns = 0
        DT = self.RTrig.DT
        NextRT = self.RTrig.getNextRT().replace(tzinfo=None)
        max_runs = self.RTrig.getMaxRuns()
        # NOTE(review): in the collapsed listing it is not possible to tell
        # whether "max_runs = 4" below was part of this debug comment or an
        # active override on its own line -- confirm against the real file.
        #Debug-GSc: test max_runs = 4
        self.RTrig.job = self.sched.add_interval_job(self.jobRTrig, seconds = DT, start_date = NextRT, max_runs = max_runs, name = "ReadTrigger-Job")
        self.doRTrigMemory = {"Count":0, "TrigDone":False}

    def doRTrigStop(self):
        """Unschedule the read-trigger job if it still has a next run time, then forget it."""
        if self.RTrig.job.compute_next_run_time(datetime.now()):
            # only un-schedule if the job is still scheduled otherwise scheduler error
            self.sched.unschedule_job(self.RTrig.job)
        # NOTE(review): assumed to run unconditionally; nesting not recoverable.
        self.RTrig.job = None

    def doWTrigInit(self):
        """Create the WriteTrigger and schedule ``jobWTrig``, unless ``isNoOPCWriteTrigger()`` is true."""
        #for debug reasons: OPC-values are sometimes lost.
        self.logger.debug( "Log QSoll before trigger:" )
        self.MPCOutData.readOPC()
        if self.isNoOPCWriteTrigger():
            self.logger.debug( "WriteTrigger is asked not to be set." )
        else:
            self.WTrig = WriteTrigger(S0_tUpdate=self.S0_tUpdate)
            self.WTrig.setLogger(self.logger)
            self.WTrig.job = self.sched.add_interval_job(self.jobWTrig, seconds = self.WTrig.DT, start_date = datetime.now() + timedelta(seconds=0.5), max_runs = 2, name = "WriteTrigger-Job")
        #GSc-ToDo: start a job that sets and resets the trigger
        # use self.WTrig.process()
        # Should be called a maximum of 2x self.WTrig.maxRuns
        # but only until self.WTrig.state is in ('Reset' or some "Error")
        # "sched" seems not to be best as Setting process can take several runs (DT 1s)
        # and resetting the same but in between 10%S0_tUpdate needs to be waited.

    def doReadOPC(self):
        """Read state data, then MPC input data, and queue the resulting event.

        The event type is whatever ``readOPC``/``checkMPCData`` return; a
        list/tuple result is split into "Type" and "Data".
        """
        #GSc-ToDo: rework this first level checking. Here only completely infeasible situations should lead to "VarsError"
        #Get first state related information and check it
        evStr = self.MPCStateData.readOPC()
        if evStr == None:
            evStr = self.checkMPCData(self.MPCStateData)
        #If OK Get the MPC "In" information and check it
        if evStr == "VarsOK":
            evStr = self.MPCData.readOPC()
            if evStr == None:
                evStr = self.checkMPCData(self.MPCData)
        if isinstance(evStr, (list,tuple)):
            evt = dict(zip(("Type","Data"),evStr))
        else:
            evt = {"Type":evStr}
        self.eventDeque.append(evt)

    def doUpdateParam(self):
        """Refresh ``self.S0_tUpdate`` and the trigger parameters when the OPC value changed."""
        #Only update here if GPC is OPC triggered
        if not self.RTrig.TrigOPC:
            return
        #Do update only if changed
        S0_tUpdate = self.MPCStateData.opcVarsDict["S0.S0_tUpdate"]
        S0_tUpdateDiff = S0_tUpdate.getDiff()
        if S0_tUpdateDiff != None and S0_tUpdateDiff.Diff[0] != 0:
            self.S0_tUpdate = S0_tUpdate.value
            self.RTrig.updateTrigParam(S0_tUpdate=self.S0_tUpdate)

    def doWriteOPCInit(self):
        """Reset the attempt counter used by ``doWriteOPC``."""
        #GSc-ToDo: Init write process
        self.logger.debug( "Init writeOPCVars process" )
        self.doWriteOPCMemory = {"Count":0,}

    def doWriteOPC(self):
        """Write the stored MPC output values to OPC and count the attempt.

        NOTE(review): both error paths pass an event of type "OPCWriteError"
        to ``eventHandler``, which has no branch for that type and therefore
        raises ValueError -- confirm the intended transition.
        """
        opcResult = self.MPCOutData.writeOPC(allStored=True, toOPC=True)
        if opcResult in [True, None, []]:
            self.eventHandler({"Type":"OPCWriteError", "Data":"writeOPC returns: %s" % (opcResult)})
            return
        nbrW = len(opcResult)
        # Each result item is indexable; item[1] == "Success" marks a good write.
        tfSuccess = [ri[1] == "Success" for ri in opcResult]
        if not all(tfSuccess):
            nbrErr = nbrW - sum(tfSuccess)
            if self.doWriteOPCMemory["Count"] > 3:
                self.logger.debug( "doWriteOPC after (%s) tries still %s un-successful opc-writeouts\n -> give-up" % \
                                   (self.doWriteOPCMemory["Count"],nbrErr))
                self.eventHandler({"Type":"OPCWriteError", "Data":"writeOPC returns: %s" % (opcResult)})
            else:
                self.logger.debug( "doWriteOPC (%s): %s un-successful opc-writeouts" % \
                                   (self.doWriteOPCMemory["Count"],nbrErr))
                sleep(0.5)
        else:
            self.logger.debug( "doWriteOPC (%s): ends successful" % \
                               (self.doWriteOPCMemory["Count"],) )
        self.doWriteOPCMemory["Count"] += 1

    def doCheckSysStates(self):
        """Evaluate station states and queue "MPCActive" or "MPCInactif".

        Fatal conditions are sent straight to ``eventHandler`` as
        "MPCImpossible". When no algorithm of the configured mode is active,
        one is instantiated from ``MPCAlgos``; when an algorithm is active and
        states or modes changed, its basin configuration is updated.

        NOTE(review): the nesting of the trailing part of this method
        (everything after the "NO Station controllable" check) was inferred;
        confirm against the real file.
        """
        #ToDo-GSc: check the on/off states of the GPC
        MPCSimu = self.isMPCSimu()
        if MPCSimu and self.getMPCSimuMode() in ['OPCReadOnly',]:
            self.eventDeque.append({"Type":"MPCInactif","Data":"OPCReadOnly Mode specified"})
            return
        # Check the life states of all configured actors
        # SysGPCState = getSysGPCState(self.MPCStateData.opcVarsDict)# Old Life/Autonom based approach
        SysGPCState = getSysGPCState_StMo(self.MPCStateData.opcVarsDict)
        # Stations whose state differs from the previous cycle (empty on the first cycle).
        if getattr(self, "SysGPCState", None):
            UpdatedBState = dict([(si,Statei) for si, Statei in SysGPCState.iteritems() if Statei != self.SysGPCState[si]])
        else:
            UpdatedBState = {}
        self.SysGPCState = SysGPCState
        #check the life states of actor S0 (SCADA system)
        if self.SysGPCState['S0'] == 'offline':
            self.eventHandler({"Type":"MPCImpossible","Data":"S0 is %s" % (self.SysGPCState['S0'],)})
            return
        elif self.SysGPCState['S0'] == 'maintenance':
            self.eventDeque.append({"Type":"MPCInactif","Data":"S0 Station is in maintenance"})
            return
        # 'S0' and 'S99' are excluded from the controllability check.
        if all([zi in ['offline','maintenance'] for si,zi in self.SysGPCState.iteritems() if si not in ['S0','S99']]):
            self.eventDeque.append({"Type":"MPCInactif","Data":"There is NO Station controllable"})
            return
        else:
            BModeUdate = getSysBModeUpdate(self.MPCStateData.opcVarsDict)
            UpdatedBMode = dict([(si,bmi['Mode']) for si, bmi in BModeUdate.iteritems() if bmi['Update']])
        if self.MPCAlgo['Active'] != None and (UpdatedBState !={} or UpdatedBMode != {}):
            updateStruct = False
            algo = self.MPCAlgo['Active']
            if UpdatedBMode != {}:
                res = algo.readBConfig(toUpdate=UpdatedBMode)
                #ToDo: check the return value (True False) if network configuration is usable.
                if res != True:
                    interItem = {"Type":"MPCImpossible", "Data":"Error in reading the current basin configuration: %s" % (res,)}
                    self.eventHandler(interItem)
                    return
                updateStruct = True
            # MPCmode is assigned but not used in this branch.
            MPCmode = self.config["Tree"]["MPC"]["mode"]
            C_Switch = algo.updateBasinConf(self.SysGPCState,updateStruct=updateStruct)
            self.handleStateSwitch(C_Switch)
        else:
            try:
                MPCmode = self.config["Tree"]["MPC"]["mode"]
                try:
                    if not isinstance(self.MPCAlgo['Active'], MPCAlgos.__dict__[MPCmode]):
                        algo = MPCAlgos.__dict__[MPCmode](self.config['Tree']["MPC_"+MPCmode], sysVars=self.MPCData.opcVarsDict, stateVars=self.MPCStateData.opcVarsDict, outVars=self.MPCOutData.opcVarsDict)
                        C_Switch = algo.updateBasinConf(self.SysGPCState)
                        self.handleStateSwitch(C_Switch)
                        self.MPCAlgo['Active'] = algo
                except KeyError as e:
                    interItem = {"Type":"MPCImpossible", "Data":"MPC-mode related class is missing. %s" % (e,)}
                    self.eventHandler(interItem)
                    return
                except BaseException as e:
                    interItem = {"Type":"MPCImpossible", "Data":"Error during instantiation of the algo class: %s" % (e,)}
                    self.eventHandler(interItem)
                    return
            except KeyError as e:
                interItem = {"Type":"MPCImpossible", "Data":"Error getting MPC-Mode specification: %s" % e}
                self.eventHandler(interItem)
                return
            #ToDo: Handle possible other control approaches that will run only as off-line control
            self.MPCAlgo['Inactive'] = []
            # Placeholder: the loop body is empty, so this only probes the config key.
            try:
                for im in self.config["Tree"]["MPC"]["inactiveModes"]:
                    pass
            except:
                pass
        #ToDo-GSc: integrate the AlgInernalSysFSM (S4, ...)
        # - init it in doInit
        # - process it here using the self.MPCData.S4_BZ
        self.logger.debug( "doCheckSysStates(): SysStates: %s; SysModes: %s" % (self.SysGPCState,BModeUdate) )
        #Check the SysGPCState again here as it may have changed due to Switching.
        #If here a basin is in controllable this means it is not controlled by GPC in this cycle.
        if all([zi in ['offline','maintenance','controllable'] for si,zi in self.SysGPCState.iteritems() if si not in ['S0','S99']]):
            self.eventDeque.append({"Type":"MPCInactif","Data":"There is NO Station configured for GPC control"})
        else:
            self.eventDeque.append({"Type":"MPCActive"})

    def doRunMPC(self):
        """Run the active algorithm; queue "MPCDone" or report "MPCImpossible" on any exception."""
        #Initialize the specified MPC mode class object.
        algo = self.MPCAlgo['Active']
        #run MPC
        try:
            algo.run(self.MPCData.opcVarsDict, stateVars=self.MPCStateData.opcVarsDict, outVars=self.MPCOutData.opcVarsDict)
            self.eventDeque.append({"Type":"MPCDone",})
        except BaseException as e:
            interItem = {"Type":"MPCImpossible", "Data":"General MPC-Error: %s" % e}
            self.eventHandler(interItem)
            return

    def doLogMPCResults(self):
        """Log the pending write values of all output variables whose key ends with 'QSoll'."""
        #get the results and build a log entry
        res = [vi.wvalue for ki,vi in self.MPCOutData.opcVarsDict.iteritems() if ki.endswith('QSoll') and vi.isWReady()]
        self.logger.debug("MPC Results: %s" % (res,))

    def doResetWriteVars(self):
        """Reset every output/state variable that reports ``isWReady()``."""
        for ki,vi in self.MPCOutData.opcVarsDict.items() + self.MPCStateData.opcVarsDict.items():
            if vi.isWReady():
                vi._reset()

    def doWarning(self,msg):
        """No-op hook; ``msg`` is ignored."""
        pass

    def doSetGPCOffline(self):
        """Log the offline state and, in simulation mode, schedule an automatic reset job.

        NOTE(review): the two triple-quoted log messages were multi-line in
        the original; their line breaks are not recoverable from the collapsed
        listing and are kept here as they appear in it.
        """
        self.logger.debug("""====== GPC is Offline ====== The GPC: is now in Offline mode. Only a 'Reset'-Event or a complete GSP-restart are possible in this System state. ============================""")
        if self.isMPCSimu():
            try:
                DT = self.S0_tUpdate - 2*self.RTrig.gitter
                DT -= self.S0_tUpdate / self.RTrig.TrigSizePct # This is the sleep time in GPCOffline mode.
            except:
                # Fallback delay in seconds when the values above are unavailable.
                DT = 900
            # NOTE(review): as in doRTrigInit, "DT = 40" may have been an
            # active override on its own line -- confirm.
            #Debug-GSc: test DT = 40
            self.sched.add_date_job( self.jobReset, date = datetime.now() + timedelta(seconds=DT), name = "Reset-Job" )
            # DT is a delay in seconds, although the message reads "reset at".
            self.logger.debug("""====!! GPC auto-Reset !!==== The GPC: will be automatically reset at %s ============================""" % (DT,))

    def isInitDone(self):
        """Return True when every dict entry of ``doInitMemory`` that has a 'State' key is 'Done'."""
        #Check all doInitMemory entries for their "State" status
        state = [si['State'] == 'Done' for si in self.doInitMemory.itervalues() if isinstance(si, dict) and si.has_key('State')]
        return all(state)

    def isNotSync(self):
        """Return the negation of ``self.RTrig.isSync()``."""
        return not self.RTrig.isSync()

    def isNoOPCWrite(self):
        """Return True in simulation mode when the simu mode is None or 'NoOPCWrite'."""
        if not self.isMPCSimu():
            return False
        elif self.getMPCSimuMode() in [None,'NoOPCWrite']:
            return True
        return False

    def isNoOPCWriteTrigger(self):
        """Return True in simulation mode when the simu mode is None, 'NoOPCWrite' or 'NoOPCWriteTrigger'."""
        if not self.isMPCSimu():
            return False
        elif self.getMPCSimuMode() in [None,'NoOPCWrite','NoOPCWriteTrigger']:
            return True
        return False

    def isMPCSimu(self):
        """Return the truthiness of config['Tree']['MPC']['simu']; True when it cannot be read."""
        try:
            MPCSimu = self.config['Tree']['MPC']['simu']
        except:
            # Missing config or key: treated as simulation mode.
            MPCSimu = True
        if MPCSimu:
            return True
        return False

    def isOPCWriteOK(self):
        """Return (and log) whether ``self.MPCOutData.isWAllIdle()`` is true."""
        if self.MPCOutData.isWAllIdle():
            self.logger.debug("isOPCWriteOK == True")
            return True
        self.logger.debug("isOPCWriteOK == False")
        return False

    def isOPCWriteError(self):
        """Return (and log) whether ``self.MPCOutData.isWAnyProblem()`` is true."""
        if self.MPCOutData.isWAnyProblem():
            self.logger.debug("isOPCWriteError == True")
            return True
        self.logger.debug("isOPCWriteError == False")
        return False

    def logIgnored(self):
        """Write a note about an ignored transition to the FSM debug stream."""
        self._fsm.getDebugStream().write("The latest asked transition was ignored by the StateMashine.")

    def jobRTrig(self):
        """Scheduled callback: poll the read trigger and record the outcome in ``doRTrigMemory``.

        Stores a "TrigOK" event on success, or a "TrigError" event when the
        job has no next run time left.
        """
        j = self.RTrig.job
        self.doRTrigMemory['Count'] += 1
        if self.RTrig.getRTrigJob():
            self.doRTrigMemory['TrigDone'] = True
            self.doRTrigMemory['Event'] = {"Type":"TrigOK", "Data":self.RTrig.lastT}
        else:
            if not j.compute_next_run_time(datetime.now()):
                self.doRTrigMemory['TrigDone'] = True
                self.doRTrigMemory['Event'] = {"Type":"TrigError", "Data":"%s: no next fire time scheduled" % j.name}

    def jobWTrig(self):
        """Scheduled callback: advance the write trigger until its state changes.

        NOTE(review): the nesting of the two ``if`` blocks following the
        ``while`` loop was inferred -- confirm.
        """
        if self.WTrig.isInProcess():
            cSatate = self.WTrig.state
            # Repeat process() (sleeping 1 s between tries) until the state changes.
            while self.WTrig.state == cSatate:
                self.WTrig.process()
                if self.WTrig.state == cSatate:
                    sleep(1)
            #for debug reasons: OPC-values are sometimes lost.
            if not self.WTrig.isInProcess():
                self.logger.debug( "Log QSoll after trigger:" )
                self.MPCOutData.readOPC()
        if self.WTrig.isJobAlife() and not self.WTrig.isInProcess():
            self.sched.unschedule_job(self.WTrig.job)
            self.WTrig.job = None
            self.logger.debug("jobWTrigError: job unscheduled due to probable Error WTrig process")
            #GSC-ToDo: this is not a correct solution because it can leave the system in an incoherent state.

    def jobReset(self):
        """Scheduled callback: send a "Reset" event to ``eventHandler``."""
        interItem = {"Type":"Reset"}
        self.eventHandler(interItem)

    def checkInitSleep(self):
        """Return True on every third ``doInit`` pass (Count % 3 == 0, Count != 0) while init is unfinished."""
        if self.isInitDone():
            return False
        elif self.doInitMemory["Count"] == 0:
            return False
        elif self.doInitMemory["Count"] % 3 == 0:
            return True
        else:
            return False

    def checkMPCData(self,MPCData):
        """Return "VarsOK", or ("VarsError", message) for the first variable whose ``usable`` is false."""
        #ToDo: Check if all variables of stations that are not "offline" are usable.
        for k,v in MPCData.opcVarsDict.iteritems():
            if not v.usable:
                return ("VarsError","%s: is not usable"%(k,))
        return "VarsOK"

    def getMPCSimuMode(self):
        """Return config['Tree']['MPC']['simuMode'], or None when it cannot be read."""
        try:
            MPCSimuMode = self.config['Tree']['MPC']['simuMode']
        except:
            return None
        return MPCSimuMode

    def getFSMState(self):
        """Return a dict with keys 'Trans', 'State' and 'Msg' describing the state machine.

        Outside a transition 'Trans' is None and 'State' is the current state
        name; inside one, 'State' is the previous state name.
        """
        if not self._fsm.isInTransition():
            cState = self._fsm.getState().getName()
            FSMState = "%s" % (cState,)
            ret = {'Trans':None,'State':cState,'Msg':FSMState}
        else:
            trans = self._fsm.getTransition()
            pState = self._fsm.getPreviousState().getName()
            FSMState = "In Transition: %s from %s" %(trans,pState)
            ret = {'Trans':trans,'State':pState,'Msg':FSMState}
        return ret

    def handleStateSwitch(self,C_Switch):
        """Handle the switch from mode 1 (C-abl) -> 2 (C) or 2 (C) -> 1 (C-abl).

        :param C_Switch: dict that may hold station lists under the keys
            'C-abl -> C' and 'C -> C-abl'.

        For each listed station the ``<station>.<station>_SteuerModus`` state
        variable gets write value 6 (-> 'controlled') or 5 (-> 'controllable')
        and ``self.SysGPCState`` is updated. The values are written to OPC
        unless ``isNoOPCWrite()`` is true; write failures are only logged.
        """
        for sti in C_Switch.get('C-abl -> C',[]):
            vi = "%s.%s_SteuerModus" % (sti,sti)
            self.MPCStateData.opcVarsDict[vi].setWriteValue(6)
            self.SysGPCState[sti] = 'controlled'
        for sti in C_Switch.get('C -> C-abl',[]):
            vi = "%s.%s_SteuerModus" % (sti,sti)
            self.MPCStateData.opcVarsDict[vi].setWriteValue(5)
            self.SysGPCState[sti] = 'controllable'
        if not self.isNoOPCWrite() and len(C_Switch) > 0:
            opcResult = self.MPCStateData.writeOPC(allStored=True, toOPC=True)
            if opcResult in [True, None, []]:
                self.logger.debug( "Basin StateSwitch error: writeOPC returns: %s" % (opcResult))
            else:
                nbrW = len(opcResult)
                tfSuccess = [ri[1] == "Success" for ri in opcResult]
                if not all(tfSuccess):
                    nbrErr = nbrW - sum(tfSuccess)
                    self.logger.debug( "Basin StateSwitch error: writeOPC %s un-successful opc-writeouts" % \
                                       (nbrErr,))
                else:
                    self.logger.debug( "Basin StateSwitch: %s; writeOPC: %s" % (C_Switch, opcResult,))
        elif self.isNoOPCWrite() and len(C_Switch) > 0:
            self.logger.debug( "Basin StateSwitch: %s but NoOPCWrite is active" % (C_Switch,) )

    def eventHandler(self,evt):
        """Log ``evt`` to the FSM debug stream and fire the matching transition.

        :param evt: dict with a "Type" key and, for some types, a "Data" key.
        :raises ValueError: for an event type without a branch below.

        A "Stop" event terminates the process via ``sys.exit(0)``.
        """
        evtStr = evt["Type"]
        if evt.has_key("Data"):
            evtStr = ';'.join((evtStr,str(evt['Data'])))
        self._fsm.getDebugStream().write("#%s (%s)\n" % (evtStr,datetime.now()))
        if evt['Type'] == "DoInit":
            self._fsm.InitDone()
        elif evt['Type'] == "InitError":
            self._fsm.InitError(evt['Data'])
        elif evt['Type'] == "TrigOK":
            self._fsm.TrigOK(evt['Data'])
        elif evt['Type'] == "TrigError":
            self._fsm.TrigError(evt['Data'])
        elif evt['Type'] == "VarsOK":
            self._fsm.VarsOK()
        elif evt['Type'] == "VarsError":
            self._fsm.VarsError()
        elif evt['Type'] == "MPCActive":
            self._fsm.MPCActive()
        elif evt['Type'] == "MPCInactif":
            self._fsm.MPCInactif()
        elif evt['Type'] == "MPCImpossible":
            self._fsm.MPCImpossible(evt['Data'])
        elif evt['Type'] == "MPCDone":
            self._fsm.MPCDone()
        elif evt['Type'] == "OPCWrite":
            self._fsm.OPCWrite(evt.get('Data',None))
        elif evt['Type'] == "Reset":
            self._fsm.Reset()
        elif evt['Type'] == "Stop":
            sys.exit(0)
        else:
            raise ValueError("Unhandled Event type: %s" % evt)
class AlertSchedulerHandler():
    """Schedules alert definitions, persisted as JSON in a cache directory, as interval jobs.

    Definitions are stored in ``<cachedir>/definitions.json``. Every enabled
    definition becomes one scheduler job whose ``name`` is set to the
    definition UUID, which is how jobs and definitions are matched later.
    """

    FILENAME = 'definitions.json'
    TYPE_PORT = 'PORT'
    TYPE_METRIC = 'METRIC'
    TYPE_SCRIPT = 'SCRIPT'
    TYPE_WEB = 'WEB'

    APS_CONFIG = {
        'threadpool.core_threads': 3,
        'coalesce': True,
        'standalone': False
    }

    def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir,
                 cluster_configuration, config, in_minutes=True):
        """Create the cache directory if needed, build the scheduler and register the exit hook.

        :param cachedir: directory holding the persisted definitions file.
        :param stacks_dir: passed to SCRIPT alerts as 'stacks_directory'.
        :param common_services_dir: passed to SCRIPT alerts as 'common_services_directory'.
        :param host_scripts_dir: passed to SCRIPT alerts as 'host_scripts_directory'.
        :param cluster_configuration: handed to every alert via ``set_helpers``.
        :param config: handed to SCRIPT and WEB alerts.
        :param in_minutes: interpret definition intervals as minutes (True) or seconds (False).
        """
        self.cachedir = cachedir
        self.stacks_dir = stacks_dir
        self.common_services_dir = common_services_dir
        self.host_scripts_dir = host_scripts_dir
        self._cluster_configuration = cluster_configuration

        if not os.path.exists(cachedir):
            try:
                os.makedirs(cachedir)
            except Exception:
                # Best effort: keep going without a cache directory, but do
                # not swallow KeyboardInterrupt/SystemExit.
                logger.critical("[AlertScheduler] Could not create the cache directory {0}".format(cachedir))

        self._collector = AlertCollector()
        self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)
        self.__in_minutes = in_minutes
        self.config = config

        # register python exit handler
        atexit.register(self.exit_handler)

    def exit_handler(self):
        """Exit handler: stops the scheduler."""
        self.stop()

    def update_definitions(self, heartbeat):
        """Persist the alert definitions carried by ``heartbeat`` and reschedule.

        :param heartbeat: mapping; only its 'alertDefinitionCommands' entry is used.
            Nothing happens (besides a warning) when that entry is missing.
        :return: None
        """
        if 'alertDefinitionCommands' not in heartbeat:
            logger.warning("There are no alert definition commands in the heartbeat; unable to update definitions")
            return

        # prune out things we don't want to store
        alert_definitions = []
        for command in heartbeat['alertDefinitionCommands']:
            command_copy = command.copy()
            # no need to store these since we always use the in-memory cached values
            command_copy.pop('configurations', None)
            alert_definitions.append(command_copy)

        # write out the new definitions
        with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f:
            json.dump(alert_definitions, f, indent=2)

        # reschedule only the jobs that have changed
        self.reschedule()

    def __make_function(self, alert_def):
        """Return a zero-argument callable that runs ``alert_def.collect()``."""
        return lambda: alert_def.collect()

    def start(self):
        """Load the definitions from file, schedule them and start the scheduler.

        A scheduler that is already running is shut down and replaced first.
        """
        if self.__scheduler is None:
            return

        if self.__scheduler.running:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)

        # schedule each definition
        for _callable in self.__load_definitions():
            self.schedule_definition(_callable)

        logger.info("[AlertScheduler] Starting {0}; currently running: {1}".format(
            str(self.__scheduler), str(self.__scheduler.running)))

        self.__scheduler.start()

    def stop(self):
        """Shut the scheduler down (without waiting) and replace it with a fresh, stopped one."""
        if self.__scheduler is not None:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)

        logger.info("[AlertScheduler] Stopped the alert scheduler.")

    def reschedule(self):
        """Synchronize the scheduled jobs with the definitions on disk.

        Jobs whose name is no longer the UUID of a definition are unscheduled
        (and their collected alert removed); definitions whose UUID has no
        job are scheduled. Jobs that still match are left untouched.
        """
        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # Set lookups instead of comparing every job with every definition.
        # Both sets are built before anything is unscheduled so the second
        # pass sees the same snapshot as the first.
        definition_uuids = set(definition.get_uuid() for definition in definitions)
        scheduled_names = set(scheduled_job.name for scheduled_job in scheduled_jobs)

        # jobs without valid UUIDs should be unscheduled
        for scheduled_job in scheduled_jobs:
            if scheduled_job.name not in definition_uuids:
                jobs_removed += 1
                logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name))
                self._collector.remove_by_uuid(scheduled_job.name)
                self.__scheduler.unschedule_job(scheduled_job)

        # if no jobs are found with the definitions UUID, schedule it
        for definition in definitions:
            if definition.get_uuid() not in scheduled_names:
                jobs_scheduled += 1
                self.schedule_definition(definition)

        logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format(
            str(jobs_scheduled), str(jobs_removed)))

    def reschedule_all(self):
        """Unschedule every scheduled job, then schedule every definition on disk.

        Unlike ``reschedule`` this does not compare UUIDs: all jobs are
        dropped (together with their collected alerts) and all definitions
        are scheduled again.
        """
        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # unschedule all scheduled jobs
        for scheduled_job in scheduled_jobs:
            jobs_removed += 1
            logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name))
            self._collector.remove_by_uuid(scheduled_job.name)
            self.__scheduler.unschedule_job(scheduled_job)

        # for every definition, schedule a job
        for definition in definitions:
            jobs_scheduled += 1
            self.schedule_definition(definition)

        logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format(
            str(jobs_scheduled), str(jobs_removed)))

    def collector(self):
        """Return the collector used for reporting to the server."""
        return self._collector

    def __load_definitions(self):
        """Load all alert definitions from the cache file.

        All clusters are stored in a single file.

        :return: list of alert objects; empty when the file is missing or invalid.
        """
        definitions = []
        alerts_definitions_path = os.path.join(self.cachedir, self.FILENAME)

        try:
            with open(alerts_definitions_path) as fp:
                all_commands = json.load(fp)
        except Exception:
            logger.warning('[AlertScheduler] {0} not found or invalid. No alerts will be scheduled until registration occurs.'.format(alerts_definitions_path))
            return definitions

        for command_json in all_commands:
            clusterName = command_json.get('clusterName', '')
            hostName = command_json.get('hostName', '')

            for definition in command_json['alertDefinitions']:
                alert = self.__json_to_callable(clusterName, hostName, definition)
                if alert is None:
                    # unknown source type: nothing to schedule
                    continue

                alert.set_helpers(self._collector, self._cluster_configuration)
                definitions.append(alert)

        return definitions

    def __json_to_callable(self, clusterName, hostName, json_definition):
        """Convert one JSON definition into the alert object for its source type.

        :return: a MetricAlert, PortAlert, ScriptAlert or WebAlert with its
            cluster/host set, or None when the source type is not recognized.
        """
        source = json_definition['source']
        source_type = source.get('type', '')

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("[AlertScheduler] Creating job type {0} with {1}".format(source_type, str(json_definition)))

        alert = None

        if source_type == AlertSchedulerHandler.TYPE_METRIC:
            alert = MetricAlert(json_definition, source)
        elif source_type == AlertSchedulerHandler.TYPE_PORT:
            alert = PortAlert(json_definition, source)
        elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
            source['stacks_directory'] = self.stacks_dir
            source['common_services_directory'] = self.common_services_dir
            source['host_scripts_directory'] = self.host_scripts_dir
            alert = ScriptAlert(json_definition, source, self.config)
        elif source_type == AlertSchedulerHandler.TYPE_WEB:
            alert = WebAlert(json_definition, source, self.config)

        if alert is not None:
            alert.set_cluster(clusterName, hostName)

        return alert

    def schedule_definition(self, definition):
        """Schedule a definition (callable) as an interval job.

        Scheduled jobs are given the UUID as their name so that they can be
        identified later on. A disabled definition is not scheduled; the call
        is then a no-op apart from a log entry.
        """
        # NOOP if the definition is disabled; don't schedule it
        if not definition.is_enabled():
            logger.info("[AlertScheduler] The alert {0} with UUID {1} is disabled and will not be scheduled".format(
                definition.get_name(), definition.get_uuid()))
            return

        interval_unit = 'minutes' if self.__in_minutes else 'seconds'
        job = self.__scheduler.add_interval_job(
            self.__make_function(definition), **{interval_unit: definition.interval()})

        # although the documentation states that Job(kwargs) takes a name
        # key/value pair, it does not actually set the name; do it manually
        if job is not None:
            job.name = definition.get_uuid()

        logger.info("[AlertScheduler] Scheduling {0} with UUID {1}".format(
            definition.get_name(), definition.get_uuid()))

    def get_job_count(self):
        """Return the number of jobs currently scheduled (0 without a scheduler).

        This is mainly used for test verification of scheduling.
        """
        if self.__scheduler is None:
            return 0

        return len(self.__scheduler.get_jobs())

    def execute_alert(self, execution_commands):
        """Execute alerts immediately, ignoring any scheduled jobs.

        The existing jobs remain untouched. The result is stored in the alert
        collector for transmission during the next heartbeat. A failure of
        one command is logged and does not stop the remaining commands.

        :param execution_commands: iterable of mappings with an
            'alertDefinition' entry and optional 'clusterName'/'hostName'.
        """
        if self.__scheduler is None or execution_commands is None:
            return

        for execution_command in execution_commands:
            try:
                alert_definition = execution_command['alertDefinition']
                clusterName = execution_command.get('clusterName', '')
                hostName = execution_command.get('hostName', '')

                alert = self.__json_to_callable(clusterName, hostName, alert_definition)
                if alert is None:
                    continue

                logger.info("[AlertScheduler] Executing on-demand alert {0} ({1})".format(alert.get_name(), alert.get_uuid()))

                alert.set_helpers(self._collector, self._cluster_configuration)
                alert.collect()
            except Exception:
                logger.exception("[AlertScheduler] Unable to execute the alert outside of the job scheduler")
class TrainScheduler(object):
    """Queries train services and schedules an output job for every upcoming train.

    Constructing an instance configures logging, starts the scheduler, seeds
    the schedule when it is empty and then blocks until interrupted with
    Ctrl-C.
    """

    def __init__(self):
        """Start the scheduler and block in a sleep loop until KeyboardInterrupt."""
        logging.basicConfig(level=logging.DEBUG, filename="debug.log",
                            format='%(asctime)s %(levelname)-8s %(message)s',
                            datefmt="%d.%m.%Y %H:%M:%S")
        self.scheduler = Scheduler()
        self.scheduler.add_listener(self.checkForDuplicates, apscheduler.events.EVENT_JOBSTORE_JOB_ADDED)
        self.scheduler.start()
        if not self.scheduler.get_jobs():
            self.createInitSchedule()
        self.log("Initial tasks completed. Waiting for next event..")
        while True:
            try:
                time.sleep(10)
            except KeyboardInterrupt:
                self.log("Shutting down..")
                self.scheduler.shutdown()
                # quit() is a `site` helper for interactive sessions and may
                # be absent; raising SystemExit is the portable equivalent.
                raise SystemExit

    def createInitSchedule(self):
        """Run every query once, then register the recurring cron jobs."""
        self.log("Perform initial query for passenger trains..")
        self.processPassenger()
        self.log("Perform initial query for freight trains..")
        self.processFreight()
        self.log("Perform initial query for auto trains..")
        self.processAutotrain()
        self.log("Creating initial train schedule..")
        # request passenger trains every hour
        self.scheduler.add_cron_job(self.processPassenger, hour="*/1", minute="0", day="*", month="*", year="*")
        # request freight trains every day
        self.scheduler.add_cron_job(self.processFreight, hour="0", minute="2", day="*", month="*", year="*")
        # request auto trains every month
        self.scheduler.add_cron_job(self.processAutotrain, hour="0", minute="5", day="1", month="*", year="*")

    def processPassenger(self):
        """Schedule an on-time check CHECKBEFORE minutes before each passenger train.

        When the check time is rejected by the scheduler (ValueError), the
        output job is scheduled directly at the train time instead.
        """
        # return trains for station in question
        tReq = passenger.PassengerTrainRequest(PASSENGER_STATION_ID)
        for train in tReq.getTrainList():
            trainTime = train.actualTime if train.actualTime else train.scheduledTime
            trainTimeCheck = trainTime - datetime.timedelta(minutes=CHECKBEFORE)
            try:
                self.scheduler.add_date_job(self.checkIfOnTime, trainTimeCheck, args=[train], name=train.name)
                self.log("Schedule passenger train '%s' to be checked on %s." % (train.name, trainTimeCheck))
            except ValueError:
                self._schedulePassengerOutput(train, trainTime)

    def checkIfOnTime(self, remTrain):
        """Query the station again and schedule the output for the train named like ``remTrain``."""
        # return trains for station in question
        tReq = passenger.PassengerTrainRequest(PASSENGER_STATION_ID)
        for train in tReq.getTrainList():
            if remTrain.name == train.name:
                trainTime = train.actualTime if train.actualTime else train.scheduledTime
                self._schedulePassengerOutput(train, trainTime)
                break

    def _schedulePassengerOutput(self, train, trainTime):
        """Schedule ``output`` for a passenger train; log it as passed when the scheduler rejects the date."""
        try:
            self.scheduler.add_date_job(self.output, trainTime, args=[train], name=train.name)
            self.log("Schedule passenger train '%s' to be displayed on %s." % (train.name, trainTime))
        except ValueError:
            self.log("Passenger train '%s' (%s) already passed by." % (train.name, trainTime))

    def processFreight(self):
        """Schedule the output for every freight train that has not arrived yet."""
        # return trains for station in question
        freightTrains = freight.FreightTrainRequest(FREIGHT_STATION_ID)
        # FIXME: only arrival atm
        self._scheduleArrivals(freightTrains.getTrainList(), "freight")

    def processAutotrain(self):
        """Schedule the output for every auto train that has not arrived yet."""
        # return trains for station in question
        autoTrains = autotrain.AutoTrainRequest(AUTO_TRAIN_STATION_NAME)
        self._scheduleArrivals(autoTrains.getTrainList(), "auto")

    def _scheduleArrivals(self, trains, kind):
        """Schedule ``output`` at ``train.arrival`` for each train still in the future.

        ``kind`` ("freight" or "auto") is only used in the log messages.
        """
        for train in trains:
            if train.arrival > datetime.datetime.now():
                self.log("Schedule %s train '%s' to be displayed on %s." % (kind, train.name, train.arrival))
                self.scheduler.add_date_job(self.output, train.arrival, args=[train], name=train.name)
            else:
                self.log("%s train '%s' (%s) already passed." % (kind.capitalize(), train.name, train.arrival))

    def checkForDuplicates(self, event):
        """Scheduler listener: unschedule a just-added job that duplicates an existing one."""
        jobs = self.scheduler.get_jobs()
        if jobs:
            # events with the same name (train name) and the next "next run time" are duplicates
            dups = [job for job in jobs
                    if job.name == event.job.name and job.next_run_time == event.job.next_run_time]
            if len(dups) > 1:
                self.log("Unscheduling %s." % event.job)
                self.scheduler.unschedule_job(event.job)

    def output(self, train):
        """Log ``train`` and append it as one line to OUTPUT_FILE."""
        self.log("OUTPUT: %s" % train)
        # The context manager closes the file even if the write raises.
        with open(OUTPUT_FILE, "a") as f:
            f.write("%s\n" % train)

    def log(self, message):
        """Log ``message`` at INFO level with a leading '* '."""
        logging.info("* %s", message)
class ProgramHandler:
    """Schedule a radio station's programs and react to schedule changes.

    On construction a listener thread is started that connects to the
    schedule-events server and applies ``delete``/``add``/``update``/``sync``
    events. ``run()`` loads the station's programs from the database and
    registers one date job per non-expired program.
    """

    def __init__(self, radio_station):
        """Store the station, start the event listener and log readiness.

        :param radio_station: object exposing ``station``, ``db`` and
            ``logger`` attributes (as used throughout this class).
        """
        self.__radio_station = radio_station
        self.__scheduler = None
        self.__scheduled_jobs = None
        self.__start_listeners()
        self.__is_starting_up = True
        self.__interval_hours = 3  # Time after which to schedule again
        self.__radio_station.logger.info(
            "Done initialising ProgramHandler for {0}".format(
                radio_station.station.name))

    def run(self):
        """Run the current schedule and clear the start-up flag."""
        self.run_current_schedule()
        self.__is_starting_up = False

    def __prepare_schedule(self):
        """Load programs and create a fresh scheduler and job registry."""
        self.__load_programs()
        self.__scheduler = Scheduler(timezone=pytz.utc)
        self.__scheduled_jobs = dict()

    def run_current_schedule(self):
        """Rebuild the scheduler and schedule every loaded program."""
        self.__prepare_schedule()
        self.__scheduler.start()
        self.__schedule_programs()
        #self.__schedule_next_schedule()

    def stop(self):
        """Stop the running program (currently a no-op hook)."""
        self.__stop_program()
        # any clean up goes here
        # unschedule stuff

    def __schedule_next_schedule(self):
        """Schedule ``run_current_schedule`` ``__interval_hours`` from now."""
        base_date = datetime.now()
        next_schedule_date = base_date + timedelta(
            hours=self.__interval_hours)  # 3 hours
        self.__scheduler.add_date_job(self.run_current_schedule,
                                      next_schedule_date)

    def __schedule_programs(self):
        """Add a job for every loaded program that has not expired yet."""
        for scheduled_program in self.__scheduled_programs:
            if not self.__is_program_expired(scheduled_program):
                self.__add_scheduled_job(scheduled_program)
                self.__radio_station.logger.info(
                    "Scheduled program {0} for station {1} starting at {2}".
                    format(scheduled_program.program.name,
                           self.__radio_station.station.name,
                           scheduled_program.start))

    def __add_scheduled_job(self, scheduled_program):
        """Register a date job that starts ``scheduled_program``.

        Failures while adding the job are logged, not raised.
        """
        start_time = self.__get_program_start_time(scheduled_program).replace(
            tzinfo=None)
        program = RadioProgram(scheduled_program, self.__radio_station)
        try:
            scheduled_job = self.__scheduler.add_date_job(
                program.start, start_time)
            self.__scheduled_jobs[scheduled_program.id] = scheduled_job
        except Exception as e:
            # Format the exception itself: ``e.message`` is deprecated and
            # not available on every exception type / Python version.
            self.__radio_station.logger.error(
                "Error {err} in __add_scheduled_job".format(err=e))

    def __delete_scheduled_job(self, index):
        """Unschedule and forget the job registered under ``index``."""
        if not self.__scheduled_jobs:
            self.__radio_station.logger.warning(
                "Failed to delete job (no jobs are scheduled)")
            return
        if index in self.__scheduled_jobs:
            try:
                self.__scheduler.unschedule_job(self.__scheduled_jobs[index])
            except Exception:
                # The job probably ran already
                self.__radio_station.logger.warning(
                    "Failed to remove unscheduled job #{}".format(index))
            del self.__scheduled_jobs[index]

    def __stop_program(self):
        # self.__running_program.stop()
        return

    def __run_program(self):
        # self.__running_program.run()
        return

    def __load_programs(self):
        """Query today's / currently running programs for this station."""
        # Only referenced by the disabled filter kept below for reference.
        timezone = self.__radio_station.station.timezone
        #if self.__is_starting_up:
        date_filter = "((date(start) = date(now())) or (start < now() and radio_scheduledprogram.end > now()))"
        #else:
        #    date_filter = "(start >= now() at time zone '{tz}' and start < now() at time zone '{tz}' + interval '{interval} hour')".format(
        #        tz=timezone, interval=self.__interval_hours)
        # ``== False`` is intentional: it is a column expression, not a
        # Python truth test.
        query = self.__radio_station.db.query(ScheduledProgram).filter(
            ScheduledProgram.station_id == self.__radio_station.station.id).filter(
                text(date_filter)).filter(ScheduledProgram.deleted == False)
        self.__scheduled_programs = query.all()
        self.__radio_station.logger.info("Loaded {1} programs for {0}".format(
            self.__radio_station.station.name, len(self.__scheduled_programs)))

    def __load_program(self, program_id):
        """Return the ScheduledProgram with ``program_id`` or None."""
        return self.__radio_station.db.query(ScheduledProgram).filter(
            ScheduledProgram.id == program_id).first()

    def __start_listeners(self):
        """Start the thread that listens for scheduling changes."""
        t = threading.Thread(target=self.__listen_for_scheduling_changes,
                             args=(DefaultConfig.SCHEDULE_EVENTS_SERVER_IP,
                                   DefaultConfig.SCHEDULE_EVENTS_SERVER_PORT))
        t.start()

    def __listen_for_scheduling_changes(self, ip, port):
        """Connect to the events server and apply incoming schedule events.

        Retries the connection every 30 seconds until it succeeds, registers
        this station, then processes events until the server closes the
        connection.
        """
        sck = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        addr = (ip, port)
        # It may not be possible to connect after restart, TIME_WAIT could
        # come into play etc. Anyway, keep trying
        connected = False
        while not connected:
            try:
                sck.connect(addr)
                connected = True
            except Exception:
                self.__radio_station.logger.warning(
                    "[Station #{}] Could not connect to server, retrying in 30..."
                    .format(self.__radio_station.id))
                sleep(30)
        # sendall() guarantees the whole registration message is written;
        # encoding keeps the payload valid for byte-oriented sockets.
        sck.sendall(
            json.dumps({
                'station': self.__radio_station.station.id,
                'action': 'register'
            }).encode("utf-8"))
        while True:
            # NOTE(review): one recv() is treated as one JSON message; a
            # message split across reads is silently dropped.
            data = sck.recv(10240000)
            if not data:
                # An empty read means the peer closed the connection. Without
                # this check the loop would spin forever on empty reads.
                self.__radio_station.logger.warning(
                    "[Station #{}] Connection to the scheduling server was closed, no longer listening for changes"
                    .format(self.__radio_station.station.id))
                break
            try:
                event = json.loads(data)
            except ValueError:
                continue
            if not isinstance(event, dict):
                continue
            if "action" in event and "id" in event:
                self.__handle_scheduling_event(event)
        sck.close()

    def __handle_scheduling_event(self, event):
        """Apply one decoded event carrying both ``action`` and ``id``."""
        action = event["action"]
        if action == "delete":
            self.__delete_scheduled_job(event["id"])
            self.__radio_station.logger.info(
                "Scheduled program with id {0} has been deleted".
                format(event["id"]))
        elif action == "add":
            self.__schedule_program_by_id(
                event["id"],
                "Scheduled program with id {0} has been added at time {1}")
        elif action == "update":
            self.__delete_scheduled_job(event["id"])
            self.__schedule_program_by_id(
                event["id"],
                "Scheduled program with id {0} has been moved to start at time {1}")
        elif action == "sync":
            if "music_data" not in event:
                # A KeyError here would kill the listener thread.
                self.__radio_station.logger.warning(
                    "Sync event for station {0} carries no music data".format(
                        event["id"]))
                return
            #self.__radio_station.logger.info("Syncing music for station {0}".format(event["id"]))
            t = threading.Thread(target=self.__process_music_data,
                                 args=(event["id"], event["music_data"]))
            t.start()

    def __schedule_program_by_id(self, program_id, success_message):
        """Load a program by id and schedule it unless missing or expired."""
        scheduled_program = self.__load_program(program_id)
        if scheduled_program is None:
            # The row may have been removed between the event and the query.
            self.__radio_station.logger.warning(
                "Scheduled program with id {0} could not be loaded".format(
                    program_id))
            return
        if not self.__is_program_expired(scheduled_program):
            self.__add_scheduled_job(scheduled_program)
            self.__radio_station.logger.info(
                success_message.format(program_id, scheduled_program.start))

    def __get_dict_from_rows(self, rows):
        """Index ``rows`` by their ``title`` attribute (last one wins)."""
        return {row.title: row for row in rows}

    def __persist(self, entity):
        """Add and commit ``entity``; roll back and return False on failure."""
        db = self.__radio_station.db
        db.add(entity)
        try:
            db._model_changes = {}
            db.commit()
        except DatabaseError:
            db.rollback()
            return False
        return True

    def __process_music_data(self, station_id, json_string):
        """Persist artists, albums and songs from ``json_string``.

        The payload is read as ``{artist: {album: {'songs': [...]}}}``; items
        whose title already exists for the station are reused. An item that
        fails to commit is skipped together with everything nested under it.
        """
        db = self.__radio_station.db
        songs_in_db = self.__get_dict_from_rows(
            db.query(ContentMusic).filter(
                ContentMusic.station_id == station_id).all())
        artists_in_db = self.__get_dict_from_rows(
            db.query(ContentMusicArtist).filter(
                ContentMusicArtist.station_id == station_id).all())
        albums_in_db = self.__get_dict_from_rows(
            db.query(ContentMusicAlbum).filter(
                ContentMusicAlbum.station_id == station_id).all())
        data = json.loads(json_string)
        for artist in data:
            if artist in artists_in_db:
                music_artist = artists_in_db[artist]
            else:
                # persist the artist
                music_artist = ContentMusicArtist(**{
                    'title': artist,
                    'station_id': station_id
                })
                artists_in_db[artist] = music_artist
                if not self.__persist(music_artist):
                    continue
            for album in data[artist]:
                if album in albums_in_db:
                    music_album = albums_in_db[album]
                else:
                    # persist the album
                    music_album = ContentMusicAlbum(**{
                        'title': album,
                        'station_id': station_id
                    })
                    albums_in_db[album] = music_album
                    if not self.__persist(music_album):
                        continue
                for song in data[artist][album]['songs']:
                    if song['title'] in songs_in_db:
                        music_song = songs_in_db[song['title']]
                    else:
                        music_song = ContentMusic(
                            **{
                                'title': song['title'],
                                'duration': song['duration'],
                                'station_id': station_id,
                                'album_id': music_album.id,
                                'artist_id': music_artist.id
                            })
                        songs_in_db[song['title']] = music_song
                        if not self.__persist(music_song):
                            continue

    def __get_current_program(self):
        """
        Gets the program to run from the current list of programs that are
        lined up for the day
        """
        for program in self.__scheduled_programs:
            if not self.__is_program_expired(program):
                return program

    def __is_program_expired(self, scheduled_program):
        """
        Returns whether or not the time for a particular program has passed
        """
        now = arrow.utcnow()
        return (scheduled_program.start_utc +
                scheduled_program.program.duration) < (now +
                                                       timedelta(minutes=1))

    def __get_program_start_time(self, scheduled_program):
        """Return the time at which the program's job should fire."""
        now = arrow.utcnow().datetime
        if scheduled_program.start_utc < now:  # Time at which program begins is already past
            return now + timedelta(seconds=5)  # 5 second scheduling allowance
        else:
            return scheduled_program.start_utc + timedelta(
                seconds=5)  # 5 second scheduling allowance
class AlertSchedulerHandler():
    """Load alert definitions from a cached JSON file and schedule them.

    Definitions are persisted in ``<cachedir>/definitions.json`` by
    ``update_definitions`` and turned into alert objects by
    ``__load_definitions``. Scheduling itself goes through
    ``self.schedule_definition``, which is not defined in this part of the
    file.
    """

    FILENAME = 'definitions.json'
    TYPE_PORT = 'PORT'
    TYPE_METRIC = 'METRIC'
    TYPE_AMS = 'AMS'
    TYPE_SCRIPT = 'SCRIPT'
    TYPE_WEB = 'WEB'
    TYPE_RECOVERY = 'RECOVERY'

    def __init__(self,
                 cachedir,
                 stacks_dir,
                 common_services_dir,
                 extensions_dir,
                 host_scripts_dir,
                 cluster_configuration,
                 config,
                 recovery_manager,
                 in_minutes=True):
        """Store the directories/config and build the (not yet started) scheduler."""
        self.cachedir = cachedir
        self.stacks_dir = stacks_dir
        self.common_services_dir = common_services_dir
        self.extensions_dir = extensions_dir
        self.host_scripts_dir = host_scripts_dir
        self._cluster_configuration = cluster_configuration

        # a mapping between a cluster name and a unique hash for all definitions
        self._cluster_hashes = {}

        # the amount of time, in seconds, that an alert can run after it's scheduled time
        alert_grace_period = int(config.get('agent', 'alert_grace_period', 5))

        if not os.path.exists(cachedir):
            try:
                os.makedirs(cachedir)
            except Exception:
                # best effort: construction continues without the directory
                logger.critical(
                    "[AlertScheduler] Could not create the cache directory {0}"
                    .format(cachedir))

        apscheduler_standalone = False
        self.APS_CONFIG = {
            'apscheduler.threadpool.core_threads': 3,
            'apscheduler.coalesce': True,
            'apscheduler.standalone': apscheduler_standalone,
            'apscheduler.misfire_grace_time': alert_grace_period,
            'apscheduler.threadpool.context_injector':
            self._job_context_injector if not apscheduler_standalone else None,
            'apscheduler.threadpool.agent_config': config
        }

        self._collector = AlertCollector()
        self.__scheduler = Scheduler(self.APS_CONFIG)
        self.__in_minutes = in_minutes
        self.config = config
        # NOTE(review): attribute name is misspelled ("manger"); kept because
        # it is a public attribute that other code may read.
        self.recovery_manger = recovery_manager

        # register python exit handler
        ExitHelper().register(self.exit_handler)

    def _job_context_injector(self, config):
        """
        apscheduler hack to inject monkey-patching, context and configuration
        to all jobs inside scheduler in case if scheduler running in embedded
        mode

        Please note, this function called in job context thus all injects
        should be time-running optimized

        :type config AmbariConfig.AmbariConfig
        """
        if not config.use_system_proxy_setting():
            from ambari_commons.network import reconfigure_urllib2_opener
            reconfigure_urllib2_opener(ignore_system_proxy=True)

    def exit_handler(self):
        """
        Exit handler
        """
        self.stop()

    def update_definitions(self, heartbeat):
        """
        Updates the persisted alert definitions JSON and reschedules jobs.

        :param heartbeat: mapping expected to contain the
          'alertDefinitionCommands' list; without it nothing is changed.
        :return: None
        """
        if 'alertDefinitionCommands' not in heartbeat:
            logger.warning(
                "There are no alert definition commands in the heartbeat; unable to update definitions"
            )
            return

        # prune out things we don't want to store
        alert_definitions = []
        for command in heartbeat['alertDefinitionCommands']:
            command_copy = command.copy()

            # no need to store these since we always use the in-memory cached values
            if 'configurations' in command_copy:
                del command_copy['configurations']

            alert_definitions.append(command_copy)

        # write out the new definitions
        with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f:
            json.dump(alert_definitions, f, indent=2)

        # determine how to reschedule the jobs; the decision is based on the
        # last command. An empty command list has no last command, so fall
        # back to an empty mapping instead of reading an unbound loop variable.
        last_command = alert_definitions[-1] if alert_definitions else {}
        reschedule_all = ("clusterName" in last_command and
                          last_command["clusterName"] not in self._cluster_hashes)

        if reschedule_all:
            # reschedule all jobs, creating new instances
            self.reschedule_all()
        else:
            # reschedule only the jobs that have changed
            self.reschedule()

    def __make_function(self, alert_def):
        """Wrap ``alert_def.collect`` in a zero-argument callable."""
        return lambda: alert_def.collect()

    def start(self):
        """ loads definitions from file and starts the scheduler """
        if self.__scheduler is None:
            return

        if self.__scheduler.running:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(self.APS_CONFIG)

        alert_callables = self.__load_definitions()

        # schedule each definition
        for _callable in alert_callables:
            self.schedule_definition(_callable)

        logger.info(
            "[AlertScheduler] Starting {0}; currently running: {1}".format(
                str(self.__scheduler), str(self.__scheduler.running)))

        self.__scheduler.start()

    def stop(self):
        """Shut the scheduler down and replace it with a fresh instance."""
        if self.__scheduler is not None:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(self.APS_CONFIG)

        logger.info("[AlertScheduler] Stopped the alert scheduler.")

    def reschedule(self):
        """
        Removes jobs that are scheduled where their UUID no longer is valid.
        Schedules jobs where the definition UUID is not currently scheduled.
        """
        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # job names are compared against definition UUIDs; sets replace the
        # nested scan over both lists
        definition_uuids = set(definition.get_uuid() for definition in definitions)
        scheduled_names = set(job.name for job in scheduled_jobs)

        # jobs without valid UUIDs should be unscheduled
        for scheduled_job in scheduled_jobs:
            if scheduled_job.name not in definition_uuids:
                jobs_removed += 1
                logger.info("[AlertScheduler] Unscheduling {0}".format(
                    scheduled_job.name))
                self._collector.remove_by_uuid(scheduled_job.name)
                self.__scheduler.unschedule_job(scheduled_job)

        # if no jobs are found with the definitions UUID, schedule it
        for definition in definitions:
            if definition.get_uuid() not in scheduled_names:
                jobs_scheduled += 1
                self.schedule_definition(definition)

        logger.info(
            "[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled"
            .format(str(jobs_scheduled), str(jobs_removed)))

    def reschedule_all(self):
        """
        Unschedules every currently scheduled job and then schedules a job
        for every definition loaded from the definitions file.
        """
        logger.info("[AlertScheduler] Rescheduling all jobs...")

        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # unschedule all scheduled jobs
        for scheduled_job in scheduled_jobs:
            jobs_removed += 1
            logger.info("[AlertScheduler] Unscheduling {0}".format(
                scheduled_job.name))
            self._collector.remove_by_uuid(scheduled_job.name)
            self.__scheduler.unschedule_job(scheduled_job)

        # for every definition, schedule a job
        for definition in definitions:
            jobs_scheduled += 1
            self.schedule_definition(definition)

        # the second placeholder must be {1}; with {0} twice the rescheduled
        # count was never reported
        logger.info(
            "[AlertScheduler] Reschedule Summary: {0} unscheduled, {1} rescheduled"
            .format(str(jobs_removed), str(jobs_scheduled)))

    def collector(self):
        """ gets the collector for reporting to the server """
        return self._collector

    def __load_definitions(self):
        """
        Loads all alert definitions from a file. All clusters are stored in
        a single file. This wil also populate the cluster-to-hash dictionary.

        :return: list of alert objects; empty when the file is missing or
          cannot be parsed.
        """
        definitions = []

        alerts_definitions_path = os.path.join(self.cachedir, self.FILENAME)
        try:
            with open(alerts_definitions_path) as fp:
                all_commands = json.load(fp)
        except Exception:
            logger.warning(
                '[AlertScheduler] {0} not found or invalid. No alerts will be scheduled until registration occurs.'
                .format(alerts_definitions_path))
            return definitions

        for command_json in all_commands:
            clusterName = '' if 'clusterName' not in command_json else command_json[
                'clusterName']
            hostName = '' if 'hostName' not in command_json else command_json[
                'hostName']
            clusterHash = None if 'hash' not in command_json else command_json[
                'hash']

            # cache the cluster and cluster hash after loading the JSON
            if clusterName != '' and clusterHash is not None:
                logger.info(
                    '[AlertScheduler] Caching cluster {0} with alert hash {1}'.
                    format(clusterName, clusterHash))
                self._cluster_hashes[clusterName] = clusterHash

            for definition in command_json['alertDefinitions']:
                alert = self.__json_to_callable(clusterName, hostName,
                                                definition)
                if alert is None:
                    continue

                alert.set_helpers(self._collector, self._cluster_configuration)
                definitions.append(alert)

        return definitions

    def __json_to_callable(self, clusterName, hostName, json_definition):
        """
        converts the json that represents all aspects of a definition
        and makes an object that extends BaseAlert that is used for individual

        Returns None for an unknown source type; any exception while building
        the alert is logged and whatever was built so far is returned.
        """
        alert = None

        try:
            source = json_definition['source']
            source_type = source.get('type', '')

            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "[AlertScheduler] Creating job type {0} with {1}".format(
                        source_type, str(json_definition)))

            if source_type == AlertSchedulerHandler.TYPE_METRIC:
                alert = MetricAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_AMS:
                alert = AmsAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_PORT:
                alert = PortAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
                source['stacks_directory'] = self.stacks_dir
                source['common_services_directory'] = self.common_services_dir
                source['extensions_directory'] = self.extensions_dir
                source['host_scripts_directory'] = self.host_scripts_dir
                alert = ScriptAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_WEB:
                alert = WebAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_RECOVERY:
                alert = RecoveryAlert(json_definition, source, self.config,
                                      self.recovery_manger)

            if alert is not None:
                alert.set_cluster(clusterName, hostName)

        except Exception:
            # logger.exception records the active traceback itself
            logger.exception(
                "[AlertScheduler] Unable to load an invalid alert definition. It will be skipped."
            )

        return alert
class _Direct(Direct):
    """Interactive console that adds scheduled sampling to ``Direct``.

    Commands are read line by line from stdin: ``init``, ``start``, ``stop``
    and ``q`` are handled here; any other line is sent to the instrument
    followed by a carriage return.
    """

    # set initial conditions for the subclass (in addition to the superclass
    # methods defined in Direct) and initialize the scheduler
    def __init__(self, *args, **kwargs):
        Direct.__init__(self, *args, **kwargs)
        self.scheduler = Scheduler()
        self.sampling = False

    def collect_sample(self):
        """Send the ``R0`` command and print the collection time."""
        time_value = gmtime()
        self.send('R0\r')
        # NOTE(review): mktime() is the inverse of localtime(), so feeding it
        # a gmtime() value is offset by the local UTC offset unless the
        # process runs in UTC -- confirm whether that is intended.
        print('#\t--- Collecting Sample at %.3f' % mktime(time_value))

    def query_status(self):
        """Send the ``S`` command and print the query time."""
        time_value = gmtime()
        self.send('S\r')
        print('#\t--- Query Instrument Status at %.3f' % mktime(time_value))

    def _stop_sampling(self):
        """Unschedule both sampling jobs and shut the scheduler down."""
        self.scheduler.unschedule_job(self.sample)
        self.scheduler.unschedule_job(self.status)
        self.scheduler.shutdown()

    def run(self):
        """Process stdin commands until ``q`` is entered or stdin is closed."""
        while True:
            # parse the user commands from stdin
            line = sys.stdin.readline()
            # readline() returns '' only at end of input (a blank line is
            # '\n'). Treat it as quit; otherwise the loop would keep sending
            # '\r' to the instrument forever.
            cmd = line.strip() if line else 'q'

            # default command set
            if cmd == 'q':
                if self.sampling is True:
                    print('#\t--- stop all scheduled sampling')
                    self._stop_sampling()
                print('#\t--- turning on 1 Hz status messages')
                self.send('F1\r')
                print('### exiting')
                sleep(1)
                break
            elif cmd == 'init':
                print('### initialize instrument for sampling')
                print('#\t--- turning off 1 Hz status messages')
                # the command is issued three times, one second apart
                self.send('F5A\r')
                sleep(1)
                self.send('F5A\r')
                sleep(1)
                self.send('F5A\r')
                sleep(1)
                print('#\t--- flush internal pump 2 times with reagent')
                self.send('P2\r')
                sleep(2)
                print('#\t\t--- * first cycle complete')
                self.send('P2\r')
                sleep(2)
                print('#\t\t--- * second cycle complete, ready for sampling')
            elif cmd == 'start':
                if self.sampling:
                    # starting a scheduler that is already running, or adding
                    # the jobs twice, is not wanted
                    print('### sampling already started')
                    continue
                print('### sampling started, will sample every hour at the top of the hour')
                self.scheduler.start()
                self.sample = self.scheduler.add_cron_job(self.collect_sample,
                                                          minute=0)
                self.status = self.scheduler.add_cron_job(self.query_status,
                                                          hour='0,12',
                                                          minute=15)
                #self.scheduler.print_jobs()
                self.sampling = True
            elif cmd == 'stop':
                if not self.sampling:
                    # self.sample / self.status only exist after 'start'
                    print('### sampling is not running')
                    continue
                print('### sampling stopped')
                self._stop_sampling()
                self.sampling = False
            else:
                print('### sending %s' % cmd)
                self.send(cmd + '\r')
class TestJobExecution(object):
    """Job execution tests driven by calling ``_process_jobs`` directly.

    ``setup`` builds a scheduler with a fake thread pool and a fake thread so
    that jobs can be processed from the test itself, and captures ERROR-level
    log output of the scheduler module in ``self.logstream``.
    """

    def setup(self):
        self.scheduler = Scheduler(threadpool=FakeThreadPool())
        self.scheduler.add_jobstore(RAMJobStore(), "default")

        # Make the scheduler think it's running
        self.scheduler._thread = FakeThread()

        self.logstream = StringIO()
        self.loghandler = StreamHandler(self.logstream)
        self.loghandler.setLevel(ERROR)
        scheduler.logger.addHandler(self.loghandler)

    def teardown(self):
        scheduler.logger.removeHandler(self.loghandler)
        # Undo the clock patch applied by the tests that use FakeDateTime
        if scheduler.datetime == FakeDateTime:
            scheduler.datetime = datetime
        FakeDateTime._now = original_now

    def test_job_name(self):
        # The job's repr includes the function name and the trigger's repr
        def my_job():
            pass

        job = self.scheduler.add_interval_job(my_job, start_date=datetime(2010, 5, 19))
        eq_(
            repr(job),
            "<Job (name=my_job, trigger=<IntervalTrigger (interval=datetime.timedelta(0, 1), "
            "start_date=datetime.datetime(2010, 5, 19, 0, 0))>)>",
        )

    def test_schedule_object(self):
        # Tests that any callable object is accepted (and not just functions)
        class A:
            def __init__(self):
                self.val = 0

            def __call__(self):
                self.val += 1

        a = A()
        job = self.scheduler.add_interval_job(a, seconds=1)
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(a.val, 2)

    def test_schedule_method(self):
        # Tests that bound methods can be scheduled (at least with RAMJobStore)
        class A:
            def __init__(self):
                self.val = 0

            def method(self):
                self.val += 1

        a = A()
        job = self.scheduler.add_interval_job(a.method, seconds=1)
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(a.val, 2)

    def test_unschedule_job(self):
        # An unscheduled job must not run again
        def increment():
            vals[0] += 1

        vals = [0]
        job = self.scheduler.add_cron_job(increment)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals[0], 1)
        self.scheduler.unschedule_job(job)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals[0], 1)

    def test_unschedule_func(self):
        # unschedule_func removes every job that runs the given function
        def increment():
            vals[0] += 1

        def increment2():
            vals[0] += 1

        vals = [0]
        job1 = self.scheduler.add_cron_job(increment)
        job2 = self.scheduler.add_cron_job(increment2)
        job3 = self.scheduler.add_cron_job(increment)
        eq_(self.scheduler.get_jobs(), [job1, job2, job3])

        self.scheduler.unschedule_func(increment)
        eq_(self.scheduler.get_jobs(), [job2])

    @raises(KeyError)
    def test_unschedule_func_notfound(self):
        # "copy" was never scheduled, so a KeyError is expected
        self.scheduler.unschedule_func(copy)

    def test_job_finished(self):
        # A job limited to one run is dropped from the job list afterwards
        def increment():
            vals[0] += 1

        vals = [0]
        job = self.scheduler.add_interval_job(increment, max_runs=1)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals, [1])
        assert job not in self.scheduler.get_jobs()

    def test_job_exception(self):
        # An exception raised by a job must show up in the scheduler's log
        def failure():
            raise DummyException

        job = self.scheduler.add_date_job(failure, datetime(9999, 9, 9))
        self.scheduler._process_jobs(job.next_run_time)
        assert "DummyException" in self.logstream.getvalue()

    def test_misfire_grace_time(self):
        # The scheduler-wide value is the default; a per-job value overrides it
        self.scheduler.misfire_grace_time = 3
        job = self.scheduler.add_interval_job(lambda: None, seconds=1)
        eq_(job.misfire_grace_time, 3)

        job = self.scheduler.add_interval_job(lambda: None, seconds=1, misfire_grace_time=2)
        eq_(job.misfire_grace_time, 2)

    def test_coalesce_on(self):
        # Makes sure that the job is only executed once when it is scheduled
        # to be executed twice in a row
        def increment():
            vals[0] += 1

        vals = [0]
        events = []
        scheduler.datetime = FakeDateTime
        self.scheduler.add_listener(events.append, EVENT_JOB_EXECUTED | EVENT_JOB_MISSED)
        job = self.scheduler.add_interval_job(
            increment, seconds=1, start_date=FakeDateTime.now(), coalesce=True, misfire_grace_time=2
        )

        # Turn the clock 2 seconds forward
        FakeDateTime._now += timedelta(seconds=2)

        self.scheduler._process_jobs(FakeDateTime.now())
        eq_(job.runs, 1)
        eq_(len(events), 1)
        eq_(events[0].code, EVENT_JOB_EXECUTED)
        eq_(vals, [1])

    def test_coalesce_off(self):
        # Makes sure that every scheduled run for the job is executed even
        # when they are in the past (but still within misfire_grace_time)
        def increment():
            vals[0] += 1

        vals = [0]
        events = []
        scheduler.datetime = FakeDateTime
        self.scheduler.add_listener(events.append, EVENT_JOB_EXECUTED | EVENT_JOB_MISSED)
        job = self.scheduler.add_interval_job(
            increment, seconds=1, start_date=FakeDateTime.now(), coalesce=False, misfire_grace_time=2
        )

        # Turn the clock 2 seconds forward
        FakeDateTime._now += timedelta(seconds=2)

        self.scheduler._process_jobs(FakeDateTime.now())
        eq_(job.runs, 3)
        eq_(len(events), 3)
        eq_(events[0].code, EVENT_JOB_EXECUTED)
        eq_(events[1].code, EVENT_JOB_EXECUTED)
        eq_(events[2].code, EVENT_JOB_EXECUTED)
        eq_(vals, [3])

    def test_interval(self):
        # Positional job arguments are passed on every run
        def increment(amount):
            vals[0] += amount
            vals[1] += 1

        vals = [0, 0]
        job = self.scheduler.add_interval_job(increment, seconds=1, args=[2])
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals, [4, 2])

    def test_interval_schedule(self):
        # Decorator form of the interval job; the job hangs off the function
        @self.scheduler.interval_schedule(seconds=1)
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals, [2])

    def test_cron(self):
        # A cron job added without field arguments runs on each of three
        # consecutive seconds
        def increment(amount):
            vals[0] += amount
            vals[1] += 1

        vals = [0, 0]
        job = self.scheduler.add_cron_job(increment, args=[3])
        start = job.next_run_time
        self.scheduler._process_jobs(start)
        eq_(vals, [3, 1])
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals, [6, 2])
        self.scheduler._process_jobs(start + timedelta(seconds=2))
        eq_(vals, [9, 3])

    def test_cron_schedule_1(self):
        # Decorator form of the cron job without field arguments
        @self.scheduler.cron_schedule()
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals[0], 2)

    def test_cron_schedule_2(self):
        # With minute="*" consecutive run times are 60 seconds apart
        @self.scheduler.cron_schedule(minute="*")
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        next_run = start + timedelta(seconds=60)
        eq_(increment.job.get_run_times(next_run), [start, next_run])
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(next_run)
        eq_(vals[0], 2)

    def test_date(self):
        # Keyword job arguments are passed to a date job
        def append_val(value):
            vals.append(value)

        vals = []
        date = datetime.now() + timedelta(seconds=1)
        self.scheduler.add_date_job(append_val, date, kwargs={"value": "test"})
        self.scheduler._process_jobs(date)
        eq_(vals, ["test"])

    def test_print_jobs(self):
        # NOTE(review): the expected strings must match print_jobs() output
        # character for character, including the spacing at the start of each
        # job line -- confirm against the scheduler implementation.
        out = StringIO()
        self.scheduler.print_jobs(out)
        expected = "Jobstore default:%s" " No scheduled jobs%s" % (os.linesep, os.linesep)
        eq_(out.getvalue(), expected)

        self.scheduler.add_date_job(copy, datetime(2200, 5, 19))
        out = StringIO()
        self.scheduler.print_jobs(out)
        expected = (
            "Jobstore default:%s "
            "copy (trigger: date[2200-05-19 00:00:00], "
            "next run at: 2200-05-19 00:00:00)%s" % (os.linesep, os.linesep)
        )
        eq_(out.getvalue(), expected)

    def test_jobstore(self):
        # Removing a job store also removes the jobs it holds
        self.scheduler.add_jobstore(RAMJobStore(), "dummy")
        job = self.scheduler.add_date_job(lambda: None, datetime(2200, 7, 24), jobstore="dummy")
        eq_(self.scheduler.get_jobs(), [job])
        self.scheduler.remove_jobstore("dummy")
        eq_(self.scheduler.get_jobs(), [])

    @raises(KeyError)
    def test_remove_nonexistent_jobstore(self):
        self.scheduler.remove_jobstore("dummy2")

    def test_job_next_run_time(self):
        # Tests against bug #5
        # Processing the same instant twice must not run the job twice
        def increment():
            vars[0] += 1

        vars = [0]
        scheduler.datetime = FakeDateTime
        job = self.scheduler.add_interval_job(increment, seconds=1, misfire_grace_time=3, start_date=FakeDateTime.now())
        start = job.next_run_time

        self.scheduler._process_jobs(start)
        eq_(vars, [1])

        self.scheduler._process_jobs(start)
        eq_(vars, [1])

        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vars, [2])
class MetaDataGenerationScheduler():
    """Keep one createrepo interval job per repository with scheduled metadata.

    ``update_program_config`` scans the static repository root, compares each
    repo's metadata-generation file against the remembered modification time
    and adds, replaces or removes the matching scheduler job.
    """

    # Class-level defaults; __init__ rebinds both dictionaries per instance.
    repo_timestamps = {}  # jobName (=reponame) : last modification timestamp (float)
    jobs = {}  # jobName (=reponame) : jobHandle
    configService = RepoConfigService()
    static_root_dir = configService.getStaticRepoDir()
    sched = None
    interval = None

    def __init__(self, updateIntervalSeconds=30):
        """Create the scheduler.

        :param updateIntervalSeconds: seconds between two runs of
            ``update_program_config`` once ``start()`` was called.
        """
        self.interval = updateIntervalSeconds
        config = {'apscheduler.daemonic': False}
        self.sched = Scheduler(config)
        # initialize these per instance.
        self.repo_timestamps = {}
        self.jobs = {}

    def start(self):
        """Schedule the existing repos, the periodic rescan and the cache cleanup."""
        self.update_program_config()  # read configs, schedule jobs

        # schedule an update as a job
        self.sched.add_interval_job(self.update_program_config,
                                    seconds=self.interval)

        # schedule cleanup cache
        self.sched.add_cron_job(self.cleanupCacheDir, hour=23, minute=17,
                                second=20)

        self.sched.start()

    def createrepo_with_optional_cleanup_job(self, *argList):
        """Job body: optionally clean up old RPMs, then run createrepo.

        :param argList: ``(repoDir, reponame, rpm_max_keep)``; the cleanup is
            skipped when ``rpm_max_keep`` is None. Exceptions raised by the
            cleanup or createrepo are logged, not raised.
        """
        monitor = JobMonitorer()
        monitor.job_starts()
        repoDir = argList[0]
        reponame = argList[1]
        rpm_max_keep = argList[2]
        didCleanUp = False
        try:
            if rpm_max_keep is not None:
                didCleanUp = True
                self.configService.doCleanup(repoDir, rpm_max_keep)
                logging.info("job RpmCleanup on %s took %s seconds", reponame,
                             monitor.get_execution_time_until_now_seconds())
            self.configService.doCreateRepo(repoDir, reponame)
            monitor.job_finishes()
            logging.info(monitor.get_pretty_job_summary(
                "createrepo on %s (cleanup included : %s)" % (reponame,
                                                              didCleanUp)))
        except Exception:
            logging.error(traceback.format_exc())

    def update_program_config(self):
        """Synchronise the scheduled jobs with the repos found on disk."""
        updatedJobs = 0
        addedJobs = 0
        removedJobs = 0
        list_of_static_dirs = os.listdir(self.static_root_dir)
        self.remove_jobs_where_repo_deleted(list_of_static_dirs)
        for static_dir in list_of_static_dirs:
            file_path = self.configService.getMetaDataGenerationFilePathRelativeToRepoDirByRepoName(static_dir)
            if not os.path.exists(file_path):
                if static_dir in self.repo_timestamps:
                    logging.debug("unschedule because file does not exist")
                    self.unschedule_by_reponame(static_dir)
                    # repo is unmanaged now, check back later
                    del self.repo_timestamps[static_dir]
                    removedJobs += 1
                continue

            if static_dir not in self.repo_timestamps:
                logging.debug("new repo found..")
                addedJobs += 1
                # make an entry so we know we processed the repo + remember
                # modification timestamp
                self.repo_timestamps[static_dir] = self.determine_last_modification_time(file_path)
                self.add_job_for_repo(static_dir)
            else:
                # we already processed the repo because its in the dictionary
                logging.debug("check for updates in repo config...")
                if self.is_more_recent_metadata_generation_file_than(static_dir, self.repo_timestamps[static_dir]):
                    logging.debug("update job for repo " + static_dir)
                    updatedJobs += 1
                    self.repo_timestamps[static_dir] = self.determine_last_modification_time(file_path)
                    self.unschedule_by_reponame(static_dir)
                    self.add_job_for_repo(static_dir)
        logging.info("update_program_config finished -- updated %s jobs, added %s jobs, removed %s jobs" % (updatedJobs, addedJobs, removedJobs))

    def remove_jobs_where_repo_deleted(self, list_of_existing_repos):
        """Unschedule and forget repos that no longer exist on disk."""
        removed_repos = set(self.repo_timestamps) - set(list_of_existing_repos)
        for repo in removed_repos:
            self.unschedule_by_reponame(repo)
            # Drop the remembered timestamp as well. Otherwise the deleted
            # repo is revisited on every scan, and a repo recreated under the
            # same name is not treated as new.
            del self.repo_timestamps[repo]

    def determine_last_modification_time(self, file_path):
        """Return the file's last modification time as a float timestamp."""
        statbuf = os.stat(file_path)
        return statbuf.st_mtime

    def unschedule_by_reponame(self, reponame):
        """Unschedule the job registered for ``reponame``, if there is one."""
        if reponame in self.jobs:
            self.sched.unschedule_job(self.jobs[reponame])
            del self.jobs[reponame]  # remove the job from the job-handle dictionary..

    def is_more_recent_metadata_generation_file_than(self, repodir, past_timestamp):
        """Return True if the repo's metadata-generation file is newer than ``past_timestamp``."""
        file_path = self.configService.getMetaDataGenerationFilePathRelativeToRepoDirByRepoName(repodir)
        actual_timestamp = self.determine_last_modification_time(file_path)
        return actual_timestamp > past_timestamp

    def cleanupCacheDir(self):
        """Delete cache directories of removed repos and of repos without a lock file.

        Entries whose name starts with '.' are ignored. Errors are logged and
        the job summary is always written.
        """
        cleanupCacheMonitor = JobMonitorer()
        cleanupCacheMonitor.job_starts()
        logging.info('Start cache cleanup ...')
        cleanupDir = self.configService.getRepoCacheDir()
        try:
            for reponame in os.listdir(cleanupDir):
                if reponame.startswith('.'):
                    continue

                # check for cache dirs of already deleted repos
                absoluteDir = os.path.join(cleanupDir, reponame)
                if os.path.isdir(absoluteDir):
                    if not os.path.exists(self.configService.getStaticRepoDir(reponame)):
                        shutil.rmtree(absoluteDir)
                        continue

                    lockfile = self.configService.getRepoLockFile(reponame)
                    if not os.path.exists(lockfile):
                        shutil.rmtree(absoluteDir)
        except Exception as ex:
            logging.error("Exception in CleanupCacheDir : " + str(ex))
        finally:
            cleanupCacheMonitor.job_finishes()
            logging.info(cleanupCacheMonitor.get_pretty_job_summary("CleanupCacheDir"))

    def add_job_for_repo(self, repo_dir):
        """Add an interval createrepo job for ``repo_dir`` if it is configured as scheduled."""
        metaDataConfig = self.configService.getMetaDataGenerationConfig(repo_dir)
        if not metaDataConfig:
            return  # exit silently without adding a job

        generation_type = metaDataConfig.getMetaDataGenerationType()
        if generation_type == 'manual':
            return  # exit silently

        # if we get here, we know its "scheduled"
        generation_interval = int(metaDataConfig.getMetaDataGenerationInterval())
        rpm_max_keep = metaDataConfig.getMetaDataGenerationRpmMaxKeep()
        full_path_to_repo = self.configService.getStaticRepoDir(repo_dir)
        argList = [full_path_to_repo, repo_dir, rpm_max_keep]
        addedJob = self.sched.add_interval_job(self.createrepo_with_optional_cleanup_job,
                                               seconds=generation_interval,
                                               args=argList)
        self.jobs[repo_dir] = addedJob

    def shutdown(self):
        """Shut the scheduler down."""
        self.sched.shutdown()
class TNActionScheduler(TNArchipelPlugin):
    """
    This plugin allows to create scheduled actions.

    Jobs are run by an APScheduler instance and mirrored in an sqlite table
    so that they can be re-created by restore_jobs() when the plugin is
    instantiated again.
    """

    def __init__(self, configuration, entity, entry_point_group):
        """
        Initialize the plugin.
        @type configuration: Configuration object
        @param configuration: the configuration
        @type entity: L{TNArchipelEntity}
        @param entity: the entity that owns the plugin
        @type entry_point_group: string
        @param entry_point_group: the group name of plugin entry_point
        """
        TNArchipelPlugin.__init__(self, configuration=configuration, entity=entity, entry_point_group=entry_point_group)
        self.scheduler = Scheduler()
        self.scheduler.start()
        # check_same_thread=False: the connection is also used from the
        # functions executed by the scheduler.
        self.database = sqlite3.connect(self.configuration.get("SCHEDULER", "database"), check_same_thread=False)
        self.database.execute(
            "create table if not exists scheduler (entity_uuid text, job_uuid text, action text, year text, month text, day text, hour text, minute text, second text, comment text, params text)"
        )
        self.database.commit()
        self.cursor = self.database.cursor()
        self.restore_jobs()
        self.supported_actions_for_vm = ("create", "shutdown", "destroy", "suspend", "resume", "reboot", "migrate", "pause")
        self.supported_actions_for_hypervisor = ("alloc", "free")
        # permissions
        self.entity.permission_center.create_permission("scheduler_jobs", "Authorizes user to get the list of task", False)
        self.entity.permission_center.create_permission("scheduler_schedule", "Authorizes user to schedule a task", False)
        self.entity.permission_center.create_permission("scheduler_unschedule", "Authorizes user to unschedule a task", False)
        self.entity.permission_center.create_permission("scheduler_actions", "Authorizes user to get available actions", False)
        # hooks
        if self.entity.__class__.__name__ == "TNArchipelVirtualMachine":
            self.entity.register_hook("HOOK_VM_TERMINATE", method=self.vm_terminate)

    ### Plugin interface

    def register_handlers(self):
        """
        This method will be called by the plugin user when it will be
        necessary to register module for listening to stanza.
        """
        self.entity.xmppclient.RegisterHandler('iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER)

    def unregister_handlers(self):
        """
        Unregister the handlers.
        """
        self.entity.xmppclient.UnregisterHandler('iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER)

    @staticmethod
    def plugin_info():
        """
        Return informations about the plugin.
        @rtype: dict
        @return: dictionary contaning plugin informations
        """
        plugin_friendly_name = "Action Scheduler"
        plugin_identifier = "action_scheduler"
        plugin_configuration_section = "SCHEDULER"
        plugin_configuration_tokens = ["database"]
        return {
            "common-name": plugin_friendly_name,
            "identifier": plugin_identifier,
            "configuration-section": plugin_configuration_section,
            "configuration-tokens": plugin_configuration_tokens
        }

    ### Entity helpers

    def _entity_uid(self):
        """
        Return the value stored in the entity_uuid column for the owning entity.
        @rtype: string
        @return: the VM uuid, or ARCHIPEL_SCHED_HYPERVISOR_UID for a hypervisor
        @raise Exception: if the entity is neither a VM nor a hypervisor
        """
        entity_class = self.entity.__class__.__name__
        if entity_class == "TNArchipelVirtualMachine":
            return self.entity.uuid
        if entity_class == "TNArchipelHypervisor":
            return ARCHIPEL_SCHED_HYPERVISOR_UID
        raise Exception("entity class %s is not supported by the scheduler" % entity_class)

    def _job_function(self):
        """
        Return the bound method the scheduler must call for the owning entity.
        @rtype: callable
        @return: do_job_for_vm or do_job_for_hypervisor
        @raise Exception: if the entity is neither a VM nor a hypervisor
        """
        entity_class = self.entity.__class__.__name__
        if entity_class == "TNArchipelVirtualMachine":
            return self.do_job_for_vm
        if entity_class == "TNArchipelHypervisor":
            return self.do_job_for_hypervisor
        raise Exception("entity class %s is not supported by the scheduler" % entity_class)

    ### Persistance

    def delete_job(self, uid):
        """
        Remove a job from the database.
        @type uid: string
        @param uid: the uid of the job to remove
        """
        self.cursor.execute("DELETE FROM scheduler WHERE job_uuid=?", (uid, ))
        self.database.commit()

    def save_jobs(self, uid, action, year, month, day, hour, minute, second, comment, params=None):
        """
        Save a job in the database.
        @type uid: string
        @param uid: the uid of the job
        @type action: string
        @param action: the action
        @type year: string
        @param year: year of execution
        @type month: string
        @param month: month of execution
        @type day: string
        @param day: day of execution
        @type hour: string
        @param hour: hour of execution
        @type minute: string
        @param minute: minute of execution
        @type second: string
        @param second: second of execution
        @type comment: string
        @param comment: comment about the job
        @type params: string
        @param params: random parameter of the job
        """
        entity_uid = self._entity_uid()
        self.cursor.execute(
            "INSERT INTO scheduler VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (entity_uid, uid, action, year, month, day, hour, minute, second, comment, params, ))
        self.database.commit()

    def restore_jobs(self):
        """
        Restore the jobs from the database.

        Every stored row of the owning entity is scheduled again with the
        same five arguments iq_schedule() uses, and with the job function
        matching the entity type. A row that cannot be scheduled is logged
        and skipped.
        """
        entity_uid = self._entity_uid()
        job_function = self._job_function()
        self.cursor.execute("SELECT * FROM scheduler WHERE entity_uuid=?", (entity_uid, ))
        # Materialise the rows first: the scheduler is already running and
        # delete_job() re-uses this very cursor.
        for values in self.cursor.fetchall():
            try:
                entity_uuid, job_uuid, action, year, month, day, hour, minute, second, comment, params = values
                str_date = "%s/%s/%s %s:%s:%s" % (year, month, day, hour, minute, second)
                # The job functions take (action, uid, str_date, comment, param);
                # leaving the stored params out makes the job fail when it fires.
                self.scheduler.add_cron_job(
                    job_function,
                    year=year, month=month, day=day, hour=hour, minute=minute, second=second,
                    args=[action, job_uuid, str_date, comment, params])
            except Exception as ex:
                self.entity.log.error("unable to restore a job: %s" % str(ex))

    def vm_terminate(self, origin, user_info, arguments):
        """
        Close the database connection.
        @type origin: TNArchipelEntity
        @param origin: the origin of the hook
        @type user_info: object
        @param user_info: random user information
        @type arguments: object
        @param arguments: runtime argument
        """
        self.database.close()

    ### Jobs

    def _scheduled_jobs(self):
        """
        Return the jobs known by the scheduler.
        @rtype: list
        @return: result of get_jobs() when the scheduler has it, else its jobs attribute
        """
        if hasattr(self.scheduler, "get_jobs"):
            return self.scheduler.get_jobs()
        return self.scheduler.jobs

    def get_jod_with_uid(self, uid):
        """
        Get a job with given uid.
        @type uid: string
        @param uid: the uid of the job
        @rtype: Job
        @return: the first job whose second argument equals uid, or None
        """
        for job in self._scheduled_jobs():
            if str(job.args[1]) == uid:
                return job
        return None

    def _job_done(self, uid):
        """
        Forget a job that will not run again and notify the clients.
        @type uid: string
        @param uid: the uid of the job that has just been executed
        """
        job = self.get_jod_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        self.entity.push_change("scheduler", "jobexecuted")

    def do_job_for_vm(self, action, uid, str_date, comment, param):
        """
        Perform the job.
        @type action: string
        @param action: the action to execute
        @type uid: string
        @param uid: the uid of the job
        @type str_date: string
        @param str_date: the date of the job
        @type comment: string
        @param comment: comment about the job
        @type param: string
        @param param: a random parameter to give to job
        """
        # NOTE(review): "reboot" is advertised in supported_actions_for_vm but
        # has no branch here, and "migrate" is a no-op - confirm intent.
        if action == "create":
            self.entity.create()
        elif action == "shutdown":
            self.entity.shutdown()
        elif action == "destroy":
            self.entity.destroy()
        elif action == "suspend":
            self.entity.suspend()
        elif action == "resume":
            self.entity.resume()
        elif action == "pause":
            # toggle between the two libvirt states handled here
            if self.entity.libvirt_status == 1:
                self.entity.suspend()
            elif self.entity.libvirt_status == 3:
                self.entity.resume()
        elif action == "migrate":
            pass
        self._job_done(uid)

    def do_job_for_hypervisor(self, action, uid, str_date, comment, param):
        """
        Perform the job.
        @type action: string
        @param action: the action to execute
        @type uid: string
        @param uid: the uid of the job
        @type str_date: string
        @param str_date: the date of the job
        @type comment: string
        @param comment: comment about the job
        @type param: string
        @param param: a random parameter to give to job
        """
        if action == "alloc":
            self.entity.alloc()
        elif action == "free":
            pass  # self.entity.free() is intentionally not called
        self._job_done(uid)

    ### Process IQ

    def process_iq(self, conn, iq):
        """
        This method is invoked when a ARCHIPEL_NS_VM_SCHEDULER IQ is received.
        It understands IQ of type:
            - jobs
            - schedule
            - unschedule
            - actions
        @type conn: xmpp.Dispatcher
        @param conn: ths instance of the current connection that send the stanza
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        """
        reply = None
        action = self.entity.check_acp(conn, iq)
        self.entity.check_perm(conn, iq, action, -1, prefix="scheduler_")
        if action == "schedule":
            reply = self.iq_schedule(iq)
        elif action == "unschedule":
            reply = self.iq_unschedule(iq)
        elif action == "jobs":
            reply = self.iq_jobs(iq)
        elif action == "actions":
            reply = self.iq_actions(iq)
        if reply:
            conn.send(reply)
            raise xmpp.protocol.NodeProcessed

    def iq_schedule(self, iq):
        """
        Schedule a task.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            archipel_tag = iq.getTag("query").getTag("archipel")
            job = archipel_tag.getAttr("job")
            entityClass = self.entity.__class__.__name__
            param = None
            if entityClass == "TNArchipelVirtualMachine" and not job in self.supported_actions_for_vm:
                raise Exception("action %s is not valid" % job)
            elif entityClass == "TNArchipelHypervisor" and not job in self.supported_actions_for_hypervisor:
                raise Exception("action %s is not valid" % job)
            year = archipel_tag.getAttr("year")
            month = archipel_tag.getAttr("month")
            day = archipel_tag.getAttr("day")
            hour = archipel_tag.getAttr("hour")
            minute = archipel_tag.getAttr("minute")
            second = archipel_tag.getAttr("second")
            comment = archipel_tag.getAttr("comment")
            if archipel_tag.has_attr("param"):
                param = archipel_tag.getAttr("param")
            uid = str(uuid.uuid1())
            str_date = "%s-%s-%s @ %s : %02d : %02d" % (year, month, day, hour, int(minute), int(second))
            func = self._job_function()
            self.scheduler.add_cron_job(
                func,
                year=year, month=month, day=day, hour=hour, minute=minute, second=second,
                args=[job, uid, str_date, comment, param])
            self.save_jobs(uid, job, year, month, day, hour, minute, second, comment, param)
            self.entity.push_change("scheduler", "scheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_jobs(self, iq):
        """
        Get jobs.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            nodes = []
            for job in self._scheduled_jobs():
                job_node = xmpp.Node(tag="job", attrs={
                    "action": str(job.args[0]),
                    "uid": str(job.args[1]),
                    "date": str(job.args[2]),
                    "comment": job.args[3]
                })
                nodes.append(job_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_unschedule(self, iq):
        """
        Unschedule a job.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            uid = iq.getTag("query").getTag("archipel").getAttr("uid")
            the_job = self.get_jod_with_uid(uid)
            if not the_job:
                raise Exception("job with uid %s doesn't exists" % uid)
            self.delete_job(uid)
            self.scheduler.unschedule_job(the_job)
            self.entity.push_change("scheduler", "unscheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_actions(self, iq):
        """
        Get available actions.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            entityClass = self.entity.__class__.__name__
            if entityClass == "TNArchipelVirtualMachine":
                actions = self.supported_actions_for_vm
            elif entityClass == "TNArchipelHypervisor":
                actions = self.supported_actions_for_hypervisor
            else:
                raise Exception("entity class %s is not supported by the scheduler" % entityClass)
            nodes = []
            for action in actions:
                action_node = xmpp.Node(tag="action")
                action_node.setData(action)
                nodes.append(action_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply
class PyFlowScheduler(object):
    """
    This object schedules the submission of the tasks in an :class:`Flow`.
    There are two types of errors that might occur during the execution of the jobs:

        #. Python exceptions
        #. Abinit Errors.

    Python exceptions are easy to detect and are usually due to a bug in abinitio or random errors such as IOError.
    The set of Abinit Errors is much much broader. It includes wrong input data, segmentation
    faults, problems with the resource manager, etc. Abinitio tries to handle the most common cases
    but there's still a lot of room for improvement.
    Note, in particular, that `PyFlowScheduler` will shutdown automatically if

        #. The number of python exceptions is > MAX_NUM_PYEXCS

        #. The number of Abinit Errors (i.e. the number of tasks whose status is S_ERROR) is > MAX_NUM_ABIERRS

        #. The number of jobs launched becomes greater than (SAFETY_RATIO * total_number_of_tasks).

        #. The scheduler will send an email to the user (specified by mailto) every REMINDME_S seconds.
           If the mail cannot be sent, it will shutdown automatically.
           This check prevents the scheduler from being trapped in an infinite loop.
    """
    # Configuration file.
    YAML_FILE = "scheduler.yml"
    # expanduser honours $HOME when it is set and still yields a string when it
    # is not (os.getenv("HOME") would return None and break os.path.join).
    USER_CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".abinit", "abipy")

    DEBUG = 0

    Error = PyFlowSchedulerError

    def __init__(self, **kwargs):
        """
        Args:
            weeks: number of weeks to wait
            days: number of days to wait
            hours: number of hours to wait
            minutes: number of minutes to wait
            seconds: number of seconds to wait
            mailto: address the reports are sent to (default None, i.e. no mail)
            verbose: (int) verbosity level
            max_njobs_inqueue: Limit on the number of jobs that can be present in the queue (default 200)
            use_dynamic_manager: True if the :class:`TaskManager` must be re-initialized from
                file before launching the jobs. Default: False
            max_nlaunch: Maximum number of tasks launched by rapidfire (default -1 i.e. no limit)
            REMINDME_S, MAX_NUM_PYEXCS, MAX_NUM_ABIERRS, SAFETY_RATIO: thresholds used by the
                shutdown checks described in the class docstring

        Raises:
            `PyFlowSchedulerError` if no time interval is given or if unknown arguments are passed.
        """
        # Options passed to the scheduler.
        self.sched_options = AttrDict(
            weeks=kwargs.pop("weeks", 0),
            days=kwargs.pop("days", 0),
            hours=kwargs.pop("hours", 0),
            minutes=kwargs.pop("minutes", 0),
            seconds=kwargs.pop("seconds", 0),
        )

        if all(not v for v in self.sched_options.values()):
            raise self.Error("Wrong set of options passed to the scheduler.")

        self.mailto = kwargs.pop("mailto", None)
        self.verbose = int(kwargs.pop("verbose", 0))
        self.use_dynamic_manager = kwargs.pop("use_dynamic_manager", False)
        self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200)

        self.REMINDME_S = float(kwargs.pop("REMINDME_S", 4 * 24 * 3600))
        self.MAX_NUM_PYEXCS = int(kwargs.pop("MAX_NUM_PYEXCS", 0))
        self.MAX_NUM_ABIERRS = int(kwargs.pop("MAX_NUM_ABIERRS", 0))
        self.SAFETY_RATIO = int(kwargs.pop("SAFETY_RATIO", 5))
        self.max_nlaunch = kwargs.pop("max_nlaunch", -1)

        if kwargs:
            raise self.Error("Unknown arguments %s" % kwargs)

        if has_sched_v3:
            from apscheduler.schedulers.blocking import BlockingScheduler
            self.sched = BlockingScheduler()
        else:
            from apscheduler.scheduler import Scheduler
            self.sched = Scheduler(standalone=True)

        self.nlaunch = 0
        self.num_reminders = 1

        # Used to keep track of the exceptions raised while the scheduler is running
        self.exceptions = collections.deque(maxlen=self.MAX_NUM_PYEXCS + 10)

        # Used to push additional info during the execution.
        self.history = collections.deque(maxlen=100)

    @classmethod
    def from_file(cls, filepath):
        """Read the configuration parameters from a Yaml file."""
        # NOTE(review): yaml.load can build arbitrary objects; only use this
        # on configuration files you trust.
        with open(filepath, "r") as fh:
            return cls(**yaml.load(fh))

    @classmethod
    def from_string(cls, s):
        """Create an instance from string s containing a YAML dictionary."""
        stream = cStringIO(s)
        stream.seek(0)
        # NOTE(review): same yaml.load caveat as in from_file.
        return cls(**yaml.load(stream))

    @classmethod
    def from_user_config(cls):
        """
        Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'.
        Search first in the working directory and then in the configuration directory of abipy.

        Raises:
            `PyFlowSchedulerError` if file is not found.
        """
        # Try in the current directory.
        path = os.path.join(os.getcwd(), cls.YAML_FILE)
        if os.path.exists(path):
            return cls.from_file(path)

        # Try in the configuration directory.
        path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE)
        if os.path.exists(path):
            return cls.from_file(path)

        err_msg = "Cannot locate %s neither in current directory nor in %s" % (cls.YAML_FILE, path)
        raise cls.Error(err_msg)

    def __str__(self):
        """String representation."""
        lines = [self.__class__.__name__ + ", Pid: %d" % self.pid]
        app = lines.append
        app("Scheduler options: %s" % str(self.sched_options))
        app(80 * "=")
        app(str(self.flow))
        return "\n".join(lines)

    @property
    def pid(self):
        """The pid of the process associated to the scheduler."""
        try:
            return self._pid
        except AttributeError:
            self._pid = os.getpid()
            return self._pid

    @property
    def pid_file(self):
        """
        Absolute path of the file with the pid.
        The file is located in the workdir of the flow
        """
        return self._pid_file

    @property
    def flow(self):
        """`Flow`."""
        return self._flow

    @property
    def num_excs(self):
        """Number of exceptions raised so far."""
        return len(self.exceptions)

    def get_delta_etime(self):
        """Returns a `timedelta` object representing with the elapsed time."""
        return timedelta(seconds=(time.time() - self.start_time))

    def add_flow(self, flow):
        """
        Add an :class:`Flow` flow to the scheduler and write the pid file
        in the workdir of the flow.

        Raises:
            `PyFlowSchedulerError` if a flow was already added or the pid file already exists.
        """
        if hasattr(self, "_flow"):
            raise self.Error("Only one flow can be added to the scheduler.")

        pid_file = os.path.join(flow.workdir, "_PyFlowScheduler.pid")

        if os.path.isfile(pid_file):
            flow.show_status()

            err_msg = ("""
                pid_file %s already exists
                There are two possibilities:

                   1) There's an another instance of PyFlowScheduler running
                   2) The previous scheduler didn't exit in a clean way

                To solve case 1:
                   Kill the previous scheduler (use 'kill pid' where pid is the number reported in the file)
                   Then you can restart the new scheduler.

                To solve case 2:
                   Remove the pid_file and restart the scheduler.

                Exiting""" % pid_file)

            raise self.Error(err_msg)

        with open(pid_file, "w") as fh:
            fh.write(str(self.pid))

        self._pid_file = pid_file
        self._flow = flow

    def start(self):
        """
        Starts the scheduler in a new thread. Returns True if success.
        In standalone mode, this method will block until there are no more scheduled jobs.
        """
        self.history.append("Started on %s" % time.asctime())
        self.start_time = time.time()

        if has_sched_v3:
            self.sched.add_job(self.callback, "interval", **self.sched_options)
        else:
            self.sched.add_interval_job(self.callback, **self.sched_options)

        errors = self.flow.look_before_you_leap()
        if errors:
            print(errors)
            self.exceptions.append(errors)
            return False

        # Try to run the job immediately. If something goes wrong return without initializing the scheduler.
        self._runem_all()

        if self.exceptions:
            self.cleanup()
            self.send_email(msg="Error while trying to run the flow for the first time!\n %s" % self.exceptions)
            return False

        self.sched.start()
        return True

    def _runem_all(self):
        """
        This function checks the status of all tasks,
        tries to fix tasks that went unconverged, abicritical, or queuecritical
        and tries to run all the tasks that can be submitted.

        Exceptions raised while restarting or launching tasks are collected
        in self.exceptions instead of being propagated.
        """
        excs = []
        flow = self.flow

        # Allow to change the manager at run-time
        if self.use_dynamic_manager:
            from pymatgen.io.abinitio.tasks import TaskManager
            new_manager = TaskManager.from_user_config()
            for work in flow:
                work.set_manager(new_manager)

        nqjobs = flow.get_njobs_in_queue()
        if nqjobs is None:
            nqjobs = 0
            print('Cannot get njobs_inqueue')

        if nqjobs >= self.max_njobs_inqueue:
            print("Too many jobs in the queue, returning")
            return

        # Number of tasks we are still allowed to launch in this round.
        if self.max_nlaunch == -1:
            max_nlaunch = self.max_njobs_inqueue - nqjobs
        else:
            max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunch)

        # check status
        flow.check_status()
        flow.show_status()

        # fix problems
        # Try to restart the unconverged tasks
        # todo donot fire here but prepare for fireing in rapidfire
        for task in self.flow.unconverged_tasks:
            try:
                logger.info("Flow will try restart task %s" % task)
                fired = task.restart()
                if fired:
                    self.nlaunch += 1
                    max_nlaunch -= 1
                    if max_nlaunch == 0:
                        print("Restart: too many jobs in the queue, returning")
                        flow.pickle_dump()
                        return
            except Exception:
                excs.append(straceback())

        # move here from withing rapid fire ...
        # fix only prepares for restarting, and sets to ready
        flow.fix_critical()

        # update database
        flow.pickle_dump()

        # Submit the tasks that are ready.
        try:
            nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10)
            self.nlaunch += nlaunch

            if nlaunch:
                print("[%s] Number of launches: %d" % (time.asctime(), nlaunch))

        except Exception:
            excs.append(straceback())

        flow.show_status()

        if excs:
            logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs))
            self.exceptions.extend(excs)

    def callback(self):
        """The function that will be executed by the scheduler."""
        try:
            return self._callback()
        except:
            # All exceptions raised here will trigger the shutdown!
            self.exceptions.append(straceback())
            self.shutdown(msg="Exception raised in callback!")

    def _callback(self):
        """
        The actual callback.

        Runs the flow once and then shuts the scheduler down if all the tasks
        are done or if one of the safety checks fails. Returns the number of
        exceptions collected so far, or None when the flow completed.
        """
        if self.DEBUG:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds())

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if self.verbose:
            print("all_ok", all_ok)

        if all_ok:
            # Return immediately: falling through to the checks below could
            # shut the scheduler down a second time.
            return self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit")

        # Handle failures.
        err_msg = ""

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.REMINDME_S:
            self.num_reminders += 1
            msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " %
                   (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" +
                        " but send_email returned %d. Aborting now" % retcode)
                err_msg += msg

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.MAX_NUM_PYEXCS:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.MAX_NUM_PYEXCS)
            err_msg += boxed(msg)

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons
        # such as race conditions between difference callbacks!)
        if self.nlaunch > self.SAFETY_RATIO * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.MAX_NUM_ABIERRS:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.MAX_NUM_ABIERRS)
            err_msg += boxed(msg)

        if err_msg:
            # Something wrong. Quit
            self.shutdown(err_msg)

        return len(self.exceptions)

    def cleanup(self):
        """Cleanup routine: remove the pid file and save the pickle database"""
        try:
            os.remove(self.pid_file)
        except OSError:
            logger.critical("Could not remove pid_file")

        # Save the final status of the flow.
        self.flow.pickle_dump()

    def shutdown(self, msg):
        """
        Shutdown the scheduler.

        Cleans up, sends the final e-mail with `msg`, dumps the collected
        exceptions to the `_exceptions` file in the workdir of the flow and,
        in any case, removes the scheduled jobs and stops the scheduler.
        """
        try:
            self.cleanup()

            self.history.append("Completed on %s" % time.asctime())
            self.history.append("Elapsed time %s" % self.get_delta_etime())

            if self.DEBUG:
                print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds())

            retcode = self.send_email(msg)
            if self.DEBUG:
                print("send_mail retcode", retcode)

            # Write file with the list of exceptions:
            if self.exceptions:
                dump_file = os.path.join(self.flow.workdir, "_exceptions")
                with open(dump_file, "w") as fh:
                    fh.writelines(self.exceptions)
                    fh.write("Shutdown message:\n%s" % msg)

        finally:
            # Shutdown the scheduler thus allowing the process to exit.
            print('this should be the shutdown of the scheduler')

            # Unschedule all the jobs before calling shutdown
            self.sched.print_jobs()
            if has_sched_v3:
                # unschedule_job only exists in APScheduler 2; calling it here
                # would raise and sched.shutdown() would never be reached.
                self.sched.remove_all_jobs()
            else:
                for job in self.sched.get_jobs():
                    self.sched.unschedule_job(job)
            self.sched.print_jobs()

            self.sched.shutdown()
            # Uncomment the line below if shutdown does not work!
            #os.system("kill -9 %d" % os.getpid())

    def send_email(self, msg, tag=None):
        """
        Send an e-mail before completing the shutdown.
        Returns 0 if success, -1 if no `mailto` address is set and -2 if an
        exception was raised (the traceback is stored in self.exceptions).
        """
        try:
            return self._send_email(msg, tag)
        except:
            self.exceptions.append(straceback())
            return -2

    def _send_email(self, msg, tag):
        """
        Build the report (message, timings, status of the flow, exceptions)
        and pass it to sendmail. Returns -1 if `mailto` is None, otherwise
        the value returned by sendmail.
        """
        if self.mailto is None:
            return -1

        header = msg.splitlines()
        app = header.append

        app("Submitted on %s" % time.ctime(self.start_time))
        app("Completed on %s" % time.asctime())
        app("Elapsed time %s" % str(self.get_delta_etime()))
        app("Number of errored tasks: %d" % self.flow.num_errored_tasks)
        app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks)

        strio = cStringIO()
        strio.writelines("\n".join(header) + 4 * "\n")

        # Add the status of the flow.
        self.flow.show_status(stream=strio)

        if self.exceptions:
            # Report the list of exceptions.
            strio.writelines(self.exceptions)

        if tag is None:
            tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]"

        return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
class Scheduler(object):
    """
    Manages a list of actions that should be performed at specific times.

    Note that this class *intentionally* contains hardly any error
    checking. The correct behaviour of the Scheduler depends on the
    parent code doing "the right thing". In particular, it is crucial
    that the reached() method be called with the next time step at
    which an event is expected to happen, which can be obtained using
    the next() method.

    Thus a typical (correct) usage is as follows:

        s = Scheduler()
        s.add(...)       # schedule some item(s)
        t = s.next()     # get next time step at which something should happen
        # [do some stuff based on the time step just obtained]
        s.reached(t)

    """

    def __init__(self):
        """
        Create a Scheduler.

        """
        self.items = []
        self.realtime_items = {}
        self.realtime_jobs = []  # while the scheduler is running, the job
                                 # associated with each realtime_item will be
                                 # stored in this list (otherwise it is empty)
        self.last = None

    def __iter__(self):
        return self

    def add(self, func, args=None, kwargs=None, at=None, at_end=False, every=None, after=None, realtime=False):
        """
        Register a function with the scheduler.

        `func` is called with `args` and `kwargs` either once (`at`,
        `at_end`) or repeatedly (`every`, optionally starting `after` a
        delay). With `realtime=True` the `at`/`every`/`after` values refer to
        wall-clock time and the job is handed to APScheduler.

        Returns the scheduled item, which can be removed again by
        calling Scheduler._remove(item). Note that this may change in
        the future, so use with care. For a realtime job without `at_end`
        there is no scheduled item and None is returned.

        Raises TypeError if `func` is not callable and AssertionError if the
        timing arguments are inconsistent.

        """
        if not hasattr(func, "__call__"):
            raise TypeError(
                "The function must be callable but object '%s' is of type '%s'" % (str(func), type(func)))
        assert at or every or at_end or (after and realtime), "Use either `at`, `every` or `at_end` if not in real time mode."
        assert not (at is not None and every is not None), "Cannot mix `at` with `every`. Please schedule separately."
        assert not (at is not None and after is not None), "Delays don't mix with `at`."

        args = args or []
        kwargs = kwargs or {}
        callback = functools.partial(func, *args, **kwargs)

        if realtime:
            # Without this registration start_realtime_jobs() has nothing to
            # start and the `realtime` flag would silently be ignored.
            self._add_realtime(callback, at, every, after)
            if at_end:
                at_end_item = SingleTimeEvent(None, True, callback)
                self._add(at_end_item)
                return at_end_item
            return None

        if at or (at_end and not every):
            at_item = SingleTimeEvent(at, at_end, callback)
            self._add(at_item)
            return at_item

        if every:
            every_item = RepeatingTimeEvent(every, after, at_end, callback)
            self._add(every_item)
            return every_item

    def _add(self, item):
        self.items.append(item)

    def _remove(self, item):
        self.items.remove(item)

    def _add_realtime(self, func, at=None, every=None, after=None):
        """
        Register a realtime job.

        The APScheduler instance is created and started on first use. The
        job itself is only recorded in `self.realtime_items`; it is handed
        to APScheduler by start_realtime_jobs(). Returns None.

        Raises ImportError if the APScheduler package is not available.

        """
        if not hasattr(self, "apscheduler"):
            try:
                from apscheduler.scheduler import Scheduler as APScheduler
            except ImportError:
                log.error(
                    "Need APScheduler package to schedule realtime events.\n"
                    "Please install from http://pypi.python.org/pypi/APScheduler."
                )
                raise

            self.apscheduler = APScheduler()
            atexit.register(lambda: self.apscheduler.shutdown(wait=False))
            self.apscheduler.start()

        if after and isinstance(after, Number):
            # `after` can be either a delay in seconds, or a date/datetime.
            # Since the APScheduler API expects a date/datetime convert it.
            after = datetime.now() + timedelta(seconds=after)

        # Register the job so that it can be started/stopped as needed.
        self.realtime_items[func] = (at, every, after)

    def start_realtime_jobs(self):
        """
        Hand every registered realtime item to APScheduler and remember the
        resulting jobs in `self.realtime_jobs`.

        """
        for (func, (at, every, after)) in self.realtime_items.items():
            if at:
                job = self.apscheduler.add_date_job(func, at)
            elif every:
                if after:
                    job = self.apscheduler.add_interval_job(func, seconds=every, start_date=after)
                else:
                    job = self.apscheduler.add_interval_job(func, seconds=every)
            elif after:
                job = self.apscheduler.add_date_job(func, after)
            else:
                raise ValueError(
                    "Assertion violated. Use either `at`, `every` of `after`.")
            self.realtime_jobs.append(job)

    def stop_realtime_jobs(self):
        """
        Unschedule all jobs started by start_realtime_jobs().

        """
        for job in self.realtime_jobs:
            self.apscheduler.unschedule_job(job)
        self.realtime_jobs = []

    def next(self):
        """
        Returns the time for the next action to be performed.

        Automatically called upon iteration of scheduler instance.
        Raises StopIteration if no item has a next time or if an item
        requested to stop (such items are removed), and ValueError if the
        computed time lies before the last reached time.

        """
        next_step = None
        stop = False  # This flag determines whether or not iteration should be
                      # stopped after all items are checked.

        # Iterate over a snapshot: items may be removed inside the loop.
        for item in list(self.items):
            if item.next_time is not None and (next_step is None or next_step > item.next_time):
                next_step = item.next_time
            if item.state == EV_REQUESTS_STOP_INTEGRATION:
                self._remove(item)
                stop = True

        if next_step is None:
            stop = True

        if stop is True:
            raise StopIteration

        # self.last is None until reached() has been called for the first time.
        if self.last is not None and next_step < self.last:
            log.error(
                "Scheduler computed the next time step should be t = {:.2g} s, but the last one was already t = {:.2g} s."
                .format(next_step, self.last))
            raise ValueError(
                "Scheduler is corrupted. Requested a time step in the past: dt = {:.2g}."
                .format(next_step - self.last))
        return next_step

    # The iterator protocol looks this method up as `__next__` on Python 3.
    __next__ = next

    def reached(self, time):
        """
        Notify the Scheduler that a certain point in time has been reached.

        It will perform the action(s) that were defined to happen at that time.
        Items that are done afterwards are removed.

        """
        # Iterate over a snapshot: removing a finished item from the list
        # being iterated would skip the item that follows it.
        for item in list(self.items):
            if same_time(item.next_time, time):
                item.check_and_trigger(time)
                if item.state == EV_DONE:
                    self._remove(item)
        self.last = time

    def finalise(self, time):
        """
        Trigger all events that need to happen at the end of time integration.

        """
        for item in self.items:
            if item.trigger_on_stop:
                item.check_and_trigger(time, is_stop=True)

    def reset(self, time):
        """
        Override schedule so that internal time is now `time` and modify scheduled items accordingly.

        """
        self.last = None
        for item in self.items:
            item.reset(time)

    def _print_realtime_item(self, item, func_print=log.info):
        """
        Report one entry of `self.realtime_items`.

        `item` is a pair (func, (at, every, after)) as produced by
        `self.realtime_items.items()`.

        """
        (f, (at, every, after)) = item
        # The registered callables are functools.partial objects, which expose
        # the wrapped function as `.func`; fall back to the object itself.
        target = getattr(f, "func", f)
        name = getattr(target, "__name__", repr(target))
        func_print("'{}': <at={}, every={}, after={}>".format(name, at, every, after))

    def print_scheduled_items(self, func_print=log.info):
        """
        Report all scheduled items and realtime items through `func_print`.

        """
        for item in self.items:
            # this will call __str__ on the item, which should be defined for
            # all events
            func_print(str(item))
        for item in self.realtime_items.items():
            self._print_realtime_item(item, func_print)

    def clear(self):
        """
        Remove all scheduled items and stop and forget all realtime jobs.

        """
        log.debug("Removing scheduled items:")
        self.print_scheduled_items(func_print=log.debug)
        self.items = []
        self.stop_realtime_jobs()
        self.realtime_items = {}

    def run(self, integrator, callbacks_at_scheduler_events=None):
        """
        Integrate until an exit condition in the schedule has been met.

        The optional argument `callbacks_at_scheduler_events` should be a
        list of functions which are called whenever the time integration
        reaches a "checkpoint" where some event is scheduled. Each such
        function should expect the timestep t at which the event occurs as
        its single argument. Note that these functions are called just
        *before* the scheduled events are triggered. This is used, for
        example, to keep time-dependent fields up to date with the
        simulation time.

        """
        callbacks = callbacks_at_scheduler_events or []
        self.start_realtime_jobs()

        # If the schedule does not yield a single time step, the end-of-run
        # events are triggered at the current time of the integrator.
        t = integrator.cur_t
        for t in self:
            assert (t >= integrator.cur_t)  # sanity check

            # If new items were scheduled after a previous time
            # integration finished, we can have t == integrator.cur_t.
            # However, this confuses the integrators so we don't integrate
            # in this case.
            if t != integrator.cur_t:
                integrator.advance_time(t)

            for f in callbacks:
                f(t)
            self.reached(t)

        self.finalise(t)
        self.stop_realtime_jobs()
class PeetsMediaTranslator(DatagramProtocol):
    '''
    A translator protocol to relay local udp traffic to NDN and remote NDN traffic to local udp.
    This class also implements the strategy for fetching remote data.
    If the remote seq is unknown, use a short prefix without seq to probe;
    otherwise use a naive leaking-bucket like method to fetch the remote data

    We separate the fetching of the media stream and the fetching of the control stream (RTCP, STUN, etc).
    '''
    __logger = Logger.get_logger('PeetsMediaTranslator')

    def __init__(self, factory, pipe_size):
        '''
        Args:
          factory (PeetsServerFactory) : the factory that stores necessary information about the local user
          pipe_size (int) : the pipeline size for fetching the remote media stream. Pipelining allows us to
                            minimize impact of the interest-data roundtrip delay.
        '''
        self.factory = factory
        self.pipe_size = pipe_size

        # These must exist before the status callback is registered, because
        # toggle_scheduler reads self.scheduler.
        self.scheduler = None
        self.peets_status = None
        self.factory.set_local_status_callback(self.toggle_scheduler)

        # here we use two sockets, because the pending interests sent by a socket can not be satisfied
        # by the content published later by the same socket
        self.ccnx_int_socket = CcnxSocket()
        self.ccnx_int_socket.start()
        self.ccnx_con_socket = CcnxSocket()
        self.ccnx_con_socket.start()

        self.stream_closure = PeetsClosure(msg_callback = self.stream_callback, timeout_callback = self.stream_timeout_callback)
        self.probe_closure = PeetsClosure(msg_callback = self.probe_callback, timeout_callback = self.probe_timeout_callback)
        self.ctrl_probe_closure = PeetsClosure(msg_callback = self.ctrl_probe_callback, timeout_callback = self.ctrl_probe_timeout_callback)

    def _shutdown_scheduler(self):
        '''Unschedule every job and shut the scheduler down; no-op when none is active.'''
        if self.scheduler is None:
            return
        for job in self.scheduler.get_jobs():
            self.scheduler.unschedule_job(job)
        self.scheduler.shutdown(wait = True)
        self.scheduler = None

    def toggle_scheduler(self, status):
        '''Start or stop the scheduler for periodic jobs.

        Any other status value is ignored.

        Args:
          status (str): either 'Running' or 'Stopped'
        '''
        if status == 'Running':
            self.peets_status = 'Running'
            # A second 'Running' without an intervening 'Stopped' must not
            # leave the previous scheduler (and its fetch job) alive.
            self._shutdown_scheduler()
            self.scheduler = Scheduler()
            self.scheduler.start()
            self.scheduler.add_interval_job(self.fetch_media, seconds = 0.01, max_instances = 2)
        elif status == 'Stopped':
            self.peets_status = 'Stopped'
            # Safe even when 'Stopped' arrives before any 'Running'.
            self._shutdown_scheduler()

    def datagramReceived(self, data, addr):
        '''Intercept the webrtc traffic from the local front end and relay it to the NDN

        Args:
          data (bytes) : the UDP data
          addr (tuple) : (host, port) of the source; only the port is used

        1. Differentiate RTP vs RTCP
           RTCP: packet type (PT) = 200 - 208
             SR (sender report)        200
             RR (receiver report)      201
             SDES (source description) 202
             BYE (goodbye)             203
             App (application-defined) 204
             other types go until      208
           RFC 5761 (implemented by WebRTC) makes sure that RTP's PT field
           plus M field (which is equal to the PT field in RTCP) would not conflict

        2. Differentiate STUN vs RTP & RTCP
           STUN: the most significant 2 bits of every STUN msg MUST be zeros (RFC 5389)
           RTP & RTCP: version bits (2 bits) value equals 2

        Note:
          Tried to fake a Stun request and response so that we don't have to relay stun msgs to NDN, but failed.
          It worked for a time, although with a significantly high rate of the STUN message exchanges.
          We need to use the username exchanged in the sdps for stun.
          It worked for a while but magically stopped working, so now we still send it over NDN.

        Note 2:
          We only publish one media stream from the local user (with the default offer SDP).
          We publish RTCP and STUN for each PeerConnection though.
        '''
        host, port = addr

        msg = bytearray(data)
        if not msg:
            # Nothing to classify; indexing an empty datagram would raise.
            return

        c = self.factory.client

        # mask to test most significant 2 bits
        is_stun = (msg[0] & 0xC0) == 0
        is_rtcp = len(msg) > 1 and 199 < msg[1] < 209

        if is_stun or is_rtcp:
            try:
                ctrl_seq = c.ctrl_seqs[port]
                cid = c.remote_cids[port]
            except KeyError:
                # No control state is registered for this source port.
                return
            # RTCP and STUN is for each peerconnection. the cid of remote user is used to identify
            # the peer connection so that remote user knows which one to fetch
            name = c.local_user.get_ctrl_prefix() + '/' + cid + '/' + str(ctrl_seq)
            c.ctrl_seqs[port] = ctrl_seq + 1
            self.ccnx_con_socket.publish_content(name, data)
        elif c.media_source_port == port:
            # only publish one media stream
            name = c.local_user.get_media_prefix() + '/' + str(c.local_seq)
            c.local_seq += 1
            self.ccnx_con_socket.publish_content(name, data)
class LocalScheduler(object):
    """Named wrapper around a backend scheduler that remembers its jobs.

    Every instance registers itself in ``scheduler_registry`` under its
    name. Jobs added while the backend is not running are kept in
    ``scheduled_jobs`` and handed to the backend on the next ``start()``.
    """

    # name -> LocalScheduler, shared by all instances
    scheduler_registry = {}
    # when True, start() refuses to create a backend scheduler
    _lockdown = False

    @classmethod
    def get(cls, name):
        """Return the registered scheduler called *name* (KeyError if unknown)."""
        return cls.scheduler_registry[name]

    @classmethod
    def get_all(cls):
        """Return a list of every registered scheduler."""
        return list(cls.scheduler_registry.values())

    @classmethod
    def shutdown_all(cls):
        """Stop the backend of every registered scheduler."""
        for scheduler in list(cls.scheduler_registry.values()):
            scheduler.stop()

    @classmethod
    def lockdown(cls):
        """Prevent any scheduler from being started from now on."""
        cls._lockdown = True

    @classmethod
    def clear_all(cls):
        """Remove all jobs from every registered scheduler."""
        for scheduler in list(cls.scheduler_registry.values()):
            scheduler.clear()

    def __init__(self, name, label=None):
        self.scheduled_jobs = {}
        self._scheduler = None
        self.name = name
        self.label = label
        self.__class__.scheduler_registry[self.name] = self

    def start(self):
        """Create a backend scheduler, load the remembered jobs and start it.

        Does nothing except log when lockdown is in effect.
        """
        logger.info('Starting scheduler: %s', self.name)
        if self.__class__._lockdown:
            logger.debug('lockdown in effect')
            return

        # Shut down a backend left over from an earlier start() so its
        # jobs do not keep running next to the new one.
        self.stop()
        self._scheduler = OriginalScheduler()
        for job in self.scheduled_jobs.values():
            self._schedule_job(job)
        self._scheduler.start()

    def stop(self):
        """Shut the backend scheduler down; remembered jobs are kept."""
        if self._scheduler:
            self._scheduler.shutdown()
            self._scheduler = None

    @property
    def running(self):
        """True when a backend scheduler exists and reports it is running."""
        if self._scheduler:
            return self._scheduler.running
        return False

    def clear(self):
        """Stop and forget every job of this scheduler."""
        # stop_job() deletes from scheduled_jobs, so iterate over a snapshot.
        for job in list(self.scheduled_jobs.values()):
            self.stop_job(job)

    def stop_job(self, job):
        """Unschedule *job* (if the backend is running) and forget it.

        Raises KeyError when the job is not registered with this scheduler.
        """
        if self.running:
            self._scheduler.unschedule_job(job._job)

        del self.scheduled_jobs[job.name]
        job.scheduler = None

    def _schedule_job(self, job):
        """Hand *job* to the backend using the call matching its class."""
        if isinstance(job, IntervalJob):
            job._job = self._scheduler.add_interval_job(
                job.function, *job.args, **job.kwargs)
        elif isinstance(job, DateJob):
            job._job = self._scheduler.add_date_job(
                job.function, *job.args, **job.kwargs)
        elif isinstance(job, CronJob):
            job._job = self._scheduler.add_cron_job(
                job.function, *job.args, **job.kwargs)
        else:
            raise UnknownJobClass

    def add_job(self, job):
        """Register *job*; schedule it right away when a backend exists.

        Raises AlreadyScheduled when the job already belongs to a
        scheduler or its name is already taken here.
        """
        logger.debug('adding job')
        if job.scheduler or job.name in self.scheduled_jobs:
            raise AlreadyScheduled

        if self._scheduler:
            self._schedule_job(job)

        job.scheduler = self
        self.scheduled_jobs[job.name] = job

    def add_interval_job(self, name, label, function, *args, **kwargs):
        """Build an IntervalJob, register it and return it."""
        job = IntervalJob(name=name, label=label, function=function,
                          *args, **kwargs)
        self.add_job(job)
        return job

    def add_date_job(self, name, label, function, *args, **kwargs):
        """Build a DateJob, register it and return it."""
        job = DateJob(name=name, label=label, function=function,
                      *args, **kwargs)
        self.add_job(job)
        return job

    def add_cron_job(self, name, label, function, *args, **kwargs):
        """Build a CronJob, register it and return it."""
        job = CronJob(name=name, label=label, function=function,
                      *args, **kwargs)
        self.add_job(job)
        return job

    def get_job_list(self):
        """Return a list of the jobs registered with this scheduler."""
        return list(self.scheduled_jobs.values())

    def get_job_by_name(self, name):
        """Return the job called *name*; raise UnknownJob when absent."""
        try:
            return self.scheduled_jobs[name]
        except KeyError:
            raise UnknownJob

    def __unicode__(self):
        return unicode(self.label or self.name)
class HMScheduler( Base ):
    '''
    The HMScheduler is used to periodically send messages to HouseMonitor. The commands can be anything including:

    # Report status
    # Turn on and off devices.

    You control the scheduler by sending messages to the scheduler using pubsub.
    '''

    # The queue that is used to send messages to the rest of the system.
    __input_queue = None

    # The scheduler object (None until start() is called).
    scheduler = None

    # A dictionary of the current jobs that are running: name -> list of job tokens.
    # NOTE(review): defined on the class, so it is shared by all instances.
    jobs = defaultdict( list )

    previous_datetime = datetime.utcnow()

    def __init__( self, queue ):
        '''
        Initialize the HMScheduler.

        # Store the queue into __input_queue
        # Associate **add_interval** with Constants.TopicNames.SchedulerAddIntervalStep
        # Associate **add_cron** with Constants.TopicNames.SchedulerAddCronStep
        # Associate **add_date** with Constants.TopicNames.SchedulerAddDateStep
        # Associate **add_one_shot** with Constants.TopicNames.SchedulerAddOneShotStep
        # Associate **deleteJob** with Constants.TopicNames.SchedulerDeleteJob
        # Associate **print_jobs** with Constants.TopicNames.SchedulerPrintJobs

        :param queue: the queue that sendCommand transmits envelopes on.
        '''
        super( HMScheduler, self ).__init__()
        self.__input_queue = queue
        pub.subscribe( self.add_interval, Constants.TopicNames.SchedulerAddIntervalStep )
        pub.subscribe( self.add_cron, Constants.TopicNames.SchedulerAddCronStep )
        pub.subscribe( self.add_date, Constants.TopicNames.SchedulerAddDateStep )
        pub.subscribe( self.add_one_shot, Constants.TopicNames.SchedulerAddOneShotStep )
        pub.subscribe( self.deleteJob, Constants.TopicNames.SchedulerDeleteJob )
        pub.subscribe( self.print_jobs, Constants.TopicNames.SchedulerPrintJobs )

    @property
    def scheduler_topic_name( self ):
        ''' The topic name to which this routine subscribes.'''
        return Constants.TopicNames.SchedulerStep

    @property
    def logger_name( self ):
        ''' The key under which this class logs. '''
        return Constants.LogKeys.Scheduler

    def start( self ):
        '''
        Start the Scheduler and register the built-in periodic commands
        (status check every 10 minutes, uptime and pulse every 5 seconds).

        For more information on the parameter see:

        .. seealso:: http://packages.python.org/APScheduler/#starting-the-scheduler
        '''
        self.logger.debug( 'Scheduler starting' )
        # Do not leave a previous scheduler running when start() is called again.
        if self.scheduler is not None:
            self.shutdown()
        self.scheduler = Scheduler()
        # NOTE: a persistent job store (ShelveJobStore('HouseMonitor.db')) was
        # considered here but is not enabled.
        self.scheduler.start()

        # ( name, device, port, listeners, interval keyword arguments )
        periodic_commands = (
            ( 'scheduled status check', 'status', 'scheduler',
              [Constants.TopicNames.Statistics, Constants.TopicNames.CurrentValueStep],
              {'minutes': 10} ),
            ( 'uptime', 'HouseMonitor', 'uptime',
              [Constants.TopicNames.UpTime, Constants.TopicNames.CurrentValueStep],
              {'seconds': 5} ),
            ( 'Pulse', '0x13a20040902a02', 'DIO-0',
              [Constants.TopicNames.StatusPanel_SystemCheck, Constants.TopicNames.ZigBeeOutput],
              {'seconds': 5} ),
        )
        for name, device, port, listeners, interval in periodic_commands:
            # Each built-in command gets its own scheduler id.
            args = name, device, port, listeners, str( uuid.uuid4() )
            self.scheduler.add_interval_job( self.sendCommand, args=args, **interval )

    def add_interval( self, weeks=0, days=0, hours=0, minutes=0, seconds=0, start_date=None, args=None, kwargs=None ):
        '''
        Schedule an interval at which sendCommand will be called.

        The job name is taken from ``args[0]``; it identifies the job if there
        is a need to delete it later.

        For more information on the parameter see:

        .. seealso:: http://packages.python.org/APScheduler/intervalschedule.html

        :param weeks: the number of weeks between calls.
        :type weeks: int
        :param days: the number of days between calls.
        :type days: int
        :param hours: the number of hours between calls.
        :type hours: int
        :param minutes: the number of minutes between calls.
        :type minutes: int
        :param seconds: the number of seconds between calls.
        :type seconds: int
        :param start_date: the time and date to start the interval.
        :type start_date: datetime
        :param args: the args to pass to sendCommand; must be a non-empty sequence
        :param kwargs: the kwargs to pass to sendCommand
        :raises: TypeError if args is None
        '''
        name = args[0]
        self.logger.debug( 'interval ({}) add {} {} {} {} {} {}'.format( name, weeks, days, hours, minutes, seconds, start_date ) )
        token = self.scheduler.add_interval_job( self.sendCommand, weeks=weeks, days=days, hours=hours,
                                                 minutes=minutes, seconds=seconds, start_date=start_date,
                                                 args=args, kwargs=kwargs, name=name )
        self.jobs[name].append( token )

    def add_cron( self, year=None, month=None, day=None, week=None, day_of_week=None, hour=None, minute=None,
                  second=None, start_date=None, args=None, kwargs=None ):
        '''
        Schedule a cron command to call sendCommand.

        The job name is taken from ``args[0]``; it identifies the job if there
        is a need to delete it later.

        For more information on the parameter see:

        .. seealso:: http://packages.python.org/APScheduler/cronschedule.html

        :param year: cron field passed through to the scheduler.
        :param month: cron field passed through to the scheduler.
        :param day: cron field passed through to the scheduler.
        :param week: cron field passed through to the scheduler.
        :param day_of_week: cron field passed through to the scheduler.
        :param hour: cron field passed through to the scheduler.
        :param minute: cron field passed through to the scheduler.
        :param second: cron field passed through to the scheduler.
        :param start_date: the time and date to start the schedule.
        :type start_date: datetime
        :param args: the args to pass to sendCommand; must be a non-empty sequence
        :param kwargs: the kwargs to pass to sendCommand
        :raises: TypeError if args is None
        '''
        name = args[0]
        self.logger.debug( 'set cron({}) at {}/{}/{} {}:{}:{} {} {} {}'.format( name, year, month, day, hour, minute, second, week, day_of_week, start_date ) )
        token = self.scheduler.add_cron_job( self.sendCommand, year=year, month=month, day=day, week=week,
                                             day_of_week=day_of_week, hour=hour, minute=minute, second=second,
                                             start_date=start_date, args=args, kwargs=kwargs )
        self.jobs[name].append( token )

    def add_date( self, date, args, **kwargs ):
        '''
        Schedule a specific date and time to call sendCommand.

        The job name is taken from ``args[0]``.

        For more information on the parameter see:

        .. seealso:: http://packages.python.org/APScheduler/dateschedule.html

        :param date: Set the time to call sendCommand
        :type date: datetime
        :param args: the arguments to call sendCommand with
        :type args: tuple
        :param kwargs: the kwargs to call sendCommand with
        :type kwargs: dictionary
        '''
        name = args[0]
        self.logger.debug( 'add date({}) at {}'.format( name, date ) )
        token = self.scheduler.add_date_job( self.sendCommand, date=date, args=args, kwargs=kwargs )
        self.jobs[name].append( token )

    def add_one_shot( self, delta, args=None, kwargs=None ):
        '''
        Schedule sendCommand to be called after some interval. (ie. in 5 seconds or one hour).

        The job name is taken from ``args[0]``.

        For more information on timeDelta see:

        .. seealso:: http://docs.python.org/2/library/datetime.html#timedelta-objects

        :param delta: the time until sendCommand is called
        :type delta: timedelta
        :param args: the arguments to call sendCommand with; must be a non-empty sequence
        :type args: tuple
        :param kwargs: the kwargs to call sendCommand with
        :type kwargs: dictionary
        '''
        name = args[0]
        # The fire time is "now" as reported by GetDateTime plus the requested delay.
        dt = GetDateTime().datetime() + delta
        token = self.scheduler.add_date_job( self.sendCommand, date=dt, name=name, args=args, kwargs=kwargs )
        self.jobs[name].append( token )

    def deleteJob( self, name ):
        '''
        Delete a specified job

        :param name: the name of the job to delete.
        :type name: str
        '''
        # "in" does not create an entry in the defaultdict for unknown names.
        if name in self.jobs:
            for number, item in enumerate( self.jobs[name] ):
                try:
                    self.scheduler.unschedule_job( item )
                except KeyError:
                    # The job is no longer known to the scheduler; nothing to undo.
                    pass
                self.logger.info( '{} "{}" removed from scheduler'.format( number, name ) )
            self.jobs[name] = []

    def shutdown( self, wait=True ):
        '''
        shutdown the scheduler

        .. seealso: http://packages.python.org/APScheduler/#shutting-down-the-scheduler

        :param wait: determines whether to wait on threads to complete.
        :type wait: boolean
        '''
        if self.scheduler is not None:
            self.scheduler.shutdown( wait=wait )
            self.scheduler = None

    def print_jobs( self ):
        '''
        print the currently scheduled jobs

        .. seealso: http://packages.python.org/APScheduler/#getting-a-list-of-scheduled-jobs
        '''
        if self.scheduler is None:
            self.logger.debug( 'print_jobs requested but the scheduler is not running' )
            return
        self.scheduler.print_jobs()

    def sendCommand( self, name, device, port, listeners=None, scheduler_id=None ):
        """
        send command will send the command to the HouseMonitor system

        Exceptions are logged and not propagated, so a failing command does
        not escape into the scheduler thread.

        :param name: the name of the job issuing the command.
        :type name: str
        :param device: the device name.
        :type device: str
        :param port: the port name.
        :type port: str
        :param listeners: the listeners that this command will be routed to.
        :type listeners: list of strings that contains the topic name of the listeners. Most can be found in Constants.TopicNames
        :param scheduler_id: identifier stored in the envelope; a fresh UUID is generated per call when omitted.
        :type scheduler_id: str
        """
        if listeners is None:
            listeners = []
        if scheduler_id is None:
            # Generated per call: a uuid4() default in the signature would be
            # evaluated once and shared by every call.
            scheduler_id = str( uuid.uuid4() )
        try:
            data = {
                Constants.EnvelopeContents.VALUE: 1,
                Constants.EnvelopeContents.DEVICE: device,
                Constants.EnvelopeContents.PORT: port,
                Constants.EnvelopeContents.SCHEDULER_ID: scheduler_id,
                Constants.EnvelopeContents.ARRIVAL_TIME: datetime.utcnow(),
                # Copy so later processing cannot mutate the job's own listener list.
                Constants.EnvelopeContents.STEPS: copy.copy( listeners ),
                Constants.EnvelopeContents.NAME: name,
            }
            de = DataEnvelope( Constants.EnvelopeTypes.STATUS, **data )
            self.logger.debug( 'name: {} listeners: {} scheduler_id: {}'.format(
                name, listeners, data[Constants.EnvelopeContents.SCHEDULER_ID] ) )
            self.__input_queue.transmit( de, self.__input_queue.LOW_PRIORITY )
        except Exception as ex:
            self.logger.exception( "Exception in SendCommand: {}".format( ex ) )
class AlertSchedulerHandler():
  """
  Loads alert definitions from a JSON cache file and keeps one scheduler
  job per definition.
  """
  FILENAME = 'definitions.json'
  TYPE_PORT = 'PORT'
  TYPE_METRIC = 'METRIC'
  TYPE_SCRIPT = 'SCRIPT'
  TYPE_WEB = 'WEB'
  TYPE_RECOVERY = 'RECOVERY'

  def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir,
      alert_grace_period, cluster_configuration, config, recovery_manager,
      in_minutes=True):
    """
    Stores the directories and helpers, creates the cache directory if it is
    missing, builds a (not yet started) scheduler and registers stop() as an
    exit handler.
    """
    self.cachedir = cachedir
    self.stacks_dir = stacks_dir
    self.common_services_dir = common_services_dir
    self.host_scripts_dir = host_scripts_dir

    self._cluster_configuration = cluster_configuration

    if not os.path.exists(cachedir):
      try:
        os.makedirs(cachedir)
      except OSError:
        # best effort: loading definitions later reports the missing file
        logger.critical("[AlertScheduler] Could not create the cache directory {0}".format(cachedir))

    self.APS_CONFIG = {
      'apscheduler.threadpool.core_threads': 3,
      'apscheduler.coalesce': True,
      'apscheduler.standalone': False,
      'apscheduler.misfire_grace_time': alert_grace_period
    }

    self._collector = AlertCollector()
    self.__scheduler = Scheduler(self.APS_CONFIG)
    self.__in_minutes = in_minutes
    self.config = config
    # NOTE: the attribute name keeps its historical spelling so existing
    # readers of it continue to work
    self.recovery_manger = recovery_manager

    # register python exit handler
    ExitHelper().register(self.exit_handler)


  def exit_handler(self):
    """
    Exit handler
    """
    self.stop()


  def update_definitions(self, heartbeat):
    """
    Updates the persisted alert definitions JSON and reschedules the jobs
    whose definitions changed.
    :param heartbeat: dictionary expected to hold 'alertDefinitionCommands'
    :return: None
    """
    if 'alertDefinitionCommands' not in heartbeat:
      logger.warning("There are no alert definition commands in the heartbeat; unable to update definitions")
      return

    # prune out things we don't want to store
    alert_definitions = []
    for command in heartbeat['alertDefinitionCommands']:
      command_copy = command.copy()

      # no need to store these since we always use the in-memory cached values
      command_copy.pop('configurations', None)

      alert_definitions.append(command_copy)

    # write out the new definitions
    with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f:
      json.dump(alert_definitions, f, indent=2)

    # reschedule only the jobs that have changed
    self.reschedule()


  def __make_function(self, alert_def):
    """ wraps the definition's collect() in a zero-argument callable """
    return lambda: alert_def.collect()


  def start(self):
    """ loads definitions from file and starts the scheduler """

    if self.__scheduler is None:
      return

    # a running scheduler is replaced by a fresh one before scheduling
    if self.__scheduler.running:
      self.__scheduler.shutdown(wait=False)
      self.__scheduler = Scheduler(self.APS_CONFIG)

    alert_callables = self.__load_definitions()

    # schedule each definition
    for _callable in alert_callables:
      self.schedule_definition(_callable)

    logger.info("[AlertScheduler] Starting {0}; currently running: {1}".format(
      str(self.__scheduler), str(self.__scheduler.running)))

    self.__scheduler.start()


  def stop(self):
    """ shuts the scheduler down and replaces it with a fresh, stopped one """
    if self.__scheduler is not None:
      self.__scheduler.shutdown(wait=False)
      self.__scheduler = Scheduler(self.APS_CONFIG)

    logger.info("[AlertScheduler] Stopped the alert scheduler.")


  def reschedule(self):
    """
    Removes jobs that are scheduled where their UUID no longer is valid.
    Schedules jobs where the definition UUID is not currently scheduled.
    """
    jobs_scheduled = 0
    jobs_removed = 0

    definitions = self.__load_definitions()
    scheduled_jobs = self.__scheduler.get_jobs()

    # job names are definition UUIDs; sets give constant-time matching
    definition_uuids = set(definition.get_uuid() for definition in definitions)
    scheduled_names = set(scheduled_job.name for scheduled_job in scheduled_jobs)

    # jobs without valid UUIDs should be unscheduled
    for scheduled_job in scheduled_jobs:
      if scheduled_job.name not in definition_uuids:
        jobs_removed += 1
        logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name))
        self._collector.remove_by_uuid(scheduled_job.name)
        self.__scheduler.unschedule_job(scheduled_job)

    # if no jobs are found with the definitions UUID, schedule it
    for definition in definitions:
      if definition.get_uuid() not in scheduled_names:
        jobs_scheduled += 1
        self.schedule_definition(definition)

    logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format(
        str(jobs_scheduled), str(jobs_removed)))


  def reschedule_all(self):
    """
    Unschedules every currently scheduled job, then schedules a job for
    every definition loaded from the cache file.
    """
    jobs_scheduled = 0
    jobs_removed = 0

    definitions = self.__load_definitions()
    scheduled_jobs = self.__scheduler.get_jobs()

    # unschedule all scheduled jobs
    for scheduled_job in scheduled_jobs:
      jobs_removed += 1
      logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name))
      self._collector.remove_by_uuid(scheduled_job.name)
      self.__scheduler.unschedule_job(scheduled_job)

    # for every definition, schedule a job
    for definition in definitions:
      jobs_scheduled += 1
      self.schedule_definition(definition)

    logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format(
      str(jobs_scheduled), str(jobs_removed)))


  def collector(self):
    """ gets the collector for reporting to the server """
    return self._collector


  def __load_definitions(self):
    """
    Loads all alert definitions from a file. All clusters are stored in
    a single file.
    :return: list of alert objects; empty when the file is missing or invalid
    """
    definitions = []

    alerts_definitions_path = os.path.join(self.cachedir, self.FILENAME)
    try:
      with open(alerts_definitions_path) as fp:
        all_commands = json.load(fp)
    except (IOError, OSError, ValueError):
      # missing or unreadable file, or content that is not valid JSON
      logger.warning('[AlertScheduler] {0} not found or invalid. No alerts will be scheduled until registration occurs.'.format(alerts_definitions_path))
      return definitions

    for command_json in all_commands:
      clusterName = command_json.get('clusterName', '')
      hostName = command_json.get('hostName', '')

      for definition in command_json['alertDefinitions']:
        alert = self.__json_to_callable(clusterName, hostName, definition)

        # definitions that could not be converted are skipped
        if alert is None:
          continue

        alert.set_helpers(self._collector, self._cluster_configuration)

        definitions.append(alert)

    return definitions


  def __json_to_callable(self, clusterName, hostName, json_definition):
    """
    converts the json that represents all aspects of a definition
    and makes an object that extends BaseAlert that is used for individual
    alert collection; returns None for unknown source types or definitions
    that fail to load
    """
    alert = None

    try:
      source = json_definition['source']
      source_type = source.get('type', '')

      if logger.isEnabledFor(logging.DEBUG):
        logger.debug("[AlertScheduler] Creating job type {0} with {1}".format(source_type, str(json_definition)))

      if source_type == AlertSchedulerHandler.TYPE_METRIC:
        alert = MetricAlert(json_definition, source, self.config)
      elif source_type == AlertSchedulerHandler.TYPE_PORT:
        alert = PortAlert(json_definition, source)
      elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
        # script alerts are given the script directories through the source
        source['stacks_directory'] = self.stacks_dir
        source['common_services_directory'] = self.common_services_dir
        source['host_scripts_directory'] = self.host_scripts_dir
        alert = ScriptAlert(json_definition, source, self.config)
      elif source_type == AlertSchedulerHandler.TYPE_WEB:
        alert = WebAlert(json_definition, source, self.config)
      elif source_type == AlertSchedulerHandler.TYPE_RECOVERY:
        alert = RecoveryAlert(json_definition, source, self.recovery_manger)

      if alert is not None:
        alert.set_cluster(clusterName, hostName)

    except Exception:
      logger.exception("[AlertScheduler] Unable to load an invalid alert definition. It will be skipped.")

    return alert
class beholdDaemon(Daemon):
    """Daemon that periodically starts one stream-dumping thread per camera sid.

    Processing is aligned to ten-minute wall-clock marks; the list of sids
    is refreshed from the database on its own interval.
    """

    def __init__(self, pidfile):
        Daemon.__init__(self, pidfile)

        self.sched = Scheduler()
        self.processingJob = None
        self.sidsJob = None
        # Set by _scheduleJobsInit/_initScheduler; defined here so that an
        # early read cannot raise AttributeError.
        self.lastRun = None
        self.schedulerStartTime = None

        self.logger = ''
        self.rtmp = ''

        self.username = ''
        self.password = ''
        self.host = ''
        self.db = ''
        self.connection = ''
        self.prefix = './'

        self.sids = {}  # sid to cid dictionary
        self.binaries = {}
        self.times = {}

        # all default values can be overridden with corresponding methods
        self.binaries['rtmpdump'] = './rtmpdump'  # default location
        self.binaries['yamdi'] = './yamdi'  # default location

        self.times['interval'] = int(10)  # default value, FIXME: currently will not work with other
        self.times['killafter'] = int(self.times['interval'] * 1.5)  # default value
        self.times['sidrefresh'] = int(30)  # default value
        self.times['overlap'] = int(15)  # default value in seconds

    def setDatabase(self, dboptions):
        """Take username, password, host and database from *dboptions*."""
        self.username = dboptions['username']
        self.password = dboptions['password']
        self.host = dboptions['host']
        self.db = dboptions['database']

    def setRTMPHost(self, rtmp):
        self.rtmp = rtmp

    def setLogger(self, logger):
        self.logger = logger

    def setBinaryFilename(self, binary, filename):
        self.binaries[binary] = filename

    def setTime(self, name, value):
        self.times[name] = int(value)

    def setPrefix(self, prefix):
        self.prefix = prefix

    def _dbConnect(self):
        """Open the database connection; calls die() when that fails."""
        try:
            self.connection = PySQLPool.getNewConnection(
                username=self.username,
                password=self.password,
                host=self.host,
                db=self.db)
        except Exception:
            die('database connection failed')

    def _getSids(self):
        """Reload the sid -> url map of the cameras flagged for dumping."""
        try:
            query = PySQLPool.getNewQuery(self.connection)
            query.Query('select sid, url from cam where dump is true')
            self.sids = {}
            for row in query.record:
                self.sids[row['sid']] = row['url']
            self.logger.info("sid list updated")
        except Exception:
            # Deliberately best effort: a failed refresh only gets logged.
            self.logger.warning("getSids failed")

    def _roundedStartTime(self, moment):
        """Return *moment* with its minute rounded to a ten-minute mark.

        A minute that rounds up to 60 is represented as hh:59:59 instead.
        """
        m = int(round(moment.minute, -1))  # rounding minutes (FIXME: this will work only for 10)
        if m == 60:
            m, s = 59, 59
        else:
            s = 0
        return datetime.datetime(year=moment.year,
                                 month=moment.month,
                                 day=moment.day,
                                 hour=moment.hour,
                                 minute=m,
                                 second=s)

    def _startProcessing(self):
        """Start one streamDumper thread per known sid for the next interval.

        When the time since the previous run does not match the configured
        interval, the schedule is re-aligned and no threads are started.
        """
        now = datetime.datetime.now()
        self.logger.info('processing started at %s' % (now))

        # checking clocks
        if self.lastRun is not None:
            passed = now - self.lastRun
            minutes = passed.seconds // 60
            self.logger.debug('minutes passed %s' % minutes)
            if minutes != self.times['interval']:
                self.schedulerStartTime = self._roundedStartTime(now)
                if self.schedulerStartTime < now:
                    self.schedulerStartTime += datetime.timedelta(
                        minutes=self.times['interval'])
                # reinit scheduler
                self._unscheduleJobs()
                self._scheduleJobsInit()
                self.lastRun = None
                return  # there is nothing more to do here

        self.logger.debug('kill time interval equals %s' % (self.times['killafter']))
        suddenDeathTime = self.schedulerStartTime + datetime.timedelta(
            minutes=self.times['killafter'])
        if suddenDeathTime.second == 59:  # case of hh:59:59
            suddenDeathTime += datetime.timedelta(seconds=1)
        self.logger.debug('sudden death time is to %s' % (suddenDeathTime))

        self.schedulerStartTime += datetime.timedelta(
            minutes=self.times['interval'])
        if self.schedulerStartTime.second == 59:  # case of hh:59:59
            self.schedulerStartTime += datetime.timedelta(seconds=1)
        self.logger.info('threads will stop recording at %s' % (self.schedulerStartTime))

        self.logger.debug('sids are %s' % (self.sids))
        for sid in self.sids:
            self.logger.debug('creating thread for %s' % (sid))
            d = streamDumper(
                self.logger,
                self.connection,
                sid,
                self.rtmp,
                self.sids[sid],
                self.schedulerStartTime.strftime('%d-%m-%Y %H:%M:%S'),  # it's a stop time for thread
                suddenDeathTime.strftime('%d-%m-%Y %H:%M:%S'),
                self.prefix,
                self.binaries['rtmpdump'],
                self.binaries['yamdi'])
            d.start()
            self.logger.info('thread started for %s sid' % sid)

        self.lastRun = now

    def _initScheduler(self, currentTime):
        """Compute the first aligned start time, schedule the jobs and start the scheduler."""
        nextTime = self._roundedStartTime(currentTime)
        if nextTime > currentTime:
            self.schedulerStartTime = nextTime
        else:
            self.schedulerStartTime = nextTime + datetime.timedelta(
                minutes=self.times['interval'])

        self.logger.info('scheduled processing start time is %s' % self.schedulerStartTime)

        self._scheduleJobsInit()
        self.sched.daemonic = False  # scheduler will not let caller thread exit
        self.sched.start()
        self.logger.info('scheduler initialization completed')

    def _scheduleJobsInit(self):
        """Schedule the first processing run and the interval setup, 'overlap' seconds early."""
        self.lastRun = None
        starttime = self.schedulerStartTime - datetime.timedelta(
            seconds=self.times['overlap'])
        self.sched.add_date_job(self._startProcessing, starttime)
        self.sched.add_date_job(self._setupIntervalScheduler, starttime)

    def _setupIntervalScheduler(self):
        """Register the recurring processing and sid-refresh jobs."""
        self.processingJob = self.sched.add_interval_job(
            self._startProcessing, minutes=int(self.times['interval']))
        self.sidsJob = self.sched.add_interval_job(
            self._getSids, minutes=int(self.times['sidrefresh']))

    def _unscheduleJobs(self):
        """Unschedule the recurring jobs that have been registered so far."""
        for attr in ('processingJob', 'sidsJob'):
            job = getattr(self, attr)
            # The handle stays None until _setupIntervalScheduler has run.
            if job is None:
                continue
            self.sched.unschedule_job(job)
            setattr(self, attr, None)

    def run(self):
        self.logger.info('beholder started. big brother is watching you :)')

        self._dbConnect()
        self._getSids()

        self._initScheduler(datetime.datetime.now())
class HouseControl(object):
    """Central controller wiring the heating status bean, the scheduler and the bus worker.

    On construction it reads the configuration file, configures the
    HeatingStatusBean and its change listeners, starts the scheduler with the
    base service jobs plus the user-defined heating tasks, and starts the bus
    worker thread as a daemon.
    """

    __scheduler = None
    __heatingStatusBean = None
    # Shared by all instances: the queue and its worker are created once,
    # when the class body is executed.
    busJobsQueue = Queue.Queue()
    busWorkerThread = BusWorker(busJobsQueue)

    def __init__(self):
        self.logger = logging.getLogger(APPLICATION_LOGGER_NAME)
        self.logger.info("HouseControl starting...")
        configurationReader = self.__readConfiguration()

        # Initialize HeatingStatusBean
        self.__initalizeHeatingStatusBean(configurationReader)

        # Initialize Scheduler
        self.__initializeScheduler(configurationReader)

        # Initialize BusQueueWorker
        self.busWorkerThread.setDaemon(True)
        self.busWorkerThread.start()
        self.logger.info("HouseControl started.")

    def __readConfiguration(self):
        """Return a fresh ConfigurationReader for the configuration file.

        The path is the current working directory concatenated with
        FILEPATH_CONFIGURATION, exactly as the constant is defined.
        """
        return ConfigurationReader(self.logger, os.getcwd() + FILEPATH_CONFIGURATION)

    def __initalizeHeatingStatusBean(self, configurationReader):
        """Create the HeatingStatusBean, apply the configuration and attach listeners."""
        self.__heatingStatusBean = HeatingStatusBean.HeatingStatusBean()

        # Configure the bean before listeners are attached.
        self.updateHeatingStatusBeanConfiguration(configurationReader)

        # Add change listeners
        self.__heatingStatusBean.addChangeListener(HeatingControlService.HeatingControlService(self))
        self.__heatingStatusBean.addChangeListener(HeatingSwitchService.HeatingSwitchService(self))
        ##self.__heatingStatusBean.addChangeListener(HeatingMonitorService.HeatingMonitorService(self))
        self.logger.info("HeatingStatusBean configured.")

    def __initializeScheduler(self, configurationReader):
        """Create and start the scheduler, then load base and user-defined jobs."""
        self.__scheduler = Scheduler()
        self.__scheduler.configure(standalone=True)
        self.__scheduler.add_listener(schedulerListener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)

        # Base scheduler tasks:
        # TemperatureFeedService, TemperatureLogService, MixerControlService
        self.__loadBaseSchedulerTasks()
        self.__scheduler.start()

        # User-defined switching times
        self.loadUserSchedulerTasks(configurationReader)
        self.logger.info("Scheduler started.")

    def getHeatingStatusBean(self):
        """Return the HeatingStatusBean instance."""
        return self.__heatingStatusBean

    def getScheduler(self):
        """Return the scheduler instance."""
        return self.__scheduler

    def __loadBaseSchedulerTasks(self):
        """Register the three fixed-interval service jobs and name them."""
        # (service, interval in seconds, job name); all services are built
        # before the first job is added.
        baseTasks = (
            (TemperatureFeedService.TemperatureFeedService(self),
             INTERVALL_UPDATE_TEMPERATURE,
             SCHEDULE_SERVICE_TEMPERATURE_UPDATER),
            (TemperatureLogService.TemperatureLogService(self),
             INTERVALL_LOG_TEMPERATURE,
             SCHEDULE_SERVICE_TEMPERATURE_LOGGER),
            (MixerControlService.MixerControlService(self),
             INTERVALL_UPDATE_MIXER,
             SCHEDULE_SERVICE_TEMPERATURE_MIXERCONTROL),
        )
        for service, intervalSeconds, jobName in baseTasks:
            job = self.__scheduler.add_interval_job(service.run, seconds=intervalSeconds)
            job.name = jobName
            self.logger.info("Scheduler-Job [" + job.name + "] loaded.")

    def updateHeatingStatusBeanConfiguration(self, configurationReader):
        """Copy the configured target temperatures into the HeatingStatusBean.

        Reads the 'ogv', 'egv' and 'hotwater' entries of
        ``configurationReader.temperatures`` and converts each to float.
        """
        temperatures = configurationReader.temperatures
        self.__heatingStatusBean.setUpperFloorFlowTargetTemperature(float(temperatures.get('ogv')))
        self.__heatingStatusBean.setGroundFloorFlowTargetTemperature(float(temperatures.get('egv')))
        self.__heatingStatusBean.setWaterTargetTemperature(float(temperatures.get('hotwater')))

    def reloadUserSchedulerTasks(self):
        """Drop all user-defined jobs and reload them from a re-read configuration."""
        self.removeUserSchedulerTasks()
        configurationReader = self.__readConfiguration()
        self.updateHeatingStatusBeanConfiguration(configurationReader)
        self.loadUserSchedulerTasks(configurationReader)

    def removeUserSchedulerTasks(self):
        """Unschedule every job whose name starts with SERVICE_HEATING_ACTION_PREFIX."""
        for job in self.__scheduler.get_jobs():
            if job.name.startswith(SERVICE_HEATING_ACTION_PREFIX):
                self.logger.info("Scheduler-Job [" + job.name + "] removed.")
                self.__scheduler.unschedule_job(job)

    def loadUserSchedulerTasks(self, configurationReader):
        """Schedule the cron-type heating tasks found in the configuration.

        Only tasks with schedule type 'cron' and task type
        'changeHeatingStatus' are scheduled. Tasks without a schedule or with
        another schedule type are ignored; cron tasks of an unknown task type
        are skipped with a warning, so a job is never created with a missing
        or stale callback.
        """
        baseCronSched = {'year': None, 'month': None, 'day': None, 'week': None,
                         'day_of_week': None, 'hour': None, 'minute': None,
                         'second': None, 'start_date': None}

        for task in configurationReader.heatingTasks:
            schedule = task.get('schedule') or {}
            if schedule.get('type') != 'cron':
                continue
            if task.get('type') != 'changeHeatingStatus':
                self.logger.warning("Scheduler-Task [" + str(task.get('name'))
                                    + "] skipped: unsupported task type.")
                continue

            # Start from all-None fields so unspecified cron fields are passed
            # as None.
            cronSched = baseCronSched.copy()
            cronSched.update(schedule)
            cronSched.pop('type')

            job = self.__scheduler.add_cron_job(
                self.__heatingStatusBean.setHeatingStatusMap,
                year=cronSched['year'],
                month=cronSched['month'],
                day=cronSched['day'],
                week=cronSched['week'],
                day_of_week=cronSched['day_of_week'],
                hour=cronSched['hour'],
                minute=cronSched['minute'],
                second=cronSched['second'],
                start_date=cronSched['start_date'],
                args=[task.get('status')])
            job.name = SERVICE_HEATING_ACTION_PREFIX + str(task.get('name'))

        # Report every user-defined job currently known to the scheduler.
        for job in self.__scheduler.get_jobs():
            if job.name.startswith(SERVICE_HEATING_ACTION_PREFIX):
                self.logger.info("Scheduler-Job [" + job.name + "] loaded.")
class TestJobExecution(object):
    """Job scheduling and execution tests run against a scheduler with fake thread pool and thread."""

    def setup(self):
        self.scheduler = Scheduler(threadpool=FakeThreadPool())
        self.scheduler.add_jobstore(RAMJobStore(), 'default')

        # Make the scheduler think it's running
        self.scheduler._thread = FakeThread()

        # Capture ERROR-level scheduler log output for test_job_exception
        self.logstream = StringIO()
        self.loghandler = StreamHandler(self.logstream)
        self.loghandler.setLevel(ERROR)
        scheduler.logger.addHandler(self.loghandler)

    def teardown(self):
        scheduler.logger.removeHandler(self.loghandler)
        # Undo the datetime monkey-patch applied by the FakeDateTime tests
        if scheduler.datetime == FakeDateTime:
            scheduler.datetime = datetime
        FakeDateTime._now = original_now

    @raises(TypeError)
    def test_noncallable(self):
        date = datetime.now() + timedelta(days=1)
        self.scheduler.add_date_job('wontwork', date)

    def test_job_name(self):
        def my_job():
            pass

        job = self.scheduler.add_interval_job(my_job,
                                              start_date=datetime(2010, 5, 19))
        eq_(repr(job),
            '<Job (name=my_job, '
            'trigger=<IntervalTrigger (interval=datetime.timedelta(0, 1), '
            'start_date=datetime.datetime(2010, 5, 19, 0, 0))>)>')

    def test_schedule_object(self):
        # Tests that any callable object is accepted (and not just functions)
        class A:
            def __init__(self):
                self.val = 0

            def __call__(self):
                self.val += 1

        a = A()
        job = self.scheduler.add_interval_job(a, seconds=1)
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(a.val, 2)

    def test_schedule_method(self):
        # Tests that bound methods can be scheduled (at least with RAMJobStore)
        class A:
            def __init__(self):
                self.val = 0

            def method(self):
                self.val += 1

        a = A()
        job = self.scheduler.add_interval_job(a.method, seconds=1)
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(a.val, 2)

    def test_unschedule_job(self):
        def increment():
            vals[0] += 1

        vals = [0]
        job = self.scheduler.add_cron_job(increment)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals[0], 1)

        # Once unscheduled, processing again must not run the job
        self.scheduler.unschedule_job(job)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals[0], 1)

    def test_unschedule_func(self):
        def increment():
            vals[0] += 1

        def increment2():
            vals[0] += 1

        vals = [0]
        job1 = self.scheduler.add_cron_job(increment)
        job2 = self.scheduler.add_cron_job(increment2)
        job3 = self.scheduler.add_cron_job(increment)
        eq_(self.scheduler.get_jobs(), [job1, job2, job3])

        # Every job bound to `increment` is removed, the other one stays
        self.scheduler.unschedule_func(increment)
        eq_(self.scheduler.get_jobs(), [job2])

    @raises(KeyError)
    def test_unschedule_func_notfound(self):
        self.scheduler.unschedule_func(copy)

    def test_job_finished(self):
        def increment():
            vals[0] += 1

        vals = [0]
        job = self.scheduler.add_interval_job(increment, max_runs=1)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals, [1])
        assert job not in self.scheduler.get_jobs()

    def test_job_exception(self):
        def failure():
            raise DummyException

        job = self.scheduler.add_date_job(failure, datetime(9999, 9, 9))
        self.scheduler._process_jobs(job.next_run_time)
        assert 'DummyException' in self.logstream.getvalue()

    def test_misfire_grace_time(self):
        self.scheduler.misfire_grace_time = 3
        job = self.scheduler.add_interval_job(lambda: None, seconds=1)
        eq_(job.misfire_grace_time, 3)

        # An explicit per-job value overrides the scheduler default
        job = self.scheduler.add_interval_job(lambda: None, seconds=1,
                                              misfire_grace_time=2)
        eq_(job.misfire_grace_time, 2)

    def test_coalesce_on(self):
        # Makes sure that the job is only executed once when it is scheduled
        # to be executed twice in a row
        def increment():
            vals[0] += 1

        vals = [0]
        events = []
        scheduler.datetime = FakeDateTime
        self.scheduler.add_listener(events.append,
                                    EVENT_JOB_EXECUTED | EVENT_JOB_MISSED)
        job = self.scheduler.add_interval_job(increment, seconds=1,
                                              start_date=FakeDateTime.now(),
                                              coalesce=True,
                                              misfire_grace_time=2)

        # Turn the clock 2 seconds forward
        FakeDateTime._now += timedelta(seconds=2)
        self.scheduler._process_jobs(FakeDateTime.now())
        eq_(job.runs, 1)
        eq_(len(events), 1)
        eq_(events[0].code, EVENT_JOB_EXECUTED)
        eq_(vals, [1])

    def test_coalesce_off(self):
        # Makes sure that every scheduled run for the job is executed even
        # when they are in the past (but still within misfire_grace_time)
        def increment():
            vals[0] += 1

        vals = [0]
        events = []
        scheduler.datetime = FakeDateTime
        self.scheduler.add_listener(events.append,
                                    EVENT_JOB_EXECUTED | EVENT_JOB_MISSED)
        job = self.scheduler.add_interval_job(increment, seconds=1,
                                              start_date=FakeDateTime.now(),
                                              coalesce=False,
                                              misfire_grace_time=2)

        # Turn the clock 2 seconds forward
        FakeDateTime._now += timedelta(seconds=2)
        self.scheduler._process_jobs(FakeDateTime.now())
        eq_(job.runs, 3)
        eq_(len(events), 3)
        eq_(events[0].code, EVENT_JOB_EXECUTED)
        eq_(events[1].code, EVENT_JOB_EXECUTED)
        eq_(events[2].code, EVENT_JOB_EXECUTED)
        eq_(vals, [3])

    def test_interval(self):
        def increment(amount):
            vals[0] += amount
            vals[1] += 1

        vals = [0, 0]
        job = self.scheduler.add_interval_job(increment, seconds=1, args=[2])
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals, [4, 2])

    def test_interval_schedule(self):
        @self.scheduler.interval_schedule(seconds=1)
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals, [2])

    def test_cron(self):
        def increment(amount):
            vals[0] += amount
            vals[1] += 1

        vals = [0, 0]
        job = self.scheduler.add_cron_job(increment, args=[3])
        start = job.next_run_time
        self.scheduler._process_jobs(start)
        eq_(vals, [3, 1])
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals, [6, 2])
        self.scheduler._process_jobs(start + timedelta(seconds=2))
        eq_(vals, [9, 3])

    def test_cron_schedule_1(self):
        @self.scheduler.cron_schedule()
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals[0], 2)

    def test_cron_schedule_2(self):
        @self.scheduler.cron_schedule(minute='*')
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        next_run = start + timedelta(seconds=60)
        eq_(increment.job.get_run_times(next_run), [start, next_run])
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(next_run)
        eq_(vals[0], 2)

    def test_date(self):
        def append_val(value):
            vals.append(value)

        vals = []
        date = datetime.now() + timedelta(seconds=1)
        self.scheduler.add_date_job(append_val, date, kwargs={'value': 'test'})
        self.scheduler._process_jobs(date)
        eq_(vals, ['test'])

    def test_print_jobs(self):
        # NOTE(review): the leading whitespace inside the expected strings
        # must match what print_jobs() emits -- confirm the indentation width.
        out = StringIO()
        self.scheduler.print_jobs(out)
        expected = 'Jobstore default:%s'\
                   ' No scheduled jobs%s' % (os.linesep, os.linesep)
        eq_(out.getvalue(), expected)

        self.scheduler.add_date_job(copy, datetime(2200, 5, 19))
        out = StringIO()
        self.scheduler.print_jobs(out)
        expected = 'Jobstore default:%s '\
                   'copy (trigger: date[2200-05-19 00:00:00], '\
                   'next run at: 2200-05-19 00:00:00)%s' % (os.linesep,
                                                            os.linesep)
        eq_(out.getvalue(), expected)

    def test_jobstore(self):
        self.scheduler.add_jobstore(RAMJobStore(), 'dummy')
        job = self.scheduler.add_date_job(lambda: None, datetime(2200, 7, 24),
                                          jobstore='dummy')
        eq_(self.scheduler.get_jobs(), [job])

        # Removing the job store also removes the jobs stored in it
        self.scheduler.remove_jobstore('dummy')
        eq_(self.scheduler.get_jobs(), [])

    @raises(KeyError)
    def test_remove_nonexistent_jobstore(self):
        self.scheduler.remove_jobstore('dummy2')

    def test_job_next_run_time(self):
        # Tests against bug #5
        def increment():
            vals[0] += 1

        # Named `vals` (not `vars`) so the builtin is not shadowed
        vals = [0]
        scheduler.datetime = FakeDateTime
        job = self.scheduler.add_interval_job(increment, seconds=1,
                                              misfire_grace_time=3,
                                              start_date=FakeDateTime.now())
        start = job.next_run_time

        self.scheduler._process_jobs(start)
        eq_(vals, [1])

        # Processing the same instant again must not re-run the job
        self.scheduler._process_jobs(start)
        eq_(vals, [1])

        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals, [2])
class EventScheduler():
    """Class to schedule regular events in a similar manner to cron.

    Wraps a Scheduler whose default job store is an SQLAlchemy store (table
    'SCHEDULE'). Every event is identified by a unique name, which is used to
    find and remove it later.
    """

    # NOTE(security): the database password is embedded in the source; base64
    # is an encoding, not encryption. Consider loading it from configuration.
    # Decoding via bytes.decode() yields text on both Python 2 and Python 3.
    __mysql_url = 'mysql+pymysql://powermonitor:%s@localhost/powermonitor' \
                  % base64.b64decode(b'cDB3M3JtMG4xdDBy').decode('ascii')

    # This determines the number of seconds after the designated run time that
    # the job is still allowed to be run. If jobs are not being run, try
    # increasing this in increments of 1.
    __GRACE_PERIOD = 31536000  # Amazing grace! Time in seconds before the job is considered misfired. Currently a year
    __COALESCE = True  # Force the job to only run once instead of retrying multiple times

    # If there is a problem with thread concurrency, play around with these
    # values.
    __threadpool_corethreads = 0  # Maximum number of persistent threads in the pool
    __threadpool_maxthreads = 20  # Maximum number of total threads in the pool
    __threadpool_keepalive = 1  # Seconds to keep non-core worker threads in the pool

    def __init__(self, start=True):
        """Create the scheduler, attach the job store and optionally start it.

        :param start: when true, the scheduler is started immediately.
        """
        # Defined up front so the ``is not None`` guards in the other methods
        # work even if construction below fails.
        self.__sched = None
        try:
            config = {'apscheduler.daemon': True,
                      'apscheduler.standalone': False,
                      'apscheduler.threadpool.core_threads': self.__threadpool_corethreads,
                      'apscheduler.threadpool.max_threads': self.__threadpool_maxthreads,
                      'apscheduler.threadpool.keepalive': self.__threadpool_keepalive,
                      'apscheduler.coalesce': self.__COALESCE}
            self.__sched = Scheduler(config)
            # Add the SQLAlchemy job store as the default.
            self.__sched.add_jobstore(SQLAlchemyJobStore(url=self.__mysql_url, tablename='SCHEDULE'), 'default')
            # Stop the scheduler when the program exits
            atexit.register(lambda: self.__sched.shutdown(wait=False))
            if start:
                self.__sched.start()
        except KeyError:
            logging.warning('An error occurred starting the scheduler.')

    def start_scheduler(self):
        """Start the scheduler.

        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError('start_scheduler: Scheduler does not exist. It may have not started.')
        self.__sched.start()

    def add_cron_event(self, func, name, year=None, month=None, week=None, day=None, day_of_week=None, hour=None,
                       minute=None, second=None, start_date=None, *args, **kwargs):
        """Add a cron like event to the schedule.

        Each job must be given a unique name in case it needs to be removed.
        The following expressions can be used in each field:

        Expression  Field  Description
        *           any    Fire on every value
        */a         any    Fire on every 'a' values, starting from the minimum
        a-b         any    Fire on any value in the 'a-b' range (a must be smaller than b)
        a-b/c       any    Fire every 'c' values within the 'a-b' range
        xth y       day    Fire on the x-th occurrence of weekday y within the month
        last x      day    Fire on the last occurrence of weekday 'x' within the month
        last        day    Fire on the last day within the month
        x,y,z       any    Fire on any matching expression; can combine any number of the above

        Extra positional and keyword arguments are forwarded to ``func`` as the
        job's ``args`` and ``kwargs``.

        :raises EventExistsError: if a job called ``name`` already exists.
        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError('add_cron_event: Scheduler does not exist. It may have not started.')
        if self.__find_event(name) is not None:
            # Every event needs a unique name so that it can be tracked and
            # removed later.
            logging.warning('add_cron_event: Event already exists')
            raise EventExistsError('A job with name %s already exists' % name)
        self.__sched.add_cron_job(func=func, name=name, year=year, month=month, day=day, week=week,
                                  day_of_week=day_of_week, hour=hour, minute=minute, second=second,
                                  start_date=start_date, args=args, kwargs=kwargs,
                                  misfire_grace_time=self.__GRACE_PERIOD)
        logging.info('New cron event added')

    def __find_event(self, event_name):
        """Return the job named ``event_name``, or None if no such job exists.

        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            logging.warning('__find_event: Scheduler does not exist. It may have not started.')
            raise SchedulerNotFoundError('Scheduler does not exist. It may have not started.')
        # Scan every job; None is returned only after all of them were checked.
        for event in self.__sched.get_jobs():
            if event.name == event_name:
                return event
        return None

    def add_onceoff_event(self, func, name, date, args=None):
        """Add a once off event to the schedule.

        The job is executed once at the specified date and time.
        Date/time format: YYYY-MM-DD HH:MM:SS

        :param args: optional list of positional arguments passed to ``func``.
        :raises EventWontRunError: if the scheduler rejects the date with a
            ValueError (the event time has expired).
        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            logging.warning('add_onceoff_event: Scheduler does not exist. It may have not started.')
            raise SchedulerNotFoundError('Scheduler does not exist. It may have not started.')
        try:
            if args is None:
                # No arguments to be passed to the function
                self.__sched.add_date_job(func=func, name=name, date=date,
                                          misfire_grace_time=self.__GRACE_PERIOD)
            else:
                # Forward the arguments under the correct keyword ('args')
                self.__sched.add_date_job(func=func, name=name, date=date, args=args,
                                          misfire_grace_time=self.__GRACE_PERIOD)
        except ValueError:
            # If the event is in the past, it will not run.
            raise EventWontRunError('The event will not run: Event time has expired.')
        logging.info('New once off event added')

    def remove_event(self, event_name):
        """Remove the event 'event_name' from the schedule.

        :raises EventNotFoundError: if no job with that name exists.
        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError('remove_event: Scheduler does not exist. It may have not started.')
        event = self.__find_event(event_name=event_name)
        if event is None:
            # Raise an error so that it can be handled correctly
            logging.warning('remove_event: Event not found for removal.')
            raise EventNotFoundError('Event not found for removal: %s' % event_name)
        self.__sched.unschedule_job(event)

    def get_jobs(self):
        """Get the list of events currently in the job store.

        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError('get_events: Scheduler does not exist. It may have not started.')
        return self.__sched.get_jobs()

    def get_job_names(self):
        """
        Get the names of all the jobs in the job store
        :return: list
        """
        return [job.name for job in self.get_jobs() or []]

    def get_scheduler(self):
        """Returns the Scheduler object. Rather add functionality to this class than call this method.

        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError('get_scheduler: Scheduler does not exist. It may have not started.')
        return self.__sched
class TNActionScheduler:
    """Schedule actions on an Archipel entity and persist them in SQLite.

    Jobs are registered as cron jobs on a Scheduler and mirrored in the
    ``scheduler`` table so they can be restored when the module is created
    again. Each job's ``args`` are
    ``[action, job uid, date string, comment, param]``.
    """

    def __init__(self, entity, db_file):
        """
        initialize the module
        @type entity TNArchipelEntity
        @param entity the module entity
        @type db_file string
        @param db_file path of the SQLite database file holding the jobs
        """
        self.entity = entity
        self.scheduler = Scheduler()
        self.scheduler.start()
        # check_same_thread=False: the connection is also used from the
        # scheduler's job threads (see delete_job called by do_job_*).
        self.database = sqlite3.connect(db_file, check_same_thread=False)
        self.database.execute("create table if not exists scheduler (entity_uuid text, job_uuid text, action text, year text, month text, day text, hour text, minute text, second text, comment text, params text)")
        self.database.commit()
        self.cursor = self.database.cursor()
        self.restore_jobs()
        self.supported_actions_for_vm = ("create", "shutdown", "destroy", "suspend", "resume", "reboot", "migrate", "pause")
        self.supported_actions_for_hypervisor = ("alloc", "free")

        # permissions
        self.entity.permission_center.create_permission("scheduler_jobs", "Authorizes user to get the list of task", False)
        self.entity.permission_center.create_permission("scheduler_schedule", "Authorizes user to schedule a task", False)
        self.entity.permission_center.create_permission("scheduler_unschedule", "Authorizes user to unschedule a task", False)
        self.entity.permission_center.create_permission("scheduler_actions", "Authorizes user to get available actions", False)

    ### Entity helpers

    def _entity_uid(self):
        """
        return the uid under which the jobs of the current entity are stored
        @raise Exception if the entity is neither a virtual machine nor an hypervisor
        """
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            return self.entity.uuid
        if entityClass == "TNArchipelHypervisor":
            return ARCHIPEL_SCHED_HYPERVISOR_UID
        raise Exception("entity class %s is not supported by the scheduler" % entityClass)

    def _job_handler(self):
        """
        return the method executing the jobs of the current entity
        @raise Exception if the entity is neither a virtual machine nor an hypervisor
        """
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            return self.do_job_for_vm
        if entityClass == "TNArchipelHypervisor":
            return self.do_job_for_hypervisor
        raise Exception("entity class %s is not supported by the scheduler" % entityClass)

    ### Persistance

    def delete_job(self, uid):
        """
        remove a job from the database
        @type uid string
        @param uid the uid of the job to remove
        """
        self.cursor.execute("DELETE FROM scheduler WHERE job_uuid=?", (uid,))
        self.database.commit()

    def save_jobs(self, uid, action, year, month, day, hour, minute, second, comment, params=None):
        """
        save a job in the database
        @type uid string
        @param uid the uid of the job
        @type action string
        @param action the action
        @type year string
        @param year year of execution
        @type month string
        @param month month of execution
        @type day string
        @param day day of execution
        @type hour string
        @param hour hour of execution
        @type minute string
        @param minute minute of execution
        @type second string
        @param second second of execution
        @type comment string
        @param comment comment about the job
        @type params string
        @param params random parameter of the job
        """
        entity_uid = self._entity_uid()
        self.cursor.execute("INSERT INTO scheduler VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (entity_uid, uid, action, year, month, day, hour, minute, second, comment, params,))
        self.database.commit()

    def restore_jobs(self):
        """
        restore the jobs of the current entity from the database
        Each job is scheduled with the handler matching the entity class and
        with the same five arguments as a freshly scheduled job.
        """
        entity_uid = self._entity_uid()
        func = self._job_handler()
        self.cursor.execute("SELECT * FROM scheduler WHERE entity_uuid=?", (entity_uid,))
        # Fetch everything first: the cursor is shared with the other methods.
        for values in self.cursor.fetchall():
            entity_uuid, job_uuid, action, year, month, day, hour, minute, second, comment, params = values
            str_date = "%s/%s/%s %s:%s:%s" % (year, month, day, hour, minute, second)
            self.scheduler.add_cron_job(func, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[action, job_uuid, str_date, comment, params])

    ### Jobs

    def get_jod_with_uid(self, uid):
        """
        get a job with given uid
        @type uid string
        @param uid the uid of the job
        @return the matching job, or None when no job has this uid
        """
        for job in self.scheduler.jobs:
            # the uid is the second positional argument of every job
            if str(job.args[1]) == uid:
                return job
        return None

    def _finish_job(self, uid):
        """
        clean up after a job ran: forget it in the database when it is no
        longer scheduled or active, then notify the entity
        @type uid string
        @param uid the uid of the executed job
        """
        job = self.get_jod_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        self.entity.push_change("scheduler", "jobexecuted")

    def do_job_for_vm(self, action, uid, str_date, comment, param=None):
        """
        perform the job
        @type action string
        @param action the action to execute
        @type uid string
        @param uid the uid of the job
        @type str_date string
        @param str_date the date of the job
        @type comment string
        @param comment comment about the job
        @type param string
        @param param a random parameter to give to job
        """
        if action == "create":
            self.entity.create()
        elif action == "shutdown":
            self.entity.shutdown()
        elif action == "destroy":
            self.entity.destroy()
        elif action == "suspend":
            self.entity.suspend()
        elif action == "resume":
            self.entity.resume()
        elif action == "pause":
            # toggle: status 1 gets suspended, status 3 gets resumed
            if self.entity.libvirt_status == 1:
                self.entity.suspend()
            elif self.entity.libvirt_status == 3:
                self.entity.resume()
        elif action == "migrate":
            pass
        # NOTE(review): "reboot" is listed in supported_actions_for_vm but has
        # no branch here, so it is accepted and silently does nothing.
        self._finish_job(uid)

    def do_job_for_hypervisor(self, action, uid, str_date, comment, param=None):
        """
        perform the job
        @type action string
        @param action the action to execute
        @type uid string
        @param uid the uid of the job
        @type str_date string
        @param str_date the date of the job
        @type comment string
        @param comment comment about the job
        @type param string
        @param param a random parameter to give to job
        """
        if action == "alloc":
            self.entity.alloc()
        elif action == "free":
            pass  #self.entity.free()
        self._finish_job(uid)

    ### Process IQ

    def process_iq(self, conn, iq):
        """
        this method is invoked when a ARCHIPEL_NS_VM_SCHEDULER IQ is received.
        it understands IQ of type:
            - jobs
            - schedule
            - unschedule
            - actions
        Any other action is ignored and nothing is sent.
        @type conn: xmpp.Dispatcher
        @param conn: ths instance of the current connection that send the stanza
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        """
        action = self.entity.check_acp(conn, iq)
        self.entity.check_perm(conn, iq, action, -1, prefix="scheduler_")
        reply = None
        if action == "schedule":
            reply = self.iq_schedule(iq)
        elif action == "unschedule":
            reply = self.iq_unschedule(iq)
        elif action == "jobs":
            reply = self.iq_jobs(iq)
        elif action == "actions":
            reply = self.iq_actions(iq)
        if reply:
            conn.send(reply)
            raise xmpp.protocol.NodeProcessed

    def iq_schedule(self, iq):
        """
        Schedule a task.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            archipel_tag = iq.getTag("query").getTag("archipel")
            job = archipel_tag.getAttr("job")
            entityClass = self.entity.__class__.__name__
            param = None
            if entityClass == "TNArchipelVirtualMachine" and not job in self.supported_actions_for_vm:
                raise Exception("action %s is not valid" % job)
            elif entityClass == "TNArchipelHypervisor" and not job in self.supported_actions_for_hypervisor:
                raise Exception("action %s is not valid" % job)
            year = archipel_tag.getAttr("year")
            month = archipel_tag.getAttr("month")
            day = archipel_tag.getAttr("day")
            hour = archipel_tag.getAttr("hour")
            minute = archipel_tag.getAttr("minute")
            second = archipel_tag.getAttr("second")
            comment = archipel_tag.getAttr("comment")
            if archipel_tag.has_attr("param"):
                param = archipel_tag.getAttr("param")
            uid = str(uuid.uuid1())
            str_date = "%s-%s-%s @ %s : %s : %s" % (year, month, day, hour, minute, second)
            func = self._job_handler()
            self.scheduler.add_cron_job(func, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[job, uid, str_date, comment, param])
            self.save_jobs(uid, job, year, month, day, hour, minute, second, comment, param)
            self.entity.push_change("scheduler", "scheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_jobs(self, iq):
        """
        gets jobs
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            nodes = []
            for job in self.scheduler.jobs:
                job_node = xmpp.Node(tag="job", attrs={"action": str(job.args[0]), "uid": str(job.args[1]), "date": str(job.args[2]), "comment": job.args[3]})
                nodes.append(job_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_unschedule(self, iq):
        """
        unschedule a job and remove it from the database
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            uid = iq.getTag("query").getTag("archipel").getAttr("uid")
            the_job = self.get_jod_with_uid(uid)
            if not the_job:
                raise Exception("job with uid %s doesn't exists" % uid)
            self.delete_job(uid)
            self.scheduler.unschedule_job(the_job)
            self.entity.push_change("scheduler", "unscheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_actions(self, iq):
        """
        get available actions
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            entityClass = self.entity.__class__.__name__
            if entityClass == "TNArchipelVirtualMachine":
                actions = self.supported_actions_for_vm
            elif entityClass == "TNArchipelHypervisor":
                actions = self.supported_actions_for_hypervisor
            nodes = []
            for action in actions:
                action_node = xmpp.Node(tag="action")
                action_node.setData(action)
                nodes.append(action_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply
class EventScheduler():
    """Class to schedule regular events in a similar manner to cron.

    Wraps a Scheduler whose default job store is an SQLAlchemy store (table
    'SCHEDULE'). Every event is identified by a unique name, which is used to
    find and remove it later.
    """

    # Runs once, when the class body is executed. Kept below the docstring so
    # that the string above is the class ``__doc__``.
    logging.basicConfig()

    # NOTE(security): the database password is embedded in the source; base64
    # is an encoding, not encryption. Consider loading it from configuration.
    # Decoding via bytes.decode() yields text on both Python 2 and Python 3.
    __mysql_url = 'mysql+pymysql://powermonitor:%s@localhost/powermonitor' \
                  % base64.b64decode(b'cDB3M3JtMG4xdDBy').decode('ascii')

    # This determines the number of seconds after the designated run time that
    # the job is still allowed to be run. If jobs are not being run, try
    # increasing this in increments of 1.
    __GRACE_PERIOD = 31536000  # Amazing grace! Time in seconds before the job is considered misfired. Currently a year
    __COALESCE = True  # Force the job to only run once instead of retrying multiple times

    # If there is a problem with thread concurrency, play around with these
    # values.
    __threadpool_corethreads = 0  # Maximum number of persistent threads in the pool
    __threadpool_maxthreads = 20  # Maximum number of total threads in the pool
    __threadpool_keepalive = 1  # Seconds to keep non-core worker threads in the pool

    def __init__(self, start=True):
        """Create the scheduler, attach the job store and optionally start it.

        :param start: when true, the scheduler is started immediately.
        """
        # Defined up front so the ``is not None`` guards in the other methods
        # work even if construction below fails.
        self.__sched = None
        try:
            config = {
                'apscheduler.daemon': True,
                'apscheduler.standalone': False,
                'apscheduler.threadpool.core_threads': self.__threadpool_corethreads,
                'apscheduler.threadpool.max_threads': self.__threadpool_maxthreads,
                'apscheduler.threadpool.keepalive': self.__threadpool_keepalive,
                'apscheduler.coalesce': self.__COALESCE
            }
            self.__sched = Scheduler(config)
            # Add the SQLAlchemy job store as the default.
            self.__sched.add_jobstore(
                SQLAlchemyJobStore(url=self.__mysql_url, tablename='SCHEDULE'),
                'default')
            # Stop the scheduler when the program exits
            atexit.register(lambda: self.__sched.shutdown(wait=False))
            if start:
                self.__sched.start()
        except KeyError:
            logging.warning('An error occurred starting the scheduler.')

    def start_scheduler(self):
        """Start the scheduler.

        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError(
                'start_scheduler: Scheduler does not exist. It may have not started.'
            )
        self.__sched.start()

    def add_cron_event(self,
                       func,
                       name,
                       year=None,
                       month=None,
                       week=None,
                       day=None,
                       day_of_week=None,
                       hour=None,
                       minute=None,
                       second=None,
                       start_date=None,
                       *args,
                       **kwargs):
        """Add a cron like event to the schedule.

        Each job must be given a unique name in case it needs to be removed.
        The following expressions can be used in each field:

        Expression  Field  Description
        *           any    Fire on every value
        */a         any    Fire on every 'a' values, starting from the minimum
        a-b         any    Fire on any value in the 'a-b' range (a must be smaller than b)
        a-b/c       any    Fire every 'c' values within the 'a-b' range
        xth y       day    Fire on the x-th occurrence of weekday y within the month
        last x      day    Fire on the last occurrence of weekday 'x' within the month
        last        day    Fire on the last day within the month
        x,y,z       any    Fire on any matching expression; can combine any number of the above

        Extra positional and keyword arguments are forwarded to ``func`` as the
        job's ``args`` and ``kwargs``.

        :raises EventExistsError: if a job called ``name`` already exists.
        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError(
                'add_cron_event: Scheduler does not exist. It may have not started.'
            )
        if self.__find_event(name) is not None:
            # Every event needs a unique name so that it can be tracked and
            # removed later.
            logging.warning('add_cron_event: Event already exists')
            raise EventExistsError('A job with name %s already exists' % name)
        self.__sched.add_cron_job(
            func=func,
            name=name,
            year=year,
            month=month,
            day=day,
            week=week,
            day_of_week=day_of_week,
            hour=hour,
            minute=minute,
            second=second,
            start_date=start_date,
            args=args,
            kwargs=kwargs,
            misfire_grace_time=self.__GRACE_PERIOD)
        logging.info('New cron event added')

    def __find_event(self, event_name):
        """Return the job named ``event_name``, or None if no such job exists.

        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            logging.warning(
                '__find_event: Scheduler does not exist. It may have not started.'
            )
            raise SchedulerNotFoundError(
                'Scheduler does not exist. It may have not started.')
        # Scan every job; None is returned only after all of them were checked.
        for event in self.__sched.get_jobs():
            if event.name == event_name:
                return event
        return None

    def add_onceoff_event(self, func, name, date, args=None):
        """Add a once off event to the schedule.

        The job is executed once at the specified date and time.
        Date/time format: YYYY-MM-DD HH:MM:SS

        :param args: optional list of positional arguments passed to ``func``.
        :raises EventWontRunError: if the scheduler rejects the date with a
            ValueError (the event time has expired).
        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            logging.warning(
                'add_onceoff_event: Scheduler does not exist. It may have not started.'
            )
            raise SchedulerNotFoundError(
                'Scheduler does not exist. It may have not started.')
        try:
            if args is None:
                # No arguments to be passed to the function
                self.__sched.add_date_job(
                    func=func,
                    name=name,
                    date=date,
                    misfire_grace_time=self.__GRACE_PERIOD)
            else:
                # Forward the arguments under the correct keyword ('args')
                self.__sched.add_date_job(
                    func=func,
                    name=name,
                    date=date,
                    args=args,
                    misfire_grace_time=self.__GRACE_PERIOD)
        except ValueError:
            # If the event is in the past, it will not run.
            raise EventWontRunError(
                'The event will not run: Event time has expired.')
        logging.info('New once off event added')

    def remove_event(self, event_name):
        """Remove the event 'event_name' from the schedule.

        :raises EventNotFoundError: if no job with that name exists.
        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError(
                'remove_event: Scheduler does not exist. It may have not started.'
            )
        event = self.__find_event(event_name=event_name)
        if event is None:
            # Raise an error so that it can be handled correctly
            logging.warning('remove_event: Event not found for removal.')
            raise EventNotFoundError('Event not found for removal: %s' %
                                     event_name)
        self.__sched.unschedule_job(event)

    def get_jobs(self):
        """Get the list of events currently in the job store.

        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError(
                'get_events: Scheduler does not exist. It may have not started.'
            )
        return self.__sched.get_jobs()

    def get_job_names(self):
        """
        Get the names of all the jobs in the job store
        :return: list
        """
        return [job.name for job in self.get_jobs() or []]

    def get_scheduler(self):
        """Returns the Scheduler object. Rather add functionality to this class than call this method.

        :raises SchedulerNotFoundError: if the scheduler was never created.
        """
        if self.__sched is None:
            raise SchedulerNotFoundError(
                'get_scheduler: Scheduler does not exist. It may have not started.'
            )
        return self.__sched
class PyFlowScheduler(object):
    """
    This object schedules the submission of the tasks in a :class:`Flow`.
    There are two types of errors that might occur during the execution of the jobs:

        #. Python exceptions
        #. Abinit Errors.

    Python exceptions are easy to detect and are usually due to a bug in abinitio or random errors such as IOError.
    The set of Abinit Errors is much much broader. It includes wrong input data, segmentation
    faults, problems with the resource manager, etc. Abinitio tries to handle the most common cases
    but there's still a lot of room for improvement.
    Note, in particular, that `PyFlowScheduler` will shutdown automatically if

        #. The number of python exceptions is > `max_num_pyexcs`

        #. The number of Abinit Errors (i.e. the number of tasks whose status is S_ERROR) is > `max_num_abierrs`

        #. The number of jobs launched becomes greater than (`safety_ratio` * total_number_of_tasks).

        #. The scheduler will send an email to the user (specified by `mailto`) every `remindme_s` seconds.
           If the mail cannot be sent, it will shutdown automatically.
           This check prevents the scheduler from being trapped in an infinite loop.
    """
    # Configuration file.
    YAML_FILE = "scheduler.yml"
    # expanduser never returns None, unlike os.getenv("HOME") when HOME is unset.
    USER_CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".abinit", "abipy")

    Error = PyFlowSchedulerError

    def __init__(self, **kwargs):
        """
        Args:
            weeks: number of weeks to wait (default: 0)
            days: number of days to wait (default: 0)
            hours: number of hours to wait (default: 0)
            minutes: number of minutes to wait (default: 0)
            seconds: number of seconds to wait (default: 0)
            mailto: address used by `send_email` (default: None, no mail is sent)
            verbose: (int) verbosity level (default: 0)
            use_dynamic_manager: True if the :class:`TaskManager` must be re-initialized from
                file before launching the jobs. (default: False)
            max_njobs_inqueue: Limit on the number of jobs that can be present in the queue (default: 200)
            contact_resource_manager: True if the resource manager must be asked for the number
                of jobs in the queue (default: False)
            remindme_s: interval in seconds between two reminder e-mails (default: 4 days)
            max_num_pyexcs: maximum number of python exceptions tolerated (default: 0)
            max_num_abierrs: maximum number of tasks with S_ERROR status tolerated (default: 0)
            safety_ratio: the scheduler stops if more than `safety_ratio` * number_of_tasks
                jobs have been launched (default: 5)
            max_nlaunches: Maximum number of tasks launched by rapidfire (default -1 i.e. no limit)
            debug: debug level (default: 0)

        Raises:
            `Error` if all the time intervals are zero or if unknown arguments are passed.
        """
        # Options passed to the scheduler.
        self.sched_options = AttrDict(
            weeks=kwargs.pop("weeks", 0),
            days=kwargs.pop("days", 0),
            hours=kwargs.pop("hours", 0),
            minutes=kwargs.pop("minutes", 0),
            seconds=kwargs.pop("seconds", 0),
        )

        # At least one non-zero interval is required.
        if all(not v for v in self.sched_options.values()):
            raise self.Error("Wrong set of options passed to the scheduler.")

        self.mailto = kwargs.pop("mailto", None)
        self.verbose = int(kwargs.pop("verbose", 0))
        self.use_dynamic_manager = kwargs.pop("use_dynamic_manager", False)
        self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200)
        self.contact_resource_manager = as_bool(kwargs.pop("contact_resource_manager", False))

        self.remindme_s = float(kwargs.pop("remindme_s", 4 * 24 * 3600))
        self.max_num_pyexcs = int(kwargs.pop("max_num_pyexcs", 0))
        self.max_num_abierrs = int(kwargs.pop("max_num_abierrs", 0))
        self.safety_ratio = int(kwargs.pop("safety_ratio", 5))
        self.max_nlaunches = kwargs.pop("max_nlaunches", -1)
        self.debug = kwargs.pop("debug", 0)

        # Everything left in kwargs is an option we do not know about.
        if kwargs:
            raise self.Error("Unknown arguments %s" % kwargs)

        if has_sched_v3:
            from apscheduler.schedulers.blocking import BlockingScheduler
            self.sched = BlockingScheduler()
        else:
            from apscheduler.scheduler import Scheduler
            self.sched = Scheduler(standalone=True)

        self.nlaunch = 0
        self.num_reminders = 1

        # Used to keep track of the exceptions raised while the scheduler is running
        self.exceptions = collections.deque(maxlen=self.max_num_pyexcs + 10)

        # Used to push additional info during the execution.
        self.history = collections.deque(maxlen=100)

    @classmethod
    def from_file(cls, filepath):
        """Read the configuration parameters from a Yaml file."""
        with open(filepath, "r") as fh:
            # safe_load: the file only holds plain options, no python objects.
            return cls(**yaml.safe_load(fh))

    @classmethod
    def from_string(cls, s):
        """Create an instance from string s containing a YAML dictionary."""
        stream = cStringIO(s)
        stream.seek(0)
        return cls(**yaml.safe_load(stream))

    @classmethod
    def from_user_config(cls):
        """
        Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'.
        Search first in the working directory and then in the configuration directory of abipy.

        Raises:
            `Error` if file is not found.
        """
        # Try in the current directory.
        path = os.path.join(os.getcwd(), cls.YAML_FILE)
        if os.path.exists(path):
            return cls.from_file(path)

        # Try in the configuration directory.
        path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE)
        if os.path.exists(path):
            return cls.from_file(path)

        raise cls.Error("Cannot locate %s neither in current directory nor in %s" % (cls.YAML_FILE, path))

    def __str__(self):
        """String representation."""
        lines = [self.__class__.__name__ + ", Pid: %d" % self.pid]
        app = lines.append
        app("Scheduler options: %s" % str(self.sched_options))

        # The flow is only available after add_flow has been called.
        flow = getattr(self, "_flow", None)
        if flow is not None:
            app(80 * "=")
            app(str(flow))

        return "\n".join(lines)

    @property
    def pid(self):
        """The pid of the process associated to the scheduler."""
        try:
            return self._pid
        except AttributeError:
            self._pid = os.getpid()
            return self._pid

    @property
    def pid_file(self):
        """
        Absolute path of the file with the pid.
        The file is located in the workdir of the flow
        """
        return self._pid_file

    @property
    def flow(self):
        """`Flow`."""
        return self._flow

    @property
    def num_excs(self):
        """Number of exceptions raised so far."""
        return len(self.exceptions)

    def get_delta_etime(self):
        """Returns a `timedelta` object representing with the elapsed time."""
        return timedelta(seconds=(time.time() - self.start_time))

    def add_flow(self, flow):
        """
        Add an :class:`Flow` flow to the scheduler and write the pid file in its workdir.

        Raises:
            `Error` if a flow has already been added or if the pid file already exists.
        """
        if hasattr(self, "_flow"):
            raise self.Error("Only one flow can be added to the scheduler.")

        pid_file = os.path.join(flow.workdir, "_PyFlowScheduler.pid")

        if os.path.isfile(pid_file):
            flow.show_status()

            raise self.Error("""\
pid_file %s already exists
There are two possibilities:

   1) There's an another instance of PyFlowScheduler running
   2) The previous scheduler didn't exit in a clean way

To solve case 1:
   Kill the previous scheduler (use 'kill pid' where pid is the number reported in the file)
   Then you can restart the new scheduler.

To solve case 2:
   Remove the pid_file and restart the scheduler.

Exiting""" % pid_file)

        with open(pid_file, "w") as fh:
            fh.write(str(self.pid))

        self._pid_file = pid_file
        self._flow = flow

    def start(self):
        """
        Register the callback as interval job, run the flow once and start the scheduler.

        Returns:
            True if the scheduler was started and returned normally, False if the
            preliminary checks or the first run failed, or on KeyboardInterrupt.
        """
        self.history.append("Started on %s" % time.asctime())
        self.start_time = time.time()

        if has_sched_v3:
            self.sched.add_job(self.callback, "interval", **self.sched_options)
        else:
            self.sched.add_interval_job(self.callback, **self.sched_options)

        errors = self.flow.look_before_you_leap()
        if errors:
            self.exceptions.append(errors)
            return False

        # Try to run the job immediately. If something goes wrong return without initializing the scheduler.
        self._runem_all()

        if self.exceptions:
            self.cleanup()
            self.send_email(msg="Error while trying to run the flow for the first time!\n %s" % self.exceptions)
            return False

        try:
            self.sched.start()
            return True

        except KeyboardInterrupt:
            self.shutdown(msg="KeyboardInterrupt from user")
            if ask_yesno("Do you want to cancel all the jobs in the queue? [Y/n]"):
                self.flow.cancel()
            self.flow.pickle_dump()
            return False

    def _runem_all(self):
        """
        This function checks the status of all tasks,
        tries to fix tasks that went unconverged, abicritical, or queuecritical
        and tries to run all the tasks that can be submitted.

        The tracebacks of the exceptions caught here are appended to `self.exceptions`.
        """
        excs = []
        flow = self.flow

        # Allow to change the manager at run-time
        if self.use_dynamic_manager:
            from pymatgen.io.abinitio.tasks import TaskManager
            new_manager = TaskManager.from_user_config()
            for work in flow:
                work.set_manager(new_manager)

        nqjobs = 0
        if self.contact_resource_manager:
            # This call is expensive and therefore it's optional
            nqjobs = flow.get_njobs_in_queue()
            if nqjobs is None:
                nqjobs = 0
                if flow.manager.has_queue:
                    logger.warning('Cannot get njobs_inqueue')

        if nqjobs >= self.max_njobs_inqueue:
            logger.info("Too many jobs in the queue, returning")
            return

        # Number of launches still allowed in this iteration.
        if self.max_nlaunches == -1:
            max_nlaunch = self.max_njobs_inqueue - nqjobs
        else:
            max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches)

        # check status and print it.
        flow.check_status(show=False)

        # fix problems
        # Try to restart the unconverged tasks
        # TODO: do not fire here but prepare for firing in rapidfire
        for task in self.flow.unconverged_tasks:
            try:
                logger.info("Flow will try restart task %s" % task)
                fired = task.restart()
                if fired:
                    self.nlaunch += 1
                    max_nlaunch -= 1
                    if max_nlaunch == 0:
                        logger.info("Restart: too many jobs in the queue, returning")
                        flow.pickle_dump()
                        # Do not lose the tracebacks collected so far.
                        if excs:
                            logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs))
                            self.exceptions.extend(excs)
                        return

            except task.RestartError:
                excs.append(straceback())

        # moved here from within rapidfire ...
        # fix only prepares for restarting, and sets to ready
        nfixed = flow.fix_abi_critical()
        if nfixed:
            print("Fixed %d AbiCritical errors" % nfixed)

        # Temporarily disable by MG because I don't know if fix_critical works after the
        # introduction of the new qadapters
        if False:
            nfixed = flow.fix_queue_critical()
            if nfixed:
                print("Fixed %d QueueCritical errors" % nfixed)

        # update database
        flow.pickle_dump()

        # Submit the tasks that are ready.
        try:
            nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10)
            self.nlaunch += nlaunch

            if nlaunch:
                print("[%s] Number of launches: %d" % (time.asctime(), nlaunch))

        except Exception:
            excs.append(straceback())

        flow.show_status()

        if excs:
            logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs))
            self.exceptions.extend(excs)

    def callback(self):
        """The function that will be executed by the scheduler."""
        try:
            return self._callback()
        except:
            # Deliberate catch-all: all exceptions raised here will trigger the shutdown!
            self.exceptions.append(straceback())
            self.shutdown(msg="Exception raised in callback!")

    def _callback(self):
        """
        The actual callback: run the flow and shutdown the scheduler if the flow
        is completed or if one of the safety checks fails.

        Returns:
            The number of exceptions recorded so far, or the return value of
            `shutdown` when all the tasks are ok.
        """
        if self.debug:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds())

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if self.verbose:
            print("all_ok", all_ok)

        if all_ok:
            # Return here: the failure checks below could otherwise trigger a second shutdown.
            return self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit")

        # Handle failures.
        err_msg = ""

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.remindme_s:
            self.num_reminders += 1
            msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " %
                   (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" +
                        " but send_email returned %d. Aborting now" % retcode)
                err_msg += msg

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.max_num_pyexcs:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.max_num_pyexcs)
            err_msg += boxed(msg)

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons
        # such as race conditions between difference callbacks!)
        if self.nlaunch > self.safety_ratio * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.max_num_abierrs:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.max_num_abierrs)
            err_msg += boxed(msg)

        deadlocked, runnables, running = self.flow.deadlocked_runnables_running()
        if deadlocked and not runnables and not running:
            msg = "No runnable job with deadlocked tasks:\n %s\nWill shutdown the scheduler and exit" % str(deadlocked)
            err_msg += boxed(msg)

        if err_msg:
            # Something wrong. Quit
            self.shutdown(err_msg)

        return len(self.exceptions)

    def cleanup(self):
        """Cleanup routine: remove the pid file and save the pickle database"""
        try:
            os.remove(self.pid_file)
        except OSError:
            logger.critical("Could not remove pid_file")

        # Save the final status of the flow.
        self.flow.pickle_dump()

    def shutdown(self, msg):
        """
        Shutdown the scheduler.

        Args:
            msg: message sent by e-mail and printed in the final summary.
        """
        try:
            self.cleanup()

            self.history.append("Completed on %s" % time.asctime())
            self.history.append("Elapsed time %s" % self.get_delta_etime())

            if self.debug:
                print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds())

            retcode = self.send_email(msg)
            if self.debug:
                print("send_mail retcode", retcode)

            # Write file with the list of exceptions:
            if self.exceptions:
                dump_file = os.path.join(self.flow.workdir, "_exceptions")
                with open(dump_file, "w") as fh:
                    fh.writelines(self.exceptions)
                    fh.write("Shutdown message:\n%s" % msg)

            lines = []
            app = lines.append
            app("Submitted on %s" % time.ctime(self.start_time))
            app("Completed on %s" % time.asctime())
            app("Elapsed time %s" % str(self.get_delta_etime()))
            if self.flow.all_ok:
                app("Flow completed successfully")
            else:
                app("Flow didn't complete successfully")
                app("Shutdown message:\n%s" % msg)

            print("\n".join(lines))

        finally:
            # Shutdown the scheduler thus allowing the process to exit.
            logger.debug('this should be the shutdown of the scheduler')

            # Unschedule all the jobs before calling shutdown.
            # `unschedule_job` belongs to the apscheduler 2.x API only; it is
            # skipped when the 3.x BlockingScheduler is in use.
            if not has_sched_v3:
                for job in self.sched.get_jobs():
                    self.sched.unschedule_job(job)

            self.sched.shutdown()

    def send_email(self, msg, tag=None):
        """
        Send an e-mail before completing the shutdown.

        Returns:
            The return value of `_send_email` (-1 if `mailto` is None),
            -2 if an exception was raised while sending.
        """
        try:
            return self._send_email(msg, tag)
        except:
            # Best effort: record the traceback instead of propagating it.
            self.exceptions.append(straceback())
            return -2

    def _send_email(self, msg, tag):
        """Build the text of the mail (message, timing, flow status, exceptions) and send it."""
        if self.mailto is None:
            return -1

        header = msg.splitlines()
        app = header.append

        app("Submitted on %s" % time.ctime(self.start_time))
        app("Completed on %s" % time.asctime())
        app("Elapsed time %s" % str(self.get_delta_etime()))
        app("Number of errored tasks: %d" % self.flow.num_errored_tasks)
        app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks)

        strio = cStringIO()
        strio.write("\n".join(header) + 4 * "\n")

        # Add the status of the flow.
        self.flow.show_status(stream=strio)

        if self.exceptions:
            # Report the list of exceptions.
            strio.writelines(self.exceptions)

        if tag is None:
            tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]"

        return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
class Scheduller():
    """Keep a list of cron-style timers in a shelve database and schedule one cron job per timer.

    `self.jobs` is kept parallel to `self.db['timers']`: the job at index i
    belongs to the timer at index i.
    """

    def __init__(self, job_func):
        """Start the scheduler, open the timers db and schedule every stored timer.

        Args:
            job_func: callable executed by each cron job; it receives a dict with
                the keys `switch_name` and `state`.
        """
        self.job_func = job_func
        self.jobs = []
        config = {'apscheduler.standalone': False}
        self.sched = Scheduler(config)
        self.sched.start()
        self.db = self._open_db()
        if 'timers' not in self.db:
            print('Creating new timers db')
            self.db['timers'] = []
        self.apply_all_timers(self.db['timers'])

    def _open_db(self):
        """Open the shelve file `db/db_timers` located next to this module."""
        return shelve.open(os.path.join(os.path.dirname(__file__), "db/db_timers"), writeback=True)

    def db_flush(self):
        """Write the db to disk by syncing, closing and reopening it."""
        self.db.sync()
        self.db.close()
        self.db = self._open_db()

    def get_timers(self):
        """Return the list of timers stored in the db."""
        return self.db['timers']

    def update_db(self, timers):
        """Replace all the timers (and their jobs) with `timers`."""
        self.remove_all_jobs()
        self.db['timers'] = timers
        self.db_flush()
        self.apply_all_timers(self.db['timers'])

    def append_db(self, new_timer):
        """Store `new_timer` in the db and schedule its job."""
        self.db['timers'].append(new_timer)
        self.db_flush()
        self.schedulle_job(new_timer)

    def remove_all_jobs(self):
        """Unschedule every job and remove every timer from the db."""
        # Walk the indices backwards: unschedule_job pops from the lists, so a
        # forward enumeration would skip every other entry.
        for index in reversed(range(len(self.db['timers']))):
            self.unschedule_job(index)
        self.db_flush()

    def apply_all_timers(self, timers):
        """Schedule one job for each timer in `timers`."""
        for timer in timers:
            self.schedulle_job(timer)

    def schedulle_job(self, timer):
        """Schedule a cron job for `timer` and remember it in `self.jobs`.

        Args:
            timer: dict with the keys `cron` (five whitespace-separated fields:
                minute hour day month day_of_week), `switch_name` and `state`.
        """
        switch_name = timer['switch_name']
        state = timer['state']
        minute, hour, day, month, day_of_week = timer['cron'].split()
        # workaround for apscheduler, Monday is day 0 in its implementation
        if day_of_week != "*":
            day_of_week = int(day_of_week) - 1
            if day_of_week == -1:
                day_of_week = 6
        job = self.sched.add_cron_job(self.job_func, second=0, minute=minute, hour=hour, day=day,
                                      month=month, day_of_week=day_of_week,
                                      args=[dict(switch_name=switch_name, state=state)])
        self.jobs.append(job)

    def unschedule_job(self, id):
        """Remove the timer at position `id` and unschedule its job.

        Args:
            id: index of the timer; anything accepted by `int()`.
        """
        print('remove ID: %s' % (id,))
        # Convert once so that both lists are indexed with the same integer.
        index = int(id)
        self.db['timers'].pop(index)
        self.sched.unschedule_job(self.jobs.pop(index))

    def remove(self, id):
        """Remove the timer at position `id` and write the db to disk."""
        self.unschedule_job(id)
        self.db_flush()
class schedulerDaemon(object):
    """Daemon object wrapping a scheduler and the list of the current emulation objects."""

    def __init__(self):
        """Start the scheduler, register the job listener and run the recovery step."""
        # starting scheduler
        self.sched = Scheduler()
        self.sched.start()
        self.sched.add_listener(job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | EVENT_JOB_MISSED)
        # NOTE(review): recoverySchedulerDaemon is not defined in this excerpt of the class.
        self.recoverySchedulerDaemon()
        self.newEmulationList = []

    def listJobs(self):
        """Return the scheduled jobs as a list of strings (empty list if there are none)."""
        schedFileLogger.debug("-> listJobs(self)")
        jobs = self.sched.get_jobs()
        if not jobs:
            schedFileLogger.debug("No jobs to send")
            return []

        # The job objects are converted into strings because the list is sent over
        # Pyro 4.20, which has the new "serpent serializer". Example of one entry:
        # <Job (name=1-1-MEM_EMU-logger interval-3sec., trigger=<SimpleTrigger (run_date=...)>)>
        strJobsList = [str(job) for job in jobs]
        schedFileLogger.debug("sending list of jobs")
        return strJobsList

    def stopSchedulerDaemon(self):
        """Log the stop request and terminate the process with exit status 1."""
        schedFileLogger.debug("-> stopSchedulerDaemon(self)")
        schedFileLogger.info("stopping Daemon")
        sys.exit(1)

    def hello(self):
        """Log and return the greeting "Pong!"."""
        schedFileLogger.debug("-> hello(self)")
        greeting = "Pong!"
        schedFileLogger.debug(greeting)
        return greeting

    def deleteJobs(self, emulationID, distribitionName):
        """Unschedule jobs.

        Args:
            emulationID: "all" removes every job and resets the emulation list;
                any other value selects the jobs by `distribitionName`.
            distribitionName: jobs whose name contains this string are removed.
        """
        schedFileLogger.debug("-> deleteJobs(self,emulationID,distribitionName)")
        # stringify
        emulationID = str(emulationID)
        distribitionName = str(distribitionName)

        schedFileLogger.debug("Looking for job name:" + emulationID + "-" + distribitionName)

        if emulationID == "all":
            schedFileLogger.info("Jobs deleted:")
            # setting emulation objects list to initial state
            self.newEmulationList = []
            for job in self.sched.get_jobs():
                self.sched.unschedule_job(job)
                msg = {"Action": "Job Deleted", "jobName": job.name}
                schedFileLogger.info(msg)
        else:
            for job in self.sched.get_jobs():
                if distribitionName in job.name:
                    self.sched.unschedule_job(job)
                    msg = {"Action": "Job Deleted", "jobName": job.name}
                    schedFileLogger.info(msg)
                else:
                    schedFileLogger.info("These jobs remain: " + job.name)

    # Adding current emulation object for further availability
    def setEmuObject(self, newEmulation):
        """Store `newEmulation`, replacing the stored emulations with the same `emulationID`.

        Returns:
            True on success, False if an exception was raised (the exception is printed).
        """
        try:
            new_id = newEmulation.emulationID
            # Prevent duplicate entries: build the filtered list first instead of
            # popping from the list while iterating over it.
            kept = [emu for emu in self.newEmulationList if emu.emulationID != new_id]
            kept.append(newEmulation)
            self.newEmulationList[:] = kept
            return True
        except Exception as e:
            print(e)
            return False
email_sched.start()

# Periodic status mail with the latest temperature readings.
# NOTE(review): the text says "twelve hours" but the job is scheduled every 8 hours - confirm which is intended.
# NOTE(review): the job is added through `sched` although the scheduler started above is `email_sched` - confirm.
text = "it's been twelve hours. here are the latest temperature readings from your new brew"
email_job = sched.add_interval_job(send_email, hours=8, args=[text, password, True])

# this is the heart of the program:
# send email to let me know I'm brewing
send_email("starting brew log.", password, graph=False)

# The command line alternates temperatures and durations starting at argv[2]:
# get list of temps, then the lengths of time for each of those temperatures,
# both converted to ints.
list_of_temps = [int(temp) for temp in sys.argv[2::2]]
list_of_times = [int(hours) for hours in sys.argv[3::2]]

print("list of temps")
print(list_of_temps)
print("list of times")
print(list_of_times)

# One regulation phase per (duration, temperature) pair.
for hours, temp in zip(list_of_times, list_of_temps):
    send_email("changing temperature to " + str(temp) + " for " + str(hours) + " hours.", password, graph = False)
    recordAndRegulateTemp(hours, temp, writer)

print("program done. fermenter shutting down.")
send_email("ending. fermenter is shutting off", password, graph=True)

# unschedule_job expects the job handle returned by add_interval_job, not the
# function; the job is removed from the scheduler that created it.
sched.unschedule_job(email_job)
io.output(power_pin, False)
class PyFlowScheduler(object): """ This object schedules the submission of the tasks in a :class:`Flow`. There are two types of errors that might occur during the execution of the jobs: #. Python exceptions #. Errors in the ab-initio code Python exceptions are easy to detect and are usually due to a bug in the python code or random errors such as IOError. The set of errors in the ab-initio is much much broader. It includes wrong input data, segmentation faults, problems with the resource manager, etc. The flow tries to handle the most common cases but there's still a lot of room for improvement. Note, in particular, that `PyFlowScheduler` will shutdown automatically in the following cases: #. The number of python exceptions is > max_num_pyexcs #. The number of task errors (i.e. the number of tasks whose status is S_ERROR) is > max_num_abierrs #. The number of jobs launched becomes greater than (`safety_ratio` * total_number_of_tasks). #. The scheduler will send an email to the user (specified by `mailto`) every `remindme_s` seconds. If the mail cannot be sent, the scheduler will shutdown automatically. This check prevents the scheduler from being trapped in an infinite loop. """ # Configuration file. YAML_FILE = "scheduler.yml" USER_CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".abinit", "abipy") Error = PyFlowSchedulerError @classmethod def autodoc(cls): i = cls.__init__.__doc__.index("Args:") return cls.__init__.__doc__[i+5:] def __init__(self, **kwargs): """ Args: weeks: number of weeks to wait (DEFAULT: 0). days: number of days to wait (DEFAULT: 0). hours: number of hours to wait (DEFAULT: 0). minutes: number of minutes to wait (DEFAULT: 0). seconds: number of seconds to wait (DEFAULT: 0). mailto: The scheduler will send an email to `mailto` every `remindme_s` seconds. (DEFAULT: None i.e. not used). verbose: (int) verbosity level. 
(DEFAULT: 0) use_dynamic_manager: "yes" if the :class:`TaskManager` must be re-initialized from file before launching the jobs. (DEFAULT: "no") max_njobs_inqueue: Limit on the number of jobs that can be present in the queue. (DEFAULT: 200) remindme_s: The scheduler will send an email to the user specified by `mailto` every `remindme_s` seconds. (int, DEFAULT: 1 day). max_num_pyexcs: The scheduler will exit if the number of python exceptions is > max_num_pyexcs (int, DEFAULT: 0) max_num_abierrs: The scheduler will exit if the number of errored tasks is > max_num_abierrs (int, DEFAULT: 0) safety_ratio: The scheduler will exits if the number of jobs launched becomes greater than `safety_ratio` * total_number_of_tasks_in_flow. (int, DEFAULT: 5) max_nlaunches: Maximum number of tasks launched in a single iteration of the scheduler. (DEFAULT: -1 i.e. no limit) debug: Debug level. Use 0 for production (int, DEFAULT: 0) fix_qcritical: "yes" if the launcher should try to fix QCritical Errors (DEFAULT: "yes") rmflow: If "yes", the scheduler will remove the flow directory if the calculation completed successfully. (DEFAULT: "no") killjobs_if_errors: "yes" if the scheduler should try to kill all the runnnig jobs before exiting due to an error. (DEFAULT: "yes") """ # Options passed to the scheduler. 
self.sched_options = AttrDict( weeks=kwargs.pop("weeks", 0), days=kwargs.pop("days", 0), hours=kwargs.pop("hours", 0), minutes=kwargs.pop("minutes", 0), seconds=kwargs.pop("seconds", 0), #start_date=kwargs.pop("start_date", None), ) if all(not v for v in self.sched_options.values()): raise self.Error("Wrong set of options passed to the scheduler.") self.mailto = kwargs.pop("mailto", None) self.verbose = int(kwargs.pop("verbose", 0)) self.use_dynamic_manager = as_bool(kwargs.pop("use_dynamic_manager", False)) self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200) self.max_ncores_used = kwargs.pop("max_ncores_used", None) self.contact_resource_manager = as_bool(kwargs.pop("contact_resource_manager", False)) self.remindme_s = float(kwargs.pop("remindme_s", 1 * 24 * 3600)) self.max_num_pyexcs = int(kwargs.pop("max_num_pyexcs", 0)) self.max_num_abierrs = int(kwargs.pop("max_num_abierrs", 0)) self.safety_ratio = int(kwargs.pop("safety_ratio", 5)) #self.max_etime_s = kwargs.pop("max_etime_s", ) self.max_nlaunches = kwargs.pop("max_nlaunches", -1) self.debug = kwargs.pop("debug", 0) self.fix_qcritical = as_bool(kwargs.pop("fix_qcritical", True)) self.rmflow = as_bool(kwargs.pop("rmflow", False)) self.killjobs_if_errors = as_bool(kwargs.pop("killjobs_if_errors", True)) self.customer_service_dir = kwargs.pop("customer_service_dir", None) if self.customer_service_dir is not None: self.customer_service_dir = Directory(self.customer_service_dir) self._validate_customer_service() if kwargs: raise self.Error("Unknown arguments %s" % kwargs) if not has_apscheduler: raise RuntimeError("Install apscheduler with pip") if has_sched_v3: logger.warning("Using scheduler v>=3.0.0") from apscheduler.schedulers.blocking import BlockingScheduler self.sched = BlockingScheduler() else: from apscheduler.scheduler import Scheduler self.sched = Scheduler(standalone=True) self.nlaunch = 0 self.num_reminders = 1 # Used to keep track of the exceptions raised while the scheduler is running 
self.exceptions = deque(maxlen=self.max_num_pyexcs + 10) # Used to push additional info during the execution. self.history = deque(maxlen=100) @classmethod def from_file(cls, filepath): """Read the configuration parameters from a Yaml file.""" with open(filepath, "rt") as fh: return cls(**yaml.safe_load(fh)) @classmethod def from_string(cls, s): """Create an istance from string s containing a YAML dictionary.""" stream = cStringIO(s) stream.seek(0) return cls(**yaml.safe_load(stream)) @classmethod def from_user_config(cls): """ Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'. Search first in the working directory and then in the configuration directory of abipy. Raises: `RuntimeError` if file is not found. """ # Try in the current directory. path = os.path.join(os.getcwd(), cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) # Try in the configuration directory. path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) raise cls.Error("Cannot locate %s neither in current directory nor in %s" % (cls.YAML_FILE, path)) def __str__(self): """String representation.""" lines = [self.__class__.__name__ + ", Pid: %d" % self.pid] app = lines.append app("Scheduler options: %s" % str(self.sched_options)) if self.flow is not None: app(80 * "=") app(str(self.flow)) return "\n".join(lines) @property def pid(self): """The pid of the process associated to the scheduler.""" try: return self._pid except AttributeError: self._pid = os.getpid() return self._pid @property def pid_file(self): """ Absolute path of the file with the pid. 
The file is located in the workdir of the flow """ return self._pid_file @property def flow(self): """`Flow`.""" try: return self._flow except AttributeError: return None @property def num_excs(self): """Number of exceptions raised so far.""" return len(self.exceptions) def get_delta_etime(self): """Returns a `timedelta` object representing with the elapsed time.""" return timedelta(seconds=(time.time() - self.start_time)) def add_flow(self, flow): """ Add an :class:`Flow` flow to the scheduler. """ if hasattr(self, "_flow"): raise self.Error("Only one flow can be added to the scheduler.") # Check if we are already using a scheduler to run this flow flow.check_pid_file() flow.set_spectator_mode(False) # Build dirs and files (if not yet done) flow.build() with open(flow.pid_file, "wt") as fh: fh.write(str(self.pid)) self._pid_file = flow.pid_file self._flow = flow def _validate_customer_service(self): """ Validate input parameters if customer service is on then create directory for tarball files with correct premissions for user and group. """ direc = self.customer_service_dir if not direc.exists: mode = 0o750 print("Creating customer_service_dir %s with mode %s" % (direc, mode)) direc.makedirs() os.chmod(direc.path, mode) if self.mailto is None: raise RuntimeError("customer_service_dir requires mailto option in scheduler.yml") def _do_customer_service(self): """ This method is called before the shutdown of the scheduler. If customer_service is on and the flow didn't completed successfully, a lightweight tarball file with inputs and the most important output files is created in customer_servide_dir. 
""" if self.customer_service_dir is None: return doit = self.exceptions or not self.flow.all_ok doit = True if not doit: return prefix = os.path.basename(self.flow.workdir) + "_" import tempfile, datetime suffix = str(datetime.datetime.now()).replace(" ", "-") # Remove milliseconds i = suffix.index(".") if i != -1: suffix = suffix[:i] suffix += ".tar.gz" #back = os.getcwd() #os.chdir(self.customer_service_dir.path) _, tmpname = tempfile.mkstemp(suffix="_" + suffix, prefix=prefix, dir=self.customer_service_dir.path, text=False) print("Dear customer,\n We are about to generate a tarball in\n %s" % tmpname) self.flow.make_light_tarfile(name=tmpname) #os.chdir(back) def start(self): """ Starts the scheduler in a new thread. Returns 0 if success. In standalone mode, this method will block until there are no more scheduled jobs. """ self.history.append("Started on %s" % time.asctime()) self.start_time = time.time() if not has_apscheduler: raise RuntimeError("Install apscheduler with pip") if has_sched_v3: self.sched.add_job(self.callback, "interval", **self.sched_options) else: self.sched.add_interval_job(self.callback, **self.sched_options) errors = self.flow.look_before_you_leap() if errors: self.exceptions.append(errors) return 1 # Try to run the job immediately. If something goes wrong return without initializing the scheduler. self._runem_all() if self.exceptions: self.cleanup() self.send_email(msg="Error while trying to run the flow for the first time!\n %s" % self.exceptions) return 1 try: self.sched.start() return 0 except KeyboardInterrupt: self.shutdown(msg="KeyboardInterrupt from user") if ask_yesno("Do you want to cancel all the jobs in the queue? 
[Y/n]"): print("Number of jobs cancelled:", self.flow.cancel()) self.flow.pickle_dump() return -1 def _runem_all(self): """ This function checks the status of all tasks, tries to fix tasks that went unconverged, abicritical, or queuecritical and tries to run all the tasks that can be submitted.+ """ excs = [] flow = self.flow # Allow to change the manager at run-time if self.use_dynamic_manager: from pymatgen.io.abinit.tasks import TaskManager new_manager = TaskManager.from_user_config() for work in flow: work.set_manager(new_manager) nqjobs = 0 if self.contact_resource_manager: # and flow.TaskManager.qadapter.QTYPE == "shell": # This call is expensive and therefore it's optional (must be activate in manager.yml) nqjobs = flow.get_njobs_in_queue() if nqjobs is None: nqjobs = 0 if flow.manager.has_queue: logger.warning('Cannot get njobs_inqueue') else: # Here we just count the number of tasks in the flow who are running. # This logic breaks down if there are multiple schedulers runnig # but it's easy to implement without having to contact the resource manager. nqjobs = (len(list(flow.iflat_tasks(status=flow.S_RUN))) + len(list(flow.iflat_tasks(status=flow.S_SUB)))) if nqjobs >= self.max_njobs_inqueue: print("Too many jobs in the queue: %s. No job will be submitted." % nqjobs) flow.check_status(show=False) return if self.max_nlaunches == -1: max_nlaunch = self.max_njobs_inqueue - nqjobs else: max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches) # check status. flow.check_status(show=False) # This check is not perfect, we should make a list of tasks to sumbit # and select only the subset so that we don't exceeed mac_ncores_used # Many sections of this code should be rewritten. 
#if self.max_ncores_used is not None and flow.ncores_used > self.max_ncores_used: if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used: print("Cannot exceed max_ncores_used %s" % self.max_ncores_used) return # Try to restart the unconverged tasks # TODO: do not fire here but prepare for fireing in rapidfire for task in self.flow.unconverged_tasks: try: logger.info("Flow will try restart task %s" % task) fired = task.restart() if fired: self.nlaunch += 1 max_nlaunch -= 1 if max_nlaunch == 0: logger.info("Restart: too many jobs in the queue, returning") flow.pickle_dump() return except task.RestartError: excs.append(straceback()) # Temporarily disable by MG because I don't know if fix_critical works after the # introduction of the new qadapters # reenabled by MsS disable things that do not work at low level # fix only prepares for restarting, and sets to ready if self.fix_qcritical: nfixed = flow.fix_queue_critical() if nfixed: print("Fixed %d QCritical error(s)" % nfixed) nfixed = flow.fix_abicritical() if nfixed: print("Fixed %d AbiCritical error(s)" % nfixed) # update database flow.pickle_dump() # Submit the tasks that are ready. try: nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10) self.nlaunch += nlaunch if nlaunch: cprint("[%s] Number of launches: %d" % (time.asctime(), nlaunch), "yellow") except Exception: excs.append(straceback()) # check status. flow.show_status() if excs: logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs)) self.exceptions.extend(excs) def callback(self): """The function that will be executed by the scheduler.""" try: return self._callback() except: # All exceptions raised here will trigger the shutdown! 
s = straceback() self.exceptions.append(s) # This is useful when debugging #try: # print("Exception in callback, will cancel all tasks") # for task in self.flow.iflat_tasks(): # task.cancel() #except Exception: # pass self.shutdown(msg="Exception raised in callback!\n" + s) def _callback(self): """The actual callback.""" if self.debug: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if all_ok: return self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit") # Handle failures. err_lines = [] # Shall we send a reminder to the user? delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.remindme_s: self.num_reminders += 1 msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. Aborting now" % retcode) err_lines.append(msg) #if delta_etime.total_seconds() > self.max_etime_s: # err_lines.append("\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s) # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.max_num_pyexcs: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.max_num_pyexcs) err_lines.append(boxed(msg)) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) if self.nlaunch > self.safety_ratio * self.flow.num_tasks: msg = "Too many jobs launched %d. 
Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_lines.append(boxed(msg)) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.max_num_abierrs: msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.max_num_abierrs) err_lines.append(boxed(msg)) # Test on the presence of deadlocks. g = self.flow.find_deadlocks() if g.deadlocked: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables, "\nrunning\n", g.running) if g.deadlocked and not g.runnables and not g.running: err_lines.append("No runnable job with deadlocked tasks:\n%s." % str(g.deadlocked)) if not g.runnables and not g.running: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() if not g.runnables and not g.running: err_lines.append("No task is running and cannot find other tasks to submit.") # Something wrong. Quit if err_lines: # Cancel all jobs. if self.killjobs_if_errors: cprint("killjobs_if_errors set to 'yes' in scheduler file. Will kill jobs before exiting.", "yellow") try: num_cancelled = 0 for task in self.flow.iflat_tasks(): num_cancelled += task.cancel() cprint("Killed %d tasks" % num_cancelled, "yellow") except Exception as exc: cprint("Exception while trying to kill jobs:\n%s" % str(exc), "red") self.shutdown("\n".join(err_lines)) return len(self.exceptions) def cleanup(self): """Cleanup routine: remove the pid file and save the pickle database""" try: os.remove(self.pid_file) except OSError as exc: logger.critical("Could not remove pid_file: %s", exc) # Save the final status of the flow. 
self.flow.pickle_dump() def shutdown(self, msg): """Shutdown the scheduler.""" try: self.cleanup() self.history.append("Completed on: %s" % time.asctime()) self.history.append("Elapsed time: %s" % self.get_delta_etime()) if self.debug: print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds()) retcode = self.send_email(msg) if self.debug: print("send_mail retcode", retcode) # Write file with the list of exceptions: if self.exceptions: dump_file = os.path.join(self.flow.workdir, "_exceptions") with open(dump_file, "wt") as fh: fh.writelines(self.exceptions) fh.write("Shutdown message:\n%s" % msg) lines = [] app = lines.append app("Submitted on: %s" % time.ctime(self.start_time)) app("Completed on: %s" % time.asctime()) app("Elapsed time: %s" % str(self.get_delta_etime())) if self.flow.all_ok: app("Flow completed successfully") else: app("Flow %s didn't complete successfully" % repr(self.flow.workdir)) app("use `abirun.py FLOWDIR debug` to analyze the problem.") app("Shutdown message:\n%s" % msg) print("") print("\n".join(lines)) print("") self._do_customer_service() if self.flow.all_ok: print("Calling flow.finalize()...") self.flow.finalize() #print("finalized:", self.flow.finalized) if self.rmflow: app("Flow directory will be removed...") try: self.flow.rmtree() except Exception: logger.warning("Ignoring exception while trying to remove flow dir.") finally: # Shutdown the scheduler thus allowing the process to exit. logger.debug('This should be the shutdown of the scheduler') # Unschedule all the jobs before calling shutdown #self.sched.print_jobs() if not has_sched_v3: for job in self.sched.get_jobs(): self.sched.unschedule_job(job) #self.sched.print_jobs() self.sched.shutdown() # Uncomment the line below if shutdown does not work! #os.system("kill -9 %d" % os.getpid()) def send_email(self, msg, tag=None): """ Send an e-mail before completing the shutdown. Returns 0 if success. 
""" try: return self._send_email(msg, tag) except: self.exceptions.append(straceback()) return -2 def _send_email(self, msg, tag): if self.mailto is None: return -1 header = msg.splitlines() app = header.append app("Submitted on: %s" % time.ctime(self.start_time)) app("Completed on: %s" % time.asctime()) app("Elapsed time: %s" % str(self.get_delta_etime())) app("Number of errored tasks: %d" % self.flow.num_errored_tasks) app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks) strio = cStringIO() strio.writelines("\n".join(header) + 4 * "\n") # Add the status of the flow. self.flow.show_status(stream=strio) if self.exceptions: # Report the list of exceptions. strio.writelines(self.exceptions) if tag is None: tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]" return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
class AlertSchedulerHandler():
    """
    Turns the cached alert definitions into alert objects and keeps the
    APScheduler instance in sync with them.

    NOTE(review): ``schedule_definition`` is called by several methods but is
    not defined in this part of the class; it is expected to be provided
    elsewhere.
    """
    TYPE_PORT = 'PORT'
    TYPE_METRIC = 'METRIC'
    TYPE_AMS = 'AMS'
    TYPE_SCRIPT = 'SCRIPT'
    TYPE_WEB = 'WEB'
    TYPE_RECOVERY = 'RECOVERY'

    def __init__(self, initializer_module, in_minutes=True):
        """
        :param initializer_module: object exposing ``config``, the
          configuration/definition caches, ``configuration_builder`` and
          ``recovery_manager`` used below
        :param in_minutes: stored on the instance; not read in this part of the class
        """
        self.initializer_module = initializer_module
        self.cachedir = initializer_module.config.alerts_cachedir
        self.stacks_dir = initializer_module.config.stacks_dir
        self.common_services_dir = initializer_module.config.common_services_dir
        self.extensions_dir = initializer_module.config.extensions_dir
        self.host_scripts_dir = initializer_module.config.host_scripts_dir
        self.configuration_builder = initializer_module.configuration_builder
        self._cluster_configuration = initializer_module.configurations_cache
        self.alert_definitions_cache = initializer_module.alert_definitions_cache
        self.config = initializer_module.config

        # the amount of time, in seconds, that an alert can run after its scheduled time
        alert_grace_period = int(
            self.config.get('agent', 'alert_grace_period', 5))

        apscheduler_standalone = False

        self.APS_CONFIG = {
            'apscheduler.threadpool.core_threads': 3,
            'apscheduler.coalesce': True,
            'apscheduler.standalone': apscheduler_standalone,
            'apscheduler.misfire_grace_time': alert_grace_period,
            'apscheduler.threadpool.context_injector':
                self._job_context_injector if not apscheduler_standalone else None,
            'apscheduler.threadpool.agent_config': self.config
        }

        self._collector = AlertCollector()
        self.__scheduler = Scheduler(self.APS_CONFIG)
        self.__in_minutes = in_minutes
        # NOTE: the attribute name keeps its historical spelling ("manger")
        # because it is part of the instance's public surface.
        self.recovery_manger = initializer_module.recovery_manager

        # register python exit handler
        ExitHelper().register(self.exit_handler)

    def _job_context_injector(self, config):
        """
        apscheduler hack to inject monkey-patching, context and configuration
        into all jobs inside the scheduler when it runs in embedded mode.

        Please note, this function is called in job context, thus everything
        done here should be cheap.

        :type config AmbariConfig.AmbariConfig
        """
        if not config.use_system_proxy_setting():
            from ambari_commons.network import reconfigure_urllib2_opener
            reconfigure_urllib2_opener(ignore_system_proxy=True)

    def exit_handler(self):
        """
        Exit handler: stops the scheduler.
        """
        self.stop()

    def update_definitions(self, event_type):
        """
        Reacts to a change of the alert definitions.

        :param event_type: "CREATE" reschedules every job with new instances;
          any other value only reschedules the jobs that changed
        """
        if event_type == "CREATE":
            # reschedule all jobs, creating new instances
            self.reschedule_all()
        else:
            # reschedule only the jobs that have changed
            self.reschedule()

    def __make_function(self, alert_def):
        return lambda: alert_def.collect()

    def start(self):
        """
        Loads the definitions and starts the scheduler. A running scheduler
        is shut down and replaced by a fresh one first.
        """
        if self.__scheduler is None:
            return

        if self.__scheduler.running:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(self.APS_CONFIG)

        # schedule each definition
        for alert_callable in self.__load_definitions():
            self.schedule_definition(alert_callable)

        logger.info(
            "[AlertScheduler] Starting {0}; currently running: {1}".format(
                str(self.__scheduler), str(self.__scheduler.running)))

        self.__scheduler.start()

    def stop(self):
        """
        Shuts the scheduler down without waiting for running jobs and
        replaces it with a fresh, not yet started, instance.
        """
        if self.__scheduler is not None:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(self.APS_CONFIG)

        logger.info("[AlertScheduler] Stopped the alert scheduler.")

    def reschedule(self):
        """
        Removes scheduled jobs whose UUID is no longer among the definitions.
        Schedules definitions whose UUID is not currently scheduled.
        """
        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        self.initializer_module.alert_status_reporter.reported_alerts.clear()

        # Snapshots taken before anything is unscheduled, so both passes below
        # compare against the same state.
        definition_uuids = set(definition.get_uuid() for definition in definitions)
        scheduled_names = set(job.name for job in scheduled_jobs)

        # jobs without valid UUIDs should be unscheduled
        for scheduled_job in scheduled_jobs:
            if scheduled_job.name in definition_uuids:
                continue

            jobs_removed += 1
            logger.info("[AlertScheduler] Unscheduling {0}".format(
                scheduled_job.name))
            self._collector.remove_by_uuid(scheduled_job.name)
            self.__scheduler.unschedule_job(scheduled_job)

        # if no job is found with the definition's UUID, schedule it
        for definition in definitions:
            if definition.get_uuid() in scheduled_names:
                continue

            jobs_scheduled += 1
            self.schedule_definition(definition)

        logger.info(
            "[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled"
            .format(str(jobs_scheduled), str(jobs_removed)))

    def reschedule_all(self):
        """
        Unschedules every scheduled job and schedules a new job for every
        loaded definition.
        """
        logger.info("[AlertScheduler] Rescheduling all jobs...")

        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # unschedule all scheduled jobs
        for scheduled_job in scheduled_jobs:
            jobs_removed += 1
            logger.info("[AlertScheduler] Unscheduling {0}".format(
                scheduled_job.name))
            self._collector.remove_by_uuid(scheduled_job.name)
            self.__scheduler.unschedule_job(scheduled_job)

        # for every definition, schedule a job
        for definition in definitions:
            jobs_scheduled += 1
            self.schedule_definition(definition)

        # {1} for the second counter: using {0} twice reported the
        # "unscheduled" number for both values.
        logger.info(
            "[AlertScheduler] Reschedule Summary: {0} unscheduled, {1} rescheduled"
            .format(str(jobs_removed), str(jobs_scheduled)))

    def collector(self):
        """
        gets the collector for reporting to the server
        """
        return self._collector

    def __load_definitions(self):
        """
        Builds an alert object for every definition found in
        ``self.alert_definitions_cache``; definitions that cannot be converted
        are skipped.

        :return: list of alert objects with their helpers set
        """
        definitions = []
        for cluster_id, command_json in self.alert_definitions_cache.iteritems():
            clusterName = command_json['clusterName'] if 'clusterName' in command_json else ''
            hostName = command_json['hostName'] if 'hostName' in command_json else ''
            publicHostName = command_json['publicHostName'] if 'publicHostName' in command_json else ''
            clusterHash = command_json['hash'] if 'hash' in command_json else None

            # log the cluster and cluster hash after loading the JSON
            if clusterName != '' and clusterHash is not None:
                logger.info(
                    '[AlertScheduler] Caching cluster {0} with alert hash {1}'.
                    format(clusterName, clusterHash))

            for definition in command_json['alertDefinitions']:
                alert = self.__json_to_callable(
                    clusterName, hostName, publicHostName,
                    Utils.get_mutable_copy(definition))

                if alert is None:
                    continue

                alert.set_helpers(self._collector, self._cluster_configuration,
                                  self.configuration_builder)
                definitions.append(alert)

        return definitions

    def __json_to_callable(self, clusterName, hostName, publicHostName,
                           json_definition):
        """
        Converts the JSON that represents a definition into the alert object
        matching its ``source.type``.

        :return: the alert, or None when the type is unknown or the
          definition could not be loaded (the error is logged)
        """
        alert = None

        try:
            source = json_definition['source']
            source_type = source.get('type', '')

            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "[AlertScheduler] Creating job type {0} with {1}".format(
                        source_type, str(json_definition)))

            if source_type == AlertSchedulerHandler.TYPE_METRIC:
                alert = MetricAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_AMS:
                alert = AmsAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_PORT:
                alert = PortAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
                source['stacks_directory'] = self.stacks_dir
                source['common_services_directory'] = self.common_services_dir
                source['extensions_directory'] = self.extensions_dir
                source['host_scripts_directory'] = self.host_scripts_dir
                alert = ScriptAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_WEB:
                alert = WebAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_RECOVERY:
                alert = RecoveryAlert(json_definition, source, self.config,
                                      self.recovery_manger)

            if alert is not None:
                alert.set_cluster(clusterName, json_definition['clusterId'],
                                  hostName, publicHostName)

        except Exception:
            # Best effort: one broken definition must not prevent the others
            # from loading; logger.exception records the traceback.
            logger.exception(
                "[AlertScheduler] Unable to load an invalid alert definition. It will be skipped."
            )

        return alert
class bakCron(object):
    """
    Schedules database backup jobs from the policy/server configuration files
    and re-creates them when the watched file changes.
    """

    def __init__(self):
        self.sched = Scheduler()
        self.sched.daemonic = False
        self.sched.start()
        self.assign_jobs()
        self.assign_monitor()

    @staticmethod
    def _policy_file():
        """Return the path of Policy.conf inside the nsccdbbak package."""
        return os.path.dirname(nsccdbbak.__file__) + "/conf/Policy.conf"

    @staticmethod
    def _read_sections(path):
        """
        Parse one configuration file into a list of dicts, one per section.

        A section named ``kind:ident`` contributes ``{kind: ident}``; every
        option of the section is added on top. Options whose name contains
        ``pass`` are base64-decoded.
        """
        parser = ConfigParser.ConfigParser(allow_no_value=True)
        parser.read(path)

        entries = []
        for section in parser.sections():
            # partition() keeps the whole name as the key when there is no
            # colon; slicing with find() == -1 silently dropped a character.
            kind, _, ident = section.partition(':')
            entry = {kind: ident}
            for key, value in parser.items(section):
                # allow_no_value=True can yield None, which cannot be decoded.
                if value is not None and 'pass' in key:
                    entry[key] = base64.decodestring(value)
                else:
                    entry[key] = value
            entries.append(entry)
        return entries

    def get_fileconfig(self):
        '''
        Read Policy.conf and Server.conf from the ``conf`` directory of the
        installed nsccdbbak package.

        Returns a ``(policys, servers)`` tuple of lists of dicts.
        '''
        serverfile = os.path.dirname(nsccdbbak.__file__) + "/conf/Server.conf"
        policys = self._read_sections(self._policy_file())
        servers = self._read_sections(serverfile)
        return policys, servers

    def assign_jobs(self):
        '''
        Read the configuration files and register the global and incremental
        backup cron jobs for every policy whose ``flag`` is '1'.

        A policy without a matching server entry is reported and skipped.
        '''
        policys, servers = self.get_fileconfig()

        for policy in policys:
            if policy.get('flag') != '1':
                continue

            server_name = policy.get('server')
            server_info = None
            if server_name is not None:
                for candidate in servers:
                    if candidate.get('server') == server_name:
                        # no break: the last matching entry wins, as before
                        server_info = candidate

            if server_info is None:
                print('no server configuration found for policy %s' % server_name)
                continue

            # Empty options mean "not set": the scheduler expects None then.
            settings = dict((key, None if value == '' else value)
                            for key, value in policy.items())

            print([server_info, settings['bakcon']])

            # NOTE(review): second='*/3' makes the global backup fire every
            # three seconds within each matching minute; kept as found, but it
            # looks like a debugging leftover - confirm.
            self.sched.add_cron_job(self.glob_bak,
                                    args=[server_info, settings['bakcon']],
                                    month=settings['globmonth'],
                                    day=settings['globday'],
                                    day_of_week=settings['globweekday'],
                                    hour=settings['globhour'],
                                    minute=settings['globminute'],
                                    second='*/3',
                                    name='glob_bak_' + server_name)

            # incr_bak takes (serConf, bakcontainer) just like glob_bak;
            # without args the job would fail every time it fires.
            self.sched.add_cron_job(self.incr_bak,
                                    args=[server_info, settings['bakcon']],
                                    month=settings['incmonth'],
                                    day=settings['incday'],
                                    day_of_week=settings['incweekday'],
                                    hour=settings['inchour'],
                                    minute=settings['incminute'],
                                    name='incr_bak_' + server_name)

        print(self.sched.get_jobs())
        print('assign jobs finished!')

    def assign_monitor(self):
        '''
        Register the file-monitoring job.
        '''
        self.sched.add_interval_job(self.monitorfile, name='monitorDaemon')
        print(self.sched.get_jobs())
        print('assign monitor finished')

    def filechange(self, monitor, file1, file2, evt_type):
        '''
        Callback for the file monitor: when the watched file changed,
        unschedule every job except the monitor itself and register the
        backup jobs again.
        '''
        if evt_type == gio.FILE_MONITOR_EVENT_CHANGED:
            print('file changed')
            for job in self.sched.get_jobs():
                print(job)
                if job.name != 'monitorDaemon':
                    self.sched.unschedule_job(job)
            self.assign_jobs()

    def monitorfile(self):
        '''
        Start watching the configuration file and run the GLib main loop
        that delivers the change notifications (blocks until the loop ends).
        '''
        # NOTE(review): ``filepath`` is never assigned in this class. Use it
        # when a caller/subclass provides it; otherwise fall back to the
        # policy file, which is what filechange() reloads - confirm.
        watched_path = getattr(self, 'filepath', None) or self._policy_file()

        gfile = gio.File(watched_path)
        monitor = gfile.monitor_file(gio.FILE_MONITOR_NONE, None)
        monitor.connect("changed", self.filechange)
        gobject.threads_init()
        gml = gobject.MainLoop()
        gml.run()

    def _backup_and_upload(self, serConf, bakcontainer, run_backup, failure_message):
        """Run one backup via ``run_backup(conn)`` and upload the file on success."""
        conndb = ConnDatabase(serConf)
        connStor = ConnStorage(serConf)
        result, bakfilepath = run_backup(conndb.conn)
        if result:
            connStor.upload_file(bakcontainer, bakfilepath)
        else:
            print(failure_message)

    def glob_bak(self, serConf, bakcontainer):
        '''
        Run one global backup and upload the backup file to ``bakcontainer``.
        '''
        timestr = time.strftime(r"%Y-%m-%d_%H-%M-%S", time.localtime())
        print(timestr)
        self._backup_and_upload(serConf, bakcontainer,
                                lambda conn: conn.glob_bak(),
                                'global backup error!')

    def incr_bak(self, serConf, bakcontainer):
        '''
        Run one incremental backup and upload the backup file to ``bakcontainer``.
        '''
        self._backup_and_upload(serConf, bakcontainer,
                                lambda conn: conn.incr_bak(),
                                'increase backup error!')
class Sched(Basemodule):
    """
    Scheduler module: keeps a set of named cron jobs that post a command to
    another module through the global queue when they fire.

    Each ``self.jobstore`` entry is a list laid out as
    ``[module, action, year, month, day, week, day_of_week, hour, minute, second]``.
    """

    # Default location of the persisted job definitions.
    JOBS_CONFIG_PATH = '/home/hasip/hasip/config/jobs/example.jobs'

    # Names of the jobstore entry positions, in index order.
    _JOB_FIELDS = ('module', 'action', 'year', 'month', 'day', 'week',
                   'day_of_week', 'hour', 'minute', 'second')

    # ################################################################################
    # initialization of module and optional load of config files
    # ################################################################################

    def __init__(self, instance_queue, global_queue):
        """
        Start the scheduler and register every job returned by the job
        configuration reader.

        instance_queue -- worker queue this module receives its requests on
        global_queue   -- queue used to communicate back to the main thread
        """
        #
        # "sched|port|command or action"
        #
        self.logger = logging.getLogger('Hasip.sched')
        self.sched = Scheduler()
        self.items = ConfigItemReader()
        self.jobs_config = ConfigJobReader()
        self.mod_list = self.items.get_items_dict()  # getting module list from item file
        self.queue_identifier = 'sched'  # this is the 'module address'
        self.instance_queue = instance_queue  # worker queue to receive jobs
        self.global_queue = global_queue  # queue to communicate back to main thread
        self.jobstore = {}

        self.sched.start()

        # read jobs configuration
        self.jobstore = self.jobs_config.get_jobs_dict()
        for name in self.jobstore.keys():
            self.logger.debug(self.jobstore[name][2])
            self._schedule(name, self.jobstore[name])

    def _schedule(self, name, entry):
        """Register one cron job for a jobstore entry (layout: see class docstring)."""
        timing = dict(zip(self._JOB_FIELDS[2:], entry[2:10]))
        self.sched.add_cron_job(self.send_msg, name=name,
                                args=(entry[0], entry[1]), **timing)

    # ################################################################################
    # main thread of this module file which runs in background and constantly checks working queue for new tasks.
    # ################################################################################

    def worker(self):
        """
        Serve requests from the instance queue forever, dispatching on the
        ``cmd`` field. Unknown commands are logged and ignored.
        """
        handlers = {
            "create": self.create,
            "delete": self.delete,
            "list_jobs": self.list_jobs
        }

        while True:
            request = self.instance_queue.get(True)

            senderport = request.get("module_from_port")
            sender = request.get("module_from")
            port = request.get("module_addr")
            action = request.get("cmd")
            optargs = request.get("opt_args")

            handler = handlers.get(action)
            if handler is None:
                # One malformed message must not end the worker loop.
                self.logger.warning("Ignoring unknown scheduler command: %r", action)
                continue

            handler(sender, senderport, port, optargs)

    # ################################################################################
    #
    # "private" methods from here on...
    #
    # ################################################################################

    def create(self, sender, senderport, port, optargs):
        """
        Put a job into the running scheduler and store it persistently.
        A job with the same name is replaced.

        optargs -- mapping with the keys ``name``, ``module``, ``action`` and
                   the cron fields (year, month, day, week, day_of_week,
                   hour, minute, second)
        """
        name = optargs.get('name')
        if name in self.jobstore:
            self.delete(sender, senderport, port, name)

        entry = [optargs.get(field) for field in self._JOB_FIELDS]
        self._schedule(name, entry)
        self.jobstore.update({name: entry})
        self.write_to_config()

    def delete(self, sender, senderport, port, optargs):
        """
        Unschedule every job named ``optargs`` and drop it from the job store.
        The configuration file is rewritten only when something was removed.
        """
        removed = False
        for job in self.sched.get_jobs():
            if job.name == optargs:
                self.sched.unschedule_job(job)
                # pop(): the scheduler may know a job the store does not.
                self.jobstore.pop(job.name, None)
                removed = True

        if removed:
            self.write_to_config()

    def list_jobs(self, sender, senderport, port, optargs):
        """Reply to ``sender`` with one dict per stored job."""
        listing = []
        for name in self.jobstore.keys():
            entry = self.jobstore[name]
            listing.append({
                'jobname': name,
                'device': entry[0],
                'action': entry[1],
                'year': entry[2],
                'month': entry[3],
                'day': entry[4],
                'week': entry[5],
                'day_of_week': entry[6],
                'hour': entry[7],
                'min': entry[8],
                'sec': entry[9],
            })
            self.logger.debug("help")

        queue_msg = {
            'module_from_port': str(port),
            'module_from': self.queue_identifier,
            'module_rcpt': sender,
            'module_addr': senderport,
            'cmd': 'reply',
            'opt_args': listing
        }
        self.global_queue.put(queue_msg)

    def send_msg(self, module, action):
        """
        Job body: post ``action`` to ``module`` through the global queue.
        Nothing is sent when ``module`` is not in the item file.
        """
        if module in self.mod_list:  # checking existence of requested module
            rcpt = self.mod_list[module][0]  # setting receiving module from item file
            mid = self.mod_list[module][1]  # setting module id from item file
            msg = {  # creating queue message
                'module_from_port': 0,
                'module_from': 'sched',
                'module_rcpt': rcpt,
                'module_addr': mid,
                'cmd': action,
                'opt_args': ''
            }
            self.global_queue.put(msg)

    def write_to_config(self, path=None):
        """
        Persist the job store as one configuration section per job.

        path -- target file; defaults to ``JOBS_CONFIG_PATH``

        The section name is the upper-cased job name. Options are written in
        jobstore index order, so ``day`` (index 4) and ``week`` (index 5)
        carry the values every other method of this class associates with them.
        """
        if path is None:
            path = self.JOBS_CONFIG_PATH

        # Build the whole document first: opening the file truncates it, so a
        # failure while filling the parser must happen before that.
        conf = ConfigParser()
        for name in self.jobstore.keys():
            section = name.upper()
            if not conf.has_section(section):
                conf.add_section(section)
            for option, value in zip(self._JOB_FIELDS, self.jobstore[name]):
                conf.set(section, option, value)

        with open(path, 'w') as f:
            conf.write(f)