class SnortScheduler(SimplePlugin):
    """ Enables scheduling for Snortmanager """
    scheduler = None  # The APScheduler instance

    def __init__(self, bus):
        """ Initiates the scheduler. """
        SimplePlugin.__init__(self, bus)
        self.scheduler = Scheduler()

    def __initiate_jobs(self):
        """ Adds scheduled tasks if the database is empty. """
        sched = self.scheduler
        sched.add_cron_job(update_snort_rules, hour=7, jobstore='sql')
        sched.add_cron_job(produce_configuration_files, hour=9, jobstore='sql')

    def start(self):
        """ Initiates the scheduler when Snortmanager starts. """
        sched = self.scheduler
        sched.add_jobstore(ScheduleStore(), 'sql')
        if len(sched.get_jobs()) == 0:
            self.__initiate_jobs()
        sched.start()

    def stop(self):
        """ Stops the scheduler service when the thread dies. """
        self.scheduler.shutdown(wait=False)

    def restart(self):
        """ Restarts the service if necessary. """
        self.stop()
        self.start()

    def get_jobs(self):
        return self.scheduler.get_jobs()
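# Hypothetical usage sketch (not part of the original Snortmanager code): a minimal
# example of wiring the plugin above into the CherryPy engine bus, which is the usual
# way SimplePlugin subclasses get their start()/stop() calls.
import cherrypy

snort_scheduler = SnortScheduler(cherrypy.engine)
snort_scheduler.subscribe()

cherrypy.engine.start()
cherrypy.engine.block()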
class FreshPots(BotPlugin):
    pots = [
        'http://i.imgur.com/Q2b54vc.jpg',
        'http://i.imgur.com/SYsdsew.jpg',
        'http://i.imgur.com/caIbQMh.png',
        'http://i.imgur.com/MCwiikl.jpg',
        'http://i.imgur.com/g4sFHwz.jpg',
        'http://i.imgur.com/vnuJQ4S.gif',
        'http://i.imgur.com/cm3Y6jX.jpg',
        'http://i.imgur.com/ZcKZTFU.jpg',
        'http://i.imgur.com/4mEaNIp.jpg',
        'http://i.imgur.com/gDukRFu.png',
        'http://i.imgur.com/1MDO9fV.png',
        'http://i.imgur.com/U5cFX3M.jpg'
    ]

    def activate(self):
        super(FreshPots, self).activate()
        self.sched = Scheduler(coalesce=True)
        self.sched.start()
        self.sched.add_cron_job(
            self.fresh_pots, kwargs={'message': 'fresh pots time'},
            day_of_week='mon-fri', hour=11)
        self.sched.add_cron_job(
            self.fresh_pots, kwargs={'message': 'fresh pots time'},
            day_of_week='mon-fri', hour=15)
        logging.info(self.sched.get_jobs())

    def callback_message(self, conn, mess):
        body = mess.getBody().lower()
        if body.find('coffee') != -1 or body.find('fresh pots') != -1:
            self.fresh_pots(mess.getFrom())

    def fresh_pots(self, channel='#cloudant-bristol', message=None):
        if message:
            self.send(
                channel,
                message,
                message_type='groupchat'
            )
        self.send(
            channel,
            choice(self.pots),
            message_type='groupchat'
        )
        self.check()

    def check(self):
        # Push any job that would fire within the next hour back by an hour.
        for job in self.sched.get_jobs():
            delta = job.next_run_time - datetime.now()
            hour_delta = timedelta(seconds=3600)
            if delta < hour_delta:
                job.compute_next_run_time(datetime.now() + hour_delta)
class Job_Manager(object):
    def __init__(self, config):
        self.scheduler = Scheduler(config["SCHEDULER"])
        if self.scheduler is not None:
            self.scheduler.start()

    def add_job(self, task, interval, name, *args):
        self.scheduler.add_interval_job(task, seconds=interval, args=args,
                                        name=name, max_instances=50)

    def remove_job(self, name):
        matchedJobs = self.__get_jobs(name)
        self.__remove_jobs(matchedJobs)

    def __get_jobs(self, name):
        return [job for job in self.scheduler.get_jobs() if job.name == name]

    def __remove_jobs(self, matchedJobs):
        for job in matchedJobs:
            self.scheduler.unschedule_job(job)
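# Hypothetical usage sketch (not from the original module) for Job_Manager above.
# "apscheduler.daemonic" is a standard APScheduler 2.x global-config key; poll_feeds
# stands in for whatever task the surrounding application actually schedules.
def poll_feeds():
    pass  # fetch and process feeds here

manager = Job_Manager({"SCHEDULER": {"apscheduler.daemonic": True}})
manager.add_job(poll_feeds, 300, "feed-poller")   # run every 5 minutes
manager.remove_job("feed-poller")                 # unschedule every job named "feed-poller"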
class ActivityScheduler(baseRunner.BaseRunner):
    def __init__(self):
        super(ActivityScheduler, self).__init__()

        # set logging options as defined in config file
        logConf = self.config._sections["logging"]
        # remove default __name__ item
        del logConf["__name__"]
        logConf["level"] = int(logConf["level"])
        logConf["filename"] = join(dirname(__file__), logConf["filename"])
        logging.basicConfig(**logConf)

        # initialize scheduler
        self.scheduler = Scheduler()
        self.scheduler.start()

        # create initial schedule
        if not self.scheduler.get_jobs():
            self.createSchedule()

        # main loop
        while True:
            try:
                time.sleep(10)
            except KeyboardInterrupt:
                logging.info("Shutting down..")
                self.scheduler.shutdown()
                break

    def createSchedule(self):
        logging.info("Schedule requests..")
        schedules = self.config._sections["schedule"]
        # remove default __name__ item
        del schedules["__name__"]

        for methodName, schedule in schedules.items():
            # schedule handler requests (wrapper method gets called with
            # cron-like notation and the method name)
            # name parameter is given for logging/debugging purposes only
            self.scheduler.add_cron_job(self.wrap, *schedule.split(),
                                        args=[methodName],
                                        misfire_grace_time=120,
                                        name=methodName)
class MyScheduler:
    # APScheduler 2.x event constants are powers of two.
    EVENTS = {
        '1': 'EVENT_SCHEDULER_START',
        '2': 'EVENT_SCHEDULER_SHUTDOWN',
        '4': 'EVENT_JOBSTORE_ADDED',
        '8': 'EVENT_JOBSTORE_REMOVED',
        '16': 'EVENT_JOBSTORE_JOB_ADDED',
        '32': 'EVENT_JOBSTORE_JOB_REMOVED',
        '64': 'EVENT_JOB_EXECUTED',
        '128': 'EVENT_JOB_ERROR',
        '256': 'EVENT_JOB_MISSED'
    }

    def __init__(self, db_path='sqlite:///scheduler.db'):
        self.scheduler = Scheduler()
        self.scheduler.add_jobstore(SQLAlchemyJobStore(url=db_path), 'default')

    def start(self):
        self.scheduler.start()

    def add_job(self, job, date, args):
        job = self.scheduler.add_date_job(job, date, args)
        print job

    def jobs(self):
        return self.scheduler.get_jobs()

    def remove_job(self, notification_id):
        jobs = self.jobs()
        for job in jobs:
            if int(job.args[0]) == int(notification_id):
                self.scheduler.unschedule_job(job)
                return True
        return False

    def shutdown(self):
        self.scheduler.shutdown()
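# Hypothetical usage sketch (not from the original module) for MyScheduler above.
# The job's first positional argument doubles as the key that remove_job() matches on.
from datetime import datetime, timedelta

def send_notification(notification_id, text):
    pass  # deliver the notification here

ms = MyScheduler('sqlite:///scheduler.db')
ms.start()
ms.add_job(send_notification, datetime.now() + timedelta(minutes=5), (42, 'time for coffee'))
ms.remove_job(42)   # True if a matching job was found and unscheduled
ms.shutdown()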
def start_schedule():
    # if __name__ == '__main__':
    scheduler_pl = Scheduler(daemonic=False)
    scheduler_pl.print_jobs()
    scheduler_pl.shutdown()
    scheduler_pl.add_jobstore(ShelveJobStore('/tmp/db_pl_schedule'), 'file')

    v_current_jobs = scheduler_pl.get_jobs()
    print v_current_jobs
    if v_current_jobs:  # if the job already exists, unschedule it first
        scheduler_pl.unschedule_func(upload_processlist)

    scheduler_pl.add_interval_job(upload_processlist, minutes=1)
    scheduler_pl.start()
    print 'success!'
    scheduler_pl.print_jobs()
class ProgramHandler: def __init__(self, db, radio_station): self.__db = db self.__radio_station = radio_station self.__scheduler = None self.__scheduled_jobs = None self.__start_listeners() self.__radio_station.logger.info("Done initialising ProgramHandler for {0}".format(radio_station.station.name)) def run(self): self.run_today_schedule() def __prepare_schedule(self): self.__load_programs() self.__scheduler = Scheduler() self.__scheduled_jobs = dict() def run_today_schedule(self): self.__prepare_schedule() self.__scheduler.start() self.__schedule_programs() self.__schedule_next_day_scheduler() print self.__scheduler.get_jobs() def stop(self): self.__stop_program() # any clean up goes here # unschedule stuff def __schedule_next_day_scheduler(self): #TODO: make this safe for differebt timezones! base_date = date.today() + timedelta(1,0) tomorrow_date = datetime.combine(base_date, time()) #add the timezone offset tomorrow_date = tomorrow_date + timedelta(0, timezone(self.__radio_station.station.timezone).utcoffset(datetime.now()).seconds) self.__scheduler.add_date_job(getattr(self, 'run_today_schedule'), tomorrow_date) #schedule the scheduler to reload at midnight def __schedule_programs(self): for scheduled_program in self.__scheduled_programs: if not self.__is_program_expired(scheduled_program): self.__add_scheduled_job(scheduled_program) self.__radio_station.logger.info( "Scheduled program {0} for station {1} starting at {2}".format(scheduled_program.program.name, self.__radio_station.station.name, scheduled_program.start)) return def __add_scheduled_job(self, scheduled_program): program = RadioProgram(self.__db, scheduled_program, self.__radio_station) scheduled_job = self.__scheduler.add_date_job(getattr(program, 'start'), self.__get_program_start_time(scheduled_program).replace( tzinfo=None)) self.__scheduled_jobs[scheduled_program.id] = scheduled_job def __delete_scheduled_job(self, index): if index in self.__scheduled_jobs: self.__scheduler.unschedule_job(self.__scheduled_jobs[index]) del self.__scheduled_jobs[index] def __stop_program(self): #self.__running_program.stop() return def __run_program(self): #self.__running_program.run() return def __load_programs(self): self.__scheduled_programs = self.__db.query(ScheduledProgram).filter( ScheduledProgram.station_id == self.__radio_station.id).filter(text("date(start at TIME ZONE 'UTC') = current_date at TIME ZONE 'UTC'")).filter( ScheduledProgram.deleted == False).all() self.__radio_station.logger.info("Loaded programs for {0}".format(self.__radio_station.station.name)) def __load_program(self, id): return self.__db.query(ScheduledProgram).filter(ScheduledProgram.id == id).first() def __start_listeners(self): t = threading.Thread(target=self.__listen_for_scheduling_changes, args=(DefaultConfig.SCHEDULE_EVENTS_SERVER_IP, DefaultConfig.SCHEDULE_EVENTS_SERVER_PORT)) t.start() def __listen_for_scheduling_changes(self, ip, port): sck = socket.socket(socket.AF_INET, socket.SOCK_STREAM) addr = (ip, port) #It may not be possible to connect after restart, TIME_WAIT could come into play etc. 
Anyway, keep trying connected = False while not connected: try: sck.connect(addr) connected = True except: self.__radio_station.logger.error("Could not connect to server, retrying in 30 ...") sleep(30) sck.send(json.dumps({'station':self.__radio_station.id, 'action':'register'})) while True: data = sck.recv(1024) try: event = json.loads(data) if event["action"] == "delete": self.__delete_scheduled_job(event["id"]) self.__radio_station.logger.info("Scheduled program with id {0} has been deleted".format(event["id"])) elif event["action"] == "add": scheduled_program = self.__load_program(event["id"]) if not self.__is_program_expired(scheduled_program): self.__add_scheduled_job(scheduled_program) self.__radio_station.logger.info( "Scheduled program with id {0} has been added at time {1}".format(event["id"], scheduled_program.start)) elif event["action"] == "update": self.__delete_scheduled_job(event["id"]) scheduled_program = self.__load_program(event["id"]) if not self.__is_program_expired(scheduled_program, scheduled_program.program.duration): self.__add_scheduled_job(scheduled_program) self.__radio_station.logger.info( "Scheduled program with id {0} has been moved to start at time {1}".format(event["id"], scheduled_program.start)) except: pass #Most probably a JSON parse error """ Gets the program to run from the current list of programs that are lined up for the day """ def __get_current_program(self): for program in self.__scheduled_programs: if not self.__is_program_expired(program): return program """ Returns whether or not the time for a particular program has passed """ def __is_program_expired(self, scheduled_program): now = pytz.utc.localize(datetime.utcnow()) return (scheduled_program.start + scheduled_program.program.duration) < (now + timedelta(minutes=1)) def __get_program_start_time(self, scheduled_program): now = datetime.now(dateutil.tz.tzlocal()) if scheduled_program.start < now: # Time at which program begins is already past return now + timedelta(seconds=5) # 5 second scheduling allowance else: return scheduled_program.start + timedelta(seconds=5) # 5 second scheduling allowance
class DawnduskAPI: """ dawndusk API """ def __init__(self, lgt, lat, use_cron, myxpl, log): """ Init the dawndusk API @param lgt : longitude of the observer @param lat : latitude of the observer """ self.use_cron = use_cron self.log = log self.myxpl = myxpl if self.use_cron == False: self._scheduler = Scheduler() self._scheduler.start() else: self._cronquery = CronQuery(self.myxpl, self.log) self.mycity = ephem.Observer() self.mycity.lat, self.mycity.lon = lat, lgt self.mycity.horizon = '-6' self.job = None self.job_test_dawn = None self.job_test_dusk = None def __del__(self): """ Kill the dawndusk API @param lgt : longitude of the observer @param lat : latitude of the observer """ if self.use_cron == True: self._cronquery.halt_job("dawndusk") self._cronquery.halt_job("dawn-test") self._cronquery.halt_job("dusk-test") else : self._scheduler.shutdown() def sched_add(self, sdate, cb_function, label): """ Add an event in the schedulered tasks @param sdate : the date of the event @param cb_function : the callback function to call @param : the label of the event """ self.log.debug("dawndusk.schedAdd : Start ... %s" % label) if self.use_cron == False: if label == "dawn" or label == "dusk": self.job = self._scheduler.add_date_job(cb_function, \ sdate, args = [label]) self.log.debug("dawndusk.schedAdd : Use internal cron \ for %s" % label) elif label == "dawn-test": self.job_test_dawn = self._scheduler.add_date_job\ (cb_function, sdate, args = ["dawn"]) self.log.debug("dawndusk.schedAdd : Use internal cron \ for %s" % "dawn") elif label == "dusk-test": self.job_test_dusk = self._scheduler.add_date_job\ (cb_function, sdate, args = ["dusk"]) self.log.debug("dawndusk.schedAdd : Use internal cron \ for %s" % "dusk") for i in self._scheduler.get_jobs(): self.log.debug("APScheduler : %-10s | %8s" % \ (str(i.trigger), i.runs)) else : self.log.debug("dawndusk.schedAdd : Use external cron ...") if label == "dawn" or label == "dusk": device = "dawndusk" elif label == "dawn-test": device = "dawn-test" elif label == "dusk-test": device = "dusk-test" if self._cronquery.status_job(device, extkey = "current") \ != "halted": self._cronquery.halt_job(device) self.log.debug("dawndusk.schedAdd : Halt old device") nstmess = XplMessage() nstmess.set_type("xpl-trig") nstmess.set_schema("dawndusk.basic") nstmess.add_data({"type" : "dawndusk"}) if label == "dawn": nstmess.add_data({"status" : "dawn"}) elif label == "dusk": nstmess.add_data({"status" : "dusk"}) elif label == "dawn-test": nstmess.add_data({"status" : "dawn"}) elif label == "dusk-test": nstmess.add_data({"status" : "dusk"}) if self._cronquery.start_date_job(device, nstmess, sdate): self.log.debug("dawndusk.schedAdd : External cron activated") self.log.debug("dawndusk.schedAdd : Done :)") else: self.log.error("dawndusk.schedAdd : Can't activate \ external cron") self.log.debug("dawndusk.schedAdd : Done :(") return False self.log.info("Add a new event of type %s at %s" % (label, sdate)) return True def get_next_dawn(self): """ Return the date and time of the next dawn @return : the next dawn daytime """ self.mycity.date = datetime.datetime.today() dawn = ephem.localtime(self.mycity.next_rising(ephem.Sun(), \ use_center = True)) return dawn def get_next_dusk(self): """ Return the date and time of the dusk @return : the next dusk daytime """ self.mycity.date = datetime.datetime.today() dusk = ephem.localtime(self.mycity.next_setting(ephem.Sun(), \ use_center = True)) return dusk def get_next_fullmoon_dawn(self): """ Return the date and time of the next dawn and 
dusk of the next fullmoon @return : the next dawn daytime """ self.mycity.date = self._get_next_fullmoon() dawn = ephem.localtime(self.mycity.next_rising(ephem.Moon(), \ use_center = True)) dusk = ephem.localtime(self.mycity.next_setting(ephem.Moon(), \ use_center = True)) if dawn > dusk: dawn = ephem.localtime(self.mycity.previous_rising(ephem.Moon(), \ use_center = True)) return dawn def get_next_fullmoon_dusk(self): """ Return the date and time of the dusk of the next fullmoon @return : the next dusk daytime """ self.mycity.date = self._get_next_fullmoon() dusk = ephem.localtime(self.mycity.next_setting(ephem.Moon(), \ use_center = True)) return dusk def get_next_fullmoon(self): """ Return the date and time of the next fullmoon @return : the next full moon daytime """ dusk = ephem.localtime(self._get_next_fullmoon()) return dusk def _get_next_fullmoon(self): """ Return the date and time of the next full moon @return : the next full moon daytime """ now = datetime.datetime.today() nextfullmoon = ephem.next_full_moon(now) return nextfullmoon
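# Hypothetical usage sketch (not from the original plugin) for the internal-scheduler
# branch (use_cron=False) of DawnduskAPI above. A plain logging logger stands in for
# the plugin's logger; myxpl is only stored in this branch, so None is passed here.
import logging

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("dawndusk")

def on_dawndusk(label):
    log.info("dawndusk event: %s", label)

# First argument is longitude, second is latitude (strings, as pyephem expects).
api = DawnduskAPI("5.0433", "47.3522", False, None, log)
api.sched_add(api.get_next_dawn(), on_dawndusk, "dawn")
api.sched_add(api.get_next_dusk(), on_dawndusk, "dusk")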
class SchedulerDaemon(Daemon): def __init__(self, pid, config): super( SchedulerDaemon, self ).__init__(pid) self.config = config # set DaemonArgs for CommandDispatcher daemonArgs = DaemonArgs(config) # setup logger self.logger = None if os.path.exists(daemonArgs.log_file): logging.config.fileConfig(daemonArgs.log_file) self.logger = logging.getLogger('framework') # sftp settings self.sftpHost = self.config.get("sftp", "host") self.sftpPort = int(self.config.get("sftp", "port")) self.sftpRemotePath = self.config.get("sftp", "remote_path") self.sftpUsername = self.config.get("sftp", "username") self.sftpPassword = self.config.get("sftp", "password") or None self.sftpPrivateKey = self.config.get("sftp", "pkey") or None self.sftpPrivateKeyPassword = self.config.get("sftp", "pkey_password") or None self.sftpPrivateKeyType = self.config.get("sftp", "pkey_type") or None if self.sftpPrivateKeyType.lower() != 'rsa' \ and self.sftpPrivateKeyType.lower() != 'dss': self.sftpPrivateKeyType = None self.jobSubmitInterval = int(self.config.get("scheduler", "jobsubmit_interval")) or 10 self.jobCleanupInterval = int(self.config.get("scheduler", "jobcleanup_interval")) or 30 self.scheduler = Scheduler(daemonic=True) self.cronScheduleSequence = ('minute', 'hour', 'day', 'month', 'day_of_week') @transaction.commit_on_success def saveJob(self, status, frameworkJobId, scheduledJob): now = datetime.now() newJob = None #create new job if frameworkJobId is not None: newJob, created = Job.objects.get_or_create( frameworkid=frameworkJobId ) newJob.name = scheduledJob.job_name newJob.started = now newJob.workflow = scheduledJob.workflow newJob.is_public = scheduledJob.is_public newJob.owner = scheduledJob.created_by newJob.schedule = scheduledJob newJob.status = status else: newJob = Job( name=scheduledJob.job_name, started = now, workflow = scheduledJob.workflow, is_public = scheduledJob.is_public, owner = scheduledJob.created_by, schedule = scheduledJob, status = status ) newJob.save() @transaction.commit_on_success def submitJobToFramework(self, **kwargs): jobCommand = 'job' daemonArgs = DaemonArgs(self.config) daemonArgs.command = jobCommand unScheduledJob = kwargs['unScheduledJob'] is_fileFeeder = False fileFeederUploadedFile = None del daemonArgs.param[:] # go through all parameters for parameter in unScheduledJob.parameters.all(): # add parameter to daemonArgs.param if parameter.service and parameter.param_key and parameter.param_value: # check if a file feeder is used if parameter.service == settings.FILE_FEEDER_ID: is_fileFeeder = True fileFeederUploadedFile = parameter.param_value remoteFeederFile = os.path.join(self.sftpRemotePath, parameter.param_value) parameterString = '%s.%s=%s' % ( parameter.service, parameter.param_key, remoteFeederFile ) else: parameterString = '%s.%s=%s' % ( parameter.service, parameter.param_key, parameter.param_value ) self.logger.debug("add parameter string: %s" % parameterString) daemonArgs.param.append([parameterString]) # in case of a filefeeder upload file to framework server if is_fileFeeder: self.logger.debug("is file feeder") sftp = None transport = None try: transport = Transport((self.sftpHost, self.sftpPort)) if self.sftpPassword: transport.connect(username=self.sftpUsername, password=self.sftpPassword) else: privateKey = None if self.sftpPrivateKeyType and self.sftpPrivateKeyType.lower() == 'rsa': privateKey = RSAKey.from_private_key_file(self.sftpPrivateKey, password=self.sftpPrivateKeyPassword ) if self.sftpPrivateKeyType and self.sftpPrivateKeyType.lower() == 
'dss': privateKey = DSSKey.from_private_key_file(self.sftpPrivateKey, password=self.sftpPrivateKeyPassword ) transport.connect(username=self.sftpUsername, pkey=privateKey) sftp = SFTPClient.from_transport(transport) filePath = os.path.join( settings.MEDIA_ROOT, fileFeederUploadedFile ) remotePath = os.path.join( self.sftpRemotePath, fileFeederUploadedFile ) self.logger.debug("uploading file from %s to %s on remote machine" % (filePath, remotePath)) sftp.put(filePath, remotePath) # sftp.put(filePath, remotePath, confirm=False) sftp.chmod( remotePath, 0644 ) self.logger.debug("put OK") except IOError as e: self.logger.error("IOError: %s. Will continue with next scheduled job." % e) self.saveJob(Job.FAILED_STATUS, None, unScheduledJob) except PasswordRequiredException as e: self.logger.error("PasswordRequiredException: %s. Will continue with next scheduled job." % e) self.saveJob(Job.FAILED_STATUS, None, unScheduledJob) except SSHException as e: self.logger.error("SSH Exception: %s. Will continue with next scheduled job." % e) self.saveJob(Job.FAILED_STATUS, None, unScheduledJob) except Exception as e: self.logger.error("Unkown SFTP problem. Will continue with next scheduled job. %s" % e) self.saveJob(Job.FAILED_STATUS, None, unScheduledJob) finally: if sftp is not None: sftp.close() if transport is not None: transport.close() # set job workflow daemonArgs.jd_workflow = unScheduledJob.workflow.name frameworkJobId = None try: setattr(daemonArgs, jobCommand, 'submit') frameworkJobId = self.sendFrameworkCommand(jobCommand, daemonArgs) self.saveJob(Job.PROCESSING_STATUS, frameworkJobId, unScheduledJob) except WorkflowNotDeployedException: # The workflow is not deployed in the framework. To prevent the scheduler retrying continuously # we disable this job unScheduledJob.status = Schedule.DEACTIVATE_STATUS unScheduledJob.save() except: self.saveJob(Job.FAILED_STATUS, None, unScheduledJob) finally: daemonArgs.clean(jobCommand) if unScheduledJob.scheduled_start is not None: unScheduledJob.status = Schedule.DEACTIVATED_STATUS unScheduledJob.save() def updateProcessingJobs(self): jobCommand = 'job' processingJobs = Job.objects.filter(status=Job.PROCESSING_STATUS) daemonArgs = DaemonArgs(self.config) if len(list(processingJobs)) != 0: jobs_dict = {} try: setattr(daemonArgs, jobCommand, 'list') jobs_dict = self.sendFrameworkCommand(jobCommand, daemonArgs) except: return finally: daemonArgs.clean(jobCommand) for processingJob in processingJobs: if processingJob.frameworkid in jobs_dict \ and int(processingJob.status) != int(jobs_dict[processingJob.frameworkid]): try: setattr(daemonArgs, jobCommand, 'details') setattr(daemonArgs, 'gjd_id', processingJob.frameworkid) job_details = self.sendFrameworkCommand(jobCommand, daemonArgs) except: continue finally: daemonArgs.clean(jobCommand) daemonArgs.clean('gjd_id') processingJob.status = jobs_dict[processingJob.frameworkid] processingJob.finished = job_details['job_end_time'] processingJob.save() elif processingJob.frameworkid not in jobs_dict: processingJob.status = Job.COMPLETED_STATUS processingJob.finished = None processingJob.save() def checkJobs(self): scheduledJobs = self.scheduler.get_jobs() # remove scheduled jobs which are set to be deleted or deactivated deleteAndDeactivateJobs = Schedule.objects.filter( Q(status=Schedule.DELETE_STATUS) | Q(status=Schedule.DEACTIVATE_STATUS) ) for deleteAndDeactivateJob in deleteAndDeactivateJobs: for scheduledJob in scheduledJobs: if scheduledJob.name == deleteAndDeactivateJob.job_name: 
self.scheduler.unschedule_job(scheduledJob) deleteAndDeactivateJob.status = Schedule.DEACTIVATED_STATUS\ if deleteAndDeactivateJob.status == Schedule.DEACTIVATE_STATUS\ else Schedule.DELETED_STATUS deleteAndDeactivateJob.save() # add/update unscheduled jobs split_re = re.compile("\s+") unScheduledJobs = Schedule.objects.filter( Q(status=Schedule.NEW_STATUS) | Q(status=Schedule.UPDATE_STATUS) ) for unScheduledJob in unScheduledJobs: if unScheduledJob.status == Schedule.UPDATE_STATUS: for scheduledJob in scheduledJobs: if scheduledJob.name == unScheduledJob.job_name: self.scheduler.unschedule_job(scheduledJob) if unScheduledJob.scheduled_start is not None: schedule = { 'kwargs': { 'unScheduledJob': unScheduledJob }, 'name': unScheduledJob.job_name } try: newJob = self.scheduler.add_date_job(self.submitJobToFramework, unScheduledJob.scheduled_start, **schedule) self.logger.debug( 'Job will run on %s' % newJob.next_run_time ) except Exception as e: self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e)) raise Exception else: unScheduledJob.status = Schedule.ACTIVE_STATUS unScheduledJob.save() else: cronList = split_re.split(unScheduledJob.cron_expression) schedule = dict(itertools.izip(self.cronScheduleSequence, cronList)) schedule['kwargs'] = { 'unScheduledJob': unScheduledJob } schedule['name'] = unScheduledJob.job_name try: newJob = self.scheduler.add_cron_job(self.submitJobToFramework, **schedule) self.logger.debug( 'First run of job will be on %s' % newJob.next_run_time ) except Exception as e: self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e)) raise Exception else: unScheduledJob.status = Schedule.ACTIVE_STATUS unScheduledJob.save() def cleanup(self): try: self.updateProcessingJobs() except Exception as e: self.logger.error("Unknown error while updating processing jobs: %s" % str(e)) raise Exception def onNotification(self, eventType, body): if eventType == 'JobFinished': # sleep is added, because a failing job can be quicker than # Django save the frameworkid of that job time.sleep(1) event = JobFinished() event.ParseFromString(body) self.logger.debug('Job with ID %s is finished with status %s', str(event.job), str(event.status)) Job.objects.update() finishedJob = Job.objects.get(frameworkid=event.job) finishedJob.status = event.status finishedJob.finished = datetime.now() finishedJob.save() return True def run(self): self.logger.info('Started scheduler') # add active schedules to scheduler split_re = re.compile("\s+") scheduledJobs = Schedule.objects.filter( status=Schedule.ACTIVE_STATUS ) for scheduledJob in scheduledJobs: if scheduledJob.scheduled_start is not None: schedule = { 'kwargs': { 'unScheduledJob': scheduledJob }, 'name': scheduledJob.job_name } try: newJob = self.scheduler.add_date_job(self.submitJobToFramework, scheduledJob.scheduled_start, **schedule) except Exception as e: self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e)) raise Exception else: cronList = split_re.split(scheduledJob.cron_expression) schedule = dict(itertools.izip(self.cronScheduleSequence, cronList)) schedule['kwargs'] = { 'unScheduledJob': scheduledJob } schedule['name'] = scheduledJob.job_name try: newJob = self.scheduler.add_cron_job(self.submitJobToFramework, **schedule) except Exception as e: self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e)) raise Exception # add job scheduling mechanism and cleanup to scheduler and start scheduler try: 
self.scheduler.add_interval_job(self.checkJobs, seconds=self.jobSubmitInterval) self.scheduler.add_interval_job(self.cleanup, minutes=self.jobCleanupInterval) self.scheduler.start() except Exception as e: self.logger.error("Unknown error while initializing scheduler: %s" % str(e)) raise Exception # initialize bus instance for receiving job notifications try: notificationBus = Bus.createConfigurableBus(self.logger, self.config, 'notifications') notificationBus.openFwChannel() notificationBus.attachToMonitoring(self.onNotification) notificationBus.close() except BusException, e: self.logger.error("Cannot connect to HSN2 Bus because '%s'" % e) raise Exception except BusTimeoutException, e: self.logger.error("Response timeout") raise Exception
def main():
    configure_logging()
    logger = logging.getLogger('taskhost')

    config = {}
    try:
        with open(_tasks_config_file) as f:
            config = json.loads(f.read())
        logger.debug('Successfully read configuration file.')
    except Exception as e:
        logger.critical('Cannot read configuration file: {0}'
                        .format(_tasks_config_file))
        logger.critical(e)
        sys.exit(1)

    from simplegauges.datastores.azuretable import AzureGaugeDatastore
    gauges_ds = AzureGaugeDatastore(config['azure.account'], config['azure.key'],
                                    config['azure.table'])
    gauge_factory = simplegauges.gauge_factory(gauges_ds)
    tasks.set_simplegauges_factory(gauge_factory)
    tasks.set_config(config)

    import fixture  # should be imported after setting configs for decorators

    if not fixture.tasks:
        logger.error('No tasks found in the fixture.py')
        sys.exit(1)

    for task in fixture.tasks:
        method = task[0]
        name = '{0}.{1}'.format(method.__module__, method.__name__)
        # if not task[1]:
        #     logger.critical('Task {0} scheduling interval is invalid'
        #                     .format(name))
        try:
            task[0]()
            logger.info('Successfully bootstrapped: {0}'.format(name))
        except Exception as e:
            logger.error('Error while bootstrapping: {0}'.format(name))
            logger.error(e)
            raise e

    logger.info('All tasks ran successfully once. Starting scheduler...')

    # at this point all tasks ran once successfully
    sched = Scheduler()

    # schedule tasks
    for task in fixture.tasks:
        cron_kwargs = parse_cron_tuple(task[1])
        sched.add_cron_job(task[0], **cron_kwargs)

    sched.start()
    logger.info('Scheduler started with {0} jobs.'
                .format(len(sched.get_jobs())))

    now = datetime.datetime.now()
    for j in sched.get_jobs():
        logger.debug('Scheduled: {0}.{1}, next run: {2}'
                     .format(j.func.__module__, j.func.__name__,
                             j.compute_next_run_time(now)))

    # daemonize the process
    while True:
        time.sleep(10)
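# Hypothetical helper sketch: parse_cron_tuple is used above but not shown in this
# snippet. A minimal version, assuming each fixture entry stores its schedule as an
# (hour, minute[, second]) style tuple; the real taskhost fixture format may differ.
def parse_cron_tuple(cron_tuple):
    """Turn (hour, minute[, second]) into keyword arguments for add_cron_job()."""
    fields = ('hour', 'minute', 'second')
    if not isinstance(cron_tuple, (tuple, list)):
        cron_tuple = (cron_tuple,)
    return dict(zip(fields, cron_tuple))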
class DataLogger: """ In this class the different systems are initialiased: - logger and its handlers - local configuration - connection with internet If there is no connection with the internet: - A timed job is created that controls internet connection. - Logging of data is started with local configuration. - If the internet connection is started, and the online\ configuration differs, the old wrongly logged data will be removed. If there is connection with the internet and the server is working: - Check if the online configuration differs from the local one. If so,\ the configuration will be updated. - Logging of data is started. - Sending of data is started. - A timed job is created that checks if the online configuration is\ updated - The management of leds is started. """ def __init__(self): try: # initiate logger self.logger = logging.getLogger() self.logger.setLevel(logging.DEBUG) self.log_send_store_handler = LogSendStoreHandler(LOG_LOCATION) formatter = logging.Formatter( '%(asctime)s - %(levelname)s - %(name)s - %(message)s') self.log_send_store_handler.setFormatter(formatter) self.logger.addHandler(self.log_send_store_handler) self.logger.info('Initialising system...') job_info_filter = JobInfoFilter() logging.getLogger('apscheduler.scheduler').addFilter( job_info_filter) logging.getLogger('apscheduler.threadpool').addFilter( job_info_filter) # load local configuration self.conf_man = ConfigurationManager(CONFIG_LOCATION) self.log_send_store_handler.update_configuration() self.scheduler = Scheduler() self.scheduler.start() self.packet_manager = PacketManager(self.scheduler) # initiate network connection self.connection = ConnectionManager() # add scheduler and connection to log handler self.log_send_store_handler.update_configuration( scheduler=self.scheduler, connection=self.connection) # try to connect connected_to_internet = self.connection.check_internet_connection() connected_to_server = self.connection.check_server_connection() if connected_to_internet and connected_to_server: self.load_online_configuration_and_initiate_sending_data() self.packet_manager.update_time() self.packet_manager.initiate_send_packets(self.connection) else: ''' if there is no connection: keep checking for a connection temporarily use offline timer and modbus slave configuration ''' if connected_to_internet: self.packet_manager.update_time() self.wait_for_connection_to_load_configuration() # initiate sensor timers self.read_sensor_scheduler = ReadSensorScheduler( self.scheduler, self.packet_manager) self.led_manager = LedManager(self.scheduler) self.led_manager.update_led(PinName.powered, LedState.on) self.set_up_led_manager_calls() # sleep 2 seconds to intialise led of log handler sleep(1) self.logger.info('Initialisation complete') while True: sleep(10) self.logger.debug('Alive and kicking') if self.logger.level is logging.DEBUG: scheduler_jobs = self.scheduler.get_jobs() if len(scheduler_jobs) > 1: self.logger.debug('Current scheduler jobs:') for index, job in enumerate(scheduler_jobs): self.logger.debug(' Job {0}: {1} {2}'.format( index, job.name, job.next_run_time)) else: self.logger.debug('No running scheduler jobs') except Exception as e: self.logger.error(e) raise self.log_send_store_handler.send_logs_job() def load_online_configuration_and_initiate_sending_data(self): # check online configuration try: online_checksum = self.connection.get_configuration_checksum() self.logger.info("Checking online configuration..") if self.conf_man.is_online_configuration_different(online_checksum): 
self.logger.info( 'Online configuration is new, updating configuration..') online_configuration = self.connection.get_configuration() self.conf_man.validate_json_configuration(online_configuration) self.conf_man.save_configuration_local( online_checksum, online_configuration) self.packet_manager.remove_all_packets_from_memory() # update systems that make use of the configuration self.log_send_store_handler.update_configuration( scheduler=self.scheduler, connection=self.connection) self.connection.update_configuration() try: self.read_sensor_scheduler.update_configuration() except: pass self.packet_manager.update_configuration() except: self.logger.warning('Problem updating configuration') raise try: # try to remove job self.scheduler.unschedule_func( self.load_online_configuration_and_initiate_sending_data) except: pass # periodically check changes in configuration self.scheduler.add_interval_job( self.load_online_configuration_and_initiate_sending_data, seconds=configuration.get_time_interval_to_check_online_config()) self.packet_manager.initiate_send_packets(self.connection) def wait_for_connection_to_load_configuration(self): if not self.connection.is_connected(): # no internet connection, start job to check connection self.scheduler.add_interval_job(self.try_to_connect_to_internet, seconds=CHECK_CONNECTION_INTERVAL) else: self.packet_manager.update_time() if not self.connection.check_server_connection(): # no connection with server, start job to check connection self.scheduler.add_interval_job( self.try_to_load_online_configuration, seconds=CHECK_CONNECTION_INTERVAL) def try_to_connect_to_internet(self): if self.connection.check_internet_connection(): self.scheduler.unschedule_func(self.try_to_connect_to_internet) self.packet_manager.update_time() if not self.connection.check_server_connection(): # no connection with server, start job to check connection self.scheduler.add_interval_job( self.try_to_load_online_configuration, seconds=CHECK_CONNECTION_INTERVAL) else: self.load_online_configuration_and_initiate_sending_data() def try_to_load_online_configuration(self): if self.connection.check_server_connection(): self.load_online_configuration_and_initiate_sending_data() self.scheduler.unschedule_func( self.try_to_load_online_configuration) def set_up_led_manager_calls(self): sensor_led_call = LedCall(self.led_manager, PinName.readingsensor) connected_led_call = LedCall(self.led_manager, PinName.connected) logging_led_call = LedCall(self.led_manager, PinName.logging) self.read_sensor_scheduler.set_led_call(sensor_led_call) self.connection.set_led_call(connected_led_call) self.log_send_store_handler.set_led_call(logging_led_call)
class EventScheduler():
    """Class to schedule regular events in a similar manner to cron."""
    __mysql_url = 'mysql+pymysql://powermonitor:%s@localhost/powermonitor' \
                  % str(base64.b64decode(bytes('cDB3M3JtMG4xdDBy')))
    '''This determines the number of seconds after the designated run time that the job is
    still allowed to be run. If jobs are not being run, try increasing this in increments of 1.'''
    __GRACE_PERIOD = 31536000  # Amazing grace! Time in seconds before the job is considered misfired. Currently a year
    __COALESCE = True  # Force the job to only run once instead of retrying multiple times
    '''If there is a problem with thread concurrency, play around with these values.
    You'd think with all these threads in the pool that the filter would get clogged up!'''
    __threadpool_corethreads = 0  # Maximum number of persistent threads in the pool
    __threadpool_maxthreads = 20  # Maximum number of total threads in the pool
    __threadpool_keepalive = 1  # Seconds to keep non-core worker threads in the pool

    def __init__(self, start=True):
        try:
            config = {'apscheduler.daemon': True, 'apscheduler.standalone': False,
                      'apscheduler.threadpool.core_threads': self.__threadpool_corethreads,
                      'apscheduler.threadpool.max_threads': self.__threadpool_maxthreads,
                      'apscheduler.threadpool.keepalive': self.__threadpool_keepalive,
                      'apscheduler.coalesce': self.__COALESCE}
            self.__sched = Scheduler(config)
            '''Add the SQLAlchemy job store as the default. This was surprisingly far less
            tedious than getting the shelve job store working.'''
            self.__sched.add_jobstore(SQLAlchemyJobStore(url=self.__mysql_url, tablename='SCHEDULE'),
                                      'default')
            atexit.register(lambda: self.__sched.shutdown(wait=False))  # Stop the scheduler when the program exits
            if start:
                self.__sched.start()
        except KeyError:
            logging.warning('An error occurred starting the scheduler.')

    def start_scheduler(self):
        self.__sched.start()

    def add_cron_event(self, func, name, year=None, month=None, week=None, day=None, day_of_week=None,
                       hour=None, minute=None, second=None, start_date=None, *args, **kwargs):
        """Add a cron like event to the schedule. Each job must be given a name in case it needs
        to be removed. The following expressions can be used in each field:
        Expression    Field    Description
        *             any      Fire on every value
        */a           any      Fire on every 'a' values, starting from the minimum
        a-b           any      Fire on any value in the 'a-b' range (a must be smaller than b)
        a-b/c         any      Fire every 'c' values within the 'a-b' range
        xth y         day      Fire on the x-th occurrence of weekday y within the month
        last x        day      Fire on the last occurrence of weekday 'x' within the month
        last          day      Fire on the last day within the month
        x,y,z         any      Fire on any matching expression; can combine any number of any of
                               the above expressions
        If you want to add **options to the event, use kwargs (keyword arguments dictionary)"""
        if self.__sched is not None:
            event_exists = False
            if self.__find_event(name) is not None:
                event_exists = True
            if not event_exists:
                self.__sched.add_cron_job(func=func, name=name, year=year, month=month, day=day, week=week,
                                          day_of_week=day_of_week, hour=hour, minute=minute, second=second,
                                          start_date=start_date, args=args, kwargs=kwargs,
                                          misfire_grace_time=self.__GRACE_PERIOD)
                logging.info('New cron event added')
            else:
                '''Every event needs a unique name so we can keep track of the little bastards.
                And please use descriptive names so that they can be properly identified in the
                job schedule.'''
                logging.warning('add_cron_event: Event already exists')
                raise EventExistsError('A job with name %s already exists' % name)
        else:
            raise SchedulerNotFoundError('add_cron_event: Scheduler does not exist. It may have not started.')

    def __find_event(self, event_name):
        if self.__sched is not None:
            events = self.__sched.get_jobs()
            for event in events:
                if event.name == event_name:
                    return event
            return None
        else:
            logging.warning('__find_event: Scheduler does not exist. It may have not started.')
            raise SchedulerNotFoundError('Scheduler does not exist. It may have not started.')

    def add_onceoff_event(self, func, name, date, args=None):
        """Add a once off event to the schedule. The job is executed once at the specified date and time.
        Date/time format: YYYY-MM-DD HH:MM:SS"""
        if self.__sched is not None:
            try:
                if args is None:
                    # If there are no arguments to be passed to the function
                    self.__sched.add_date_job(func=func, name=name, date=date,
                                              misfire_grace_time=self.__GRACE_PERIOD)
                else:
                    # If there are arguments to be passed to the function
                    self.__sched.add_date_job(func=func, name=name, date=date, args=args,
                                              misfire_grace_time=self.__GRACE_PERIOD)
            except ValueError:
                '''If the event is in the past, it will not run. This program is not capable of
                manipulating space and time. Try import __time_travel__'''
                raise EventWontRunError('The event will not run: Event time has expired.')
            logging.info('New once off event added')
        else:
            logging.warning('add_onceoff_event: Scheduler does not exist. It may have not started.')
            raise SchedulerNotFoundError('Scheduler does not exist. It may have not started.')

    def remove_event(self, event_name):
        """Remove the event 'event_name' from the schedule."""
        if self.__sched is not None:
            removed = False
            event = self.__find_event(event_name=event_name)
            if event is not None:
                # If the event exists, remove it
                self.__sched.unschedule_job(event)
                removed = True
            if not removed:
                '''Raise an error so that it can be handled correctly'''
                logging.warning('remove_event: Event not found for removal.')
                raise EventNotFoundError('Event not found for removal: %s' % event_name)
        else:
            raise SchedulerNotFoundError('remove_event: Scheduler does not exist. It may have not started.')

    def get_jobs(self):
        """Get the list of events currently in the job store."""
        if self.__sched is not None:
            return self.__sched.get_jobs()
        else:
            raise SchedulerNotFoundError('get_events: Scheduler does not exist. It may have not started.')

    def get_job_names(self):
        """
        Get the names of all the jobs in the job store
        :return: list
        """
        jobs = self.get_jobs()
        job_list = []
        if jobs:
            for job in jobs:
                job_list.append(job.name)
        return job_list

    def get_scheduler(self):
        """Returns the Scheduler object. Rather add functionality to this class than call this method."""
        if self.__sched is not None:
            return self.__sched
        else:
            raise SchedulerNotFoundError('get_scheduler: Scheduler does not exist. It may have not started.')
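# Hypothetical usage sketch (not from the original module) for EventScheduler above;
# read_power_meter stands in for a real polling task.
def read_power_meter():
    pass  # sample the meter and store the reading

scheduler = EventScheduler()
scheduler.add_cron_event(read_power_meter, 'poll-power-meter', minute='*/5')
print(scheduler.get_job_names())            # ['poll-power-meter']
scheduler.remove_event('poll-power-meter')  # unschedule by name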
class bakCron(object):
    def __init__(self):
        self.sched = Scheduler()
        self.sched.daemonic = False
        self.sched.start()

        self.assign_jobs()
        self.assign_monitor()

    def get_fileconfig(self):
        '''
        Get the paths of the configuration files; the directory is fixed at install time.
        '''
        policyfile = os.path.dirname(nsccdbbak.__file__) + "/conf/Policy.conf"
        serverfile = os.path.dirname(nsccdbbak.__file__) + "/conf/Server.conf"

        policys = []
        PolicyConfig = ConfigParser.ConfigParser(allow_no_value=True)
        PolicyConfig.read(policyfile)
        for section in PolicyConfig.sections():
            dictTmp = {}
            colon = section.find(':')
            key, value = section[:colon], section[colon + 1:]
            dictTmp[key] = value
            for key, value in PolicyConfig.items(section):
                if 'pass' in key:
                    dictTmp[key] = base64.decodestring(value)
                else:
                    dictTmp[key] = value
            policys.append(dictTmp.copy())
            dictTmp.clear()

        servers = []
        ServerConfig = ConfigParser.ConfigParser(allow_no_value=True)
        ServerConfig.read(serverfile)
        for section in ServerConfig.sections():
            dictTmp = {}
            colon = section.find(':')
            key, value = section[:colon], section[colon + 1:]
            dictTmp[key] = value
            for key, value in ServerConfig.items(section):
                if 'pass' in key:
                    dictTmp[key] = base64.decodestring(value)
                else:
                    dictTmp[key] = value
            servers.append(dictTmp.copy())
            dictTmp.clear()

        return policys, servers

    def assign_jobs(self):
        '''
        Read the configuration files, get the backup policy for each database and
        set up the backup jobs.
        '''
        (policys, servers) = self.get_fileconfig()
        for dictTmp in policys:
            if dictTmp['flag'] == '1':
                for dict in servers:
                    if dict['server'] == dictTmp['server']:
                        serverInfo = dict
                for key in dictTmp.keys():
                    if dictTmp[key] == '':
                        dictTmp[key] = None
                glob_bak_name = 'glob_bak_' + dictTmp['server']
                print [serverInfo, dictTmp['bakcon']]
                self.sched.add_cron_job(self.glob_bak,
                                        args=[serverInfo, dictTmp['bakcon']],
                                        month=dictTmp['globmonth'],
                                        day=dictTmp['globday'],
                                        day_of_week=dictTmp['globweekday'],
                                        hour=dictTmp['globhour'],
                                        minute=dictTmp['globminute'],
                                        second='*/3',
                                        name=glob_bak_name)
                incr_bak_name = 'incr_bak_' + dictTmp['server']
                self.sched.add_cron_job(self.incr_bak,
                                        args=[serverInfo, dictTmp['bakcon']],  # incr_bak takes the same args as glob_bak
                                        month=dictTmp['incmonth'],
                                        day=dictTmp['incday'],
                                        day_of_week=dictTmp['incweekday'],
                                        hour=dictTmp['inchour'],
                                        minute=dictTmp['incminute'],
                                        name=incr_bak_name)
        print self.sched.get_jobs()
        print 'assign jobs finished!'

    def assign_monitor(self):
        '''
        Set up the file-monitoring job.
        '''
        self.sched.add_interval_job(self.monitorfile, name='monitorDaemon')
        print self.sched.get_jobs()
        print 'assign monitor finished'

    def filechange(self, monitor, file1, file2, evt_type):
        '''
        When the backup policy file changes, unschedule every job except the file
        monitor, then reassign the backup jobs.
        '''
        if evt_type == gio.FILE_MONITOR_EVENT_CHANGED:
            print 'file changed'
            for job in self.sched.get_jobs():
                print job
                if job.name != 'monitorDaemon':
                    self.sched.unschedule_job(job)
            self.assign_jobs()

    def monitorfile(self):
        '''
        Start the file-monitoring thread and set up the multi-threaded GLib environment.
        '''
        gfile = gio.File(self.filepath)
        monitor = gfile.monitor_file(gio.FILE_MONITOR_NONE, None)
        monitor.connect("changed", self.filechange)
        gobject.threads_init()
        gml = gobject.MainLoop()
        gml.run()

    def glob_bak(self, serConf, bakcontainer):
        '''
        Run one full backup and upload the backup file to cloud storage.
        '''
        timestr = time.strftime(r"%Y-%m-%d_%H-%M-%S", time.localtime())
        print timestr
        conndb = ConnDatabase(serConf)
        connStor = ConnStorage(serConf)
        (result, bakfilepath) = conndb.conn.glob_bak()
        if result:
            connStor.upload_file(bakcontainer, bakfilepath)
        else:
            print 'global backup error!'

    def incr_bak(self, serConf, bakcontainer):
        '''
        Run one incremental backup and upload the backup file to cloud storage.
        '''
        conndb = ConnDatabase(serConf)
        connStor = ConnStorage(serConf)
        (result, bakfilepath) = conndb.conn.incr_bak()
        if result:
            connStor.upload_file(bakcontainer, bakfilepath)
        else:
            print 'incremental backup error!'
class AlertSchedulerHandler(): FILENAME = 'definitions.json' TYPE_PORT = 'PORT' TYPE_METRIC = 'METRIC' TYPE_AMS = 'AMS' TYPE_SCRIPT = 'SCRIPT' TYPE_WEB = 'WEB' TYPE_RECOVERY = 'RECOVERY' def __init__(self, cachedir, stacks_dir, common_services_dir, extensions_dir, host_scripts_dir, cluster_configuration, config, recovery_manager, in_minutes=True): self.cachedir = cachedir self.stacks_dir = stacks_dir self.common_services_dir = common_services_dir self.extensions_dir = extensions_dir self.host_scripts_dir = host_scripts_dir self._cluster_configuration = cluster_configuration # a mapping between a cluster name and a unique hash for all definitions self._cluster_hashes = {} # the amount of time, in seconds, that an alert can run after it's scheduled time alert_grace_period = int(config.get('agent', 'alert_grace_period', 5)) if not os.path.exists(cachedir): try: os.makedirs(cachedir) except: logger.critical( "[AlertScheduler] Could not create the cache directory {0}" .format(cachedir)) apscheduler_standalone = False self.APS_CONFIG = { 'apscheduler.threadpool.core_threads': 3, 'apscheduler.coalesce': True, 'apscheduler.standalone': apscheduler_standalone, 'apscheduler.misfire_grace_time': alert_grace_period, 'apscheduler.threadpool.context_injector': self._job_context_injector if not apscheduler_standalone else None, 'apscheduler.threadpool.agent_config': config } self._collector = AlertCollector() self.__scheduler = Scheduler(self.APS_CONFIG) self.__in_minutes = in_minutes self.config = config self.recovery_manger = recovery_manager # register python exit handler ExitHelper().register(self.exit_handler) def _job_context_injector(self, config): """ apscheduler hack to inject monkey-patching, context and configuration to all jobs inside scheduler in case if scheduler running in embedded mode Please note, this function called in job context thus all injects should be time-running optimized :type config AmbariConfig.AmbariConfig """ if not config.use_system_proxy_setting(): from ambari_commons.network import reconfigure_urllib2_opener reconfigure_urllib2_opener(ignore_system_proxy=True) def exit_handler(self): """ Exit handler """ self.stop() def update_definitions(self, heartbeat): """ Updates the persisted alert definitions JSON. 
:param heartbeat: :return: """ if 'alertDefinitionCommands' not in heartbeat: logger.warning( "There are no alert definition commands in the heartbeat; unable to update definitions" ) return # prune out things we don't want to store alert_definitions = [] for command in heartbeat['alertDefinitionCommands']: command_copy = command.copy() # no need to store these since we always use the in-memory cached values if 'configurations' in command_copy: del command_copy['configurations'] alert_definitions.append(command_copy) # write out the new definitions with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f: json.dump(alert_definitions, f, indent=2) # determine how to reschedule the jobs reschedule_all = False if "clusterName" in command_copy and command_copy[ "clusterName"] not in self._cluster_hashes: reschedule_all = True if reschedule_all is True: # reschedule all jobs, creating new instances self.reschedule_all() else: # reschedule only the jobs that have changed self.reschedule() def __make_function(self, alert_def): return lambda: alert_def.collect() def start(self): """ loads definitions from file and starts the scheduler """ if self.__scheduler is None: return if self.__scheduler.running: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(self.APS_CONFIG) alert_callables = self.__load_definitions() # schedule each definition for _callable in alert_callables: self.schedule_definition(_callable) logger.info( "[AlertScheduler] Starting {0}; currently running: {1}".format( str(self.__scheduler), str(self.__scheduler.running))) self.__scheduler.start() def stop(self): if not self.__scheduler is None: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(self.APS_CONFIG) logger.info("[AlertScheduler] Stopped the alert scheduler.") def reschedule(self): """ Removes jobs that are scheduled where their UUID no longer is valid. Schedules jobs where the definition UUID is not currently scheduled. """ jobs_scheduled = 0 jobs_removed = 0 definitions = self.__load_definitions() scheduled_jobs = self.__scheduler.get_jobs() # for every scheduled job, see if its UUID is still valid for scheduled_job in scheduled_jobs: uuid_valid = False for definition in definitions: definition_uuid = definition.get_uuid() if scheduled_job.name == definition_uuid: uuid_valid = True break # jobs without valid UUIDs should be unscheduled if uuid_valid is False: jobs_removed += 1 logger.info("[AlertScheduler] Unscheduling {0}".format( scheduled_job.name)) self._collector.remove_by_uuid(scheduled_job.name) self.__scheduler.unschedule_job(scheduled_job) # for every definition, determine if there is a scheduled job for definition in definitions: definition_scheduled = False for scheduled_job in scheduled_jobs: definition_uuid = definition.get_uuid() if definition_uuid == scheduled_job.name: definition_scheduled = True break # if no jobs are found with the definitions UUID, schedule it if definition_scheduled is False: jobs_scheduled += 1 self.schedule_definition(definition) logger.info( "[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled" .format(str(jobs_scheduled), str(jobs_removed))) def reschedule_all(self): """ Removes jobs that are scheduled where their UUID no longer is valid. Schedules jobs where the definition UUID is not currently scheduled. 
""" logger.info("[AlertScheduler] Rescheduling all jobs...") jobs_scheduled = 0 jobs_removed = 0 definitions = self.__load_definitions() scheduled_jobs = self.__scheduler.get_jobs() # unschedule all scheduled jobs for scheduled_job in scheduled_jobs: jobs_removed += 1 logger.info("[AlertScheduler] Unscheduling {0}".format( scheduled_job.name)) self._collector.remove_by_uuid(scheduled_job.name) self.__scheduler.unschedule_job(scheduled_job) # for every definition, schedule a job for definition in definitions: jobs_scheduled += 1 self.schedule_definition(definition) logger.info( "[AlertScheduler] Reschedule Summary: {0} unscheduled, {0} rescheduled" .format(str(jobs_removed), str(jobs_scheduled))) def collector(self): """ gets the collector for reporting to the server """ return self._collector def __load_definitions(self): """ Loads all alert definitions from a file. All clusters are stored in a single file. This wil also populate the cluster-to-hash dictionary. :return: """ definitions = [] alerts_definitions_path = os.path.join(self.cachedir, self.FILENAME) try: with open(alerts_definitions_path) as fp: all_commands = json.load(fp) except: logger.warning( '[AlertScheduler] {0} not found or invalid. No alerts will be scheduled until registration occurs.' .format(alerts_definitions_path)) return definitions for command_json in all_commands: clusterName = '' if not 'clusterName' in command_json else command_json[ 'clusterName'] hostName = '' if not 'hostName' in command_json else command_json[ 'hostName'] clusterHash = None if not 'hash' in command_json else command_json[ 'hash'] # cache the cluster and cluster hash after loading the JSON if clusterName != '' and clusterHash is not None: logger.info( '[AlertScheduler] Caching cluster {0} with alert hash {1}'. 
format(clusterName, clusterHash)) self._cluster_hashes[clusterName] = clusterHash for definition in command_json['alertDefinitions']: alert = self.__json_to_callable(clusterName, hostName, definition) if alert is None: continue alert.set_helpers(self._collector, self._cluster_configuration) definitions.append(alert) return definitions def __json_to_callable(self, clusterName, hostName, json_definition): """ converts the json that represents all aspects of a definition and makes an object that extends BaseAlert that is used for individual """ alert = None try: source = json_definition['source'] source_type = source.get('type', '') if logger.isEnabledFor(logging.DEBUG): logger.debug( "[AlertScheduler] Creating job type {0} with {1}".format( source_type, str(json_definition))) if source_type == AlertSchedulerHandler.TYPE_METRIC: alert = MetricAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_AMS: alert = AmsAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_PORT: alert = PortAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_SCRIPT: source['stacks_directory'] = self.stacks_dir source['common_services_directory'] = self.common_services_dir source['extensions_directory'] = self.extensions_dir source['host_scripts_directory'] = self.host_scripts_dir alert = ScriptAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_WEB: alert = WebAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_RECOVERY: alert = RecoveryAlert(json_definition, source, self.config, self.recovery_manger) if alert is not None: alert.set_cluster(clusterName, hostName) except Exception, exception: logger.exception( "[AlertScheduler] Unable to load an invalid alert definition. It will be skipped." ) return alert
def start_schedule(): #if __name__ == '__main__': os_user = config.OS_USER os_password = config.OS_APPS_PASSWD scheduler = Scheduler(daemonic = False) scheduler.print_jobs() #scheduler.remove_jobstore('file',close=True) #scheduler.shutdown(wait=False) scheduler.shutdown() #scheduler.unschedule_func(backup) scheduler.add_jobstore(ShelveJobStore('/tmp/db_schedule'), 'file') v_current_jobs = scheduler.get_jobs() print v_current_jobs if v_current_jobs: # if the job already exists, unschedule it first scheduler.unschedule_func(backup) #scheduler = Scheduler(standalone=True) #scheduler = Scheduler(daemon=True) # connect to the configuration database to read the backup interval settings db = Connection('/tmp/mysql3306.sock', config.DB_NAME, config.DB_USER, config.DB_PASSWD, time_zone='+8:00') v_sql = r"""SELECT a.instance_id,b.ip,b.port,a.backup_interval_type,a.backup_start_time from mysql_ins_bak_setup a,tag b where a.instance_id=b.id """ print v_sql bak_server_list = db.query(v_sql) if bak_server_list: # there are servers that need backups scheduled i=0 # mark scheduled tasks that have not started yet as manually ended (backup_result_type=4) v_manual_end_sql = 'update mysql_ins_bak_log set backup_result_type=4 where backup_result_type=0' db.execute(v_manual_end_sql) for bak_server in bak_server_list: instance_id = bak_server['instance_id'] from_host = bak_server['ip'] #print from_host mysql_port = bak_server['port'] backup_interval_type = bak_server['backup_interval_type'] backup_start_time = bak_server['backup_start_time'] str_start_date= time.strftime("%Y-%m-%d") + ' ' + backup_start_time print str_start_date if backup_interval_type == 1: # every day # in-memory jobstore #scheduler.add_interval_job(backup, days=1, start_date=str_start_date, args=[from_host, mysql_port, os_user, os_password]) # file-based jobstore jobstore='file' scheduler.add_interval_job(backup, days=1, start_date=str_start_date, args=[from_host, mysql_port, os_user, os_password], jobstore='file') #scheduler.add_interval_job(backup, days=1, start_date='2014-07-18 18:17:01', args=[from_host, mysql_port, os_user, os_password]) elif backup_interval_type == 2: # every week scheduler.add_interval_job(backup, weeks=1, start_date=str_start_date, args=[from_host, mysql_port, os_user, os_password]) elif backup_interval_type == 3: # every hour scheduler.add_interval_job(backup, hours=1, start_date=str_start_date, args=[from_host, mysql_port, os_user, os_password]) # record the scheduled backup task status in the database; 0 = task scheduled, actual backup not started yet v_sche_start_sql = """insert into mysql_ins_bak_log(instance_id,backup_result_type) values(%d,0)""" % (instance_id) db.execute(v_sche_start_sql) i=i+1 db.close() if bak_server_list: # there are servers that need backups scheduled scheduler.start() print 'success!' scheduler.print_jobs() '''
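# A stripped-down sketch of the persistent-jobstore pattern start_schedule() builds on:
# jobs added with jobstore='file' are written to the shelve file and survive restarts.
# The backup() body and its arguments are placeholders; the APScheduler 2.x calls
# (add_jobstore, get_jobs, unschedule_func, add_interval_job) mirror the snippet above.
from apscheduler.scheduler import Scheduler
from apscheduler.jobstores.shelve_store import ShelveJobStore

def backup(host, port, user, password):
    print('backing up %s:%s' % (host, port))   # placeholder task body

sched = Scheduler(daemonic=False)
sched.add_jobstore(ShelveJobStore('/tmp/db_schedule'), 'file')
if sched.get_jobs():                 # jobs restored from the shelve file
    sched.unschedule_func(backup)    # drop stale entries before re-adding
sched.add_interval_job(backup, days=1, start_date='2014-07-18 18:17:01',
                       args=['127.0.0.1', 3306, 'os_user', 'os_password'],
                       jobstore='file')
sched.start()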
from apscheduler.scheduler import Scheduler from datetime import datetime, timedelta from time import sleep from random import randint sched = Scheduler(coalesce=True) sched.start() def hello(): print "hello", datetime.now() sched.add_cron_job(hello, second='1,11,21,31,41,51') job = sched.get_jobs()[0] print len(job.get_run_times(datetime.now() + timedelta(minutes=60))) def check(): n = job.next_run_time if randint(1, 3) >= 2: job.compute_next_run_time(datetime.now() + timedelta(seconds=15)) print 'rescheduling from', n, 'to', job.next_run_time return 1 if job.runs > 10: print 'ran all my times, giving up.' raise SystemExit return 0 i = 0
jobs = 0 last_check = 0 try: last_check = int(pydis.get("cron_last_run")) except TypeError: pass pydis.set("cron_last_run", int(time())) for job in db.jobqueue.select().where(db.jobqueue.ts >= last_check): jobs += 1 t = threading.Thread(target=self.run, args=(job, )) t.start() threads.append(t) [t.join() for t in threads] # Started %d jobs" % jobs cjob = Jobs() sched = Scheduler(daemonic=False) sched.add_jobstore(ShelveJobStore('./cron.jobs'), 'file') sched.add_interval_job(cjob.start, seconds=10) sched.start() pydis.set("cron_next_run", sched.get_jobs()[0].next_run_time)
print('Inventory Data done at: ' + str(datetime.utcnow()) + ' Updating LastUpdate now...........') try: abcstore.updateLastUpdate(executionTime) except: print('Last Update Failed') completeTime = datetime.utcnow() timetaken = completeTime - executionTime print('Complete: ' + str(completeTime) + ' took this long: ' + str(timetaken)) atexit.register(lambda: cron.shutdown(wait=False)) print(cron.get_jobs()) @app.before_request def before_request(): if current_user.is_authenticated: current_user.last_seen = datetime.utcnow() db.session.commit() @app.route('/', methods=['GET', 'POST']) @app.route('/index', methods=['GET', 'POST']) @login_required def index(): ## TEMP!! to redirect to inventory return redirect(url_for('inventory'))
class PyFlowScheduler(object): """ This object schedules the submission of the tasks in a :class:`Flow`. There are two types of errors that might occur during the execution of the jobs: #. Python exceptions #. Errors in the ab-initio code Python exceptions are easy to detect and are usually due to a bug in the python code or random errors such as IOError. The set of errors in the ab-initio is much much broader. It includes wrong input data, segmentation faults, problems with the resource manager, etc. The flow tries to handle the most common cases but there's still a lot of room for improvement. Note, in particular, that `PyFlowScheduler` will shutdown automatically in the following cases: #. The number of python exceptions is > max_num_pyexcs #. The number of task errors (i.e. the number of tasks whose status is S_ERROR) is > max_num_abierrs #. The number of jobs launched becomes greater than (`safety_ratio` * total_number_of_tasks). #. The scheduler will send an email to the user (specified by `mailto`) every `remindme_s` seconds. If the mail cannot be sent, the scheduler will shutdown automatically. This check prevents the scheduler from being trapped in an infinite loop. """ # Configuration file. YAML_FILE = "scheduler.yml" USER_CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".abinit", "abipy") Error = PyFlowSchedulerError @classmethod def autodoc(cls): i = cls.__init__.__doc__.index("Args:") return cls.__init__.__doc__[i+5:] def __init__(self, **kwargs): """ Args: weeks: number of weeks to wait (DEFAULT: 0). days: number of days to wait (DEFAULT: 0). hours: number of hours to wait (DEFAULT: 0). minutes: number of minutes to wait (DEFAULT: 0). seconds: number of seconds to wait (DEFAULT: 0). mailto: The scheduler will send an email to `mailto` every `remindme_s` seconds. (DEFAULT: None i.e. not used). verbose: (int) verbosity level. (DEFAULT: 0) use_dynamic_manager: "yes" if the :class:`TaskManager` must be re-initialized from file before launching the jobs. (DEFAULT: "no") max_njobs_inqueue: Limit on the number of jobs that can be present in the queue. (DEFAULT: 200) remindme_s: The scheduler will send an email to the user specified by `mailto` every `remindme_s` seconds. (int, DEFAULT: 1 day). max_num_pyexcs: The scheduler will exit if the number of python exceptions is > max_num_pyexcs (int, DEFAULT: 0) max_num_abierrs: The scheduler will exit if the number of errored tasks is > max_num_abierrs (int, DEFAULT: 0) safety_ratio: The scheduler will exits if the number of jobs launched becomes greater than `safety_ratio` * total_number_of_tasks_in_flow. (int, DEFAULT: 5) max_nlaunches: Maximum number of tasks launched in a single iteration of the scheduler. (DEFAULT: -1 i.e. no limit) debug: Debug level. Use 0 for production (int, DEFAULT: 0) fix_qcritical: "yes" if the launcher should try to fix QCritical Errors (DEFAULT: "yes") rmflow: If "yes", the scheduler will remove the flow directory if the calculation completed successfully. (DEFAULT: "no") killjobs_if_errors: "yes" if the scheduler should try to kill all the runnnig jobs before exiting due to an error. (DEFAULT: "yes") """ # Options passed to the scheduler. 
self.sched_options = AttrDict( weeks=kwargs.pop("weeks", 0), days=kwargs.pop("days", 0), hours=kwargs.pop("hours", 0), minutes=kwargs.pop("minutes", 0), seconds=kwargs.pop("seconds", 0), #start_date=kwargs.pop("start_date", None), ) if all(not v for v in self.sched_options.values()): raise self.Error("Wrong set of options passed to the scheduler.") self.mailto = kwargs.pop("mailto", None) self.verbose = int(kwargs.pop("verbose", 0)) self.use_dynamic_manager = as_bool(kwargs.pop("use_dynamic_manager", False)) self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200) self.max_ncores_used = kwargs.pop("max_ncores_used", None) self.contact_resource_manager = as_bool(kwargs.pop("contact_resource_manager", False)) self.remindme_s = float(kwargs.pop("remindme_s", 1 * 24 * 3600)) self.max_num_pyexcs = int(kwargs.pop("max_num_pyexcs", 0)) self.max_num_abierrs = int(kwargs.pop("max_num_abierrs", 0)) self.safety_ratio = int(kwargs.pop("safety_ratio", 5)) #self.max_etime_s = kwargs.pop("max_etime_s", ) self.max_nlaunches = kwargs.pop("max_nlaunches", -1) self.debug = kwargs.pop("debug", 0) self.fix_qcritical = as_bool(kwargs.pop("fix_qcritical", True)) self.rmflow = as_bool(kwargs.pop("rmflow", False)) self.killjobs_if_errors = as_bool(kwargs.pop("killjobs_if_errors", True)) self.customer_service_dir = kwargs.pop("customer_service_dir", None) if self.customer_service_dir is not None: self.customer_service_dir = Directory(self.customer_service_dir) self._validate_customer_service() if kwargs: raise self.Error("Unknown arguments %s" % kwargs) if not has_apscheduler: raise RuntimeError("Install apscheduler with pip") if has_sched_v3: logger.warning("Using scheduler v>=3.0.0") from apscheduler.schedulers.blocking import BlockingScheduler self.sched = BlockingScheduler() else: from apscheduler.scheduler import Scheduler self.sched = Scheduler(standalone=True) self.nlaunch = 0 self.num_reminders = 1 # Used to keep track of the exceptions raised while the scheduler is running self.exceptions = deque(maxlen=self.max_num_pyexcs + 10) # Used to push additional info during the execution. self.history = deque(maxlen=100) @classmethod def from_file(cls, filepath): """Read the configuration parameters from a Yaml file.""" with open(filepath, "rt") as fh: return cls(**yaml.safe_load(fh)) @classmethod def from_string(cls, s): """Create an istance from string s containing a YAML dictionary.""" stream = cStringIO(s) stream.seek(0) return cls(**yaml.safe_load(stream)) @classmethod def from_user_config(cls): """ Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'. Search first in the working directory and then in the configuration directory of abipy. Raises: `RuntimeError` if file is not found. """ # Try in the current directory. path = os.path.join(os.getcwd(), cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) # Try in the configuration directory. 
path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) raise cls.Error("Cannot locate %s neither in current directory nor in %s" % (cls.YAML_FILE, path)) def __str__(self): """String representation.""" lines = [self.__class__.__name__ + ", Pid: %d" % self.pid] app = lines.append app("Scheduler options: %s" % str(self.sched_options)) if self.flow is not None: app(80 * "=") app(str(self.flow)) return "\n".join(lines) @property def pid(self): """The pid of the process associated to the scheduler.""" try: return self._pid except AttributeError: self._pid = os.getpid() return self._pid @property def pid_file(self): """ Absolute path of the file with the pid. The file is located in the workdir of the flow """ return self._pid_file @property def flow(self): """`Flow`.""" try: return self._flow except AttributeError: return None @property def num_excs(self): """Number of exceptions raised so far.""" return len(self.exceptions) def get_delta_etime(self): """Returns a `timedelta` object representing with the elapsed time.""" return timedelta(seconds=(time.time() - self.start_time)) def add_flow(self, flow): """ Add an :class:`Flow` flow to the scheduler. """ if hasattr(self, "_flow"): raise self.Error("Only one flow can be added to the scheduler.") # Check if we are already using a scheduler to run this flow flow.check_pid_file() flow.set_spectator_mode(False) # Build dirs and files (if not yet done) flow.build() with open(flow.pid_file, "wt") as fh: fh.write(str(self.pid)) self._pid_file = flow.pid_file self._flow = flow def _validate_customer_service(self): """ Validate input parameters if customer service is on then create directory for tarball files with correct premissions for user and group. """ direc = self.customer_service_dir if not direc.exists: mode = 0o750 print("Creating customer_service_dir %s with mode %s" % (direc, mode)) direc.makedirs() os.chmod(direc.path, mode) if self.mailto is None: raise RuntimeError("customer_service_dir requires mailto option in scheduler.yml") def _do_customer_service(self): """ This method is called before the shutdown of the scheduler. If customer_service is on and the flow didn't completed successfully, a lightweight tarball file with inputs and the most important output files is created in customer_servide_dir. """ if self.customer_service_dir is None: return doit = self.exceptions or not self.flow.all_ok doit = True if not doit: return prefix = os.path.basename(self.flow.workdir) + "_" import tempfile, datetime suffix = str(datetime.datetime.now()).replace(" ", "-") # Remove milliseconds i = suffix.index(".") if i != -1: suffix = suffix[:i] suffix += ".tar.gz" #back = os.getcwd() #os.chdir(self.customer_service_dir.path) _, tmpname = tempfile.mkstemp(suffix="_" + suffix, prefix=prefix, dir=self.customer_service_dir.path, text=False) print("Dear customer,\n We are about to generate a tarball in\n %s" % tmpname) self.flow.make_light_tarfile(name=tmpname) #os.chdir(back) def start(self): """ Starts the scheduler in a new thread. Returns 0 if success. In standalone mode, this method will block until there are no more scheduled jobs. 
""" self.history.append("Started on %s" % time.asctime()) self.start_time = time.time() if not has_apscheduler: raise RuntimeError("Install apscheduler with pip") if has_sched_v3: self.sched.add_job(self.callback, "interval", **self.sched_options) else: self.sched.add_interval_job(self.callback, **self.sched_options) errors = self.flow.look_before_you_leap() if errors: self.exceptions.append(errors) return 1 # Try to run the job immediately. If something goes wrong return without initializing the scheduler. self._runem_all() if self.exceptions: self.cleanup() self.send_email(msg="Error while trying to run the flow for the first time!\n %s" % self.exceptions) return 1 try: self.sched.start() return 0 except KeyboardInterrupt: self.shutdown(msg="KeyboardInterrupt from user") if ask_yesno("Do you want to cancel all the jobs in the queue? [Y/n]"): print("Number of jobs cancelled:", self.flow.cancel()) self.flow.pickle_dump() return -1 def _runem_all(self): """ This function checks the status of all tasks, tries to fix tasks that went unconverged, abicritical, or queuecritical and tries to run all the tasks that can be submitted.+ """ excs = [] flow = self.flow # Allow to change the manager at run-time if self.use_dynamic_manager: from pymatgen.io.abinit.tasks import TaskManager new_manager = TaskManager.from_user_config() for work in flow: work.set_manager(new_manager) nqjobs = 0 if self.contact_resource_manager: # and flow.TaskManager.qadapter.QTYPE == "shell": # This call is expensive and therefore it's optional (must be activate in manager.yml) nqjobs = flow.get_njobs_in_queue() if nqjobs is None: nqjobs = 0 if flow.manager.has_queue: logger.warning('Cannot get njobs_inqueue') else: # Here we just count the number of tasks in the flow who are running. # This logic breaks down if there are multiple schedulers runnig # but it's easy to implement without having to contact the resource manager. nqjobs = (len(list(flow.iflat_tasks(status=flow.S_RUN))) + len(list(flow.iflat_tasks(status=flow.S_SUB)))) if nqjobs >= self.max_njobs_inqueue: print("Too many jobs in the queue: %s. No job will be submitted." % nqjobs) flow.check_status(show=False) return if self.max_nlaunches == -1: max_nlaunch = self.max_njobs_inqueue - nqjobs else: max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches) # check status. flow.check_status(show=False) # This check is not perfect, we should make a list of tasks to sumbit # and select only the subset so that we don't exceeed mac_ncores_used # Many sections of this code should be rewritten. 
#if self.max_ncores_used is not None and flow.ncores_used > self.max_ncores_used: if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used: print("Cannot exceed max_ncores_used %s" % self.max_ncores_used) return # Try to restart the unconverged tasks # TODO: do not fire here but prepare for fireing in rapidfire for task in self.flow.unconverged_tasks: try: logger.info("Flow will try restart task %s" % task) fired = task.restart() if fired: self.nlaunch += 1 max_nlaunch -= 1 if max_nlaunch == 0: logger.info("Restart: too many jobs in the queue, returning") flow.pickle_dump() return except task.RestartError: excs.append(straceback()) # Temporarily disable by MG because I don't know if fix_critical works after the # introduction of the new qadapters # reenabled by MsS disable things that do not work at low level # fix only prepares for restarting, and sets to ready if self.fix_qcritical: nfixed = flow.fix_queue_critical() if nfixed: print("Fixed %d QCritical error(s)" % nfixed) nfixed = flow.fix_abicritical() if nfixed: print("Fixed %d AbiCritical error(s)" % nfixed) # update database flow.pickle_dump() # Submit the tasks that are ready. try: nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10) self.nlaunch += nlaunch if nlaunch: cprint("[%s] Number of launches: %d" % (time.asctime(), nlaunch), "yellow") except Exception: excs.append(straceback()) # check status. flow.show_status() if excs: logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs)) self.exceptions.extend(excs) def callback(self): """The function that will be executed by the scheduler.""" try: return self._callback() except: # All exceptions raised here will trigger the shutdown! s = straceback() self.exceptions.append(s) # This is useful when debugging #try: # print("Exception in callback, will cancel all tasks") # for task in self.flow.iflat_tasks(): # task.cancel() #except Exception: # pass self.shutdown(msg="Exception raised in callback!\n" + s) def _callback(self): """The actual callback.""" if self.debug: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if all_ok: return self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit") # Handle failures. err_lines = [] # Shall we send a reminder to the user? delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.remindme_s: self.num_reminders += 1 msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. Aborting now" % retcode) err_lines.append(msg) #if delta_etime.total_seconds() > self.max_etime_s: # err_lines.append("\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s) # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.max_num_pyexcs: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.max_num_pyexcs) err_lines.append(boxed(msg)) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) 
if self.nlaunch > self.safety_ratio * self.flow.num_tasks: msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_lines.append(boxed(msg)) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.max_num_abierrs: msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.max_num_abierrs) err_lines.append(boxed(msg)) # Test on the presence of deadlocks. g = self.flow.find_deadlocks() if g.deadlocked: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables, "\nrunning\n", g.running) if g.deadlocked and not g.runnables and not g.running: err_lines.append("No runnable job with deadlocked tasks:\n%s." % str(g.deadlocked)) if not g.runnables and not g.running: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() if not g.runnables and not g.running: err_lines.append("No task is running and cannot find other tasks to submit.") # Something wrong. Quit if err_lines: # Cancel all jobs. if self.killjobs_if_errors: cprint("killjobs_if_errors set to 'yes' in scheduler file. Will kill jobs before exiting.", "yellow") try: num_cancelled = 0 for task in self.flow.iflat_tasks(): num_cancelled += task.cancel() cprint("Killed %d tasks" % num_cancelled, "yellow") except Exception as exc: cprint("Exception while trying to kill jobs:\n%s" % str(exc), "red") self.shutdown("\n".join(err_lines)) return len(self.exceptions) def cleanup(self): """Cleanup routine: remove the pid file and save the pickle database""" try: os.remove(self.pid_file) except OSError as exc: logger.critical("Could not remove pid_file: %s", exc) # Save the final status of the flow. self.flow.pickle_dump() def shutdown(self, msg): """Shutdown the scheduler.""" try: self.cleanup() self.history.append("Completed on: %s" % time.asctime()) self.history.append("Elapsed time: %s" % self.get_delta_etime()) if self.debug: print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds()) retcode = self.send_email(msg) if self.debug: print("send_mail retcode", retcode) # Write file with the list of exceptions: if self.exceptions: dump_file = os.path.join(self.flow.workdir, "_exceptions") with open(dump_file, "wt") as fh: fh.writelines(self.exceptions) fh.write("Shutdown message:\n%s" % msg) lines = [] app = lines.append app("Submitted on: %s" % time.ctime(self.start_time)) app("Completed on: %s" % time.asctime()) app("Elapsed time: %s" % str(self.get_delta_etime())) if self.flow.all_ok: app("Flow completed successfully") else: app("Flow %s didn't complete successfully" % repr(self.flow.workdir)) app("use `abirun.py FLOWDIR debug` to analyze the problem.") app("Shutdown message:\n%s" % msg) print("") print("\n".join(lines)) print("") self._do_customer_service() if self.flow.all_ok: print("Calling flow.finalize()...") self.flow.finalize() #print("finalized:", self.flow.finalized) if self.rmflow: app("Flow directory will be removed...") try: self.flow.rmtree() except Exception: logger.warning("Ignoring exception while trying to remove flow dir.") finally: # Shutdown the scheduler thus allowing the process to exit. 
logger.debug('This should be the shutdown of the scheduler') # Unschedule all the jobs before calling shutdown #self.sched.print_jobs() if not has_sched_v3: for job in self.sched.get_jobs(): self.sched.unschedule_job(job) #self.sched.print_jobs() self.sched.shutdown() # Uncomment the line below if shutdown does not work! #os.system("kill -9 %d" % os.getpid()) def send_email(self, msg, tag=None): """ Send an e-mail before completing the shutdown. Returns 0 if success. """ try: return self._send_email(msg, tag) except: self.exceptions.append(straceback()) return -2 def _send_email(self, msg, tag): if self.mailto is None: return -1 header = msg.splitlines() app = header.append app("Submitted on: %s" % time.ctime(self.start_time)) app("Completed on: %s" % time.asctime()) app("Elapsed time: %s" % str(self.get_delta_etime())) app("Number of errored tasks: %d" % self.flow.num_errored_tasks) app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks) strio = cStringIO() strio.writelines("\n".join(header) + 4 * "\n") # Add the status of the flow. self.flow.show_status(stream=strio) if self.exceptions: # Report the list of exceptions. strio.writelines(self.exceptions) if tag is None: tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]" return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
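# A short usage sketch for the scheduler above. Hedged: build_my_flow() is a
# placeholder for whatever constructs the abipy Flow, and the keyword values are
# examples only; they correspond to the same options read from scheduler.yml.
sched = PyFlowScheduler(seconds=30, mailto="user@example.com", max_njobs_inqueue=100)
flow = build_my_flow()       # assumption: returns a Flow with a workdir
sched.add_flow(flow)         # writes the pid file inside flow.workdir
retcode = sched.start()      # blocks until all tasks reach S_OK or an error triggers shutdown
print("scheduler exited with %d" % retcode)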
def log_STAMPS(): STAMPS_list = map(lambda item: (item[0], time.strftime("%x %X %Z", time.localtime(item[1]))), STAMPS.iteritems()) stamps_s = ("STAMPS=%s" % dict(STAMPS_list)) log(stamps_s) STAMPS_list = map(lambda item: (item[0], time.strftime("%x %X %Z", time.localtime(item[1]))), STAMPS.iteritems()) STAMPS_str = str(dict(STAMPS_list)) if options.verbose: print("STAMPS=%s" % STAMPS_str) log("STAMPS=%s" % STAMPS_str) sched.add_interval_job(log_STAMPS, hours=1) sched.start() for job in sched.get_jobs(): print("%s(%s) (trigger %s, next run at %s)" % (job.name, job.args, job.trigger, job.next_run_time)) try: while True: # When used pass instead of time.sleep() function the script uses 100% # of cpu. time.sleep(60) except KeyboardInterrupt: sys.exit(os.EX_OK)
__author__ = 'wenychan' # For version apscheduler 2.1.2 import time import atexit from apscheduler.scheduler import Scheduler # Start the scheduler sched = Scheduler(daemonic=True) # register an exit handler to shut the scheduler down when the process exits atexit.register(lambda: sched.shutdown()) sched.start() def job_function(): print "Hello World" time.sleep(3) print "exit Hello World" # Schedule job_function at several short intervals (1-6 seconds) sched.add_interval_job(job_function, seconds=1, name='test_job1', max_instances=1) sched.add_interval_job(job_function, seconds=3, name='test_job2', max_instances=1, max_runs=1) # sched.add_interval_job(job_function, seconds=5, name='test_job3', max_instances=1) sched.add_interval_job(job_function, seconds=5, name='test_job3', max_instances=1) sched.add_interval_job(job_function, seconds=6, name='test_job4', max_instances=1) sched.add_interval_job(job_function, seconds=3, name='test_job5', max_instances=1) sched.add_interval_job(job_function, seconds=1, name='test_job6', max_instances=1) print len(sched.get_jobs()) time.sleep(5) print len(sched.get_jobs()) time.sleep(50)
class TNActionScheduler (TNArchipelPlugin): """ This plugin allows to create scheduled actions. """ def __init__(self, configuration, entity, entry_point_group): """ Initialize the plugin. @type configuration: Configuration object @param configuration: the configuration @type entity: L{TNArchipelEntity} @param entity: the entity that owns the plugin @type entry_point_group: string @param entry_point_group: the group name of plugin entry_point """ TNArchipelPlugin.__init__(self, configuration=configuration, entity=entity, entry_point_group=entry_point_group) self.scheduler = Scheduler() self.scheduler.start() self.database = sqlite3.connect(self.configuration.get("SCHEDULER", "database"), check_same_thread=False) self.database.execute("create table if not exists scheduler (entity_uuid text, job_uuid text, action text, year text, month text, day text, hour text, minute text, second text, comment text, params text)") self.database.commit() self.cursor = self.database.cursor() self.restore_jobs() self.supported_actions_for_vm = ("create", "shutdown", "destroy", "suspend", "resume", "reboot", "migrate", "pause") self.supported_actions_for_hypervisor = ("alloc", "free") # permissions self.entity.permission_center.create_permission("scheduler_jobs", "Authorizes user to get the list of task", False) self.entity.permission_center.create_permission("scheduler_schedule", "Authorizes user to schedule a task", False) self.entity.permission_center.create_permission("scheduler_unschedule", "Authorizes user to unschedule a task", False) self.entity.permission_center.create_permission("scheduler_actions", "Authorizes user to get available actions", False) # hooks if self.entity.__class__.__name__ == "TNArchipelVirtualMachine": self.entity.register_hook("HOOK_VM_TERMINATE", method=self.vm_terminate) ### Plugin interface def register_handlers(self): """ This method will be called by the plugin user when it will be necessary to register module for listening to stanza. """ self.entity.xmppclient.RegisterHandler('iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER) def unregister_handlers(self): """ Unregister the handlers. """ self.entity.xmppclient.UnregisterHandler('iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER) @staticmethod def plugin_info(): """ Return informations about the plugin. @rtype: dict @return: dictionary contaning plugin informations """ plugin_friendly_name = "Action Scheduler" plugin_identifier = "action_scheduler" plugin_configuration_section = "SCHEDULER" plugin_configuration_tokens = ["database"] return { "common-name" : plugin_friendly_name, "identifier" : plugin_identifier, "configuration-section" : plugin_configuration_section, "configuration-tokens" : plugin_configuration_tokens } ### Persistance def delete_job(self, uid): """ Remove a job from the database. @type uid: string @param uid: the uid of the job to remove """ self.cursor.execute("DELETE FROM scheduler WHERE job_uuid=?", (uid, )) self.database.commit() def save_jobs(self, uid, action, year, month, day, hour, minute, second, comment, params=None): """ Save a job in the database. 
@type uid: string @param uid: the uid of the job @type action: string @param action: the action @type year: string @param year: year of execution @type month: string @param month: month of execution @type day: string @param day: day of execution @type hour: string @param hour: hour of execution @type minute: string @param minute: minute of execution @type second: string @param second: second of execution @type comment: string @param comment: comment about the job @type params: string @param params: random parameter of the job """ entityClass = self.entity.__class__.__name__ if entityClass == "TNArchipelVirtualMachine": entity_uid = self.entity.uuid elif entityClass == "TNArchipelHypervisor": entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID self.cursor.execute("INSERT INTO scheduler VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (entity_uid, uid, action, year, month, day, hour, minute, second, comment, params, )) self.database.commit() def restore_jobs(self): """ Restore the jobs from the database. """ entityClass = self.entity.__class__.__name__ if entityClass == "TNArchipelVirtualMachine": entity_uid = self.entity.uuid elif entityClass == "TNArchipelHypervisor": entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID self.cursor.execute("SELECT * FROM scheduler WHERE entity_uuid=?", (entity_uid, )) for values in self.cursor: try: entity_uuid, job_uuid, action, year, month, day, hour, minute, second, comment, params = values str_date = "%s/%s/%s %s:%s:%s" % (year, month, day, hour, minute, second) self.scheduler.add_cron_job(self.do_job_for_vm, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[action, job_uuid, str_date, comment]) except Exception as ex: self.entity.log.error("unable to restore a job: %s" % str(ex)) def vm_terminate(self, origin, user_info, arguments): """ Close the database connection. @type origin: TNArchipelEntity @param origin: the origin of the hook @type user_info: object @param user_info: random user information @type arguments: object @param arguments: runtime argument """ self.database.close() ### Jobs def get_jod_with_uid(self, uid): """ Get a job with given uid. @type uid: string @param uid: the uid of the job """ if hasattr(self.scheduler, "get_jobs"): jobs = self.scheduler.get_jobs() else: jobs = self.scheduler.jobs for job in jobs: if str(job.args[1]) == uid: return job return None def do_job_for_vm(self, action, uid, str_date, comment, param): """ Perform the job. @type action: string @param action: the action to execute @type uid: string @param uid: the uid of the job @type str_date: string @param str_date: the date of the job @type comment: string @param comment: comment about the job @type param: string @param param: a random parameter to give to job """ if action == "create": self.entity.create() elif action == "shutdown": self.entity.shutdown() elif action == "destroy": self.entity.destroy() elif action == "suspend": self.entity.suspend() elif action == "resume": self.entity.resume() elif action == "pause": if self.entity.libvirt_status == 1: self.entity.suspend() elif self.entity.libvirt_status == 3: self.entity.resume() elif action == "migrate": pass job = self.get_jod_with_uid(uid) if not job or not self.scheduler.is_job_active(job): self.delete_job(uid) self.entity.push_change("scheduler", "jobexecuted") def do_job_for_hypervisor(self, action, uid, str_date, comment, param): """ Perform the job. 
@type action: string @param action: the action to execute @type uid: string @param uid: the uid of the job @type str_date: string @param str_date: the date of the job @type comment: string @param comment: comment about the job @type param: string @param param: a random parameter to give to job """ if action == "alloc": self.entity.alloc() elif action == "free": pass #self.entity.free() job = self.get_jod_with_uid(uid) if not job or not self.scheduler.is_job_active(job): self.delete_job(uid) self.entity.push_change("scheduler", "jobexecuted") ### Process IQ def process_iq(self, conn, iq): """ This method is invoked when a ARCHIPEL_NS_VM_SCHEDULER IQ is received. It understands IQ of type: - jobs - schedule - unschedule @type conn: xmpp.Dispatcher @param conn: ths instance of the current connection that send the stanza @type iq: xmpp.Protocol.Iq @param iq: the received IQ """ reply = None action = self.entity.check_acp(conn, iq) self.entity.check_perm(conn, iq, action, -1, prefix="scheduler_") if action == "schedule": reply = self.iq_schedule(iq) elif action == "unschedule": reply = self.iq_unschedule(iq) elif action == "jobs": reply = self.iq_jobs(iq) elif action == "actions": reply = self.iq_actions(iq) if reply: conn.send(reply) raise xmpp.protocol.NodeProcessed def iq_schedule(self, iq): """ Schedule a task. @type iq: xmpp.Protocol.Iq @param iq: the received IQ @rtype: xmpp.Protocol.Iq @return: a ready to send IQ containing the result of the action """ try: reply = iq.buildReply("result") job = iq.getTag("query").getTag("archipel").getAttr("job") entityClass = self.entity.__class__.__name__ param = None if entityClass == "TNArchipelVirtualMachine" and not job in self.supported_actions_for_vm: raise Exception("action %s is not valid" % job) elif entityClass == "TNArchipelHypervisor" and not job in self.supported_actions_for_hypervisor: raise Exception("action %s is not valid" % job) year = iq.getTag("query").getTag("archipel").getAttr("year") month = iq.getTag("query").getTag("archipel").getAttr("month") day = iq.getTag("query").getTag("archipel").getAttr("day") hour = iq.getTag("query").getTag("archipel").getAttr("hour") minute = iq.getTag("query").getTag("archipel").getAttr("minute") second = iq.getTag("query").getTag("archipel").getAttr("second") comment = iq.getTag("query").getTag("archipel").getAttr("comment") if iq.getTag("query").getTag("archipel").has_attr("param"): param = iq.getTag("query").getTag("archipel").getAttr("param") uid = str(uuid.uuid1()) str_date = "%s-%s-%s @ %s : %02d : %02d" % (year, month, day, hour, int(minute), int(second)) if entityClass == "TNArchipelVirtualMachine": func = self.do_job_for_vm elif entityClass == "TNArchipelHypervisor": func = self.do_job_for_hypervisor self.scheduler.add_cron_job(func, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[job, uid, str_date, comment, param]) self.save_jobs(uid, job, year, month, day, hour, minute, second, comment, param) self.entity.push_change("scheduler", "scheduled") except Exception as ex: reply = build_error_iq(self, ex, iq) return reply def iq_jobs(self, iq): """ Get jobs. 
@type iq: xmpp.Protocol.Iq @param iq: the received IQ @rtype: xmpp.Protocol.Iq @return: a ready to send IQ containing the result of the action """ try: reply = iq.buildReply("result") nodes = [] if hasattr(self.scheduler, "get_jobs"): jobs = self.scheduler.get_jobs() else: jobs = self.scheduler.jobs for job in jobs: job_node = xmpp.Node(tag="job", attrs={"action": str(job.args[0]), "uid": str(job.args[1]), "date": str(job.args[2]), "comment": job.args[3]}) nodes.append(job_node) reply.setQueryPayload(nodes) except Exception as ex: reply = build_error_iq(self, ex, iq) return reply def iq_unschedule(self, iq): """ Unschedule a job. @type iq: xmpp.Protocol.Iq @param iq: the received IQ @rtype: xmpp.Protocol.Iq @return: a ready to send IQ containing the result of the action """ try: reply = iq.buildReply("result") uid = iq.getTag("query").getTag("archipel").getAttr("uid") the_job = self.get_jod_with_uid(uid) if not the_job: raise Exception("job with uid %s doesn't exists" % uid) self.delete_job(uid) self.scheduler.unschedule_job(the_job) self.entity.push_change("scheduler", "unscheduled") except Exception as ex: reply = build_error_iq(self, ex, iq) return reply def iq_actions(self, iq): """ Get available actions. @type iq: xmpp.Protocol.Iq @param iq: the received IQ @rtype: xmpp.Protocol.Iq @return: a ready to send IQ containing the result of the action """ try: reply = iq.buildReply("result") entityClass = self.entity.__class__.__name__ if entityClass == "TNArchipelVirtualMachine": actions = self.supported_actions_for_vm elif entityClass == "TNArchipelHypervisor": actions = self.supported_actions_for_hypervisor nodes = [] for action in actions: action_node = xmpp.Node(tag="action") action_node.setData(action) nodes.append(action_node) reply.setQueryPayload(nodes) except Exception as ex: reply = build_error_iq(self, ex, iq) return reply
class PhoneManager: # Default Sleep time between checks kDefaultLoopSleep = 10 # SMS Forwarding Information bForwardSMS = False sForwardSMSNumber = "" bCallForwardNotify = False # Call Forwarding Information. bEnableCallForward = False sCallForwardNumber = "" bSMSForwardNotify = False # SMS Auto Reply Informaiton. bAutoReply = False sAutoReplyText = "" bDeleteAfterResponse = False # Diable Call Forwarding bDisableCallForward = False # Determins if all SMS's should be processed, or just unread mesaages. bCheckAllSMS = True # The last result of a Send SMS bLastResult = True # MSISDN of last received SMS sLastMessageFrom = "" # Maintains a list of numbers that have received an autoreply listAutoReply = [] # THread for prcessing _thread = None _bActive = False _lastRunTime = None # IMSI _IMSI = None # Message Queue messageQueue = None # Config File. config = None configFile = '' kModemSection = 'modem' kIMSIOption = 'IMSI' kSettingsSection = 'settings' kForwardOption = 'forwardsms' kDivertOption = 'divert' kAutoReplyOption = 'autoreply' kScheduleSetting = 'schedule' kCommandOption = 'command' kOptionOption = 'option' kDayOption = 'day' kWeekOption = 'week' kDayOfWeekOption = 'day_of_week' kHourOption = 'hour' kMinuteOption = 'minute' # Scheudler. sched = None # Config file changes configFileLastChanged = 0 def __init__(self, modems, configFile): logger.debug('Init Modem, modems=' + str(modems) + ' configFileconfigFile=' + configFile) if modems == None: raise Exception('A Modem Must be Provided') if configFile == None: raise Exception('A config file must be provided.') self.messageQueue = [] self.sched = Scheduler() self.loadConfig(configFile) if self._IMSI in modems: self.modem = modems[self._IMSI] else: raise Exception('No modem for selected IMSI') def loadConfig(self, configFile): logger.info('Loading config:' + configFile) self.configFile = configFile workConfigFile = configFile + '~' logger.debug('Renaming to ' + workConfigFile) shutil.copyfile(configFile, workConfigFile) logger.debug('Clearing Scheduled Tasks') jobs = self.sched.get_jobs() for job in jobs: logger.debug('Removing ' + job.__str__()) self.sched.unschedule_job(job) try: config = ConfigParser.ConfigParser() config.read(workConfigFile) self.config = config IMSI = config.get(self.kModemSection, self.kIMSIOption) logger.debug('modem/IMSI-' + IMSI) self._IMSI = IMSI if config.has_option(self.kSettingsSection, self.kForwardOption): number = config.get(self.kSettingsSection, self.kForwardOption) if len(number) > 0: logger.debug('Forward SMS Number-' + number) self.enableSMSForwarding(number) else: self.disableSMSForwarding() if config.has_option(self.kSettingsSection, self.kDivertOption): number = config.get(self.kSettingsSection, self.kDivertOption) if len(number) > 0: logger.debug('Divert Number-' + number) self.enableCallForward(number) else: config.set(self.kSettingsSection, self.kDivertOption, '') #Diable call forward, need to right into main thread if config.has_option(self.kSettingsSection, self.kAutoReplyOption): autoReplyText = config.get(self.kSettingsSection, self.kAutoReplyOption) if len(autoReplyText) > 0: logger.debug('Auto Reply Text-' + autoReplyText) self.enableSMSAutoReply(autoReplyText) else: self.disableSMSAutoReply() else: self.disableSMSAutoReply() for section in config.sections(): if section.startswith(self.kScheduleSetting): logger.debug('Adding schedule ' + section) if config.has_option(section, self.kCommandOption): command = config.get(section, self.kCommandOption) if config.has_option(section, 
self.kOptionOption): option = config.get(section, self.kOptionOption) optionList = option.split(',') _optionDict = [] for opt in optionList: logger.debug('Option: ' + opt) _optionDict.append(self.getStringAsType(opt)) logger.debug('Options: ' + str(_optionDict)) else: option = '' _day = self.getSetConfigOption(config, section, self.kDayOption) _week = self.getSetConfigOption( config, section, self.kWeekOption) _dayOfWeek = self.getSetConfigOption( config, section, self.kDayOfWeekOption) _hour = self.getSetConfigOption( config, section, self.kHourOption) _minute = self.getSetConfigOption( config, section, self.kMinuteOption) logger.debug('Add Schdule. Comamnd=[' + command + '] options [' + option + '] day=' + _day + ' week=' + _week + ' day_of_week=' + _dayOfWeek + ' hour=' + _hour + ' minutes=' + _minute) if command == self.kForwardOption and len(option) > 0: job = self.sched.add_cron_job( self.enableSMSForwarding, day=_day, week=_week, day_of_week=_dayOfWeek, hour=_hour, minute=_minute, args=_optionDict) if command == self.kForwardOption and len(option) == 0: job = self.sched.add_cron_job( self.disableSMSForwarding, day=_day, week=_week, day_of_week=_dayOfWeek, hour=_hour, minute=_minute) if command == self.kDivertOption and len(option) > 0: job = self.sched.add_cron_job( self.enableCallForward, day=_day, week=_week, day_of_week=_dayOfWeek, hour=_hour, minute=_minute, args=_optionDict) if command == self.kDivertOption and len(option) == 0: job = self.sched.add_cron_job( self.disableCallForward, day=_day, week=_week, day_of_week=_dayOfWeek, hour=_hour, minute=_minute) if command == self.kAutoReplyOption and len( option) > 0: job = self.sched.add_cron_job( self.enableSMSAutoReply, day=_day, week=_week, day_of_week=_dayOfWeek, hour=_hour, minute=_minute, args=_optionDict) if command == self.kAutoReplyOption and len( option) == 0: job = self.sched.add_cron_job( self.disableSMSAutoReply, day=_day, week=_week, day_of_week=_dayOfWeek, hour=_hour, minute=_minute) if job is not None: logger.info(job.__str__()) self.configFileLastChanged = time.ctime( os.path.getmtime(self.configFile)) logger.debug('Config file last changed: ' + self.configFileLastChanged) except Exception, e: logger.critical('Error loading config file') logger.exception(e)
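# A hedged sketch of the config layout loadConfig() parses, assembled from the k*
# option constants above. Section and key names come from the code; every value
# shown is an invented example, not taken from a real deployment.
EXAMPLE_PHONE_MANAGER_CONFIG = """
[modem]
IMSI = 123456789012345

[settings]
forwardsms = +15550100
divert =
autoreply = Out of office, back at 17:00

[schedule_evening_divert]
command = divert
option = +15550199
day_of_week = mon-fri
hour = 18
minute = 0
"""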
class Sched(Basemodule): # ################################################################################ # initialization of module and optional load of config files # ################################################################################ def __init__(self, instance_queue, global_queue): # # "sched|port|command or action" # self.logger = logging.getLogger('Hasip.sched') self.sched = Scheduler() self.items = ConfigItemReader() self.jobs_config = ConfigJobReader() self.mod_list = self.items.get_items_dict() # getting module list from item file self.queue_identifier = 'sched' # this is the 'module address' self.instance_queue = instance_queue # worker queue to receive jobs self.global_queue = global_queue # queue to communicate back to main thread self.jobstore = {} self.sched.start() # read jobs configuration self.jobstore = self.jobs_config.get_jobs_dict() for job in self.jobstore.keys(): self.logger.debug(self.jobstore[job][2]) self.sched.add_cron_job(self.send_msg, name=job, year=self.jobstore[job][2], month=self.jobstore[job][3], day=self.jobstore[job][4], week=self.jobstore[job][5], day_of_week=self.jobstore[job][6], hour=self.jobstore[job][7], minute=self.jobstore[job][8], second=self.jobstore[job][9], args=(self.jobstore[job][0], self.jobstore[job][1])) # ################################################################################ # main thread of this module file which runs in background and constantly checks working queue for new tasks. # ################################################################################ def worker(self): while True: instance_queue_element = self.instance_queue.get(True) _senderport = instance_queue_element.get("module_from_port") _sender = instance_queue_element.get("module_from") _port = instance_queue_element.get("module_addr") _action = instance_queue_element.get("cmd") _optargs = instance_queue_element.get("opt_args") options = { "create": self.create, "delete": self.delete, "list_jobs": self.list_jobs } options[_action](_sender, _senderport, _port, _optargs) # ################################################################################ # # "private" methods from here on... 
# # ################################################################################ #Function to put jobs in the running scheduler job queue and store them persistent def create(self, sender, senderport, port, optargs): if optargs.get('name') in self.jobstore: self.delete(sender, senderport, port, optargs.get('name')) self.sched.add_cron_job(self.send_msg, name=optargs.get('name'), year=optargs.get('year'), month=optargs.get('month'), day=optargs.get('day'), week=optargs.get('week'), day_of_week=optargs.get('day_of_week'), hour=optargs.get('hour'), minute=optargs.get('minute'), second=optargs.get('second'), args=(optargs.get('module'), optargs.get('action'))) self.jobstore.update( { optargs.get('name'): [ optargs.get('module'), optargs.get('action'), optargs.get('year'), optargs.get('month'), optargs.get('day'), optargs.get('week'), optargs.get('day_of_week'), optargs.get('hour'), optargs.get('minute'), optargs.get('second') ] } ) self.write_to_config() def delete(self, sender, senderport, port, optargs): for job in self.sched.get_jobs(): if job.name == optargs: self.sched.unschedule_job(job) del self.jobstore[job.name] self.write_to_config() def list_jobs(self, sender, senderport, port, optargs): dict1 = {} string = [] for job in self.jobstore.keys(): dict1['jobname'] = job dict1['device'] = self.jobstore[job][0] dict1['action'] = self.jobstore[job][1] dict1['year'] = self.jobstore[job][2] dict1['month'] = self.jobstore[job][3] dict1['day'] = self.jobstore[job][4] dict1['week'] = self.jobstore[job][5] dict1['day_of_week'] = self.jobstore[job][6] dict1['hour'] = self.jobstore[job][7] dict1['min'] = self.jobstore[job][8] dict1['sec'] = self.jobstore[job][9] string.append(dict1.copy()) dict1.clear() self.logger.debug("help") queue_msg = { 'module_from_port': str(port), 'module_from': self.queue_identifier, 'module_rcpt': sender, 'module_addr': senderport, 'cmd': 'reply', 'opt_args': string } self.global_queue.put(queue_msg) def send_msg(self, module, action): # ######################################## if module in self.mod_list.keys(): # checking existence of requested module rcpt = self.mod_list[module][0] # setting receiving module from item file mid = self.mod_list[module][1] # setting module id from item file msg = { # creating queue message 'module_from_port': 0, # ######################################## 'module_from': 'sched', 'module_rcpt': rcpt, 'module_addr': mid, 'cmd': action, 'opt_args': '' } self.global_queue.put(msg) def write_to_config(self): with open('/home/hasip/hasip/config/jobs/example.jobs', 'w') as f: conf = ConfigParser() for job in self.jobstore.keys(): try: conf.add_section(job) except Exception: pass conf.set(job.upper(), 'module', self.jobstore[job][0]) conf.set(job.upper(), 'action', self.jobstore[job][1]) conf.set(job.upper(), 'year', self.jobstore[job][2]) conf.set(job.upper(), 'month', self.jobstore[job][3]) conf.set(job.upper(), 'week', self.jobstore[job][4]) conf.set(job.upper(), 'day', self.jobstore[job][5]) conf.set(job.upper(), 'day_of_week', self.jobstore[job][6]) conf.set(job.upper(), 'hour', self.jobstore[job][7]) conf.set(job.upper(), 'minute', self.jobstore[job][8]) conf.set(job.upper(), 'second', self.jobstore[job][9]) conf.write(f)
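# A hedged sketch of the queue message worker() dispatches on. The field names match
# what worker() and create() read; the values are illustrative only and assume a
# module named 'light_livingroom' exists in the items file.
instance_queue.put({
    'module_from_port': 0,
    'module_from': 'web',
    'module_addr': 'sched',
    'cmd': 'create',
    'opt_args': {
        'name': 'night_light_on',
        'module': 'light_livingroom',
        'action': 'on',
        'year': None, 'month': None, 'day': None, 'week': None,
        'day_of_week': 'mon-sun', 'hour': '22', 'minute': '0', 'second': '0',
    },
})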
class EventScheduler(): logging.basicConfig() """Class to scheduler regular events in a similar manner to cron.""" __mysql_url = 'mysql+pymysql://powermonitor:%s@localhost/powermonitor' \ % str(base64.b64decode(bytes('cDB3M3JtMG4xdDBy'))) '''This determines the number of seconds after the designated run time that the job is still allowed to be run. If jobs are not being run, try increasing this in increments of 1.''' __GRACE_PERIOD = 31536000 # Amazing grace! Time in seconds before the job is considered misfired. Currently a year __COALESCE = True # Force the job to only run once instead of retrying multiple times '''If there is a problem with thread concurrency, play around with these values. You'd think with all these threads in the pool that the filter would get clogged up!''' __threadpool_corethreads = 0 # Maximum number of persistent threads in the pool __threadpool_maxthreads = 20 # Maximum number of total threads in the pool __threadpool_keepalive = 1 # Seconds to keep non-core worker threads in the pool def __init__(self, start=True): try: config = { 'apscheduler.daemon': True, 'apscheduler.standalone': False, 'apscheduler.threadpool.core_threads': self.__threadpool_corethreads, 'apscheduler.threadpool.max_threads': self.__threadpool_maxthreads, 'apscheduler.threadpool.keepalive': self.__threadpool_keepalive, 'apscheduler.coalesce': self.__COALESCE } self.__sched = Scheduler(config) '''Add the SQLAlchemy job store as the default. This was surprisingly far less tedious than getting the shelve job store working.''' self.__sched.add_jobstore( SQLAlchemyJobStore(url=self.__mysql_url, tablename='SCHEDULE'), 'default') atexit.register(lambda: self.__sched.shutdown(wait=False) ) # Stop the scheduler when the program exits if start: self.__sched.start() except KeyError: logging.warning('An error occurred starting the scheduler.') def start_scheduler(self): self.__sched.start() def add_cron_event(self, func, name, year=None, month=None, week=None, day=None, day_of_week=None, hour=None, minute=None, second=None, start_date=None, *args, **kwargs): """Add a cron like event to the schedule. Each job must be given a name in case it needs to be removed. The following expressions can be used in each field: Expression Field Description * any Fire on every value */a any Fire on every 'a' values, starting from the minimum a-b any Fire on any value in the 'a-b' range (a must be smaller than b a-b/c any Fire every 'c' values within the 'a-b' range xth y day Fire on the x -th occurrence of weekday y within the month last x day Fire on the last occurrence of weekday 'x' within the month last day Fire on the last day within the month x,y,z any Fire on any matching expression; can combine any number of any of the above expressions If you want to add **options to the event, use kwargs (keyword arguments dictionary)""" if self.__sched is not None: event_exists = False if self.__find_event(name) is not None: event_exists = True if not event_exists: self.__sched.add_cron_job( func=func, name=name, year=year, month=month, day=day, week=week, day_of_week=day_of_week, hour=hour, minute=minute, second=second, start_date=start_date, args=args, kwargs=kwargs, misfire_grace_time=self.__GRACE_PERIOD) logging.info('New cron event added') else: '''Every event needs a unique name so we can keep track of the little bastards. 
And please use descriptive names so that they can be properly identified in the job schedule.''' logging.warning('add_cron_event: Event already exists') raise EventExistsError('A job with name %s already exists' % name) else: raise SchedulerNotFoundError( 'add_cron_event: Scheduler does not exist. It may not have started.' ) def __find_event(self, event_name): if self.__sched is not None: events = self.__sched.get_jobs() for event in events: if event.name == event_name: return event else: return None else: logging.warning( '__find_event: Scheduler does not exist. It may not have started.' ) raise SchedulerNotFoundError( 'Scheduler does not exist. It may not have started.') def add_onceoff_event(self, func, name, date, args=None): """Add a once off event to the schedule. The job is executed once at the specified date and time. Date/time format: YYYY-MM-DD HH:MM:SS""" if self.__sched is not None: try: if args is None: # If there are no arguments to be passed to the function self.__sched.add_date_job( func=func, name=name, date=date, misfire_grace_time=self.__GRACE_PERIOD) else: # If there are arguments to be passed to the function self.__sched.add_date_job( func=func, name=name, date=date, args=args, misfire_grace_time=self.__GRACE_PERIOD) except ValueError: '''If the event is in the past, it will not run. This program is not capable of manipulating space and time. Try import __time_travel__''' raise EventWontRunError( 'The event will not run: Event time has expired.') logging.info('New once off event added') else: logging.warning( 'add_onceoff_event: Scheduler does not exist. It may not have started.' ) raise SchedulerNotFoundError( 'Scheduler does not exist. It may not have started.') def remove_event(self, event_name): """Remove the event 'event_name' from the schedule.""" if self.__sched is not None: removed = False event = self.__find_event(event_name=event_name) if event is not None: # If the event exists, remove it self.__sched.unschedule_job(event) removed = True if not removed: '''Raise an error so that it can be handled correctly''' logging.warning('remove_event: Event not found for removal.') raise EventNotFoundError('Event not found for removal: %s' % event_name) else: raise SchedulerNotFoundError( 'remove_event: Scheduler does not exist. It may not have started.' ) def get_jobs(self): """Get the list of events currently in the job store.""" if self.__sched is not None: return self.__sched.get_jobs() else: raise SchedulerNotFoundError( 'get_events: Scheduler does not exist. It may not have started.' ) def get_job_names(self): """ Get the names of all the jobs in the job store :return: list """ jobs = self.get_jobs() job_list = [] if jobs: for job in jobs: job_list.append(job.name) return job_list def get_scheduler(self): """Returns the Scheduler object. Rather add functionality to this class than call this method.""" if self.__sched is not None: return self.__sched else: raise SchedulerNotFoundError( 'get_scheduler: Scheduler does not exist. It may not have started.' )
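# A hedged usage sketch for EventScheduler; poll_sensors() and the job name are
# illustrative only, and running it requires the MySQL job store configured above.
def poll_sensors():
    print('polling...')

es = EventScheduler()                                                # starts the scheduler with the SQLAlchemy job store
es.add_cron_event(poll_sensors, name='poll_sensors', minute='*/5')   # run every five minutes
print(es.get_job_names())                                            # e.g. ['poll_sensors']
es.remove_event('poll_sensors')                                      # raises EventNotFoundError if the name is unknown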
def main(): configure_logging() logger = logging.getLogger('taskhost') config = {} try: with open(_tasks_config_file) as f: config = json.loads(f.read()) logger.debug('Successfully read configuration file.') except Exception as e: logger.critical('Cannot read configuration file: {0}' .format(_tasks_config_file)) logger.critical(e) sys.exit(1) from simplegauges.datastores.azuretable import AzureGaugeDatastore gauges_ds = AzureGaugeDatastore(config['azure.account'], config['azure.key'], config['azure.table']) gauge_factory = simplegauges.gauge_factory(gauges_ds) tasks.set_simplegauges_factory(gauge_factory) tasks.set_config(config) import fixture # should be imported after setting configs for decorators if not fixture.tasks: logger.error('No tasks found in the fixture.py') sys.exit(1) errors = False for task in fixture.tasks: method = task[0] name = '{0}.{1}'.format(method.__module__, method.__name__) try: task[0]() logger.info('Successfully bootstrapped: {0}'.format(name)) except Exception as e: errors = True logger.error('Error while bootstrapping: {0}'.format(name)) logger.error(e) if errors: logger.info('Starting scheduler in 10 seconds...') time.sleep(10) else: logger.info('Starting scheduler...') # at this point all tasks ran once successfully sched = Scheduler() # schedule tasks for task in fixture.tasks: cron_kwargs = parse_cron_tuple(task[1]) sched.add_cron_job(task[0], **cron_kwargs) sched.start() logger.info('Scheduler started with {0} jobs.' .format(len(sched.get_jobs()))) now = datetime.datetime.now() for j in sched.get_jobs(): logger.debug('Scheduled: {0}.{1}, next run:{2}' .format(j.func.__module__, j.func.__name__, j.compute_next_run_time(now))) # deamonize the process while True: time.sleep(10)
class PyFlowScheduler(object): """ This object schedules the submission of the tasks in an :class:`Flow`. There are two types of errors that might occur during the execution of the jobs: #. Python exceptions #. Abinit Errors. Python exceptions are easy to detect and are usually due to a bug in abinitio or random errors such as IOError. The set of Abinit Errors is much much broader. It includes wrong input data, segmentation faults, problems with the resource manager, etc. Abinitio tries to handle the most common cases but there's still a lot of room for improvement. Note, in particular, that `PyFlowScheduler` will shutdown automatically if #. The number of python exceptions is > MAX_NUM_PYEXC #. The number of Abinit Errors (i.e. the number of tasks whose status is S_ERROR) is > MAX_NUM_ERRORS #. The number of jobs launched becomes greater than (SAFETY_RATIO * total_number_of_tasks). #. The scheduler will send an email to the user (specified by mailto) every REMINDME_S seconds. If the mail cannot be sent, it will shutdown automatically. This check prevents the scheduler from being trapped in an infinite loop. """ # Configuration file. YAML_FILE = "scheduler.yml" USER_CONFIG_DIR = os.path.join(os.getenv("HOME"), ".abinit", "abipy") DEBUG = 0 Error = PyFlowSchedulerError def __init__(self, **kwargs): """ Args: weeks: number of weeks to wait days: number of days to wait hours: number of hours to wait minutes: number of minutes to wait seconds: number of seconds to wait verbose: (int) verbosity level max_njobs_inque: Limit on the number of jobs that can be present in the queue use_dynamic_manager: True if the :class:`TaskManager` must be re-initialized from file before launching the jobs. Default: False max_nlaunch: Maximum number of tasks launched by radpifire (default -1 i.e. no limit) """ # Options passed to the scheduler. self.sched_options = AttrDict( weeks=kwargs.pop("weeks", 0), days=kwargs.pop("days", 0), hours=kwargs.pop("hours", 0), minutes=kwargs.pop("minutes", 0), seconds=kwargs.pop("seconds", 0), #start_date=kwargs.pop("start_date", None), ) if all(not v for v in self.sched_options.values()): raise self.Error("Wrong set of options passed to the scheduler.") self.mailto = kwargs.pop("mailto", None) self.verbose = int(kwargs.pop("verbose", 0)) self.use_dynamic_manager = kwargs.pop("use_dynamic_manager", False) self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200) self.REMINDME_S = float(kwargs.pop("REMINDME_S", 4 * 24 * 3600)) self.MAX_NUM_PYEXCS = int(kwargs.pop("MAX_NUM_PYEXCS", 0)) self.MAX_NUM_ABIERRS = int(kwargs.pop("MAX_NUM_ABIERRS", 0)) self.SAFETY_RATIO = int(kwargs.pop("SAFETY_RATIO", 5)) #self.MAX_ETIME_S = kwargs.pop("MAX_ETIME_S", ) self.max_nlaunch = kwargs.pop("max_nlaunch", -1) if kwargs: raise self.Error("Unknown arguments %s" % kwargs) if has_sched_v3: from apscheduler.schedulers.blocking import BlockingScheduler self.sched = BlockingScheduler() else: from apscheduler.scheduler import Scheduler self.sched = Scheduler(standalone=True) self.nlaunch = 0 self.num_reminders = 1 # Used to keep track of the exceptions raised while the scheduler is running self.exceptions = collections.deque(maxlen=self.MAX_NUM_PYEXCS + 10) # Used to push additional info during the execution. 
self.history = collections.deque(maxlen=100) @classmethod def from_file(cls, filepath): """Read the configuration parameters from a Yaml file.""" with open(filepath, "r") as fh: return cls(**yaml.load(fh)) @classmethod def from_string(cls, s): """Create an istance from string s containing a YAML dictionary.""" stream = cStringIO(s) stream.seek(0) return cls(**yaml.load(stream)) @classmethod def from_user_config(cls): """ Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'. Search first in the working directory and then in the configuration directory of abipy. Raises: RuntimeError if file is not found. """ # Try in the current directory. path = os.path.join(os.getcwd(), cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) # Try in the configuration directory. path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) err_msg = "Cannot locate %s neither in current directory nor in %s" % ( cls.YAML_FILE, path) raise cls.Error(err_msg) def __str__(self): """String representation.""" lines = [self.__class__.__name__ + ", Pid: %d" % self.pid] app = lines.append app("Scheduler options: %s" % str(self.sched_options)) app(80 * "=") app(str(self.flow)) return "\n".join(lines) @property def pid(self): """The pid of the process associated to the scheduler.""" try: return self._pid except AttributeError: self._pid = os.getpid() return self._pid @property def pid_file(self): """ Absolute path of the file with the pid. The file is located in the workdir of the flow """ return self._pid_file @property def flow(self): """`Flow`.""" return self._flow @property def num_excs(self): """Number of exceptions raised so far.""" return len(self.exceptions) def get_delta_etime(self): """Returns a `timedelta` object representing with the elapsed time.""" return timedelta(seconds=(time.time() - self.start_time)) def add_flow(self, flow): """Add an :class:`Flow` flow to the scheduler.""" if hasattr(self, "_flow"): raise self.Error("Only one flow can be added to the scheduler.") pid_file = os.path.join(flow.workdir, "_PyFlowScheduler.pid") if os.path.isfile(pid_file): flow.show_status() err_msg = (""" pid_file %s already exists There are two possibilities: 1) There's an another instance of PyFlowScheduler running 2) The previous scheduler didn't exit in a clean way To solve case 1: Kill the previous scheduler (use 'kill pid' where pid is the number reported in the file) Then you can restart the new scheduler. To solve case 2: Remove the pid_file and restart the scheduler. Exiting""" % pid_file) raise self.Error(err_msg) with open(pid_file, "w") as fh: fh.write(str(self.pid)) self._pid_file = pid_file self._flow = flow def start(self): """ Starts the scheduler in a new thread. Returns True if success. In standalone mode, this method will block until there are no more scheduled jobs. """ self.history.append("Started on %s" % time.asctime()) self.start_time = time.time() if has_sched_v3: self.sched.add_job(self.callback, "interval", **self.sched_options) else: self.sched.add_interval_job(self.callback, **self.sched_options) errors = self.flow.look_before_you_leap() if errors: print(errors) self.exceptions.append(errors) return False # Try to run the job immediately. If something goes wrong return without initializing the scheduler. 
self._runem_all() if self.exceptions: self.cleanup() self.send_email( msg= "Error while trying to run the flow for the first time!\n %s" % self.exceptions) return False self.sched.start() return True def _runem_all(self): """ This function checks the status of all tasks, tries to fix tasks that went unconverged, abicritical, or queuecritical and tries to run all the tasks that can be submitted.+ """ excs = [] flow = self.flow # Allow to change the manager at run-time if self.use_dynamic_manager: from pymatgen.io.abinitio.tasks import TaskManager new_manager = TaskManager.from_user_config() for work in flow: work.set_manager(new_manager) nqjobs = flow.get_njobs_in_queue() if nqjobs is None: nqjobs = 0 print('Cannot get njobs_inqueue') if nqjobs >= self.max_njobs_inqueue: print("Too many jobs in the queue, returning") return if self.max_nlaunch == -1: max_nlaunch = self.max_njobs_inqueue - nqjobs else: max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunch) # check status flow.check_status() flow.show_status() # fix problems # Try to restart the unconverged tasks # todo donot fire here but prepare for fireing in rapidfire for task in self.flow.unconverged_tasks: try: logger.info("Flow will try restart task %s" % task) fired = task.restart() if fired: self.nlaunch += 1 max_nlaunch -= 1 if max_nlaunch == 0: print("Restart: too many jobs in the queue, returning") flow.pickle_dump() return except Exception: excs.append(straceback()) # move here from withing rapid fire ... # fix only prepares for restarting, and sets to ready flow.fix_critical() # update database flow.pickle_dump() #if self.num_restarts == self.max_num_restarts: # info_msg = "Reached maximum number of restarts. Cannot restart anymore Returning" # logger.info(info_msg) # self.history.append(info_msg) # return 1 # Submit the tasks that are ready. try: nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10) self.nlaunch += nlaunch if nlaunch: print("[%s] Number of launches: %d" % (time.asctime(), nlaunch)) except Exception: excs.append(straceback()) flow.show_status() if excs: logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs)) self.exceptions.extend(excs) def callback(self): """The function that will be executed by the scheduler.""" try: return self._callback() except: # All exceptions raised here will trigger the shutdown! self.exceptions.append(straceback()) self.shutdown(msg="Exception raised in callback!") def _callback(self): """The actual callback.""" if self.DEBUG: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) #print('before _runem_all in _callback') self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if self.verbose: print("all_ok", all_ok) if all_ok: self.shutdown( msg= "All tasks have reached S_OK. Will shutdown the scheduler and exit" ) # Handle failures. err_msg = "" # Shall we send a reminder to the user? delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.REMINDME_S: self.num_reminders += 1 msg = ( "Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ( "\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. 
Aborting now" % retcode) err_msg += msg #if delta_etime.total_seconds() > self.MAX_ETIME_S: # err_msg += "\nExceeded MAX_ETIME_S %s. Will shutdown the scheduler and exit" % self.MAX_ETIME_S # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.MAX_NUM_PYEXCS: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.MAX_NUM_PYEXCS) err_msg += boxed(msg) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) if self.nlaunch > self.SAFETY_RATIO * self.flow.num_tasks: msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_msg += boxed(msg) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.MAX_NUM_ABIERRS: msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.MAX_NUM_ABIERRS) err_msg += boxed(msg) # Count the number of tasks with status == S_UNCONVERGED. #if self.flow.num_unconverged_tasks: # # TODO: this is needed to avoid deadlocks, automatic restarting is not available yet # msg = ("Found %d unconverged tasks." # "Automatic restarting is not available yet. Will shutdown the scheduler and exit" # % self.flow.num_unconverged_tasks) # err_msg += boxed(msg) #deadlocks = self.detect_deadlocks() #if deadlocks: # msg = ("Detected deadlocks in flow. Will shutdown the scheduler and exit" # % self.flow.num_unconverged_tasks) # err_msg += boxed(msg) if err_msg: # Something wrong. Quit self.shutdown(err_msg) return len(self.exceptions) def cleanup(self): """Cleanup routine: remove the pid file and save the pickle database""" try: os.remove(self.pid_file) except OSError: logger.critical("Could not remove pid_file") pass # Save the final status of the flow. self.flow.pickle_dump() def shutdown(self, msg): """Shutdown the scheduler.""" try: self.cleanup() #if False and self.flow.has_db: # try: # self.flow.db_insert() # except Exception: # logger.critical("MongoDb insertion failed.") self.history.append("Completed on %s" % time.asctime()) self.history.append("Elapsed time %s" % self.get_delta_etime()) if self.DEBUG: print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds()) retcode = self.send_email(msg) if self.DEBUG: print("send_mail retcode", retcode) # Write file with the list of exceptions: if self.exceptions: dump_file = os.path.join(self.flow.workdir, "_exceptions") with open(dump_file, "w") as fh: fh.writelines(self.exceptions) fh.write("Shutdown message:\n%s" % msg) finally: # Shutdown the scheduler thus allowing the process to exit. print('this should be the shutdown of the scheduler') # Unschedule all the jobs before calling shutdown self.sched.print_jobs() for job in self.sched.get_jobs(): self.sched.unschedule_job(job) self.sched.print_jobs() self.sched.shutdown() # Uncomment the line below if shutdown does not work! #os.system("kill -9 %d" % os.getpid()) def send_email(self, msg, tag=None): """ Send an e-mail before completing the shutdown. Returns 0 if success. 
""" try: return self._send_email(msg, tag) except: self.exceptions.append(straceback()) return -2 def _send_email(self, msg, tag): if self.mailto is None: return -1 header = msg.splitlines() app = header.append app("Submitted on %s" % time.ctime(self.start_time)) app("Completed on %s" % time.asctime()) app("Elapsed time %s" % str(self.get_delta_etime())) app("Number of errored tasks: %d" % self.flow.num_errored_tasks) app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks) strio = cStringIO() strio.writelines("\n".join(header) + 4 * "\n") # Add the status of the flow. self.flow.show_status(stream=strio) if self.exceptions: # Report the list of exceptions. strio.writelines(self.exceptions) if tag is None: tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]" return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
log_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s")

# add the formatter to our file handler
file_handler.setFormatter(log_formatter)
file_handler.setLevel(logging.DEBUG)
app.logger.addHandler(file_handler)

led_chain = Chain_Communicator()

sched = Scheduler(config)
sched.start()

# Build the cron jobs from the auto_state_events list; a config file would be a
# better home for these definitions.
for event in auto_state_events:
    app.logger.info("Processing scheduled event : %s" % event['event_name'])
    start_hour, start_minute, start_second = event['event_start_time'].split(':')
    start_time = datetime.strptime(event['event_start_time'], time_format)
    end_time = datetime.strptime(event['event_end_time'], time_format)
    event_duration = (end_time - start_time).seconds
    sched.add_cron_job(led_chain.auto_transition,
                       hour=start_hour,
                       minute=start_minute,
                       second=start_second,
                       name=event['event_name'],
                       kwargs={'state': event['event_state'],
                               'transition_duration': event['transition_duration']},
                       misfire_grace_time=event_duration)

app.logger.debug("Startup job list contains : %s" % sched.get_jobs())

try:
    app.run(host='0.0.0.0', port=8080, use_reloader=False)
except KeyboardInterrupt:
    app.logger.warning("Caught keyboard interrupt. Shutting down ...")
    led_chain.shutdown()
    sched.shutdown(wait=False)
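# A small, self-contained sketch of how one auto_state_events entry (format inferred
# from the loop above) turns into a cron job. The transition callback and the event
# values are placeholders, and time_format is assumed to be '%H:%M:%S' based on the
# hh:mm:ss split; the real code drives Chain_Communicator.auto_transition.
from datetime import datetime

from apscheduler.scheduler import Scheduler

time_format = '%H:%M:%S'   # assumption, not shown in the snippet above

event = {
    'event_name': 'evening_fade',
    'event_start_time': '18:30:00',
    'event_end_time': '19:00:00',
    'event_state': 'dim',
    'transition_duration': 300,
}


def auto_transition(state, transition_duration):
    print('transition to %s over %ss' % (state, transition_duration))


sched = Scheduler()
sched.start()

hour, minute, second = event['event_start_time'].split(':')
duration = (datetime.strptime(event['event_end_time'], time_format) -
            datetime.strptime(event['event_start_time'], time_format)).seconds

# Allow the job to fire late by up to the event's duration, as above.
sched.add_cron_job(auto_transition, hour=hour, minute=minute, second=second,
                   name=event['event_name'],
                   kwargs={'state': event['event_state'],
                           'transition_duration': event['transition_duration']},
                   misfire_grace_time=duration)
print(sched.get_jobs())
sched.shutdown(wait=False)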
opcLogger = logging.getLogger("opcVarsData")
opcLogger.setLevel(logging.INFO)
opcLogger.addHandler(opcVarDataRF)

# Instantiate the finite state machine that follows the system state
sm = AlgData_OPC(opcclient_name="GPC_Read.OPCClient",
                 # opcserver='OPCManager.DA.XML-DA.Server.DA',  # Only for local (CRP simulation) tests
                 )

# Instantiate the scheduler
sched = Scheduler()

d = datetime.now() + timedelta(seconds=1)
BZx = sched.add_interval_job(sm.process,
                             start_date=d,
                             seconds=10 * 60,
                             # max_runs=10,
                             # args=[aFSM.fsm,]
                             )

# Start the scheduler
sched.start()

while sched.get_jobs() != []:
    print "%s While Loop - alive" % datetime.now()
    sleep(2 * 60)
else:
    print "Scheduler goes down"
    sched.shutdown(wait=False)
    logging.shutdown()
    print "Scheduler has shut down"
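# The polling loop above exits once get_jobs() is empty; the commented-out max_runs
# option is one way to make that happen. A tiny sketch of that shutdown path, with a
# dummy callback in place of sm.process and much shorter intervals:
from datetime import datetime, timedelta
from time import sleep

from apscheduler.scheduler import Scheduler


def process():
    print('%s processing' % datetime.now())


sched = Scheduler()
sched.add_interval_job(process,
                       start_date=datetime.now() + timedelta(seconds=1),
                       seconds=2,
                       max_runs=3)   # the job unschedules itself after 3 runs
sched.start()

while sched.get_jobs() != []:
    sleep(1)

sched.shutdown(wait=False)
print('Scheduler has shut down')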
class AlertSchedulerHandler(): make_cachedir = True FILENAME = 'definitions.json' TYPE_PORT = 'PORT' TYPE_METRIC = 'METRIC' TYPE_SCRIPT = 'SCRIPT' APS_CONFIG = { 'threadpool.core_threads': 3, 'coalesce': True, 'standalone': False } def __init__(self, cachedir, stacks_dir, in_minutes=True): self.cachedir = cachedir self.stacks_dir = stacks_dir if not os.path.exists( cachedir) and AlertSchedulerHandler.make_cachedir: try: os.makedirs(cachedir) except: logger.critical( "Could not create the cache directory {0}".format( cachedir)) pass self._collector = AlertCollector() self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG) self.__in_minutes = in_minutes self.__config_maps = {} def update_definitions(self, alert_commands, reschedule_jobs=False): ''' updates the persisted definitions and restarts the scheduler ''' with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f: json.dump(alert_commands, f, indent=2) if reschedule_jobs: self.reschedule() def __make_function(self, alert_def): return lambda: alert_def.collect() def start(self): ''' loads definitions from file and starts the scheduler ''' if self.__scheduler is None: return if self.__scheduler.running: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG) alert_callables = self.__load_definitions() # schedule each definition for _callable in alert_callables: self.schedule_definition(_callable) logger.debug("Starting scheduler {0}; currently running: {1}".format( str(self.__scheduler), str(self.__scheduler.running))) self.__scheduler.start() def stop(self): if not self.__scheduler is None: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG) def reschedule(self): ''' Removes jobs that are scheduled where their UUID no longer is valid. Schedules jobs where the definition UUID is not currently scheduled. ''' jobs_scheduled = 0 jobs_removed = 0 definitions = self.__load_definitions() scheduled_jobs = self.__scheduler.get_jobs() # for every scheduled job, see if its UUID is still valid for scheduled_job in scheduled_jobs: uuid_valid = False for definition in definitions: definition_uuid = definition.get_uuid() if scheduled_job.name == definition_uuid: uuid_valid = True break # jobs without valid UUIDs should be unscheduled if uuid_valid == False: jobs_removed += 1 logger.info("Unscheduling {0}".format(scheduled_job.name)) self._collector.remove_by_uuid(scheduled_job.name) self.__scheduler.unschedule_job(scheduled_job) # for every definition, determine if there is a scheduled job for definition in definitions: definition_scheduled = False for scheduled_job in scheduled_jobs: definition_uuid = definition.get_uuid() if definition_uuid == scheduled_job.name: definition_scheduled = True break # if no jobs are found with the definitions UUID, schedule it if definition_scheduled == False: jobs_scheduled += 1 self.schedule_definition(definition) logger.info( "Alert Reschedule Summary: {0} rescheduled, {1} unscheduled". format(str(jobs_scheduled), str(jobs_removed))) def collector(self): ''' gets the collector for reporting to the server ''' return self._collector def __load_definitions(self): ''' loads all alert commands from the file. 
all clusters are stored in one file ''' definitions = [] all_commands = None try: with open(os.path.join(self.cachedir, self.FILENAME)) as fp: all_commands = json.load(fp) except: if (logger.isEnabledFor(logging.DEBUG)): traceback.print_exc() return definitions for command_json in all_commands: clusterName = '' if not 'clusterName' in command_json else command_json[ 'clusterName'] hostName = '' if not 'hostName' in command_json else command_json[ 'hostName'] configmap = None # each cluster gets a map of key/value pairs of substitution values self.__config_maps[clusterName] = {} if 'configurations' in command_json: configmap = command_json['configurations'] for definition in command_json['alertDefinitions']: obj = self.__json_to_callable(clusterName, hostName, definition) if obj is None: continue # get the config values for the alerts 'lookup keys', # eg: hdfs-site/dfs.namenode.http-address : host_and_port vals = self.__find_config_values(configmap, obj.get_lookup_keys()) self.__config_maps[clusterName].update(vals) obj.set_helpers(self._collector, self.__config_maps[clusterName]) definitions.append(obj) return definitions def __json_to_callable(self, clusterName, hostName, json_definition): ''' converts the json that represents all aspects of a definition and makes an object that extends BaseAlert that is used for individual ''' source = json_definition['source'] source_type = source.get('type', '') if logger.isEnabledFor(logging.DEBUG): logger.debug("Creating job type {0} with {1}".format( source_type, str(json_definition))) alert = None if source_type == AlertSchedulerHandler.TYPE_METRIC: alert = MetricAlert(json_definition, source) elif source_type == AlertSchedulerHandler.TYPE_PORT: alert = PortAlert(json_definition, source) elif source_type == AlertSchedulerHandler.TYPE_SCRIPT: source['stacks_dir'] = self.stacks_dir alert = ScriptAlert(json_definition, source) if alert is not None: alert.set_cluster(clusterName, hostName) return alert def __find_config_values(self, configmap, obj_keylist): ''' finds templated values in the configuration map provided by the server ''' if configmap is None: return {} result = {} for key in obj_keylist: try: obj = configmap for layer in key.split('/'): obj = obj[layer] result[key] = obj except KeyError: # the nested key is missing somewhere pass return result def update_configurations(self, commands): ''' when an execution command comes in, update any necessary values. status commands do not contain useful configurations ''' for command in commands: clusterName = command['clusterName'] if not clusterName in self.__config_maps: continue if 'configurations' in command: configmap = command['configurations'] keylist = self.__config_maps[clusterName].keys() vals = self.__find_config_values(configmap, keylist) self.__config_maps[clusterName].update(vals) def schedule_definition(self, definition): ''' Schedule a definition (callable). Scheduled jobs are given the UUID as their name so that they can be identified later on. <p/> This function can be called with a definition that is disabled; it will simply NOOP. 
''' # NOOP if the definition is disabled; don't schedule it if definition.is_enabled() == False: logger.info( "The alert {0} with UUID {1} is disabled and will not be scheduled" .format(definition.get_name(), definition.get_uuid())) return job = None if self.__in_minutes: job = self.__scheduler.add_interval_job( self.__make_function(definition), minutes=definition.interval()) else: job = self.__scheduler.add_interval_job( self.__make_function(definition), seconds=definition.interval()) # although the documentation states that Job(kwargs) takes a name # key/value pair, it does not actually set the name; do it manually if job is not None: job.name = definition.get_uuid() logger.info("Scheduling {0} with UUID {1}".format( definition.get_name(), definition.get_uuid())) def get_job_count(self): ''' Gets the number of jobs currently scheduled. This is mainly used for test verification of scheduling ''' if self.__scheduler is None: return 0 return len(self.__scheduler.get_jobs()) def execute_alert(self, execution_commands): ''' Executes an alert immediately, ignoring any scheduled jobs. The existing jobs remain untouched. The result of this is stored in the alert collector for tranmission during the next heartbeat ''' if self.__scheduler is None or execution_commands is None: return for execution_command in execution_commands: try: alert_definition = execution_command['alertDefinition'] clusterName = '' if not 'clusterName' in execution_command else execution_command[ 'clusterName'] hostName = '' if not 'hostName' in execution_command else execution_command[ 'hostName'] alert = self.__json_to_callable(clusterName, hostName, alert_definition) if alert is None: continue logger.info("Executing on-demand alert {0} ({1})".format( alert.get_name(), alert.get_uuid())) alert.set_helpers(self._collector, self.__config_maps[clusterName]) alert.collect() except: logger.exception( "Unable to execute the alert outside of the job scheduler")
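# A condensed, self-contained sketch of the reconciliation logic in reschedule()
# above: jobs are named after a definition UUID, stale names are unscheduled, and
# missing names are scheduled. The UUIDs and the no-op alert callable are made up.
from apscheduler.scheduler import Scheduler


def make_alert(uuid):
    def collect():
        pass   # a real handler would run the alert and report to a collector
    return collect


sched = Scheduler()
sched.start()

# Currently scheduled UUIDs (job.name) versus the UUIDs the server now wants.
scheduled = set(job.name for job in sched.get_jobs())
wanted = set(['uuid-aaa', 'uuid-bbb'])

# Unschedule jobs whose UUID is no longer valid.
for job in sched.get_jobs():
    if job.name not in wanted:
        sched.unschedule_job(job)

# Schedule definitions that are not yet running, naming the job after the UUID.
for uuid in wanted - scheduled:
    job = sched.add_interval_job(make_alert(uuid), minutes=5)
    job.name = uuid   # as above: set the name manually on the returned Job

sched.shutdown(wait=False)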
class PeetsMediaTranslator(DatagramProtocol): ''' A translator protocol to relay local udp traffic to NDN and remote NDN traffic to local udp. This class also implements the strategy for fetching remote data. If the remote seq is unknown, use a short prefix without seq to probe; otherwise use a naive leaking-bucket like method to fetch the remote data We seperate the fetching of the media stream and the fetching of the control stream (RTCP, STUN, etc). ''' __logger = Logger.get_logger('PeetsMediaTranslator') def __init__(self, factory, pipe_size): ''' Args: factory (PeetsServerFactory) : the factory that stores necessory information about the local user pipe_size (int) : the pipeline size for fetching the remote media stream. Pipelining allows us to minimize impact of the interest-data roundtrip delay. ''' self.factory = factory self.pipe_size = pipe_size self.factory = factory self.factory.set_local_status_callback(self.toggle_scheduler) # here we use two sockets, because the pending interests sent by a socket can not be satisified # by the content published later by the same socket self.ccnx_int_socket = CcnxSocket() self.ccnx_int_socket.start() self.ccnx_con_socket = CcnxSocket() self.ccnx_con_socket.start() self.stream_closure = PeetsClosure(msg_callback = self.stream_callback, timeout_callback = self.stream_timeout_callback) self.probe_closure = PeetsClosure(msg_callback = self.probe_callback, timeout_callback = self.probe_timeout_callback) self.ctrl_probe_closure = PeetsClosure(msg_callback = self.ctrl_probe_callback, timeout_callback = self.ctrl_probe_timeout_callback) self.scheduler = None self.peets_status = None def toggle_scheduler(self, status): '''Start or stop the scheduler for periodic jobs. Args: status (str): either 'Running' or 'Stopped' ''' if status == 'Running': self.peets_status = 'Running' self.scheduler = Scheduler() self.scheduler.start() self.scheduler.add_interval_job(self.fetch_media, seconds = 0.01, max_instances = 2) elif status == 'Stopped': self.peets_status = 'Stopped' for job in self.scheduler.get_jobs(): self.scheduler.unschedule_job(job) self.scheduler.shutdown(wait = True) self.scheduler = None def datagramReceived(self, data, (host, port)): '''Intercept the webrtc traffice from the local front end and relay it to the NDN Args: data (bytes) : the UDP data host (str) : the IP of the source port (int) : the port of the source 1. Differentiate RTP vs RTCP RTCP: packet type (PT) = 200 - 208 SR (sender report) 200 RR (receiver report) 201 SDES (source description) 202 BYE (goodbye) 203 App (application-defined) 204 other types go until 208 RFC 5761 (implemented by WebRTC) makes sure that RTP's PT field plus M field (which is equal to the PT field in RTCP) would not conflict 2. Differentiate STUN vs RTP & RTCP STUN: the most significant 2 bits of every STUN msg MUST be zeros (RFC 5389) RTP & RTCP: version bits (2 bits) value equals 2 Note: Tried to fake a Stun request and response so that we don't have to relay stun msgs to NDN, but failed. It worked for a time, although will significantly high rate of the STUN message exchanges We need to use the username exchanged in the sdps for stun it worked for a while but magically stopped working, so now we still send it over NDN Note 2: We only publish one medai stream from the local user (with the default offer SDP). We publish RTCP and STUN for each PeerConnections though. 
''' # mask to test most significant 2 bits msg = bytearray(data) c = self.factory.client if msg[0] & 0xC0 == 0 or msg[1] > 199 and msg[1] < 209: try: ctrl_seq = c.ctrl_seqs[port] cid = c.remote_cids[port] # RTCP and STUN is for each peerconnection. the cid of remote user is used to identify the peer connection so that remote user knows which one to fetch name = c.local_user.get_ctrl_prefix() + '/' + cid + '/' + str(ctrl_seq) c.ctrl_seqs[port] = ctrl_seq + 1 self.ccnx_con_socket.publish_content(name, data) except KeyError: pass elif c.media_source_port == port: # only publish one media stream name = c.local_user.get_media_prefix() + '/' + str(c.local_seq) c.local_seq += 1 self.ccnx_con_socket.publish_content(name, data)
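# The packet classification in datagramReceived() boils down to two byte checks. A
# stand-alone restatement of that logic (same masks and payload-type range as above),
# handy for exercising the demultiplexer without the NDN plumbing:
def classify_packet(data):
    """Return 'STUN', 'RTCP' or 'RTP' for a UDP payload from the WebRTC stack."""
    msg = bytearray(data)
    if msg[0] & 0xC0 == 0:
        # Most significant two bits are zero in every STUN message (RFC 5389).
        return 'STUN'
    if 199 < msg[1] < 209:
        # RTCP packet types 200-208 (SR, RR, SDES, BYE, APP, ...), per RFC 5761.
        return 'RTCP'
    return 'RTP'


assert classify_packet(b'\x00\x01\x00\x00') == 'STUN'
assert classify_packet(b'\x80\xc8\x00\x06') == 'RTCP'   # version 2, PT 200 (SR)
assert classify_packet(b'\x80\x60\x12\x34') == 'RTP'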
class SchedulerContainer(DaemonContainer):
    def __init__(self, environment):
        super(SchedulerContainer, self).__init__(environment)
        gconfig = environment.get("gconfig", {})
        options = environment.get("options", {})
        self.scheduler = Scheduler(gconfig, **options)

    def on_start(self):
        self.scheduler.start()

    def on_stop(self):
        # APScheduler's Scheduler exposes shutdown(), not stop()
        self.scheduler.shutdown()

    def unschedule_func(self, func):
        self.scheduler.unschedule_func(func)

    def unschedule_job(self, job):
        self.scheduler.unschedule_job(job)

    def add_interval_job(self, func, weeks=0, days=0, hours=0, minutes=0, seconds=0,
                         start_date=None, args=None, kwargs=None, **options):
        return self.scheduler.add_interval_job(func=func, weeks=weeks, days=days,
                                               hours=hours, minutes=minutes,
                                               seconds=seconds, start_date=start_date,
                                               args=args, kwargs=kwargs, **options)

    def add_cron_job(self, func, year=None, month=None, day=None, week=None,
                     day_of_week=None, hour=None, minute=None, second=None,
                     start_date=None, args=None, kwargs=None, **options):
        return self.scheduler.add_cron_job(func=func, year=year, month=month, day=day,
                                           week=week, day_of_week=day_of_week,
                                           hour=hour, minute=minute, second=second,
                                           start_date=start_date, args=args,
                                           kwargs=kwargs, **options)

    def add_date_job(self, func, date, args=None, kwargs=None, **options):
        return self.scheduler.add_date_job(func=func, date=date, args=args,
                                           kwargs=kwargs, **options)

    def get_jobs(self):
        return self.scheduler.get_jobs()

    def add_job(self, trigger, func, args, kwargs, jobstore='default', **options):
        return self.scheduler.add_job(trigger=trigger, func=func, args=args,
                                      kwargs=kwargs, jobstore=jobstore, **options)

    def add_listener(self, callback, mask):
        self.scheduler.add_listener(callback, mask)

    def remove_listener(self, callback):
        self.scheduler.remove_listener(callback)
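# The container above just forwards environment["gconfig"] / environment["options"]
# to the Scheduler constructor. A small sketch of what such a gconfig can carry,
# using the 'apscheduler.'-prefixed keys the library recognises; the values here are
# arbitrary examples, not anything the original environment is known to contain.
from apscheduler.scheduler import Scheduler

gconfig = {
    'apscheduler.misfire_grace_time': 30,   # seconds a job may fire late
    'apscheduler.coalesce': True,           # collapse a backlog of missed runs into one
    'apscheduler.daemonic': True,           # scheduler threads won't block interpreter exit
}
options = {'standalone': False}

sched = Scheduler(gconfig, **options)
sched.start()
print(sched.misfire_grace_time, sched.coalesce, sched.daemonic)
sched.shutdown(wait=False)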
class TrainScheduler(object): def __init__(self): logging.basicConfig(level=logging.DEBUG, filename="debug.log", format='%(asctime)s %(levelname)-8s %(message)s', datefmt="%d.%m.%Y %H:%M:%S") self.scheduler = Scheduler() self.scheduler.add_listener(self.checkForDuplicates, apscheduler.events.EVENT_JOBSTORE_JOB_ADDED) self.scheduler.start() if len(self.scheduler.get_jobs()) == 0: self.createInitSchedule() self.log("Initial tasks completed. Waiting for next event..") while True: try: time.sleep(10) #self.scheduler.print_jobs() except KeyboardInterrupt: self.log("Shutting down..") self.scheduler.shutdown() quit() def createInitSchedule(self): self.log("Perform initial query for passenger trains..") self.processPassenger() self.log("Perform initial query for freight trains..") self.processFreight() self.log("Perform initial query for auto trains..") self.processAutotrain() self.log("Creating initial train schedule..") # request passenger trains every hour self.scheduler.add_cron_job(self.processPassenger, hour="*/1", minute="0", day="*", month="*", year="*") # request freight trains every day self.scheduler.add_cron_job(self.processFreight, hour="0", minute="2", day="*", month="*", year="*") # request auto trains every month self.scheduler.add_cron_job(self.processAutotrain, hour="0", minute="5", day="1", month="*", year="*") def processPassenger(self): # return trains for station in question tReq = passenger.PassengerTrainRequest(PASSENGER_STATION_ID) for train in tReq.getTrainList(): trainTime = train.actualTime if (train.actualTime) else train.scheduledTime trainTimeCheck = trainTime - datetime.timedelta(minutes=CHECKBEFORE) try: self.scheduler.add_date_job(self.checkIfOnTime, trainTimeCheck, args=[train], name=train.name) self.log("Schedule passenger train '%s' to be checked on %s." % (train.name, trainTimeCheck)) except ValueError: try: self.scheduler.add_date_job(self.output, trainTime, args=[train], name=train.name) self.log("Schedule passenger train '%s' to be displayed on %s." % (train.name, trainTime)) except ValueError: self.log("Passenger train '%s' (%s) already passed by." % (train.name, trainTime)) def checkIfOnTime(self, remTrain): # return trains for station in question tReq = passenger.PassengerTrainRequest(PASSENGER_STATION_ID) for train in tReq.getTrainList(): if remTrain.name == train.name: trainTime = train.actualTime if (train.actualTime) else train.scheduledTime try: self.scheduler.add_date_job(self.output, trainTime, args=[train], name=train.name) self.log("Schedule passenger train '%s' to be displayed on %s." % (train.name, trainTime)) except ValueError: self.log("Passenger train '%s' (%s) already passed by." % (train.name, trainTime)) break def processFreight(self): # return trains for station in question freightTrains = freight.FreightTrainRequest(FREIGHT_STATION_ID) for train in freightTrains.getTrainList(): # FIXME: only arrival atm if train.arrival > datetime.datetime.now(): self.log("Schedule freight train '%s' to be displayed on %s." % (train.name, train.arrival)) self.scheduler.add_date_job(self.output, train.arrival, args=[train], name=train.name) else: self.log("Freight train '%s' (%s) already passed." % (train.name, train.arrival)) def processAutotrain(self): # return trains for station in question freightTrains = autotrain.AutoTrainRequest(AUTO_TRAIN_STATION_NAME) for train in freightTrains.getTrainList(): if train.arrival > datetime.datetime.now(): self.log("Schedule auto train '%s' to be displayed on %s." 
% (train.name, train.arrival)) self.scheduler.add_date_job(self.output, train.arrival, args=[train], name=train.name) else: self.log("Auto train '%s' (%s) already passed." % (train.name, train.arrival)) def checkForDuplicates(self, event): jobs = self.scheduler.get_jobs() if jobs: # events with the same name (train name) and the next "next run time" are duplicates dups = [job for job in jobs if job.name == event.job.name and job.next_run_time == event.job.next_run_time] if len(dups) > 1: self.log("Unscheduling %s." % event.job) self.scheduler.unschedule_job(event.job) def output(self, train): self.log("OUTPUT: %s" % train) f = open(OUTPUT_FILE, "a") f.write("%s\n" % train) f.close() def log(self, message): logging.info("* %s" % message)
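# A stripped-down sketch of the duplicate guard used by TrainScheduler above: listen
# for jobs being added to the job store and unschedule an addition that matches an
# existing job's name and next run time. The train name and date are placeholders.
from datetime import datetime, timedelta

import apscheduler.events
from apscheduler.scheduler import Scheduler

sched = Scheduler()


def drop_duplicates(event):
    dups = [job for job in sched.get_jobs()
            if job.name == event.job.name
            and job.next_run_time == event.job.next_run_time]
    if len(dups) > 1:
        sched.unschedule_job(event.job)


sched.add_listener(drop_duplicates, apscheduler.events.EVENT_JOBSTORE_JOB_ADDED)
sched.start()

when = datetime.now() + timedelta(hours=1)
sched.add_date_job(lambda: None, when, name='ICE 123')
sched.add_date_job(lambda: None, when, name='ICE 123')   # unscheduled again by the listener
print(sched.get_jobs())
sched.shutdown(wait=False)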
class TestOfflineScheduler(object): def setup(self): self.scheduler = Scheduler() def teardown(self): if self.scheduler.running: self.scheduler.shutdown() @raises(KeyError) def test_jobstore_twice(self): self.scheduler.add_jobstore(RAMJobStore(), 'dummy') self.scheduler.add_jobstore(RAMJobStore(), 'dummy') def test_add_tentative_job(self): job = self.scheduler.add_date_job(lambda: None, datetime(2200, 7, 24), jobstore='dummy') assert isinstance(job, Job) eq_(self.scheduler.get_jobs(), []) def test_configure_jobstore(self): conf = { 'apscheduler.jobstore.ramstore.class': 'apscheduler.jobstores.ram_store:RAMJobStore' } self.scheduler.configure(conf) self.scheduler.remove_jobstore('ramstore') def test_shutdown_offline(self): self.scheduler.shutdown() def test_configure_no_prefix(self): global_options = {'misfire_grace_time': '2', 'daemonic': 'false'} self.scheduler.configure(global_options) eq_(self.scheduler.misfire_grace_time, 1) eq_(self.scheduler.daemonic, True) def test_configure_prefix(self): global_options = { 'apscheduler.misfire_grace_time': 2, 'apscheduler.daemonic': False } self.scheduler.configure(global_options) eq_(self.scheduler.misfire_grace_time, 2) eq_(self.scheduler.daemonic, False) def test_add_listener(self): val = [] self.scheduler.add_listener(val.append) event = SchedulerEvent(EVENT_SCHEDULER_START) self.scheduler._notify_listeners(event) eq_(len(val), 1) eq_(val[0], event) event = SchedulerEvent(EVENT_SCHEDULER_SHUTDOWN) self.scheduler._notify_listeners(event) eq_(len(val), 2) eq_(val[1], event) self.scheduler.remove_listener(val.append) self.scheduler._notify_listeners(event) eq_(len(val), 2) def test_pending_jobs(self): # Tests that pending jobs are properly added to the jobs list when # the scheduler is started (and not before!) self.scheduler.add_date_job(lambda: None, datetime(9999, 9, 9)) eq_(self.scheduler.get_jobs(), []) self.scheduler.start() jobs = self.scheduler.get_jobs() eq_(len(jobs), 1)
class PyFlowScheduler(object): """ This object schedules the submission of the tasks in a :class:`Flow`. There are two types of errors that might occur during the execution of the jobs: #. Python exceptions #. Errors in the ab-initio code Python exceptions are easy to detect and are usually due to a bug in the python code or random errors such as IOError. The set of errors in the ab-initio is much much broader. It includes wrong input data, segmentation faults, problems with the resource manager, etc. The flow tries to handle the most common cases but there's still a lot of room for improvement. Note, in particular, that `PyFlowScheduler` will shutdown automatically in the following cases: #. The number of python exceptions is > max_num_pyexcs #. The number of task errors (i.e. the number of tasks whose status is S_ERROR) is > max_num_abierrs #. The number of jobs launched becomes greater than (`safety_ratio` * total_number_of_tasks). #. The scheduler will send an email to the user (specified by `mailto`) every `remindme_s` seconds. If the mail cannot be sent, the scheduler will shutdown automatically. This check prevents the scheduler from being trapped in an infinite loop. """ # Configuration file. YAML_FILE = "scheduler.yml" USER_CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".abinit", "abipy") Error = PyFlowSchedulerError @classmethod def autodoc(cls): i = cls.__init__.__doc__.index("Args:") return cls.__init__.__doc__[i + 5:] def __init__(self, **kwargs): """ Args: weeks: number of weeks to wait (DEFAULT: 0). days: number of days to wait (DEFAULT: 0). hours: number of hours to wait (DEFAULT: 0). minutes: number of minutes to wait (DEFAULT: 0). seconds: number of seconds to wait (DEFAULT: 0). mailto: The scheduler will send an email to `mailto` every `remindme_s` seconds. (DEFAULT: None i.e. not used). verbose: (int) verbosity level. (DEFAULT: 0) use_dynamic_manager: "yes" if the :class:`TaskManager` must be re-initialized from file before launching the jobs. (DEFAULT: "no") max_njobs_inqueue: Limit on the number of jobs that can be present in the queue. (DEFAULT: 200) remindme_s: The scheduler will send an email to the user specified by `mailto` every `remindme_s` seconds. (int, DEFAULT: 1 day). max_num_pyexcs: The scheduler will exit if the number of python exceptions is > max_num_pyexcs (int, DEFAULT: 0) max_num_abierrs: The scheduler will exit if the number of errored tasks is > max_num_abierrs (int, DEFAULT: 0) safety_ratio: The scheduler will exits if the number of jobs launched becomes greater than `safety_ratio` * total_number_of_tasks_in_flow. (int, DEFAULT: 5) max_nlaunches: Maximum number of tasks launched in a single iteration of the scheduler. (DEFAULT: -1 i.e. no limit) debug: Debug level. Use 0 for production (int, DEFAULT: 0) fix_qcritical: "yes" if the launcher should try to fix QCritical Errors (DEFAULT: "yes") rmflow: If "yes", the scheduler will remove the flow directory if the calculation completed successfully. (DEFAULT: "no") killjobs_if_errors: "yes" if the scheduler should try to kill all the runnnig jobs before exiting due to an error. (DEFAULT: "yes") """ # Options passed to the scheduler. 
self.sched_options = AttrDict( weeks=kwargs.pop("weeks", 0), days=kwargs.pop("days", 0), hours=kwargs.pop("hours", 0), minutes=kwargs.pop("minutes", 0), seconds=kwargs.pop("seconds", 0), #start_date=kwargs.pop("start_date", None), ) if all(not v for v in self.sched_options.values()): raise self.Error("Wrong set of options passed to the scheduler.") self.mailto = kwargs.pop("mailto", None) self.verbose = int(kwargs.pop("verbose", 0)) self.use_dynamic_manager = as_bool( kwargs.pop("use_dynamic_manager", False)) self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200) self.max_ncores_used = kwargs.pop("max_ncores_used", None) self.contact_resource_manager = as_bool( kwargs.pop("contact_resource_manager", False)) self.remindme_s = float(kwargs.pop("remindme_s", 1 * 24 * 3600)) self.max_num_pyexcs = int(kwargs.pop("max_num_pyexcs", 0)) self.max_num_abierrs = int(kwargs.pop("max_num_abierrs", 0)) self.safety_ratio = int(kwargs.pop("safety_ratio", 5)) #self.max_etime_s = kwargs.pop("max_etime_s", ) self.max_nlaunches = kwargs.pop("max_nlaunches", -1) self.debug = kwargs.pop("debug", 0) self.fix_qcritical = as_bool(kwargs.pop("fix_qcritical", True)) self.rmflow = as_bool(kwargs.pop("rmflow", False)) self.killjobs_if_errors = as_bool( kwargs.pop("killjobs_if_errors", True)) self.customer_service_dir = kwargs.pop("customer_service_dir", None) if self.customer_service_dir is not None: self.customer_service_dir = Directory(self.customer_service_dir) self._validate_customer_service() if kwargs: raise self.Error("Unknown arguments %s" % kwargs) if not has_apscheduler: raise RuntimeError("Install apscheduler with pip") if has_sched_v3: logger.warning("Using scheduler v>=3.0.0") from apscheduler.schedulers.blocking import BlockingScheduler self.sched = BlockingScheduler() else: from apscheduler.scheduler import Scheduler self.sched = Scheduler(standalone=True) self.nlaunch = 0 self.num_reminders = 1 # Used to keep track of the exceptions raised while the scheduler is running self.exceptions = deque(maxlen=self.max_num_pyexcs + 10) # Used to push additional info during the execution. self.history = deque(maxlen=100) @classmethod def from_file(cls, filepath): """Read the configuration parameters from a Yaml file.""" with open(filepath, "rt") as fh: return cls(**yaml.safe_load(fh)) @classmethod def from_string(cls, s): """Create an istance from string s containing a YAML dictionary.""" stream = cStringIO(s) stream.seek(0) return cls(**yaml.safe_load(stream)) @classmethod def from_user_config(cls): """ Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'. Search first in the working directory and then in the configuration directory of abipy. Raises: `RuntimeError` if file is not found. """ # Try in the current directory. path = os.path.join(os.getcwd(), cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) # Try in the configuration directory. 
path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) raise cls.Error( "Cannot locate %s neither in current directory nor in %s" % (cls.YAML_FILE, path)) def __str__(self): """String representation.""" lines = [self.__class__.__name__ + ", Pid: %d" % self.pid] app = lines.append app("Scheduler options: %s" % str(self.sched_options)) if self.flow is not None: app(80 * "=") app(str(self.flow)) return "\n".join(lines) @property def pid(self): """The pid of the process associated to the scheduler.""" try: return self._pid except AttributeError: self._pid = os.getpid() return self._pid @property def pid_file(self): """ Absolute path of the file with the pid. The file is located in the workdir of the flow """ return self._pid_file @property def flow(self): """`Flow`.""" try: return self._flow except AttributeError: return None @property def num_excs(self): """Number of exceptions raised so far.""" return len(self.exceptions) def get_delta_etime(self): """Returns a `timedelta` object representing with the elapsed time.""" return timedelta(seconds=(time.time() - self.start_time)) def add_flow(self, flow): """ Add an :class:`Flow` flow to the scheduler. """ if hasattr(self, "_flow"): raise self.Error("Only one flow can be added to the scheduler.") # Check if we are already using a scheduler to run this flow flow.check_pid_file() flow.set_spectator_mode(False) # Build dirs and files (if not yet done) flow.build() with open(flow.pid_file, "wt") as fh: fh.write(str(self.pid)) self._pid_file = flow.pid_file self._flow = flow def _validate_customer_service(self): """ Validate input parameters if customer service is on then create directory for tarball files with correct premissions for user and group. """ direc = self.customer_service_dir if not direc.exists: mode = 0o750 print("Creating customer_service_dir %s with mode %s" % (direc, mode)) direc.makedirs() os.chmod(direc.path, mode) if self.mailto is None: raise RuntimeError( "customer_service_dir requires mailto option in scheduler.yml") def _do_customer_service(self): """ This method is called before the shutdown of the scheduler. If customer_service is on and the flow didn't completed successfully, a lightweight tarball file with inputs and the most important output files is created in customer_servide_dir. """ if self.customer_service_dir is None: return doit = self.exceptions or not self.flow.all_ok doit = True if not doit: return prefix = os.path.basename(self.flow.workdir) + "_" import tempfile, datetime suffix = str(datetime.datetime.now()).replace(" ", "-") # Remove milliseconds i = suffix.index(".") if i != -1: suffix = suffix[:i] suffix += ".tar.gz" #back = os.getcwd() #os.chdir(self.customer_service_dir.path) _, tmpname = tempfile.mkstemp(suffix="_" + suffix, prefix=prefix, dir=self.customer_service_dir.path, text=False) print("Dear customer,\n We are about to generate a tarball in\n %s" % tmpname) self.flow.make_light_tarfile(name=tmpname) #os.chdir(back) def start(self): """ Starts the scheduler in a new thread. Returns 0 if success. In standalone mode, this method will block until there are no more scheduled jobs. 
""" self.history.append("Started on %s" % time.asctime()) self.start_time = time.time() if not has_apscheduler: raise RuntimeError("Install apscheduler with pip") if has_sched_v3: self.sched.add_job(self.callback, "interval", **self.sched_options) else: self.sched.add_interval_job(self.callback, **self.sched_options) errors = self.flow.look_before_you_leap() if errors: self.exceptions.append(errors) return 1 # Try to run the job immediately. If something goes wrong return without initializing the scheduler. self._runem_all() if self.exceptions: self.cleanup() self.send_email( msg= "Error while trying to run the flow for the first time!\n %s" % self.exceptions) return 1 try: self.sched.start() return 0 except KeyboardInterrupt: self.shutdown(msg="KeyboardInterrupt from user") if ask_yesno( "Do you want to cancel all the jobs in the queue? [Y/n]"): print("Number of jobs cancelled:", self.flow.cancel()) self.flow.pickle_dump() return -1 def _runem_all(self): """ This function checks the status of all tasks, tries to fix tasks that went unconverged, abicritical, or queuecritical and tries to run all the tasks that can be submitted.+ """ excs = [] flow = self.flow # Allow to change the manager at run-time if self.use_dynamic_manager: from pymatgen.io.abinit.tasks import TaskManager new_manager = TaskManager.from_user_config() for work in flow: work.set_manager(new_manager) nqjobs = 0 if self.contact_resource_manager: # This call is expensive and therefore it's optional nqjobs = flow.get_njobs_in_queue() if nqjobs is None: nqjobs = 0 if flow.manager.has_queue: logger.warning('Cannot get njobs_inqueue') if nqjobs >= self.max_njobs_inqueue: print("Too many jobs in the queue: %s, returning" % nqjobs) return if self.max_nlaunches == -1: max_nlaunch = self.max_njobs_inqueue - nqjobs else: max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches) # check status. flow.check_status(show=False) # This check is not perfect, we should make a list of tasks to sumbit # and select only the subset so that we don't exceeed mac_ncores_used # Many sections of this code should be rewritten. #if self.max_ncores_used is not None and flow.ncores_used > self.max_ncores_used: if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used: print("Cannot exceed max_ncores_use:d %s" % self.max_ncores_used) return # Try to restart the unconverged tasks # TODO: do not fire here but prepare for fireing in rapidfire for task in self.flow.unconverged_tasks: try: logger.info("Flow will try restart task %s" % task) fired = task.restart() if fired: self.nlaunch += 1 max_nlaunch -= 1 if max_nlaunch == 0: logger.info( "Restart: too many jobs in the queue, returning") flow.pickle_dump() return except task.RestartError: excs.append(straceback()) # Temporarily disable by MG because I don't know if fix_critical works after the # introduction of the new qadapters # reenabled by MsS disable things that do not work at low level # fix only prepares for restarting, and sets to ready if self.fix_qcritical: nfixed = flow.fix_queue_critical() if nfixed: print("Fixed %d QCritical error(s)" % nfixed) nfixed = flow.fix_abicritical() if nfixed: print("Fixed %d AbiCritical error(s)" % nfixed) # update database flow.pickle_dump() # Submit the tasks that are ready. try: nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10) self.nlaunch += nlaunch if nlaunch: print("[%s] Number of launches: %d" % (time.asctime(), nlaunch)) except Exception: excs.append(straceback()) # check status. 
flow.show_status() if excs: logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs)) self.exceptions.extend(excs) def callback(self): """The function that will be executed by the scheduler.""" try: return self._callback() except: # All exceptions raised here will trigger the shutdown! s = straceback() self.exceptions.append(s) # This is useful when debugging #try: # print("Exception in callback, will cancel all tasks") # for task in self.flow.iflat_tasks(): # task.cancel() #except Exception: # pass self.shutdown(msg="Exception raised in callback!\n" + s) def _callback(self): """The actual callback.""" if self.debug: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if all_ok: return self.shutdown( msg= "All tasks have reached S_OK. Will shutdown the scheduler and exit" ) # Handle failures. err_lines = [] # Shall we send a reminder to the user? delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.remindme_s: self.num_reminders += 1 msg = ( "Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ( "\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. Aborting now" % retcode) err_lines.append(msg) #if delta_etime.total_seconds() > self.max_etime_s: # err_lines.append("\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s) # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.max_num_pyexcs: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.max_num_pyexcs) err_lines.append(boxed(msg)) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) if self.nlaunch > self.safety_ratio * self.flow.num_tasks: msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_lines.append(boxed(msg)) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.max_num_abierrs: msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.max_num_abierrs) err_lines.append(boxed(msg)) # Test on the presence of deadlocks. g = self.flow.find_deadlocks() if g.deadlocked: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables, "\nrunning\n", g.running) if g.deadlocked and not g.runnables and not g.running: err_lines.append( "No runnable job with deadlocked tasks:\n%s." % str(g.deadlocked)) if not g.runnables and not g.running: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() if not g.runnables and not g.running: err_lines.append( "No task is running and cannot find other tasks to submit." ) # Something wrong. Quit if err_lines: # Cancel all jobs. if self.killjobs_if_errors: cprint( "killjobs_if_errors set to 'yes' in scheduler file. 
Will kill jobs before exiting.", "yellow") try: num_cancelled = 0 for task in self.flow.iflat_tasks(): num_cancelled += task.cancel() cprint("Killed %d tasks" % num_cancelled, "yellow") except Exception as exc: cprint( "Exception while trying to kill jobs:\n%s" % str(exc), "red") self.shutdown("\n".join(err_lines)) return len(self.exceptions) def cleanup(self): """Cleanup routine: remove the pid file and save the pickle database""" try: os.remove(self.pid_file) except OSError as exc: logger.critical("Could not remove pid_file: %s", exc) # Save the final status of the flow. self.flow.pickle_dump() def shutdown(self, msg): """Shutdown the scheduler.""" try: self.cleanup() self.history.append("Completed on: %s" % time.asctime()) self.history.append("Elapsed time: %s" % self.get_delta_etime()) if self.debug: print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds()) retcode = self.send_email(msg) if self.debug: print("send_mail retcode", retcode) # Write file with the list of exceptions: if self.exceptions: dump_file = os.path.join(self.flow.workdir, "_exceptions") with open(dump_file, "wt") as fh: fh.writelines(self.exceptions) fh.write("Shutdown message:\n%s" % msg) lines = [] app = lines.append app("Submitted on: %s" % time.ctime(self.start_time)) app("Completed on: %s" % time.asctime()) app("Elapsed time: %s" % str(self.get_delta_etime())) if self.flow.all_ok: app("Flow completed successfully") else: app("Flow %s didn't complete successfully" % repr(self.flow.workdir)) app("use `abirun.py FLOWDIR debug` to analyze the problem.") app("Shutdown message:\n%s" % msg) print("") print("\n".join(lines)) print("") self._do_customer_service() if self.flow.all_ok: print("Calling flow.finalize()...") self.flow.finalize() #print("finalized:", self.flow.finalized) if self.rmflow: app("Flow directory will be removed...") try: self.flow.rmtree() except Exception: logger.warning( "Ignoring exception while trying to remove flow dir." ) finally: # Shutdown the scheduler thus allowing the process to exit. logger.debug('This should be the shutdown of the scheduler') # Unschedule all the jobs before calling shutdown #self.sched.print_jobs() if not has_sched_v3: for job in self.sched.get_jobs(): self.sched.unschedule_job(job) #self.sched.print_jobs() self.sched.shutdown() # Uncomment the line below if shutdown does not work! #os.system("kill -9 %d" % os.getpid()) def send_email(self, msg, tag=None): """ Send an e-mail before completing the shutdown. Returns 0 if success. """ try: return self._send_email(msg, tag) except: self.exceptions.append(straceback()) return -2 def _send_email(self, msg, tag): if self.mailto is None: return -1 header = msg.splitlines() app = header.append app("Submitted on: %s" % time.ctime(self.start_time)) app("Completed on: %s" % time.asctime()) app("Elapsed time: %s" % str(self.get_delta_etime())) app("Number of errored tasks: %d" % self.flow.num_errored_tasks) app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks) strio = cStringIO() strio.writelines("\n".join(header) + 4 * "\n") # Add the status of the flow. self.flow.show_status(stream=strio) if self.exceptions: # Report the list of exceptions. strio.writelines(self.exceptions) if tag is None: tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]" return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
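# PyFlowScheduler supports both APScheduler generations with a small shim (the
# has_sched_v3 flag above). A self-contained restatement of that version check, using
# apscheduler.version_info as an assumed stand-in for that flag and a trivial callback
# instead of the flow logic:
import apscheduler


def callback():
    print('tick')


interval = dict(weeks=0, days=0, hours=0, minutes=0, seconds=5)

if apscheduler.version_info >= (3, 0, 0):
    # v3: blocking scheduler, trigger name passed to add_job
    from apscheduler.schedulers.blocking import BlockingScheduler
    sched = BlockingScheduler()
    sched.add_job(callback, "interval", **interval)
else:
    # v2: legacy API, standalone mode blocks inside start()
    from apscheduler.scheduler import Scheduler
    sched = Scheduler(standalone=True)
    sched.add_interval_job(callback, **interval)

sched.start()   # blocks; interrupt with Ctrl-C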
class PyFlowScheduler(object): """ This object schedules the submission of the tasks in an :class:`Flow`. There are two types of errors that might occur during the execution of the jobs: #. Python exceptions #. Abinit Errors. Python exceptions are easy to detect and are usually due to a bug in abinitio or random errors such as IOError. The set of Abinit Errors is much much broader. It includes wrong input data, segmentation faults, problems with the resource manager, etc. Abinitio tries to handle the most common cases but there's still a lot of room for improvement. Note, in particular, that `PyFlowScheduler` will shutdown automatically if #. The number of python exceptions is > MAX_NUM_PYEXC #. The number of Abinit Errors (i.e. the number of tasks whose status is S_ERROR) is > MAX_NUM_ERRORS #. The number of jobs launched becomes greater than (`safety_ratio` * total_number_of_tasks). #. The scheduler will send an email to the user (specified by `mailto`) every `remindme_s` seconds. If the mail cannot be sent, it will shutdown automatically. This check prevents the scheduler from being trapped in an infinite loop. """ # Configuration file. YAML_FILE = "scheduler.yml" USER_CONFIG_DIR = os.path.join(os.getenv("HOME"), ".abinit", "abipy") Error = PyFlowSchedulerError def __init__(self, **kwargs): """ Args: weeks: number of weeks to wait days: number of days to wait hours: number of hours to wait minutes: number of minutes to wait seconds: number of seconds to wait verbose: (int) verbosity level max_njobs_inque: Limit on the number of jobs that can be present in the queue use_dynamic_manager: True if the :class:`TaskManager` must be re-initialized from file before launching the jobs. Default: False max_nlaunches: Maximum number of tasks launched by radpifire (default -1 i.e. no limit) """ # Options passed to the scheduler. self.sched_options = AttrDict( weeks=kwargs.pop("weeks", 0), days=kwargs.pop("days", 0), hours=kwargs.pop("hours", 0), minutes=kwargs.pop("minutes", 0), seconds=kwargs.pop("seconds", 0), #start_date=kwargs.pop("start_date", None), ) if all(not v for v in self.sched_options.values()): raise self.Error("Wrong set of options passed to the scheduler.") self.mailto = kwargs.pop("mailto", None) self.verbose = int(kwargs.pop("verbose", 0)) self.use_dynamic_manager = kwargs.pop("use_dynamic_manager", False) self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200) self.contact_resource_manager = as_bool(kwargs.pop("contact_resource_manager", False)) self.remindme_s = float(kwargs.pop("remindme_s", 4 * 24 * 3600)) self.max_num_pyexcs = int(kwargs.pop("max_num_pyexcs", 0)) self.max_num_abierrs = int(kwargs.pop("max_num_abierrs", 0)) self.safety_ratio = int(kwargs.pop("safety_ratio", 5)) #self.max_etime_s = kwargs.pop("max_etime_s", ) self.max_nlaunches = kwargs.pop("max_nlaunches", -1) self.debug = kwargs.pop("debug", 0) if kwargs: raise self.Error("Unknown arguments %s" % kwargs) if has_sched_v3: from apscheduler.schedulers.blocking import BlockingScheduler self.sched = BlockingScheduler() else: from apscheduler.scheduler import Scheduler self.sched = Scheduler(standalone=True) self.nlaunch = 0 self.num_reminders = 1 # Used to keep track of the exceptions raised while the scheduler is running self.exceptions = collections.deque(maxlen=self.max_num_pyexcs + 10) # Used to push additional info during the execution. 
self.history = collections.deque(maxlen=100) @classmethod def from_file(cls, filepath): """Read the configuration parameters from a Yaml file.""" with open(filepath, "r") as fh: return cls(**yaml.load(fh)) @classmethod def from_string(cls, s): """Create an istance from string s containing a YAML dictionary.""" stream = cStringIO(s) stream.seek(0) return cls(**yaml.load(stream)) @classmethod def from_user_config(cls): """ Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'. Search first in the working directory and then in the configuration directory of abipy. Raises: RuntimeError if file is not found. """ # Try in the current directory. path = os.path.join(os.getcwd(), cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) # Try in the configuration directory. path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE) if os.path.exists(path): return cls.from_file(path) raise cls.Error("Cannot locate %s neither in current directory nor in %s" % (cls.YAML_FILE, path)) def __str__(self): """String representation.""" lines = [self.__class__.__name__ + ", Pid: %d" % self.pid] app = lines.append app("Scheduler options: %s" % str(self.sched_options)) app(80 * "=") app(str(self.flow)) return "\n".join(lines) @property def pid(self): """The pid of the process associated to the scheduler.""" try: return self._pid except AttributeError: self._pid = os.getpid() return self._pid @property def pid_file(self): """ Absolute path of the file with the pid. The file is located in the workdir of the flow """ return self._pid_file @property def flow(self): """`Flow`.""" return self._flow @property def num_excs(self): """Number of exceptions raised so far.""" return len(self.exceptions) def get_delta_etime(self): """Returns a `timedelta` object representing with the elapsed time.""" return timedelta(seconds=(time.time() - self.start_time)) def add_flow(self, flow): """Add an :class:`Flow` flow to the scheduler.""" if hasattr(self, "_flow"): raise self.Error("Only one flow can be added to the scheduler.") pid_file = os.path.join(flow.workdir, "_PyFlowScheduler.pid") if os.path.isfile(pid_file): flow.show_status() raise self.Error("""\ pid_file %s already exists There are two possibilities: 1) There's an another instance of PyFlowScheduler running 2) The previous scheduler didn't exit in a clean way To solve case 1: Kill the previous scheduler (use 'kill pid' where pid is the number reported in the file) Then you can restart the new scheduler. To solve case 2: Remove the pid_file and restart the scheduler. Exiting""" % pid_file) with open(pid_file, "w") as fh: fh.write(str(self.pid)) self._pid_file = pid_file self._flow = flow def start(self): """ Starts the scheduler in a new thread. Returns True if success. In standalone mode, this method will block until there are no more scheduled jobs. """ self.history.append("Started on %s" % time.asctime()) self.start_time = time.time() if has_sched_v3: self.sched.add_job(self.callback, "interval", **self.sched_options) else: self.sched.add_interval_job(self.callback, **self.sched_options) errors = self.flow.look_before_you_leap() if errors: self.exceptions.append(errors) return False # Try to run the job immediately. If something goes wrong return without initializing the scheduler. 
self._runem_all() if self.exceptions: self.cleanup() self.send_email(msg="Error while trying to run the flow for the first time!\n %s" % self.exceptions) return False try: self.sched.start() return True except KeyboardInterrupt: self.shutdown(msg="KeyboardInterrupt from user") if ask_yesno("Do you want to cancel all the jobs in the queue? [Y/n]"): self.flow.cancel() self.flow.pickle_dump() return False def _runem_all(self): """ This function checks the status of all tasks, tries to fix tasks that went unconverged, abicritical, or queuecritical and tries to run all the tasks that can be submitted.+ """ excs = [] flow = self.flow # Allow to change the manager at run-time if self.use_dynamic_manager: from pymatgen.io.abinitio.tasks import TaskManager new_manager = TaskManager.from_user_config() for work in flow: work.set_manager(new_manager) nqjobs = 0 if self.contact_resource_manager: # This call is expensive and therefore it's optional nqjobs = flow.get_njobs_in_queue() if nqjobs is None: nqjobs = 0 if flow.manager.has_queue: logger.warning('Cannot get njobs_inqueue') if nqjobs >= self.max_njobs_inqueue: logger.info("Too many jobs in the queue, returning") return if self.max_nlaunches == -1: max_nlaunch = self.max_njobs_inqueue - nqjobs else: max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches) # check status and print it. flow.check_status(show=False) # fix problems # Try to restart the unconverged tasks # todo donot fire here but prepare for fireing in rapidfire for task in self.flow.unconverged_tasks: try: logger.info("Flow will try restart task %s" % task) fired = task.restart() if fired: self.nlaunch += 1 max_nlaunch -= 1 if max_nlaunch == 0: logger.info("Restart: too many jobs in the queue, returning") flow.pickle_dump() return except task.RestartError: excs.append(straceback()) # move here from withing rapid fire ... # fix only prepares for restarting, and sets to ready nfixed = flow.fix_abi_critical() if nfixed: print("Fixed %d AbiCritical errors" % nfixed) # Temporarily disable by MG because I don't know if fix_critical works after the # introduction of the new qadapters if False: nfixed = flow.fix_queue_critical() if nfixed: print("Fixed %d QueueCritical errors" % nfixed) # update database flow.pickle_dump() # Submit the tasks that are ready. try: nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10) self.nlaunch += nlaunch if nlaunch: print("[%s] Number of launches: %d" % (time.asctime(), nlaunch)) except Exception: excs.append(straceback()) flow.show_status() if excs: logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs)) self.exceptions.extend(excs) def callback(self): """The function that will be executed by the scheduler.""" try: return self._callback() except: # All exceptions raised here will trigger the shutdown! self.exceptions.append(straceback()) self.shutdown(msg="Exception raised in callback!") def _callback(self): """The actual callback.""" if self.debug: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if self.verbose: print("all_ok", all_ok) if all_ok: self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit") # Handle failures. err_msg = "" # Shall we send a reminder to the user? 
delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.remindme_s: self.num_reminders += 1 msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. Aborting now" % retcode) err_msg += msg #if delta_etime.total_seconds() > self.max_etime_s: # err_msg += "\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.max_num_pyexcs: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.max_num_pyexcs) err_msg += boxed(msg) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) if self.nlaunch > self.safety_ratio * self.flow.num_tasks: msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_msg += boxed(msg) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.max_num_abierrs: msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.max_num_abierrs) err_msg += boxed(msg) deadlocked, runnables, running = self.flow.deadlocked_runnables_running() #print("\ndeadlocked:\n", deadlocked, "\nrunnables:\n", runnables, "\nrunning\n", running) if deadlocked and not runnables and not running: msg = "No runnable job with deadlocked tasks:\n %s\nWill shutdown the scheduler and exit" % str(deadlocked) err_msg += boxed(msg) if err_msg: # Something wrong. Quit self.shutdown(err_msg) return len(self.exceptions) def cleanup(self): """Cleanup routine: remove the pid file and save the pickle database""" try: os.remove(self.pid_file) except OSError: logger.critical("Could not remove pid_file") # Save the final status of the flow. self.flow.pickle_dump() def shutdown(self, msg): """Shutdown the scheduler.""" try: self.cleanup() self.history.append("Completed on %s" % time.asctime()) self.history.append("Elapsed time %s" % self.get_delta_etime()) if self.debug: print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds()) retcode = self.send_email(msg) if self.debug: print("send_mail retcode", retcode) # Write file with the list of exceptions: if self.exceptions: dump_file = os.path.join(self.flow.workdir, "_exceptions") with open(dump_file, "w") as fh: fh.writelines(self.exceptions) fh.write("Shutdown message:\n%s" % msg) lines = [] app = lines.append app("Submitted on %s" % time.ctime(self.start_time)) app("Completed on %s" % time.asctime()) app("Elapsed time %s" % str(self.get_delta_etime())) if self.flow.all_ok: app("Flow completed successfully") else: app("Flow didn't complete successfully") app("Shutdown message:\n%s" % msg) print("\n".join(lines)) finally: # Shutdown the scheduler thus allowing the process to exit. logger.debug('this should be the shutdown of the scheduler') # Unschedule all the jobs before calling shutdown #self.sched.print_jobs() for job in self.sched.get_jobs(): self.sched.unschedule_job(job) #self.sched.print_jobs() self.sched.shutdown() # Uncomment the line below if shutdown does not work! 
#os.system("kill -9 %d" % os.getpid()) def send_email(self, msg, tag=None): """ Send an e-mail before completing the shutdown. Returns 0 if success. """ try: return self._send_email(msg, tag) except: self.exceptions.append(straceback()) return -2 def _send_email(self, msg, tag): if self.mailto is None: return -1 header = msg.splitlines() app = header.append app("Submitted on %s" % time.ctime(self.start_time)) app("Completed on %s" % time.asctime()) app("Elapsed time %s" % str(self.get_delta_etime())) app("Number of errored tasks: %d" % self.flow.num_errored_tasks) app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks) strio = cStringIO() strio.writelines("\n".join(header) + 4 * "\n") # Add the status of the flow. self.flow.show_status(stream=strio) if self.exceptions: # Report the list of exceptions. strio.writelines(self.exceptions) if tag is None: tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]" return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
class TestJobExecution(object): def setup(self): self.scheduler = Scheduler(threadpool=FakeThreadPool()) self.scheduler.add_jobstore(RAMJobStore(), 'default') # Make the scheduler think it's running self.scheduler._thread = FakeThread() self.logstream = StringIO() self.loghandler = StreamHandler(self.logstream) self.loghandler.setLevel(ERROR) scheduler.logger.addHandler(self.loghandler) def teardown(self): scheduler.logger.removeHandler(self.loghandler) if scheduler.datetime == FakeDateTime: scheduler.datetime = datetime FakeDateTime._now = original_now @raises(TypeError) def test_noncallable(self): date = datetime.now() + timedelta(days=1) self.scheduler.add_date_job('wontwork', date) def test_job_name(self): def my_job(): pass job = self.scheduler.add_interval_job(my_job, start_date=datetime(2010, 5, 19)) eq_( repr(job), '<Job (name=my_job, ' 'trigger=<IntervalTrigger (interval=datetime.timedelta(0, 1), ' 'start_date=datetime.datetime(2010, 5, 19, 0, 0))>)>') def test_schedule_object(self): # Tests that any callable object is accepted (and not just functions) class A: def __init__(self): self.val = 0 def __call__(self): self.val += 1 a = A() job = self.scheduler.add_interval_job(a, seconds=1) self.scheduler._process_jobs(job.next_run_time) self.scheduler._process_jobs(job.next_run_time) eq_(a.val, 2) def test_schedule_method(self): # Tests that bound methods can be scheduled (at least with RAMJobStore) class A: def __init__(self): self.val = 0 def method(self): self.val += 1 a = A() job = self.scheduler.add_interval_job(a.method, seconds=1) self.scheduler._process_jobs(job.next_run_time) self.scheduler._process_jobs(job.next_run_time) eq_(a.val, 2) def test_unschedule_job(self): def increment(): vals[0] += 1 vals = [0] job = self.scheduler.add_cron_job(increment) self.scheduler._process_jobs(job.next_run_time) eq_(vals[0], 1) self.scheduler.unschedule_job(job) self.scheduler._process_jobs(job.next_run_time) eq_(vals[0], 1) def test_unschedule_func(self): def increment(): vals[0] += 1 def increment2(): vals[0] += 1 vals = [0] job1 = self.scheduler.add_cron_job(increment) job2 = self.scheduler.add_cron_job(increment2) job3 = self.scheduler.add_cron_job(increment) eq_(self.scheduler.get_jobs(), [job1, job2, job3]) self.scheduler.unschedule_func(increment) eq_(self.scheduler.get_jobs(), [job2]) @raises(KeyError) def test_unschedule_func_notfound(self): self.scheduler.unschedule_func(copy) def test_job_finished(self): def increment(): vals[0] += 1 vals = [0] job = self.scheduler.add_interval_job(increment, max_runs=1) self.scheduler._process_jobs(job.next_run_time) eq_(vals, [1]) assert job not in self.scheduler.get_jobs() def test_job_exception(self): def failure(): raise DummyException job = self.scheduler.add_date_job(failure, datetime(9999, 9, 9)) self.scheduler._process_jobs(job.next_run_time) assert 'DummyException' in self.logstream.getvalue() def test_misfire_grace_time(self): self.scheduler.misfire_grace_time = 3 job = self.scheduler.add_interval_job(lambda: None, seconds=1) eq_(job.misfire_grace_time, 3) job = self.scheduler.add_interval_job(lambda: None, seconds=1, misfire_grace_time=2) eq_(job.misfire_grace_time, 2) def test_coalesce_on(self): # Makes sure that the job is only executed once when it is scheduled # to be executed twice in a row def increment(): vals[0] += 1 vals = [0] events = [] scheduler.datetime = FakeDateTime self.scheduler.add_listener(events.append, EVENT_JOB_EXECUTED | EVENT_JOB_MISSED) job = self.scheduler.add_interval_job(increment, seconds=1, 
start_date=FakeDateTime.now(), coalesce=True, misfire_grace_time=2) # Turn the clock 14 seconds forward FakeDateTime._now += timedelta(seconds=2) self.scheduler._process_jobs(FakeDateTime.now()) eq_(job.runs, 1) eq_(len(events), 1) eq_(events[0].code, EVENT_JOB_EXECUTED) eq_(vals, [1]) def test_coalesce_off(self): # Makes sure that every scheduled run for the job is executed even # when they are in the past (but still within misfire_grace_time) def increment(): vals[0] += 1 vals = [0] events = [] scheduler.datetime = FakeDateTime self.scheduler.add_listener(events.append, EVENT_JOB_EXECUTED | EVENT_JOB_MISSED) job = self.scheduler.add_interval_job(increment, seconds=1, start_date=FakeDateTime.now(), coalesce=False, misfire_grace_time=2) # Turn the clock 2 seconds forward FakeDateTime._now += timedelta(seconds=2) self.scheduler._process_jobs(FakeDateTime.now()) eq_(job.runs, 3) eq_(len(events), 3) eq_(events[0].code, EVENT_JOB_EXECUTED) eq_(events[1].code, EVENT_JOB_EXECUTED) eq_(events[2].code, EVENT_JOB_EXECUTED) eq_(vals, [3]) def test_interval(self): def increment(amount): vals[0] += amount vals[1] += 1 vals = [0, 0] job = self.scheduler.add_interval_job(increment, seconds=1, args=[2]) self.scheduler._process_jobs(job.next_run_time) self.scheduler._process_jobs(job.next_run_time) eq_(vals, [4, 2]) def test_interval_schedule(self): @self.scheduler.interval_schedule(seconds=1) def increment(): vals[0] += 1 vals = [0] start = increment.job.next_run_time self.scheduler._process_jobs(start) self.scheduler._process_jobs(start + timedelta(seconds=1)) eq_(vals, [2]) def test_cron(self): def increment(amount): vals[0] += amount vals[1] += 1 vals = [0, 0] job = self.scheduler.add_cron_job(increment, args=[3]) start = job.next_run_time self.scheduler._process_jobs(start) eq_(vals, [3, 1]) self.scheduler._process_jobs(start + timedelta(seconds=1)) eq_(vals, [6, 2]) self.scheduler._process_jobs(start + timedelta(seconds=2)) eq_(vals, [9, 3]) def test_cron_schedule_1(self): @self.scheduler.cron_schedule() def increment(): vals[0] += 1 vals = [0] start = increment.job.next_run_time self.scheduler._process_jobs(start) self.scheduler._process_jobs(start + timedelta(seconds=1)) eq_(vals[0], 2) def test_cron_schedule_2(self): @self.scheduler.cron_schedule(minute='*') def increment(): vals[0] += 1 vals = [0] start = increment.job.next_run_time next_run = start + timedelta(seconds=60) eq_(increment.job.get_run_times(next_run), [start, next_run]) self.scheduler._process_jobs(start) self.scheduler._process_jobs(next_run) eq_(vals[0], 2) def test_date(self): def append_val(value): vals.append(value) vals = [] date = datetime.now() + timedelta(seconds=1) self.scheduler.add_date_job(append_val, date, kwargs={'value': 'test'}) self.scheduler._process_jobs(date) eq_(vals, ['test']) def test_print_jobs(self): out = StringIO() self.scheduler.print_jobs(out) expected = 'Jobstore default:%s'\ ' No scheduled jobs%s' % (os.linesep, os.linesep) eq_(out.getvalue(), expected) self.scheduler.add_date_job(copy, datetime(2200, 5, 19)) out = StringIO() self.scheduler.print_jobs(out) expected = 'Jobstore default:%s '\ 'copy (trigger: date[2200-05-19 00:00:00], '\ 'next run at: 2200-05-19 00:00:00)%s' % (os.linesep, os.linesep) eq_(out.getvalue(), expected) def test_jobstore(self): self.scheduler.add_jobstore(RAMJobStore(), 'dummy') job = self.scheduler.add_date_job(lambda: None, datetime(2200, 7, 24), jobstore='dummy') eq_(self.scheduler.get_jobs(), [job]) self.scheduler.remove_jobstore('dummy') eq_(self.scheduler.get_jobs(), 
[]) @raises(KeyError) def test_remove_nonexistent_jobstore(self): self.scheduler.remove_jobstore('dummy2') def test_job_next_run_time(self): # Tests against bug #5 def increment(): vars[0] += 1 vars = [0] scheduler.datetime = FakeDateTime job = self.scheduler.add_interval_job(increment, seconds=1, misfire_grace_time=3, start_date=FakeDateTime.now()) start = job.next_run_time self.scheduler._process_jobs(start) eq_(vars, [1]) self.scheduler._process_jobs(start) eq_(vars, [1]) self.scheduler._process_jobs(start + timedelta(seconds=1)) eq_(vars, [2])
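# The two TestJobExecution classes in this section lean on a few test doubles
# (FakeThreadPool, FakeThread, FakeDateTime, original_now, DummyException) that are
# defined elsewhere in the real test module. A minimal sketch of the shapes these
# tests appear to need; assumed reconstructions, not the upstream definitions.
from datetime import datetime

class DummyException(Exception):
    pass

class FakeThreadPool(object):
    """Runs submitted callables synchronously so _process_jobs() is deterministic."""
    def submit(self, func, *args, **kwargs):
        func(*args, **kwargs)

    def shutdown(self, wait=True):
        pass

class FakeThread(object):
    """Stands in for the scheduler's worker thread; always reports itself alive."""
    def isAlive(self):
        return True

class FakeDateTime(datetime):
    """datetime replacement whose now() is frozen and advanced manually by the tests."""
    _now = datetime(2011, 4, 3, 18, 40)

    @classmethod
    def now(cls):
        return cls._now

original_now = FakeDateTime._now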
class HouseControl(object): __scheduler = None __heatingStatusBean = None busJobsQueue = Queue.Queue() busWorkerThread = BusWorker(busJobsQueue) def __init__(self): self.logger = logging.getLogger(APPLICATION_LOGGER_NAME) self.logger.info("HouseControl starting...") configurationReader = ConfigurationReader(self.logger, os.getcwd() + FILEPATH_CONFIGURATION) #Initialize HeatingStatusBean self.__initalizeHeatingStatusBean(configurationReader) #Initialize Scheduler self.__initializeScheduler(configurationReader) #Initialize BusQueueWorker self.busWorkerThread.setDaemon(True) self.busWorkerThread.start() self.logger.info("HouseControl started.") def __initalizeHeatingStatusBean(self, configurationReader): #HeatingStatusBean self.__heatingStatusBean = HeatingStatusBean.HeatingStatusBean() #Configure Bean self.updateHeatingStatusBeanConfiguration(configurationReader) #Add ChangeListener self.__heatingStatusBean.addChangeListener(HeatingControlService.HeatingControlService(self)) self.__heatingStatusBean.addChangeListener(HeatingSwitchService.HeatingSwitchService(self)) ##self.__heatingStatusBean.addChangeListener(HeatingMonitorService.HeatingMonitorService(self)) self.logger.info("HeatingStatusBean configured.") def __initializeScheduler(self, configurationReader): #Scheduler self.__scheduler = Scheduler() self.__scheduler.configure(standalone=True) self.__scheduler.add_listener(schedulerListener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR) #SchedulerTasks #TemperaturFeedService, TemperatureLogService, MixerControlService self.__loadBaseSchedulerTasks() self.__scheduler.start() #Benutzerdefinierte Schaltzeiten self.loadUserSchedulerTasks(configurationReader) self.logger.info("Scheduler started.") def getHeatingStatusBean(self): return self.__heatingStatusBean def getScheduler(self): return self.__scheduler def __loadBaseSchedulerTasks(self): temperatureFeedService = TemperatureFeedService.TemperatureFeedService(self) temperatureLogService = TemperatureLogService.TemperatureLogService(self) mixerControlService = MixerControlService.MixerControlService(self) #TemperaturFeedService job = self.__scheduler.add_interval_job(temperatureFeedService.run, seconds=INTERVALL_UPDATE_TEMPERATURE) job.name = SCHEDULE_SERVICE_TEMPERATURE_UPDATER self.logger.info("Scheduler-Job [" + job.name + "] loaded.") #TemperatureLogService job = self.__scheduler.add_interval_job(temperatureLogService.run, seconds=INTERVALL_LOG_TEMPERATURE) job.name = SCHEDULE_SERVICE_TEMPERATURE_LOGGER self.logger.info("Scheduler-Job [" + job.name + "] loaded.") #MixerControlService job = self.__scheduler.add_interval_job(mixerControlService.run, seconds=INTERVALL_UPDATE_MIXER) job.name = SCHEDULE_SERVICE_TEMPERATURE_MIXERCONTROL self.logger.info("Scheduler-Job [" + job.name + "] loaded.") def updateHeatingStatusBeanConfiguration(self, configurationReader): temperatures = configurationReader.temperatures self.__heatingStatusBean.setUpperFloorFlowTargetTemperature(float(temperatures.get('ogv'))) self.__heatingStatusBean.setGroundFloorFlowTargetTemperature(float(temperatures.get('egv'))) self.__heatingStatusBean.setWaterTargetTemperature(float(temperatures.get('hotwater'))) def reloadUserSchedulerTasks(self): self.removeUserSchedulerTasks() configurationReader = ConfigurationReader(self.logger, os.getcwd() + FILEPATH_CONFIGURATION) self.updateHeatingStatusBeanConfiguration(configurationReader) self.loadUserSchedulerTasks(configurationReader) def removeUserSchedulerTasks(self): prefixLen = len(SERVICE_HEATING_ACTION_PREFIX) jobList = 
self.__scheduler.get_jobs() for job in jobList: jobName = job.name if(jobName[:prefixLen] == SERVICE_HEATING_ACTION_PREFIX): self.logger.info("Scheduler-Job [" + job.name + "] removed.") self.__scheduler.unschedule_job(job) def loadUserSchedulerTasks(self, configurationReader): baseCronSched = {'year':None, 'month':None, 'day':None, 'week':None, 'day_of_week':None, 'hour':None, 'minute':None, 'second':None, 'start_date':None} for task in configurationReader.heatingTasks: schedType = task.get('schedule').get('type') if(schedType == 'cron'): cronSched = baseCronSched.copy() cronSched.update(task.get('schedule')) cronSched.pop('type') if(task.get('type') == 'changeHeatingStatus'): taskFunction = self.__heatingStatusBean.setHeatingStatusMap job = self.__scheduler.add_cron_job(taskFunction, year=cronSched['year'], month=cronSched['month'], day=cronSched['day'], week=cronSched['week'], day_of_week=cronSched['day_of_week'], hour=cronSched['hour'], minute=cronSched['minute'], second=cronSched['second'], start_date=cronSched['start_date'], args=[task.get('status')]) n = SERVICE_HEATING_ACTION_PREFIX + str(task.get('name')) job.name = n prefixLen = len(SERVICE_HEATING_ACTION_PREFIX) jobList = self.__scheduler.get_jobs() for job in jobList: jobName = job.name if(jobName[:prefixLen] == SERVICE_HEATING_ACTION_PREFIX): self.logger.info("Scheduler-Job [" + jobName + "] loaded.")
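# Hedged illustration of the entries loadUserSchedulerTasks() above expects in
# configurationReader.heatingTasks: each item carries a cron-style 'schedule' block
# plus the keys read via task.get(...). The 'status' payload is whatever
# setHeatingStatusMap() accepts; the dicts below are placeholders.
example_heating_tasks = [
    {
        'name': 'morning_on',
        'type': 'changeHeatingStatus',
        'status': {'heating': 'on'},      # forwarded as args=[status]
        'schedule': {'type': 'cron', 'day_of_week': 'mon-fri', 'hour': '5', 'minute': '30'},
    },
    {
        'name': 'night_off',
        'type': 'changeHeatingStatus',
        'status': {'heating': 'off'},
        'schedule': {'type': 'cron', 'hour': '22', 'minute': '0'},
    },
]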
class AlertSchedulerHandler(): FILENAME = 'definitions.json' TYPE_PORT = 'PORT' TYPE_METRIC = 'METRIC' TYPE_SCRIPT = 'SCRIPT' TYPE_WEB = 'WEB' APS_CONFIG = { 'threadpool.core_threads': 3, 'coalesce': True, 'standalone': False } def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir, cluster_configuration, config, in_minutes=True): self.cachedir = cachedir self.stacks_dir = stacks_dir self.common_services_dir = common_services_dir self.host_scripts_dir = host_scripts_dir self._cluster_configuration = cluster_configuration if not os.path.exists(cachedir): try: os.makedirs(cachedir) except: logger.critical("[AlertScheduler] Could not create the cache directory {0}".format(cachedir)) self._collector = AlertCollector() self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG) self.__in_minutes = in_minutes self.config = config # register python exit handler atexit.register(self.exit_handler) def exit_handler(self): """ Exit handler """ self.stop() def update_definitions(self, heartbeat): """ Updates the persisted alert definitions JSON. :param heartbeat: :return: """ if 'alertDefinitionCommands' not in heartbeat: logger.warning("There are no alert definition commands in the heartbeat; unable to update definitions") return # prune out things we don't want to store alert_definitions = [] for command in heartbeat['alertDefinitionCommands']: command_copy = command.copy() # no need to store these since we always use the in-memory cached values if 'configurations' in command_copy: del command_copy['configurations'] alert_definitions.append(command_copy) # write out the new definitions with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f: json.dump(alert_definitions, f, indent=2) # reschedule only the jobs that have changed self.reschedule() def __make_function(self, alert_def): return lambda: alert_def.collect() def start(self): """ loads definitions from file and starts the scheduler """ if self.__scheduler is None: return if self.__scheduler.running: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG) alert_callables = self.__load_definitions() # schedule each definition for _callable in alert_callables: self.schedule_definition(_callable) logger.info("[AlertScheduler] Starting {0}; currently running: {1}".format( str(self.__scheduler), str(self.__scheduler.running))) self.__scheduler.start() def stop(self): if not self.__scheduler is None: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG) logger.info("[AlertScheduler] Stopped the alert scheduler.") def reschedule(self): """ Removes jobs that are scheduled where their UUID no longer is valid. Schedules jobs where the definition UUID is not currently scheduled. 
""" jobs_scheduled = 0 jobs_removed = 0 definitions = self.__load_definitions() scheduled_jobs = self.__scheduler.get_jobs() # for every scheduled job, see if its UUID is still valid for scheduled_job in scheduled_jobs: uuid_valid = False for definition in definitions: definition_uuid = definition.get_uuid() if scheduled_job.name == definition_uuid: uuid_valid = True break # jobs without valid UUIDs should be unscheduled if uuid_valid == False: jobs_removed += 1 logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name)) self._collector.remove_by_uuid(scheduled_job.name) self.__scheduler.unschedule_job(scheduled_job) # for every definition, determine if there is a scheduled job for definition in definitions: definition_scheduled = False for scheduled_job in scheduled_jobs: definition_uuid = definition.get_uuid() if definition_uuid == scheduled_job.name: definition_scheduled = True break # if no jobs are found with the definitions UUID, schedule it if definition_scheduled == False: jobs_scheduled += 1 self.schedule_definition(definition) logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format( str(jobs_scheduled), str(jobs_removed))) def reschedule_all(self): """ Removes jobs that are scheduled where their UUID no longer is valid. Schedules jobs where the definition UUID is not currently scheduled. """ jobs_scheduled = 0 jobs_removed = 0 definitions = self.__load_definitions() scheduled_jobs = self.__scheduler.get_jobs() # unschedule all scheduled jobs for scheduled_job in scheduled_jobs: jobs_removed += 1 logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name)) self._collector.remove_by_uuid(scheduled_job.name) self.__scheduler.unschedule_job(scheduled_job) # for every definition, schedule a job for definition in definitions: jobs_scheduled += 1 self.schedule_definition(definition) logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format( str(jobs_scheduled), str(jobs_removed))) def collector(self): """ gets the collector for reporting to the server """ return self._collector def __load_definitions(self): """ Loads all alert definitions from a file. All clusters are stored in a single file. :return: """ definitions = [] all_commands = None alerts_definitions_path = os.path.join(self.cachedir, self.FILENAME) try: with open(alerts_definitions_path) as fp: all_commands = json.load(fp) except: logger.warning('[AlertScheduler] {0} not found or invalid. 
No alerts will be scheduled until registration occurs.'.format(alerts_definitions_path)) return definitions for command_json in all_commands: clusterName = '' if not 'clusterName' in command_json else command_json['clusterName'] hostName = '' if not 'hostName' in command_json else command_json['hostName'] for definition in command_json['alertDefinitions']: alert = self.__json_to_callable(clusterName, hostName, definition) if alert is None: continue alert.set_helpers(self._collector, self._cluster_configuration) definitions.append(alert) return definitions def __json_to_callable(self, clusterName, hostName, json_definition): """ converts the json that represents all aspects of a definition and makes an object that extends BaseAlert that is used for individual """ source = json_definition['source'] source_type = source.get('type', '') if logger.isEnabledFor(logging.DEBUG): logger.debug("[AlertScheduler] Creating job type {0} with {1}".format(source_type, str(json_definition))) alert = None if source_type == AlertSchedulerHandler.TYPE_METRIC: alert = MetricAlert(json_definition, source) elif source_type == AlertSchedulerHandler.TYPE_PORT: alert = PortAlert(json_definition, source) elif source_type == AlertSchedulerHandler.TYPE_SCRIPT: source['stacks_directory'] = self.stacks_dir source['common_services_directory'] = self.common_services_dir source['host_scripts_directory'] = self.host_scripts_dir alert = ScriptAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_WEB: alert = WebAlert(json_definition, source, self.config) if alert is not None: alert.set_cluster(clusterName, hostName) return alert def schedule_definition(self,definition): """ Schedule a definition (callable). Scheduled jobs are given the UUID as their name so that they can be identified later on. <p/> This function can be called with a definition that is disabled; it will simply NOOP. """ # NOOP if the definition is disabled; don't schedule it if not definition.is_enabled(): logger.info("[AlertScheduler] The alert {0} with UUID {1} is disabled and will not be scheduled".format( definition.get_name(),definition.get_uuid())) return job = None if self.__in_minutes: job = self.__scheduler.add_interval_job(self.__make_function(definition), minutes=definition.interval()) else: job = self.__scheduler.add_interval_job(self.__make_function(definition), seconds=definition.interval()) # although the documentation states that Job(kwargs) takes a name # key/value pair, it does not actually set the name; do it manually if job is not None: job.name = definition.get_uuid() logger.info("[AlertScheduler] Scheduling {0} with UUID {1}".format( definition.get_name(), definition.get_uuid())) def get_job_count(self): """ Gets the number of jobs currently scheduled. This is mainly used for test verification of scheduling. """ if self.__scheduler is None: return 0 return len(self.__scheduler.get_jobs()) def execute_alert(self, execution_commands): """ Executes an alert immediately, ignoring any scheduled jobs. The existing jobs remain untouched. 
The result of this is stored in the alert collector for transmission during the next heartbeat
    """
    if self.__scheduler is None or execution_commands is None:
      return

    for execution_command in execution_commands:
      try:
        alert_definition = execution_command['alertDefinition']

        clusterName = '' if not 'clusterName' in execution_command else execution_command['clusterName']
        hostName = '' if not 'hostName' in execution_command else execution_command['hostName']

        alert = self.__json_to_callable(clusterName, hostName, alert_definition)

        if alert is None:
          continue

        logger.info("[AlertScheduler] Executing on-demand alert {0} ({1})".format(
            alert.get_name(), alert.get_uuid()))

        alert.set_helpers(self._collector, self._cluster_configuration)
        alert.collect()
      except:
        logger.exception("[AlertScheduler] Unable to execute the alert outside of the job scheduler")
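# The scheduler above only touches a small surface of each alert definition object.
# A hedged outline of that interface, inferred from the calls in schedule_definition(),
# reschedule() and __make_function(); the class name and return values are illustrative.
class MinimalAlertDefinition(object):
    def is_enabled(self):
        """Disabled definitions are skipped by schedule_definition()."""
        return True

    def get_name(self):
        return "example_alert"

    def get_uuid(self):
        """Used as the APScheduler job name so jobs can be matched on reschedule."""
        return "00000000-0000-0000-0000-000000000000"

    def interval(self):
        """Interval in minutes (or seconds when in_minutes is False)."""
        return 1

    def set_helpers(self, collector, cluster_configuration):
        """Receives the collector and cluster configuration before scheduling."""
        pass

    def collect(self):
        """Invoked on every run of the scheduled interval job."""
        pass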
class TestJobExecution(object): def setup(self): self.scheduler = Scheduler(threadpool=FakeThreadPool()) self.scheduler.add_jobstore(RAMJobStore(), "default") # Make the scheduler think it's running self.scheduler._thread = FakeThread() self.logstream = StringIO() self.loghandler = StreamHandler(self.logstream) self.loghandler.setLevel(ERROR) scheduler.logger.addHandler(self.loghandler) def teardown(self): scheduler.logger.removeHandler(self.loghandler) if scheduler.datetime == FakeDateTime: scheduler.datetime = datetime FakeDateTime._now = original_now def test_job_name(self): def my_job(): pass job = self.scheduler.add_interval_job(my_job, start_date=datetime(2010, 5, 19)) eq_( repr(job), "<Job (name=my_job, trigger=<IntervalTrigger (interval=datetime.timedelta(0, 1), " "start_date=datetime.datetime(2010, 5, 19, 0, 0))>)>", ) def test_schedule_object(self): # Tests that any callable object is accepted (and not just functions) class A: def __init__(self): self.val = 0 def __call__(self): self.val += 1 a = A() job = self.scheduler.add_interval_job(a, seconds=1) self.scheduler._process_jobs(job.next_run_time) self.scheduler._process_jobs(job.next_run_time) eq_(a.val, 2) def test_schedule_method(self): # Tests that bound methods can be scheduled (at least with RAMJobStore) class A: def __init__(self): self.val = 0 def method(self): self.val += 1 a = A() job = self.scheduler.add_interval_job(a.method, seconds=1) self.scheduler._process_jobs(job.next_run_time) self.scheduler._process_jobs(job.next_run_time) eq_(a.val, 2) def test_unschedule_job(self): def increment(): vals[0] += 1 vals = [0] job = self.scheduler.add_cron_job(increment) self.scheduler._process_jobs(job.next_run_time) eq_(vals[0], 1) self.scheduler.unschedule_job(job) self.scheduler._process_jobs(job.next_run_time) eq_(vals[0], 1) def test_unschedule_func(self): def increment(): vals[0] += 1 def increment2(): vals[0] += 1 vals = [0] job1 = self.scheduler.add_cron_job(increment) job2 = self.scheduler.add_cron_job(increment2) job3 = self.scheduler.add_cron_job(increment) eq_(self.scheduler.get_jobs(), [job1, job2, job3]) self.scheduler.unschedule_func(increment) eq_(self.scheduler.get_jobs(), [job2]) @raises(KeyError) def test_unschedule_func_notfound(self): self.scheduler.unschedule_func(copy) def test_job_finished(self): def increment(): vals[0] += 1 vals = [0] job = self.scheduler.add_interval_job(increment, max_runs=1) self.scheduler._process_jobs(job.next_run_time) eq_(vals, [1]) assert job not in self.scheduler.get_jobs() def test_job_exception(self): def failure(): raise DummyException job = self.scheduler.add_date_job(failure, datetime(9999, 9, 9)) self.scheduler._process_jobs(job.next_run_time) assert "DummyException" in self.logstream.getvalue() def test_misfire_grace_time(self): self.scheduler.misfire_grace_time = 3 job = self.scheduler.add_interval_job(lambda: None, seconds=1) eq_(job.misfire_grace_time, 3) job = self.scheduler.add_interval_job(lambda: None, seconds=1, misfire_grace_time=2) eq_(job.misfire_grace_time, 2) def test_coalesce_on(self): # Makes sure that the job is only executed once when it is scheduled # to be executed twice in a row def increment(): vals[0] += 1 vals = [0] events = [] scheduler.datetime = FakeDateTime self.scheduler.add_listener(events.append, EVENT_JOB_EXECUTED | EVENT_JOB_MISSED) job = self.scheduler.add_interval_job( increment, seconds=1, start_date=FakeDateTime.now(), coalesce=True, misfire_grace_time=2 ) # Turn the clock 14 seconds forward FakeDateTime._now += timedelta(seconds=2) 
self.scheduler._process_jobs(FakeDateTime.now()) eq_(job.runs, 1) eq_(len(events), 1) eq_(events[0].code, EVENT_JOB_EXECUTED) eq_(vals, [1]) def test_coalesce_off(self): # Makes sure that every scheduled run for the job is executed even # when they are in the past (but still within misfire_grace_time) def increment(): vals[0] += 1 vals = [0] events = [] scheduler.datetime = FakeDateTime self.scheduler.add_listener(events.append, EVENT_JOB_EXECUTED | EVENT_JOB_MISSED) job = self.scheduler.add_interval_job( increment, seconds=1, start_date=FakeDateTime.now(), coalesce=False, misfire_grace_time=2 ) # Turn the clock 2 seconds forward FakeDateTime._now += timedelta(seconds=2) self.scheduler._process_jobs(FakeDateTime.now()) eq_(job.runs, 3) eq_(len(events), 3) eq_(events[0].code, EVENT_JOB_EXECUTED) eq_(events[1].code, EVENT_JOB_EXECUTED) eq_(events[2].code, EVENT_JOB_EXECUTED) eq_(vals, [3]) def test_interval(self): def increment(amount): vals[0] += amount vals[1] += 1 vals = [0, 0] job = self.scheduler.add_interval_job(increment, seconds=1, args=[2]) self.scheduler._process_jobs(job.next_run_time) self.scheduler._process_jobs(job.next_run_time) eq_(vals, [4, 2]) def test_interval_schedule(self): @self.scheduler.interval_schedule(seconds=1) def increment(): vals[0] += 1 vals = [0] start = increment.job.next_run_time self.scheduler._process_jobs(start) self.scheduler._process_jobs(start + timedelta(seconds=1)) eq_(vals, [2]) def test_cron(self): def increment(amount): vals[0] += amount vals[1] += 1 vals = [0, 0] job = self.scheduler.add_cron_job(increment, args=[3]) start = job.next_run_time self.scheduler._process_jobs(start) eq_(vals, [3, 1]) self.scheduler._process_jobs(start + timedelta(seconds=1)) eq_(vals, [6, 2]) self.scheduler._process_jobs(start + timedelta(seconds=2)) eq_(vals, [9, 3]) def test_cron_schedule_1(self): @self.scheduler.cron_schedule() def increment(): vals[0] += 1 vals = [0] start = increment.job.next_run_time self.scheduler._process_jobs(start) self.scheduler._process_jobs(start + timedelta(seconds=1)) eq_(vals[0], 2) def test_cron_schedule_2(self): @self.scheduler.cron_schedule(minute="*") def increment(): vals[0] += 1 vals = [0] start = increment.job.next_run_time next_run = start + timedelta(seconds=60) eq_(increment.job.get_run_times(next_run), [start, next_run]) self.scheduler._process_jobs(start) self.scheduler._process_jobs(next_run) eq_(vals[0], 2) def test_date(self): def append_val(value): vals.append(value) vals = [] date = datetime.now() + timedelta(seconds=1) self.scheduler.add_date_job(append_val, date, kwargs={"value": "test"}) self.scheduler._process_jobs(date) eq_(vals, ["test"]) def test_print_jobs(self): out = StringIO() self.scheduler.print_jobs(out) expected = "Jobstore default:%s" " No scheduled jobs%s" % (os.linesep, os.linesep) eq_(out.getvalue(), expected) self.scheduler.add_date_job(copy, datetime(2200, 5, 19)) out = StringIO() self.scheduler.print_jobs(out) expected = ( "Jobstore default:%s " "copy (trigger: date[2200-05-19 00:00:00], " "next run at: 2200-05-19 00:00:00)%s" % (os.linesep, os.linesep) ) eq_(out.getvalue(), expected) def test_jobstore(self): self.scheduler.add_jobstore(RAMJobStore(), "dummy") job = self.scheduler.add_date_job(lambda: None, datetime(2200, 7, 24), jobstore="dummy") eq_(self.scheduler.get_jobs(), [job]) self.scheduler.remove_jobstore("dummy") eq_(self.scheduler.get_jobs(), []) @raises(KeyError) def test_remove_nonexistent_jobstore(self): self.scheduler.remove_jobstore("dummy2") def test_job_next_run_time(self): # 
Tests against bug #5 def increment(): vars[0] += 1 vars = [0] scheduler.datetime = FakeDateTime job = self.scheduler.add_interval_job(increment, seconds=1, misfire_grace_time=3, start_date=FakeDateTime.now()) start = job.next_run_time self.scheduler._process_jobs(start) eq_(vars, [1]) self.scheduler._process_jobs(start) eq_(vars, [1]) self.scheduler._process_jobs(start + timedelta(seconds=1)) eq_(vars, [2])
class TestOfflineScheduler(object): def setup(self): self.scheduler = Scheduler() def teardown(self): if self.scheduler.running: self.scheduler.shutdown() @raises(KeyError) def test_jobstore_twice(self): self.scheduler.add_jobstore(RAMJobStore(), "dummy") self.scheduler.add_jobstore(RAMJobStore(), "dummy") def test_add_tentative_job(self): job = self.scheduler.add_date_job(lambda: None, datetime(2200, 7, 24), jobstore="dummy") assert isinstance(job, Job) eq_(self.scheduler.get_jobs(), []) def test_add_job_by_reference(self): job = self.scheduler.add_date_job("copy:copy", datetime(2200, 7, 24)) eq_(job.func, copy) eq_(job.func_ref, "copy:copy") def test_configure_jobstore(self): conf = {"apscheduler.jobstore.ramstore.class": "apscheduler.jobstores.ram_store:RAMJobStore"} self.scheduler.configure(conf) self.scheduler.remove_jobstore("ramstore") def test_shutdown_offline(self): self.scheduler.shutdown() def test_configure_no_prefix(self): global_options = {"misfire_grace_time": "2", "daemonic": "false"} self.scheduler.configure(global_options) eq_(self.scheduler.misfire_grace_time, 1) eq_(self.scheduler.daemonic, True) def test_configure_prefix(self): global_options = {"apscheduler.misfire_grace_time": 2, "apscheduler.daemonic": False} self.scheduler.configure(global_options) eq_(self.scheduler.misfire_grace_time, 2) eq_(self.scheduler.daemonic, False) def test_add_listener(self): val = [] self.scheduler.add_listener(val.append) event = SchedulerEvent(EVENT_SCHEDULER_START) self.scheduler._notify_listeners(event) eq_(len(val), 1) eq_(val[0], event) event = SchedulerEvent(EVENT_SCHEDULER_SHUTDOWN) self.scheduler._notify_listeners(event) eq_(len(val), 2) eq_(val[1], event) self.scheduler.remove_listener(val.append) self.scheduler._notify_listeners(event) eq_(len(val), 2) def test_pending_jobs(self): # Tests that pending jobs are properly added to the jobs list when # the scheduler is started (and not before!) self.scheduler.add_date_job(lambda: None, datetime(9999, 9, 9)) eq_(self.scheduler.get_jobs(), []) self.scheduler.start() jobs = self.scheduler.get_jobs() eq_(len(jobs), 1)
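# test_configure_no_prefix / test_configure_prefix above exercise how the APScheduler 2.x
# Scheduler reads its global configuration dict: only keys carrying the 'apscheduler.'
# prefix are applied, unprefixed keys are silently ignored. A minimal standalone
# illustration (option values are arbitrary):
from apscheduler.scheduler import Scheduler

sched = Scheduler({
    'apscheduler.misfire_grace_time': 30,   # applied (prefixed)
    'apscheduler.coalesce': True,           # applied (prefixed)
    'daemonic': False,                      # ignored (no prefix)
})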
class TNActionScheduler(TNArchipelPlugin): """ This plugin allows to create scheduled actions. """ def __init__(self, configuration, entity, entry_point_group): """ Initialize the plugin. @type configuration: Configuration object @param configuration: the configuration @type entity: L{TNArchipelEntity} @param entity: the entity that owns the plugin @type entry_point_group: string @param entry_point_group: the group name of plugin entry_point """ TNArchipelPlugin.__init__(self, configuration=configuration, entity=entity, entry_point_group=entry_point_group) self.scheduler = Scheduler() self.scheduler.start() self.database = sqlite3.connect(self.configuration.get( "SCHEDULER", "database"), check_same_thread=False) self.database.execute( "create table if not exists scheduler (entity_uuid text, job_uuid text, action text, year text, month text, day text, hour text, minute text, second text, comment text, params text)" ) self.database.commit() self.cursor = self.database.cursor() self.restore_jobs() self.supported_actions_for_vm = ("create", "shutdown", "destroy", "suspend", "resume", "reboot", "migrate", "pause") self.supported_actions_for_hypervisor = ("alloc", "free") # permissions self.entity.permission_center.create_permission( "scheduler_jobs", "Authorizes user to get the list of task", False) self.entity.permission_center.create_permission( "scheduler_schedule", "Authorizes user to schedule a task", False) self.entity.permission_center.create_permission( "scheduler_unschedule", "Authorizes user to unschedule a task", False) self.entity.permission_center.create_permission( "scheduler_actions", "Authorizes user to get available actions", False) # hooks if self.entity.__class__.__name__ == "TNArchipelVirtualMachine": self.entity.register_hook("HOOK_VM_TERMINATE", method=self.vm_terminate) ### Plugin interface def register_handlers(self): """ This method will be called by the plugin user when it will be necessary to register module for listening to stanza. """ self.entity.xmppclient.RegisterHandler('iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER) def unregister_handlers(self): """ Unregister the handlers. """ self.entity.xmppclient.UnregisterHandler( 'iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER) @staticmethod def plugin_info(): """ Return informations about the plugin. @rtype: dict @return: dictionary contaning plugin informations """ plugin_friendly_name = "Action Scheduler" plugin_identifier = "action_scheduler" plugin_configuration_section = "SCHEDULER" plugin_configuration_tokens = ["database"] return { "common-name": plugin_friendly_name, "identifier": plugin_identifier, "configuration-section": plugin_configuration_section, "configuration-tokens": plugin_configuration_tokens } ### Persistance def delete_job(self, uid): """ Remove a job from the database. @type uid: string @param uid: the uid of the job to remove """ self.cursor.execute("DELETE FROM scheduler WHERE job_uuid=?", (uid, )) self.database.commit() def save_jobs(self, uid, action, year, month, day, hour, minute, second, comment, params=None): """ Save a job in the database. 
@type uid: string @param uid: the uid of the job @type action: string @param action: the action @type year: string @param year: year of execution @type month: string @param month: month of execution @type day: string @param day: day of execution @type hour: string @param hour: hour of execution @type minute: string @param minute: minute of execution @type second: string @param second: second of execution @type comment: string @param comment: comment about the job @type params: string @param params: random parameter of the job """ entityClass = self.entity.__class__.__name__ if entityClass == "TNArchipelVirtualMachine": entity_uid = self.entity.uuid elif entityClass == "TNArchipelHypervisor": entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID self.cursor.execute( "INSERT INTO scheduler VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", ( entity_uid, uid, action, year, month, day, hour, minute, second, comment, params, )) self.database.commit() def restore_jobs(self): """ Restore the jobs from the database. """ entityClass = self.entity.__class__.__name__ if entityClass == "TNArchipelVirtualMachine": entity_uid = self.entity.uuid elif entityClass == "TNArchipelHypervisor": entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID self.cursor.execute("SELECT * FROM scheduler WHERE entity_uuid=?", (entity_uid, )) for values in self.cursor: try: entity_uuid, job_uuid, action, year, month, day, hour, minute, second, comment, params = values str_date = "%s/%s/%s %s:%s:%s" % (year, month, day, hour, minute, second) self.scheduler.add_cron_job( self.do_job_for_vm, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[action, job_uuid, str_date, comment]) except Exception as ex: self.entity.log.error("unable to restore a job: %s" % str(ex)) def vm_terminate(self, origin, user_info, arguments): """ Close the database connection. @type origin: TNArchipelEntity @param origin: the origin of the hook @type user_info: object @param user_info: random user information @type arguments: object @param arguments: runtime argument """ self.database.close() ### Jobs def get_jod_with_uid(self, uid): """ Get a job with given uid. @type uid: string @param uid: the uid of the job """ if hasattr(self.scheduler, "get_jobs"): jobs = self.scheduler.get_jobs() else: jobs = self.scheduler.jobs for job in jobs: if str(job.args[1]) == uid: return job return None def do_job_for_vm(self, action, uid, str_date, comment, param): """ Perform the job. @type action: string @param action: the action to execute @type uid: string @param uid: the uid of the job @type str_date: string @param str_date: the date of the job @type comment: string @param comment: comment about the job @type param: string @param param: a random parameter to give to job """ if action == "create": self.entity.create() elif action == "shutdown": self.entity.shutdown() elif action == "destroy": self.entity.destroy() elif action == "suspend": self.entity.suspend() elif action == "resume": self.entity.resume() elif action == "pause": if self.entity.libvirt_status == 1: self.entity.suspend() elif self.entity.libvirt_status == 3: self.entity.resume() elif action == "migrate": pass job = self.get_jod_with_uid(uid) if not job or not self.scheduler.is_job_active(job): self.delete_job(uid) self.entity.push_change("scheduler", "jobexecuted") def do_job_for_hypervisor(self, action, uid, str_date, comment, param): """ Perform the job. 
@type action: string @param action: the action to execute @type uid: string @param uid: the uid of the job @type str_date: string @param str_date: the date of the job @type comment: string @param comment: comment about the job @type param: string @param param: a random parameter to give to job """ if action == "alloc": self.entity.alloc() elif action == "free": pass #self.entity.free() job = self.get_jod_with_uid(uid) if not job or not self.scheduler.is_job_active(job): self.delete_job(uid) self.entity.push_change("scheduler", "jobexecuted") ### Process IQ def process_iq(self, conn, iq): """ This method is invoked when a ARCHIPEL_NS_VM_SCHEDULER IQ is received. It understands IQ of type: - jobs - schedule - unschedule @type conn: xmpp.Dispatcher @param conn: ths instance of the current connection that send the stanza @type iq: xmpp.Protocol.Iq @param iq: the received IQ """ reply = None action = self.entity.check_acp(conn, iq) self.entity.check_perm(conn, iq, action, -1, prefix="scheduler_") if action == "schedule": reply = self.iq_schedule(iq) elif action == "unschedule": reply = self.iq_unschedule(iq) elif action == "jobs": reply = self.iq_jobs(iq) elif action == "actions": reply = self.iq_actions(iq) if reply: conn.send(reply) raise xmpp.protocol.NodeProcessed def iq_schedule(self, iq): """ Schedule a task. @type iq: xmpp.Protocol.Iq @param iq: the received IQ @rtype: xmpp.Protocol.Iq @return: a ready to send IQ containing the result of the action """ try: reply = iq.buildReply("result") job = iq.getTag("query").getTag("archipel").getAttr("job") entityClass = self.entity.__class__.__name__ param = None if entityClass == "TNArchipelVirtualMachine" and not job in self.supported_actions_for_vm: raise Exception("action %s is not valid" % job) elif entityClass == "TNArchipelHypervisor" and not job in self.supported_actions_for_hypervisor: raise Exception("action %s is not valid" % job) year = iq.getTag("query").getTag("archipel").getAttr("year") month = iq.getTag("query").getTag("archipel").getAttr("month") day = iq.getTag("query").getTag("archipel").getAttr("day") hour = iq.getTag("query").getTag("archipel").getAttr("hour") minute = iq.getTag("query").getTag("archipel").getAttr("minute") second = iq.getTag("query").getTag("archipel").getAttr("second") comment = iq.getTag("query").getTag("archipel").getAttr("comment") if iq.getTag("query").getTag("archipel").has_attr("param"): param = iq.getTag("query").getTag("archipel").getAttr("param") uid = str(uuid.uuid1()) str_date = "%s-%s-%s @ %s : %02d : %02d" % ( year, month, day, hour, int(minute), int(second)) if entityClass == "TNArchipelVirtualMachine": func = self.do_job_for_vm elif entityClass == "TNArchipelHypervisor": func = self.do_job_for_hypervisor self.scheduler.add_cron_job( func, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[job, uid, str_date, comment, param]) self.save_jobs(uid, job, year, month, day, hour, minute, second, comment, param) self.entity.push_change("scheduler", "scheduled") except Exception as ex: reply = build_error_iq(self, ex, iq) return reply def iq_jobs(self, iq): """ Get jobs. 
@type iq: xmpp.Protocol.Iq @param iq: the received IQ @rtype: xmpp.Protocol.Iq @return: a ready to send IQ containing the result of the action """ try: reply = iq.buildReply("result") nodes = [] if hasattr(self.scheduler, "get_jobs"): jobs = self.scheduler.get_jobs() else: jobs = self.scheduler.jobs for job in jobs: job_node = xmpp.Node(tag="job", attrs={ "action": str(job.args[0]), "uid": str(job.args[1]), "date": str(job.args[2]), "comment": job.args[3] }) nodes.append(job_node) reply.setQueryPayload(nodes) except Exception as ex: reply = build_error_iq(self, ex, iq) return reply def iq_unschedule(self, iq): """ Unschedule a job. @type iq: xmpp.Protocol.Iq @param iq: the received IQ @rtype: xmpp.Protocol.Iq @return: a ready to send IQ containing the result of the action """ try: reply = iq.buildReply("result") uid = iq.getTag("query").getTag("archipel").getAttr("uid") the_job = self.get_jod_with_uid(uid) if not the_job: raise Exception("job with uid %s doesn't exists" % uid) self.delete_job(uid) self.scheduler.unschedule_job(the_job) self.entity.push_change("scheduler", "unscheduled") except Exception as ex: reply = build_error_iq(self, ex, iq) return reply def iq_actions(self, iq): """ Get available actions. @type iq: xmpp.Protocol.Iq @param iq: the received IQ @rtype: xmpp.Protocol.Iq @return: a ready to send IQ containing the result of the action """ try: reply = iq.buildReply("result") entityClass = self.entity.__class__.__name__ if entityClass == "TNArchipelVirtualMachine": actions = self.supported_actions_for_vm elif entityClass == "TNArchipelHypervisor": actions = self.supported_actions_for_hypervisor nodes = [] for action in actions: action_node = xmpp.Node(tag="action") action_node.setData(action) nodes.append(action_node) reply.setQueryPayload(nodes) except Exception as ex: reply = build_error_iq(self, ex, iq) return reply
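# Hedged sketch of the stanza iq_schedule() above parses: a <query/> in the
# ARCHIPEL_NS_ENTITY_SCHEDULER namespace containing one <archipel/> tag whose
# attributes carry the action, the job and the cron fields. The attribute values
# are placeholders, and the "action" attribute name is an assumption about how
# check_acp() dispatches.
schedule_node = xmpp.Node(tag="archipel", attrs={
    "action": "schedule",          # assumed dispatch attribute read by check_acp()
    "job": "shutdown",             # must appear in supported_actions_for_vm / _for_hypervisor
    "year": "2015", "month": "6", "day": "1",
    "hour": "3", "minute": "0", "second": "0",
    "comment": "nightly shutdown",
    # "param": "...",              # optional, read only when present
})
iq = xmpp.Iq(typ="set", queryNS=ARCHIPEL_NS_ENTITY_SCHEDULER)
iq.getTag("query").addChild(node=schedule_node)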
class AlertSchedulerHandler(): FILENAME = 'definitions.json' TYPE_PORT = 'PORT' TYPE_METRIC = 'METRIC' TYPE_SCRIPT = 'SCRIPT' TYPE_WEB = 'WEB' TYPE_RECOVERY = 'RECOVERY' def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir, alert_grace_period, cluster_configuration, config, recovery_manager, in_minutes=True): self.cachedir = cachedir self.stacks_dir = stacks_dir self.common_services_dir = common_services_dir self.host_scripts_dir = host_scripts_dir self._cluster_configuration = cluster_configuration if not os.path.exists(cachedir): try: os.makedirs(cachedir) except: logger.critical("[AlertScheduler] Could not create the cache directory {0}".format(cachedir)) self.APS_CONFIG = { 'apscheduler.threadpool.core_threads': 3, 'apscheduler.coalesce': True, 'apscheduler.standalone': False, 'apscheduler.misfire_grace_time': alert_grace_period } self._collector = AlertCollector() self.__scheduler = Scheduler(self.APS_CONFIG) self.__in_minutes = in_minutes self.config = config self.recovery_manger = recovery_manager # register python exit handler ExitHelper().register(self.exit_handler) def exit_handler(self): """ Exit handler """ self.stop() def update_definitions(self, heartbeat): """ Updates the persisted alert definitions JSON. :param heartbeat: :return: """ if 'alertDefinitionCommands' not in heartbeat: logger.warning("There are no alert definition commands in the heartbeat; unable to update definitions") return # prune out things we don't want to store alert_definitions = [] for command in heartbeat['alertDefinitionCommands']: command_copy = command.copy() # no need to store these since we always use the in-memory cached values if 'configurations' in command_copy: del command_copy['configurations'] alert_definitions.append(command_copy) # write out the new definitions with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f: json.dump(alert_definitions, f, indent=2) # reschedule only the jobs that have changed self.reschedule() def __make_function(self, alert_def): return lambda: alert_def.collect() def start(self): """ loads definitions from file and starts the scheduler """ if self.__scheduler is None: return if self.__scheduler.running: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(self.APS_CONFIG) alert_callables = self.__load_definitions() # schedule each definition for _callable in alert_callables: self.schedule_definition(_callable) logger.info("[AlertScheduler] Starting {0}; currently running: {1}".format( str(self.__scheduler), str(self.__scheduler.running))) self.__scheduler.start() def stop(self): if not self.__scheduler is None: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(self.APS_CONFIG) logger.info("[AlertScheduler] Stopped the alert scheduler.") def reschedule(self): """ Removes jobs that are scheduled where their UUID no longer is valid. Schedules jobs where the definition UUID is not currently scheduled. 
""" jobs_scheduled = 0 jobs_removed = 0 definitions = self.__load_definitions() scheduled_jobs = self.__scheduler.get_jobs() # for every scheduled job, see if its UUID is still valid for scheduled_job in scheduled_jobs: uuid_valid = False for definition in definitions: definition_uuid = definition.get_uuid() if scheduled_job.name == definition_uuid: uuid_valid = True break # jobs without valid UUIDs should be unscheduled if uuid_valid == False: jobs_removed += 1 logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name)) self._collector.remove_by_uuid(scheduled_job.name) self.__scheduler.unschedule_job(scheduled_job) # for every definition, determine if there is a scheduled job for definition in definitions: definition_scheduled = False for scheduled_job in scheduled_jobs: definition_uuid = definition.get_uuid() if definition_uuid == scheduled_job.name: definition_scheduled = True break # if no jobs are found with the definitions UUID, schedule it if definition_scheduled == False: jobs_scheduled += 1 self.schedule_definition(definition) logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format( str(jobs_scheduled), str(jobs_removed))) def reschedule_all(self): """ Removes jobs that are scheduled where their UUID no longer is valid. Schedules jobs where the definition UUID is not currently scheduled. """ jobs_scheduled = 0 jobs_removed = 0 definitions = self.__load_definitions() scheduled_jobs = self.__scheduler.get_jobs() # unschedule all scheduled jobs for scheduled_job in scheduled_jobs: jobs_removed += 1 logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name)) self._collector.remove_by_uuid(scheduled_job.name) self.__scheduler.unschedule_job(scheduled_job) # for every definition, schedule a job for definition in definitions: jobs_scheduled += 1 self.schedule_definition(definition) logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format( str(jobs_scheduled), str(jobs_removed))) def collector(self): """ gets the collector for reporting to the server """ return self._collector def __load_definitions(self): """ Loads all alert definitions from a file. All clusters are stored in a single file. :return: """ definitions = [] all_commands = None alerts_definitions_path = os.path.join(self.cachedir, self.FILENAME) try: with open(alerts_definitions_path) as fp: all_commands = json.load(fp) except: logger.warning('[AlertScheduler] {0} not found or invalid. 
            return definitions

        for command_json in all_commands:
            clusterName = '' if 'clusterName' not in command_json else command_json['clusterName']
            hostName = '' if 'hostName' not in command_json else command_json['hostName']

            for definition in command_json['alertDefinitions']:
                alert = self.__json_to_callable(clusterName, hostName, definition)

                if alert is None:
                    continue

                alert.set_helpers(self._collector, self._cluster_configuration)
                definitions.append(alert)

        return definitions

    def __json_to_callable(self, clusterName, hostName, json_definition):
        """
        Converts the JSON that represents all aspects of a definition into an
        object that extends BaseAlert and is used for an individual alert check.
        """
        alert = None

        try:
            source = json_definition['source']
            source_type = source.get('type', '')

            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("[AlertScheduler] Creating job type {0} with {1}".format(source_type, str(json_definition)))

            if source_type == AlertSchedulerHandler.TYPE_METRIC:
                alert = MetricAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_PORT:
                alert = PortAlert(json_definition, source)
            elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
                source['stacks_directory'] = self.stacks_dir
                source['common_services_directory'] = self.common_services_dir
                source['host_scripts_directory'] = self.host_scripts_dir
                alert = ScriptAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_WEB:
                alert = WebAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_RECOVERY:
                alert = RecoveryAlert(json_definition, source, self.recovery_manger)

            if alert is not None:
                alert.set_cluster(clusterName, hostName)

        except Exception, exception:
            logger.exception("[AlertScheduler] Unable to load an invalid alert definition. It will be skipped.")

        return alert
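# start() and reschedule() above rely on self.schedule_definition(definition), which is
# not included in this excerpt. A rough sketch of such a method on AlertSchedulerHandler
# (where the name-mangled self.__scheduler / self.__make_function attributes resolve),
# under the assumption that each definition exposes an interval() accessor in minutes
# and that jobs are named by UUID, which is how reschedule() matches scheduled_job.name
# against definition UUIDs:
def schedule_definition(self, definition):
    job_interval = definition.interval()  # assumed accessor, in minutes

    if self.__in_minutes:
        self.__scheduler.add_interval_job(self.__make_function(definition),
                                          minutes=job_interval,
                                          name=definition.get_uuid())
    else:
        self.__scheduler.add_interval_job(self.__make_function(definition),
                                          seconds=job_interval,
                                          name=definition.get_uuid())

    logger.info("[AlertScheduler] Scheduled {0} every {1} {2}".format(
        definition.get_uuid(), job_interval,
        "minutes" if self.__in_minutes else "seconds"))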
class schedulerDaemon(object):
    def __init__(self):
        # start the scheduler and hook up the job event listener
        self.sched = Scheduler()
        self.sched.start()
        self.sched.add_listener(job_listener,
                                EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | EVENT_JOB_MISSED)
        self.recoverySchedulerDaemon()
        self.newEmulationList = []

    def listJobs(self):
        schedFileLogger.debug("-> listJobs(self)")
        # the scheduler's job list has to be converted into plain strings so it can be
        # sent over Pyro 4.20, which uses the new "serpent" serializer
        jobs = self.sched.get_jobs()
        if jobs:
            strJobsList = [str(job) for job in jobs]
            schedFileLogger.debug("sending list of jobs")
            # e.g. [<Job (name=1-1-MEM_EMU-logger interval-3sec., trigger=<SimpleTrigger (run_date=datetime.datetime(2014, 10, 10, 10, 10, 10))>)>, ...]
            return strJobsList
        else:
            schedFileLogger.debug("No jobs to send")
            return []

    def stopSchedulerDaemon(self):
        schedFileLogger.debug("-> stopSchedulerDaemon(self)")
        schedFileLogger.info("stopping Daemon")
        sys.exit(1)

    def hello(self):
        schedFileLogger.debug("-> hello(self)")
        greeting = "Pong!"
        schedFileLogger.debug(greeting)
        return greeting

    def deleteJobs(self, emulationID, distributionName):
        schedFileLogger.debug("-> deleteJobs(self, emulationID, distributionName)")
        # stringify
        emulationID = str(emulationID)
        distributionName = str(distributionName)
        schedFileLogger.debug("Looking for job name: " + emulationID + "-" + distributionName)

        if emulationID == "all":
            schedFileLogger.info("Jobs deleted:")
            # reset the emulation object list to its initial state
            self.newEmulationList = []
            for job in self.sched.get_jobs():
                self.sched.unschedule_job(job)
                msg = {"Action": "Job Deleted", "jobName": job.name}
                schedFileLogger.info(msg)
        else:
            for job in self.sched.get_jobs():
                if distributionName in job.name:
                    self.sched.unschedule_job(job)
                    msg = {"Action": "Job Deleted", "jobName": job.name}
                    schedFileLogger.info(msg)
                else:
                    schedFileLogger.info("These jobs remain: " + job.name)

    # keep the current emulation object available for later use
    def setEmuObject(self, newEmulation):
        try:
            # prevent duplicate entries: drop any stored emulation with the same ID
            # before appending the new one
            self.newEmulationList = [emu for emu in self.newEmulationList
                                     if emu.emulationID != newEmulation.emulationID]
            self.newEmulationList.append(newEmulation)
            return True
        except Exception, e:
            print e
            return False
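# __init__ above registers job_listener for EVENT_JOB_EXECUTED | EVENT_JOB_ERROR |
# EVENT_JOB_MISSED, but the callback itself is outside this excerpt. A minimal sketch
# using the APScheduler 2.x event API (event.code, event.job, event.exception); the
# message wording here is illustrative, not taken from the original module:
from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR, EVENT_JOB_MISSED

def job_listener(event):
    if event.code == EVENT_JOB_ERROR:
        schedFileLogger.error("Job %s raised: %s" % (event.job.name, event.exception))
    elif event.code == EVENT_JOB_MISSED:
        schedFileLogger.warning("Job %s missed its run time" % event.job.name)
    else:
        schedFileLogger.debug("Job %s executed" % event.job.name)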
class Scheduler(object):

    schedulr = None
    aps3 = True

    def __init__(self):
        #####
        # ApScheduler version detection
        try:
            # APScheduler 3.x implementation
            from apscheduler.schedulers.background import BackgroundScheduler
            self.schedulr = BackgroundScheduler()
            self.aps3 = True
        except ImportError:
            # APScheduler 2.x implementation
            from apscheduler.scheduler import Scheduler
            self.schedulr = Scheduler()
            self.aps3 = False

    def start(self):
        return self.schedulr.start()

    def get_job(self, name):
        if self.aps3:
            return self.schedulr.get_job(name)
        else:
            jobs = self.schedulr.get_jobs()
            for job in jobs:
                if job.name == name:
                    return job
            return None

    def add_job(self, func, trigger, args=None, kwargs=None, id=None, **trigger_args):
        if self.aps3:
            return self.schedulr.add_job(func, trigger, id=id, args=args, kwargs=kwargs, **trigger_args)
        else:
            if trigger == 'date':
                run_date = trigger_args['run_date']  # by intention: to raise if not set!
                del trigger_args['run_date']
                return self.schedulr.add_date_job(func, run_date, name=id, args=args, kwargs=kwargs)
            elif trigger == 'interval':
                # only partially implemented!!
                seconds = 0
                minutes = 0
                hours = 0
                if 'seconds' in trigger_args:
                    seconds = trigger_args.get('seconds', 0)
                    del trigger_args['seconds']
                if 'minutes' in trigger_args:
                    minutes = trigger_args.get('minutes', 0)
                    del trigger_args['minutes']
                if 'hours' in trigger_args:
                    hours = trigger_args.get('hours', 0)
                    del trigger_args['hours']
                return self.schedulr.add_interval_job(func, name=id,
                                                      hours=hours, minutes=minutes, seconds=seconds,
                                                      args=args, kwargs=kwargs)
            elif trigger == 'cron':
                # only partially implemented!!
                second = 0
                minute = 0
                hour = 0
                if 'second' in trigger_args:
                    second = trigger_args.get('second', 0)
                    del trigger_args['second']
                if 'minute' in trigger_args:
                    minute = trigger_args.get('minute', 0)
                    del trigger_args['minute']
                if 'hour' in trigger_args:
                    hour = trigger_args.get('hour', 0)
                    del trigger_args['hour']
                return self.schedulr.add_cron_job(func, name=id, hour=hour, minute=minute, second=second)
            else:
                raise NotImplementedError

    def shutdown(self):
        return self.schedulr.shutdown()

    # https://github.com/ralphwetzel/theonionbox/issues/19#issuecomment-263110953
    def check_tz(self):
        from tzlocal import get_localzone

        try:
            # APScheduler 3.x
            from apscheduler.util import astimezone
        except ImportError:
            # https://github.com/ralphwetzel/theonionbox/issues/31
            # APScheduler 2.x
            # import six
            from pytz import timezone, utc
            from datetime import tzinfo

            # copied here from apscheduler/util.py (version 3.4)
            # copyright Alex Grönholm
            # https://github.com/agronholm/apscheduler
            def astimezone(obj):
                """
                Interprets an object as a timezone.
                :rtype: tzinfo
                """
                # if isinstance(obj, six.string_types):
                if isinstance(obj, (str, unicode)):
                    return timezone(obj)
                if isinstance(obj, tzinfo):
                    if not hasattr(obj, 'localize') or not hasattr(obj, 'normalize'):
                        raise TypeError('Only timezones from the pytz library are supported')
                    if obj.zone == 'local':
                        raise ValueError(
                            'Unable to determine the name of the local timezone -- you must explicitly '
                            'specify the name of the local timezone. Please refrain from using timezones like '
                            'EST to prevent problems with daylight saving time. Instead, use a locale based '
                            'timezone name (such as Europe/Helsinki).')
                    return obj
                if obj is not None:
                    raise TypeError('Expected tzinfo, got %s instead' % obj.__class__.__name__)

        tz = get_localzone()
        try:
            astimezone(tz)
        except ValueError:
            return False

        return True
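# Usage sketch for the compatibility wrapper above: the same calls work whether
# APScheduler 2.x or 3.x is installed, because add_job() translates the trigger
# keywords into the 2.x add_*_job methods when needed. The 'heartbeat' callable
# and job ids are illustrative names, not part of the original module.
if __name__ == '__main__':
    import time
    from datetime import datetime, timedelta

    def heartbeat():
        print('still alive at %s' % datetime.now())

    sched = Scheduler()
    sched.start()

    # interval trigger: runs every 10 seconds on both APScheduler generations
    sched.add_job(heartbeat, 'interval', id='heartbeat', seconds=10)

    # date trigger: runs once, one minute from now
    sched.add_job(heartbeat, 'date', id='one_shot',
                  run_date=datetime.now() + timedelta(minutes=1))

    print(sched.get_job('heartbeat'))
    time.sleep(30)
    sched.shutdown()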
class AlertSchedulerHandler(): TYPE_PORT = 'PORT' TYPE_METRIC = 'METRIC' TYPE_AMS = 'AMS' TYPE_SCRIPT = 'SCRIPT' TYPE_WEB = 'WEB' TYPE_RECOVERY = 'RECOVERY' def __init__(self, initializer_module, in_minutes=True): self.initializer_module = initializer_module self.cachedir = initializer_module.config.alerts_cachedir self.stacks_dir = initializer_module.config.stacks_dir self.common_services_dir = initializer_module.config.common_services_dir self.extensions_dir = initializer_module.config.extensions_dir self.host_scripts_dir = initializer_module.config.host_scripts_dir self.configuration_builder = initializer_module.configuration_builder self._cluster_configuration = initializer_module.configurations_cache self.alert_definitions_cache = initializer_module.alert_definitions_cache self.config = initializer_module.config # the amount of time, in seconds, that an alert can run after it's scheduled time alert_grace_period = int( self.config.get('agent', 'alert_grace_period', 5)) apscheduler_standalone = False self.APS_CONFIG = { 'apscheduler.threadpool.core_threads': 3, 'apscheduler.coalesce': True, 'apscheduler.standalone': apscheduler_standalone, 'apscheduler.misfire_grace_time': alert_grace_period, 'apscheduler.threadpool.context_injector': self._job_context_injector if not apscheduler_standalone else None, 'apscheduler.threadpool.agent_config': self.config } self._collector = AlertCollector() self.__scheduler = Scheduler(self.APS_CONFIG) self.__in_minutes = in_minutes self.recovery_manger = initializer_module.recovery_manager # register python exit handler ExitHelper().register(self.exit_handler) def _job_context_injector(self, config): """ apscheduler hack to inject monkey-patching, context and configuration to all jobs inside scheduler in case if scheduler running in embedded mode Please note, this function called in job context thus all injects should be time-running optimized :type config AmbariConfig.AmbariConfig """ if not config.use_system_proxy_setting(): from ambari_commons.network import reconfigure_urllib2_opener reconfigure_urllib2_opener(ignore_system_proxy=True) def exit_handler(self): """ Exit handler """ self.stop() def update_definitions(self, event_type): """ Updates the persisted alert definitions JSON. :return: """ # prune out things we don't want to store alert_definitions = [] for cluster_id, command in self.alert_definitions_cache.iteritems(): command_copy = Utils.get_mutable_copy(command) alert_definitions.append(command_copy) if event_type == "CREATE": # reschedule all jobs, creating new instances self.reschedule_all() else: # reschedule only the jobs that have changed self.reschedule() def __make_function(self, alert_def): return lambda: alert_def.collect() def start(self): """ loads definitions from file and starts the scheduler """ if self.__scheduler is None: return if self.__scheduler.running: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(self.APS_CONFIG) alert_callables = self.__load_definitions() # schedule each definition for _callable in alert_callables: self.schedule_definition(_callable) logger.info( "[AlertScheduler] Starting {0}; currently running: {1}".format( str(self.__scheduler), str(self.__scheduler.running))) self.__scheduler.start() def stop(self): if not self.__scheduler is None: self.__scheduler.shutdown(wait=False) self.__scheduler = Scheduler(self.APS_CONFIG) logger.info("[AlertScheduler] Stopped the alert scheduler.") def reschedule(self): """ Removes jobs that are scheduled where their UUID no longer is valid. 
Schedules jobs where the definition UUID is not currently scheduled. """ jobs_scheduled = 0 jobs_removed = 0 definitions = self.__load_definitions() scheduled_jobs = self.__scheduler.get_jobs() self.initializer_module.alert_status_reporter.reported_alerts.clear() # for every scheduled job, see if its UUID is still valid for scheduled_job in scheduled_jobs: uuid_valid = False for definition in definitions: definition_uuid = definition.get_uuid() if scheduled_job.name == definition_uuid: uuid_valid = True break # jobs without valid UUIDs should be unscheduled if uuid_valid is False: jobs_removed += 1 logger.info("[AlertScheduler] Unscheduling {0}".format( scheduled_job.name)) self._collector.remove_by_uuid(scheduled_job.name) self.__scheduler.unschedule_job(scheduled_job) # for every definition, determine if there is a scheduled job for definition in definitions: definition_scheduled = False for scheduled_job in scheduled_jobs: definition_uuid = definition.get_uuid() if definition_uuid == scheduled_job.name: definition_scheduled = True break # if no jobs are found with the definitions UUID, schedule it if definition_scheduled is False: jobs_scheduled += 1 self.schedule_definition(definition) logger.info( "[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled" .format(str(jobs_scheduled), str(jobs_removed))) def reschedule_all(self): """ Removes jobs that are scheduled where their UUID no longer is valid. Schedules jobs where the definition UUID is not currently scheduled. """ logger.info("[AlertScheduler] Rescheduling all jobs...") jobs_scheduled = 0 jobs_removed = 0 definitions = self.__load_definitions() scheduled_jobs = self.__scheduler.get_jobs() # unschedule all scheduled jobs for scheduled_job in scheduled_jobs: jobs_removed += 1 logger.info("[AlertScheduler] Unscheduling {0}".format( scheduled_job.name)) self._collector.remove_by_uuid(scheduled_job.name) self.__scheduler.unschedule_job(scheduled_job) # for every definition, schedule a job for definition in definitions: jobs_scheduled += 1 self.schedule_definition(definition) logger.info( "[AlertScheduler] Reschedule Summary: {0} unscheduled, {0} rescheduled" .format(str(jobs_removed), str(jobs_scheduled))) def collector(self): """ gets the collector for reporting to the server """ return self._collector def __load_definitions(self): """ Loads all alert definitions from a file. All clusters are stored in a single file. This wil also populate the cluster-to-hash dictionary. :return: """ definitions = [] for cluster_id, command_json in self.alert_definitions_cache.iteritems( ): clusterName = '' if not 'clusterName' in command_json else command_json[ 'clusterName'] hostName = '' if not 'hostName' in command_json else command_json[ 'hostName'] publicHostName = '' if not 'publicHostName' in command_json else command_json[ 'publicHostName'] clusterHash = None if not 'hash' in command_json else command_json[ 'hash'] # cache the cluster and cluster hash after loading the JSON if clusterName != '' and clusterHash is not None: logger.info( '[AlertScheduler] Caching cluster {0} with alert hash {1}'. 
format(clusterName, clusterHash)) for definition in command_json['alertDefinitions']: alert = self.__json_to_callable( clusterName, hostName, publicHostName, Utils.get_mutable_copy(definition)) if alert is None: continue alert.set_helpers(self._collector, self._cluster_configuration, self.configuration_builder) definitions.append(alert) return definitions def __json_to_callable(self, clusterName, hostName, publicHostName, json_definition): """ converts the json that represents all aspects of a definition and makes an object that extends BaseAlert that is used for individual """ alert = None try: source = json_definition['source'] source_type = source.get('type', '') if logger.isEnabledFor(logging.DEBUG): logger.debug( "[AlertScheduler] Creating job type {0} with {1}".format( source_type, str(json_definition))) if source_type == AlertSchedulerHandler.TYPE_METRIC: alert = MetricAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_AMS: alert = AmsAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_PORT: alert = PortAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_SCRIPT: source['stacks_directory'] = self.stacks_dir source['common_services_directory'] = self.common_services_dir source['extensions_directory'] = self.extensions_dir source['host_scripts_directory'] = self.host_scripts_dir alert = ScriptAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_WEB: alert = WebAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_RECOVERY: alert = RecoveryAlert(json_definition, source, self.config, self.recovery_manger) if alert is not None: alert.set_cluster(clusterName, json_definition['clusterId'], hostName, publicHostName) except Exception, exception: logger.exception( "[AlertScheduler] Unable to load an invalid alert definition. It will be skipped." ) return alert
    start_time = datetime.strptime(event['event_start_time'], time_format)
    end_time = datetime.strptime(event['event_end_time'], time_format)
    event_duration = (end_time - start_time).seconds

    sched.add_cron_job(led_chain.auto_transition,
                       hour=start_hour, minute=start_minute, second=start_second,
                       name=event['event_name'],
                       kwargs={'state': event['event_state'],
                               'transition_duration': event['transition_duration']},
                       misfire_grace_time=event_duration)

app.logger.debug("Startup job list contains : %s" % sched.get_jobs())

try:
    app.run(host='0.0.0.0', port=int(app_config.get("general", "web_port")), use_reloader=False)
except KeyboardInterrupt:
    app.logger.warning("Caught keyboard interrupt. Shutting down ...")
    app.logger.info("Calling shutdown on led chain")
    led_chain.shutdown()
    app.logger.info("Calling shutdown on scheduler")
    sched.shutdown(wait=False)
    app.logger.info("Shutting down logger and exiting ...")
    logging.shutdown()
    exit(0)
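# The start_hour/start_minute/start_second values used in the add_cron_job call above
# are derived outside this excerpt. Presumably they come from the parsed start time,
# something along these lines (assuming time_format includes hours, minutes and seconds):
start_hour = start_time.hour
start_minute = start_time.minute
start_second = start_time.second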
class bakCron(object):
    def __init__(self):
        self.sched = Scheduler()
        self.sched.daemonic = False
        self.sched.start()

        self.assign_jobs()
        self.assign_monitor()

    def get_fileconfig(self):
        '''
        Read the policy and server configuration files; their directory is fixed
        when the software is installed.
        '''
        policyfile = os.path.dirname(nsccdbbak.__file__) + "/conf/Policy.conf"
        serverfile = os.path.dirname(nsccdbbak.__file__) + "/conf/Server.conf"

        policys = []
        PolicyConfig = ConfigParser.ConfigParser(allow_no_value=True)
        PolicyConfig.read(policyfile)
        for section in PolicyConfig.sections():
            dictTmp = {}
            colon = section.find(':')
            key, value = section[:colon], section[colon + 1:]
            dictTmp[key] = value
            for key, value in PolicyConfig.items(section):
                if 'pass' in key:
                    dictTmp[key] = base64.decodestring(value)
                else:
                    dictTmp[key] = value
            policys.append(dictTmp.copy())
            dictTmp.clear()

        servers = []
        ServerConfig = ConfigParser.ConfigParser(allow_no_value=True)
        ServerConfig.read(serverfile)
        for section in ServerConfig.sections():
            dictTmp = {}
            colon = section.find(':')
            key, value = section[:colon], section[colon + 1:]
            dictTmp[key] = value
            for key, value in ServerConfig.items(section):
                if 'pass' in key:
                    dictTmp[key] = base64.decodestring(value)
                else:
                    dictTmp[key] = value
            servers.append(dictTmp.copy())
            dictTmp.clear()

        return policys, servers

    def assign_jobs(self):
        '''
        Read the config files, obtain the backup policy for each database, and
        schedule the backup jobs accordingly.
        '''
        (policys, servers) = self.get_fileconfig()
        for dictTmp in policys:
            if dictTmp['flag'] == '1':
                for server in servers:
                    if server['server'] == dictTmp['server']:
                        serverInfo = server
                for key in dictTmp.keys():
                    if dictTmp[key] == '':
                        dictTmp[key] = None
                glob_bak_name = 'glob_bak_' + dictTmp['server']
                print [serverInfo, dictTmp['bakcon']]
                self.sched.add_cron_job(self.glob_bak,
                                        args=[serverInfo, dictTmp['bakcon']],
                                        month=dictTmp['globmonth'],
                                        day=dictTmp['globday'],
                                        day_of_week=dictTmp['globweekday'],
                                        hour=dictTmp['globhour'],
                                        minute=dictTmp['globminute'],
                                        second='*/3',  # note: '*/3' fires every 3 seconds within the matched time
                                        name=glob_bak_name)
                incr_bak_name = 'incr_bak_' + dictTmp['server']
                self.sched.add_cron_job(self.incr_bak,
                                        month=dictTmp['incmonth'],
                                        day=dictTmp['incday'],
                                        day_of_week=dictTmp['incweekday'],
                                        hour=dictTmp['inchour'],
                                        minute=dictTmp['incminute'],
                                        name=incr_bak_name)
        print self.sched.get_jobs()
        print 'assign jobs finished!'

    def assign_monitor(self):
        '''
        Schedule the file-monitoring job.
        '''
        self.sched.add_interval_job(self.monitorfile, name='monitorDaemon')
        print self.sched.get_jobs()
        print 'assign monitor finished'

    def filechange(self, monitor, file1, file2, evt_type):
        '''
        When the backup policy file changes, unschedule every job except the
        file monitor, then re-create the backup jobs.
        '''
        if evt_type == gio.FILE_MONITOR_EVENT_CHANGED:
            print 'file changed'
            for job in self.sched.get_jobs():
                print job
                if job.name != 'monitorDaemon':
                    self.sched.unschedule_job(job)
            self.assign_jobs()

    def monitorfile(self):
        '''
        Start the file-monitoring loop in a multi-threaded GLib environment.
        '''
        gfile = gio.File(self.filepath)
        monitor = gfile.monitor_file(gio.FILE_MONITOR_NONE, None)
        monitor.connect("changed", self.filechange)
        gobject.threads_init()
        gml = gobject.MainLoop()
        gml.run()

    def glob_bak(self, serConf, bakcontainer):
        '''
        Run one full (global) backup and upload the backup file to cloud storage.
        '''
        timestr = time.strftime(r"%Y-%m-%d_%H-%M-%S", time.localtime())
        print timestr
        conndb = ConnDatabase(serConf)
        connStor = ConnStorage(serConf)
        (result, bakfilepath) = conndb.conn.glob_bak()
        if result:
            connStor.upload_file(bakcontainer, bakfilepath)
        else:
            print 'global backup error!'
    def incr_bak(self, serConf, bakcontainer):
        '''
        Run one incremental backup and upload the backup file to cloud storage.
        '''
        conndb = ConnDatabase(serConf)
        connStor = ConnStorage(serConf)
        (result, bakfilepath) = conndb.conn.incr_bak()
        if result:
            connStor.upload_file(bakcontainer, bakfilepath)
        else:
            print 'incremental backup error!'
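# Jobs created by assign_jobs() are named 'glob_bak_<server>' and 'incr_bak_<server>',
# so one server's backups can be withdrawn by name without touching the monitor job.
# A small usage sketch; the helper name, the 'cron' instance and the 'db1' server
# name are illustrative, not part of the original module:
def unschedule_server(cron, server):
    for job in cron.sched.get_jobs():
        if job.name in ('glob_bak_' + server, 'incr_bak_' + server):
            cron.sched.unschedule_job(job)

# e.g.
# cron = bakCron()
# unschedule_server(cron, 'db1')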