Example #1
class Scheduler(Plugin):

    crons = {}
    intervals = {}
    started = False

    def __init__(self):

        addEvent("schedule.cron", self.cron)
        addEvent("schedule.interval", self.interval)
        addEvent("schedule.remove", self.remove)

        self.sched = Sched(misfire_grace_time=60)
        self.sched.start()
        self.started = True

    def remove(self, identifier):
        for cron_type in ["intervals", "crons"]:
            try:
                self.sched.unschedule_job(getattr(self, cron_type)[identifier]["job"])
                log.debug("%s unscheduled %s", (cron_type.capitalize(), identifier))
            except:
                pass

    def doShutdown(self):
        self.stop()
        return super(Scheduler, self).doShutdown()

    def stop(self):
        if self.started:
            log.debug("Stopping scheduler")
            self.sched.shutdown()
            log.debug("Scheduler stopped")
        self.started = False

    def cron(self, identifier="", handle=None, day="*", hour="*", minute="*"):
        log.info('Scheduling "%s", cron: day = %s, hour = %s, minute = %s', (identifier, day, hour, minute))

        self.remove(identifier)
        self.crons[identifier] = {
            "handle": handle,
            "day": day,
            "hour": hour,
            "minute": minute,
            "job": self.sched.add_cron_job(handle, day=day, hour=hour, minute=minute),
        }

    def interval(self, identifier="", handle=None, hours=0, minutes=0, seconds=0):
        log.info(
            "Scheduling %s, interval: hours = %s, minutes = %s, seconds = %s", (identifier, hours, minutes, seconds)
        )

        self.remove(identifier)
        self.intervals[identifier] = {
            "handle": handle,
            "hours": hours,
            "minutes": minutes,
            "seconds": seconds,
            "job": self.sched.add_interval_job(handle, hours=hours, minutes=minutes, seconds=seconds),
        }
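The plugin above is a thin wrapper around the APScheduler 2.x API. Below is a minimal standalone sketch of the underlying calls it makes; the job function and timings are placeholders, not taken from the original project.

from apscheduler.scheduler import Scheduler

def ping():
    print("ping")

sched = Scheduler(misfire_grace_time=60)
sched.start()

# What Scheduler.cron() / Scheduler.interval() boil down to:
cron_job = sched.add_cron_job(ping, day="*", hour="*", minute="*/5")
interval_job = sched.add_interval_job(ping, seconds=30)

# What Scheduler.remove() / Scheduler.stop() boil down to:
sched.unschedule_job(cron_job)
sched.unschedule_job(interval_job)
sched.shutdown()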
Example #2
class Scheduler(Plugin):

    crons = {}
    intervals = {}
    started = False

    def __init__(self):

        addEvent('schedule.cron', self.cron)
        addEvent('schedule.interval', self.interval)
        addEvent('schedule.start', self.start)
        addEvent('schedule.restart', self.start)

        addEvent('app.load', self.start)
        addEvent('app.shutdown', self.stop)

        self.sched = Sched(misfire_grace_time=60)

    def remove(self, identifier):
        for cron_type in ['intervals', 'crons']:  # attribute names are 'intervals' and 'crons'
            try:
                self.sched.unschedule_job(
                    getattr(self, cron_type)[identifier]['job'])
                log.debug('%s unscheduled %s' %
                          (cron_type.capitalize(), identifier))
            except:
                pass

    def start(self):

        # Stop all running
        self.stop()

        # Crons
        for identifier in self.crons:
            try:
                self.remove(identifier)
                cron = self.crons[identifier]
                job = self.sched.add_cron_job(cron['handle'],
                                              day=cron['day'],
                                              hour=cron['hour'],
                                              minute=cron['minute'])
                cron['job'] = job
            except ValueError as e:
                log.error("Failed adding cronjob: %s" % e)

        # Intervals
        for identifier in self.intervals:
            try:
                self.remove(identifier)
                interval = self.intervals[identifier]
                job = self.sched.add_interval_job(interval['handle'],
                                                  hours=interval['hours'],
                                                  minutes=interval['minutes'],
                                                  seconds=interval['seconds'],
                                                  repeat=interval['repeat'])
                interval['job'] = job
            except ValueError as e:
                log.error("Failed adding interval cronjob: %s" % e)
Example #3
class Scheduler(Plugin):

    crons = {}
    intervals = {}
    started = False

    def __init__(self):

        addEvent('schedule.cron', self.cron)
        addEvent('schedule.interval', self.interval)
        addEvent('schedule.remove', self.remove)

        self.sched = Sched(misfire_grace_time = 60)
        self.sched.start()
        self.started = True

    def remove(self, identifier):
        for cron_type in ['intervals', 'crons']:
            try:
                self.sched.unschedule_job(getattr(self, cron_type)[identifier]['job'])
                log.debug('%s unscheduled %s', (cron_type.capitalize(), identifier))
            except:
                pass

    def doShutdown(self):
        super(Scheduler, self).doShutdown()
        self.stop()

    def stop(self):
        if self.started:
            log.debug('Stopping scheduler')
            self.sched.shutdown()
            log.debug('Scheduler stopped')
        self.started = False

    def cron(self, identifier = '', handle = None, day = '*', hour = '*', minute = '*'):
        log.info('Scheduling "%s", cron: day = %s, hour = %s, minute = %s', (identifier, day, hour, minute))

        self.remove(identifier)
        self.crons[identifier] = {
            'handle': handle,
            'day': day,
            'hour': hour,
            'minute': minute,
            'job': self.sched.add_cron_job(handle, day = day, hour = hour, minute = minute)
        }

    def interval(self, identifier = '', handle = None, hours = 0, minutes = 0, seconds = 0):
        log.info('Scheduling %s, interval: hours = %s, minutes = %s, seconds = %s', (identifier, hours, minutes, seconds))

        self.remove(identifier)
        self.intervals[identifier] = {
            'handle': handle,
            'hours': hours,
            'minutes': minutes,
            'seconds': seconds,
            'job': self.sched.add_interval_job(handle, hours = hours, minutes = minutes, seconds = seconds)
        }
Example #4
class Scheduler(Plugin):

    crons = {}
    intervals = {}
    started = False

    def __init__(self):

        addEvent("schedule.cron", self.cron)
        addEvent("schedule.interval", self.interval)
        addEvent("schedule.start", self.start)
        addEvent("schedule.restart", self.start)

        addEvent("app.load", self.start)

        self.sched = Sched(misfire_grace_time=60)

    def remove(self, identifier):
        for type in ["interval", "cron"]:
            try:
                self.sched.unschedule_job(getattr(self, type)[identifier]["job"])
                log.debug("%s unscheduled %s", (type.capitalize(), identifier))
            except:
                pass

    def start(self):

        # Stop all running
        self.stop()

        # Crons
        for identifier in self.crons:
            try:
                self.remove(identifier)
                cron = self.crons[identifier]
                job = self.sched.add_cron_job(cron["handle"], day=cron["day"], hour=cron["hour"], minute=cron["minute"])
                cron["job"] = job
            except ValueError as e:
                log.error("Failed adding cronjob: %s", e)

        # Intervals
        for identifier in self.intervals:
            try:
                self.remove(identifier)
                interval = self.intervals[identifier]
                job = self.sched.add_interval_job(
                    interval["handle"],
                    hours=interval["hours"],
                    minutes=interval["minutes"],
                    seconds=interval["seconds"],
                )
                interval["job"] = job
            except ValueError as e:
                log.error("Failed adding interval cronjob: %s", e)
Example #5
class Scheduler(Plugin):

    crons = {}
    intervals = {}
    started = False

    def __init__(self):

        sl = logging.getLogger('apscheduler.scheduler')
        sl.disabled = True

        addEvent('schedule.cron', self.cron)
        addEvent('schedule.interval', self.interval)
        addEvent('schedule.start', self.start)
        addEvent('schedule.restart', self.start)

        addEvent('app.load', self.start)
        addEvent('app.shutdown', self.stop)

        self.sched = Sched(misfire_grace_time = 60)

    def remove(self, identifier):
        for cron_type in ['intervals', 'crons']:  # attribute names are 'intervals' and 'crons'
            try:
                self.sched.unschedule_job(getattr(self, cron_type)[identifier]['job'])
                log.debug('%s unscheduled %s' % (cron_type.capitalize(), identifier))
            except:
                pass

    def start(self):

        # Stop all running
        self.stop()

        # Crons
        for identifier in self.crons:
            try:
                self.remove(identifier)
                cron = self.crons[identifier]
                job = self.sched.add_cron_job(cron['handle'], day = cron['day'], hour = cron['hour'], minute = cron['minute'])
                cron['job'] = job
            except ValueError as e:
                log.error("Failed adding cronjob: %s" % e)

        # Intervals
        for identifier in self.intervals:
            try:
                self.remove(identifier)
                interval = self.intervals[identifier]
                job = self.sched.add_interval_job(interval['handle'], hours = interval['hours'], minutes = interval['minutes'], seconds = interval['seconds'], repeat = interval['repeat'])
                interval['job'] = job
            except ValueError as e:
                log.error("Failed adding interval cronjob: %s" % e)
Example #6
def recordAndRegulateTemp(number_of_hours,temperature,csvWriter):
	sched = Scheduler()
	sched.start()
	job = sched.add_interval_job(my_job, minutes=5, args = [temperature,csvWriter,temps])

	start_time = time.time()
	while time.time() - start_time < (3600*int(number_of_hours)):
		text = "time left: " + str(round((3600*int(number_of_hours)) - (time.time()-start_time),0))+ " seconds\n"
		sys.stdout.write(text); sys.stdout.flush()
#		print "temp list: " + str(temps)
		time.sleep(60)

	sched.unschedule_job(job)
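The snippet above assumes a module-level my_job, a shared temps list and a csv writer that are not shown. A hedged sketch of what those missing pieces might look like, inferred from the call site (the sensor read is a placeholder):

import csv
import time

temps = []  # shared list the interval job appends to

def my_job(temperature, csvWriter, temps):
    reading = temperature  # placeholder: read the real sensor here
    temps.append(reading)
    csvWriter.writerow([time.time(), reading])

with open("brewlog.csv", "a") as logfile:
    writer = csv.writer(logfile)
    recordAndRegulateTemp(1, 20, writer)  # regulate at 20 degrees for 1 hour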
Example #7
class TimeScheduler:

    instance = None
    
    def __init__(self):
        '''
        '''
    
    @staticmethod
    def getInstance():
        if TimeScheduler.instance is None:
            TimeScheduler.instance = TimeScheduler()
        return TimeScheduler.instance
    
    def init(self,threadpool = None):
        if threadpool is None :
            self.sched = Scheduler({'apscheduler.threadpool.core_threads':1,
                                    'apscheduler.threadpool.max_threads':1,
                                    'apscheduler.threadpool.keepalive':1})  
        else:
            self.sched = Scheduler({'apscheduler.threadpool':threadpool})  
        self.sched.daemonic = False 
    
    def registerCronExp(self, handler, year=None, month=None, day=None, hour=None, minute=None, second=None,
                        start_date=None):
        return self.sched.add_cron_job(handler.execute, year=year, month=month, day=day,
                                       hour=hour, minute=minute, second=second, start_date=start_date)

    def registerCron(self, handler, year=None, month=None, day=None, week=None,
                     day_of_week=None, hour=None, minute=None, second=None,
                     start_date=None):
        # Forward the caller's values instead of hard-coded None defaults.
        return self.sched.add_cron_job(handler.execute, year=year, month=month, day=day, week=week,
                                       day_of_week=day_of_week, hour=hour, minute=minute, second=second,
                                       start_date=start_date)

    def registerInterval(self, handler, weeks=0, days=0, hours=0, minutes=0,
                         seconds=0, start_date=None):
        """Register an interval task."""
        return self.sched.add_interval_job(handler.execute, weeks, days, hours, minutes,
                                           seconds, start_date)

    def registerDate(self, handler, date):
        return self.sched.add_date_job(handler.execute, date)

    def unregister(self,job):
        self.sched.unschedule_job(job)
        
    def start(self):
        self.sched.start() 
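TimeScheduler expects handler objects that expose an execute() method. A hedged usage sketch; the handler class and timings are illustrative, not from the original project:

class PrintHandler:
    def execute(self):
        print("tick")

ts = TimeScheduler.getInstance()
ts.init()
job = ts.registerInterval(PrintHandler(), seconds=10)
ts.start()
# ... later, remove the job again:
ts.unregister(job)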
Example #8
def recordAndRegulateTemp(number_of_hours, temperature, csvWriter):
    sched = Scheduler()
    sched.start()
    job = sched.add_interval_job(my_job,
                                 minutes=5,
                                 args=[temperature, csvWriter, temps])

    start_time = time.time()
    while time.time() - start_time < (3600 * int(number_of_hours)):
        text = "time left: " + str(
            round((3600 * int(number_of_hours)) -
                  (time.time() - start_time), 0)) + " seconds\n"
        sys.stdout.write(text)
        sys.stdout.flush()
        #		print "temp list: " + str(temps)
        time.sleep(60)

    sched.unschedule_job(job)
Example #9
class Job_Manager(object):
    def __init__(self, config):
        self.scheduler = Scheduler(config["SCHEDULER"])
        if self.scheduler is not None:
            self.scheduler.start()

    def add_job(self, task, interval, name, *args):
        args = args if args else None  # pass None instead of an empty tuple
        self.scheduler.add_interval_job(task, seconds=interval, args=args, name=name, max_instances=50)

    def remove_job(self, name):
        matchedJobs = self.__get_jobs(name)
        self.__remove_jobs(matchedJobs)

    def __get_jobs(self, name):
        return [job for job in self.scheduler.get_jobs() if job.name == name]

    def __remove_jobs(self, matchedJobs):
        for job in matchedJobs:
            self.scheduler.unschedule_job(job)
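A hedged usage sketch for Job_Manager; the config shape is an assumption (APScheduler 2.x accepts a mapping of 'apscheduler.*' keys), and the task function is a placeholder:

def heartbeat(tag):
    print("heartbeat from %s" % tag)

manager = Job_Manager({"SCHEDULER": {"apscheduler.daemonic": True}})
manager.add_job(heartbeat, 5, "heartbeat-job", "worker-1")  # run every 5 seconds
# ... later, remove it by name:
manager.remove_job("heartbeat-job")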
Example #10
class MyScheduler:

    EVENTS = {
        '1': 'EVENT_SCHEDULER_START',
        '2': 'EVENT_SCHEDULER_SHUTDOWN',
        '3': 'EVENT_JOBSTORE_ADDED',
        '4': 'EVENT_JOBSTORE_REMOVED',
        '5': 'EVENT_JOBSTORE_JOB_ADDED',
        '32': 'EVENT_JOBSTORE_JOB_REMOVED',
        '64': 'EVENT_JOB_EXECUTED',
        '128': 'EVENT_JOB_ERROR',
        '256': 'EVENT_JOB_MISSED'
    }

    def __init__(self, db_path='sqlite:///scheduler.db'):
        self.scheduler = Scheduler()
        self.scheduler.add_jobstore(SQLAlchemyJobStore(url=db_path), 'default')

    def start(self):
        self.scheduler.start()

    def add_job(self, job, date, args):
        job = self.scheduler.add_date_job(job, date, args)
        print(job)

    def jobs(self):
        return self.scheduler.get_jobs()

    def remove_job(self, notification_id):
        jobs = self.jobs()
        for job in jobs:
            if int(job.args[0]) == int(notification_id):
                self.scheduler.unschedule_job(job)
                return True
        return False

    def shutdown(self):
        self.scheduler.shutdown()
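MyScheduler matches jobs on their first argument in remove_job, so the notification id has to be passed as args[0]. A hedged usage sketch; the notify function and timings are illustrative:

from datetime import datetime, timedelta

def notify(notification_id, message):
    print("notification %s: %s" % (notification_id, message))

ms = MyScheduler()
ms.start()
ms.add_job(notify, datetime.now() + timedelta(minutes=1), [42, "time to stand up"])
# ... later, cancel it by notification id:
ms.remove_job(42)   # compares int(job.args[0]) == int(42)
ms.shutdown()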
Example #11
class SchedulerService(object):
    def __init__(self, config, task):
        self.config = config
        self.task = task
        self.task_thread = Scheduler()
        self.job = None

    def run(self):
        self.task.validate()
        self.task_thread.add_listener(self.reconfigure_interval,
                                      EVENT_JOB_EXECUTED)
        self.task_thread.add_listener(self.reconfigure_interval,
                                      EVENT_JOB_ERROR)
        self.task_thread.start()
        self.reconfigure_interval(None)

    def reconfigure_interval(self, event):
        if event:
            self.task_thread.unschedule_job(event.job)
        new_interval = self.task.get_new_interval()
        log.debug("=== interval for job:'" + str(self.task) + "' set to :'" +
                  str(new_interval) + "'===")
        self.job = self.task_thread.add_interval_job(self.task.do,
                                                     seconds=new_interval)
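SchedulerService expects a task object providing validate(), get_new_interval() and do(). A hedged sketch of such a task; the fixed interval is illustrative, and a real task could return a backoff-adjusted value after each run:

class PollTask(object):
    def validate(self):
        pass  # raise here if the task is misconfigured

    def get_new_interval(self):
        return 5  # seconds until the next run

    def do(self):
        print("polling")

service = SchedulerService(config={}, task=PollTask())
service.run()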
Example #12
class SchedulerDaemon(Daemon):

    def __init__(self, pid, config):
        super( SchedulerDaemon, self ).__init__(pid)
        self.config = config

        # set DaemonArgs for CommandDispatcher
        daemonArgs = DaemonArgs(config)

        # setup logger
        self.logger = None
        if os.path.exists(daemonArgs.log_file):
            logging.config.fileConfig(daemonArgs.log_file)
            self.logger = logging.getLogger('framework')

        # sftp settings
        self.sftpHost = self.config.get("sftp", "host")
        self.sftpPort = int(self.config.get("sftp", "port"))
        self.sftpRemotePath = self.config.get("sftp", "remote_path")
        self.sftpUsername = self.config.get("sftp", "username")
        self.sftpPassword = self.config.get("sftp", "password") or None
        self.sftpPrivateKey = self.config.get("sftp", "pkey") or None
        self.sftpPrivateKeyPassword = self.config.get("sftp", "pkey_password") or None
        self.sftpPrivateKeyType = self.config.get("sftp", "pkey_type") or None

        if self.sftpPrivateKeyType is not None \
            and self.sftpPrivateKeyType.lower() != 'rsa' \
            and self.sftpPrivateKeyType.lower() != 'dss':
            self.sftpPrivateKeyType = None

        self.jobSubmitInterval = int(self.config.get("scheduler", "jobsubmit_interval")) or 10
        self.jobCleanupInterval = int(self.config.get("scheduler", "jobcleanup_interval")) or 30
        
        self.scheduler = Scheduler(daemonic=True)
        self.cronScheduleSequence = ('minute', 'hour', 'day', 'month', 'day_of_week')
    
    @transaction.commit_on_success
    def saveJob(self, status, frameworkJobId, scheduledJob):
        now = datetime.now()
        newJob = None

        #create new job
        if frameworkJobId is not None:
            newJob, created = Job.objects.get_or_create( frameworkid=frameworkJobId )
            newJob.name = scheduledJob.job_name
            newJob.started = now
            newJob.workflow = scheduledJob.workflow
            newJob.is_public = scheduledJob.is_public
            newJob.owner = scheduledJob.created_by
            newJob.schedule = scheduledJob
            newJob.status = status
        else:
            newJob = Job(
                        name=scheduledJob.job_name,
                        started = now,
                        workflow = scheduledJob.workflow,
                        is_public = scheduledJob.is_public,
                        owner = scheduledJob.created_by,
                        schedule = scheduledJob,
                        status = status
                        )
        newJob.save()

    @transaction.commit_on_success
    def submitJobToFramework(self, **kwargs):
        jobCommand = 'job'
        
        daemonArgs = DaemonArgs(self.config)
        daemonArgs.command = jobCommand
        unScheduledJob = kwargs['unScheduledJob']
        
        is_fileFeeder = False
        fileFeederUploadedFile = None
        del daemonArgs.param[:]

        # go through all parameters
        for parameter in unScheduledJob.parameters.all():

            # add parameter to daemonArgs.param
            if parameter.service and parameter.param_key and parameter.param_value:

                # check if a file feeder is used
                if parameter.service == settings.FILE_FEEDER_ID:
                    is_fileFeeder = True
                    fileFeederUploadedFile = parameter.param_value

                    remoteFeederFile = os.path.join(self.sftpRemotePath, parameter.param_value)
                    parameterString = '%s.%s=%s' % ( parameter.service, parameter.param_key, remoteFeederFile )
                else:
                    parameterString = '%s.%s=%s' % ( parameter.service, parameter.param_key, parameter.param_value )

                self.logger.debug("add parameter string: %s" % parameterString)
                daemonArgs.param.append([parameterString])

        # in case of a filefeeder upload file to framework server
        if is_fileFeeder:
            self.logger.debug("is file feeder")
            sftp = None
            transport = None
            try:
                transport = Transport((self.sftpHost, self.sftpPort))
                if self.sftpPassword:
                    transport.connect(username=self.sftpUsername, password=self.sftpPassword)
                else:
                    privateKey = None
                    if self.sftpPrivateKeyType and self.sftpPrivateKeyType.lower() == 'rsa':
                        privateKey = RSAKey.from_private_key_file(self.sftpPrivateKey, password=self.sftpPrivateKeyPassword )
                    if self.sftpPrivateKeyType and self.sftpPrivateKeyType.lower() == 'dss':
                        privateKey = DSSKey.from_private_key_file(self.sftpPrivateKey, password=self.sftpPrivateKeyPassword )

                    transport.connect(username=self.sftpUsername, pkey=privateKey)

                sftp = SFTPClient.from_transport(transport)

                filePath = os.path.join( settings.MEDIA_ROOT, fileFeederUploadedFile )
                remotePath = os.path.join( self.sftpRemotePath, fileFeederUploadedFile )

                self.logger.debug("uploading file from %s to %s on remote machine" % (filePath, remotePath))

                sftp.put(filePath, remotePath)
#                            sftp.put(filePath, remotePath, confirm=False)
                sftp.chmod( remotePath, 0644 )

                self.logger.debug("put OK")

            except IOError as e:
                self.logger.error("IOError: %s. Will continue with next scheduled job." % e)
                self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
            except PasswordRequiredException as e:
                self.logger.error("PasswordRequiredException: %s. Will continue with next scheduled job." % e)
                self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
            except SSHException as e:
                self.logger.error("SSH Exception: %s. Will continue with next scheduled job." % e)
                self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
            except Exception as e:
                self.logger.error("Unkown SFTP problem. Will continue with next scheduled job. %s" % e)
                self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
            finally:
                if sftp is not None:
                    sftp.close()
                if transport is not None:
                    transport.close()
                
        # set job workflow
        daemonArgs.jd_workflow = unScheduledJob.workflow.name

        frameworkJobId = None
        
        try:
            setattr(daemonArgs, jobCommand, 'submit')
            frameworkJobId = self.sendFrameworkCommand(jobCommand, daemonArgs)
            self.saveJob(Job.PROCESSING_STATUS, frameworkJobId, unScheduledJob)
        except WorkflowNotDeployedException:
            # The workflow is not deployed in the framework. To prevent the scheduler retrying continuously
            # we disable this job
            unScheduledJob.status = Schedule.DEACTIVATE_STATUS
            unScheduledJob.save()
        except:
            self.saveJob(Job.FAILED_STATUS, None, unScheduledJob)
        finally:
            daemonArgs.clean(jobCommand)
        
        if unScheduledJob.scheduled_start is not None:
            unScheduledJob.status = Schedule.DEACTIVATED_STATUS
            unScheduledJob.save()
        
    def updateProcessingJobs(self):
        jobCommand = 'job'
        processingJobs = Job.objects.filter(status=Job.PROCESSING_STATUS)
        
        daemonArgs = DaemonArgs(self.config)
        
        if len(list(processingJobs)) != 0:
            jobs_dict = {}
            try:
                setattr(daemonArgs, jobCommand, 'list')
                jobs_dict = self.sendFrameworkCommand(jobCommand, daemonArgs)
            except:
                return
            finally:
                daemonArgs.clean(jobCommand)

            for processingJob in processingJobs:
                if processingJob.frameworkid in jobs_dict \
                and int(processingJob.status) != int(jobs_dict[processingJob.frameworkid]):
                    
                    try:
                        setattr(daemonArgs, jobCommand, 'details')
                        setattr(daemonArgs, 'gjd_id', processingJob.frameworkid)
                        job_details = self.sendFrameworkCommand(jobCommand, daemonArgs)
                    except:
                        continue
                    finally:
                        daemonArgs.clean(jobCommand)
                        daemonArgs.clean('gjd_id')
                        
                    processingJob.status = jobs_dict[processingJob.frameworkid]
                    processingJob.finished = job_details['job_end_time']
                    processingJob.save()
                elif processingJob.frameworkid not in jobs_dict:
                    processingJob.status = Job.COMPLETED_STATUS
                    processingJob.finished = None
                    processingJob.save()

    def checkJobs(self):
        scheduledJobs = self.scheduler.get_jobs()
        
        # remove scheduled jobs which are set to be deleted or deactivated
        deleteAndDeactivateJobs = Schedule.objects.filter( Q(status=Schedule.DELETE_STATUS) | Q(status=Schedule.DEACTIVATE_STATUS) )
        for deleteAndDeactivateJob in deleteAndDeactivateJobs:
            for scheduledJob in scheduledJobs:
                if scheduledJob.name == deleteAndDeactivateJob.job_name:
                    self.scheduler.unschedule_job(scheduledJob)
            deleteAndDeactivateJob.status = Schedule.DEACTIVATED_STATUS\
                if deleteAndDeactivateJob.status == Schedule.DEACTIVATE_STATUS\
                else Schedule.DELETED_STATUS

            deleteAndDeactivateJob.save()
        
        # add/update unscheduled jobs
        split_re = re.compile(r"\s+")
        unScheduledJobs = Schedule.objects.filter( Q(status=Schedule.NEW_STATUS) | Q(status=Schedule.UPDATE_STATUS) )
        for unScheduledJob in unScheduledJobs:
            
            if unScheduledJob.status == Schedule.UPDATE_STATUS:
                for scheduledJob in scheduledJobs:
                    if scheduledJob.name == unScheduledJob.job_name:
                        self.scheduler.unschedule_job(scheduledJob)
            
            if unScheduledJob.scheduled_start is not None:
                schedule = { 'kwargs': { 'unScheduledJob': unScheduledJob }, 'name': unScheduledJob.job_name }
                
                try:
                    newJob = self.scheduler.add_date_job(self.submitJobToFramework, unScheduledJob.scheduled_start, **schedule)
                    self.logger.debug( 'Job will run on %s' % newJob.next_run_time )
                except Exception as e:
                    self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e))
                    raise Exception
                else:
                    unScheduledJob.status = Schedule.ACTIVE_STATUS
                    unScheduledJob.save()
            
            else:
                cronList = split_re.split(unScheduledJob.cron_expression)
                schedule = dict(itertools.izip(self.cronScheduleSequence, cronList))
                
                schedule['kwargs'] = { 'unScheduledJob': unScheduledJob }
                schedule['name'] = unScheduledJob.job_name
                
                try:
                    newJob = self.scheduler.add_cron_job(self.submitJobToFramework, **schedule)
                    self.logger.debug( 'First run of job will be on %s' % newJob.next_run_time )
                except Exception as e:
                    self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e))
                    raise Exception
                else:
                    unScheduledJob.status = Schedule.ACTIVE_STATUS
                    unScheduledJob.save()
                
    def cleanup(self):
        try:
            self.updateProcessingJobs()
        except Exception as e:
            self.logger.error("Unknown error while updating processing jobs: %s" % str(e))
            raise Exception
        
    def onNotification(self, eventType, body):
        if eventType == 'JobFinished':
            
            # sleep is added because a failing job can finish before
            # Django has saved the frameworkid of that job
            time.sleep(1) 
            event = JobFinished()
            event.ParseFromString(body)
            
            self.logger.debug('Job with ID %s is finished with status %s', str(event.job), str(event.status))

            Job.objects.update()
            finishedJob = Job.objects.get(frameworkid=event.job)
            finishedJob.status = event.status
            finishedJob.finished = datetime.now()
            finishedJob.save()
        return True
        
    def run(self):
        self.logger.info('Started scheduler')

        # add active schedules to scheduler
        split_re = re.compile(r"\s+")
        scheduledJobs = Schedule.objects.filter( status=Schedule.ACTIVE_STATUS )

        for scheduledJob in scheduledJobs:

            if scheduledJob.scheduled_start is not None:
                schedule = { 'kwargs': { 'unScheduledJob': scheduledJob }, 'name': scheduledJob.job_name }
                
                try:
                    newJob = self.scheduler.add_date_job(self.submitJobToFramework, scheduledJob.scheduled_start, **schedule)
                except Exception as e:
                    self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e))
                    raise Exception
            else:
                cronList = split_re.split(scheduledJob.cron_expression)
                schedule = dict(itertools.izip(self.cronScheduleSequence, cronList))
                
                schedule['kwargs'] = { 'unScheduledJob': scheduledJob }
                schedule['name'] = scheduledJob.job_name
               
                try:
                    newJob = self.scheduler.add_cron_job(self.submitJobToFramework, **schedule)
                except Exception as e:
                    self.logger.error("Unknown error while submitting jobs to framework: %s" % str(e))
                    raise Exception

        # add job scheduling mechanism and cleanup to scheduler and start scheduler 
        try:
            self.scheduler.add_interval_job(self.checkJobs, seconds=self.jobSubmitInterval)
            self.scheduler.add_interval_job(self.cleanup, minutes=self.jobCleanupInterval)
            self.scheduler.start()
        except Exception as e:
            self.logger.error("Unknown error while initializing scheduler: %s" % str(e))
            raise Exception

        # initialize bus instance for receiving job notifications
        try:
            notificationBus = Bus.createConfigurableBus(self.logger, self.config, 'notifications')
            notificationBus.openFwChannel()
            notificationBus.attachToMonitoring(self.onNotification)
            notificationBus.close()
        except BusException as e:
            self.logger.error("Cannot connect to HSN2 Bus because '%s'" % e)
            raise Exception
        except BusTimeoutException as e:
            self.logger.error("Response timeout")
            raise Exception
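checkJobs() and run() turn a five-field cron expression into keyword arguments for add_cron_job by zipping it against cronScheduleSequence. A minimal sketch of that mapping; the expression is illustrative:

import re

cronScheduleSequence = ('minute', 'hour', 'day', 'month', 'day_of_week')
cron_expression = "*/10 2 * * mon-fri"

schedule = dict(zip(cronScheduleSequence, re.split(r"\s+", cron_expression.strip())))
# {'minute': '*/10', 'hour': '2', 'day': '*', 'month': '*', 'day_of_week': 'mon-fri'}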
Example #13
class LocalScheduler(object):
    scheduler_registry = {}
    _lockdown = False

    @classmethod
    def get(cls, name):
        return cls.scheduler_registry[name]

    @classmethod
    def get_all(cls):
        return cls.scheduler_registry.values()

    @classmethod
    def shutdown_all(cls):
        for scheduler in cls.scheduler_registry.values():
            scheduler.stop()

    @classmethod
    def lockdown(cls):
        cls._lockdown = True

    @classmethod
    def clear_all(cls):
        for scheduler in cls.scheduler_registry.values():
            scheduler.clear()

    def __init__(self, name, label=None):
        self.scheduled_jobs = {}
        self._scheduler = None
        self.name = name
        self.label = label
        self.__class__.scheduler_registry[self.name] = self

    def start(self):
        logger.info('Starting scheduler: %s' % self.name)
        if not self.__class__._lockdown:
            self._scheduler = OriginalScheduler()
            for job in self.scheduled_jobs.values():
                self._schedule_job(job)

            self._scheduler.start()
        else:
            logger.debug('lockdown in effect')

    def stop(self):
        if self._scheduler:
            self._scheduler.shutdown()
            del self._scheduler
            self._scheduler = None

    @property
    def running(self):
        if self._scheduler:
            return self._scheduler.running
        else:
            return False

    def clear(self):
        for job in self.scheduled_jobs.values():
            self.stop_job(job)

    def stop_job(self, job):
        if self.running:
            self._scheduler.unschedule_job(job._job)

        del(self.scheduled_jobs[job.name])
        job.scheduler = None

    def _schedule_job(self, job):
        if isinstance(job, IntervalJob):
            job._job = self._scheduler.add_interval_job(job.function, *job.args, **job.kwargs)
        elif isinstance(job, DateJob):
            job._job = self._scheduler.add_date_job(job.function, *job.args, **job.kwargs)
        elif isinstance(job, CronJob):
            job._job = self._scheduler.add_cron_job(job.function, *job.args, **job.kwargs)
        else:
            raise UnknownJobClass

    def add_job(self, job):
        logger.debug('adding job')
        if job.scheduler or job.name in self.scheduled_jobs.keys():
            raise AlreadyScheduled

        if self._scheduler:
            self._schedule_job(job)

        job.scheduler = self
        self.scheduled_jobs[job.name] = job

    def add_interval_job(self, name, label, function, *args, **kwargs):
        job = IntervalJob(name=name, label=label, function=function, *args, **kwargs)
        self.add_job(job)
        return job

    def add_date_job(self, name, label, function, *args, **kwargs):
        job = DateJob(name=name, label=label, function=function, *args, **kwargs)
        self.add_job(job)
        return job

    def add_cron_job(self, name, label, function, *args, **kwargs):
        job = CronJob(name=name, label=label, function=function, *args, **kwargs)
        self.add_job(job)
        return job

    def get_job_list(self):
        return self.scheduled_jobs.values()

    def get_job_by_name(self, name):
        try:
            return self.scheduled_jobs[name]
        except KeyError:
            raise UnknownJob

    def __unicode__(self):
        return unicode(self.label or self.name)
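A hedged usage sketch for LocalScheduler; it assumes IntervalJob simply stores the function plus the keyword arguments that APScheduler's add_interval_job expects (the names used here are illustrative):

def poll():
    print("poll")

sched = LocalScheduler("default", label="Default scheduler")
sched.add_interval_job("poll-every-30s", "Poll", poll, seconds=30)
sched.start()
# ... later, stop every registered scheduler:
LocalScheduler.shutdown_all()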
Example #14
class Controller:
    def __init__(self):

        # Start the scheduler
        self.sched = Scheduler()
        self.sched.start()

        # set default turn on and turn off times
        # default to everyday
        self.daysLabel=dayLabels[0]
        self.days=dayOptions[self.daysLabel]
        # turn on at 7am
        self.turnOnHour = 7
        self.turnOnMin = 0
        self.DisplayOnJob = self.sched.add_cron_job(self.displayPowerOn, day_of_week=self.days, hour=self.turnOnHour, minute=self.turnOnMin)

        # turn off at 7pm
        self.turnOffHour = 19
        self.turnOffMin = 0
        self.DisplayOffJob = self.sched.add_cron_job(self.displayPowerOff, day_of_week=self.days, hour=self.turnOffHour, minute=self.turnOffMin)

        # print the menu
        self.printMenu()

    def printMenu(self):
        print("""
        Timer Test Menu

            1. Set Turn On/Off Days
            2. Set Turn On Time
            3. Set Turn Off Time
            4. Get On-Off Times
            5. Quit/Exit
            """)

        # get the selection
        self.main_selection = input("Please select: ")
        print("\n")

        if self.main_selection == '1':
            print('Current Turn On/Off days:',self.daysLabel)
            print('1. Daily')
            print('2. WeekDays')
            self.newDays = input("Select which days to use: ")
            # validate entry
            if int(self.newDays)==1 or int(self.newDays)==2:
                self.daysLabel = dayLabels[int(self.newDays)-1]
                self.days = dayOptions[self.daysLabel]
                # cancel old jobs and start new ones
                self.schedDisplayOn()
                self.schedDisplayOff()
                print('New Turn On/Off days:', self.daysLabel)
            else:
                print('Invalid entry')
            self.printMenu()
        elif self.main_selection == '2':
            print('Current Turn On time ', str(self.turnOnHour), ':', str(self.turnOnMin).zfill(2), sep='')
            self.newTurnOnHour = input("Enter new turn on hour (in 24 hour clock): ")
            # validate hour entry
            if int(self.newTurnOnHour) < 24 and int(self.newTurnOnHour) >= 0:
                self.newTurnOnMin = input("Enter new turn on minute: ")
                # validate min entry
                if int(self.newTurnOnMin) < 60 and int(self.newTurnOnMin) >= 0:
                    # assign new hour
                    self.turnOnHour = int(self.newTurnOnHour)
                    # assign new minute
                    self.turnOnMin = int(self.newTurnOnMin)
                    # cancel old job and start new one
                    self.schedDisplayOn()
                    # print new turn on time
                    print('New Turn On time ', str(self.turnOnHour), ':', str(self.turnOnMin).zfill(2), sep='')
                else:
                    print('Invalid Turn On Min')
            else:
                print('Invalid Turn On Hour')
            self.printMenu()
        elif self.main_selection == '3':
            print('Current Turn Off time ', str(self.turnOffHour), ':', str(self.turnOffMin).zfill(2), sep='')
            self.newTurnOffHour = input("Enter new turn off hour (in 24 hour clock): ")
            # validate hour entry
            if int(self.newTurnOffHour) < 24 and int(self.newTurnOffHour) >= 0:
                self.newTurnOffMin = input("Enter new turn off minute: ")
                # validate min entry
                if int(self.newTurnOffMin) < 60 and int(self.newTurnOffMin) >= 0:
                    # assign new hour
                    self.turnOffHour = int(self.newTurnOffHour)
                    # assign new minute
                    self.turnOffMin = int(self.newTurnOffMin)
                    # cancel old job and start new one
                    self.schedDisplayOff()
                    # print new turn off time
                    print('New Turn Off time ', str(self.turnOffHour), ':', str(self.turnOffMin).zfill(2), sep='')
                else:
                    print('Invalid Turn Off Min')
            else:
                print('Invalid Turn Off Hour')
            self.printMenu()
        elif self.main_selection == '4':
            print('Turn On ',self.daysLabel,' at ',str(self.turnOnHour),':',str(self.turnOnMin).zfill(2), sep='')
            print('Turn Off ',self.daysLabel,' at ', str(self.turnOffHour), ':', str(self.turnOffMin).zfill(2), sep='')
            self.sched.print_jobs()
            self.printMenu()
        elif self.main_selection == '5':
            sys.exit()
        else:
            print("Invalid selection.\n")
            self.printMenu()

    def displayPowerOn(self):
        print("Display On")

    def displayPowerOff(self):
        print("Display Off")

    def schedDisplayOn(self):
        # cancel the old job
        self.sched.unschedule_job(self.DisplayOnJob)
        # schedule the new job
        self.DisplayOnJob = self.sched.add_cron_job(self.displayPowerOn, day_of_week=self.days,
                                                    hour=self.turnOnHour, minute=self.turnOnMin)

    def schedDisplayOff(self):
        # cancel the old job
        self.sched.unschedule_job(self.DisplayOffJob)
        # schedule the new job
        self.DisplayOffJob = self.sched.add_cron_job(self.displayPowerOff, day_of_week=self.days,
                                                     hour=self.turnOffHour, minute=self.turnOffMin)
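The Controller relies on module-level dayLabels and dayOptions that are not shown. A hedged sketch of what they might contain (APScheduler's day_of_week field accepts '*' and ranges such as 'mon-fri'):

dayLabels = ['Daily', 'WeekDays']
dayOptions = {
    'Daily': '*',         # every day of the week
    'WeekDays': 'mon-fri'  # weekdays only
}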
Example #15
class TNActionScheduler (TNArchipelPlugin):
    """
    This plugin allows the creation of scheduled actions.
    """

    def __init__(self, configuration, entity, entry_point_group):
        """
        Initialize the plugin.
        @type configuration: Configuration object
        @param configuration: the configuration
        @type entity: L{TNArchipelEntity}
        @param entity: the entity that owns the plugin
        @type entry_point_group: string
        @param entry_point_group: the group name of plugin entry_point
        """
        TNArchipelPlugin.__init__(self, configuration=configuration, entity=entity, entry_point_group=entry_point_group)
        self.scheduler = Scheduler()
        self.scheduler.start()
        self.database = sqlite3.connect(self.configuration.get("SCHEDULER", "database"), check_same_thread=False)
        self.database.execute("create table if not exists scheduler (entity_uuid text, job_uuid text, action text, year text, month text, day text, hour text, minute text, second text, comment text, params text)")
        self.database.commit()
        self.cursor = self.database.cursor()
        self.restore_jobs()
        self.supported_actions_for_vm = ("create", "shutdown", "destroy", "suspend", "resume", "reboot", "migrate", "pause")
        self.supported_actions_for_hypervisor = ("alloc", "free")
        # permissions
        self.entity.permission_center.create_permission("scheduler_jobs", "Authorizes user to get the list of task", False)
        self.entity.permission_center.create_permission("scheduler_schedule", "Authorizes user to schedule a task", False)
        self.entity.permission_center.create_permission("scheduler_unschedule", "Authorizes user to unschedule a task", False)
        self.entity.permission_center.create_permission("scheduler_actions", "Authorizes user to get available actions", False)
        # hooks
        if self.entity.__class__.__name__ == "TNArchipelVirtualMachine":
            self.entity.register_hook("HOOK_VM_TERMINATE", method=self.vm_terminate)

    ### Plugin interface

    def register_handlers(self):
        """
        This method will be called by the plugin user when it is
        necessary to register the module to listen for stanzas.
        """
        self.entity.xmppclient.RegisterHandler('iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER)

    def unregister_handlers(self):
        """
        Unregister the handlers.
        """
        self.entity.xmppclient.UnregisterHandler('iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER)

    @staticmethod
    def plugin_info():
        """
        Return information about the plugin.
        @rtype: dict
        @return: dictionary containing plugin information
        """
        plugin_friendly_name           = "Action Scheduler"
        plugin_identifier              = "action_scheduler"
        plugin_configuration_section   = "SCHEDULER"
        plugin_configuration_tokens    = ["database"]
        return {    "common-name"               : plugin_friendly_name,
                    "identifier"                : plugin_identifier,
                    "configuration-section"     : plugin_configuration_section,
                    "configuration-tokens"      : plugin_configuration_tokens }


    ### Persistence

    def delete_job(self, uid):
        """
        Remove a job from the database.
        @type uid: string
        @param uid: the uid of the job to remove
        """
        self.cursor.execute("DELETE FROM scheduler WHERE job_uuid=?", (uid, ))
        self.database.commit()

    def save_jobs(self, uid, action, year, month, day, hour, minute, second, comment, params=None):
        """
        Save a job in the database.
        @type uid: string
        @param uid: the uid of the job
        @type action: string
        @param action: the action
        @type year: string
        @param year: year of execution
        @type month: string
        @param month: month of execution
        @type day: string
        @param day: day of execution
        @type hour: string
        @param hour: hour of execution
        @type minute: string
        @param minute: minute of execution
        @type second: string
        @param second: second of execution
        @type comment: string
        @param comment: comment about the job
        @type params: string
        @param params: random parameter of the job
        """
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            entity_uid = self.entity.uuid
        elif entityClass == "TNArchipelHypervisor":
            entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID
        self.cursor.execute("INSERT INTO scheduler VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (entity_uid, uid, action, year, month, day, hour, minute, second, comment, params, ))
        self.database.commit()

    def restore_jobs(self):
        """
        Restore the jobs from the database.
        """
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            entity_uid = self.entity.uuid
        elif entityClass == "TNArchipelHypervisor":
            entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID
        self.cursor.execute("SELECT * FROM scheduler WHERE entity_uuid=?", (entity_uid, ))
        for values in self.cursor:
            try:
                entity_uuid, job_uuid, action, year, month, day, hour, minute, second, comment, params = values
                str_date = "%s/%s/%s %s:%s:%s" % (year, month, day, hour, minute, second)
                self.scheduler.add_cron_job(self.do_job_for_vm, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[action, job_uuid, str_date, comment, params])
            except Exception as ex:
                self.entity.log.error("unable to restore a job: %s" % str(ex))

    def vm_terminate(self, origin, user_info, arguments):
        """
        Close the database connection.
        @type origin: TNArchipelEntity
        @param origin: the origin of the hook
        @type user_info: object
        @param user_info: random user information
        @type arguments: object
        @param arguments: runtime argument
        """
        self.database.close()

    ### Jobs

    def get_jod_with_uid(self, uid):
        """
        Get a job with given uid.
        @type uid: string
        @param uid: the uid of the job
        """
        if hasattr(self.scheduler, "get_jobs"):
            jobs = self.scheduler.get_jobs()
        else:
            jobs = self.scheduler.jobs

        for job in jobs:
            if str(job.args[1]) == uid:
                return job
        return None

    def do_job_for_vm(self, action, uid, str_date, comment, param):
        """
        Perform the job.
        @type action: string
        @param action: the action to execute
        @type uid: string
        @param uid: the uid of the job
        @type str_date: string
        @param str_date: the date of the job
        @type comment: string
        @param comment: comment about the job
        @type param: string
        @param param: a random parameter to give to job
        """
        if action == "create":
            self.entity.create()
        elif action == "shutdown":
            self.entity.shutdown()
        elif action == "destroy":
            self.entity.destroy()
        elif action == "suspend":
            self.entity.suspend()
        elif action == "resume":
            self.entity.resume()
        elif action == "pause":
            if self.entity.libvirt_status == 1:
                self.entity.suspend()
            elif self.entity.libvirt_status == 3:
                self.entity.resume()
        elif action == "migrate":
            pass
        job = self.get_jod_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        self.entity.push_change("scheduler", "jobexecuted")

    def do_job_for_hypervisor(self, action, uid, str_date, comment, param):
        """
        Perform the job.
        @type action: string
        @param action: the action to execute
        @type uid: string
        @param uid: the uid of the job
        @type str_date: string
        @param str_date: the date of the job
        @type comment: string
        @param comment: comment about the job
        @type param: string
        @param param: a random parameter to give to job
        """
        if action == "alloc":
            self.entity.alloc()
        elif action == "free":
            pass #self.entity.free()
        job = self.get_jod_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        self.entity.push_change("scheduler", "jobexecuted")


    ### Process IQ

    def process_iq(self, conn, iq):
        """
        This method is invoked when an ARCHIPEL_NS_VM_SCHEDULER IQ is received.
        It understands IQ of type:
            - jobs
            - schedule
            - unschedule
        @type conn: xmpp.Dispatcher
        @param conn: the instance of the current connection that sent the stanza
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        """
        reply = None
        action = self.entity.check_acp(conn, iq)
        self.entity.check_perm(conn, iq, action, -1, prefix="scheduler_")

        if   action == "schedule":
            reply = self.iq_schedule(iq)
        elif action == "unschedule":
            reply = self.iq_unschedule(iq)
        elif action == "jobs":
            reply = self.iq_jobs(iq)
        elif action == "actions":
            reply = self.iq_actions(iq)
        if reply:
            conn.send(reply)
            raise xmpp.protocol.NodeProcessed

    def iq_schedule(self, iq):
        """
        Schedule a task.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            job = iq.getTag("query").getTag("archipel").getAttr("job")
            entityClass = self.entity.__class__.__name__
            param = None
            if entityClass == "TNArchipelVirtualMachine" and not job in self.supported_actions_for_vm:
                raise Exception("action %s is not valid" % job)
            elif entityClass == "TNArchipelHypervisor" and not job in self.supported_actions_for_hypervisor:
                raise Exception("action %s is not valid" % job)
            year = iq.getTag("query").getTag("archipel").getAttr("year")
            month = iq.getTag("query").getTag("archipel").getAttr("month")
            day = iq.getTag("query").getTag("archipel").getAttr("day")
            hour = iq.getTag("query").getTag("archipel").getAttr("hour")
            minute = iq.getTag("query").getTag("archipel").getAttr("minute")
            second = iq.getTag("query").getTag("archipel").getAttr("second")
            comment = iq.getTag("query").getTag("archipel").getAttr("comment")
            if iq.getTag("query").getTag("archipel").has_attr("param"):
                param = iq.getTag("query").getTag("archipel").getAttr("param")
            uid = str(uuid.uuid1())
            str_date = "%s-%s-%s @ %s : %02d : %02d" % (year, month, day, hour, int(minute), int(second))
            if entityClass == "TNArchipelVirtualMachine":
                func = self.do_job_for_vm
            elif entityClass == "TNArchipelHypervisor":
                func = self.do_job_for_hypervisor
            self.scheduler.add_cron_job(func, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[job, uid, str_date, comment, param])
            self.save_jobs(uid, job, year, month, day, hour, minute, second, comment, param)
            self.entity.push_change("scheduler", "scheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_jobs(self, iq):
        """
        Get jobs.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            nodes = []
            if hasattr(self.scheduler, "get_jobs"):
                jobs = self.scheduler.get_jobs()
            else:
                jobs = self.scheduler.jobs

            for job in jobs:
                job_node = xmpp.Node(tag="job", attrs={"action": str(job.args[0]), "uid": str(job.args[1]), "date": str(job.args[2]), "comment": job.args[3]})
                nodes.append(job_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_unschedule(self, iq):
        """
        Unschedule a job.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            uid = iq.getTag("query").getTag("archipel").getAttr("uid")
            the_job = self.get_jod_with_uid(uid)
            if not the_job:
                raise Exception("job with uid %s doesn't exists" % uid)
            self.delete_job(uid)
            self.scheduler.unschedule_job(the_job)
            self.entity.push_change("scheduler", "unscheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_actions(self, iq):
        """
        Get available actions.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            entityClass = self.entity.__class__.__name__
            if entityClass == "TNArchipelVirtualMachine":
                actions = self.supported_actions_for_vm
            elif entityClass == "TNArchipelHypervisor":
                actions = self.supported_actions_for_hypervisor
            nodes = []
            for action in actions:
                action_node = xmpp.Node(tag="action")
                action_node.setData(action)
                nodes.append(action_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply
Example #16
    # this is the heart of the program:
    # send email to let me know I'm brewing
    send_email("starting brew log.", password, graph=False)

    # get list of temps:
    list_of_temps = sys.argv[2::2]
    # get lengths of time for each of those temperatures
    list_of_times = sys.argv[3::2]

    # convert to ints
    list_of_temps = map(int, list_of_temps)
    list_of_times = map(int, list_of_times)

    print "list of temps"
    print list_of_temps
    print "list of times"
    print list_of_times

    for i in range(0, len(list_of_times)):
        send_email("changing temperature to " + str(list_of_temps[i]) +
                   " for " + str(list_of_times[i]) + " hours.",
                   password,
                   graph=False)
        recordAndRegulateTemp(list_of_times[i], list_of_temps[i], writer)

    print "program done. fermenter shutting down."
    send_email("ending. fermenter is shutting off", password, graph=True)
    email_sched.unschedule_func(send_email)  # unschedule by function; unschedule_job expects a Job object
    io.output(power_pin, False)
Example #17
class HypeScheduler(object):
    """Wraps APScheduler with some conveniences."""
    def __init__(self, local_tz: str = None):
        """Constructor.

    Args:
      local_tz: The local timezone the scheduler is running in.
    """
        self._scheduler = Scheduler()
        self._local_tz = local_tz
        self.StartScheduler()

    def StartScheduler(self):
        if self._scheduler and not self._scheduler.running:
            self._scheduler.start()

    def InSeconds(self, seconds: int, fn: Callable, *args, **kwargs) -> Job:
        """Schedule function to run in given seconds.

    Args:
      seconds: How many seconds to wait before scheduling function.
      fn: Function to call.
      *args: Arguments to pass to function.
      **kwargs: Keyworded arguments to pass to function.

    Returns:
      APScheduler Job.
    """
        schedule_time = arrow.now().shift(seconds=seconds)
        # APScheduler 2.1.2 doesn't understand timezones.
        return self._scheduler.add_date_job(fn,
                                            schedule_time.naive,
                                            args=args,
                                            kwargs=kwargs)

    def DailyCallback(self, schedule_time: arrow.Arrow, fn: Callable, *args,
                      **kwargs) -> Job:
        """Schedules fn to be run once a day at schedule_time.

    The actual scheduled time is perturbed randomly +/-30s unless the kwarg
    '_jitter' is set to False.

    Args:
      schedule_time: An Arrow object specifying when to run fn.
      fn: The function to be run.
      *args: Arguments to pass to fn.
      **kwargs: Keyworded arguments to pass to fn. Special kwargs listed below:
          _jitter - {int} How many seconds to perturb scheduling time by, in
                    both directions. Defaults to 30s.

    Returns:
      APScheduler Job.
    """
        if self._local_tz:
            schedule_time = schedule_time.to(self._local_tz)
        jitter = kwargs.get('_jitter', 30)
        if jitter:
            jitter_secs = random.randint(-jitter, jitter)
            schedule_time = schedule_time.shift(seconds=jitter_secs)
        kwargs.pop('_jitter', None)

        # APScheduler 2.1.2 doesn't understand timezones.
        return self._scheduler.add_interval_job(fn,
                                                args=args,
                                                kwargs=kwargs,
                                                start_date=schedule_time.naive,
                                                days=1)

    def FixedRate(self, initial_delay: int, period: int, fn: Callable, *args,
                  **kwargs) -> Job:
        """Schedules a recurring task at a fixed rate.

    Args:
      initial_delay: Seconds to wait before scheduling first instance.
      period: Interval in seconds between subsequent instances.
      fn: The function to run.
      *args: Arguments to pass to fn.
      **kwargs: Keyworded arguments to pass to fn.

    Returns:
      APScheduler Job.
    """
        start_time = arrow.now().shift(seconds=initial_delay)
        # APScheduler 2.1.2 doesn't understand timezones.
        return self._scheduler.add_interval_job(fn,
                                                args=args,
                                                kwargs=kwargs,
                                                start_date=start_time.naive,
                                                seconds=period)

    def UnscheduleJob(self, job: Job) -> None:
        """Unschedules job from running in the future.

    Args:
      job: Job to unschedule.
    """
        try:
            self._scheduler.unschedule_job(job)
        except KeyError:
            logging.info('Job %s not scheduled.', job)
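
A minimal usage sketch of the wrapper above; the callback, timezone and schedule values are assumptions for illustration, not part of the original:

    def remind():
        print('stand up and stretch')

    hype = HypeScheduler(local_tz='Europe/Zurich')   # hypothetical timezone
    one_shot = hype.InSeconds(10, remind)            # run once in ~10 seconds
    hype.DailyCallback(arrow.now().replace(hour=9, minute=0), remind)
    hype.UnscheduleJob(one_shot)                     # cancel the one-shot job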
Exemple #18
0
class SchedulerContainer(DaemonContainer):

    def __init__(self, environment):
        super(SchedulerContainer, self).__init__(environment)
        gconfig = environment.get("gconfig", {})
        options = environment.get("options", {})
        self.scheduler = Scheduler(gconfig, **options)
    
    def on_start(self):
        self.scheduler.start()
    
    def on_stop(self):
        # APScheduler's Scheduler exposes shutdown(), not stop()
        self.scheduler.shutdown()
        
    def unschedule_func(self, func):
        self.scheduler.unschedule_func(func)
    
    def unschedule_job(self, job):
        self.scheduler.unschedule_job(job)
        
        
    def add_interval_job(self, func, weeks=0, days=0, hours=0, minutes=0, seconds=0, start_date=None, args=None, kwargs=None, **options):
        return self.scheduler.add_interval_job(func=func, 
                                        weeks=weeks, 
                                        days=days, 
                                        hours=hours, 
                                        minutes=minutes, 
                                        seconds=seconds, 
                                        start_date=start_date, 
                                        args=args, 
                                        kwargs=kwargs, 
                                        **options)
        
    def add_cron_job(self, func, year=None, month=None, day=None, week=None, day_of_week=None, hour=None, minute=None, second=None, start_date=None, args=None, kwargs=None, **options):
        return self.scheduler.add_cron_job(func=func, 
                                    year=year, 
                                    month=month, 
                                    day=day, 
                                    week=week, 
                                    day_of_week=day_of_week, 
                                    hour=hour, 
                                    minute=minute, 
                                    second=second, 
                                    start_date=start_date, 
                                    args=args, 
                                    kwargs=kwargs,
                                    **options)
    
    def add_date_job(self, func, date, args=None, kwargs=None, **options):
        return self.scheduler.add_date_job(func=func, 
                                    date=date, 
                                    args=args, 
                                    kwargs=kwargs,
                                    **options)
    
    def get_jobs(self):
        return self.scheduler.get_jobs()
    
    def add_job(self, trigger, func, args, kwargs, jobstore='default', **options):
        return self.scheduler.add_job(trigger=trigger, 
                                      func=func, 
                                      args=args, 
                                      kwargs=kwargs, 
                                      jobstore=jobstore,
                                      **options)
        
    def add_listener(self, callback, mask):
        self.scheduler.add_listener(callback, mask)
    
    def remove_listener(self, callback):
        self.scheduler.remove_listener(callback)
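
A brief sketch of how the container above might be driven; DaemonContainer, the contents of the environment dict and the tick callback are assumptions for illustration:

    def tick():
        print('tick')

    container = SchedulerContainer({'gconfig': {}, 'options': {'daemonic': True}})
    container.on_start()
    job = container.add_interval_job(tick, seconds=30)
    container.unschedule_job(job)
    container.on_stop()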
class ProgramHandler:

    def __init__(self, db, radio_station):
        self.__db = db
        self.__radio_station = radio_station
        self.__scheduler = None
        self.__scheduled_jobs = None
        self.__start_listeners()
        self.__radio_station.logger.info("Done initialising ProgramHandler for {0}".format(radio_station.station.name))

    def run(self):
        self.run_today_schedule()

    def __prepare_schedule(self):
        self.__load_programs()
        self.__scheduler = Scheduler()
        self.__scheduled_jobs = dict()

    def run_today_schedule(self):
        self.__prepare_schedule()
        self.__scheduler.start()
        self.__schedule_programs()
        self.__schedule_next_day_scheduler()
        print self.__scheduler.get_jobs()

    def stop(self):
        self.__stop_program()
        # any clean up goes here
        # unschedule stuff

    def __schedule_next_day_scheduler(self):
        #TODO: make this safe for different timezones!
        base_date = date.today() + timedelta(1,0)
        tomorrow_date = datetime.combine(base_date, time())
        #add the timezone offset
        tomorrow_date = tomorrow_date + timedelta(0, timezone(self.__radio_station.station.timezone).utcoffset(datetime.now()).seconds)
        self.__scheduler.add_date_job(getattr(self, 'run_today_schedule'), tomorrow_date) #schedule the scheduler to reload at midnight 

    def __schedule_programs(self):
        for scheduled_program in self.__scheduled_programs:
            if not self.__is_program_expired(scheduled_program):
                self.__add_scheduled_job(scheduled_program)
                self.__radio_station.logger.info(
                    "Scheduled program {0} for station {1} starting at {2}".format(scheduled_program.program.name,
                                                                                   self.__radio_station.station.name,
                                                                                   scheduled_program.start))
        return

    def __add_scheduled_job(self, scheduled_program):
        program = RadioProgram(self.__db, scheduled_program, self.__radio_station)
        scheduled_job = self.__scheduler.add_date_job(getattr(program, 'start'),
                                                      self.__get_program_start_time(scheduled_program).replace(
                                                          tzinfo=None))
        self.__scheduled_jobs[scheduled_program.id] = scheduled_job

    def __delete_scheduled_job(self, index):
        if index in self.__scheduled_jobs:
            self.__scheduler.unschedule_job(self.__scheduled_jobs[index])
            del self.__scheduled_jobs[index]

    def __stop_program(self):
        #self.__running_program.stop()
        return

    def __run_program(self):
        #self.__running_program.run()
        return

    def __load_programs(self):
        self.__scheduled_programs = self.__db.query(ScheduledProgram).filter(
            ScheduledProgram.station_id == self.__radio_station.id).filter(text("date(start at TIME ZONE 'UTC') = current_date at TIME ZONE 'UTC'")).filter(
            ScheduledProgram.deleted == False).all()
        self.__radio_station.logger.info("Loaded programs for {0}".format(self.__radio_station.station.name))

    def __load_program(self, id):
        return self.__db.query(ScheduledProgram).filter(ScheduledProgram.id == id).first()

    def __start_listeners(self):
        t = threading.Thread(target=self.__listen_for_scheduling_changes,
                             args=(DefaultConfig.SCHEDULE_EVENTS_SERVER_IP, DefaultConfig.SCHEDULE_EVENTS_SERVER_PORT))
        t.start()

    def __listen_for_scheduling_changes(self, ip, port):
        sck = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        addr = (ip, port)
        
        # It may not be possible to connect after a restart; TIME_WAIT could come into play etc. Anyway, keep trying
        connected = False
        while not connected:
            try:         
                sck.connect(addr)
                connected = True
            except:
                self.__radio_station.logger.error("Could not connect to server, retrying in 30 ...")
                sleep(30)
        sck.send(json.dumps({'station':self.__radio_station.id, 'action':'register'}))

        while True:
            data = sck.recv(1024)
            try:
                event = json.loads(data)
                if event["action"] == "delete":
                    self.__delete_scheduled_job(event["id"])
                    self.__radio_station.logger.info("Scheduled program with id {0} has been deleted".format(event["id"]))
                elif event["action"] == "add":
                    scheduled_program = self.__load_program(event["id"])
                    if not self.__is_program_expired(scheduled_program):
                        self.__add_scheduled_job(scheduled_program)
                        self.__radio_station.logger.info(
                            "Scheduled program with id {0} has been added at time {1}".format(event["id"],
                                                                                          scheduled_program.start))
                elif event["action"] == "update":
                    self.__delete_scheduled_job(event["id"])
                    scheduled_program = self.__load_program(event["id"])
                    if not self.__is_program_expired(scheduled_program):
                        self.__add_scheduled_job(scheduled_program)
                        self.__radio_station.logger.info(
                            "Scheduled program with id {0} has been moved to start at time {1}".format(event["id"],
                                                                                                 scheduled_program.start))
            except:
                pass #Most probably a JSON parse error
 

    """
    Gets the program to run from the current list of programs that are lined up for the day
    """

    def __get_current_program(self):
        for program in self.__scheduled_programs:
            if not self.__is_program_expired(program):
                return program

    """
    Returns whether or not the time for a particular program has passed
    """

    def __is_program_expired(self, scheduled_program):
        now = pytz.utc.localize(datetime.utcnow())
        return (scheduled_program.start + scheduled_program.program.duration) < (now + timedelta(minutes=1))

    def __get_program_start_time(self, scheduled_program):
        now = datetime.now(dateutil.tz.tzlocal())
        if scheduled_program.start < now:  # Time at which program begins is already past
            return now + timedelta(seconds=5)  # 5 second scheduling allowance
        else:
            return scheduled_program.start + timedelta(seconds=5)  # 5 second scheduling allowance
Exemple #20
0
  Alternatively, you can also schedule a job using a decorator, as follows:

    from apscheduler.scheduler import Scheduler

    sched = Scheduler()
    sched.daemonic = False
    sched.start()

    @sched.interval_schedule(hours=2, start_date='2012-04-12 09:54:59')
    def job_function():
        print "Hello World"

  To unschedule a function that was scheduled with the decorator, do the following:

    sched.unschedule_job(job_function.job)


3. If you want functionality similar to crontab on Linux, you can use cron-style scheduling, as follows:

    from apscheduler.scheduler import Scheduler

    sched = Scheduler()
    sched.daemonic = False

    def job_function():
        print "Hello World"

    # Schedules job_function to run on the third Friday of June, July, August, November and December, from 0:00 through 3:00
    sched.add_cron_job(job_function, month='6-8,11-12', day='3rd fri', hour='0-3')
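
  To remove a job scheduled this way, keep the Job object returned by add_cron_job and hand it back to the scheduler; a small sketch based on the snippet above:

    job = sched.add_cron_job(job_function, month='6-8,11-12', day='3rd fri', hour='0-3')
    sched.start()
    # ... later ...
    sched.unschedule_job(job)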
class AlertSchedulerHandler():
    make_cachedir = True

    FILENAME = 'definitions.json'
    TYPE_PORT = 'PORT'
    TYPE_METRIC = 'METRIC'
    TYPE_SCRIPT = 'SCRIPT'

    APS_CONFIG = {
        'threadpool.core_threads': 3,
        'coalesce': True,
        'standalone': False
    }

    def __init__(self, cachedir, stacks_dir, in_minutes=True):
        self.cachedir = cachedir
        self.stacks_dir = stacks_dir

        if not os.path.exists(
                cachedir) and AlertSchedulerHandler.make_cachedir:
            try:
                os.makedirs(cachedir)
            except:
                logger.critical(
                    "Could not create the cache directory {0}".format(
                        cachedir))
                pass

        self._collector = AlertCollector()
        self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)
        self.__in_minutes = in_minutes
        self.__config_maps = {}

    def update_definitions(self, alert_commands, reschedule_jobs=False):
        ''' updates the persisted definitions and restarts the scheduler '''

        with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f:
            json.dump(alert_commands, f, indent=2)

        if reschedule_jobs:
            self.reschedule()

    def __make_function(self, alert_def):
        return lambda: alert_def.collect()

    def start(self):
        ''' loads definitions from file and starts the scheduler '''

        if self.__scheduler is None:
            return

        if self.__scheduler.running:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)

        alert_callables = self.__load_definitions()

        # schedule each definition
        for _callable in alert_callables:
            self.schedule_definition(_callable)

        logger.debug("Starting scheduler {0}; currently running: {1}".format(
            str(self.__scheduler), str(self.__scheduler.running)))

        self.__scheduler.start()

    def stop(self):
        if not self.__scheduler is None:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)

    def reschedule(self):
        '''
    Removes scheduled jobs whose UUID is no longer valid, and schedules jobs
    for definitions whose UUID is not currently scheduled.
    '''
        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # for every scheduled job, see if its UUID is still valid
        for scheduled_job in scheduled_jobs:
            uuid_valid = False

            for definition in definitions:
                definition_uuid = definition.get_uuid()
                if scheduled_job.name == definition_uuid:
                    uuid_valid = True
                    break

            # jobs without valid UUIDs should be unscheduled
            if uuid_valid == False:
                jobs_removed += 1
                logger.info("Unscheduling {0}".format(scheduled_job.name))
                self._collector.remove_by_uuid(scheduled_job.name)
                self.__scheduler.unschedule_job(scheduled_job)

        # for every definition, determine if there is a scheduled job
        for definition in definitions:
            definition_scheduled = False
            for scheduled_job in scheduled_jobs:
                definition_uuid = definition.get_uuid()
                if definition_uuid == scheduled_job.name:
                    definition_scheduled = True
                    break

            # if no jobs are found with the definitions UUID, schedule it
            if definition_scheduled == False:
                jobs_scheduled += 1
                self.schedule_definition(definition)

        logger.info(
            "Alert Reschedule Summary: {0} rescheduled, {1} unscheduled".
            format(str(jobs_scheduled), str(jobs_removed)))

    def collector(self):
        ''' gets the collector for reporting to the server '''
        return self._collector

    def __load_definitions(self):
        ''' loads all alert commands from the file.  all clusters are stored in one file '''
        definitions = []

        all_commands = None
        try:
            with open(os.path.join(self.cachedir, self.FILENAME)) as fp:
                all_commands = json.load(fp)
        except:
            if (logger.isEnabledFor(logging.DEBUG)):
                traceback.print_exc()
            return definitions

        for command_json in all_commands:
            clusterName = '' if not 'clusterName' in command_json else command_json[
                'clusterName']
            hostName = '' if not 'hostName' in command_json else command_json[
                'hostName']

            configmap = None
            # each cluster gets a map of key/value pairs of substitution values
            self.__config_maps[clusterName] = {}
            if 'configurations' in command_json:
                configmap = command_json['configurations']

            for definition in command_json['alertDefinitions']:
                obj = self.__json_to_callable(clusterName, hostName,
                                              definition)

                if obj is None:
                    continue

                # get the config values for the alerts 'lookup keys',
                # eg: hdfs-site/dfs.namenode.http-address : host_and_port
                vals = self.__find_config_values(configmap,
                                                 obj.get_lookup_keys())
                self.__config_maps[clusterName].update(vals)

                obj.set_helpers(self._collector,
                                self.__config_maps[clusterName])

                definitions.append(obj)

        return definitions

    def __json_to_callable(self, clusterName, hostName, json_definition):
        '''
    converts the json that represents all aspects of a definition
    and makes an object that extends BaseAlert that is used for an individual
    alert check
    '''
        source = json_definition['source']
        source_type = source.get('type', '')

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Creating job type {0} with {1}".format(
                source_type, str(json_definition)))

        alert = None

        if source_type == AlertSchedulerHandler.TYPE_METRIC:
            alert = MetricAlert(json_definition, source)
        elif source_type == AlertSchedulerHandler.TYPE_PORT:
            alert = PortAlert(json_definition, source)
        elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
            source['stacks_dir'] = self.stacks_dir
            alert = ScriptAlert(json_definition, source)

        if alert is not None:
            alert.set_cluster(clusterName, hostName)

        return alert

    def __find_config_values(self, configmap, obj_keylist):
        ''' finds templated values in the configuration map provided  by the server '''
        if configmap is None:
            return {}

        result = {}

        for key in obj_keylist:
            try:
                obj = configmap
                for layer in key.split('/'):
                    obj = obj[layer]
                result[key] = obj
            except KeyError:  # the nested key is missing somewhere
                pass

        return result

    def update_configurations(self, commands):
        '''
    when an execution command comes in, update any necessary values.
    status commands do not contain useful configurations
    '''
        for command in commands:
            clusterName = command['clusterName']
            if not clusterName in self.__config_maps:
                continue

            if 'configurations' in command:
                configmap = command['configurations']
                keylist = self.__config_maps[clusterName].keys()
                vals = self.__find_config_values(configmap, keylist)
                self.__config_maps[clusterName].update(vals)

    def schedule_definition(self, definition):
        '''
    Schedule a definition (callable). Scheduled jobs are given the UUID
    as their name so that they can be identified later on.
    <p/>
    This function can be called with a definition that is disabled; it will
    simply NOOP.
    '''
        # NOOP if the definition is disabled; don't schedule it
        if definition.is_enabled() == False:
            logger.info(
                "The alert {0} with UUID {1} is disabled and will not be scheduled"
                .format(definition.get_name(), definition.get_uuid()))
            return

        job = None

        if self.__in_minutes:
            job = self.__scheduler.add_interval_job(
                self.__make_function(definition),
                minutes=definition.interval())
        else:
            job = self.__scheduler.add_interval_job(
                self.__make_function(definition),
                seconds=definition.interval())

        # although the documentation states that Job(kwargs) takes a name
        # key/value pair, it does not actually set the name; do it manually
        if job is not None:
            job.name = definition.get_uuid()

        logger.info("Scheduling {0} with UUID {1}".format(
            definition.get_name(), definition.get_uuid()))

    def get_job_count(self):
        '''
    Gets the number of jobs currently scheduled. This is mainly used for
    test verification of scheduling
    '''
        if self.__scheduler is None:
            return 0

        return len(self.__scheduler.get_jobs())

    def execute_alert(self, execution_commands):
        '''
    Executes an alert immediately, ignoring any scheduled jobs. The existing
    jobs remain untouched. The result of this is stored in the alert
    collector for transmission during the next heartbeat
    '''
        if self.__scheduler is None or execution_commands is None:
            return

        for execution_command in execution_commands:
            try:
                alert_definition = execution_command['alertDefinition']

                clusterName = '' if not 'clusterName' in execution_command else execution_command[
                    'clusterName']
                hostName = '' if not 'hostName' in execution_command else execution_command[
                    'hostName']

                alert = self.__json_to_callable(clusterName, hostName,
                                                alert_definition)

                if alert is None:
                    continue

                logger.info("Executing on-demand alert {0} ({1})".format(
                    alert.get_name(), alert.get_uuid()))

                alert.set_helpers(self._collector,
                                  self.__config_maps[clusterName])
                alert.collect()
            except:
                logger.exception(
                    "Unable to execute the alert outside of the job scheduler")
Exemple #22
0
class Scheduler(Plugin):

    crons = {}
    intervals = {}
    started = False

    def __init__(self):

        addEvent('schedule.cron', self.cron)
        addEvent('schedule.interval', self.interval)
        addEvent('schedule.remove', self.remove)

        self.sched = Sched(misfire_grace_time=60)
        self.sched.start()
        self.started = True

    def remove(self, identifier):
        for cron_type in ['intervals', 'crons']:
            try:
                self.sched.unschedule_job(
                    getattr(self, cron_type)[identifier]['job'])
                log.debug('%s unscheduled %s',
                          (cron_type.capitalize(), identifier))
            except:
                pass

    def doShutdown(self):
        super(Scheduler, self).doShutdown()
        self.stop()

    def stop(self):
        if self.started:
            log.debug('Stopping scheduler')
            self.sched.shutdown()
            log.debug('Scheduler stopped')
        self.started = False

    def cron(self, identifier='', handle=None, day='*', hour='*', minute='*'):
        log.info('Scheduling "%s", cron: day = %s, hour = %s, minute = %s',
                 (identifier, day, hour, minute))

        self.remove(identifier)
        self.crons[identifier] = {
            'handle': handle,
            'day': day,
            'hour': hour,
            'minute': minute,
            'job': self.sched.add_cron_job(handle, day=day, hour=hour, minute=minute)
        }

    def interval(self, identifier='', handle=None, hours=0, minutes=0, seconds=0):
        log.info(
            'Scheduling %s, interval: hours = %s, minutes = %s, seconds = %s',
            (identifier, hours, minutes, seconds))

        self.remove(identifier)
        self.intervals[identifier] = {
            'handle': handle,
            'hours': hours,
            'minutes': minutes,
            'seconds': seconds,
            'job': self.sched.add_interval_job(handle,
                                               hours=hours,
                                               minutes=minutes,
                                               seconds=seconds)
        }
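
Other plugins would normally talk to this scheduler through the events registered in __init__ rather than calling it directly; a hedged sketch (fireEvent and the callback are assumptions used only for illustration):

    def update_library():
        pass

    # Hypothetical: trigger the handlers registered via addEvent above.
    fireEvent('schedule.interval', 'library.update', update_library, hours = 2)
    fireEvent('schedule.remove', 'library.update')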
Exemple #23
0
class DS_Scheduler:
   """ The main guts and logic for the scheduler."""
   def __init__(self):
      self.util = Util()
      self.sched = Scheduler(conf.config)
      self.sched.start()
      self.queue = {}
      self.util.init_path()
      self.util.init_DB()
      self.util.reset_all()

   def run(self):
      # Main Loop
      i = 0
      master = False
      idle = False
      while True:
         if conf.clustering == True:
            inactive = heartbeats.getActive()
            ''' Here is where we elect our current master node. If the conf.preferred_master
                is not us and is not up yet (Neither True or False), then we will wait 10 check cycles
                and if the master is not up we will assume control. '''
            if conf.preferred_master == myself:
               # I am the master node and I should always be in charge
               master = True
               i = 0
            elif conf.preferred_master != myself and conf.peers[conf.preferred_master] == 'True':
               # The master will take control now
               master = False
            elif conf.peers[conf.preferred_master] == 'False':
               # Assume the master server is dead
               master = True
            elif conf.peers[conf.preferred_master] == '':
               # Master has not been started yet
               i += 1
               if i >= 10:
                  master = True
                  i = 10
            else:
               # I am not the master node, but I am available just in case
               master = False
         if master == True or conf.clustering == False:
            idle = False
            events = self.util.refresh_events()
            for e in events.keys():
               event = events[e]
               e_id = event[0]
               e_type = event[1]
               e_target = event[2]
               e_host = event[3]
               e_update_flag = event[4]
               if e_update_flag == 1:
                  self.util.remote_command(e_id, e_host, e_type, e_target, 'NEW EVENT')
                  self.util.runQuery("update event_table set update_flag=0 where id=%s" % (e_id))

            jobs = self.util.refresh_jobs()
            for j in jobs.keys():
               job = jobs[j]
               # IF the job was updated then the update_flag will be set, so reschedule
               # TODO: Change these in the code below
               j_id = job[0]
               j_name = job[1]
               j_host = job[2]
               j_user = job[3]
               j_dep = job[6]
               j_etrigger = job[8]
               j_cron = job[9]
               j_command = job[12]
               j_update_flag = job[13]
               j_pid = job[16]
               j_dep_mode = job[17]
               job_name = '%s_%i' % (j_name, j_id)
               now = dt.datetime.now() + dt.timedelta(seconds=5)
               if j_update_flag == 1:
                  # Lets make sure the job does not already exist. If it does it means the user had edited an existing job so let's requeue
                  try:
                     self.sched.unschedule_job(self.queue[job_name])
                     # Remove job from queue
                     self.queue.pop(job_name)
                     Log("Refreshing job %s" %(job_name))
                  except(KeyError):
                     pass

                  # DEP SECTION
                  if j_etrigger > 0: # Event trigger
                     # immediate jobs like event jobs and run_now jobs need not collide with the namespace
                     # of cron jobs. So lets randomize the name
                     current_job = self.sched.add_date_job(self.util.remote_command, now, name=job_name, args=(j_id, j_host, j_command, j_user, job_name))
                     Log(current_job)
                     self.util.runQuery("UPDATE jobs set update_flag=1, event_trigger=0 where id='%s'" % (j_id))
                  if j_dep > 0: # If job depends on another...
                     now = dt.datetime.now() + dt.timedelta(seconds=10)
                     parent = self.util.job_status(j_dep)
                     if j_dep_mode == 0  and j_dep_mode == parent[11]:  # ON_SUCCESS
                        current_job = self.sched.add_date_job(self.util.remote_command, now, name=job_name, args=(j_id, j_host, j_command, j_user, job_name))
                        Log(current_job)
                     if j_dep_mode == 1 and j_dep_mode == parent[11]:  # ON_FAIL
                        current_job = self.sched.add_date_job(self.util.remote_command, now, name=job_name, args=(j_id, j_host, j_command, j_user, job_name))
                        Log(current_job)
                     if j_dep_mode == 2 and j_dep_mode == parent[11]:  # ON_KILL
                        current_job = self.sched.add_date_job(self.util.remote_command, now, name=job_name, args=(j_id, j_host, j_command, j_user, job_name))
                        Log(current_job)
                     self.util.runQuery("update jobs set update_flag=0 where id='%s'" % (j_id))
                  if j_etrigger == 0 and j_dep == 0:  #Standard "cron" type scheduling
                     crontab = j_cron.split()
                     # Job Names need to be unique, so the same 'job' being run on different hosts can work.
                     current_job = self.sched.add_cron_job(self.util.remote_command,
                                                          minute=crontab[0],
                                                          hour=crontab[1],
                                                          day=crontab[2],
                                                          month=crontab[3],
                                                          day_of_week=crontab[4],
                                                          name=job_name,
                                                          args=(j_id, j_host, j_command, j_user, job_name),
                                                          max_instances=2)

                     # Add the job to the queue
                     Log(current_job)
                     self.queue[job_name] = current_job
                     self.util.runQuery("update jobs set update_flag=0 where id='%s'" % (j_id))
               # Job was marked for removal
               if j_update_flag == 2:
                  self.sched.unschedule_job(self.queue[job_name])
                  # Remove job from queue
                  self.queue.pop(job_name)
                  self.util.remove_job(j_id)
                  Log("Unscheduled job %s" %(job_name))
                  self.util.runQuery("update jobs set update_flag=0 where id='%s'" % (j_id))
               if j_update_flag == 3:
                  Log("Disabling Job: %s" % (job_name))
                  self.sched.unschedule_job(self.queue[job_name])
                  # Remove job from queue
                  self.queue.pop(job_name)
                  self.util.disable_job(j_id)
                  #self.util.runQuery("update jobs set update_flag=0 where id='%s'" % (j_id))
               if j_update_flag == 4:
                  Log("Killing PID: %s   Job: %s" % (j_pid, job_name))
                  kill_command = 'kill -9 %s' % (j_pid)
                  current_job = self.sched.add_date_job(self.util.remote_command, now, name='ON_KILL %s' % (j_pid), args=(j_id, j_host, kill_command , j_user, job_name))
                  Log(current_job)
                  self.queue[job_name] = current_job
                  self.util.runQuery("update jobs set update_flag=0, status=99996 where id='%s'" % (j_id))
            #pprint(self.queue)
            time.sleep(conf.CHECK_PERIOD)
         else:
            # Unscheduling all jobs so that the peer node can take over.
            if idle == False:
               jobs = self.util.refresh_jobs()
               for j in jobs.keys():
                  job = jobs[j]
                  # Rebuild the queue key ("<name>_<id>") for this job
                  job_name = '%s_%i' % (job[1], job[0])
                  if job_name in self.queue:
                     self.sched.unschedule_job(self.queue[job_name])
                     # Remove job from queue
                     self.queue.pop(job_name)
                     Log("Failed over job %s" %(job_name))
               self.util.reset_all_jobs()
               idle = True

      self.sched.shutdown()
class PhoneManager:

    # Default Sleep time between checks
    kDefaultLoopSleep = 10

    # SMS Forwarding Information
    bForwardSMS = False
    sForwardSMSNumber = ""
    bCallForwardNotify = False

    # Call Forwarding Information.
    bEnableCallForward = False
    sCallForwardNumber = ""
    bSMSForwardNotify = False

    # SMS Auto Reply Information.
    bAutoReply = False
    sAutoReplyText = ""
    bDeleteAfterResponse = False

    # Disable Call Forwarding
    bDisableCallForward = False

    # Determines if all SMSs should be processed, or just unread messages.
    bCheckAllSMS = True

    # The last result of a Send SMS
    bLastResult = True

    # MSISDN of last received SMS
    sLastMessageFrom = ""

    # Maintains a list of numbers that have received an autoreply
    listAutoReply = []

    # Thread for processing
    _thread = None
    _bActive = False
    _lastRunTime = None

    # IMSI
    _IMSI = None

    # Message Queue
    messageQueue = None

    # Config File.
    config = None
    configFile = ''

    kModemSection = 'modem'
    kIMSIOption = 'IMSI'

    kSettingsSection = 'settings'
    kForwardOption = 'forwardsms'
    kDivertOption = 'divert'
    kAutoReplyOption = 'autoreply'

    kScheduleSetting = 'schedule'
    kCommandOption = 'command'
    kOptionOption = 'option'
    kDayOption = 'day'
    kWeekOption = 'week'
    kDayOfWeekOption = 'day_of_week'
    kHourOption = 'hour'
    kMinuteOption = 'minute'

    # Scheduler.
    sched = None

    # Config file changes
    configFileLastChanged = 0

    def __init__(self, modems, configFile):

        logger.debug('Init Modem, modems=' + str(modems) +
                     ' configFile=' + configFile)

        if modems == None:
            raise Exception('A Modem Must be Provided')

        if configFile == None:
            raise Exception('A config file must be provided.')

        self.messageQueue = []

        self.sched = Scheduler()

        self.loadConfig(configFile)

        if self._IMSI in modems:
            self.modem = modems[self._IMSI]
        else:
            raise Exception('No modem for selected IMSI')

    def loadConfig(self, configFile):

        logger.info('Loading config:' + configFile)
        self.configFile = configFile

        workConfigFile = configFile + '~'

        logger.debug('Copying to ' + workConfigFile)
        shutil.copyfile(configFile, workConfigFile)

        logger.debug('Clearing Scheduled Tasks')
        jobs = self.sched.get_jobs()
        for job in jobs:
            logger.debug('Removing ' + job.__str__())
            self.sched.unschedule_job(job)

        try:
            config = ConfigParser.ConfigParser()
            config.read(workConfigFile)

            self.config = config

            IMSI = config.get(self.kModemSection, self.kIMSIOption)
            logger.debug('modem/IMSI-' + IMSI)
            self._IMSI = IMSI

            if config.has_option(self.kSettingsSection, self.kForwardOption):
                number = config.get(self.kSettingsSection, self.kForwardOption)
                if len(number) > 0:
                    logger.debug('Forward SMS Number-' + number)
                    self.enableSMSForwarding(number)
            else:
                self.disableSMSForwarding()

            if config.has_option(self.kSettingsSection, self.kDivertOption):
                number = config.get(self.kSettingsSection, self.kDivertOption)
                if len(number) > 0:
                    logger.debug('Divert Number-' + number)
                    self.enableCallForward(number)
            else:
                config.set(self.kSettingsSection, self.kDivertOption, '')
                #Disable call forward, need to write into main thread

            if config.has_option(self.kSettingsSection, self.kAutoReplyOption):
                autoReplyText = config.get(self.kSettingsSection,
                                           self.kAutoReplyOption)
                if len(autoReplyText) > 0:
                    logger.debug('Auto Reply Text-' + autoReplyText)
                    self.enableSMSAutoReply(autoReplyText)
                else:
                    self.disableSMSAutoReply()
            else:
                self.disableSMSAutoReply()

            for section in config.sections():
                if section.startswith(self.kScheduleSetting):
                    logger.debug('Adding schedule ' + section)

                    if config.has_option(section, self.kCommandOption):
                        command = config.get(section, self.kCommandOption)

                        if config.has_option(section, self.kOptionOption):
                            option = config.get(section, self.kOptionOption)

                            optionList = option.split(',')

                            _optionDict = []

                            for opt in optionList:
                                logger.debug('Option: ' + opt)
                                _optionDict.append(self.getStringAsType(opt))

                            logger.debug('Options: ' + str(_optionDict))

                        else:
                            option = ''

                        _day = self.getSetConfigOption(config, section,
                                                       self.kDayOption)
                        _week = self.getSetConfigOption(
                            config, section, self.kWeekOption)
                        _dayOfWeek = self.getSetConfigOption(
                            config, section, self.kDayOfWeekOption)
                        _hour = self.getSetConfigOption(
                            config, section, self.kHourOption)
                        _minute = self.getSetConfigOption(
                            config, section, self.kMinuteOption)

                        logger.debug('Add Schedule.  Command=[' + command +
                                     '] options [' + option + '] day=' + _day +
                                     ' week=' + _week + ' day_of_week=' +
                                     _dayOfWeek + ' hour=' + _hour +
                                     ' minutes=' + _minute)

                        # Initialise job so the logging check below is safe
                        # even when no command branch matches
                        job = None

                        if command == self.kForwardOption and len(option) > 0:

                            job = self.sched.add_cron_job(
                                self.enableSMSForwarding,
                                day=_day,
                                week=_week,
                                day_of_week=_dayOfWeek,
                                hour=_hour,
                                minute=_minute,
                                args=_optionDict)

                        if command == self.kForwardOption and len(option) == 0:

                            job = self.sched.add_cron_job(
                                self.disableSMSForwarding,
                                day=_day,
                                week=_week,
                                day_of_week=_dayOfWeek,
                                hour=_hour,
                                minute=_minute)

                        if command == self.kDivertOption and len(option) > 0:

                            job = self.sched.add_cron_job(
                                self.enableCallForward,
                                day=_day,
                                week=_week,
                                day_of_week=_dayOfWeek,
                                hour=_hour,
                                minute=_minute,
                                args=_optionDict)

                        if command == self.kDivertOption and len(option) == 0:

                            job = self.sched.add_cron_job(
                                self.disableCallForward,
                                day=_day,
                                week=_week,
                                day_of_week=_dayOfWeek,
                                hour=_hour,
                                minute=_minute)

                        if command == self.kAutoReplyOption and len(
                                option) > 0:

                            job = self.sched.add_cron_job(
                                self.enableSMSAutoReply,
                                day=_day,
                                week=_week,
                                day_of_week=_dayOfWeek,
                                hour=_hour,
                                minute=_minute,
                                args=_optionDict)

                        if command == self.kAutoReplyOption and len(
                                option) == 0:

                            job = self.sched.add_cron_job(
                                self.disableSMSAutoReply,
                                day=_day,
                                week=_week,
                                day_of_week=_dayOfWeek,
                                hour=_hour,
                                minute=_minute)

                        if job is not None:
                            logger.info(job.__str__())

            self.configFileLastChanged = time.ctime(
                os.path.getmtime(self.configFile))
            logger.debug('Config file last changed: ' +
                         self.configFileLastChanged)

        except Exception, e:
            logger.critical('Error loading config file')
            logger.exception(e)
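
Based on the section and option names read in loadConfig above, a config file for this manager might look roughly like the following; the values are illustrative only:

    [modem]
    IMSI = 001010123456789

    [settings]
    forwardsms = +15550001111
    divert = +15550002222
    autoreply = I am away, will reply later.

    [schedule_weekday_divert]
    command = divert
    option = +15550002222
    day_of_week = mon-fri
    hour = 9
    minute = 0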
Exemple #25
0
class PyFlowScheduler(object):
    """
    This object schedules the submission of the tasks in a :class:`Flow`.
    There are two types of errors that might occur during the execution of the jobs:

        #. Python exceptions
        #. Errors in the ab-initio code

    Python exceptions are easy to detect and are usually due to a bug in the python code or random errors such as IOError.
    The set of errors in the ab-initio code is much broader. It includes wrong input data, segmentation
    faults, problems with the resource manager, etc. The flow tries to handle the most common cases
    but there's still a lot of room for improvement.
    Note, in particular, that `PyFlowScheduler` will shut down automatically in the following cases:

        #. The number of python exceptions is > max_num_pyexcs

        #. The number of task errors (i.e. the number of tasks whose status is S_ERROR) is > max_num_abierrs

        #. The number of jobs launched becomes greater than (`safety_ratio` * total_number_of_tasks).

        #. The scheduler will send an email to the user (specified by `mailto`) every `remindme_s` seconds.
           If the mail cannot be sent, the scheduler will shutdown automatically.
           This check prevents the scheduler from being trapped in an infinite loop.
    """
    # Configuration file.
    YAML_FILE = "scheduler.yml"
    USER_CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".abinit", "abipy")

    Error = PyFlowSchedulerError

    @classmethod
    def autodoc(cls):
        i = cls.__init__.__doc__.index("Args:")
        return cls.__init__.__doc__[i + 5:]

    def __init__(self, **kwargs):
        """
        Args:
            weeks: number of weeks to wait (DEFAULT: 0).
            days: number of days to wait (DEFAULT: 0).
            hours: number of hours to wait (DEFAULT: 0).
            minutes: number of minutes to wait (DEFAULT: 0).
            seconds: number of seconds to wait (DEFAULT: 0).
            mailto: The scheduler will send an email to `mailto` every `remindme_s` seconds.
                (DEFAULT: None i.e. not used).
            verbose: (int) verbosity level. (DEFAULT: 0)
            use_dynamic_manager: "yes" if the :class:`TaskManager` must be re-initialized from
                file before launching the jobs. (DEFAULT: "no")
            max_njobs_inqueue: Limit on the number of jobs that can be present in the queue. (DEFAULT: 200)
            remindme_s: The scheduler will send an email to the user specified by `mailto` every `remindme_s` seconds.
                (int, DEFAULT: 1 day).
            max_num_pyexcs: The scheduler will exit if the number of python exceptions is > max_num_pyexcs
                (int, DEFAULT: 0)
            max_num_abierrs: The scheduler will exit if the number of errored tasks is > max_num_abierrs
                (int, DEFAULT: 0)
            safety_ratio: The scheduler will exit if the number of jobs launched becomes greater than
               `safety_ratio` * total_number_of_tasks_in_flow. (int, DEFAULT: 5)
            max_nlaunches: Maximum number of tasks launched in a single iteration of the scheduler.
                (DEFAULT: -1 i.e. no limit)
            debug: Debug level. Use 0 for production (int, DEFAULT: 0)
            fix_qcritical: "yes" if the launcher should try to fix QCritical Errors (DEFAULT: "yes")
            rmflow: If "yes", the scheduler will remove the flow directory if the calculation
                completed successfully. (DEFAULT: "no")
            killjobs_if_errors: "yes" if the scheduler should try to kill all the running jobs
                before exiting due to an error. (DEFAULT: "yes")
        """
        # Options passed to the scheduler.
        self.sched_options = AttrDict(
            weeks=kwargs.pop("weeks", 0),
            days=kwargs.pop("days", 0),
            hours=kwargs.pop("hours", 0),
            minutes=kwargs.pop("minutes", 0),
            seconds=kwargs.pop("seconds", 0),
            #start_date=kwargs.pop("start_date", None),
        )
        if all(not v for v in self.sched_options.values()):
            raise self.Error("Wrong set of options passed to the scheduler.")

        self.mailto = kwargs.pop("mailto", None)
        self.verbose = int(kwargs.pop("verbose", 0))
        self.use_dynamic_manager = as_bool(
            kwargs.pop("use_dynamic_manager", False))
        self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200)
        self.max_ncores_used = kwargs.pop("max_ncores_used", None)
        self.contact_resource_manager = as_bool(
            kwargs.pop("contact_resource_manager", False))

        self.remindme_s = float(kwargs.pop("remindme_s", 1 * 24 * 3600))
        self.max_num_pyexcs = int(kwargs.pop("max_num_pyexcs", 0))
        self.max_num_abierrs = int(kwargs.pop("max_num_abierrs", 0))
        self.safety_ratio = int(kwargs.pop("safety_ratio", 5))
        #self.max_etime_s = kwargs.pop("max_etime_s", )
        self.max_nlaunches = kwargs.pop("max_nlaunches", -1)
        self.debug = kwargs.pop("debug", 0)
        self.fix_qcritical = as_bool(kwargs.pop("fix_qcritical", True))
        self.rmflow = as_bool(kwargs.pop("rmflow", False))
        self.killjobs_if_errors = as_bool(
            kwargs.pop("killjobs_if_errors", True))

        self.customer_service_dir = kwargs.pop("customer_service_dir", None)
        if self.customer_service_dir is not None:
            self.customer_service_dir = Directory(self.customer_service_dir)
            self._validate_customer_service()

        if kwargs:
            raise self.Error("Unknown arguments %s" % kwargs)

        if not has_apscheduler:
            raise RuntimeError("Install apscheduler with pip")

        if has_sched_v3:
            logger.warning("Using scheduler v>=3.0.0")
            from apscheduler.schedulers.blocking import BlockingScheduler
            self.sched = BlockingScheduler()
        else:
            from apscheduler.scheduler import Scheduler
            self.sched = Scheduler(standalone=True)

        self.nlaunch = 0
        self.num_reminders = 1

        # Used to keep track of the exceptions raised while the scheduler is running
        self.exceptions = deque(maxlen=self.max_num_pyexcs + 10)

        # Used to push additional info during the execution.
        self.history = deque(maxlen=100)

    @classmethod
    def from_file(cls, filepath):
        """Read the configuration parameters from a Yaml file."""
        with open(filepath, "rt") as fh:
            return cls(**yaml.safe_load(fh))

    @classmethod
    def from_string(cls, s):
        """Create an istance from string s containing a YAML dictionary."""
        stream = cStringIO(s)
        stream.seek(0)
        return cls(**yaml.safe_load(stream))

    @classmethod
    def from_user_config(cls):
        """
        Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'.
        Search first in the working directory and then in the configuration directory of abipy.

        Raises:
            `RuntimeError` if file is not found.
        """
        # Try in the current directory.
        path = os.path.join(os.getcwd(), cls.YAML_FILE)

        if os.path.exists(path):
            return cls.from_file(path)

        # Try in the configuration directory.
        path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE)

        if os.path.exists(path):
            return cls.from_file(path)

        raise cls.Error(
            "Cannot locate %s neither in current directory nor in %s" %
            (cls.YAML_FILE, path))
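
    # A minimal scheduler.yml sketch for from_file/from_user_config above.
    # The keys mirror the __init__ arguments; the values here are illustrative
    # assumptions, not project defaults:
    #
    #   seconds: 30
    #   mailto: user@example.com
    #   max_njobs_inqueue: 200
    #   max_num_pyexcs: 2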

    def __str__(self):
        """String representation."""
        lines = [self.__class__.__name__ + ", Pid: %d" % self.pid]
        app = lines.append
        app("Scheduler options: %s" % str(self.sched_options))

        if self.flow is not None:
            app(80 * "=")
            app(str(self.flow))

        return "\n".join(lines)

    @property
    def pid(self):
        """The pid of the process associated to the scheduler."""
        try:
            return self._pid
        except AttributeError:
            self._pid = os.getpid()
            return self._pid

    @property
    def pid_file(self):
        """
        Absolute path of the file with the pid.
        The file is located in the workdir of the flow
        """
        return self._pid_file

    @property
    def flow(self):
        """`Flow`."""
        try:
            return self._flow
        except AttributeError:
            return None

    @property
    def num_excs(self):
        """Number of exceptions raised so far."""
        return len(self.exceptions)

    def get_delta_etime(self):
        """Returns a `timedelta` object representing with the elapsed time."""
        return timedelta(seconds=(time.time() - self.start_time))

    def add_flow(self, flow):
        """
        Add an :class:`Flow` flow to the scheduler.
        """
        if hasattr(self, "_flow"):
            raise self.Error("Only one flow can be added to the scheduler.")

        # Check if we are already using a scheduler to run this flow
        flow.check_pid_file()
        flow.set_spectator_mode(False)

        # Build dirs and files (if not yet done)
        flow.build()

        with open(flow.pid_file, "wt") as fh:
            fh.write(str(self.pid))

        self._pid_file = flow.pid_file
        self._flow = flow

    def _validate_customer_service(self):
        """
        Validate input parameters; if customer service is on, then
        create the directory for tarball files with correct permissions for user and group.
        """
        direc = self.customer_service_dir
        if not direc.exists:
            mode = 0o750
            print("Creating customer_service_dir %s with mode %s" %
                  (direc, mode))
            direc.makedirs()
            os.chmod(direc.path, mode)

        if self.mailto is None:
            raise RuntimeError(
                "customer_service_dir requires mailto option in scheduler.yml")

    def _do_customer_service(self):
        """
        This method is called before the shutdown of the scheduler.
        If customer_service is on and the flow didn't complete successfully,
        a lightweight tarball file with inputs and the most important output files
        is created in customer_service_dir.
        """
        if self.customer_service_dir is None: return
        doit = self.exceptions or not self.flow.all_ok
        doit = True
        if not doit: return

        prefix = os.path.basename(self.flow.workdir) + "_"

        import tempfile, datetime
        suffix = str(datetime.datetime.now()).replace(" ", "-")
        # Remove milliseconds
        i = suffix.index(".")
        if i != -1: suffix = suffix[:i]
        suffix += ".tar.gz"

        #back = os.getcwd()
        #os.chdir(self.customer_service_dir.path)

        _, tmpname = tempfile.mkstemp(suffix="_" + suffix,
                                      prefix=prefix,
                                      dir=self.customer_service_dir.path,
                                      text=False)

        print("Dear customer,\n We are about to generate a tarball in\n  %s" %
              tmpname)
        self.flow.make_light_tarfile(name=tmpname)
        #os.chdir(back)

    def start(self):
        """
        Starts the scheduler in a new thread. Returns 0 if success.
        In standalone mode, this method will block until there are no more scheduled jobs.
        """
        self.history.append("Started on %s" % time.asctime())
        self.start_time = time.time()

        if not has_apscheduler:
            raise RuntimeError("Install apscheduler with pip")

        if has_sched_v3:
            self.sched.add_job(self.callback, "interval", **self.sched_options)
        else:
            self.sched.add_interval_job(self.callback, **self.sched_options)

        errors = self.flow.look_before_you_leap()
        if errors:
            self.exceptions.append(errors)
            return 1

        # Try to run the job immediately. If something goes wrong return without initializing the scheduler.
        self._runem_all()

        if self.exceptions:
            self.cleanup()
            self.send_email(
                msg=
                "Error while trying to run the flow for the first time!\n %s" %
                self.exceptions)
            return 1

        try:
            self.sched.start()
            return 0

        except KeyboardInterrupt:
            self.shutdown(msg="KeyboardInterrupt from user")
            if ask_yesno(
                    "Do you want to cancel all the jobs in the queue? [Y/n]"):
                print("Number of jobs cancelled:", self.flow.cancel())

            self.flow.pickle_dump()
            return -1

    def _runem_all(self):
        """
        This function checks the status of all tasks,
        tries to fix tasks that went unconverged, abicritical, or queuecritical
        and tries to run all the tasks that can be submitted.
        """
        excs = []
        flow = self.flow

        # Allow to change the manager at run-time
        if self.use_dynamic_manager:
            from pymatgen.io.abinit.tasks import TaskManager
            new_manager = TaskManager.from_user_config()
            for work in flow:
                work.set_manager(new_manager)

        nqjobs = 0
        if self.contact_resource_manager:
            # This call is expensive and therefore it's optional
            nqjobs = flow.get_njobs_in_queue()
            if nqjobs is None:
                nqjobs = 0
                if flow.manager.has_queue:
                    logger.warning('Cannot get njobs_inqueue')

            if nqjobs >= self.max_njobs_inqueue:
                print("Too many jobs in the queue: %s, returning" % nqjobs)
                return

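        # max_nlaunches == -1 means "no explicit cap": the number of launches
        # is then limited only by the remaining headroom in the queue.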
        if self.max_nlaunches == -1:
            max_nlaunch = self.max_njobs_inqueue - nqjobs
        else:
            max_nlaunch = min(self.max_njobs_inqueue - nqjobs,
                              self.max_nlaunches)

        # check status.
        flow.check_status(show=False)

        # This check is not perfect: we should build the list of tasks to submit
        # and select only a subset so that we don't exceed max_ncores_used.
        # Many sections of this code should be rewritten.
        #if self.max_ncores_used is not None and flow.ncores_used > self.max_ncores_used:
        if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used:
            print("Cannot exceed max_ncores_use:d %s" % self.max_ncores_used)
            return

        # Try to restart the unconverged tasks
        # TODO: do not fire here but prepare for firing in rapidfire
        for task in self.flow.unconverged_tasks:
            try:
                logger.info("Flow will try restart task %s" % task)
                fired = task.restart()
                if fired:
                    self.nlaunch += 1
                    max_nlaunch -= 1
                    if max_nlaunch == 0:
                        logger.info(
                            "Restart: too many jobs in the queue, returning")
                        flow.pickle_dump()
                        return

            except task.RestartError:
                excs.append(straceback())

        # Temporarily disabled by MG because I don't know if fix_critical works after the
        # introduction of the new qadapters.
        # Re-enabled by MsS: disable things that do not work at low level,
        # fix only prepares for restarting, and sets the task to ready.
        if self.fix_qcritical:
            nfixed = flow.fix_queue_critical()
            if nfixed: print("Fixed %d QCritical error(s)" % nfixed)

        nfixed = flow.fix_abicritical()
        if nfixed: print("Fixed %d AbiCritical error(s)" % nfixed)

        # update database
        flow.pickle_dump()

        # Submit the tasks that are ready.
        try:
            nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch,
                                                 sleep_time=10)
            self.nlaunch += nlaunch

            if nlaunch:
                print("[%s] Number of launches: %d" %
                      (time.asctime(), nlaunch))

        except Exception:
            excs.append(straceback())

        # check status.
        flow.show_status()

        if excs:
            logger.critical("*** Scheduler exceptions:\n *** %s" %
                            "\n".join(excs))
            self.exceptions.extend(excs)

    def callback(self):
        """The function that will be executed by the scheduler."""
        try:
            return self._callback()
        except:
            # All exceptions raised here will trigger the shutdown!
            s = straceback()
            self.exceptions.append(s)

            # This is useful when debugging
            #try:
            #    print("Exception in callback, will cancel all tasks")
            #    for task in self.flow.iflat_tasks():
            #        task.cancel()
            #except Exception:
            #    pass

            self.shutdown(msg="Exception raised in callback!\n" + s)

    def _callback(self):
        """The actual callback."""
        if self.debug:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" %
                  get_open_fds())

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if all_ok:
            return self.shutdown(
                msg=
                "All tasks have reached S_OK. Will shutdown the scheduler and exit"
            )

        # Handle failures.
        err_lines = []

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.remindme_s:
            self.num_reminders += 1
            msg = (
                "Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s "
                % (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += (
                    "\nThe scheduler tried to send an e-mail to remind the user\n"
                    + " but send_email returned %d. Aborting now" % retcode)
                err_lines.append(msg)

        #if delta_etime.total_seconds() > self.max_etime_s:
        #    err_lines.append("\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s)

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.max_num_pyexcs:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.max_num_pyexcs)
            err_lines.append(boxed(msg))

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons
        # such as race conditions between different callbacks!)
        if self.nlaunch > self.safety_ratio * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_lines.append(boxed(msg))

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.max_num_abierrs:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.max_num_abierrs)
            err_lines.append(boxed(msg))

        # Test on the presence of deadlocks.
        g = self.flow.find_deadlocks()
        if g.deadlocked:
            # Check the flow again so that statuses are updated.
            self.flow.check_status()

            g = self.flow.find_deadlocks()
            print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables,
                  "\nrunning\n", g.running)
            if g.deadlocked and not g.runnables and not g.running:
                err_lines.append(
                    "No runnable job with deadlocked tasks:\n%s." %
                    str(g.deadlocked))

        if not g.runnables and not g.running:
            # Check the flow again so that statuses are updated.
            self.flow.check_status()
            g = self.flow.find_deadlocks()
            if not g.runnables and not g.running:
                err_lines.append(
                    "No task is running and cannot find other tasks to submit."
                )

        # Something wrong. Quit
        if err_lines:
            # Cancel all jobs.
            if self.killjobs_if_errors:
                cprint(
                    "killjobs_if_errors set to 'yes' in scheduler file. Will kill jobs before exiting.",
                    "yellow")
                try:
                    num_cancelled = 0
                    for task in self.flow.iflat_tasks():
                        num_cancelled += task.cancel()
                    cprint("Killed %d tasks" % num_cancelled, "yellow")
                except Exception as exc:
                    cprint(
                        "Exception while trying to kill jobs:\n%s" % str(exc),
                        "red")

            self.shutdown("\n".join(err_lines))

        return len(self.exceptions)

    def cleanup(self):
        """Cleanup routine: remove the pid file and save the pickle database"""
        try:
            os.remove(self.pid_file)
        except OSError as exc:
            logger.critical("Could not remove pid_file: %s", exc)

        # Save the final status of the flow.
        self.flow.pickle_dump()

    def shutdown(self, msg):
        """Shutdown the scheduler."""
        try:
            self.cleanup()

            self.history.append("Completed on: %s" % time.asctime())
            self.history.append("Elapsed time: %s" % self.get_delta_etime())

            if self.debug:
                print(">>>>> shutdown: Number of open file descriptors: %s" %
                      get_open_fds())

            retcode = self.send_email(msg)
            if self.debug:
                print("send_mail retcode", retcode)

            # Write file with the list of exceptions:
            if self.exceptions:
                dump_file = os.path.join(self.flow.workdir, "_exceptions")
                with open(dump_file, "wt") as fh:
                    fh.writelines(self.exceptions)
                    fh.write("Shutdown message:\n%s" % msg)

            lines = []
            app = lines.append
            app("Submitted on: %s" % time.ctime(self.start_time))
            app("Completed on: %s" % time.asctime())
            app("Elapsed time: %s" % str(self.get_delta_etime()))

            if self.flow.all_ok:
                app("Flow completed successfully")
            else:
                app("Flow %s didn't complete successfully" %
                    repr(self.flow.workdir))
                app("use `abirun.py FLOWDIR debug` to analyze the problem.")
                app("Shutdown message:\n%s" % msg)

            print("")
            print("\n".join(lines))
            print("")

            self._do_customer_service()

            if self.flow.all_ok:
                print("Calling flow.finalize()...")
                self.flow.finalize()
                #print("finalized:", self.flow.finalized)
                if self.rmflow:
                    app("Flow directory will be removed...")
                    try:
                        self.flow.rmtree()
                    except Exception:
                        logger.warning(
                            "Ignoring exception while trying to remove flow dir."
                        )

        finally:
            # Shutdown the scheduler thus allowing the process to exit.
            logger.debug('This should be the shutdown of the scheduler')

            # Unschedule all the jobs before calling shutdown
            #self.sched.print_jobs()
            if not has_sched_v3:
                for job in self.sched.get_jobs():
                    self.sched.unschedule_job(job)
            #self.sched.print_jobs()

            self.sched.shutdown()
            # Uncomment the line below if shutdown does not work!
            #os.system("kill -9 %d" % os.getpid())

    def send_email(self, msg, tag=None):
        """
        Send an e-mail before completing the shutdown.
        Returns 0 if success.
        """
        try:
            return self._send_email(msg, tag)
        except:
            self.exceptions.append(straceback())
            return -2

    def _send_email(self, msg, tag):
        if self.mailto is None:
            return -1

        header = msg.splitlines()
        app = header.append

        app("Submitted on: %s" % time.ctime(self.start_time))
        app("Completed on: %s" % time.asctime())
        app("Elapsed time: %s" % str(self.get_delta_etime()))
        app("Number of errored tasks: %d" % self.flow.num_errored_tasks)
        app("Number of unconverged tasks: %d" %
            self.flow.num_unconverged_tasks)

        strio = cStringIO()
        strio.writelines("\n".join(header) + 4 * "\n")

        # Add the status of the flow.
        self.flow.show_status(stream=strio)

        if self.exceptions:
            # Report the list of exceptions.
            strio.writelines(self.exceptions)

        if tag is None:
            tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]"

        return sendmail(subject=self.flow.name + tag,
                        text=strio.getvalue(),
                        mailto=self.mailto)
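
The start() method above has to cope with both APScheduler generations: on 3.x the periodic callback is registered with add_job() and a trigger string, while on 2.x it uses add_interval_job(). The following sketch shows that compatibility shim in isolation; the scheduler class, callback and interval below are illustrative assumptions, not part of the example itself.

# Compatibility sketch (assumed names), usable on APScheduler 2.x and 3.x.
try:
    # APScheduler >= 3.x
    from apscheduler.schedulers.background import BackgroundScheduler as _Sched
    HAS_SCHED_V3 = True
except ImportError:
    # APScheduler 2.x
    from apscheduler.scheduler import Scheduler as _Sched
    HAS_SCHED_V3 = False

def schedule_periodic(callback, seconds=30):
    """Run callback every `seconds` seconds on whichever API is installed."""
    sched = _Sched()
    if HAS_SCHED_V3:
        # 3.x style: the trigger type is passed as a string.
        sched.add_job(callback, "interval", seconds=seconds)
    else:
        # 2.x style: a dedicated helper per trigger type.
        sched.add_interval_job(callback, seconds=seconds)
    sched.start()
    return sched
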
class GPCAlgGlobFSM():
    def __init__(self, debugFlag=False, configFile = None):
        #SM FSM specific initialization
        self._fsm = GPCAlgGlobProc_sm(self)
        self._fsm.setDebugFlag(debugFlag)
        self.MPCAlgo = {'Active':None}
        self.logger = logging.getLogger("GPCAlgGlobProc")
        tmpLogger = logging.getLoggerClass()
        logging.setLoggerClass(sm_Logger)
        self._fsm.setDebugStream(logging.getLogger("GPCAlgGlobProc.fsm"))
        logging.setLoggerClass(tmpLogger)
        self.eventDeque = deque()
        #APScheduler
        self.sched = Scheduler()
        self.configFile = configFile
    def __del__(self):
        self.sched.shutdown(wait=False)

    def start(self):
        self._fsm.enterStartState()
        self.sched.start()

    def initInit(self):
        self.doInitMemory = {"Count":0,
                             "S0_tUpdate":{'State':None},
                             "MPCData":{'State':None},
                             "Config":{'State':None}}

    def doInit(self):
        mem = self.doInitMemory
        mem["Count"] += 1

        # Get the GPC Config file
        if mem["Config"]['State'] != 'Done':
            conf = mem["Config"]
            try:
                if self.configJob.isConfigRead():
                    conf['State'] = 'Done'
            except AttributeError:
                conf['State'] = 'Running'

        # Get the main "S0_tUpdate" from the SCADA system.
        if mem["S0_tUpdate"]['State'] == None \
           and self.configJob.isConfigRead():

            AlgConfVars = AlgData_OPC(variables=["S0_tUpdate",],
                                      opcserver = self.config["Tree"]["Global"]["OPCServer"])
            AlgConfVars.logger = self.logger
            mem["S0_tUpdate"]["Data"] = AlgConfVars
            mem["S0_tUpdate"]['State'] = 'Running'
        if mem["S0_tUpdate"]['State'] in ['Running']:
            S0 = mem["S0_tUpdate"]
            for i in xrange(3):
                sleep(0.2)
                S0['Data'].readOPC()
                if S0['Data'].opcVarsDict["S0_tUpdate"].value not in [None, 0]:
                    self.S0_tUpdate = S0['Data'].opcVarsDict["S0_tUpdate"].value
                    S0['State'] = 'Done'
                    break
            else:
                # only if the for loop is not stopped with break (no usable value found)
                if S0['Data'].opcVarsDict["S0_tUpdate"].isProblem():
                    S0['Data'].opcVarsDict["S0_tUpdate"]._reset()
                S0['State'] = 'Running'

        # Initialize the MPC OPC data objects.
        if mem["MPCData"]['State'] == None \
           and self.configJob.isConfigRead():
            # Basic dynamic System variables needed for MPC as input variables
            self.MPCData = AlgData_OPC(opcserver = self.config["Tree"]["Global"]["OPCServer"])
            self.MPCData.logger = self.logger
            # All MPC related output variables
            Variables = {'OPC_Group':'MPCOutVariables' }
            Variables.update(GPC_OutVars)
            self.MPCOutData = AlgData_OPC(opcserver = self.config["Tree"]["Global"]["OPCServer"],
                                          variables = Variables)
            self.MPCOutData.logger = self.logger
            self.MPCOutData.readOPC() # Need to read this variable once because otherwise it will not be usable for writing.
            # All GPC/MPC State related variables
            Variables = {'OPC_Group':'GPCStateVariables' }
            Variables.update(GPC_StateVars)
            self.MPCStateData = AlgData_OPC(opcserver = self.config["Tree"]["Global"]["OPCServer"],
                                            variables = Variables)
            self.MPCStateData.logger = self.logger
            mem["MPCData"]['State'] = 'Done'

    def doInitRTrigParam(self):
        self.RTrig = ReadTrigger( S0_tUpdate=self.S0_tUpdate,
                                  opcserver=self.config["Tree"]["Global"]["OPCServer"],
                                  test=True)
        self.RTrig.setLogger(self.logger)
        self.RTrig.CTimeperiod = self.config["Tree"]["MPC_Opti"]["ControlTimeperiod"]

    def doUpdateRTrigParam(self,dt):
        """Update trigger detection parameter to get a more precise
           identification of the positive slope instance."""
        self.RTrig.updateTrigParam()
        self.logger.debug( "Trigger: lastDT=%s,DT=%s" % (self.RTrig.lastDT,self.RTrig.DT) )

    def doUpdateConfig(self,conf):
        self.config = dict(zip(("Tree","Valid"),conf))
        MPCmode = self.config["Tree"]["MPC"]["mode"]
        if isinstance(self.MPCAlgo['Active'], MPCAlgos.__dict__[MPCmode]):
            self.MPCAlgo['Active'].updateConf(self.config['Tree']["MPC_"+MPCmode])

    def doRTrigInit(self):
        self.RTrig.jobRuns = 0
        DT = self.RTrig.DT
        NextRT = self.RTrig.getNextRT().replace(tzinfo=None)
        max_runs = self.RTrig.getMaxRuns()
        #Debug-GSc: test max_runs = 4
        self.RTrig.job = self.sched.add_interval_job(self.jobRTrig,
                                    seconds = DT,
                                    start_date = NextRT,
                                    max_runs = max_runs,
                                    name = "ReadTrigger-Job")
        self.doRTrigMemory = {"Count":0,
                              "TrigDone":False}

    def doRTrigStop(self):
        if self.RTrig.job.compute_next_run_time(datetime.now()):
            # only unschedule if the job is still scheduled, otherwise the scheduler raises an error
            self.sched.unschedule_job(self.RTrig.job)
        self.RTrig.job = None

    def doWTrigInit(self):
        #for debug reasons: OPC-values are sometimes lost.
        self.logger.debug( "Log QSoll before trigger:" )
        self.MPCOutData.readOPC()
        if self.isNoOPCWriteTrigger():
            self.logger.debug( "WriteTrigger is asked not to be set." )
        else:
            self.WTrig = WriteTrigger(S0_tUpdate=self.S0_tUpdate)
            self.WTrig.setLogger(self.logger)
            self.WTrig.job = self.sched.add_interval_job(self.jobWTrig,
                                        seconds = self.WTrig.DT,
                                        start_date = datetime.now() + timedelta(seconds=0.5),
                                        max_runs = 2,
                                        name = "WriteTrigger-Job")
            #GSc-ToDo: start a job that sets and resets the trigger
            # use self.WTrig.process()
            # Should be called a maximum of 2x self.WTrig.maxRuns
            # but only until self.WTrig.state is in ('Reset' or some "Error")
            # "sched" seems not to be best as Setting process can take several runs (DT 1s)
            # and reseting the same but in between 10%S0_tUpdate needs to be waited.
    def doReadOPC(self):
        #GSc-ToDo: rework this first level checking. Here only completely infeasible situations should lead to "VarsError"
        #Get first state related information and check it
        evStr = self.MPCStateData.readOPC()
        if evStr == None:
            evStr = self.checkMPCData(self.MPCStateData)
        #If OK Get the MPC "In" information and check it
        if evStr == "VarsOK":
            evStr = self.MPCData.readOPC()
        if evStr == None:
            evStr = self.checkMPCData(self.MPCData)

        if isinstance(evStr, (list,tuple)):
            evt = dict(zip(("Type","Data"),evStr))
        else:
            evt = {"Type":evStr}
        self.eventDeque.append(evt)

    def doUpdateParam(self):
        #Only update here if GPC is OPC-triggered
        if not self.RTrig.TrigOPC:
            return

        #Do update only if changed
        S0_tUpdate = self.MPCStateData.opcVarsDict["S0.S0_tUpdate"]
        S0_tUpdateDiff = S0_tUpdate.getDiff()
        if S0_tUpdateDiff != None and S0_tUpdateDiff.Diff[0] != 0:
            self.S0_tUpdate = S0_tUpdate.value
            self.RTrig.updateTrigParam(S0_tUpdate=self.S0_tUpdate)

    def doWriteOPCInit(self):
        #GSc-ToDo: Init write process
        self.logger.debug( "Init writeOPCVars process" )
        self.doWriteOPCMemory = {"Count":0,}

    def doWriteOPC(self):
        opcResult = self.MPCOutData.writeOPC(allStored=True, toOPC=True)
        if opcResult in [True, None, []]:
            self.eventHandler({"Type":"OPCWriteError",
                               "Data":"writeOPC returns: %s" % (opcResult)})
            return
        nbrW = len(opcResult)
        tfSuccess = [ri[1] == "Success" for ri in opcResult]
        if not all(tfSuccess):
            nbrErr = nbrW - sum(tfSuccess)
            if self.doWriteOPCMemory["Count"] > 3:
                self.logger.debug( "doWriteOPC after (%s) tries still %s un-successful opc-writeouts\n -> give-up" % \
                                   (self.doWriteOPCMemory["Count"],nbrErr))
                self.eventHandler({"Type":"OPCWriteError",
                                   "Data":"writeOPC returns: %s" % (opcResult)})
            else:
                self.logger.debug( "doWriteOPC (%s): %s un-successful opc-writeouts" % \
                                   (self.doWriteOPCMemory["Count"],nbrErr))
                sleep(0.5)
        else:
            self.logger.debug( "doWriteOPC (%s): ends successful" % \
                               (self.doWriteOPCMemory["Count"],) )
        self.doWriteOPCMemory["Count"] += 1


    def doCheckSysStates(self):
        #ToDo-GSc: check the on/off states of the GPC
        MPCSimu = self.isMPCSimu()
        if MPCSimu and self.getMPCSimuMode() in ['OPCReadOnly',]:
            self.eventDeque.append({"Type":"MPCInactif","Data":"OPCReadOnly Mode specified"})
            return

        # Check the life states of all configured actors

#        SysGPCState = getSysGPCState(self.MPCStateData.opcVarsDict)# Old Life/Autonom based approach
        SysGPCState = getSysGPCState_StMo(self.MPCStateData.opcVarsDict)
        if getattr(self, "SysGPCState", None):
            UpdatedBState = dict([(si,Statei) for si, Statei in SysGPCState.iteritems() if Statei != self.SysGPCState[si]])
        else:
            UpdatedBState = {}
        self.SysGPCState = SysGPCState

        #check the life states of actor S0 (SCADA system)
        if self.SysGPCState['S0'] == 'offline':
            self.eventHandler({"Type":"MPCImpossible","Data":"S0 is %s" % (self.SysGPCState['S0'],)})
            return
        elif self.SysGPCState['S0'] == 'maintenance':
            self.eventDeque.append({"Type":"MPCInactif","Data":"S0 Station is in maintenance"})
            return

        if all([zi in ['offline','maintenance'] for si,zi in self.SysGPCState.iteritems() if si not in ['S0','S99']]):
            self.eventDeque.append({"Type":"MPCInactif","Data":"There is NO Station controllable"})
            return
        else:
            BModeUdate = getSysBModeUpdate(self.MPCStateData.opcVarsDict)
            UpdatedBMode = dict([(si,bmi['Mode']) for si, bmi in BModeUdate.iteritems() if bmi['Update']])
            if self.MPCAlgo['Active'] != None and (UpdatedBState !={} or UpdatedBMode != {}):
                updateStruct = False
                algo = self.MPCAlgo['Active']
                if UpdatedBMode != {}:
                    res = algo.readBConfig(toUpdate=UpdatedBMode) #ToDo: check the return value (True False) if network configuration is usable.
                    if res != True:
                        interItem = {"Type":"MPCImpossible",
                                     "Data":"Error in reading the current basin configuration: %s" % (res,)}
                        self.eventHandler(interItem)
                        return
                    updateStruct = True
                MPCmode = self.config["Tree"]["MPC"]["mode"]
                C_Switch = algo.updateBasinConf(self.SysGPCState,updateStruct=updateStruct)
                self.handleStateSwitch(C_Switch)
            else:
                try:
                    MPCmode = self.config["Tree"]["MPC"]["mode"]
                    try:
                        if not isinstance(self.MPCAlgo['Active'], MPCAlgos.__dict__[MPCmode]):
                            algo = MPCAlgos.__dict__[MPCmode](self.config['Tree']["MPC_"+MPCmode],
                                                              sysVars=self.MPCData.opcVarsDict,
                                                              stateVars=self.MPCStateData.opcVarsDict,
                                                              outVars=self.MPCOutData.opcVarsDict)
                            C_Switch = algo.updateBasinConf(self.SysGPCState)
                            self.handleStateSwitch(C_Switch)
                            self.MPCAlgo['Active'] = algo
                    except KeyError as e:
                        interItem = {"Type":"MPCImpossible",
                                     "Data":"MPC-mode related class is missing. %s" % (e,)}
                        self.eventHandler(interItem)
                        return
                    except BaseException as e:
                        interItem = {"Type":"MPCImpossible",
                                     "Data":"Error during instantiation of the algo class: %s" % (e,)}
                        self.eventHandler(interItem)
                        return
                except KeyError as e:
                    interItem = {"Type":"MPCImpossible",
                                 "Data":"Error getting MPC-Mode specification: %s" % e}
                    self.eventHandler(interItem)
                    return

                #ToDo: Handle possible other control approaches that will run only as off-line control
                self.MPCAlgo['Inactive'] = []
                try:
                    for im in self.config["Tree"]["MPC"]["inactiveModes"]:
                        pass
                except:
                    pass


        #ToDo-GSc: integrate the AlgInernalSysFSM (S4, ...)
        # - init it in doInit
        # - process it here using the self.MPCData.S4_BZ

        self.logger.debug( "doCheckSysStates(): SysStates: %s; SysModes: %s" % (self.SysGPCState,BModeUdate) )

        #Check the SysGPCState again here as it may have changed due to Switching.
        #If a basin is still 'controllable' here, it means it is not controlled by GPC in this cycle.
        if all([zi in ['offline','maintenance','controllable'] for si,zi in self.SysGPCState.iteritems() if si not in ['S0','S99']]):
            self.eventDeque.append({"Type":"MPCInactif","Data":"There is NO Station configured for GPC control"})
        else:
            self.eventDeque.append({"Type":"MPCActive"})


    def doRunMPC(self):
        #Initialize the specified MPC mode class object.
        algo = self.MPCAlgo['Active']
        #run MPC
        try:
            algo.run(self.MPCData.opcVarsDict,
                     stateVars=self.MPCStateData.opcVarsDict,
                     outVars=self.MPCOutData.opcVarsDict)
            self.eventDeque.append({"Type":"MPCDone",})
        except BaseException as e:
            interItem = {"Type":"MPCImpossible",
                         "Data":"General MPC-Error: %s" % e}
            self.eventHandler(interItem)
            return

    def doLogMPCResults(self):
        #get the results and build a log entry
        res = [vi.wvalue for ki,vi in self.MPCOutData.opcVarsDict.iteritems() if ki.endswith('QSoll') and vi.isWReady()]
        self.logger.debug("MPC Results: %s" % (res,))

    def doResetWriteVars(self):
        for ki,vi in self.MPCOutData.opcVarsDict.items() + self.MPCStateData.opcVarsDict.items():
            if vi.isWReady():
                vi._reset()

    def doWarning(self,msg):
        pass

    def doSetGPCOffline(self):
        self.logger.debug("""====== GPC is Offline ======
The GPC: is now in Offline mode.
Only a 'Reset' event or a complete GSP restart is possible in this system state.
============================""")
        if self.isMPCSimu():
            try:
                DT = self.S0_tUpdate - 2*self.RTrig.gitter
                DT -= self.S0_tUpdate / self.RTrig.TrigSizePct # This is the sleep time in GPCOffline mode.
            except:
                DT = 900
            #Debug-GSc: test DT = 40
            self.sched.add_date_job( self.jobReset,
                                     date = datetime.now() + timedelta(seconds=DT),
                                     name = "Reset-Job" )
            self.logger.debug("""====!! GPC auto-Reset !!====
The GPC: will be automatically reset in %s seconds
============================""" % (DT,))


    def isInitDone(self):
        #Check all doInitMemory entries for their "State" status
        state = [si['State'] == 'Done' for si in self.doInitMemory.itervalues() if isinstance(si, dict) and si.has_key('State')]
        return all(state)

    def isNotSync(self):
        return not self.RTrig.isSync()

    def isNoOPCWrite(self):
        if not self.isMPCSimu():
            return False
        elif self.getMPCSimuMode() in [None,'NoOPCWrite']:
            return True
        return False

    def isNoOPCWriteTrigger(self):
        if not self.isMPCSimu():
            return False
        elif self.getMPCSimuMode() in [None,'NoOPCWrite','NoOPCWriteTrigger']:
            return True
        return False

    def isMPCSimu(self):
        try:
            MPCSimu = self.config['Tree']['MPC']['simu']
        except:
            MPCSimu = True
        if MPCSimu:
            return True
        return False
    def isOPCWriteOK(self):
        if self.MPCOutData.isWAllIdle():
            self.logger.debug("isOPCWriteOK == True")
            return True
        self.logger.debug("isOPCWriteOK == False")
        return False
    def isOPCWriteError(self):
        if self.MPCOutData.isWAnyProblem():
            self.logger.debug("isOPCWriteError == True")
            return True
        self.logger.debug("isOPCWriteError == False")
        return False

    def logIgnored(self):
        self._fsm.getDebugStream().write("The latest asked transition was ignored by the StateMashine.")

    def jobRTrig(self):
        j = self.RTrig.job
        self.doRTrigMemory['Count'] += 1
        if self.RTrig.getRTrigJob():
            self.doRTrigMemory['TrigDone'] = True
            self.doRTrigMemory['Event'] = {"Type":"TrigOK", "Data":self.RTrig.lastT}
        else:
            if not j.compute_next_run_time(datetime.now()):
                self.doRTrigMemory['TrigDone'] = True
                self.doRTrigMemory['Event'] = {"Type":"TrigError",
                             "Data":"%s: no next fire time scheduled" % j.name}

    def jobWTrig(self):
        if self.WTrig.isInProcess():
            cState = self.WTrig.state
            while self.WTrig.state == cState:
                self.WTrig.process()
                if self.WTrig.state == cState:
                    sleep(1)
        #for debug reasons: OPC-values are sometimes lost.
        if not self.WTrig.isInProcess():
            self.logger.debug( "Log QSoll after trigger:" )
            self.MPCOutData.readOPC()

        if self.WTrig.isJobAlife() and not self.WTrig.isInProcess():
            self.sched.unschedule_job(self.WTrig.job)
            self.WTrig.job = None
            self.logger.debug("jobWTrigError: job unscheduled due to probable Error WTrig process")
            #GSC-ToDo: this is not a correct solution because it can leave the system in an incoherent state.

    def jobReset(self):
        interItem = {"Type":"Reset"}
        self.eventHandler(interItem)

    def checkInitSleep(self):
        if self.isInitDone():
            return False
        elif self.doInitMemory["Count"] == 0:
            return False
        elif self.doInitMemory["Count"] % 3 == 0:
            return True
        else:
            return False

    def checkMPCData(self,MPCData):
        #ToDo: Check if all variables of stations that are not "offline" are usable.
        for k,v in MPCData.opcVarsDict.iteritems():
            if not v.usable:
                return ("VarsError","%s: is not usable"%(k,))
        return "VarsOK"
    def getMPCSimuMode(self):
        try:
            MPCSimuMode = self.config['Tree']['MPC']['simuMode']
        except:
            return None
        return MPCSimuMode
    def getFSMState(self):
        if not self._fsm.isInTransition():
            cState = self._fsm.getState().getName()
            FSMState = "%s" % (cState,)
            ret = {'Trans':None,'State':cState,'Msg':FSMState}
        else:
            trans = self._fsm.getTransition()
            pState = self._fsm.getPreviousState().getName()
            FSMState = "In Transition: %s from %s" %(trans,pState)
            ret = {'Trans':trans,'State':pState,'Msg':FSMState}

        return ret

    def handleStateSwitch(self,C_Switch):
        """Handles the switch from Mode 1 (C-abl) -> 2 (C) or 2 (C) -> 1 (C-abl)
        """
        for sti in C_Switch.get('C-abl -> C',[]):
            vi = "%s.%s_SteuerModus" % (sti,sti)
            self.MPCStateData.opcVarsDict[vi].setWriteValue(6)
            self.SysGPCState[sti] = 'controlled'
        for sti in C_Switch.get('C -> C-abl',[]):
            vi = "%s.%s_SteuerModus" % (sti,sti)
            self.MPCStateData.opcVarsDict[vi].setWriteValue(5)
            self.SysGPCState[sti] = 'controllable'
        if not self.isNoOPCWrite() and len(C_Switch) > 0:
            opcResult = self.MPCStateData.writeOPC(allStored=True, toOPC=True)
            if opcResult in [True, None, []]:
                self.logger.debug( "Basin StateSwitch error: writeOPC returns: %s" % (opcResult))
            else:
                nbrW = len(opcResult)
                tfSuccess = [ri[1] == "Success" for ri in opcResult]
                if not all(tfSuccess):
                    nbrErr = nbrW - sum(tfSuccess)
                    self.logger.debug( "Basin StateSwitch error: writeOPC %s un-successful opc-writeouts" % \
                                       (nbrErr,))
                else:
                    self.logger.debug( "Basin StateSwitch: %s; writeOPC: %s" % (C_Switch, opcResult,))
        elif self.isNoOPCWrite() and len(C_Switch) > 0:
            self.logger.debug( "Basin StateSwitch: %s but NoOPCWrite is active" % (C_Switch,) )

    def eventHandler(self,evt):
        evtStr = evt["Type"]
        if evt.has_key("Data"):
            evtStr = ';'.join((evtStr,str(evt['Data'])))
        self._fsm.getDebugStream().write("#%s (%s)\n" % (evtStr,datetime.now()))

        if evt['Type'] == "DoInit":
            self._fsm.InitDone()
        elif evt['Type'] == "InitError":
            self._fsm.InitError(evt['Data'])
        elif evt['Type'] == "TrigOK":
            self._fsm.TrigOK(evt['Data'])
        elif evt['Type'] == "TrigError":
            self._fsm.TrigError(evt['Data'])
        elif evt['Type'] == "VarsOK":
            self._fsm.VarsOK()
        elif evt['Type'] == "VarsError":
            self._fsm.VarsError()
        elif evt['Type'] == "MPCActive":
            self._fsm.MPCActive()
        elif evt['Type'] == "MPCInactif":
            self._fsm.MPCInactif()
        elif evt['Type'] == "MPCImpossible":
            self._fsm.MPCImpossible(evt['Data'])
        elif evt['Type'] == "MPCDone":
            self._fsm.MPCDone()
        elif evt['Type'] == "OPCWrite":
            self._fsm.OPCWrite(evt.get('Data',None))
        elif evt['Type'] == "Reset":
            self._fsm.Reset()
        elif evt['Type'] == "Stop":
            sys.exit(0)
        else:
            raise ValueError("Unhandled Event type: %s" % evt)
class AlertSchedulerHandler():
  FILENAME = 'definitions.json'
  TYPE_PORT = 'PORT'
  TYPE_METRIC = 'METRIC'
  TYPE_SCRIPT = 'SCRIPT'
  TYPE_WEB = 'WEB'

  APS_CONFIG = { 
    'threadpool.core_threads': 3,
    'coalesce': True,
    'standalone': False
  }

  def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir,
      cluster_configuration, config, in_minutes=True):

    self.cachedir = cachedir
    self.stacks_dir = stacks_dir
    self.common_services_dir = common_services_dir
    self.host_scripts_dir = host_scripts_dir

    self._cluster_configuration = cluster_configuration
    
    if not os.path.exists(cachedir):
      try:
        os.makedirs(cachedir)
      except:
        logger.critical("[AlertScheduler] Could not create the cache directory {0}".format(cachedir))

    self._collector = AlertCollector()
    self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)
    self.__in_minutes = in_minutes
    self.config = config

    # register python exit handler
    atexit.register(self.exit_handler)


  def exit_handler(self):
    """
    Exit handler
    """
    self.stop()


  def update_definitions(self, heartbeat):
    """
    Updates the persisted alert definitions JSON.
    :param heartbeat:
    :return:
    """
    if 'alertDefinitionCommands' not in heartbeat:
      logger.warning("There are no alert definition commands in the heartbeat; unable to update definitions")
      return

    # prune out things we don't want to store
    alert_definitions = []
    for command in heartbeat['alertDefinitionCommands']:
      command_copy = command.copy()

      # no need to store these since we always use the in-memory cached values
      if 'configurations' in command_copy:
        del command_copy['configurations']

      alert_definitions.append(command_copy)

    # write out the new definitions
    with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f:
      json.dump(alert_definitions, f, indent=2)

    # reschedule only the jobs that have changed
    self.reschedule()


  def __make_function(self, alert_def):
    return lambda: alert_def.collect()


  def start(self):
    """ loads definitions from file and starts the scheduler """

    if self.__scheduler is None:
      return

    if self.__scheduler.running:
      self.__scheduler.shutdown(wait=False)
      self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)

    alert_callables = self.__load_definitions()

    # schedule each definition
    for _callable in alert_callables:
      self.schedule_definition(_callable)

    logger.info("[AlertScheduler] Starting {0}; currently running: {1}".format(
      str(self.__scheduler), str(self.__scheduler.running)))

    self.__scheduler.start()


  def stop(self):
    if self.__scheduler is not None:
      self.__scheduler.shutdown(wait=False)
      self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)

    logger.info("[AlertScheduler] Stopped the alert scheduler.")

  def reschedule(self):
    """
    Removes jobs that are scheduled where their UUID no longer is valid.
    Schedules jobs where the definition UUID is not currently scheduled.
    """
    jobs_scheduled = 0
    jobs_removed = 0

    definitions = self.__load_definitions()
    scheduled_jobs = self.__scheduler.get_jobs()

    # for every scheduled job, see if its UUID is still valid
    for scheduled_job in scheduled_jobs:
      uuid_valid = False

      for definition in definitions:
        definition_uuid = definition.get_uuid()
        if scheduled_job.name == definition_uuid:
          uuid_valid = True
          break

      # jobs without valid UUIDs should be unscheduled
      if uuid_valid == False:
        jobs_removed += 1
        logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name))
        self._collector.remove_by_uuid(scheduled_job.name)
        self.__scheduler.unschedule_job(scheduled_job)

    # for every definition, determine if there is a scheduled job
    for definition in definitions:
      definition_scheduled = False
      for scheduled_job in scheduled_jobs:
        definition_uuid = definition.get_uuid()
        if definition_uuid == scheduled_job.name:
          definition_scheduled = True
          break

      # if no jobs are found with the definitions UUID, schedule it
      if definition_scheduled == False:
        jobs_scheduled += 1
        self.schedule_definition(definition)

    logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format(
        str(jobs_scheduled), str(jobs_removed)))


  def reschedule_all(self):
    """
    Unschedules every currently scheduled job and then schedules a job for
    every loaded definition.
    """
    jobs_scheduled = 0
    jobs_removed = 0

    definitions = self.__load_definitions()
    scheduled_jobs = self.__scheduler.get_jobs()

    # unschedule all scheduled jobs
    for scheduled_job in scheduled_jobs:
        jobs_removed += 1
        logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name))
        self._collector.remove_by_uuid(scheduled_job.name)
        self.__scheduler.unschedule_job(scheduled_job)

    # for every definition, schedule a job
    for definition in definitions:
        jobs_scheduled += 1
        self.schedule_definition(definition)

    logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format(
      str(jobs_scheduled), str(jobs_removed)))


  def collector(self):
    """ gets the collector for reporting to the server """
    return self._collector


  def __load_definitions(self):
    """
    Loads all alert definitions from a file. All clusters are stored in
    a single file.
    :return:
    """
    definitions = []

    all_commands = None
    alerts_definitions_path = os.path.join(self.cachedir, self.FILENAME)
    try:
      with open(alerts_definitions_path) as fp:
        all_commands = json.load(fp)
    except:
      logger.warning('[AlertScheduler] {0} not found or invalid. No alerts will be scheduled until registration occurs.'.format(alerts_definitions_path))
      return definitions

    for command_json in all_commands:
      clusterName = '' if not 'clusterName' in command_json else command_json['clusterName']
      hostName = '' if not 'hostName' in command_json else command_json['hostName']

      for definition in command_json['alertDefinitions']:
        alert = self.__json_to_callable(clusterName, hostName, definition)

        if alert is None:
          continue

        alert.set_helpers(self._collector, self._cluster_configuration)

        definitions.append(alert)

    return definitions


  def __json_to_callable(self, clusterName, hostName, json_definition):
    """
    Converts the JSON that represents all aspects of a definition
    and makes an object that extends BaseAlert, which is used for the individual alert checks.
    """
    source = json_definition['source']
    source_type = source.get('type', '')

    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("[AlertScheduler] Creating job type {0} with {1}".format(source_type, str(json_definition)))

    alert = None

    if source_type == AlertSchedulerHandler.TYPE_METRIC:
      alert = MetricAlert(json_definition, source)
    elif source_type == AlertSchedulerHandler.TYPE_PORT:
      alert = PortAlert(json_definition, source)
    elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
      source['stacks_directory'] = self.stacks_dir
      source['common_services_directory'] = self.common_services_dir
      source['host_scripts_directory'] = self.host_scripts_dir
      alert = ScriptAlert(json_definition, source, self.config)
    elif source_type == AlertSchedulerHandler.TYPE_WEB:
      alert = WebAlert(json_definition, source, self.config)

    if alert is not None:
      alert.set_cluster(clusterName, hostName)

    return alert


  def schedule_definition(self,definition):
    """
    Schedule a definition (callable). Scheduled jobs are given the UUID
    as their name so that they can be identified later on.
    <p/>
    This function can be called with a definition that is disabled; it will
    simply NOOP.
    """
    # NOOP if the definition is disabled; don't schedule it
    if not definition.is_enabled():
      logger.info("[AlertScheduler] The alert {0} with UUID {1} is disabled and will not be scheduled".format(
          definition.get_name(),definition.get_uuid()))
      return

    job = None

    if self.__in_minutes:
      job = self.__scheduler.add_interval_job(self.__make_function(definition),
        minutes=definition.interval())
    else:
      job = self.__scheduler.add_interval_job(self.__make_function(definition),
        seconds=definition.interval())

    # although the documentation states that Job(kwargs) takes a name
    # key/value pair, it does not actually set the name; do it manually
    if job is not None:
      job.name = definition.get_uuid()

    logger.info("[AlertScheduler] Scheduling {0} with UUID {1}".format(
      definition.get_name(), definition.get_uuid()))


  def get_job_count(self):
    """
    Gets the number of jobs currently scheduled. This is mainly used for
    test verification of scheduling.
    """
    if self.__scheduler is None:
      return 0

    return len(self.__scheduler.get_jobs())


  def execute_alert(self, execution_commands):
    """
    Executes an alert immediately, ignoring any scheduled jobs. The existing
    jobs remain untouched. The result of this is stored in the alert
    collector for transmission during the next heartbeat
    """
    if self.__scheduler is None or execution_commands is None:
      return

    for execution_command in execution_commands:
      try:
        alert_definition = execution_command['alertDefinition']

        clusterName = '' if not 'clusterName' in execution_command else execution_command['clusterName']
        hostName = '' if not 'hostName' in execution_command else execution_command['hostName']

        alert = self.__json_to_callable(clusterName, hostName, alert_definition)

        if alert is None:
          continue

        logger.info("[AlertScheduler] Executing on-demand alert {0} ({1})".format(alert.get_name(),
            alert.get_uuid()))

        alert.set_helpers(self._collector, self._cluster_configuration)
        alert.collect()
      except:
        logger.exception("[AlertScheduler] Unable to execute the alert outside of the job scheduler")
class TrainScheduler(object):
 
	def __init__(self):
		logging.basicConfig(level=logging.DEBUG, filename="debug.log", format='%(asctime)s %(levelname)-8s %(message)s', datefmt="%d.%m.%Y %H:%M:%S")

		self.scheduler = Scheduler()
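		# checkForDuplicates is invoked every time a job is added to a job store,
		# which lets the scheduler drop duplicate train entries (see checkForDuplicates below).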
		self.scheduler.add_listener(self.checkForDuplicates, apscheduler.events.EVENT_JOBSTORE_JOB_ADDED)
		self.scheduler.start()

		if len(self.scheduler.get_jobs()) == 0:
			self.createInitSchedule()

		self.log("Initial tasks completed. Waiting for next event..")

		while True:
			try:
				time.sleep(10)
				#self.scheduler.print_jobs()

			except KeyboardInterrupt:
				self.log("Shutting down..")
				self.scheduler.shutdown()
				quit()


	def createInitSchedule(self):

		self.log("Perform initial query for passenger trains..")
		self.processPassenger()
		self.log("Perform initial query for freight trains..")
		self.processFreight()
		self.log("Perform initial query for auto trains..")
		self.processAutotrain()

		self.log("Creating initial train schedule..")
		
		# request passenger trains every hour
		self.scheduler.add_cron_job(self.processPassenger, hour="*/1", minute="0", day="*", month="*", year="*")
		# request freight trains every day
		self.scheduler.add_cron_job(self.processFreight, hour="0", minute="2", day="*", month="*", year="*")
		# request auto trains every month
		self.scheduler.add_cron_job(self.processAutotrain, hour="0", minute="5", day="1", month="*", year="*")


	def processPassenger(self):
		# return trains for station in question
		tReq = passenger.PassengerTrainRequest(PASSENGER_STATION_ID)
	 
		for train in tReq.getTrainList():
			trainTime = train.actualTime if (train.actualTime) else train.scheduledTime
			trainTimeCheck = trainTime - datetime.timedelta(minutes=CHECKBEFORE)
						
			try:
				self.scheduler.add_date_job(self.checkIfOnTime, trainTimeCheck, args=[train], name=train.name)
				self.log("Schedule passenger train '%s' to be checked on %s." % (train.name, trainTimeCheck))

			except ValueError:
				try:
					self.scheduler.add_date_job(self.output, trainTime, args=[train], name=train.name)
					self.log("Schedule passenger train '%s' to be displayed on %s." % (train.name, trainTime))

				except ValueError:
					self.log("Passenger train '%s' (%s) already passed by." % (train.name, trainTime))

	def checkIfOnTime(self, remTrain):
		# return trains for station in question
		tReq = passenger.PassengerTrainRequest(PASSENGER_STATION_ID)
	 
		for train in tReq.getTrainList():
			if remTrain.name == train.name:
				trainTime = train.actualTime if (train.actualTime) else train.scheduledTime
				try:
					self.scheduler.add_date_job(self.output, trainTime, args=[train], name=train.name)
					self.log("Schedule passenger train '%s' to be displayed on %s." % (train.name, trainTime))

				except ValueError:
					self.log("Passenger train '%s' (%s) already passed by." % (train.name, trainTime))
				break


	def processFreight(self):
		# return trains for station in question
		freightTrains = freight.FreightTrainRequest(FREIGHT_STATION_ID)
 
		for train in freightTrains.getTrainList():
			# FIXME: only arrival atm
			if train.arrival > datetime.datetime.now():
				self.log("Schedule freight train '%s' to be displayed on %s." % (train.name, train.arrival))
				self.scheduler.add_date_job(self.output, train.arrival, args=[train], name=train.name)
			else:
				self.log("Freight train '%s' (%s) already passed." % (train.name, train.arrival))


	def processAutotrain(self):
		# return trains for station in question
		freightTrains = autotrain.AutoTrainRequest(AUTO_TRAIN_STATION_NAME)
	 
		for train in freightTrains.getTrainList():
			if train.arrival > datetime.datetime.now():
				self.log("Schedule auto train '%s' to be displayed on %s." % (train.name, train.arrival))
				self.scheduler.add_date_job(self.output, train.arrival, args=[train], name=train.name)
			else:
				self.log("Auto train '%s' (%s) already passed." % (train.name, train.arrival))

	def checkForDuplicates(self, event):
		jobs = self.scheduler.get_jobs()

		if jobs:
			# jobs with the same name (train name) and the same next run time are duplicates
			dups = [job for job in jobs if job.name == event.job.name and job.next_run_time == event.job.next_run_time]
			if len(dups) > 1:
				self.log("Unscheduling %s." % event.job)
				self.scheduler.unschedule_job(event.job)


	def output(self, train):
		self.log("OUTPUT: %s" % train)
		f = open(OUTPUT_FILE, "a")
		f.write("%s\n" % train)
		f.close()


	def log(self, message):
		logging.info("* %s" % message)
Exemple #29
0
class ProgramHandler:
    def __init__(self, radio_station):
        self.__radio_station = radio_station
        self.__scheduler = None
        self.__scheduled_jobs = None
        self.__start_listeners()
        self.__is_starting_up = True
        self.__interval_hours = 3  # Time after which to schedule again
        self.__radio_station.logger.info(
            "Done initialising ProgramHandler for {0}".format(
                radio_station.station.name))

    def run(self):
        self.run_current_schedule()
        self.__is_starting_up = False

    def __prepare_schedule(self):
        self.__load_programs()
        self.__scheduler = Scheduler(timezone=pytz.utc)
        self.__scheduled_jobs = dict()

    def run_current_schedule(self):
        self.__prepare_schedule()
        self.__scheduler.start()
        self.__schedule_programs()
        #self.__schedule_next_schedule()

    def stop(self):
        self.__stop_program()
        # any clean up goes here
        # unschedule stuff

    def __schedule_next_schedule(self):
        base_date = datetime.now()
        next_schedule_date = base_date + timedelta(
            0, 0, 0, 0, 0, self.__interval_hours)  # 3 hours
        self.__scheduler.add_date_job(getattr(self, 'run_current_schedule'),
                                      next_schedule_date)

    def __schedule_programs(self):
        for scheduled_program in self.__scheduled_programs:
            if not self.__is_program_expired(scheduled_program):
                self.__add_scheduled_job(scheduled_program)
                self.__radio_station.logger.info(
                    "Scheduled program {0} for station {1} starting at {2}".
                    format(scheduled_program.program.name,
                           self.__radio_station.station.name,
                           scheduled_program.start))

    def __add_scheduled_job(self, scheduled_program):
        start_time = self.__get_program_start_time(scheduled_program).replace(
            tzinfo=None)
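        # tzinfo is stripped above, presumably because APScheduler 2.x date jobs expect naive datetimes; mixing aware and naive values would break the run-time comparison.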
        program = RadioProgram(scheduled_program, self.__radio_station)
        try:
            scheduled_job = self.__scheduler.add_date_job(
                getattr(program, 'start'), start_time)
            self.__scheduled_jobs[scheduled_program.id] = scheduled_job
        except Exception as e:
            self.__radio_station.logger.error(
                "Error {err} in __add_scheduled_job".format(err=e.message))

    def __delete_scheduled_job(self, index):
        if not self.__scheduled_jobs:
            self.__radio_station.logger.warning(
                "Failed to delete job (no jobs are scheduled)")
            return

        if index in self.__scheduled_jobs:
            try:
                self.__scheduler.unschedule_job(self.__scheduled_jobs[index])
            except:
                # The job probably ran already
                self.__radio_station.logger.warning(
                    "Failed to remove unscheduled job #{}".format(index))
            del self.__scheduled_jobs[index]

    def __stop_program(self):
        # self.__running_program.stop()
        return

    def __run_program(self):
        # self.__running_program.run()
        return

    def __load_programs(self):
        timezone = self.__radio_station.station.timezone
        #if self.__is_starting_up:
        date_filter = "((date(start) = date(now())) or (start < now() and radio_scheduledprogram.end > now()))"
        #else:
        #   date_filter = "(start >= now() at time zone '{tz}' and start < now() at time zone '{tz}' + interval '{interval} hour')".format(
        #        tz=timezone, interval=self.__interval_hours)
        query = self.__radio_station.db.query(ScheduledProgram).filter(
            ScheduledProgram.station_id ==
            self.__radio_station.station.id).filter(
                text(date_filter)).filter(ScheduledProgram.deleted == False)
        self.__scheduled_programs = query.all()
        self.__radio_station.logger.info("Loaded {1} programs for {0}".format(
            self.__radio_station.station.name, len(self.__scheduled_programs)))

    def __load_program(self, program_id):
        return self.__radio_station.db.query(ScheduledProgram).filter(
            ScheduledProgram.id == program_id).first()

    def __start_listeners(self):
        t = threading.Thread(target=self.__listen_for_scheduling_changes,
                             args=(DefaultConfig.SCHEDULE_EVENTS_SERVER_IP,
                                   DefaultConfig.SCHEDULE_EVENTS_SERVER_PORT))
        t.start()

    def __listen_for_scheduling_changes(self, ip, port):
        sck = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        addr = (ip, port)

        # It may not be possible to connect after restart, TIME_WAIT could come into play etc. Anyway, keep trying
        connected = False
        while not connected:
            try:
                sck.connect(addr)
                connected = True
            except:
                self.__radio_station.logger.warning(
                    "[Station #{}] Could not connect to server, retrying in 30..."
                    .format(self.__radio_station.id))
                sleep(30)
        sck.send(
            json.dumps({
                'station': self.__radio_station.station.id,
                'action': 'register'
            }))

        while True:
            data = sck.recv(10240000)
            try:
                event = json.loads(data)
            except ValueError as e:
                continue
            if "action" in event and "id" in event:
                if event["action"] == "delete":
                    self.__delete_scheduled_job(event["id"])
                    self.__radio_station.logger.info(
                        "Scheduled program with id {0} has been deleted".
                        format(event["id"]))
                elif event["action"] == "add":
                    scheduled_program = self.__load_program(event["id"])
                    if not self.__is_program_expired(scheduled_program):
                        self.__add_scheduled_job(scheduled_program)
                        self.__radio_station.logger.info(
                            "Scheduled program with id {0} has been added at time {1}"
                            .format(event["id"], scheduled_program.start))
                elif event["action"] == "update":
                    self.__delete_scheduled_job(event["id"])
                    scheduled_program = self.__load_program(event["id"])
                    if not self.__is_program_expired(scheduled_program):
                        self.__add_scheduled_job(scheduled_program)
                        self.__radio_station.logger.info(
                            "Scheduled program with id {0} has been moved to start at time {1}"
                            .format(event["id"], scheduled_program.start))
                elif event["action"] == "sync":
                    #self.__radio_station.logger.info("Syncing music for station {0}".format(event["id"]))
                    t = threading.Thread(target=self.__process_music_data,
                                         args=(event["id"],
                                               event["music_data"]))
                    t.start()

    def __get_dict_from_rows(self, rows):
        result = dict()
        for row in rows:
            result[row.title] = row
        return result

    def __process_music_data(self, station_id, json_string):
        songs_in_db = self.__get_dict_from_rows(
            self.__radio_station.db.query(ContentMusic).filter(
                ContentMusic.station_id == station_id).all())
        artists_in_db = self.__get_dict_from_rows(
            self.__radio_station.db.query(ContentMusicArtist).filter(
                ContentMusicArtist.station_id == station_id).all())
        albums_in_db = self.__get_dict_from_rows(
            self.__radio_station.db.query(ContentMusicAlbum).filter(
                ContentMusicAlbum.station_id == station_id).all())

        data = json.loads(json_string)
        for artist in data:
            if artist in artists_in_db:
                music_artist = artists_in_db[artist]
            else:
                # persist the artist
                music_artist = ContentMusicArtist(**{
                    'title': artist,
                    'station_id': station_id
                })
                artists_in_db[artist] = music_artist
                self.__radio_station.db.add(music_artist)
                try:
                    self.__radio_station.db._model_changes = {}
                    self.__radio_station.db.commit()
                except DatabaseError:
                    self.__radio_station.db.rollback()
                    continue

            for album in data[artist]:
                if album in albums_in_db:
                    music_album = albums_in_db[album]
                else:
                    # persist the album
                    music_album = ContentMusicAlbum(**{
                        'title': album,
                        'station_id': station_id
                    })
                    albums_in_db[album] = music_album
                    self.__radio_station.db.add(music_album)
                    try:
                        self.__radio_station.db._model_changes = {}
                        self.__radio_station.db.commit()
                    except DatabaseError:
                        self.__radio_station.db.rollback()
                        continue

                for song in data[artist][album]['songs']:
                    if song['title'] in songs_in_db:
                        music_song = songs_in_db[song['title']]
                    else:
                        music_song = ContentMusic(
                            **{
                                'title': song['title'],
                                'duration': song['duration'],
                                'station_id': station_id,
                                'album_id': music_album.id,
                                'artist_id': music_artist.id
                            })
                        songs_in_db[song['title']] = music_song
                        self.__radio_station.db.add(music_song)
                    try:
                        self.__radio_station.db._model_changes = {}
                        self.__radio_station.db.commit()
                    except DatabaseError:
                        self.__radio_station.db.rollback()
                        continue

    """
    Gets the program to run from the current list of programs that are lined up for the day
    """

    def __get_current_program(self):
        for program in self.__scheduled_programs:
            if not self.__is_program_expired(program):
                return program

    """
    Returns whether or not the time for a particular program has passed
    """

    def __is_program_expired(self, scheduled_program):
        now = arrow.utcnow()
        return (scheduled_program.start_utc +
                scheduled_program.program.duration) < (now +
                                                       timedelta(minutes=1))

    def __get_program_start_time(self, scheduled_program):
        now = arrow.utcnow().datetime
        if scheduled_program.start_utc < now:  # Time at which program begins is already past
            return now + timedelta(seconds=5)  # 5 second scheduling allowance
        else:
            return scheduled_program.start_utc + timedelta(
                seconds=5)  # 5 second scheduling allowance
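
A minimal standalone sketch (same APScheduler 2.x API as the example above; the callable and delay are illustrative) of the date-job pattern used by __add_scheduled_job and __delete_scheduled_job:

from datetime import datetime, timedelta

from apscheduler.scheduler import Scheduler

sched = Scheduler()
sched.start()

def start_program():
    # stand-in for RadioProgram.start in the example above
    print('program started')

# schedule a one-off run a few seconds from now and keep the job handle
job = sched.add_date_job(start_program, datetime.now() + timedelta(seconds=5))

# if the program is deleted before it fires, cancel it via the handle;
# unscheduling a job that has already run raises (KeyError in APScheduler 2.x),
# hence the guard, which mirrors the example's try/except
try:
    sched.unschedule_job(job)
except KeyError:
    pass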
Exemple #30
0
class AlertSchedulerHandler():
    FILENAME = 'definitions.json'
    TYPE_PORT = 'PORT'
    TYPE_METRIC = 'METRIC'
    TYPE_AMS = 'AMS'
    TYPE_SCRIPT = 'SCRIPT'
    TYPE_WEB = 'WEB'
    TYPE_RECOVERY = 'RECOVERY'

    def __init__(self,
                 cachedir,
                 stacks_dir,
                 common_services_dir,
                 extensions_dir,
                 host_scripts_dir,
                 cluster_configuration,
                 config,
                 recovery_manager,
                 in_minutes=True):

        self.cachedir = cachedir
        self.stacks_dir = stacks_dir
        self.common_services_dir = common_services_dir
        self.extensions_dir = extensions_dir
        self.host_scripts_dir = host_scripts_dir

        self._cluster_configuration = cluster_configuration

        # a mapping between a cluster name and a unique hash for all definitions
        self._cluster_hashes = {}

        # the amount of time, in seconds, that an alert can run after its scheduled time
        alert_grace_period = int(config.get('agent', 'alert_grace_period', 5))

        if not os.path.exists(cachedir):
            try:
                os.makedirs(cachedir)
            except:
                logger.critical(
                    "[AlertScheduler] Could not create the cache directory {0}"
                    .format(cachedir))

        apscheduler_standalone = False

        self.APS_CONFIG = {
            'apscheduler.threadpool.core_threads': 3,
            'apscheduler.coalesce': True,
            'apscheduler.standalone': apscheduler_standalone,
            'apscheduler.misfire_grace_time': alert_grace_period,
            'apscheduler.threadpool.context_injector':
                self._job_context_injector if not apscheduler_standalone else None,
            'apscheduler.threadpool.agent_config': config
        }

        self._collector = AlertCollector()
        self.__scheduler = Scheduler(self.APS_CONFIG)
        self.__in_minutes = in_minutes
        self.config = config
        self.recovery_manager = recovery_manager

        # register python exit handler
        ExitHelper().register(self.exit_handler)

    def _job_context_injector(self, config):
        """
    apscheduler hack to inject monkey-patching, context and configuration to all jobs inside scheduler in case if scheduler running
    in embedded mode

    Please note, this function called in job context thus all injects should be time-running optimized

    :type config AmbariConfig.AmbariConfig
    """
        if not config.use_system_proxy_setting():
            from ambari_commons.network import reconfigure_urllib2_opener
            reconfigure_urllib2_opener(ignore_system_proxy=True)

    def exit_handler(self):
        """
    Exit handler
    """
        self.stop()

    def update_definitions(self, heartbeat):
        """
    Updates the persisted alert definitions JSON.
    :param heartbeat:
    :return:
    """
        if 'alertDefinitionCommands' not in heartbeat:
            logger.warning(
                "There are no alert definition commands in the heartbeat; unable to update definitions"
            )
            return

        # prune out things we don't want to store
        alert_definitions = []
        for command in heartbeat['alertDefinitionCommands']:
            command_copy = command.copy()

            # no need to store these since we always use the in-memory cached values
            if 'configurations' in command_copy:
                del command_copy['configurations']

            alert_definitions.append(command_copy)

        # write out the new definitions
        with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f:
            json.dump(alert_definitions, f, indent=2)

        # determine how to reschedule the jobs
        reschedule_all = False
        if "clusterName" in command_copy and command_copy[
                "clusterName"] not in self._cluster_hashes:
            reschedule_all = True

        if reschedule_all is True:
            # reschedule all jobs, creating new instances
            self.reschedule_all()
        else:
            # reschedule only the jobs that have changed
            self.reschedule()

    def __make_function(self, alert_def):
        return lambda: alert_def.collect()

    def start(self):
        """ loads definitions from file and starts the scheduler """

        if self.__scheduler is None:
            return

        if self.__scheduler.running:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(self.APS_CONFIG)

        alert_callables = self.__load_definitions()

        # schedule each definition
        for _callable in alert_callables:
            self.schedule_definition(_callable)

        logger.info(
            "[AlertScheduler] Starting {0}; currently running: {1}".format(
                str(self.__scheduler), str(self.__scheduler.running)))

        self.__scheduler.start()

    def stop(self):
        if not self.__scheduler is None:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(self.APS_CONFIG)

        logger.info("[AlertScheduler] Stopped the alert scheduler.")

    def reschedule(self):
        """
    Removes jobs that are scheduled where their UUID no longer is valid.
    Schedules jobs where the definition UUID is not currently scheduled.
    """
        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # for every scheduled job, see if its UUID is still valid
        for scheduled_job in scheduled_jobs:
            uuid_valid = False

            for definition in definitions:
                definition_uuid = definition.get_uuid()
                if scheduled_job.name == definition_uuid:
                    uuid_valid = True
                    break

            # jobs without valid UUIDs should be unscheduled
            if uuid_valid is False:
                jobs_removed += 1
                logger.info("[AlertScheduler] Unscheduling {0}".format(
                    scheduled_job.name))
                self._collector.remove_by_uuid(scheduled_job.name)
                self.__scheduler.unschedule_job(scheduled_job)

        # for every definition, determine if there is a scheduled job
        for definition in definitions:
            definition_scheduled = False
            for scheduled_job in scheduled_jobs:
                definition_uuid = definition.get_uuid()
                if definition_uuid == scheduled_job.name:
                    definition_scheduled = True
                    break

            # if no jobs are found with the definitions UUID, schedule it
            if definition_scheduled is False:
                jobs_scheduled += 1
                self.schedule_definition(definition)

        logger.info(
            "[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled"
            .format(str(jobs_scheduled), str(jobs_removed)))

    def reschedule_all(self):
        """
    Unschedules every currently scheduled job and then schedules a new job
    for every loaded definition.
    """
        logger.info("[AlertScheduler] Rescheduling all jobs...")

        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # unschedule all scheduled jobs
        for scheduled_job in scheduled_jobs:
            jobs_removed += 1
            logger.info("[AlertScheduler] Unscheduling {0}".format(
                scheduled_job.name))
            self._collector.remove_by_uuid(scheduled_job.name)
            self.__scheduler.unschedule_job(scheduled_job)

        # for every definition, schedule a job
        for definition in definitions:
            jobs_scheduled += 1
            self.schedule_definition(definition)

        logger.info(
            "[AlertScheduler] Reschedule Summary: {0} unscheduled, {0} rescheduled"
            .format(str(jobs_removed), str(jobs_scheduled)))

    def collector(self):
        """ gets the collector for reporting to the server """
        return self._collector

    def __load_definitions(self):
        """
    Loads all alert definitions from a file. All clusters are stored in
    a single file. This will also populate the cluster-to-hash dictionary.
    :return:
    """
        definitions = []

        alerts_definitions_path = os.path.join(self.cachedir, self.FILENAME)
        try:
            with open(alerts_definitions_path) as fp:
                all_commands = json.load(fp)
        except:
            logger.warning(
                '[AlertScheduler] {0} not found or invalid. No alerts will be scheduled until registration occurs.'
                .format(alerts_definitions_path))
            return definitions

        for command_json in all_commands:
            clusterName = command_json.get('clusterName', '')
            hostName = command_json.get('hostName', '')
            clusterHash = command_json.get('hash')

            # cache the cluster and cluster hash after loading the JSON
            if clusterName != '' and clusterHash is not None:
                logger.info(
                    '[AlertScheduler] Caching cluster {0} with alert hash {1}'.
                    format(clusterName, clusterHash))
                self._cluster_hashes[clusterName] = clusterHash

            for definition in command_json['alertDefinitions']:
                alert = self.__json_to_callable(clusterName, hostName,
                                                definition)

                if alert is None:
                    continue

                alert.set_helpers(self._collector, self._cluster_configuration)

                definitions.append(alert)

        return definitions

    def __json_to_callable(self, clusterName, hostName, json_definition):
        """
    converts the json that represents all aspects of a definition
    and makes an object that extends BaseAlert that is used for individual alerts
    """
        alert = None

        try:
            source = json_definition['source']
            source_type = source.get('type', '')

            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "[AlertScheduler] Creating job type {0} with {1}".format(
                        source_type, str(json_definition)))

            if source_type == AlertSchedulerHandler.TYPE_METRIC:
                alert = MetricAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_AMS:
                alert = AmsAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_PORT:
                alert = PortAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
                source['stacks_directory'] = self.stacks_dir
                source['common_services_directory'] = self.common_services_dir
                source['extensions_directory'] = self.extensions_dir
                source['host_scripts_directory'] = self.host_scripts_dir
                alert = ScriptAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_WEB:
                alert = WebAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_RECOVERY:
                alert = RecoveryAlert(json_definition, source, self.config,
                                      self.recovery_manager)

            if alert is not None:
                alert.set_cluster(clusterName, hostName)

        except Exception, exception:
            logger.exception(
                "[AlertScheduler] Unable to load an invalid alert definition. It will be skipped."
            )

        return alert
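
A minimal sketch (APScheduler 2.x assumed; the config keys mirror APS_CONFIG above, while the job name and UUID set are invented) of driving the scheduler from an 'apscheduler.*'-prefixed config dict and pruning jobs by name, which is what reschedule() does with definition UUIDs:

from apscheduler.scheduler import Scheduler

config = {
    'apscheduler.misfire_grace_time': 5,  # let a job fire up to 5 s late
    'apscheduler.coalesce': True,         # collapse missed runs into one
    'apscheduler.standalone': False,
}
sched = Scheduler(config)
sched.start()

def check_alert():
    pass

# name the job after a definition UUID so it can be matched later
sched.add_interval_job(check_alert, seconds=30, name='definition-uuid-1')

# unschedule any job whose name no longer maps to a known definition
valid_uuids = {'definition-uuid-1'}
for job in sched.get_jobs():
    if job.name not in valid_uuids:
        sched.unschedule_job(job)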
Exemple #31
0
class _Direct(Direct):

    # set initial conditions for the subclass (in addition to the superclass
    # methods defined in Direct) and initialize the scheduler
    def __init__(self, *args, **kwargs):
        Direct.__init__(self, *args, **kwargs)
        self.scheduler = Scheduler()
        self.sampling = False

    def collect_sample(self):
        time_value = gmtime()
        self.send('R0\r')
        print '#\t--- Collecting Sample at %.3f' % mktime(time_value)

    def query_status(self):
        time_value = gmtime()
        self.send('S\r')
        print '#\t--- Query Instrument Status at %.3f' % mktime(time_value)

    def run(self):
        while True:

            # parse the user commands from stdin
            cmd = sys.stdin.readline()
            cmd = cmd.strip()

            # default command set
            if cmd == 'q':
                if self.sampling is True:
                    print '#\t--- stop all scheduled sampling'
                    self.scheduler.unschedule_job(self.sample)
                    self.scheduler.unschedule_job(self.status)
                    self.scheduler.shutdown()

                print '#\t--- turning on 1 Hz status messages'
                self.send('F1\r')
                print '### exiting'
                sleep(1)
                break

            elif cmd == 'init':
                print '### initialize instrument for sampling'
                print '#\t--- turning off 1 Hz status messages'
                self.send('F5A\r')
                sleep(1)
                self.send('F5A\r')
                sleep(1)
                self.send('F5A\r')
                sleep(1)
                print '#\t--- flush internal pump 2 times with reagent'
                self.send('P2\r')
                sleep(2)
                print '#\t\t--- * first cycle complete'
                self.send('P2\r')
                sleep(2)
                print '#\t\t--- * second cycle complete, ready for sampling'

            elif cmd == 'start':
                print '### sampling started, will sample every hour at the top of the hour'
                self.scheduler.start()
                self.sample = self.scheduler.add_cron_job(self.collect_sample, minute=0)
                self.status = self.scheduler.add_cron_job(self.query_status, hour='0,12', minute=15)
                #self.scheduler.print_jobs()
                self.sampling = True

            elif cmd == 'stop':
                print '### sampling stopped'
                self.scheduler.unschedule_job(self.sample)
                self.scheduler.unschedule_job(self.status)
                self.scheduler.shutdown()
                self.sampling = False

            else:
                print '### sending %s' % cmd
                self.send(cmd + '\r')
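
A minimal sketch (APScheduler 2.x assumed; the callables are placeholders) of the cron-field syntax the 'start' and 'stop' commands above rely on: minute=0 fires at the top of every hour, while hour='0,12' with minute=15 fires twice a day:

from apscheduler.scheduler import Scheduler

sched = Scheduler()
sched.start()

def collect_sample():
    pass

def query_status():
    pass

sample_job = sched.add_cron_job(collect_sample, minute=0)               # hourly
status_job = sched.add_cron_job(query_status, hour='0,12', minute=15)   # 00:15 and 12:15

# mirroring the 'stop' command: drop both jobs, then shut the scheduler down
sched.unschedule_job(sample_job)
sched.unschedule_job(status_job)
sched.shutdown()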
Exemple #32
0
class TestJobExecution(object):
    def setup(self):
        self.scheduler = Scheduler(threadpool=FakeThreadPool())
        self.scheduler.add_jobstore(RAMJobStore(), "default")

        # Make the scheduler think it's running
        self.scheduler._thread = FakeThread()

        self.logstream = StringIO()
        self.loghandler = StreamHandler(self.logstream)
        self.loghandler.setLevel(ERROR)
        scheduler.logger.addHandler(self.loghandler)

    def teardown(self):
        scheduler.logger.removeHandler(self.loghandler)
        if scheduler.datetime == FakeDateTime:
            scheduler.datetime = datetime
        FakeDateTime._now = original_now

    def test_job_name(self):
        def my_job():
            pass

        job = self.scheduler.add_interval_job(my_job, start_date=datetime(2010, 5, 19))
        eq_(
            repr(job),
            "<Job (name=my_job, trigger=<IntervalTrigger (interval=datetime.timedelta(0, 1), "
            "start_date=datetime.datetime(2010, 5, 19, 0, 0))>)>",
        )

    def test_schedule_object(self):
        # Tests that any callable object is accepted (and not just functions)
        class A:
            def __init__(self):
                self.val = 0

            def __call__(self):
                self.val += 1

        a = A()
        job = self.scheduler.add_interval_job(a, seconds=1)
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(a.val, 2)

    def test_schedule_method(self):
        # Tests that bound methods can be scheduled (at least with RAMJobStore)
        class A:
            def __init__(self):
                self.val = 0

            def method(self):
                self.val += 1

        a = A()
        job = self.scheduler.add_interval_job(a.method, seconds=1)
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(a.val, 2)

    def test_unschedule_job(self):
        def increment():
            vals[0] += 1

        vals = [0]
        job = self.scheduler.add_cron_job(increment)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals[0], 1)
        self.scheduler.unschedule_job(job)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals[0], 1)

    def test_unschedule_func(self):
        def increment():
            vals[0] += 1

        def increment2():
            vals[0] += 1

        vals = [0]
        job1 = self.scheduler.add_cron_job(increment)
        job2 = self.scheduler.add_cron_job(increment2)
        job3 = self.scheduler.add_cron_job(increment)
        eq_(self.scheduler.get_jobs(), [job1, job2, job3])

        self.scheduler.unschedule_func(increment)
        eq_(self.scheduler.get_jobs(), [job2])

    @raises(KeyError)
    def test_unschedule_func_notfound(self):
        self.scheduler.unschedule_func(copy)

    def test_job_finished(self):
        def increment():
            vals[0] += 1

        vals = [0]
        job = self.scheduler.add_interval_job(increment, max_runs=1)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals, [1])
        assert job not in self.scheduler.get_jobs()

    def test_job_exception(self):
        def failure():
            raise DummyException

        job = self.scheduler.add_date_job(failure, datetime(9999, 9, 9))
        self.scheduler._process_jobs(job.next_run_time)
        assert "DummyException" in self.logstream.getvalue()

    def test_misfire_grace_time(self):
        self.scheduler.misfire_grace_time = 3
        job = self.scheduler.add_interval_job(lambda: None, seconds=1)
        eq_(job.misfire_grace_time, 3)

        job = self.scheduler.add_interval_job(lambda: None, seconds=1, misfire_grace_time=2)
        eq_(job.misfire_grace_time, 2)

    def test_coalesce_on(self):
        # Makes sure that the job is only executed once when it is scheduled
        # to be executed twice in a row
        def increment():
            vals[0] += 1

        vals = [0]
        events = []
        scheduler.datetime = FakeDateTime
        self.scheduler.add_listener(events.append, EVENT_JOB_EXECUTED | EVENT_JOB_MISSED)
        job = self.scheduler.add_interval_job(
            increment, seconds=1, start_date=FakeDateTime.now(), coalesce=True, misfire_grace_time=2
        )

        # Turn the clock 2 seconds forward
        FakeDateTime._now += timedelta(seconds=2)

        self.scheduler._process_jobs(FakeDateTime.now())
        eq_(job.runs, 1)
        eq_(len(events), 1)
        eq_(events[0].code, EVENT_JOB_EXECUTED)
        eq_(vals, [1])

    def test_coalesce_off(self):
        # Makes sure that every scheduled run for the job is executed even
        # when they are in the past (but still within misfire_grace_time)
        def increment():
            vals[0] += 1

        vals = [0]
        events = []
        scheduler.datetime = FakeDateTime
        self.scheduler.add_listener(events.append, EVENT_JOB_EXECUTED | EVENT_JOB_MISSED)
        job = self.scheduler.add_interval_job(
            increment, seconds=1, start_date=FakeDateTime.now(), coalesce=False, misfire_grace_time=2
        )

        # Turn the clock 2 seconds forward
        FakeDateTime._now += timedelta(seconds=2)

        self.scheduler._process_jobs(FakeDateTime.now())
        eq_(job.runs, 3)
        eq_(len(events), 3)
        eq_(events[0].code, EVENT_JOB_EXECUTED)
        eq_(events[1].code, EVENT_JOB_EXECUTED)
        eq_(events[2].code, EVENT_JOB_EXECUTED)
        eq_(vals, [3])

    def test_interval(self):
        def increment(amount):
            vals[0] += amount
            vals[1] += 1

        vals = [0, 0]
        job = self.scheduler.add_interval_job(increment, seconds=1, args=[2])
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals, [4, 2])

    def test_interval_schedule(self):
        @self.scheduler.interval_schedule(seconds=1)
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals, [2])

    def test_cron(self):
        def increment(amount):
            vals[0] += amount
            vals[1] += 1

        vals = [0, 0]
        job = self.scheduler.add_cron_job(increment, args=[3])
        start = job.next_run_time
        self.scheduler._process_jobs(start)
        eq_(vals, [3, 1])
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals, [6, 2])
        self.scheduler._process_jobs(start + timedelta(seconds=2))
        eq_(vals, [9, 3])

    def test_cron_schedule_1(self):
        @self.scheduler.cron_schedule()
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals[0], 2)

    def test_cron_schedule_2(self):
        @self.scheduler.cron_schedule(minute="*")
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        next_run = start + timedelta(seconds=60)
        eq_(increment.job.get_run_times(next_run), [start, next_run])
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(next_run)
        eq_(vals[0], 2)

    def test_date(self):
        def append_val(value):
            vals.append(value)

        vals = []
        date = datetime.now() + timedelta(seconds=1)
        self.scheduler.add_date_job(append_val, date, kwargs={"value": "test"})
        self.scheduler._process_jobs(date)
        eq_(vals, ["test"])

    def test_print_jobs(self):
        out = StringIO()
        self.scheduler.print_jobs(out)
        expected = "Jobstore default:%s" "    No scheduled jobs%s" % (os.linesep, os.linesep)
        eq_(out.getvalue(), expected)

        self.scheduler.add_date_job(copy, datetime(2200, 5, 19))
        out = StringIO()
        self.scheduler.print_jobs(out)
        expected = (
            "Jobstore default:%s    "
            "copy (trigger: date[2200-05-19 00:00:00], "
            "next run at: 2200-05-19 00:00:00)%s" % (os.linesep, os.linesep)
        )
        eq_(out.getvalue(), expected)

    def test_jobstore(self):
        self.scheduler.add_jobstore(RAMJobStore(), "dummy")
        job = self.scheduler.add_date_job(lambda: None, datetime(2200, 7, 24), jobstore="dummy")
        eq_(self.scheduler.get_jobs(), [job])
        self.scheduler.remove_jobstore("dummy")
        eq_(self.scheduler.get_jobs(), [])

    @raises(KeyError)
    def test_remove_nonexistent_jobstore(self):
        self.scheduler.remove_jobstore("dummy2")

    def test_job_next_run_time(self):
        # Tests against bug #5
        def increment():
            vars[0] += 1

        vars = [0]
        scheduler.datetime = FakeDateTime
        job = self.scheduler.add_interval_job(increment, seconds=1, misfire_grace_time=3, start_date=FakeDateTime.now())
        start = job.next_run_time

        self.scheduler._process_jobs(start)
        eq_(vars, [1])

        self.scheduler._process_jobs(start)
        eq_(vars, [1])

        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vars, [2])
Exemple #33
0
class MetaDataGenerationScheduler():
    def __init__(self, updateIntervalSeconds=30):
        self.interval = updateIntervalSeconds
        config = {'apscheduler.daemonic': False}
        self.sched = Scheduler(config)
        # initialize these per instance.
        self.repo_timestamps = {}
        self.jobs = {}


    repo_timestamps = {}  #dictionary with jobName (=reponame) : last scheduler modification timestamp (float)
    jobs = {} #dictionary with jobName (=reponame) : jobHandle

    configService = RepoConfigService()
    static_root_dir = configService.getStaticRepoDir()
    sched = None
    interval = None

    def start(self):
        self.update_program_config() #read configs, schedule jobs

        # schedule an update as a job
        self.sched.add_interval_job(self.update_program_config, seconds=self.interval)
        
        # schedule cleanup cache
        self.sched.add_cron_job(self.cleanupCacheDir, hour = 23, minute = 17, second = 20)

        self.sched.start()
        
    def createrepo_with_optional_cleanup_job(self, *argList):
        monitor = JobMonitorer()
        monitor.job_starts()
        repoDir = argList[0]
        reponame = argList[1]
        rpm_max_keep = argList[2]
        didCleanUp=False
        try:
            if rpm_max_keep != None:
                didCleanUp=True
                self.configService.doCleanup(repoDir, rpm_max_keep)
                logging.info("job RpmCleanup on "+reponame+" took "+str(monitor.get_execution_time_until_now_seconds())+" seconds")
            self.configService.doCreateRepo(repoDir, reponame)
            monitor.job_finishes()
            logging.info(monitor.get_pretty_job_summary("createrepo on "+reponame+" (cleanup included : "+str(didCleanUp)+")"))
        except Exception as ex:
            logging.error(traceback.format_exc())

    def update_program_config(self):
        updatedJobs = 0
        addedJobs = 0
        removedJobs = 0
        
        list_of_static_dirs = os.listdir(self.static_root_dir)
        self.remove_jobs_where_repo_deleted(list_of_static_dirs)
        for static_dir in list_of_static_dirs:
            file_path = self.configService.getMetaDataGenerationFilePathRelativeToRepoDirByRepoName(static_dir)
            if not os.path.exists(file_path):
                if self.repo_timestamps.has_key(static_dir):
                    logging.debug("unschedule because file does not exist")
                    self.unschedule_by_reponame(static_dir)
                    del self.repo_timestamps[static_dir] #repo is unmanaged now, check back later
                    removedJobs+=1
                continue
            
            if not static_dir in self.repo_timestamps:
                logging.debug("new repo found..")
                addedJobs+=1
                self.repo_timestamps[static_dir] = self.determine_last_modification_time(
                    file_path) #make an entry so we know we processed the repo + remember modification timestamp
                self.add_job_for_repo(static_dir)
            else: # we already processed the repo because its in the dictionary
                logging.debug("check for updates in repo config...")
                if self.is_more_recent_metadata_generation_file_than(static_dir, self.repo_timestamps[static_dir]):
                    logging.debug("update job for repo " + static_dir)
                    updatedJobs+=1
                    self.repo_timestamps[static_dir] = self.determine_last_modification_time(file_path)
                    self.unschedule_by_reponame(static_dir)
                    self.add_job_for_repo(static_dir)
        logging.info("update_program_config finished -- updated %s jobs, added %s jobs, removed %s jobs"%(updatedJobs,addedJobs,removedJobs))

    def remove_jobs_where_repo_deleted(self, list_of_existing_repos):
        removed_repos = set(self.repo_timestamps.keys()) - set(list_of_existing_repos)
        for repo in removed_repos:
            self.unschedule_by_reponame(repo)

    def determine_last_modification_time(self, file_path):
        statbuf = os.stat(file_path)
        return statbuf.st_mtime #float representing the last modification timestamp

    def unschedule_by_reponame(self, reponame):
        if reponame in self.jobs:
            self.sched.unschedule_job(self.jobs[reponame])
            del self.jobs[reponame] #remove the job from the job-handle dictionary..

    def is_more_recent_metadata_generation_file_than(self, repodir, past_timestamp):
        file_path = self.configService.getMetaDataGenerationFilePathRelativeToRepoDirByRepoName(repodir)
        actual_timestamp = self.determine_last_modification_time(file_path)
        if actual_timestamp > past_timestamp:
            return True
        else:
            return False
        
    def cleanupCacheDir(self):
        cleanupCacheMonitor = JobMonitorer()
        cleanupCacheMonitor.job_starts()
        logging.info('Start cache cleanup ...')
        cleanupDir = self.configService.getRepoCacheDir()
        
        try:
            for reponame in os.listdir(cleanupDir):
                if reponame.startswith('.'):
                    continue
                
                # check for cache dirs of already deleted repos 
                absoluteDir = os.path.join(cleanupDir, reponame)
                if os.path.isdir(absoluteDir):
                    if not os.path.exists(self.configService.getStaticRepoDir(reponame)):
                        shutil.rmtree(absoluteDir)
                        continue
                    
                           
                lockfile = self.configService.getRepoLockFile(reponame)
                if not os.path.exists(lockfile):
                    shutil.rmtree(absoluteDir)
        except Exception as ex:
            logging.error("Exception in CleanupCacheDir : "+str(ex))                    
        finally:
            cleanupCacheMonitor.job_finishes()
            logging.info(cleanupCacheMonitor.get_pretty_job_summary("CleanupCacheDir"))        

    def add_job_for_repo(self, repo_dir):
        metaDataConfig = self.configService.getMetaDataGenerationConfig(repo_dir)
        if not metaDataConfig:
            return #exit silently without adding a job
        generation_type = metaDataConfig.getMetaDataGenerationType()
        if generation_type == 'manual': return #exit silently
        #if we get here, we know its "scheduled"
        generation_interval = metaDataConfig.getMetaDataGenerationInterval()
        generation_interval = int(generation_interval)
        rpm_max_keep = metaDataConfig.getMetaDataGenerationRpmMaxKeep()
        full_path_to_repo = self.configService.getStaticRepoDir(repo_dir)
        argList = [full_path_to_repo, repo_dir, rpm_max_keep]
        addedJob = self.sched.add_interval_job(self.createrepo_with_optional_cleanup_job, seconds=generation_interval, args=argList)
        self.jobs[repo_dir] = addedJob

    def shutdown(self):
        self.sched.shutdown()
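
A minimal sketch (APScheduler 2.x assumed; the paths and values are made up) of the interval-job-with-args pattern used by add_job_for_repo above, including the handle bookkeeping needed for later unscheduling:

from apscheduler.scheduler import Scheduler

sched = Scheduler({'apscheduler.daemonic': False})
sched.start()

def regenerate_repo(repo_dir, repo_name, rpm_max_keep):
    print('createrepo on %s (%s), keeping %s rpms' % (repo_name, repo_dir, rpm_max_keep))

jobs = {}
jobs['foo'] = sched.add_interval_job(regenerate_repo, seconds=60,
                                     args=['/srv/repos/foo', 'foo', 3])

# when the repo disappears, drop its job via the stored handle
sched.unschedule_job(jobs.pop('foo'))
sched.shutdown()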
Exemple #34
0
class TNActionScheduler(TNArchipelPlugin):
    """
    This plugin allows to create scheduled actions.
    """
    def __init__(self, configuration, entity, entry_point_group):
        """
        Initialize the plugin.
        @type configuration: Configuration object
        @param configuration: the configuration
        @type entity: L{TNArchipelEntity}
        @param entity: the entity that owns the plugin
        @type entry_point_group: string
        @param entry_point_group: the group name of plugin entry_point
        """
        TNArchipelPlugin.__init__(self,
                                  configuration=configuration,
                                  entity=entity,
                                  entry_point_group=entry_point_group)
        self.scheduler = Scheduler()
        self.scheduler.start()
        self.database = sqlite3.connect(self.configuration.get(
            "SCHEDULER", "database"),
                                        check_same_thread=False)
        self.database.execute(
            "create table if not exists scheduler (entity_uuid text, job_uuid text, action text, year text, month text, day text, hour text, minute text, second text, comment text, params text)"
        )
        self.database.commit()
        self.cursor = self.database.cursor()
        self.restore_jobs()
        self.supported_actions_for_vm = ("create", "shutdown", "destroy",
                                         "suspend", "resume", "reboot",
                                         "migrate", "pause")
        self.supported_actions_for_hypervisor = ("alloc", "free")
        # permissions
        self.entity.permission_center.create_permission(
            "scheduler_jobs", "Authorizes user to get the list of task", False)
        self.entity.permission_center.create_permission(
            "scheduler_schedule", "Authorizes user to schedule a task", False)
        self.entity.permission_center.create_permission(
            "scheduler_unschedule", "Authorizes user to unschedule a task",
            False)
        self.entity.permission_center.create_permission(
            "scheduler_actions", "Authorizes user to get available actions",
            False)
        # hooks
        if self.entity.__class__.__name__ == "TNArchipelVirtualMachine":
            self.entity.register_hook("HOOK_VM_TERMINATE",
                                      method=self.vm_terminate)

    ### Plugin interface

    def register_handlers(self):
        """
        This method will be called by the plugin user when it will be
        necessary to register module for listening to stanza.
        """
        self.entity.xmppclient.RegisterHandler('iq',
                                               self.process_iq,
                                               ns=ARCHIPEL_NS_ENTITY_SCHEDULER)

    def unregister_handlers(self):
        """
        Unregister the handlers.
        """
        self.entity.xmppclient.UnregisterHandler(
            'iq', self.process_iq, ns=ARCHIPEL_NS_ENTITY_SCHEDULER)

    @staticmethod
    def plugin_info():
        """
        Return information about the plugin.
        @rtype: dict
        @return: dictionary containing plugin information
        """
        plugin_friendly_name = "Action Scheduler"
        plugin_identifier = "action_scheduler"
        plugin_configuration_section = "SCHEDULER"
        plugin_configuration_tokens = ["database"]
        return {
            "common-name": plugin_friendly_name,
            "identifier": plugin_identifier,
            "configuration-section": plugin_configuration_section,
            "configuration-tokens": plugin_configuration_tokens
        }

    ### Persistence

    def delete_job(self, uid):
        """
        Remove a job from the database.
        @type uid: string
        @param uid: the uid of the job to remove
        """
        self.cursor.execute("DELETE FROM scheduler WHERE job_uuid=?", (uid, ))
        self.database.commit()

    def save_jobs(self,
                  uid,
                  action,
                  year,
                  month,
                  day,
                  hour,
                  minute,
                  second,
                  comment,
                  params=None):
        """
        Save a job in the database.
        @type uid: string
        @param uid: the uid of the job
        @type action: string
        @param action: the action
        @type year: string
        @param year: year of execution
        @type month: string
        @param month: month of execution
        @type day: string
        @param day: day of execution
        @type hour: string
        @param hour: hour of execution
        @type minute: string
        @param minute: minute of execution
        @type second: string
        @param second: second of execution
        @type comment: string
        @param comment: comment about the job
        @type params: string
        @param params: random parameter of the job
        """
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            entity_uid = self.entity.uuid
        elif entityClass == "TNArchipelHypervisor":
            entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID
        self.cursor.execute(
            "INSERT INTO scheduler VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (
                entity_uid,
                uid,
                action,
                year,
                month,
                day,
                hour,
                minute,
                second,
                comment,
                params,
            ))
        self.database.commit()

    def restore_jobs(self):
        """
        Restore the jobs from the database.
        """
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            entity_uid = self.entity.uuid
        elif entityClass == "TNArchipelHypervisor":
            entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID
        self.cursor.execute("SELECT * FROM scheduler WHERE entity_uuid=?",
                            (entity_uid, ))
        for values in self.cursor:
            try:
                entity_uuid, job_uuid, action, year, month, day, hour, minute, second, comment, params = values
                str_date = "%s/%s/%s %s:%s:%s" % (year, month, day, hour,
                                                  minute, second)
                self.scheduler.add_cron_job(
                    self.do_job_for_vm,
                    year=year,
                    month=month,
                    day=day,
                    hour=hour,
                    minute=minute,
                    second=second,
                    args=[action, job_uuid, str_date, comment, params])
            except Exception as ex:
                self.entity.log.error("unable to restore a job: %s" % str(ex))

    def vm_terminate(self, origin, user_info, arguments):
        """
        Close the database connection.
        @type origin: TNArchipelEntity
        @param origin: the origin of the hook
        @type user_info: object
        @param user_info: random user information
        @type arguments: object
        @param arguments: runtime argument
        """
        self.database.close()

    ### Jobs

    def get_jod_with_uid(self, uid):
        """
        Get a job with given uid.
        @type uid: string
        @param uid: the uid of the job
        """
        if hasattr(self.scheduler, "get_jobs"):
            jobs = self.scheduler.get_jobs()
        else:
            jobs = self.scheduler.jobs

        for job in jobs:
            if str(job.args[1]) == uid:
                return job
        return None

    def do_job_for_vm(self, action, uid, str_date, comment, param):
        """
        Perform the job.
        @type action: string
        @param action: the action to execute
        @type uid: string
        @param uid: the uid of the job
        @type str_date: string
        @param str_date: the date of the job
        @type comment: string
        @param comment: comment about the job
        @type param: string
        @param param: a random parameter to give to job
        """
        if action == "create":
            self.entity.create()
        elif action == "shutdown":
            self.entity.shutdown()
        elif action == "destroy":
            self.entity.destroy()
        elif action == "suspend":
            self.entity.suspend()
        elif action == "resume":
            self.entity.resume()
        elif action == "pause":
            if self.entity.libvirt_status == 1:
                self.entity.suspend()
            elif self.entity.libvirt_status == 3:
                self.entity.resume()
        elif action == "migrate":
            pass
        job = self.get_jod_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        self.entity.push_change("scheduler", "jobexecuted")

    def do_job_for_hypervisor(self, action, uid, str_date, comment, param):
        """
        Perform the job.
        @type action: string
        @param action: the action to execute
        @type uid: string
        @param uid: the uid of the job
        @type str_date: string
        @param str_date: the date of the job
        @type comment: string
        @param comment: comment about the job
        @type param: string
        @param param: a random parameter to give to job
        """
        if action == "alloc":
            self.entity.alloc()
        elif action == "free":
            pass  #self.entity.free()
        job = self.get_jod_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        self.entity.push_change("scheduler", "jobexecuted")

    ### Process IQ

    def process_iq(self, conn, iq):
        """
        This method is invoked when a ARCHIPEL_NS_VM_SCHEDULER IQ is received.
        It understands IQ of type:
            - jobs
            - schedule
            - unschedule
        @type conn: xmpp.Dispatcher
        @param conn: the instance of the current connection that sent the stanza
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        """
        reply = None
        action = self.entity.check_acp(conn, iq)
        self.entity.check_perm(conn, iq, action, -1, prefix="scheduler_")

        if action == "schedule":
            reply = self.iq_schedule(iq)
        elif action == "unschedule":
            reply = self.iq_unschedule(iq)
        elif action == "jobs":
            reply = self.iq_jobs(iq)
        elif action == "actions":
            reply = self.iq_actions(iq)
        if reply:
            conn.send(reply)
            raise xmpp.protocol.NodeProcessed

    def iq_schedule(self, iq):
        """
        Schedule a task.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            job = iq.getTag("query").getTag("archipel").getAttr("job")
            entityClass = self.entity.__class__.__name__
            param = None
            if entityClass == "TNArchipelVirtualMachine" and not job in self.supported_actions_for_vm:
                raise Exception("action %s is not valid" % job)
            elif entityClass == "TNArchipelHypervisor" and not job in self.supported_actions_for_hypervisor:
                raise Exception("action %s is not valid" % job)
            year = iq.getTag("query").getTag("archipel").getAttr("year")
            month = iq.getTag("query").getTag("archipel").getAttr("month")
            day = iq.getTag("query").getTag("archipel").getAttr("day")
            hour = iq.getTag("query").getTag("archipel").getAttr("hour")
            minute = iq.getTag("query").getTag("archipel").getAttr("minute")
            second = iq.getTag("query").getTag("archipel").getAttr("second")
            comment = iq.getTag("query").getTag("archipel").getAttr("comment")
            if iq.getTag("query").getTag("archipel").has_attr("param"):
                param = iq.getTag("query").getTag("archipel").getAttr("param")
            uid = str(uuid.uuid1())
            str_date = "%s-%s-%s @ %s : %02d : %02d" % (
                year, month, day, hour, int(minute), int(second))
            if entityClass == "TNArchipelVirtualMachine":
                func = self.do_job_for_vm
            elif entityClass == "TNArchipelHypervisor":
                func = self.do_job_for_hypervisor
            self.scheduler.add_cron_job(
                func,
                year=year,
                month=month,
                day=day,
                hour=hour,
                minute=minute,
                second=second,
                args=[job, uid, str_date, comment, param])
            self.save_jobs(uid, job, year, month, day, hour, minute, second,
                           comment, param)
            self.entity.push_change("scheduler", "scheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_jobs(self, iq):
        """
        Get jobs.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            nodes = []
            if hasattr(self.scheduler, "get_jobs"):
                jobs = self.scheduler.get_jobs()
            else:
                jobs = self.scheduler.jobs

            for job in jobs:
                job_node = xmpp.Node(tag="job",
                                     attrs={
                                         "action": str(job.args[0]),
                                         "uid": str(job.args[1]),
                                         "date": str(job.args[2]),
                                         "comment": job.args[3]
                                     })
                nodes.append(job_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_unschedule(self, iq):
        """
        Unschedule a job.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            uid = iq.getTag("query").getTag("archipel").getAttr("uid")
            the_job = self.get_jod_with_uid(uid)
            if not the_job:
                raise Exception("job with uid %s doesn't exists" % uid)
            self.delete_job(uid)
            self.scheduler.unschedule_job(the_job)
            self.entity.push_change("scheduler", "unscheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    def iq_actions(self, iq):
        """
        Get available actions.
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            entityClass = self.entity.__class__.__name__
            if entityClass == "TNArchipelVirtualMachine":
                actions = self.supported_actions_for_vm
            elif entityClass == "TNArchipelHypervisor":
                actions = self.supported_actions_for_hypervisor
            nodes = []
            for action in actions:
                action_node = xmpp.Node(tag="action")
                action_node.setData(action)
                nodes.append(action_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply
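
A minimal sketch (APScheduler 2.x assumed; the date and job metadata are invented) of the pattern restore_jobs and iq_schedule use above: a cron job pinned to explicit year/month/day/hour/minute/second fields so it fires at one exact moment, with the job's metadata passed through args:

from apscheduler.scheduler import Scheduler

sched = Scheduler()
sched.start()

def do_job(action, uid, str_date, comment):
    print('%s: running %s scheduled for %s (%s)' % (uid, action, str_date, comment))

sched.add_cron_job(do_job,
                   year='2030', month='1', day='15',
                   hour='3', minute='30', second='0',
                   args=['shutdown', 'job-uuid-1', '2030/1/15 3:30:0', 'nightly stop'])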
Exemple #35
0
class PyFlowScheduler(object):
    """
    This object schedules the submission of the tasks in an :class:`Flow`.
    There are two types of errors that might occur during the execution of the jobs:

        #. Python exceptions
        #. Abinit Errors.

    Python exceptions are easy to detect and are usually due to a bug in abinitio or random errors such as IOError.
    The set of Abinit Errors is much broader. It includes wrong input data, segmentation
    faults, problems with the resource manager, etc. Abinitio tries to handle the most common cases
    but there's still a lot of room for improvement.
    Note, in particular, that `PyFlowScheduler` will shut down automatically if

        #. The number of python exceptions is > MAX_NUM_PYEXCS

        #. The number of Abinit Errors (i.e. the number of tasks whose status is S_ERROR) is > MAX_NUM_ABIERRS

        #. The number of jobs launched becomes greater than (SAFETY_RATIO * total_number_of_tasks).

        #. The scheduler will send an email to the user (specified by mailto) every REMINDME_S seconds.
           If the mail cannot be sent, it will shutdown automatically.
           This check prevents the scheduler from being trapped in an infinite loop.
    """
    # Configuration file.
    YAML_FILE = "scheduler.yml"
    USER_CONFIG_DIR = os.path.join(os.getenv("HOME"), ".abinit", "abipy")

    DEBUG = 0

    Error = PyFlowSchedulerError

    def __init__(self, **kwargs):
        """
        Args:
            weeks: number of weeks to wait
            days: number of days to wait
            hours: number of hours to wait
            minutes: number of minutes to wait
            seconds: number of seconds to wait
            verbose: (int) verbosity level
            max_njobs_inqueue: Limit on the number of jobs that can be present in the queue
            use_dynamic_manager: True if the :class:`TaskManager` must be re-initialized from
                file before launching the jobs. Default: False
            max_nlaunch: Maximum number of tasks launched by rapidfire (default -1 i.e. no limit)
        """
        # Options passed to the scheduler.
        self.sched_options = AttrDict(
            weeks=kwargs.pop("weeks", 0),
            days=kwargs.pop("days", 0),
            hours=kwargs.pop("hours", 0),
            minutes=kwargs.pop("minutes", 0),
            seconds=kwargs.pop("seconds", 0),
            #start_date=kwargs.pop("start_date", None),
        )

        if all(not v for v in self.sched_options.values()):
            raise self.Error("Wrong set of options passed to the scheduler.")

        self.mailto = kwargs.pop("mailto", None)
        self.verbose = int(kwargs.pop("verbose", 0))
        self.use_dynamic_manager = kwargs.pop("use_dynamic_manager", False)
        self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200)

        self.REMINDME_S = float(kwargs.pop("REMINDME_S", 4 * 24 * 3600))
        self.MAX_NUM_PYEXCS = int(kwargs.pop("MAX_NUM_PYEXCS", 0))
        self.MAX_NUM_ABIERRS = int(kwargs.pop("MAX_NUM_ABIERRS", 0))
        self.SAFETY_RATIO = int(kwargs.pop("SAFETY_RATIO", 5))
        #self.MAX_ETIME_S = kwargs.pop("MAX_ETIME_S", )
        self.max_nlaunch = kwargs.pop("max_nlaunch", -1)

        if kwargs:
            raise self.Error("Unknown arguments %s" % kwargs)

        if has_sched_v3:
            from apscheduler.schedulers.blocking import BlockingScheduler
            self.sched = BlockingScheduler()
        else:
            from apscheduler.scheduler import Scheduler
            self.sched = Scheduler(standalone=True)

        self.nlaunch = 0
        self.num_reminders = 1

        # Used to keep track of the exceptions raised while the scheduler is running
        self.exceptions = collections.deque(maxlen=self.MAX_NUM_PYEXCS + 10)

        # Used to push additional info during the execution.
        self.history = collections.deque(maxlen=100)

    @classmethod
    def from_file(cls, filepath):
        """Read the configuration parameters from a Yaml file."""
        with open(filepath, "r") as fh:
            return cls(**yaml.load(fh))

    @classmethod
    def from_string(cls, s):
        """Create an istance from string s containing a YAML dictionary."""
        stream = cStringIO(s)
        stream.seek(0)

        return cls(**yaml.load(stream))

    @classmethod
    def from_user_config(cls):
        """
        Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'.
        Search first in the working directory and then in the configuration directory of abipy.

        Raises:
            RuntimeError if file is not found.
        """
        # Try in the current directory.
        path = os.path.join(os.getcwd(), cls.YAML_FILE)

        if os.path.exists(path):
            return cls.from_file(path)

        # Try in the configuration directory.
        path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE)

        if os.path.exists(path):
            return cls.from_file(path)

        err_msg = "Cannot locate %s neither in current directory nor in %s" % (
            cls.YAML_FILE, path)
        raise cls.Error(err_msg)

    def __str__(self):
        """String representation."""
        lines = [self.__class__.__name__ + ", Pid: %d" % self.pid]
        app = lines.append

        app("Scheduler options: %s" % str(self.sched_options))
        app(80 * "=")
        app(str(self.flow))

        return "\n".join(lines)

    @property
    def pid(self):
        """The pid of the process associated to the scheduler."""
        try:
            return self._pid

        except AttributeError:
            self._pid = os.getpid()
            return self._pid

    @property
    def pid_file(self):
        """
        Absolute path of the file with the pid.
        The file is located in the workdir of the flow
        """
        return self._pid_file

    @property
    def flow(self):
        """`Flow`."""
        return self._flow

    @property
    def num_excs(self):
        """Number of exceptions raised so far."""
        return len(self.exceptions)

    def get_delta_etime(self):
        """Returns a `timedelta` object representing with the elapsed time."""
        return timedelta(seconds=(time.time() - self.start_time))

    def add_flow(self, flow):
        """Add an :class:`Flow` flow to the scheduler."""
        if hasattr(self, "_flow"):
            raise self.Error("Only one flow can be added to the scheduler.")

        pid_file = os.path.join(flow.workdir, "_PyFlowScheduler.pid")

        if os.path.isfile(pid_file):
            flow.show_status()

            err_msg = ("""
                pid_file %s already exists
                There are two possibilities:

                   1) There's another instance of PyFlowScheduler running
                   2) The previous scheduler didn't exit in a clean way

                To solve case 1:
                   Kill the previous scheduler (use 'kill pid' where pid is the number reported in the file)
                   Then you can restart the new scheduler.

                To solve case 2:
                   Remove the pid_file and restart the scheduler.

                Exiting""" % pid_file)

            raise self.Error(err_msg)

        with open(pid_file, "w") as fh:
            fh.write(str(self.pid))

        self._pid_file = pid_file
        self._flow = flow

    def start(self):
        """
        Starts the scheduler in a new thread. Returns True on success.
        In standalone mode, this method will block until there are no more scheduled jobs.
        """
        self.history.append("Started on %s" % time.asctime())
        self.start_time = time.time()

        if has_sched_v3:
            self.sched.add_job(self.callback, "interval", **self.sched_options)
        else:
            self.sched.add_interval_job(self.callback, **self.sched_options)

        errors = self.flow.look_before_you_leap()
        if errors:
            print(errors)
            self.exceptions.append(errors)
            return False

        # Try to run the job immediately. If something goes wrong return without initializing the scheduler.
        self._runem_all()

        if self.exceptions:
            self.cleanup()
            self.send_email(
                msg=
                "Error while trying to run the flow for the first time!\n %s" %
                self.exceptions)
            return False

        self.sched.start()
        return True

    def _runem_all(self):
        """
        This function checks the status of all tasks,
        tries to fix tasks that went unconverged, abicritical, or queuecritical
        and tries to run all the tasks that can be submitted.
        """
        excs = []
        flow = self.flow

        # Allow to change the manager at run-time
        if self.use_dynamic_manager:
            from pymatgen.io.abinitio.tasks import TaskManager
            new_manager = TaskManager.from_user_config()
            for work in flow:
                work.set_manager(new_manager)

        nqjobs = flow.get_njobs_in_queue()
        if nqjobs is None:
            nqjobs = 0
            print('Cannot get njobs_inqueue')

        if nqjobs >= self.max_njobs_inqueue:
            print("Too many jobs in the queue, returning")
            return

        if self.max_nlaunch == -1:
            max_nlaunch = self.max_njobs_inqueue - nqjobs
        else:
            max_nlaunch = min(self.max_njobs_inqueue - nqjobs,
                              self.max_nlaunch)

        # check status
        flow.check_status()
        flow.show_status()

        # fix problems
        # Try to restart the unconverged tasks
        # TODO: do not fire here but prepare for firing in rapidfire
        for task in self.flow.unconverged_tasks:
            try:
                logger.info("Flow will try restart task %s" % task)
                fired = task.restart()
                if fired:
                    self.nlaunch += 1
                    max_nlaunch -= 1
                    if max_nlaunch == 0:
                        print("Restart: too many jobs in the queue, returning")
                        flow.pickle_dump()
                        return
            except Exception:
                excs.append(straceback())

        # moved here from within rapidfire ...
        # fix only prepares for restarting, and sets to ready
        flow.fix_critical()

        # update database
        flow.pickle_dump()

        #if self.num_restarts == self.max_num_restarts:
        #    info_msg = "Reached maximum number of restarts. Cannot restart anymore Returning"
        #    logger.info(info_msg)
        #    self.history.append(info_msg)
        #    return 1

        # Submit the tasks that are ready.
        try:
            nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch,
                                                 sleep_time=10)
            self.nlaunch += nlaunch

            if nlaunch:
                print("[%s] Number of launches: %d" %
                      (time.asctime(), nlaunch))

        except Exception:
            excs.append(straceback())

        flow.show_status()

        if excs:
            logger.critical("*** Scheduler exceptions:\n *** %s" %
                            "\n".join(excs))
            self.exceptions.extend(excs)

    def callback(self):
        """The function that will be executed by the scheduler."""
        try:
            return self._callback()
        except:
            # All exceptions raised here will trigger the shutdown!
            self.exceptions.append(straceback())
            self.shutdown(msg="Exception raised in callback!")

    def _callback(self):
        """The actual callback."""
        if self.DEBUG:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" %
                  get_open_fds())
        #print('before _runem_all in _callback')

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if self.verbose:
            print("all_ok", all_ok)

        if all_ok:
            self.shutdown(
                msg=
                "All tasks have reached S_OK. Will shutdown the scheduler and exit"
            )

        # Handle failures.
        err_msg = ""

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.REMINDME_S:
            self.num_reminders += 1
            msg = (
                "Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s "
                % (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += (
                    "\nThe scheduler tried to send an e-mail to remind the user\n"
                    + " but send_email returned %d. Aborting now" % retcode)
                err_msg += msg

        #if delta_etime.total_seconds() > self.MAX_ETIME_S:
        #    err_msg += "\nExceeded MAX_ETIME_S %s. Will shutdown the scheduler and exit" % self.MAX_ETIME_S

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.MAX_NUM_PYEXCS:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.MAX_NUM_PYEXCS)
            err_msg += boxed(msg)

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons
        # such as race conditions between different callbacks!)
        if self.nlaunch > self.SAFETY_RATIO * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.MAX_NUM_ABIERRS:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.MAX_NUM_ABIERRS)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_UNCONVERGED.
        #if self.flow.num_unconverged_tasks:
        #    # TODO: this is needed to avoid deadlocks, automatic restarting is not available yet
        #    msg = ("Found %d unconverged tasks."
        #           "Automatic restarting is not available yet. Will shutdown the scheduler and exit"
        #           % self.flow.num_unconverged_tasks)
        #    err_msg += boxed(msg)

        #deadlocks = self.detect_deadlocks()
        #if deadlocks:
        #    msg = ("Detected deadlocks in flow. Will shutdown the scheduler and exit"
        #           % self.flow.num_unconverged_tasks)
        #    err_msg += boxed(msg)

        if err_msg:
            # Something wrong. Quit
            self.shutdown(err_msg)

        return len(self.exceptions)

    def cleanup(self):
        """Cleanup routine: remove the pid file and save the pickle database"""
        try:
            os.remove(self.pid_file)
        except OSError:
            logger.critical("Could not remove pid_file")
            pass

        # Save the final status of the flow.
        self.flow.pickle_dump()

    def shutdown(self, msg):
        """Shutdown the scheduler."""
        try:
            self.cleanup()

            #if False and self.flow.has_db:
            #    try:
            #        self.flow.db_insert()
            #    except Exception:
            #         logger.critical("MongoDb insertion failed.")

            self.history.append("Completed on %s" % time.asctime())
            self.history.append("Elapsed time %s" % self.get_delta_etime())

            if self.DEBUG:
                print(">>>>> shutdown: Number of open file descriptors: %s" %
                      get_open_fds())

            retcode = self.send_email(msg)
            if self.DEBUG:
                print("send_mail retcode", retcode)

            # Write file with the list of exceptions:
            if self.exceptions:
                dump_file = os.path.join(self.flow.workdir, "_exceptions")
                with open(dump_file, "w") as fh:
                    fh.writelines(self.exceptions)
                    fh.write("Shutdown message:\n%s" % msg)

        finally:
            # Shutdown the scheduler thus allowing the process to exit.
            print('this should be the shutdown of the scheduler')

            # Unschedule all the jobs before calling shutdown
            self.sched.print_jobs()
            for job in self.sched.get_jobs():
                self.sched.unschedule_job(job)
            self.sched.print_jobs()

            self.sched.shutdown()
            # Uncomment the line below if shutdown does not work!
            #os.system("kill -9 %d" % os.getpid())

    def send_email(self, msg, tag=None):
        """
        Send an e-mail before completing the shutdown.
        Returns 0 if success.
        """
        try:
            return self._send_email(msg, tag)
        except:
            self.exceptions.append(straceback())
            return -2

    def _send_email(self, msg, tag):
        if self.mailto is None:
            return -1

        header = msg.splitlines()
        app = header.append

        app("Submitted on %s" % time.ctime(self.start_time))
        app("Completed on %s" % time.asctime())
        app("Elapsed time %s" % str(self.get_delta_etime()))
        app("Number of errored tasks: %d" % self.flow.num_errored_tasks)
        app("Number of unconverged tasks: %d" %
            self.flow.num_unconverged_tasks)

        strio = cStringIO()
        strio.writelines("\n".join(header) + 4 * "\n")

        # Add the status of the flow.
        self.flow.show_status(stream=strio)

        if self.exceptions:
            # Report the list of exceptions.
            strio.writelines(self.exceptions)

        if tag is None:
            tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]"

        return sendmail(subject=self.flow.name + tag,
                        text=strio.getvalue(),
                        mailto=self.mailto)
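
A minimal usage sketch for the class above, assuming an abipy Flow instance named flow already exists; the YAML keys mirror the keyword arguments documented in __init__ and the values are illustrative only.

# Sketch only: `flow` is a pre-built abipy Flow (not defined here).
config = """
seconds: 30
mailto: user@example.com
max_njobs_inqueue: 100
"""

sched = PyFlowScheduler.from_string(config)   # or PyFlowScheduler.from_user_config()
sched.add_flow(flow)                          # only one flow can be attached
if not sched.start():                         # blocks in standalone mode until all tasks finish
    print(sched.exceptions)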
Exemple #36
0
class Scheduler(object):
    """
    Manages a list of actions that should be performed at specific times.

    Note that this class *intentionally* contains hardly any error
    checking. The correct behaviour of the Scheduler depends on the
    parent code doing "the right thing". In particular, it is crucial
    that the reached() method be called with the next time step at
    which an event is expected to happen, which can be obtained using
    the next() method.

    Thus a typical (correct) usage is as follows:

        s = Scheduler()
        s.add(...)       # schedule some item(s)
        t = s.next()     # get next time step at which something should happen

        # [do some stuff based on the time step just obtained]

        s.reached(t)

    """
    def __init__(self):
        """
        Create a Scheduler.

        """
        self.items = []
        self.realtime_items = {}
        self.realtime_jobs = []  # while the scheduler is running, the job
        # associated with each realtime_item will be
        # stored in this list (otherwise it is empty)
        self.last = None

    def __iter__(self):
        return self

    def add(self,
            func,
            args=None,
            kwargs=None,
            at=None,
            at_end=False,
            every=None,
            after=None,
            realtime=False):
        """
        Register a function with the scheduler.

        Returns the scheduled item, which can be removed again by
        calling Scheduler._remove(item). Note that this may change in
        the future, so use with care.

        """
        if not hasattr(func, "__call__"):
            raise TypeError(
                "The function must be callable but object '%s' is of type '%s'"
                % (str(func), type(func)))
        assert at or every or at_end or (
            after and realtime
        ), "Use either `at`, `every` or `at_end` if not in real time mode."
        assert not (
            at is not None and every is not None
        ), "Cannot mix `at` with `every`. Please schedule separately."
        assert not (at is not None
                    and after is not None), "Delays don't mix with `at`."

        args = args or []
        kwargs = kwargs or {}
        callback = functools.partial(func, *args, **kwargs)

        if realtime:
            # Register the realtime job; the APScheduler job itself is created
            # later in start_realtime_jobs().
            self._add_realtime(func, at=at, every=every, after=after)
            if at_end:
                at_end_item = SingleTimeEvent(None, True, callback)
                self._add(at_end_item)
                return at_end_item
            return

        if at or (at_end and not every):
            at_item = SingleTimeEvent(at, at_end, callback)
            self._add(at_item)
            return at_item

        if every:
            every_item = RepeatingTimeEvent(every, after, at_end, callback)
            self._add(every_item)
            return every_item

    def _add(self, item):
        self.items.append(item)

    def _remove(self, item):
        self.items.remove(item)

    def _add_realtime(self, func, at=None, every=None, after=None):
        """
        Add a realtime job.

        Returns the Job object as obtained from APScheduler.add_job() etc.

        """
        if not hasattr(self, "apscheduler"):
            try:
                from apscheduler.scheduler import Scheduler as APScheduler
            except ImportError:
                log.error(
                    "Need APScheduler package to schedule realtime events.\n"
                    "Please install from http://pypi.python.org/pypi/APScheduler."
                )
                raise

            self.apscheduler = APScheduler()
            atexit.register(lambda: self.apscheduler.shutdown(wait=False))
            self.apscheduler.start()

        if after and isinstance(after, Number):
            # `after` can be either a delay in seconds, or a date/datetime.
            # Since the APScheduler API expects a date/datetime convert it.
            after = datetime.now() + timedelta(seconds=after)

        # Register the job so that it can be started/stopped as needed.
        self.realtime_items[func] = (at, every, after)

    def start_realtime_jobs(self):
        for (func, (at, every, after)) in self.realtime_items.items():
            if at:
                job = self.apscheduler.add_date_job(func, at)
            elif every:
                if after:
                    job = self.apscheduler.add_interval_job(func,
                                                            seconds=every,
                                                            start_date=after)
                else:
                    job = self.apscheduler.add_interval_job(func,
                                                            seconds=every)
            elif after:
                job = self.apscheduler.add_date_job(func, after)
            else:
                raise ValueError(
                    "Assertion violated. Use either `at`, `every` of `after`.")

            self.realtime_jobs.append(job)

    def stop_realtime_jobs(self):
        for job in self.realtime_jobs:
            self.apscheduler.unschedule_job(job)
        self.realtime_jobs = []

    def next(self):
        """
        Returns the time for the next action to be performed.

        Automatically called upon iteration of scheduler instance.

        """
        next_step = None
        stop = False  # This flag determines whether or not iteration should be
        # stopped after all items are checked.

        for item in self.items:
            if item.next_time is not None and (next_step is None
                                               or next_step > item.next_time):
                next_step = item.next_time
            if item.state == EV_REQUESTS_STOP_INTEGRATION:
                self._remove(item)
                stop = True

        if next_step is None:
            stop = True

        if stop is True:
            raise StopIteration

        if next_step < self.last:
            log.error(
                "Scheduler computed the next time step should be t = {:.2g} s, but the last one was already t = {:.2g} s."
                .format(next_step, self.last))
            raise ValueError(
                "Scheduler is corrupted. Requested a time step in the past: dt = {:.2g}."
                .format(next_step - self.last))
        return next_step

    def reached(self, time):
        """
        Notify the Scheduler that a certain point in time has been reached.

        It will perform the action(s) that were defined to happen at that time.

        """
        for item in self.items:
            if same_time(item.next_time, time):
                item.check_and_trigger(time)
                if item.state == EV_DONE:
                    self._remove(item)
        self.last = time

    def finalise(self, time):
        """
        Trigger all events that need to happen at the end of time integration.

        """
        for item in self.items:
            if item.trigger_on_stop:
                item.check_and_trigger(time, is_stop=True)

    def reset(self, time):
        """
        Override schedule so that internal time is now `time` and modify scheduled items accordingly.

        """
        self.last = None
        for item in self.items:
            item.reset(time)

    def _print_realtime_item(self, item, func_print=log.info):
        (f, (at, every, after)) = item
        func_print("'{}': <at={}, every={}, after={}>".format(
            f.__name__, at, every, after))

    def print_scheduled_items(self, func_print=log.info):
        for item in self.items:
            # this will call __str__ on the item, which should be defined for
            # all events
            func_print(item)
        for item in self.realtime_items.items():
            self._print_realtime_item(item, func_print)

    def clear(self):
        log.debug("Removing scheduled items:")
        self.print_scheduled_items(func_print=log.debug)
        self.items = []
        self.stop_realtime_jobs()
        self.realtime_items = {}

    def run(self, integrator, callbacks_at_scheduler_events=[]):
        """
        Integrate until an exit condition in the schedule has been met.

        The optional argument `callbacks_at_scheduler_events` should be a
        list of functions which are called whenever the time integration
        reaches a "checkpoint" where some event is scheduled. Each such
        function should expect the timestep t at which the event occurs as
        its single argument. Note that these functions are called just
        *before* the scheduled events are triggered. This is used, for
        example, to keep time-dependent fields up to date with the
        simulation time.

        """
        self.start_realtime_jobs()

        for t in self:
            assert (t >= integrator.cur_t)  # sanity check

            # If new items were scheduled after a previous time
            # integration finished, we can have t == integrator.cur_t.
            # However, this confuses the integrators so we don't integrate
            # in this case.
            if t != integrator.cur_t:
                integrator.advance_time(t)

            for f in callbacks_at_scheduler_events:
                f(t)
            self.reached(t)

        self.finalise(t)
        self.stop_realtime_jobs()
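
A short sketch of driving the Scheduler above through run(); ToyIntegrator is a hypothetical stand-in exposing the cur_t attribute and advance_time() method that run() expects, and the SingleTimeEvent behaviour (next_time set from `at`, EV_DONE after firing) is assumed from the add() code above.

class ToyIntegrator(object):
    """Hypothetical integrator exposing the interface run() relies on."""
    def __init__(self):
        self.cur_t = 0.0

    def advance_time(self, t):
        print("integrating up to t = {:.2g} s".format(t))
        self.cur_t = t


def checkpoint():
    print("checkpoint reached")


def save_final_state():
    print("saving final state")


s = Scheduler()
s.add(checkpoint, at=5e-12)            # single event at t = 5 ps
s.add(save_final_state, at_end=True)   # triggered by finalise() when iteration stops
s.run(ToyIntegrator())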
Exemple #37
0
class PeetsMediaTranslator(DatagramProtocol):
  ''' A translator protocol to relay local udp traffic to NDN and remote NDN traffic to local udp.
  This class also implements the strategy for fetching remote data.
  If the remote seq is unknown, use a short prefix without seq to probe;
  otherwise use a naive leaky-bucket-like method to fetch the remote data

  We separate the fetching of the media stream and the fetching of the control stream (RTCP, STUN, etc).
  '''
  __logger = Logger.get_logger('PeetsMediaTranslator')
  def __init__(self, factory, pipe_size):
    '''
    Args:
      factory (PeetsServerFactory) : the factory that stores necessary information about the local user
      pipe_size (int) : the pipeline size for fetching the remote media stream. Pipelining allows us to minimize the impact of the interest-data roundtrip delay.
    '''
    self.factory = factory
    self.pipe_size = pipe_size
    self.factory.set_local_status_callback(self.toggle_scheduler)
    # here we use two sockets, because the pending interests sent by a socket can not be satisfied
    # by the content published later by the same socket
    self.ccnx_int_socket = CcnxSocket()
    self.ccnx_int_socket.start()
    self.ccnx_con_socket = CcnxSocket()
    self.ccnx_con_socket.start()
    self.stream_closure = PeetsClosure(msg_callback = self.stream_callback, timeout_callback = self.stream_timeout_callback)
    self.probe_closure = PeetsClosure(msg_callback = self.probe_callback, timeout_callback = self.probe_timeout_callback)
    self.ctrl_probe_closure = PeetsClosure(msg_callback = self.ctrl_probe_callback, timeout_callback = self.ctrl_probe_timeout_callback)
    self.scheduler = None
    self.peets_status = None
    
  def toggle_scheduler(self, status):
    '''Start or stop the scheduler for periodic jobs.

    Args:
      status (str): either 'Running' or 'Stopped'
    '''
    if status == 'Running':
      self.peets_status = 'Running'
      self.scheduler = Scheduler()
      self.scheduler.start()
      self.scheduler.add_interval_job(self.fetch_media, seconds = 0.01, max_instances = 2)
    elif status == 'Stopped':
      self.peets_status = 'Stopped'
      for job in self.scheduler.get_jobs():
        self.scheduler.unschedule_job(job)
      self.scheduler.shutdown(wait = True)
      self.scheduler = None
       
  def datagramReceived(self, data, (host, port)):
    '''Intercept the WebRTC traffic from the local front end and relay it to NDN

    Args:
      data (bytes) : the UDP data
      host (str) : the IP of the source
      port (int) : the port of the source

    1. Differentiate RTP vs RTCP
    RTCP: packet type (PT) = 200 - 208
    SR (sender report)        200
    RR (receiver report)      201
    SDES (source description) 202
    BYE (goodbye)             203
    App (application-defined) 204
    other types go until      208
    RFC 5761 (implemented by WebRTC) makes sure that RTP's PT field
    plus M field (which is equal to the PT field in RTCP) would not conflict

    2. Differentiate STUN vs RTP & RTCP
    STUN: the most significant 2 bits of every STUN msg MUST be zeros (RFC 5389)
    RTP & RTCP: version bits (2 bits) value equals 2

    Note:
    Tried to fake a STUN request and response so that we don't have to relay STUN msgs over NDN, but failed.
    It worked for a time, although with a significantly high rate of STUN message exchanges. We needed to use the username exchanged in the SDPs for STUN; it worked for a while but magically stopped working, so now we still send it over NDN.

    Note 2:
    We only publish one media stream from the local user (with the default offer SDP). We publish RTCP and STUN for each PeerConnection though.
    '''
    # mask to test most significant 2 bits
    msg = bytearray(data)
    c = self.factory.client

    if msg[0] & 0xC0 == 0 or msg[1] > 199 and msg[1] < 209:
      try:
        ctrl_seq = c.ctrl_seqs[port]
        cid = c.remote_cids[port]
        # RTCP and STUN are per peer connection; the cid of the remote user identifies the peer connection so that the remote user knows which one to fetch
        name = c.local_user.get_ctrl_prefix() + '/' + cid + '/' + str(ctrl_seq)
        c.ctrl_seqs[port] = ctrl_seq + 1
        self.ccnx_con_socket.publish_content(name, data)
      except KeyError:
        pass

    elif c.media_source_port == port:
      # only publish one media stream
      name = c.local_user.get_media_prefix() + '/' + str(c.local_seq)
      c.local_seq += 1
      self.ccnx_con_socket.publish_content(name, data)
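
A stand-alone illustration of the demultiplexing test described in the docstring above; the sample packet headers are hypothetical byte strings, not captured traffic.

def is_control_packet(data):
  '''Return True for STUN or RTCP, False for RTP (same test as datagramReceived).'''
  msg = bytearray(data)
  return msg[0] & 0xC0 == 0 or 199 < msg[1] < 209

stun_like = b'\x00\x01\x00\x00'  # top two bits zero -> STUN (RFC 5389)
rtcp_like = b'\x80\xc8\x00\x06'  # version 2, PT = 200 (sender report) -> RTCP
rtp_like = b'\x80\x60\x00\x01'   # version 2, PT = 96 (dynamic payload) -> RTP

for label, pkt in [('stun', stun_like), ('rtcp', rtcp_like), ('rtp', rtp_like)]:
  print('%s -> control=%s' % (label, is_control_packet(pkt)))
# prints: stun -> control=True, rtcp -> control=True, rtp -> control=False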
Exemple #38
0
class LocalScheduler(object):
    scheduler_registry = {}
    _lockdown = False

    @classmethod
    def get(cls, name):
        return cls.scheduler_registry[name]

    @classmethod
    def get_all(cls):
        return cls.scheduler_registry.values()

    @classmethod
    def shutdown_all(cls):
        for scheduler in cls.scheduler_registry.values():
            scheduler.stop()

    @classmethod
    def lockdown(cls):
        cls._lockdown = True

    @classmethod
    def clear_all(cls):
        for scheduler in cls.scheduler_registry.values():
            scheduler.clear()

    def __init__(self, name, label=None):
        self.scheduled_jobs = {}
        self._scheduler = None
        self.name = name
        self.label = label
        self.__class__.scheduler_registry[self.name] = self

    def start(self):
        logger.info('Starting scheduler: %s' % self.name)
        if not self.__class__._lockdown:
            self._scheduler = OriginalScheduler()
            for job in self.scheduled_jobs.values():
                self._schedule_job(job)

            self._scheduler.start()
        else:
            logger.debug('lockdown in effect')

    def stop(self):
        if self._scheduler:
            self._scheduler.shutdown()
            del self._scheduler
            self._scheduler = None

    @property
    def running(self):
        if self._scheduler:
            return self._scheduler.running
        else:
            return False

    def clear(self):
        for job in self.scheduled_jobs.values():
            self.stop_job(job)

    def stop_job(self, job):
        if self.running:
            self._scheduler.unschedule_job(job._job)

        del (self.scheduled_jobs[job.name])
        job.scheduler = None

    def _schedule_job(self, job):
        if isinstance(job, IntervalJob):
            job._job = self._scheduler.add_interval_job(
                job.function, *job.args, **job.kwargs)
        elif isinstance(job, DateJob):
            job._job = self._scheduler.add_date_job(job.function, *job.args,
                                                    **job.kwargs)
        elif isinstance(job, CronJob):
            job._job = self._scheduler.add_cron_job(job.function, *job.args,
                                                    **job.kwargs)
        else:
            raise UnknownJobClass

    def add_job(self, job):
        logger.debug('adding job')
        if job.scheduler or job.name in self.scheduled_jobs.keys():
            raise AlreadyScheduled

        if self._scheduler:
            self._schedule_job(job)

        job.scheduler = self
        self.scheduled_jobs[job.name] = job

    def add_interval_job(self, name, label, function, *args, **kwargs):
        job = IntervalJob(name=name,
                          label=label,
                          function=function,
                          *args,
                          **kwargs)
        self.add_job(job)
        return job

    def add_date_job(self, name, label, function, *args, **kwargs):
        job = DateJob(name=name,
                      label=label,
                      function=function,
                      *args,
                      **kwargs)
        self.add_job(job)
        return job

    def add_cron_job(self, name, label, function, *args, **kwargs):
        job = CronJob(name=name,
                      label=label,
                      function=function,
                      *args,
                      **kwargs)
        self.add_job(job)
        return job

    def get_job_list(self):
        return self.scheduled_jobs.values()

    def get_job_by_name(self, name):
        try:
            return self.scheduled_jobs[name]
        except KeyError:
            raise UnknownJob

    def __unicode__(self):
        return unicode(self.label or self.name)
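
A minimal sketch of using the registry-based wrapper above; heartbeat is a hypothetical function, and the seconds keyword is assumed to flow through IntervalJob to OriginalScheduler.add_interval_job via _schedule_job.

def heartbeat():
    print('still alive')

maintenance = LocalScheduler('maintenance', label='Maintenance tasks')
maintenance.add_interval_job('heartbeat', 'Heartbeat job', heartbeat, seconds=10)
maintenance.start()                       # creates OriginalScheduler and schedules pending jobs
# ... later, via the class registry ...
LocalScheduler.get('maintenance').stop()  # or LocalScheduler.shutdown_all()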
class HMScheduler( Base ):
    '''
    The HMScheduler is used to periodically send messages to HouseMonitor.  The commands can be anything, including:

    # Report status
    # Turn on and off devices.

    You control the scheduler by sending messages to the scheduler using pubsub.
    '''

    ''' The queue that is used to send messages to the rest of the system. '''
    __input_queue = None

    ''' The scheduler object '''
    scheduler = None

    ''' A dictionary of the current jobs that are running '''
    jobs = defaultdict( list )

    previous_datetime = datetime.utcnow()

    def __init__( self, queue ):
        '''
        Initialize the HMScheduler.

        # Store the queue into __input_queue
        # Associate **add_interval** with Constants.TopicNames.SchedulerAddIntervalStep
        # Associate **add_cron** with Constants.TopicNames.SchedulerAddCronStep
        # Associate **add_date** with Constants.TopicNames.SchedulerAddDateStep
        # Associate **add_one_shot** with Constants.TopicNames.SchedulerAddOneShotStep
        # Associate **delete_job** with Constants.TopicNames.SchedulerDeleteJob
        '''
        super( HMScheduler, self ).__init__()
        self.__input_queue = queue
        pub.subscribe( self.add_interval, Constants.TopicNames.SchedulerAddIntervalStep )
        pub.subscribe( self.add_cron, Constants.TopicNames.SchedulerAddCronStep )
        pub.subscribe( self.add_date, Constants.TopicNames.SchedulerAddDateStep )
        pub.subscribe( self.add_one_shot, Constants.TopicNames.SchedulerAddOneShotStep )
        pub.subscribe( self.deleteJob, Constants.TopicNames.SchedulerDeleteJob )
        pub.subscribe( self.print_jobs, Constants.TopicNames.SchedulerPrintJobs )

    @property
    def scheduler_topic_name( self ):
        ''' The topic name to which this routine subscribes.'''
        return Constants.TopicNames.SchedulerStep

    @property
    def logger_name( self ):
        ''' Set the logger level. '''
        return Constants.LogKeys.Scheduler

    def start( self ):
        '''
        Start the Scheduler.

        For more information on the parameter see:

        .. seealso:: http://packages.python.org/APScheduler/#starting-the-scheduler

        '''
        self.logger.debug( 'Scheduler starting' )
        self.scheduler = Scheduler()
#        self.logger.debug( 'Setting jobstore to HouseMonitor.db' )
#        self.scheduler.add_jobstore(ShelveJobStore('HouseMonitor.db'), 'shelve')
        self.scheduler.start()

        name = 'scheduled status check'
        device = 'status'
        port = 'scheduler'
        listeners = [Constants.TopicNames.Statistics, Constants.TopicNames.CurrentValueStep]
        scheduler_id = str( uuid.uuid4() )
        args = name, device, port, listeners, scheduler_id
        self.scheduler.add_interval_job( self.sendCommand, minutes=10, args=args )

        name = 'uptime'
        device = 'HouseMonitor'
        port = 'uptime'
        listeners = [Constants.TopicNames.UpTime, Constants.TopicNames.CurrentValueStep]
        scheduler_id = str( uuid.uuid4() )
        args = name, device, port, listeners, scheduler_id
        self.scheduler.add_interval_job( self.sendCommand, seconds=5, args=args )

        name = 'Pulse'
        device = '0x13a20040902a02'
        port = 'DIO-0'
        listeners = [ Constants.TopicNames.StatusPanel_SystemCheck, Constants.TopicNames.ZigBeeOutput]
        scheduler_id = str( uuid.uuid4() )
        args = name, device, port, listeners, scheduler_id
        self.scheduler.add_interval_job( self.sendCommand, seconds=5, args=args )

    def add_interval( self, weeks=0, days=0, hours=0, minutes=0, seconds=0, start_date=None, args=None, kwargs=None ):
        '''
        Schedule an interval at which sendCommand will be called.

        For more information on the parameter see:

            .. seealso:: http://packages.python.org/APScheduler/intervalschedule.html

        :param name: the name of the job to start. This will be used to identify the job if there is a need to delete it later.
        :type name: str
        :param weeks: the number of weeks between calls.
        :type weeks: int
        :param days: the number of days between calls.
        :type days: int
        :param hours: the number of hours between calls.
        :type hours: int
        :param minutes: the number of minutes between calls.
        :type minutes: int
        :param seconds: the number of seconds between calls.
        :type seconds: int
        :param start_date: the time and date to start the interval.
        :type start_date: datetime
        :param args: the args to pass to sendCommand
        :param kwargs: the kwargs to pass to sendCommand
        :raises: None

        '''
        name = args[0]
        self.logger.debug( 'interval ({}) add {} {} {} {} {} {}'.format( name, weeks, days, hours, minutes, seconds, start_date ) )
        token = self.scheduler.add_interval_job( self.sendCommand, weeks=weeks,
                        days=days, hours=hours, minutes=minutes, seconds=seconds,
                        start_date=start_date, args=args, kwargs=kwargs, name=name )
        self.jobs[name].append( token )

    def add_cron( self, year=None, month=None, day=None, week=None, day_of_week=None,
                  hour=None, minute=None, second=None, start_date=None, args=None, kwargs=None ):
        '''
        Schedule a cron command to call sendCommand.

        For more information on the parameter see:

            .. seealso:: http://packages.python.org/APScheduler/cronschedule.html

        :param name: the name of the cron job to start. This will be used to identify the job if there is a need to delete it later.
        :type name: str
        :param weeks: the number of weeks between calls.
        :type weeks: int
        :param days: the number of days between calls.
        :type days: int
        :param hours: the number of hours between calls.
        :type hours: int
        :param minutes: the number of minutes between calls.
        :type minutes: int
        :param seconds: the number of seconds between calls.
        :type seconds: int
        :param start_date: the time and date to start the interval.
        :type start_date: datetime
        :param args: the args to pass to sendCommand
        :param kwargs: the kwargs to pass to sendCommand
        :raises: None

        '''
        name = args[0]
        self.logger.debug( 'set cron({}) at {}/{}/{} {}:{}:{} {} {} {}'.format( name, year, month,
                                day, hour, minute, second, week, day_of_week, start_date ) )
        token = self.scheduler.add_cron_job( self.sendCommand, year=year,
                    month=month, day=day, week=week, day_of_week=day_of_week, hour=hour,
                    minute=minute, second=second, start_date=start_date, args=args, kwargs=kwargs )
        self.jobs[name].append( token )

    def add_date( self, date, args, **kwargs ):
        '''
        Schedule a specific data and time to call sendCommand.

        For more information on the parameter see:

            .. seealso:: http://packages.python.org/APScheduler/dateschedule.html

        :param name: the name of the job to start. This will be used to identify the job if there is a need to delete it later.
        :type name: str
        :param date: the time at which to call sendCommand
        :type date: datetime
        :param args: the arguments to call sendCommand with
        :type args: tuple
        :param kwargs: the kwargs to call sendCommand with
        :type kwargs: dictionary

        '''
        name = args[0]

        self.logger.debug( 'add date({}) at {}'.format( name, date ) )
        token = self.scheduler.add_date_job( self.sendCommand, date=date,
                                                             args=args, kwargs=kwargs )
        self.jobs[name].append( token )

    def add_one_shot( self, delta, args=None, kwargs=None ):
        '''
        Schedule sendCommand to be called after some interval. (ie. in 5 seconds or one hour).  For more information
        on timeDelta see:

        .. seealso:: http://docs.python.org/2/library/datetime.html#timedelta-objects

        :param delta: the time until sendCommand is called
        :type delta: timedelta
        :param args: the arguments to call sendCommand with
        :type args: tuple
        :param kwargs: the kwargs to call sendCommand with
        :type kwargs: dictionary

        '''
        name = args[0]
        now = GetDateTime()
        dt = now.datetime()
        dt = dt + delta
        token = self.scheduler.add_date_job( self.sendCommand, date=dt,
                                name=name, args=args, kwargs=kwargs )
        self.jobs[name].append( token )

    def deleteJob( self, name ):
        '''
        Delete a specified job

        :param name: the name of the job to delete.
        :type name: str

        '''
        item = None
        if name in self.jobs:
            for number, item in enumerate( self.jobs[name] ):
                try:
                    self.scheduler.unschedule_job( item )
                except KeyError:
                    pass
                self.logger.info( '{} "{}" removed from scheduler'.format( number, name ) )
            self.jobs[name] = []

    def shutdown( self, wait=True ):
        '''
        shutdown the scheduler

        .. seealso: http://packages.python.org/APScheduler/#shutting-down-the-scheduler

        :param wait: determines whether to wait on threads to complete.
        :type wait: boolean

        '''

        if ( self.scheduler != None ):
            self.scheduler.shutdown( wait=wait )
            self.scheduler = None

    def print_jobs( self ):
        '''
        print the currently scheduled jobs

        .. seealso: http://packages.python.org/APScheduler/#getting-a-list-of-scheduled-jobs

        '''
        self.scheduler.print_jobs()

    def sendCommand( self, name, device, port, listeners=[], scheduler_id=str( uuid.uuid4() ) ):
        """
        sendCommand will send the command to the HouseMonitor system

        :param device: the device name.
        :type device: str
        :param port: the port name.
        :type port: str
        :param listeners: the listeners that this command will be routed to.
        :type listeners: list of strings that contains the topic name of the listeners.  Most can be found in Constants.TopicNames

        """
        try:
            data = {
                Constants.EnvelopeContents.VALUE: 1,
                Constants.EnvelopeContents.DEVICE: device,
                Constants.EnvelopeContents.PORT: port,

                Constants.EnvelopeContents.SCHEDULER_ID: scheduler_id,
                Constants.EnvelopeContents.ARRIVAL_TIME: datetime.utcnow(),
                Constants.EnvelopeContents.STEPS: copy.copy( listeners ),
                Constants.EnvelopeContents.NAME: name,
            }
            de = DataEnvelope( Constants.EnvelopeTypes.STATUS, **data )
            self.logger.debug( 'name: {} listeners: {} scheduler_id:  {}'.
                               format( name, listeners,
                                       data[Constants.EnvelopeContents.STEPS] ) )
            self.__input_queue.transmit( de, self.__input_queue.LOW_PRIORITY )
        except Exception as ex:
            self.logger.exception( "Exception in SendCommand: {}".format( ex ) )
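
A sketch of driving the HMScheduler above through pubsub instead of calling its methods directly. It assumes an HMScheduler instance has already been created and start()ed, that the kwargs message protocol of pypubsub is in use, and that pub, Constants, and uuid are available in the same module namespace as the class; the args tuple layout (name, device, port, listeners, scheduler_id) mirrors what start() builds for its own interval jobs.

listeners = [Constants.TopicNames.Statistics, Constants.TopicNames.CurrentValueStep]
args = ( 'nightly status check', 'status', 'scheduler', listeners, str( uuid.uuid4() ) )

# add_interval() is subscribed to SchedulerAddIntervalStep in __init__
pub.sendMessage( Constants.TopicNames.SchedulerAddIntervalStep, minutes=30, args=args )

# remove the job again by name (deleteJob is subscribed to SchedulerDeleteJob)
pub.sendMessage( Constants.TopicNames.SchedulerDeleteJob, name='nightly status check' )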
class AlertSchedulerHandler():
  FILENAME = 'definitions.json'
  TYPE_PORT = 'PORT'
  TYPE_METRIC = 'METRIC'
  TYPE_SCRIPT = 'SCRIPT'
  TYPE_WEB = 'WEB'
  TYPE_RECOVERY = 'RECOVERY'

  def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir,
      alert_grace_period, cluster_configuration, config, recovery_manager,
      in_minutes=True):

    self.cachedir = cachedir
    self.stacks_dir = stacks_dir
    self.common_services_dir = common_services_dir
    self.host_scripts_dir = host_scripts_dir

    self._cluster_configuration = cluster_configuration
    
    if not os.path.exists(cachedir):
      try:
        os.makedirs(cachedir)
      except:
        logger.critical("[AlertScheduler] Could not create the cache directory {0}".format(cachedir))

    self.APS_CONFIG = {
      'apscheduler.threadpool.core_threads': 3,
      'apscheduler.coalesce': True,
      'apscheduler.standalone': False,
      'apscheduler.misfire_grace_time': alert_grace_period
    }

    self._collector = AlertCollector()
    self.__scheduler = Scheduler(self.APS_CONFIG)
    self.__in_minutes = in_minutes
    self.config = config
    self.recovery_manager = recovery_manager

    # register python exit handler
    ExitHelper().register(self.exit_handler)


  def exit_handler(self):
    """
    Exit handler
    """
    self.stop()


  def update_definitions(self, heartbeat):
    """
    Updates the persisted alert definitions JSON.
    :param heartbeat:
    :return:
    """
    if 'alertDefinitionCommands' not in heartbeat:
      logger.warning("There are no alert definition commands in the heartbeat; unable to update definitions")
      return

    # prune out things we don't want to store
    alert_definitions = []
    for command in heartbeat['alertDefinitionCommands']:
      command_copy = command.copy()

      # no need to store these since we always use the in-memory cached values
      if 'configurations' in command_copy:
        del command_copy['configurations']

      alert_definitions.append(command_copy)

    # write out the new definitions
    with open(os.path.join(self.cachedir, self.FILENAME), 'w') as f:
      json.dump(alert_definitions, f, indent=2)

    # reschedule only the jobs that have changed
    self.reschedule()


  def __make_function(self, alert_def):
    return lambda: alert_def.collect()


  def start(self):
    """ loads definitions from file and starts the scheduler """

    if self.__scheduler is None:
      return

    if self.__scheduler.running:
      self.__scheduler.shutdown(wait=False)
      self.__scheduler = Scheduler(self.APS_CONFIG)

    alert_callables = self.__load_definitions()

    # schedule each definition
    for _callable in alert_callables:
      self.schedule_definition(_callable)

    logger.info("[AlertScheduler] Starting {0}; currently running: {1}".format(
      str(self.__scheduler), str(self.__scheduler.running)))

    self.__scheduler.start()


  def stop(self):
    if not self.__scheduler is None:
      self.__scheduler.shutdown(wait=False)
      self.__scheduler = Scheduler(self.APS_CONFIG)

    logger.info("[AlertScheduler] Stopped the alert scheduler.")

  def reschedule(self):
    """
    Removes jobs that are scheduled where their UUID no longer is valid.
    Schedules jobs where the definition UUID is not currently scheduled.
    """
    jobs_scheduled = 0
    jobs_removed = 0

    definitions = self.__load_definitions()
    scheduled_jobs = self.__scheduler.get_jobs()

    # for every scheduled job, see if its UUID is still valid
    for scheduled_job in scheduled_jobs:
      uuid_valid = False

      for definition in definitions:
        definition_uuid = definition.get_uuid()
        if scheduled_job.name == definition_uuid:
          uuid_valid = True
          break

      # jobs without valid UUIDs should be unscheduled
      if uuid_valid == False:
        jobs_removed += 1
        logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name))
        self._collector.remove_by_uuid(scheduled_job.name)
        self.__scheduler.unschedule_job(scheduled_job)

    # for every definition, determine if there is a scheduled job
    for definition in definitions:
      definition_scheduled = False
      for scheduled_job in scheduled_jobs:
        definition_uuid = definition.get_uuid()
        if definition_uuid == scheduled_job.name:
          definition_scheduled = True
          break

      # if no jobs are found with the definitions UUID, schedule it
      if definition_scheduled == False:
        jobs_scheduled += 1
        self.schedule_definition(definition)

    logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format(
        str(jobs_scheduled), str(jobs_removed)))


  def reschedule_all(self):
    """
    Removes jobs that are scheduled where their UUID no longer is valid.
    Schedules jobs where the definition UUID is not currently scheduled.
    """
    jobs_scheduled = 0
    jobs_removed = 0

    definitions = self.__load_definitions()
    scheduled_jobs = self.__scheduler.get_jobs()

    # unschedule all scheduled jobs
    for scheduled_job in scheduled_jobs:
        jobs_removed += 1
        logger.info("[AlertScheduler] Unscheduling {0}".format(scheduled_job.name))
        self._collector.remove_by_uuid(scheduled_job.name)
        self.__scheduler.unschedule_job(scheduled_job)

    # for every definition, schedule a job
    for definition in definitions:
        jobs_scheduled += 1
        self.schedule_definition(definition)

    logger.info("[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled".format(
      str(jobs_scheduled), str(jobs_removed)))


  def collector(self):
    """ gets the collector for reporting to the server """
    return self._collector


  def __load_definitions(self):
    """
    Loads all alert definitions from a file. All clusters are stored in
    a single file.
    :return:
    """
    definitions = []

    all_commands = None
    alerts_definitions_path = os.path.join(self.cachedir, self.FILENAME)
    try:
      with open(alerts_definitions_path) as fp:
        all_commands = json.load(fp)
    except:
      logger.warning('[AlertScheduler] {0} not found or invalid. No alerts will be scheduled until registration occurs.'.format(alerts_definitions_path))
      return definitions

    for command_json in all_commands:
      clusterName = '' if not 'clusterName' in command_json else command_json['clusterName']
      hostName = '' if not 'hostName' in command_json else command_json['hostName']

      for definition in command_json['alertDefinitions']:
        alert = self.__json_to_callable(clusterName, hostName, definition)

        if alert is None:
          continue

        alert.set_helpers(self._collector, self._cluster_configuration)

        definitions.append(alert)

    return definitions


  def __json_to_callable(self, clusterName, hostName, json_definition):
    """
    converts the json that represents all aspects of a definition
    and makes an object that extends BaseAlert that is used for individual
    """
    alert = None

    try:
      source = json_definition['source']
      source_type = source.get('type', '')

      if logger.isEnabledFor(logging.DEBUG):
        logger.debug("[AlertScheduler] Creating job type {0} with {1}".format(source_type, str(json_definition)))


      if source_type == AlertSchedulerHandler.TYPE_METRIC:
        alert = MetricAlert(json_definition, source, self.config)
      elif source_type == AlertSchedulerHandler.TYPE_PORT:
        alert = PortAlert(json_definition, source)
      elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
        source['stacks_directory'] = self.stacks_dir
        source['common_services_directory'] = self.common_services_dir
        source['host_scripts_directory'] = self.host_scripts_dir
        alert = ScriptAlert(json_definition, source, self.config)
      elif source_type == AlertSchedulerHandler.TYPE_WEB:
        alert = WebAlert(json_definition, source, self.config)
      elif source_type == AlertSchedulerHandler.TYPE_RECOVERY:
        alert = RecoveryAlert(json_definition, source, self.recovery_manager)

      if alert is not None:
        alert.set_cluster(clusterName, hostName)

    except Exception:
      logger.exception("[AlertScheduler] Unable to load an invalid alert definition. It will be skipped.")

    return alert
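
The reschedule() method above boils down to a set difference on definition UUIDs; a distilled sketch with plain data (no APScheduler job objects), where the UUID strings are illustrative.

scheduled = {'uuid-1', 'uuid-2'}   # job.name of every currently scheduled job
current = {'uuid-2', 'uuid-3'}     # definition.get_uuid() of every loaded definition

to_unschedule = scheduled - current   # stale jobs -> unschedule_job() + remove_by_uuid()
to_schedule = current - scheduled     # new definitions -> schedule_definition()

print('unschedule: %s' % sorted(to_unschedule))   # ['uuid-1']
print('schedule:   %s' % sorted(to_schedule))     # ['uuid-3']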
Exemple #41
0
class beholdDaemon(Daemon):
    #class application():
    def __init__(self, pidfile):
        Daemon.__init__(self, pidfile)
        self.sched = Scheduler()
        self.processingJob = None
        self.sidsJob = None

        self.logger = ''
        self.rtmp = ''

        self.username = ''
        self.password = ''
        self.host = ''
        self.db = ''
        self.connection = ''

        self.prefix = './'

        #self.username = dboptions[ 'username' ]
        #self.password = dboptions[ 'password' ]
        #self.host = dboptions[ 'host' ]
        #self.db = dboptions[ 'database' ]

        #try:
        #self.connection = PySQLPool.getNewConnection( username = self.username, \
        #password = self.password, \
        #host = self.host, \
        #db = self.db )
        #except:
        #self.logger.error( 'database connection failed' )
        #die( 'database connection failed' )

        self.sids = {}  # sid to cid dictionary
        self.binaries = {}
        self.times = {}

        # all default values can be overriden with corresponding methods
        self.binaries['rtmpdump'] = './rtmpdump'  # default location
        self.binaries['yamdi'] = './yamdi'  # default location
        self.times['interval'] = int(
            10)  # default value, FIXME: currently will not work with other values
        self.times['killafter'] = int(self.times['interval'] *
                                      1.5)  # default value
        self.times['sidrefresh'] = int(30)  # default value
        self.times['overlap'] = int(15)  # default value in seconds

#    def stop( self ):
#	self.logger.info( 'shutting down beholder' )
#	Daemon.stop( self )

    def setDatabase(self, dboptions):
        self.username = dboptions['username']
        self.password = dboptions['password']
        self.host = dboptions['host']
        self.db = dboptions['database']

    def setRTMPHost(self, rtmp):
        self.rtmp = rtmp

    def setLogger(self, logger):
        self.logger = logger

    def setBinaryFilename(self, binary, filename):
        self.binaries[binary] = filename

    def setTime(self, name, value):
        self.times[name] = int(value)

    def setPrefix(self, prefix):
        self.prefix = prefix

    def _dbConnect(self):
        try:
            self.connection = PySQLPool.getNewConnection( username = self.username, \
                password = self.password, \
                host = self.host, \
                db = self.db )
        except:
            #	    self.logger.error( 'database connection failed' )
            die('database connection failed')

    #def _signalHandler( signum, frame ):
#	if signum == signal.SIGTERM:
#self.logger.info( 'beholder shutting down' )
#sys.exit( 0 )

    def _getSids(self):
        try:
            query = PySQLPool.getNewQuery(self.connection)
            query.Query('select sid, url from cam where dump is true')
            self.sids = {}
            for row in query.record:
                #	     	print( '%s - %s' % ( row[ 'sid' ], row[ 'url' ] ) )
                self.sids[row['sid']] = row['url']
            self.logger.info("sid list updated")
        except:
            self.logger.warning("getSids failed")

    def _startProcessing(self):
        now = datetime.datetime.now()
        self.logger.info('processing started at %s' % (now))

        #TODO: !!!
        # checking clocks
        #	passed = now - self.schedulerStartTime
        if self.lastRun is not None:
            passed = now - self.lastRun
            minutes = passed.seconds / 60
            self.logger.debug('minutes passed %s' % minutes)
            if minutes != self.times['interval']:
                # rounding minutes (FIXME: this will work only for 10)
                m = int(round(now.minute, -1))

                if m == 60:
                    m, s = 59, 59
                else:
                    s = 0

                self.schedulerStartTime = datetime.datetime(year=now.year,
                                                            month=now.month,
                                                            day=now.day,
                                                            hour=now.hour,
                                                            minute=m,
                                                            second=s)

                if self.schedulerStartTime < now:
                    self.schedulerStartTime += datetime.timedelta(
                        minutes=self.times['interval'])

                # reinit scheduler
                self._unscheduleJobs()
                self._scheduleJobsInit()

                self.lastRun = None

                return  # there is nothing more to do here

        self.logger.debug('kill time interval equals %s' %
                          (self.times['killafter']))

        suddenDeathTime = self.schedulerStartTime + datetime.timedelta(
            minutes=self.times['killafter'])

        if suddenDeathTime.second == 59:  # case of hh:59:59
            suddenDeathTime += datetime.timedelta(seconds=1)

        self.logger.debug('sudden death time is set to %s' % (suddenDeathTime))

        self.schedulerStartTime += datetime.timedelta(
            minutes=self.times['interval'])

        if self.schedulerStartTime.second == 59:  # case of hh:59:59
            self.schedulerStartTime += datetime.timedelta(seconds=1)

        self.logger.info('threads will stop recording at %s' %
                         (self.schedulerStartTime))
        self.logger.debug('sids are %s' % (self.sids))

        for sid in self.sids:
            self.logger.debug('creating thread for %s' % (sid))
            d = streamDumper(
                self.logger,
                self.connection,
                sid,
                self.rtmp,
                self.sids[sid],
                self.schedulerStartTime.strftime(
                    '%d-%m-%Y %H:%M:%S'),  # it's a stop time for thread
                suddenDeathTime.strftime('%d-%m-%Y %H:%M:%S'),
                self.prefix,
                self.binaries['rtmpdump'],
                self.binaries['yamdi'])
            d.start()
            self.logger.info('thread started for %s sid' % sid)

        self.lastRun = now

    def _initScheduler(self, currentTime):
        # rounding minutes (FIXME: this will work only for 10)
        m = int(round(currentTime.minute, -1))

        if m == 60:
            m, s = 59, 59
        else:
            s = 0

        nextTime = datetime.datetime(year=currentTime.year,
                                     month=currentTime.month,
                                     day=currentTime.day,
                                     hour=currentTime.hour,
                                     minute=m,
                                     second=s)

        if nextTime > currentTime:
            self.schedulerStartTime = nextTime
        else:
            self.schedulerStartTime = nextTime + datetime.timedelta(
                minutes=self.times['interval'])

        self.logger.info('scheduled processing start time is %s' %
                         self.schedulerStartTime)

        #	self.sched.add_date_job( self._startProcessing, self.schedulerStartTime - datetime.timedelta( seconds = self.times[ 'overlap' ] ) )
        #	self.sched.add_date_job( self._setupIntervalScheduler, self.schedulerStartTime - datetime.timedelta( seconds = self.times[ 'overlap' ] ) )
        self._scheduleJobsInit()

        self.sched.daemonic = False  # scheduler will not let caller thread exit
        self.sched.start()
        self.logger.info('scheduler initialization completed')

    def _scheduleJobsInit(self):
        self.lastRun = None
        starttime = self.schedulerStartTime - datetime.timedelta(
            seconds=self.times['overlap'])
        #	self.sched.add_date_job( self._startProcessing, self.schedulerStartTime - datetime.timedelta( seconds = self.times[ 'overlap' ] ) )
        #	self.sched.add_date_job( self._setupIntervalScheduler, self.schedulerStartTime - datetime.timedelta( seconds = self.times[ 'overlap' ] ) )
        self.sched.add_date_job(self._startProcessing, starttime)
        self.sched.add_date_job(self._setupIntervalScheduler, starttime)

    def _setupIntervalScheduler(self):
        self.processingJob = self.sched.add_interval_job(
            self._startProcessing, minutes=int(self.times['interval']))
        self.sidsJob = self.sched.add_interval_job(
            self._getSids, minutes=int(self.times['sidrefresh']))

    def _unscheduleJobs(self):
        self.sched.unschedule_job(self.processingJob)
        self.sched.unschedule_job(self.sidsJob)

    def run(self):
        #	signal.signal( signal.SIGTERM, self._signalHandler ) # setting up shutdown handler
        self.logger.info('beholder started. big brother is watching you :)')
        self._dbConnect()
        self._getSids()
        self._initScheduler(datetime.datetime.now())
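The beholdDaemon above aligns its first run to a clock boundary with one-shot date jobs and only then installs the recurring interval jobs. Below is a minimal sketch of that bootstrap pattern against the APScheduler 2.x API; the process() function, the 10-minute boundary and the daemonic setting are illustrative assumptions, not parts of the original daemon.

import datetime

from apscheduler.scheduler import Scheduler


def process():
    # stand-in for the real recording work
    print('processing run at %s' % datetime.datetime.now())


def install_interval_jobs():
    # once the aligned start time has been reached, switch to a plain interval job
    sched.add_interval_job(process, minutes=10)


sched = Scheduler()
sched.daemonic = False  # keep the process alive, as the daemon above does

# round the current time up to the next 10-minute boundary
now = datetime.datetime.now()
start = now.replace(second=0, microsecond=0) + datetime.timedelta(minutes=10 - now.minute % 10)

# one-shot jobs fire at the aligned start time and hand over to the interval job
sched.add_date_job(process, start)
sched.add_date_job(install_interval_jobs, start)

sched.start()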
Exemple #42
0
class HouseControl(object):
    
    __scheduler = None
    __heatingStatusBean = None
    
    busJobsQueue = Queue.Queue()
    busWorkerThread = BusWorker(busJobsQueue)
    
    def __init__(self):
        self.logger = logging.getLogger(APPLICATION_LOGGER_NAME)
        self.logger.info("HouseControl starting...")

        configurationReader = ConfigurationReader(self.logger, os.getcwd() + FILEPATH_CONFIGURATION)
        
        #Initialize HeatingStatusBean
        self.__initializeHeatingStatusBean(configurationReader)
        
        #Initialize Scheduler
        self.__initializeScheduler(configurationReader)
        
        #Initialize BusQueueWorker
        self.busWorkerThread.setDaemon(True)        
        self.busWorkerThread.start() 
        
        self.logger.info("HouseControl started.")
        
        
    def __initializeHeatingStatusBean(self, configurationReader):
        #HeatingStatusBean       
        self.__heatingStatusBean = HeatingStatusBean.HeatingStatusBean()
        
        #Configure Bean
        self.updateHeatingStatusBeanConfiguration(configurationReader)
        
        #Add ChangeListener
        self.__heatingStatusBean.addChangeListener(HeatingControlService.HeatingControlService(self))
        self.__heatingStatusBean.addChangeListener(HeatingSwitchService.HeatingSwitchService(self))
        ##self.__heatingStatusBean.addChangeListener(HeatingMonitorService.HeatingMonitorService(self))
        self.logger.info("HeatingStatusBean configured.")
            
            
    def __initializeScheduler(self, configurationReader):
        #Scheduler
        self.__scheduler = Scheduler()
        self.__scheduler.configure(standalone=True)
        self.__scheduler.add_listener(schedulerListener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
        
        #SchedulerTasks
        #TemperaturFeedService, TemperatureLogService, MixerControlService
        self.__loadBaseSchedulerTasks()
        
        self.__scheduler.start()
        
        #User-defined switching times
        self.loadUserSchedulerTasks(configurationReader)
        
        self.logger.info("Scheduler started.")
        
    
    def getHeatingStatusBean(self):
        return self.__heatingStatusBean
    
    def getScheduler(self):
        return self.__scheduler
    
        
    def __loadBaseSchedulerTasks(self):
        temperatureFeedService = TemperatureFeedService.TemperatureFeedService(self)
        temperatureLogService = TemperatureLogService.TemperatureLogService(self)
        mixerControlService = MixerControlService.MixerControlService(self)
        
        #TemperaturFeedService
        job = self.__scheduler.add_interval_job(temperatureFeedService.run, seconds=INTERVALL_UPDATE_TEMPERATURE)
        job.name = SCHEDULE_SERVICE_TEMPERATURE_UPDATER
        self.logger.info("Scheduler-Job [" + job.name + "] loaded.")

        #TemperatureLogService
        job = self.__scheduler.add_interval_job(temperatureLogService.run, seconds=INTERVALL_LOG_TEMPERATURE)
        job.name = SCHEDULE_SERVICE_TEMPERATURE_LOGGER
        self.logger.info("Scheduler-Job [" + job.name + "] loaded.")
        
        #MixerControlService
        job = self.__scheduler.add_interval_job(mixerControlService.run, seconds=INTERVALL_UPDATE_MIXER)
        job.name = SCHEDULE_SERVICE_TEMPERATURE_MIXERCONTROL
        self.logger.info("Scheduler-Job [" + job.name + "] loaded.")
        
        
    def updateHeatingStatusBeanConfiguration(self, configurationReader):
        temperatures = configurationReader.temperatures
        self.__heatingStatusBean.setUpperFloorFlowTargetTemperature(float(temperatures.get('ogv')))
        self.__heatingStatusBean.setGroundFloorFlowTargetTemperature(float(temperatures.get('egv')))
        self.__heatingStatusBean.setWaterTargetTemperature(float(temperatures.get('hotwater')))
    
    
    def reloadUserSchedulerTasks(self):
        self.removeUserSchedulerTasks()
        
        configurationReader = ConfigurationReader(self.logger, os.getcwd() + FILEPATH_CONFIGURATION)
        self.updateHeatingStatusBeanConfiguration(configurationReader)
        
        self.loadUserSchedulerTasks(configurationReader)
        
        
    def removeUserSchedulerTasks(self):
        prefixLen = len(SERVICE_HEATING_ACTION_PREFIX) 
        jobList = self.__scheduler.get_jobs()
        for job in jobList:
            jobName = job.name
            if(jobName[:prefixLen] == SERVICE_HEATING_ACTION_PREFIX):
                self.logger.info("Scheduler-Job [" + job.name + "] removed.")
                self.__scheduler.unschedule_job(job)


    def loadUserSchedulerTasks(self, configurationReader):
        baseCronSched = {'year':None, 'month':None, 'day':None, 'week':None, 'day_of_week':None, 'hour':None, 'minute':None, 'second':None, 'start_date':None}
        for task in configurationReader.heatingTasks:
            
            schedType = task.get('schedule').get('type') 
            if(schedType == 'cron'):
                cronSched = baseCronSched.copy()
                cronSched.update(task.get('schedule'))
                cronSched.pop('type')
                if(task.get('type') == 'changeHeatingStatus'):
                    taskFunction = self.__heatingStatusBean.setHeatingStatusMap
                    job = self.__scheduler.add_cron_job(taskFunction,
                                                        year=cronSched['year'], month=cronSched['month'], day=cronSched['day'],
                                                        week=cronSched['week'], day_of_week=cronSched['day_of_week'], 
                                                        hour=cronSched['hour'], minute=cronSched['minute'], second=cronSched['second'], 
                                                        start_date=cronSched['start_date'],
                                                        args=[task.get('status')])
                    n = SERVICE_HEATING_ACTION_PREFIX + str(task.get('name'))
                    job.name = n
        
        prefixLen = len(SERVICE_HEATING_ACTION_PREFIX) 
        jobList = self.__scheduler.get_jobs()
        for job in jobList:
            jobName = job.name
            if(jobName[:prefixLen] == SERVICE_HEATING_ACTION_PREFIX):
                self.logger.info("Scheduler-Job [" + jobName + "] loaded.")
Exemple #43
0
class TestJobExecution(object):
    def setup(self):
        self.scheduler = Scheduler(threadpool=FakeThreadPool())
        self.scheduler.add_jobstore(RAMJobStore(), 'default')

        # Make the scheduler think it's running
        self.scheduler._thread = FakeThread()

        self.logstream = StringIO()
        self.loghandler = StreamHandler(self.logstream)
        self.loghandler.setLevel(ERROR)
        scheduler.logger.addHandler(self.loghandler)

    def teardown(self):
        scheduler.logger.removeHandler(self.loghandler)
        if scheduler.datetime == FakeDateTime:
            scheduler.datetime = datetime
        FakeDateTime._now = original_now

    @raises(TypeError)
    def test_noncallable(self):
        date = datetime.now() + timedelta(days=1)
        self.scheduler.add_date_job('wontwork', date)

    def test_job_name(self):
        def my_job():
            pass

        job = self.scheduler.add_interval_job(my_job,
                                              start_date=datetime(2010, 5, 19))
        eq_(
            repr(job), '<Job (name=my_job, '
            'trigger=<IntervalTrigger (interval=datetime.timedelta(0, 1), '
            'start_date=datetime.datetime(2010, 5, 19, 0, 0))>)>')

    def test_schedule_object(self):
        # Tests that any callable object is accepted (and not just functions)
        class A:
            def __init__(self):
                self.val = 0

            def __call__(self):
                self.val += 1

        a = A()
        job = self.scheduler.add_interval_job(a, seconds=1)
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(a.val, 2)

    def test_schedule_method(self):
        # Tests that bound methods can be scheduled (at least with RAMJobStore)
        class A:
            def __init__(self):
                self.val = 0

            def method(self):
                self.val += 1

        a = A()
        job = self.scheduler.add_interval_job(a.method, seconds=1)
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(a.val, 2)

    def test_unschedule_job(self):
        def increment():
            vals[0] += 1

        vals = [0]
        job = self.scheduler.add_cron_job(increment)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals[0], 1)
        self.scheduler.unschedule_job(job)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals[0], 1)

    def test_unschedule_func(self):
        def increment():
            vals[0] += 1

        def increment2():
            vals[0] += 1

        vals = [0]
        job1 = self.scheduler.add_cron_job(increment)
        job2 = self.scheduler.add_cron_job(increment2)
        job3 = self.scheduler.add_cron_job(increment)
        eq_(self.scheduler.get_jobs(), [job1, job2, job3])

        self.scheduler.unschedule_func(increment)
        eq_(self.scheduler.get_jobs(), [job2])

    @raises(KeyError)
    def test_unschedule_func_notfound(self):
        self.scheduler.unschedule_func(copy)

    def test_job_finished(self):
        def increment():
            vals[0] += 1

        vals = [0]
        job = self.scheduler.add_interval_job(increment, max_runs=1)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals, [1])
        assert job not in self.scheduler.get_jobs()

    def test_job_exception(self):
        def failure():
            raise DummyException

        job = self.scheduler.add_date_job(failure, datetime(9999, 9, 9))
        self.scheduler._process_jobs(job.next_run_time)
        assert 'DummyException' in self.logstream.getvalue()

    def test_misfire_grace_time(self):
        self.scheduler.misfire_grace_time = 3
        job = self.scheduler.add_interval_job(lambda: None, seconds=1)
        eq_(job.misfire_grace_time, 3)

        job = self.scheduler.add_interval_job(lambda: None,
                                              seconds=1,
                                              misfire_grace_time=2)
        eq_(job.misfire_grace_time, 2)

    def test_coalesce_on(self):
        # Makes sure that the job is only executed once when it is scheduled
        # to be executed twice in a row
        def increment():
            vals[0] += 1

        vals = [0]
        events = []
        scheduler.datetime = FakeDateTime
        self.scheduler.add_listener(events.append,
                                    EVENT_JOB_EXECUTED | EVENT_JOB_MISSED)
        job = self.scheduler.add_interval_job(increment,
                                              seconds=1,
                                              start_date=FakeDateTime.now(),
                                              coalesce=True,
                                              misfire_grace_time=2)

        # Turn the clock 2 seconds forward
        FakeDateTime._now += timedelta(seconds=2)

        self.scheduler._process_jobs(FakeDateTime.now())
        eq_(job.runs, 1)
        eq_(len(events), 1)
        eq_(events[0].code, EVENT_JOB_EXECUTED)
        eq_(vals, [1])

    def test_coalesce_off(self):
        # Makes sure that every scheduled run for the job is executed even
        # when they are in the past (but still within misfire_grace_time)
        def increment():
            vals[0] += 1

        vals = [0]
        events = []
        scheduler.datetime = FakeDateTime
        self.scheduler.add_listener(events.append,
                                    EVENT_JOB_EXECUTED | EVENT_JOB_MISSED)
        job = self.scheduler.add_interval_job(increment,
                                              seconds=1,
                                              start_date=FakeDateTime.now(),
                                              coalesce=False,
                                              misfire_grace_time=2)

        # Turn the clock 2 seconds forward
        FakeDateTime._now += timedelta(seconds=2)

        self.scheduler._process_jobs(FakeDateTime.now())
        eq_(job.runs, 3)
        eq_(len(events), 3)
        eq_(events[0].code, EVENT_JOB_EXECUTED)
        eq_(events[1].code, EVENT_JOB_EXECUTED)
        eq_(events[2].code, EVENT_JOB_EXECUTED)
        eq_(vals, [3])

    def test_interval(self):
        def increment(amount):
            vals[0] += amount
            vals[1] += 1

        vals = [0, 0]
        job = self.scheduler.add_interval_job(increment, seconds=1, args=[2])
        self.scheduler._process_jobs(job.next_run_time)
        self.scheduler._process_jobs(job.next_run_time)
        eq_(vals, [4, 2])

    def test_interval_schedule(self):
        @self.scheduler.interval_schedule(seconds=1)
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals, [2])

    def test_cron(self):
        def increment(amount):
            vals[0] += amount
            vals[1] += 1

        vals = [0, 0]
        job = self.scheduler.add_cron_job(increment, args=[3])
        start = job.next_run_time
        self.scheduler._process_jobs(start)
        eq_(vals, [3, 1])
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals, [6, 2])
        self.scheduler._process_jobs(start + timedelta(seconds=2))
        eq_(vals, [9, 3])

    def test_cron_schedule_1(self):
        @self.scheduler.cron_schedule()
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vals[0], 2)

    def test_cron_schedule_2(self):
        @self.scheduler.cron_schedule(minute='*')
        def increment():
            vals[0] += 1

        vals = [0]
        start = increment.job.next_run_time
        next_run = start + timedelta(seconds=60)
        eq_(increment.job.get_run_times(next_run), [start, next_run])
        self.scheduler._process_jobs(start)
        self.scheduler._process_jobs(next_run)
        eq_(vals[0], 2)

    def test_date(self):
        def append_val(value):
            vals.append(value)

        vals = []
        date = datetime.now() + timedelta(seconds=1)
        self.scheduler.add_date_job(append_val, date, kwargs={'value': 'test'})
        self.scheduler._process_jobs(date)
        eq_(vals, ['test'])

    def test_print_jobs(self):
        out = StringIO()
        self.scheduler.print_jobs(out)
        expected = 'Jobstore default:%s'\
                   '    No scheduled jobs%s' % (os.linesep, os.linesep)
        eq_(out.getvalue(), expected)

        self.scheduler.add_date_job(copy, datetime(2200, 5, 19))
        out = StringIO()
        self.scheduler.print_jobs(out)
        expected = 'Jobstore default:%s    '\
            'copy (trigger: date[2200-05-19 00:00:00], '\
            'next run at: 2200-05-19 00:00:00)%s' % (os.linesep, os.linesep)
        eq_(out.getvalue(), expected)

    def test_jobstore(self):
        self.scheduler.add_jobstore(RAMJobStore(), 'dummy')
        job = self.scheduler.add_date_job(lambda: None,
                                          datetime(2200, 7, 24),
                                          jobstore='dummy')
        eq_(self.scheduler.get_jobs(), [job])
        self.scheduler.remove_jobstore('dummy')
        eq_(self.scheduler.get_jobs(), [])

    @raises(KeyError)
    def test_remove_nonexistent_jobstore(self):
        self.scheduler.remove_jobstore('dummy2')

    def test_job_next_run_time(self):
        # Tests against bug #5
        def increment():
            vars[0] += 1

        vars = [0]
        scheduler.datetime = FakeDateTime
        job = self.scheduler.add_interval_job(increment,
                                              seconds=1,
                                              misfire_grace_time=3,
                                              start_date=FakeDateTime.now())
        start = job.next_run_time

        self.scheduler._process_jobs(start)
        eq_(vars, [1])

        self.scheduler._process_jobs(start)
        eq_(vars, [1])

        self.scheduler._process_jobs(start + timedelta(seconds=1))
        eq_(vars, [2])
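The two coalesce tests above capture the behaviour worth remembering: when several runs of a job have been missed but are still within misfire_grace_time, coalesce=True folds them into a single execution, while coalesce=False replays every missed run. Here is a minimal sketch of passing those options through the public add_interval_job call; the tick() function and the timing values are illustrative only.

from apscheduler.scheduler import Scheduler


def tick():
    print('tick')


sched = Scheduler()
sched.start()

# if several runs are missed but still inside the 30 s grace period,
# coalesce=True folds them into one execution...
sched.add_interval_job(tick, seconds=1, coalesce=True, misfire_grace_time=30)

# ...while coalesce=False replays each missed run individually
sched.add_interval_job(tick, seconds=1, coalesce=False, misfire_grace_time=30)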
Exemple #44
0
class EventScheduler():
    """Class to scheduler regular events in a similar manner to cron."""
    __mysql_url = 'mysql+pymysql://powermonitor:%s@localhost/powermonitor' \
                  % str(base64.b64decode(bytes('cDB3M3JtMG4xdDBy')))
    '''This determines the number of seconds after the designated run time that the job is still allowed to be run.
    If jobs are not being run, try increasing this in increments of 1.'''
    __GRACE_PERIOD = 31536000  # Amazing grace! Time in seconds before the job is considered misfired. Currently a year
    __COALESCE = True   # Force the job to only run once instead of retrying multiple times
    '''If there is a problem with thread concurrency, play around with these values. You'd think with all these threads
    in the pool that the filter would get clogged up!'''
    __threadpool_corethreads = 0    # Maximum number of persistent threads in the pool
    __threadpool_maxthreads = 20    # Maximum number of total threads in the pool
    __threadpool_keepalive = 1      # Seconds to keep non-core worker threads in the pool

    def __init__(self, start=True):
        try:
            config = {'apscheduler.daemon': True, 'apscheduler.standalone': False,
                      'apscheduler.threadpool.core_threads': self.__threadpool_corethreads,
                      'apscheduler.threadpool.max_threads': self.__threadpool_maxthreads,
                      'apscheduler.threadpool.keepalive': self.__threadpool_keepalive,
                      'apscheduler.coalesce': self.__COALESCE}
            self.__sched = Scheduler(config)
            '''Add the SQLAlchemy job store as the default. This was surprisingly far less tedious than getting the
            shelve job store working.'''
            self.__sched.add_jobstore(SQLAlchemyJobStore(url=self.__mysql_url, tablename='SCHEDULE'), 'default')
            atexit.register(lambda: self.__sched.shutdown(wait=False))  # Stop the scheduler when the program exits
            if start:
                self.__sched.start()
        except KeyError:
            logging.warning('An error occurred starting the scheduler.')

    def start_scheduler(self):
        self.__sched.start()

    def add_cron_event(self, func, name, year=None, month=None, week=None, day=None,
                       day_of_week=None, hour=None, minute=None, second=None, start_date=None, *args,
                       **kwargs):
        """Add a cron like event to the schedule. Each job must be given a name in case it needs to be removed.
        The following expressions can be used in each field:
        Expression  Field   Description
        *           any     Fire on every value
        */a         any     Fire on every 'a' values, starting from the minimum
        a-b         any     Fire on any value in the 'a-b' range (a must be smaller than b)
        a-b/c       any     Fire every 'c' values within the 'a-b' range
        xth y       day     Fire on the x-th occurrence of weekday y within the month
        last x      day     Fire on the last occurrence of weekday 'x' within the month
        last        day     Fire on the last day within the month
        x,y,z       any     Fire on any matching expression; can combine any number of any of the above expressions

        If you want to add **options to the event, use kwargs (keyword arguments dictionary)"""
        if self.__sched is not None:
            event_exists = False
            if self.__find_event(name) is not None:
                event_exists = True
            if not event_exists:
                self.__sched.add_cron_job(func=func, name=name, year=year, month=month, day=day, week=week,
                                          day_of_week=day_of_week, hour=hour, minute=minute, second=second,
                                          start_date=start_date, args=args, kwargs=kwargs,
                                          misfire_grace_time=self.__GRACE_PERIOD)
                logging.info('New cron event added')
            else:
                '''Every event needs a unique name so we can keep track of the little bastards. And please use
                descriptive names so that they can be properly identified in the job schedule.'''
                logging.warning('add_cron_event: Event already exists')
                raise EventExistsError('A job with name %s already exists' % name)
        else:
            raise SchedulerNotFoundError('add_cron_event: Scheduler does not exist. It may have not started.')

    def __find_event(self, event_name):
        if self.__sched is not None:
            events = self.__sched.get_jobs()
            for event in events:
                if event.name == event_name:
                    return event
            return None
        else:
            logging.warning('__find_event: Scheduler does not exist. It may have not started.')
            raise SchedulerNotFoundError('Scheduler does not exist. It may have not started.')

    def add_onceoff_event(self, func, name, date, args=None):
        """Add a once off event to the schedule. The job is executed once at the specified date and time.
        Date/time format: YYYY-MM-DD HH:MM:SS"""
        if self.__sched is not None:
            try:
                if args is None:  # If there are no arguments to be passed to the function
                    self.__sched.add_date_job(func=func, name=name, date=date,
                                              misfire_grace_time=self.__GRACE_PERIOD)
                else:   # If there are arguments to be passed to the function
                    self.__sched.add_date_job(func=func, name=name, date=date, args=args,
                                              misfire_grace_time=self.__GRACE_PERIOD)
            except ValueError:
                '''If the event is in the past, it will not run. This program is not capable of manipulating
                space and time. Try import __time_travel__'''
                raise EventWontRunError('The event will not run: Event time has expired.')
            logging.info('New once off event added')
        else:
            logging.warning('add_onceoff_event: Scheduler does not exist. It may have not started.')
            raise SchedulerNotFoundError('Scheduler does not exist. It may have not started.')

    def remove_event(self, event_name):
        """Remove the event 'event_name' from the schedule."""
        if self.__sched is not None:
            removed = False
            event = self.__find_event(event_name=event_name)
            if event is not None:   # If the event exists, remove it
                self.__sched.unschedule_job(event)
                removed = True
            if not removed:
                '''Raise an error so that it can be handled correctly'''
                logging.warning('remove_event: Event not found for removal.')
                raise EventNotFoundError('Event not found for removal: %s' % event_name)
        else:
            raise SchedulerNotFoundError('remove_event: Scheduler does not exist. It may have not started.')

    def get_jobs(self):
        """Get the list of events currently in the job store."""
        if self.__sched is not None:
            return self.__sched.get_jobs()
        else:
            raise SchedulerNotFoundError('get_events: Scheduler does not exist. It may have not started.')

    def get_job_names(self):
        """
        Get the names of all the jobs in the job store
        :return: list
        """
        jobs = self.get_jobs()
        job_list = []
        if jobs:
            for job in jobs:
                job_list.append(job.name)
        return job_list

    def get_scheduler(self):
        """Returns the Scheduler object. Rather add functionality to this class than call this method."""
        if self.__sched is not None:
            return self.__sched
        else:
            raise SchedulerNotFoundError('get_scheduler: Scheduler does not exist. It may have not started.')
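A hypothetical usage of the EventScheduler wrapper above: schedule a nightly cron event plus a once-off event, and handle the duplicate-name error the wrapper raises. The take_reading() callback, the job names and the dates are assumptions of this sketch; the exception classes come from the snippet's module.

import logging


def take_reading():
    print('reading power meter')


scheduler = EventScheduler(start=True)

try:
    # run every night at 02:00
    scheduler.add_cron_event(take_reading, name='nightly_reading', hour=2, minute=0)
    # run once at a fixed date/time (format: YYYY-MM-DD HH:MM:SS)
    scheduler.add_onceoff_event(take_reading, name='one_off_reading', date='2030-01-01 08:00:00')
except EventExistsError:
    logging.warning('a job with that name is already scheduled')

print(scheduler.get_job_names())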
Exemple #45
0
class TNActionScheduler:
    
    def __init__(self, entity, db_file):
        """
        initialize the module
        @type entity TNArchipelEntity
        @param entity the module entity
        """
        self.entity = entity
        self.scheduler = Scheduler()
        self.scheduler.start()
        
        self.database = sqlite3.connect(db_file, check_same_thread=False);
        self.database.execute("create table if not exists scheduler (entity_uuid text, job_uuid text, action text, year text, month text, day text, hour text, minute text, second text, comment text, params text)")
        self.database.commit()
        self.cursor = self.database.cursor()
        self.restore_jobs()
        self.supported_actions_for_vm = ("create", "shutdown", "destroy", "suspend", "resume", "reboot", "migrate", "pause")
        self.supported_actions_for_hypervisor = ("alloc", "free")
        
        # permissions
        self.entity.permission_center.create_permission("scheduler_jobs", "Authorizes user to get the list of task", False);
        self.entity.permission_center.create_permission("scheduler_schedule", "Authorizes user to schedule a task", False);
        self.entity.permission_center.create_permission("scheduler_unschedule", "Authorizes user to unschedule a task", False);
        self.entity.permission_center.create_permission("scheduler_actions", "Authorizes user to get available actions", False);
    
    
    
    ### Persistance
    
    def delete_job(self, uid):
        """
        remove a job from the database
        
        @type uid string
        @param uid the uid of the job to remove
        """
        
        self.cursor.execute("DELETE FROM scheduler WHERE job_uuid=?", (uid,))
        self.database.commit()
    
    
    def save_jobs(self, uid, action, year, month, day, hour, minute, second, comment, params=None):
        """
        save a job in the database
        
        @type uid string
        @param uid the uid of the job
        @type action string
        @param action the action
        @type year string
        @param year year of execution
        @type month string
        @param month month of execution
        @type day string
        @param day day of execution
        @type hour string
        @param hour hour of execution
        @type minute string
        @param minute minute of execution
        @type second string
        @param second second of execution
        @type comment string
        @param comment comment about the job
        @type params string
        @param params random parameter of the job
        """
        
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            entity_uid = self.entity.uuid
        elif entityClass == "TNArchipelHypervisor":
            entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID
        self.cursor.execute("INSERT INTO scheduler VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (entity_uid, uid, action, year, month, day, hour, minute, second, comment, params,))
        self.database.commit()
    
    
    def restore_jobs(self):
        """
        restore the jobs from the database
        """
        
        entityClass = self.entity.__class__.__name__
        if entityClass == "TNArchipelVirtualMachine":
            entity_uid = self.entity.uuid
        elif entityClass == "TNArchipelHypervisor":
            entity_uid = ARCHIPEL_SCHED_HYPERVISOR_UID
        self.cursor.execute("SELECT * FROM scheduler WHERE entity_uuid=?", (entity_uid,))
        for values in self.cursor:
            entity_uuid, job_uuid, action, year, month, day, hour, minute, second, comment, params = values
            str_date = "%s/%s/%s %s:%s:%s" % (year, month, day, hour, minute, second)
            self.scheduler.add_cron_job(self.do_job_for_vm, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[action, job_uuid, str_date, comment])
    
    
    
    ### Jobs
    
    def get_job_with_uid(self, uid):
        """
        get a job with given uid
        
        @type uid string
        @param uid the uid of the job
        """
        
        for job in self.scheduler.jobs:
            if str(job.args[1]) == uid:
                return job
        return None
    
    
    def do_job_for_vm(self, action, uid, str_date, comment, param):
        """
        perform the job
        
        @type action string
        @param action the action to execute
        @type uid string
        @param uid the uid of the job
        @type str_date string
        @param str_date the date of the job
        @type comment string
        @param comment comment about the job
        @type param string
        @param param a random parameter to give to job
        """
        
        if action == "create":      self.entity.create()
        elif action == "shutdown":  self.entity.shutdown()
        elif action == "destroy":   self.entity.destroy()
        elif action == "suspend":   self.entity.suspend()
        elif action == "resume":    self.entity.resume()
        elif action == "pause":
            if self.entity.libvirt_status == 1: self.entity.suspend()
            elif self.entity.libvirt_status == 3: self.entity.resume()
        elif action == "migrate": pass
        
        job = self.get_job_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        
        self.entity.push_change("scheduler", "jobexecuted")
    
    
    def do_job_for_hypervisor(self, action, uid, str_date, comment, param):
        """
        perform the job
        
        @type action string
        @param action the action to execute
        @type uid string
        @param uid the uid of the job
        @type str_date string
        @param str_date the date of the job
        @type comment string
        @param comment comment about the job
        @type param string
        @param param a random parameter to give to job
        """
        
        if action == "alloc": self.entity.alloc()
        elif action == "free": pass #self.entity.free()
        
        job = self.get_job_with_uid(uid)
        if not job or not self.scheduler.is_job_active(job):
            self.delete_job(uid)
        
        self.entity.push_change("scheduler", "jobexecuted")
    
    
    
    ### Process IQ
    
    def process_iq(self, conn, iq):
        """
        this method is invoked when a ARCHIPEL_NS_VM_SCHEDULER IQ is received.
        
        it understands IQ of type:
            - jobs
            - schedule
            - unschedule
        
        @type conn: xmpp.Dispatcher
        @param conn: ths instance of the current connection that send the stanza
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        """
        action = self.entity.check_acp(conn, iq)
        self.entity.check_perm(conn, iq, action, -1, prefix="scheduler_")
        
        if   action == "schedule":      reply = self.iq_schedule(iq)
        elif action == "unschedule":    reply = self.iq_unschedule(iq)
        elif action == "jobs":          reply = self.iq_jobs(iq)
        elif action == "actions":       reply = self.iq_actions(iq)
        
        if reply:
            conn.send(reply)
            raise xmpp.protocol.NodeProcessed
    
    
    def iq_schedule(self, iq):
        """
        Schedule a task.
        
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            job = iq.getTag("query").getTag("archipel").getAttr("job")
            entityClass = self.entity.__class__.__name__
            param = None
            
            if entityClass == "TNArchipelVirtualMachine" and not job in self.supported_actions_for_vm:
                raise Exception("action %s is not valid" % job)
            elif entityClass == "TNArchipelHypervisor" and not job in self.supported_actions_for_hypervisor:
                raise Exception("action %s is not valid" % job)
            
            year = iq.getTag("query").getTag("archipel").getAttr("year")
            month = iq.getTag("query").getTag("archipel").getAttr("month")
            day = iq.getTag("query").getTag("archipel").getAttr("day")
            hour = iq.getTag("query").getTag("archipel").getAttr("hour")
            minute = iq.getTag("query").getTag("archipel").getAttr("minute")
            second = iq.getTag("query").getTag("archipel").getAttr("second")
            comment = iq.getTag("query").getTag("archipel").getAttr("comment")
            if iq.getTag("query").getTag("archipel").has_attr("param"):
                param = iq.getTag("query").getTag("archipel").getAttr("param")
            
            uid = str(uuid.uuid1())
            
            str_date = "%s-%s-%s @ %s : %s : %s" % (year, month, day, hour, minute, second)
            
            if entityClass == "TNArchipelVirtualMachine":   func = self.do_job_for_vm
            elif entityClass == "TNArchipelHypervisor":     func = self.do_job_for_hypervisor
            
            self.scheduler.add_cron_job(func, year=year, month=month, day=day, hour=hour, minute=minute, second=second, args=[job, uid, str_date, comment, param])
            
            self.save_jobs(uid, job, year, month, day, hour, minute, second, comment, param)
            
            self.entity.push_change("scheduler", "scheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply
    
    
    def iq_jobs(self, iq):
        """
        gets jobs
        
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            nodes = [];
            for job in self.scheduler.jobs:
                job_node = xmpp.Node(tag="job", attrs={"action": str(job.args[0]), "uid": str(job.args[1]), "date": str(job.args[2]), "comment": job.args[3]})
                nodes.append(job_node)
            
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply

    
    
    def iq_unschedule(self, iq):
        """
        unschedules a job
        
        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ
        
        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            uid = iq.getTag("query").getTag("archipel").getAttr("uid")
            the_job = self.get_job_with_uid(uid)
            if not the_job:
                raise Exception("job with uid %s doesn't exist" % uid)
            
            self.delete_job(uid)
            self.scheduler.unschedule_job(the_job)
            self.entity.push_change("scheduler", "unscheduled")
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply
    
    
    def iq_actions(self, iq):
        """
        get available actions

        @type iq: xmpp.Protocol.Iq
        @param iq: the received IQ

        @rtype: xmpp.Protocol.Iq
        @return: a ready to send IQ containing the result of the action
        """
        try:
            reply = iq.buildReply("result")
            entityClass = self.entity.__class__.__name__
            
            if entityClass == "TNArchipelVirtualMachine":
                actions = self.supported_actions_for_vm
            elif entityClass == "TNArchipelHypervisor":
                actions = self.supported_actions_for_hypervisor
            
            nodes = []
            for action in actions:
                action_node = xmpp.Node(tag="action")
                action_node.setData(action)
                nodes.append(action_node)
            reply.setQueryPayload(nodes)
        except Exception as ex:
            reply = build_error_iq(self, ex, iq)
        return reply
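TNActionScheduler persists every job's cron fields in SQLite and replays them through add_cron_job() at start-up (save_jobs() and restore_jobs() above). The sketch below shows that persist-and-replay idea in isolation, using the APScheduler 2.x API and a simplified table; the schema, the do_job() callback and the times are illustrative assumptions, not the snippet's actual layout.

import sqlite3
import uuid

from apscheduler.scheduler import Scheduler


def do_job(action, job_uuid):
    print('running %s (%s)' % (action, job_uuid))


sched = Scheduler()
sched.start()

db = sqlite3.connect('jobs.sqlite')
db.execute("create table if not exists jobs (job_uuid text, action text, hour text, minute text)")


def restore_jobs():
    # on start-up, replay every stored definition into the scheduler
    for job_uuid, action, hour, minute in db.execute("select * from jobs"):
        sched.add_cron_job(do_job, hour=hour, minute=minute, args=[action, job_uuid])


def schedule_and_save(action, hour, minute):
    # persist the definition first, then schedule it
    job_uuid = str(uuid.uuid1())
    db.execute("insert into jobs values (?, ?, ?, ?)", (job_uuid, action, hour, minute))
    db.commit()
    sched.add_cron_job(do_job, hour=hour, minute=minute, args=[action, job_uuid])


restore_jobs()
schedule_and_save('shutdown', '23', '30')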
Exemple #46
0
class EventScheduler():
    logging.basicConfig()
    """Class to scheduler regular events in a similar manner to cron."""
    __mysql_url = 'mysql+pymysql://powermonitor:%s@localhost/powermonitor' \
                  % str(base64.b64decode(bytes('cDB3M3JtMG4xdDBy')))
    '''This determines the number of seconds after the designated run time that the job is still allowed to be run.
    If jobs are not being run, try increasing this in increments of 1.'''
    __GRACE_PERIOD = 31536000  # Amazing grace! Time in seconds before the job is considered misfired. Currently a year
    __COALESCE = True  # Force the job to only run once instead of retrying multiple times
    '''If there is a problem with thread concurrency, play around with these values. You'd think with all these threads
    in the pool that the filter would get clogged up!'''
    __threadpool_corethreads = 0  # Maximum number of persistent threads in the pool
    __threadpool_maxthreads = 20  # Maximum number of total threads in the pool
    __threadpool_keepalive = 1  # Seconds to keep non-core worker threads in the pool

    def __init__(self, start=True):
        try:
            config = {
                'apscheduler.daemon': True,
                'apscheduler.standalone': False,
                'apscheduler.threadpool.core_threads':
                self.__threadpool_corethreads,
                'apscheduler.threadpool.max_threads':
                self.__threadpool_maxthreads,
                'apscheduler.threadpool.keepalive':
                self.__threadpool_keepalive,
                'apscheduler.coalesce': self.__COALESCE
            }
            self.__sched = Scheduler(config)
            '''Add the SQLAlchemy job store as the default. This was surprisingly far less tedious than getting the
            shelve job store working.'''
            self.__sched.add_jobstore(
                SQLAlchemyJobStore(url=self.__mysql_url, tablename='SCHEDULE'),
                'default')
            atexit.register(lambda: self.__sched.shutdown(wait=False))  # Stop the scheduler when the program exits
            if start:
                self.__sched.start()
        except KeyError:
            logging.warning('An error occurred starting the scheduler.')

    def start_scheduler(self):
        self.__sched.start()

    def add_cron_event(self,
                       func,
                       name,
                       year=None,
                       month=None,
                       week=None,
                       day=None,
                       day_of_week=None,
                       hour=None,
                       minute=None,
                       second=None,
                       start_date=None,
                       *args,
                       **kwargs):
        """Add a cron like event to the schedule. Each job must be given a name in case it needs to be removed.
        The following expressions can be used in each field:
        Expression  Field   Description
        *           any     Fire on every value
        */a         any     Fire on every 'a' values, starting from the minimum
        a-b         any     Fire on any value in the 'a-b' range (a must be smaller than b)
        a-b/c       any     Fire every 'c' values within the 'a-b' range
        xth y       day     Fire on the x-th occurrence of weekday y within the month
        last x      day     Fire on the last occurrence of weekday 'x' within the month
        last        day     Fire on the last day within the month
        x,y,z       any     Fire on any matching expression; can combine any number of any of the above expressions

        If you want to add **options to the event, use kwargs (keyword arguments dictionary)"""
        if self.__sched is not None:
            event_exists = False
            if self.__find_event(name) is not None:
                event_exists = True
            if not event_exists:
                self.__sched.add_cron_job(
                    func=func,
                    name=name,
                    year=year,
                    month=month,
                    day=day,
                    week=week,
                    day_of_week=day_of_week,
                    hour=hour,
                    minute=minute,
                    second=second,
                    start_date=start_date,
                    args=args,
                    kwargs=kwargs,
                    misfire_grace_time=self.__GRACE_PERIOD)
                logging.info('New cron event added')
            else:
                '''Every event needs a unique name so we can keep track of the little bastards. And please use
                descriptive names so that they can be properly identified in the job schedule.'''
                logging.warning('add_cron_event: Event already exists')
                raise EventExistsError('A job with name %s already exists' %
                                       name)
        else:
            raise SchedulerNotFoundError(
                'add_cron_event: Scheduler does not exist. It may have not started.'
            )

    def __find_event(self, event_name):
        if self.__sched is not None:
            events = self.__sched.get_jobs()
            for event in events:
                if event.name == event_name:
                    return event
            return None
        else:
            logging.warning(
                '__find_event: Scheduler does not exist. It may have not started.'
            )
            raise SchedulerNotFoundError(
                'Scheduler does not exist. It may have not started.')

    def add_onceoff_event(self, func, name, date, args=None):
        """Add a once off event to the schedule. The job is executed once at the specified date and time.
        Date/time format: YYYY-MM-DD HH:MM:SS"""
        if self.__sched is not None:
            try:
                if args is None:  # If there are no arguments to be passed to the function
                    self.__sched.add_date_job(
                        func=func,
                        name=name,
                        date=date,
                        misfire_grace_time=self.__GRACE_PERIOD)
                else:  # If there are arguments to be passed to the function
                    self.__sched.add_date_job(
                        func=func,
                        name=name,
                        date=date,
                        args=args,
                        misfire_grace_time=self.__GRACE_PERIOD)
            except ValueError:
                '''If the event is in the past, it will not run. This program is not capable of manipulating
                space and time. Try import __time_travel__'''
                raise EventWontRunError(
                    'The event will not run: Event time has expired.')
            logging.info('New once off event added')
        else:
            logging.warning(
                'add_onceoff_event: Scheduler does not exist. It may have not started.'
            )
            raise SchedulerNotFoundError(
                'Scheduler does not exist. It may have not started.')

    def remove_event(self, event_name):
        """Remove the event 'event_name' from the schedule."""
        if self.__sched is not None:
            removed = False
            event = self.__find_event(event_name=event_name)
            if event is not None:  # If the event exists, remove it
                self.__sched.unschedule_job(event)
                removed = True
            if not removed:
                '''Raise an error so that it can be handled correctly'''
                logging.warning('remove_event: Event not found for removal.')
                raise EventNotFoundError('Event not found for removal: %s' %
                                         event_name)
        else:
            raise SchedulerNotFoundError(
                'remove_event: Scheduler does not exist. It may have not started.'
            )

    def get_jobs(self):
        """Get the list of events currently in the job store."""
        if self.__sched is not None:
            return self.__sched.get_jobs()
        else:
            raise SchedulerNotFoundError(
                'get_events: Scheduler does not exist. It may have not started.'
            )

    def get_job_names(self):
        """
        Get the names of all the jobs in the job store
        :return: list
        """
        jobs = self.get_jobs()
        job_list = []
        if jobs:
            for job in jobs:
                job_list.append(job.name)
        return job_list

    def get_scheduler(self):
        """Returns the Scheduler object. Rather add functionality to this class than call this method."""
        if self.__sched is not None:
            return self.__sched
        else:
            raise SchedulerNotFoundError(
                'get_scheduler: Scheduler does not exist. It may have not started.'
            )
Exemple #47
0
class PyFlowScheduler(object):
    """
    This object schedules the submission of the tasks in an :class:`Flow`.
    There are two types of errors that might occur during the execution of the jobs:

        #. Python exceptions
        #. Abinit Errors.

    Python exceptions are easy to detect and are usually due to a bug in abinitio or random errors such as IOError.
    The set of Abinit Errors is much broader: it includes wrong input data, segmentation
    faults, problems with the resource manager, etc. Abinitio tries to handle the most common cases,
    but there's still a lot of room for improvement.
    Note, in particular, that `PyFlowScheduler` will shut down automatically if

        #. The number of python exceptions is > MAX_NUM_PYEXC

        #. The number of Abinit Errors (i.e. the number of tasks whose status is S_ERROR) is > MAX_NUM_ERRORS

        #. The number of jobs launched becomes greater than (`safety_ratio` * total_number_of_tasks).

        #. The scheduler will send an email to the user (specified by `mailto`) every `remindme_s` seconds.
           If the mail cannot be sent, it will shutdown automatically.
           This check prevents the scheduler from being trapped in an infinite loop.
    """
    # Configuration file.
    YAML_FILE = "scheduler.yml"
    USER_CONFIG_DIR = os.path.join(os.getenv("HOME"), ".abinit", "abipy")

    Error = PyFlowSchedulerError

    def __init__(self, **kwargs):
        """
        Args:
            weeks: number of weeks to wait
            days: number of days to wait
            hours: number of hours to wait
            minutes: number of minutes to wait
            seconds: number of seconds to wait
            verbose: (int) verbosity level
            max_njobs_inqueue: Limit on the number of jobs that can be present in the queue
            use_dynamic_manager: True if the :class:`TaskManager` must be re-initialized from
                file before launching the jobs. Default: False
            max_nlaunches: Maximum number of tasks launched by rapidfire (default -1 i.e. no limit)
        """
        # Options passed to the scheduler.
        self.sched_options = AttrDict(
            weeks=kwargs.pop("weeks", 0),
            days=kwargs.pop("days", 0),
            hours=kwargs.pop("hours", 0),
            minutes=kwargs.pop("minutes", 0),
            seconds=kwargs.pop("seconds", 0),
            #start_date=kwargs.pop("start_date", None),
        )

        if all(not v for v in self.sched_options.values()):
            raise self.Error("Wrong set of options passed to the scheduler.")

        self.mailto = kwargs.pop("mailto", None)
        self.verbose = int(kwargs.pop("verbose", 0))
        self.use_dynamic_manager = kwargs.pop("use_dynamic_manager", False)
        self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200)
        self.contact_resource_manager = as_bool(kwargs.pop("contact_resource_manager", False))

        self.remindme_s = float(kwargs.pop("remindme_s", 4 * 24 * 3600))
        self.max_num_pyexcs = int(kwargs.pop("max_num_pyexcs", 0))
        self.max_num_abierrs = int(kwargs.pop("max_num_abierrs", 0))
        self.safety_ratio = int(kwargs.pop("safety_ratio", 5))
        #self.max_etime_s = kwargs.pop("max_etime_s", )
        self.max_nlaunches = kwargs.pop("max_nlaunches", -1)
        self.debug = kwargs.pop("debug", 0)

        if kwargs:
            raise self.Error("Unknown arguments %s" % kwargs)

        if has_sched_v3:
            from apscheduler.schedulers.blocking import BlockingScheduler
            self.sched = BlockingScheduler()
        else:
            from apscheduler.scheduler import Scheduler
            self.sched = Scheduler(standalone=True)

        self.nlaunch = 0
        self.num_reminders = 1

        # Used to keep track of the exceptions raised while the scheduler is running
        self.exceptions = collections.deque(maxlen=self.max_num_pyexcs + 10)

        # Used to push additional info during the execution.
        self.history = collections.deque(maxlen=100)

    @classmethod
    def from_file(cls, filepath):
        """Read the configuration parameters from a Yaml file."""
        with open(filepath, "r") as fh:
            return cls(**yaml.load(fh))

    @classmethod
    def from_string(cls, s):
        """Create an istance from string s containing a YAML dictionary."""
        stream = cStringIO(s)
        stream.seek(0)
        return cls(**yaml.load(stream))

    @classmethod
    def from_user_config(cls):
        """
        Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'.
        Search first in the working directory and then in the configuration directory of abipy.

        Raises:
            RuntimeError if file is not found.
        """
        # Try in the current directory.
        path = os.path.join(os.getcwd(), cls.YAML_FILE)

        if os.path.exists(path):
            return cls.from_file(path)

        # Try in the configuration directory.
        path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE)

        if os.path.exists(path):
            return cls.from_file(path)

        raise cls.Error("Cannot locate %s neither in current directory nor in %s" % (cls.YAML_FILE, path))

    def __str__(self):
        """String representation."""
        lines = [self.__class__.__name__ + ", Pid: %d" % self.pid]
        app = lines.append

        app("Scheduler options: %s" % str(self.sched_options))
        app(80 * "=")
        app(str(self.flow))

        return "\n".join(lines)

    @property
    def pid(self):
        """The pid of the process associated to the scheduler."""
        try:
            return self._pid

        except AttributeError:
            self._pid = os.getpid()
            return self._pid

    @property
    def pid_file(self):
        """
        Absolute path of the file with the pid.
        The file is located in the workdir of the flow
        """
        return self._pid_file

    @property
    def flow(self):
        """`Flow`."""
        return self._flow

    @property
    def num_excs(self):
        """Number of exceptions raised so far."""
        return len(self.exceptions)

    def get_delta_etime(self):
        """Returns a `timedelta` object representing with the elapsed time."""
        return timedelta(seconds=(time.time() - self.start_time))

    def add_flow(self, flow):
        """Add an :class:`Flow` flow to the scheduler."""
        if hasattr(self, "_flow"):
            raise self.Error("Only one flow can be added to the scheduler.")

        pid_file = os.path.join(flow.workdir, "_PyFlowScheduler.pid")

        if os.path.isfile(pid_file):
            flow.show_status()

            raise self.Error("""\
                pid_file %s already exists
                There are two possibilities:

                   1) There's another instance of PyFlowScheduler running
                   2) The previous scheduler didn't exit in a clean way

                To solve case 1:
                   Kill the previous scheduler (use 'kill pid' where pid is the number reported in the file)
                   Then you can restart the new scheduler.

                To solve case 2:
                   Remove the pid_file and restart the scheduler.

                Exiting""" % pid_file)

        with open(pid_file, "w") as fh:
            fh.write(str(self.pid))

        self._pid_file = pid_file
        self._flow = flow

    def start(self):
        """
        Starts the scheduler in a new thread. Returns True on success.
        In standalone mode, this method will block until there are no more scheduled jobs.
        """
        self.history.append("Started on %s" % time.asctime())
        self.start_time = time.time()

        if has_sched_v3:
            self.sched.add_job(self.callback, "interval", **self.sched_options)
        else:
            self.sched.add_interval_job(self.callback, **self.sched_options)

        errors = self.flow.look_before_you_leap()
        if errors:
            self.exceptions.append(errors)
            return False

        # Try to run the job immediately. If something goes wrong return without initializing the scheduler.
        self._runem_all()

        if self.exceptions:
            self.cleanup()
            self.send_email(msg="Error while trying to run the flow for the first time!\n %s" % self.exceptions)
            return False

        try:
            self.sched.start()
            return True

        except KeyboardInterrupt:
            self.shutdown(msg="KeyboardInterrupt from user")
            if ask_yesno("Do you want to cancel all the jobs in the queue? [Y/n]"): 
                self.flow.cancel()
            self.flow.pickle_dump()
            return False

    def _runem_all(self):
        """
        This function checks the status of all tasks,
        tries to fix tasks that went unconverged, abicritical, or queuecritical
        and tries to run all the tasks that can be submitted.
        """
        excs = []
        flow = self.flow

        # Allow the manager to be changed at run-time
        if self.use_dynamic_manager:
            from pymatgen.io.abinitio.tasks import TaskManager
            new_manager = TaskManager.from_user_config()
            for work in flow:
                work.set_manager(new_manager)

        nqjobs = 0
        if self.contact_resource_manager:
            # This call is expensive and therefore it's optional
            nqjobs = flow.get_njobs_in_queue()
            if nqjobs is None:
                nqjobs = 0
                if flow.manager.has_queue: logger.warning('Cannot get njobs_inqueue')

            if nqjobs >= self.max_njobs_inqueue:
                logger.info("Too many jobs in the queue, returning")
                return

        if self.max_nlaunches == -1:
            max_nlaunch = self.max_njobs_inqueue - nqjobs
        else:
            max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches)

        # check status and print it.
        flow.check_status(show=False)

        # fix problems
        # Try to restart the unconverged tasks
        # TODO: do not fire here but prepare for firing in rapidfire
        for task in self.flow.unconverged_tasks:
            try:
                logger.info("Flow will try restart task %s" % task)
                fired = task.restart()
                if fired: 
                    self.nlaunch += 1
                    max_nlaunch -= 1
                    if max_nlaunch == 0:
                        logger.info("Restart: too many jobs in the queue, returning")
                        flow.pickle_dump()
                        return
            except task.RestartError:
                excs.append(straceback())

        # moved here from within rapidfire ...
        # fix only prepares for restarting, and sets to ready
        nfixed = flow.fix_abi_critical()
        if nfixed: print("Fixed %d AbiCritical errors" % nfixed)

        # Temporarily disable by MG because I don't know if fix_critical works after the
        # introduction of the new qadapters
        if False:
            nfixed = flow.fix_queue_critical()
            if nfixed: print("Fixed %d QueueCritical errors" % nfixed)

        # update database
        flow.pickle_dump()

        # Submit the tasks that are ready.
        try:
            nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10)
            self.nlaunch += nlaunch

            if nlaunch:
                print("[%s] Number of launches: %d" % (time.asctime(), nlaunch))

        except Exception:
            excs.append(straceback())

        flow.show_status()

        if excs:
            logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs))
            self.exceptions.extend(excs)

    def callback(self):
        """The function that will be executed by the scheduler."""
        try:
            return self._callback()
        except:
            # All exceptions raised here will trigger the shutdown!
            self.exceptions.append(straceback())
            self.shutdown(msg="Exception raised in callback!")

    def _callback(self):
        """The actual callback."""
        if self.debug:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds())

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if self.verbose:
            print("all_ok", all_ok)

        if all_ok:
            self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit")

        # Handle failures.
        err_msg = ""

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.remindme_s:
            self.num_reminders += 1
            msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " %
                  (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" +
                        " but send_email returned %d. Aborting now" % retcode)
                err_msg += msg

        #if delta_etime.total_seconds() > self.max_etime_s:
        #    err_msg += "\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.max_num_pyexcs:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.max_num_pyexcs)
            err_msg += boxed(msg)

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons 
        # such as race conditions between different callbacks!)
        if self.nlaunch > self.safety_ratio * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_msg += boxed(msg)

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.max_num_abierrs:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.max_num_abierrs)
            err_msg += boxed(msg)

        deadlocked, runnables, running = self.flow.deadlocked_runnables_running()
        #print("\ndeadlocked:\n", deadlocked, "\nrunnables:\n", runnables, "\nrunning\n", running)
        if deadlocked and not runnables and not running:
            msg = "No runnable job with deadlocked tasks:\n %s\nWill shutdown the scheduler and exit" % str(deadlocked)
            err_msg += boxed(msg)

        if err_msg:
            # Something wrong. Quit
            self.shutdown(err_msg)

        return len(self.exceptions)

    def cleanup(self):
        """Cleanup routine: remove the pid file and save the pickle database"""
        try:
            os.remove(self.pid_file)
        except OSError:
            logger.critical("Could not remove pid_file")

        # Save the final status of the flow.
        self.flow.pickle_dump()

    def shutdown(self, msg):
        """Shutdown the scheduler."""
        try:
            self.cleanup()

            self.history.append("Completed on %s" % time.asctime())
            self.history.append("Elapsed time %s" % self.get_delta_etime())

            if self.debug:
                print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds())

            retcode = self.send_email(msg)
            if self.debug:
                print("send_mail retcode", retcode)

            # Write file with the list of exceptions:
            if self.exceptions:
                dump_file = os.path.join(self.flow.workdir, "_exceptions")
                with open(dump_file, "w") as fh:
                    fh.writelines(self.exceptions)
                    fh.write("Shutdown message:\n%s" % msg)

            lines = []
            app = lines.append
            app("Submitted on %s" % time.ctime(self.start_time))
            app("Completed on %s" % time.asctime())
            app("Elapsed time %s" % str(self.get_delta_etime()))
            if self.flow.all_ok:
                app("Flow completed successfully")
            else:
                app("Flow didn't complete successfully")
                app("Shutdown message:\n%s" % msg)
            print("\n".join(lines))

        finally:
            # Shutdown the scheduler thus allowing the process to exit.
            logger.debug('this should be the shutdown of the scheduler')

            # Unschedule all the jobs before calling shutdown
            #self.sched.print_jobs()
            for job in self.sched.get_jobs():
                self.sched.unschedule_job(job)
            #self.sched.print_jobs()
                
            self.sched.shutdown()
            # Uncomment the line below if shutdown does not work!
            #os.system("kill -9 %d" % os.getpid())

    def send_email(self, msg, tag=None):
        """
        Send an e-mail before completing the shutdown.
        Returns 0 if success.
        """
        try:
            return self._send_email(msg, tag)
        except:
            self.exceptions.append(straceback())
            return -2

    def _send_email(self, msg, tag):
        if self.mailto is None:
            return -1

        header = msg.splitlines()
        app = header.append

        app("Submitted on %s" % time.ctime(self.start_time))
        app("Completed on %s" % time.asctime())
        app("Elapsed time %s" % str(self.get_delta_etime()))
        app("Number of errored tasks: %d" % self.flow.num_errored_tasks)
        app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks)

        strio = cStringIO()
        strio.writelines("\n".join(header) + 4 * "\n")

        # Add the status of the flow.
        self.flow.show_status(stream=strio)

        if self.exceptions:
            # Report the list of exceptions.
            strio.writelines(self.exceptions)

        if tag is None:
            tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]"

        return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
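
None of the snippets above include the driver code for this first PyFlowScheduler variant. A minimal, hedged sketch of how it might be launched (the import path and the build_flow() helper are placeholders; only from_user_config, add_flow and start come from the class above):

# Sketch only: "abipy_launcher" and build_flow() are hypothetical names,
# the PyFlowScheduler calls mirror the class shown above.
from abipy_launcher import PyFlowScheduler

flow = build_flow()                          # hypothetical helper returning a Flow

sched = PyFlowScheduler.from_user_config()   # reads scheduler.yml
sched.add_flow(flow)                         # writes the pid file into flow.workdir
if not sched.start():                        # blocks until the flow finishes or an error occurs
    print("Scheduler exited with errors: %s" % list(sched.exceptions))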
Exemple #48
0
class Scheduller():

    def __init__(self, job_func):
        self.job_func = job_func
        self.jobs = []

        config = {'apscheduler.standalone': False }
        self.sched = Scheduler(config)
        self.sched.start()

        self.db = shelve.open(os.path.join(os.path.dirname(__file__), "db/db_timers"), writeback=True)

        if 'timers' not in self.db:
            print('Creating new timers db')
            self.db['timers'] = []

        self.apply_all_timers(self.db['timers'])

        #self.db['timers'] = []
        #self.db['timers'].append(dict(cron="* * * * *", switch_name="TV", state=1))
        #self.db['timers'].append(dict(cron="20 14 * * *", switch_name="Speakers", state=1))
        #self.db['timers'].append(dict(cron="30 14 * * *", switch_name="Speakers", state=0))
        #self.db_flush()

    def db_flush(self):
        self.db.sync()
        self.db.close()
        self.db = shelve.open(os.path.join(os.path.dirname(__file__), "db/db_timers"), writeback=True)

    def get_timers(self):
        return self.db['timers']

    def update_db(self, timers):
        self.remove_all_jobs()
        self.db['timers'] = timers
        self.db_flush()
        self.apply_all_timers(self.db['timers'])

    def append_db(self, new_timer):
        self.db['timers'].append(new_timer)
        self.db_flush()
        self.schedulle_job(new_timer)

    def remove_all_jobs(self):
        for id, timer in enumerate(self.db['timers']):
            self.unschedule_job(id)
        self.db_flush()

    def apply_all_timers(self, timers):
        for timer in timers:
            self.schedulle_job(timer)

    def schedulle_job(self, timer):
        switch_name = timer['switch_name']
        state = timer['state']
        minute, hour, day, month, day_of_week = timer['cron'].split()

        # workaround for apscheduler: Monday is day 0 in its implementation
        if day_of_week != "*":
            day_of_week = int(day_of_week) - 1
            if (day_of_week == -1):
                day_of_week = 6

        job = self.sched.add_cron_job(self.job_func, second=0, minute=minute, hour=hour, day=day, month=month, day_of_week=day_of_week, args=[dict(switch_name=switch_name, state=state)])
        self.jobs.append(job)

    def unschedule_job(self, id):
        print 'remove ID:', id
        self.db['timers'].pop(id)

        self.sched.unschedule_job(self.jobs.pop(int(id)))

    def remove(self, id):
        self.unschedule_job(id)
        self.db_flush()
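
The day_of_week handling in schedulle_job() above is easy to miss: the stored cron strings presumably use standard cron numbering (Sunday = 0), while this apscheduler version counts Monday as 0. A small hedged sketch of that conversion in isolation, using the timer layout from the commented-out entries in __init__:

# Sketch of the cron-field handling used by schedulle_job() above.
def cron_to_aps_fields(cron_string):
    # split "minute hour day month day_of_week" and shift day_of_week
    # from Sunday-based numbering to apscheduler's Monday-based numbering
    minute, hour, day, month, day_of_week = cron_string.split()
    if day_of_week != "*":
        day_of_week = int(day_of_week) - 1
        if day_of_week == -1:
            day_of_week = 6
    return dict(minute=minute, hour=hour, day=day, month=month, day_of_week=day_of_week)

# e.g. the sample timer from the commented-out block in __init__:
timer = dict(cron="20 14 * * *", switch_name="Speakers", state=1)
fields = cron_to_aps_fields(timer['cron'])   # {'minute': '20', 'hour': '14', ..., 'day_of_week': '*'}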
Exemple #49
0
class schedulerDaemon(object):
    
    def __init__(self):
                       
        #starting scheduler 
        self.sched = Scheduler()
        self.sched.start()
        self.sched.add_listener(job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | EVENT_JOB_MISSED)    
        self.recoverySchedulerDaemon()
        self.newEmulationList =[]
        #Logger.init()

    def listJobs(self):
        schedFileLogger.debug("-> listJobs(self)")

        schJobsFormat=self.sched.get_jobs()
        
        #! have to convert the list of jobs from the scheduler into a list of strings before sending them over Pyro 4.20, which uses the new "serpent" serializer
        strJobsList=[]
       
        if schJobsFormat:
            
            for job in self.sched.get_jobs():
                strJobsList.append(str(job))            
            
            schedFileLogger.debug("sending list of jobs")
            #[<Job (name=1-1-MEM_EMU-logger interval-3sec., trigger=<SimpleTrigger (run_date=datetime.datetime(2014, 10, 10, 10, 10, 10))>)>, <Job (name=1-MEM_EMU-1-0-MEM_Distro-lookbusy-mem: 100 Duration: 60.0sec. End Time: 10:11:10, trigger=<SimpleTrigger (run_date=datetime.datetime(2014, 10, 10, 10, 10, 10))>)>, <Job (name=1-MEM_EMU-1-1-MEM_Distro-lookbusy-mem: 225 Duration: 48.0sec. End Time: 10:11:04, trigger=<SimpleTrigger (run_date=datetime.datetime(2014, 10, 10, 10, 10, 16))>)>, <Job (name=1-MEM_EMU-1-2-MEM_Distro-lookbusy-mem: 225 Duration: 36.0sec. End Time: 10:10:58, trigger=<SimpleTrigger (run_date=datetime.datetime(2014, 10, 10, 10, 10, 22))>)>, <Job (name=1-MEM_EMU-1-3-MEM_Distro-lookbusy-mem: 225 Duration: 24.0sec. End Time: 10:10:52, trigger=<SimpleTrigger (run_date=datetime.datetime(2014, 10, 10, 10, 10, 28))>)>, <Job (name=1-MEM_EMU-1-4-MEM_Distro-lookbusy-mem: 225 Duration: 12.0sec. End Time: 10:10:46, trigger=<SimpleTrigger (run_date=datetime.datetime(2014, 10, 10, 10, 10, 34))>)>]
            
            return strJobsList
        else:
            schedFileLogger.debug("No jobs to send")
            return []
    
        
       
    def stopSchedulerDaemon(self):
        schedFileLogger.debug("-> stopSchedulerDaemon(self)")
        schedFileLogger.info("stopping Daemon")
        sys.exit(1)   
        sys.exit(0) 
    
    def hello(self):
        schedFileLogger.debug("-> hello(self)") 
        greeting = "Pong!"
        schedFileLogger.debug(greeting)
        return greeting
    
    def deleteJobs(self,emulationID,distribitionName):
        schedFileLogger.debug("-> deleteJobs(self,emulationID,distribitionName)")
        #stringify
        emulationID =str(emulationID)
        distribitionName=str(distribitionName)
        
        schedFileLogger.debug("Looking for job name:"+emulationID+"-"+distribitionName)
        
        if emulationID=="all":
            schedFileLogger.info("Jobs deleted:")
            #setting emulation objects list to initial state
            self.newEmulationList =[]
            for job in self.sched.get_jobs():
                self.sched.unschedule_job(job)
                msg = {"Action":"Job Deleted","jobName":job.name}
                schedFileLogger.info(msg)
                #schedFileLogger.info(str(job.name))
                
        else:
            for job in self.sched.get_jobs():
                if distribitionName in job.name :
                    self.sched.unschedule_job(job)
                    
                    msg = {"Action":"Job Deleted","jobName":job.name}
                    schedFileLogger.info(msg)
                    
                    #schedFileLogger.info( "Job: "+job.name+" Deleted")
                    
                
                else:
                    schedFileLogger.info( "These jobs remain: "+job.name)
    
    #Adding current emulation object for further availability
    def setEmuObject(self,newEmulation):
        try:
            #prevent duplicate emulation entries from being created
#            print "\nlen emuObject: ", str(len(self.newEmulationList))
            n=0
            if len(self.newEmulationList) >= 1:
                for emus in self.newEmulationList:
                    #print "emu compare:",emus.emulationID,newEmulation.emulationID
                    if emus.emulationID==newEmulation.emulationID:
                        #print "FOUND IT"
                        self.newEmulationList.pop(n)
                    n+=1
                self.newEmulationList.append(newEmulation)
            else:
                self.newEmulationList.append(newEmulation)
            
            return True
        except Exception,e:
            print e
            return False
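
The constructor above attaches a job_listener for executed, errored and missed events, but the listener itself is outside the excerpt. A hedged sketch of what it presumably looks like with the apscheduler 2.x event API (the log messages are illustrative):

# Sketch: a listener compatible with the add_listener() call above (apscheduler 2.x).
from apscheduler.events import EVENT_JOB_ERROR, EVENT_JOB_MISSED

def job_listener(event):
    # event.code tells which of the registered event types fired
    if event.code == EVENT_JOB_ERROR:
        schedFileLogger.error("Job %s raised: %s" % (event.job, event.exception))
    elif event.code == EVENT_JOB_MISSED:
        schedFileLogger.warning("Job %s missed its scheduled run time" % event.job)
    else:
        schedFileLogger.debug("Job %s executed" % event.job)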
Exemple #50
0
	email_sched.start()
	text = "it's been twelve hours. here are the latest temperature readings from your new brew"
	email_job = email_sched.add_interval_job(send_email, hours=8, args=[text, password, True])
	
	# this is the heart of the program:
	# send email to let me know I'm brewing
	send_email("starting brew log.", password, graph=False)
	
	# get list of temps:
	list_of_temps = sys.argv[2::2]
	# get lengths of time for each of those temperatures
	list_of_times = sys.argv[3::2]
	
	# convert to ints
	list_of_temps = map(int,list_of_temps)
	list_of_times = map(int, list_of_times)
	
	print "list of temps"
	print list_of_temps
	print "list of times"
	print list_of_times
	
	for i in range(0,len(list_of_times)):
		send_email("changing temperature to " + str(list_of_temps[i]) + " for " + str(list_of_times[i]) + " hours.", password, graph = False)
		recordAndRegulateTemp(list_of_times[i],list_of_temps[i],writer)
	
	print "program done. fermenter shutting down."
	send_email("ending. fermenter is shutting off", password, graph=True)
	email_sched.unschedule_job(email_job)
	io.output(power_pin, False)
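
The fragment above starts after email_sched has already been created and send_email defined; that setup is not shown. Under the old apscheduler API used elsewhere in these examples, it presumably resembles:

# Hedged guess at the missing setup for the fragment above (apscheduler < 3.0).
from apscheduler.scheduler import Scheduler

email_sched = Scheduler()   # the fragment then calls email_sched.start() itself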
Exemple #51
0
class PyFlowScheduler(object):
    """
    This object schedules the submission of the tasks in a :class:`Flow`.
    There are two types of errors that might occur during the execution of the jobs:

        #. Python exceptions
        #. Errors in the ab-initio code

    Python exceptions are easy to detect and are usually due to a bug in the python code or random errors such as IOError.
    The set of errors in the ab-initio code is much broader. It includes wrong input data, segmentation
    faults, problems with the resource manager, etc. The flow tries to handle the most common cases
    but there's still a lot of room for improvement.
    Note, in particular, that `PyFlowScheduler` will shut down automatically in the following cases:

        #. The number of python exceptions is > max_num_pyexcs

        #. The number of task errors (i.e. the number of tasks whose status is S_ERROR) is > max_num_abierrs

        #. The number of jobs launched becomes greater than (`safety_ratio` * total_number_of_tasks).

        #. The scheduler will send an email to the user (specified by `mailto`) every `remindme_s` seconds.
           If the mail cannot be sent, the scheduler will shut down automatically.
           This check prevents the scheduler from being trapped in an infinite loop.
    """
    # Configuration file.
    YAML_FILE = "scheduler.yml"
    USER_CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".abinit", "abipy")

    Error = PyFlowSchedulerError

    @classmethod
    def autodoc(cls):
        i = cls.__init__.__doc__.index("Args:")
        return cls.__init__.__doc__[i+5:]

    def __init__(self, **kwargs):
        """
        Args:
            weeks: number of weeks to wait (DEFAULT: 0).
            days: number of days to wait (DEFAULT: 0).
            hours: number of hours to wait (DEFAULT: 0).
            minutes: number of minutes to wait (DEFAULT: 0).
            seconds: number of seconds to wait (DEFAULT: 0).
            mailto: The scheduler will send an email to `mailto` every `remindme_s` seconds.
                (DEFAULT: None i.e. not used).
            verbose: (int) verbosity level. (DEFAULT: 0)
            use_dynamic_manager: "yes" if the :class:`TaskManager` must be re-initialized from
                file before launching the jobs. (DEFAULT: "no")
            max_njobs_inqueue: Limit on the number of jobs that can be present in the queue. (DEFAULT: 200)
            remindme_s: The scheduler will send an email to the user specified by `mailto` every `remindme_s` seconds.
                (int, DEFAULT: 1 day).
            max_num_pyexcs: The scheduler will exit if the number of python exceptions is > max_num_pyexcs
                (int, DEFAULT: 0)
            max_num_abierrs: The scheduler will exit if the number of errored tasks is > max_num_abierrs
                (int, DEFAULT: 0)
            safety_ratio: The scheduler will exit if the number of jobs launched becomes greater than
               `safety_ratio` * total_number_of_tasks_in_flow. (int, DEFAULT: 5)
            max_nlaunches: Maximum number of tasks launched in a single iteration of the scheduler.
                (DEFAULT: -1 i.e. no limit)
            debug: Debug level. Use 0 for production (int, DEFAULT: 0)
            fix_qcritical: "yes" if the launcher should try to fix QCritical Errors (DEFAULT: "yes")
            rmflow: If "yes", the scheduler will remove the flow directory if the calculation
                completed successfully. (DEFAULT: "no")
            killjobs_if_errors: "yes" if the scheduler should try to kill all the running jobs
                before exiting due to an error. (DEFAULT: "yes")
        """
        # Options passed to the scheduler.
        self.sched_options = AttrDict(
            weeks=kwargs.pop("weeks", 0),
            days=kwargs.pop("days", 0),
            hours=kwargs.pop("hours", 0),
            minutes=kwargs.pop("minutes", 0),
            seconds=kwargs.pop("seconds", 0),
            #start_date=kwargs.pop("start_date", None),
        )
        if all(not v for v in self.sched_options.values()):
            raise self.Error("Wrong set of options passed to the scheduler.")

        self.mailto = kwargs.pop("mailto", None)
        self.verbose = int(kwargs.pop("verbose", 0))
        self.use_dynamic_manager = as_bool(kwargs.pop("use_dynamic_manager", False))
        self.max_njobs_inqueue = kwargs.pop("max_njobs_inqueue", 200)
        self.max_ncores_used = kwargs.pop("max_ncores_used", None)
        self.contact_resource_manager = as_bool(kwargs.pop("contact_resource_manager", False))

        self.remindme_s = float(kwargs.pop("remindme_s", 1 * 24 * 3600))
        self.max_num_pyexcs = int(kwargs.pop("max_num_pyexcs", 0))
        self.max_num_abierrs = int(kwargs.pop("max_num_abierrs", 0))
        self.safety_ratio = int(kwargs.pop("safety_ratio", 5))
        #self.max_etime_s = kwargs.pop("max_etime_s", )
        self.max_nlaunches = kwargs.pop("max_nlaunches", -1)
        self.debug = kwargs.pop("debug", 0)
        self.fix_qcritical = as_bool(kwargs.pop("fix_qcritical", True))
        self.rmflow = as_bool(kwargs.pop("rmflow", False))
        self.killjobs_if_errors = as_bool(kwargs.pop("killjobs_if_errors", True))

        self.customer_service_dir = kwargs.pop("customer_service_dir", None)
        if self.customer_service_dir is not None:
            self.customer_service_dir = Directory(self.customer_service_dir)
            self._validate_customer_service()

        if kwargs:
            raise self.Error("Unknown arguments %s" % kwargs)

        if not has_apscheduler:
            raise RuntimeError("Install apscheduler with pip")

        if has_sched_v3:
            logger.warning("Using scheduler v>=3.0.0")
            from apscheduler.schedulers.blocking import BlockingScheduler
            self.sched = BlockingScheduler()
        else:
            from apscheduler.scheduler import Scheduler
            self.sched = Scheduler(standalone=True)

        self.nlaunch = 0
        self.num_reminders = 1

        # Used to keep track of the exceptions raised while the scheduler is running
        self.exceptions = deque(maxlen=self.max_num_pyexcs + 10)

        # Used to push additional info during the execution.
        self.history = deque(maxlen=100)

    @classmethod
    def from_file(cls, filepath):
        """Read the configuration parameters from a Yaml file."""
        with open(filepath, "rt") as fh:
            return cls(**yaml.safe_load(fh))

    @classmethod
    def from_string(cls, s):
        """Create an istance from string s containing a YAML dictionary."""
        stream = cStringIO(s)
        stream.seek(0)
        return cls(**yaml.safe_load(stream))

    @classmethod
    def from_user_config(cls):
        """
        Initialize the :class:`PyFlowScheduler` from the YAML file 'scheduler.yml'.
        Search first in the working directory and then in the configuration directory of abipy.

        Raises:
            `RuntimeError` if file is not found.
        """
        # Try in the current directory.
        path = os.path.join(os.getcwd(), cls.YAML_FILE)

        if os.path.exists(path):
            return cls.from_file(path)

        # Try in the configuration directory.
        path = os.path.join(cls.USER_CONFIG_DIR, cls.YAML_FILE)

        if os.path.exists(path):
            return cls.from_file(path)

        raise cls.Error("Cannot locate %s neither in current directory nor in %s" % (cls.YAML_FILE, path))

    def __str__(self):
        """String representation."""
        lines = [self.__class__.__name__ + ", Pid: %d" % self.pid]
        app = lines.append
        app("Scheduler options: %s" % str(self.sched_options))

        if self.flow is not None:
            app(80 * "=")
            app(str(self.flow))

        return "\n".join(lines)

    @property
    def pid(self):
        """The pid of the process associated to the scheduler."""
        try:
            return self._pid
        except AttributeError:
            self._pid = os.getpid()
            return self._pid

    @property
    def pid_file(self):
        """
        Absolute path of the file with the pid.
        The file is located in the workdir of the flow
        """
        return self._pid_file

    @property
    def flow(self):
        """`Flow`."""
        try:
            return self._flow
        except AttributeError:
            return None

    @property
    def num_excs(self):
        """Number of exceptions raised so far."""
        return len(self.exceptions)

    def get_delta_etime(self):
        """Returns a `timedelta` object representing with the elapsed time."""
        return timedelta(seconds=(time.time() - self.start_time))

    def add_flow(self, flow):
        """
        Add a :class:`Flow` to the scheduler.
        """
        if hasattr(self, "_flow"):
            raise self.Error("Only one flow can be added to the scheduler.")

        # Check if we are already using a scheduler to run this flow
        flow.check_pid_file()
        flow.set_spectator_mode(False)

        # Build dirs and files (if not yet done)
        flow.build()

        with open(flow.pid_file, "wt") as fh:
            fh.write(str(self.pid))

        self._pid_file = flow.pid_file
        self._flow = flow

    def _validate_customer_service(self):
        """
        Validate the input parameters; if customer service is on, then
        create the directory for tarball files with correct permissions for user and group.
        """
        direc = self.customer_service_dir
        if not direc.exists:
            mode = 0o750
            print("Creating customer_service_dir %s with mode %s" % (direc, mode))
            direc.makedirs()
            os.chmod(direc.path, mode)

        if self.mailto is None:
            raise RuntimeError("customer_service_dir requires mailto option in scheduler.yml")

    def _do_customer_service(self):
        """
        This method is called before the shutdown of the scheduler.
        If customer_service is on and the flow didn't complete successfully,
        a lightweight tarball file with inputs and the most important output files
        is created in customer_service_dir.
        """
        if self.customer_service_dir is None: return
        doit = self.exceptions or not self.flow.all_ok
        doit = True  # hard-coded override: always build the tarball, even when the flow completed successfully
        if not doit: return

        prefix = os.path.basename(self.flow.workdir) + "_"

        import tempfile, datetime
        suffix = str(datetime.datetime.now()).replace(" ", "-")
        # Remove milliseconds
        i = suffix.index(".")
        if i != -1: suffix = suffix[:i]
        suffix += ".tar.gz"

        #back = os.getcwd()
        #os.chdir(self.customer_service_dir.path)

        _, tmpname = tempfile.mkstemp(suffix="_" + suffix, prefix=prefix,
                                      dir=self.customer_service_dir.path, text=False)

        print("Dear customer,\n We are about to generate a tarball in\n  %s" % tmpname)
        self.flow.make_light_tarfile(name=tmpname)
        #os.chdir(back)

    def start(self):
        """
        Starts the scheduler in a new thread. Returns 0 on success.
        In standalone mode, this method will block until there are no more scheduled jobs.
        """
        self.history.append("Started on %s" % time.asctime())
        self.start_time = time.time()

        if not has_apscheduler:
            raise RuntimeError("Install apscheduler with pip")

        if has_sched_v3:
            self.sched.add_job(self.callback, "interval", **self.sched_options)
        else:
            self.sched.add_interval_job(self.callback, **self.sched_options)

        errors = self.flow.look_before_you_leap()
        if errors:
            self.exceptions.append(errors)
            return 1

        # Try to run the job immediately. If something goes wrong return without initializing the scheduler.
        self._runem_all()

        if self.exceptions:
            self.cleanup()
            self.send_email(msg="Error while trying to run the flow for the first time!\n %s" % self.exceptions)
            return 1

        try:
            self.sched.start()
            return 0

        except KeyboardInterrupt:
            self.shutdown(msg="KeyboardInterrupt from user")
            if ask_yesno("Do you want to cancel all the jobs in the queue? [Y/n]"):
                print("Number of jobs cancelled:", self.flow.cancel())

            self.flow.pickle_dump()
            return -1

    def _runem_all(self):
        """
        This function checks the status of all tasks,
        tries to fix tasks that went unconverged, abicritical, or queuecritical
        and tries to run all the tasks that can be submitted.
        """
        excs = []
        flow = self.flow

        # Allow the manager to be changed at run-time
        if self.use_dynamic_manager:
            from pymatgen.io.abinit.tasks import TaskManager
            new_manager = TaskManager.from_user_config()
            for work in flow:
                work.set_manager(new_manager)

        nqjobs = 0
        if self.contact_resource_manager: # and flow.TaskManager.qadapter.QTYPE == "shell":
            # This call is expensive and therefore it's optional (must be activated in manager.yml)
            nqjobs = flow.get_njobs_in_queue()
            if nqjobs is None:
                nqjobs = 0
                if flow.manager.has_queue:
                    logger.warning('Cannot get njobs_inqueue')
        else:
            # Here we just count the number of tasks in the flow that are running.
            # This logic breaks down if there are multiple schedulers running
            # but it's easy to implement without having to contact the resource manager.
            nqjobs = (len(list(flow.iflat_tasks(status=flow.S_RUN))) +
                      len(list(flow.iflat_tasks(status=flow.S_SUB))))

        if nqjobs >= self.max_njobs_inqueue:
            print("Too many jobs in the queue: %s. No job will be submitted." % nqjobs)
            flow.check_status(show=False)
            return

        if self.max_nlaunches == -1:
            max_nlaunch = self.max_njobs_inqueue - nqjobs
        else:
            max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches)

        # check status.
        flow.check_status(show=False)

        # This check is not perfect: we should make a list of tasks to submit
        # and select only the subset so that we don't exceed max_ncores_used.
        # Many sections of this code should be rewritten.
        #if self.max_ncores_used is not None and flow.ncores_used > self.max_ncores_used:
        if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used:
            print("Cannot exceed max_ncores_used %s" % self.max_ncores_used)
            return

        # Try to restart the unconverged tasks
        # TODO: do not fire here but prepare for firing in rapidfire
        for task in self.flow.unconverged_tasks:
            try:
                logger.info("Flow will try restart task %s" % task)
                fired = task.restart()
                if fired:
                    self.nlaunch += 1
                    max_nlaunch -= 1
                    if max_nlaunch == 0:
                        logger.info("Restart: too many jobs in the queue, returning")
                        flow.pickle_dump()
                        return

            except task.RestartError:
                excs.append(straceback())

        # Temporarily disable by MG because I don't know if fix_critical works after the
        # introduction of the new qadapters
        # re-enabled by MsS; disable things that do not work at low level
        # fix only prepares for restarting, and sets to ready
        if self.fix_qcritical:
            nfixed = flow.fix_queue_critical()
            if nfixed: print("Fixed %d QCritical error(s)" % nfixed)

        nfixed = flow.fix_abicritical()
        if nfixed: print("Fixed %d AbiCritical error(s)" % nfixed)

        # update database
        flow.pickle_dump()

        # Submit the tasks that are ready.
        try:
            nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10)
            self.nlaunch += nlaunch
            if nlaunch:
                cprint("[%s] Number of launches: %d" % (time.asctime(), nlaunch), "yellow")

        except Exception:
            excs.append(straceback())

        # check status.
        flow.show_status()

        if excs:
            logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs))
            self.exceptions.extend(excs)

    def callback(self):
        """The function that will be executed by the scheduler."""
        try:
            return self._callback()
        except:
            # All exceptions raised here will trigger the shutdown!
            s = straceback()
            self.exceptions.append(s)

            # This is useful when debugging
            #try:
            #    print("Exception in callback, will cancel all tasks")
            #    for task in self.flow.iflat_tasks():
            #        task.cancel()
            #except Exception:
            #    pass

            self.shutdown(msg="Exception raised in callback!\n" + s)

    def _callback(self):
        """The actual callback."""
        if self.debug:
            # Show the number of open file descriptors
            print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds())

        self._runem_all()

        # Mission accomplished. Shutdown the scheduler.
        all_ok = self.flow.all_ok
        if all_ok:
            return self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit")

        # Handle failures.
        err_lines = []

        # Shall we send a reminder to the user?
        delta_etime = self.get_delta_etime()

        if delta_etime.total_seconds() > self.num_reminders * self.remindme_s:
            self.num_reminders += 1
            msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " %
                  (self.pid, self.flow, delta_etime))
            retcode = self.send_email(msg, tag="[REMINDER]")

            if retcode:
                # Cannot send mail, shutdown now!
                msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" +
                        " but send_email returned %d. Aborting now" % retcode)
                err_lines.append(msg)

        #if delta_etime.total_seconds() > self.max_etime_s:
        #    err_lines.append("\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s)

        # Too many exceptions. Shutdown the scheduler.
        if self.num_excs > self.max_num_pyexcs:
            msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % (
                self.num_excs, self.max_num_pyexcs)
            err_lines.append(boxed(msg))

        # Paranoid check: disable the scheduler if we have submitted
        # too many jobs (it might be due to some bug or other external reasons
        # such as race conditions between different callbacks!)
        if self.nlaunch > self.safety_ratio * self.flow.num_tasks:
            msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % (
                self.nlaunch, self.flow.num_tasks)
            err_lines.append(boxed(msg))

        # Count the number of tasks with status == S_ERROR.
        if self.flow.num_errored_tasks > self.max_num_abierrs:
            msg = "Number of tasks with ERROR status %s > %s. Will shutdown the scheduler and exit" % (
                self.flow.num_errored_tasks, self.max_num_abierrs)
            err_lines.append(boxed(msg))

        # Test on the presence of deadlocks.
        g = self.flow.find_deadlocks()
        if g.deadlocked:
            # Check the flow again so that status are updated.
            self.flow.check_status()

            g = self.flow.find_deadlocks()
            print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables, "\nrunning\n", g.running)
            if g.deadlocked and not g.runnables and not g.running:
                err_lines.append("No runnable job with deadlocked tasks:\n%s." % str(g.deadlocked))

        if not g.runnables and not g.running:
            # Check the flow again so that status are updated.
            self.flow.check_status()
            g = self.flow.find_deadlocks()
            if not g.runnables and not g.running:
                err_lines.append("No task is running and cannot find other tasks to submit.")

        # Something wrong. Quit
        if err_lines:
            # Cancel all jobs.
            if self.killjobs_if_errors:
                cprint("killjobs_if_errors set to 'yes' in scheduler file. Will kill jobs before exiting.", "yellow")
                try:
                    num_cancelled = 0
                    for task in self.flow.iflat_tasks():
                        num_cancelled += task.cancel()
                    cprint("Killed %d tasks" % num_cancelled, "yellow")
                except Exception as exc:
                    cprint("Exception while trying to kill jobs:\n%s" % str(exc), "red")

            self.shutdown("\n".join(err_lines))

        return len(self.exceptions)

    def cleanup(self):
        """Cleanup routine: remove the pid file and save the pickle database"""
        try:
            os.remove(self.pid_file)
        except OSError as exc:
            logger.critical("Could not remove pid_file: %s", exc)

        # Save the final status of the flow.
        self.flow.pickle_dump()

    def shutdown(self, msg):
        """Shutdown the scheduler."""
        try:
            self.cleanup()

            self.history.append("Completed on: %s" % time.asctime())
            self.history.append("Elapsed time: %s" % self.get_delta_etime())

            if self.debug:
                print(">>>>> shutdown: Number of open file descriptors: %s" % get_open_fds())

            retcode = self.send_email(msg)
            if self.debug:
                print("send_mail retcode", retcode)

            # Write file with the list of exceptions:
            if self.exceptions:
                dump_file = os.path.join(self.flow.workdir, "_exceptions")
                with open(dump_file, "wt") as fh:
                    fh.writelines(self.exceptions)
                    fh.write("Shutdown message:\n%s" % msg)

            lines = []
            app = lines.append
            app("Submitted on: %s" % time.ctime(self.start_time))
            app("Completed on: %s" % time.asctime())
            app("Elapsed time: %s" % str(self.get_delta_etime()))

            if self.flow.all_ok:
                app("Flow completed successfully")
            else:
                app("Flow %s didn't complete successfully" % repr(self.flow.workdir))
                app("use `abirun.py FLOWDIR debug` to analyze the problem.")
                app("Shutdown message:\n%s" % msg)

            print("")
            print("\n".join(lines))
            print("")

            self._do_customer_service()

            if self.flow.all_ok:
                print("Calling flow.finalize()...")
                self.flow.finalize()
                #print("finalized:", self.flow.finalized)
                if self.rmflow:
                    app("Flow directory will be removed...")
                    try:
                        self.flow.rmtree()
                    except Exception:
                        logger.warning("Ignoring exception while trying to remove flow dir.")

        finally:
            # Shutdown the scheduler thus allowing the process to exit.
            logger.debug('This should be the shutdown of the scheduler')

            # Unschedule all the jobs before calling shutdown
            #self.sched.print_jobs()
            if not has_sched_v3:
                for job in self.sched.get_jobs():
                    self.sched.unschedule_job(job)
            #self.sched.print_jobs()

            self.sched.shutdown()
            # Uncomment the line below if shutdown does not work!
            #os.system("kill -9 %d" % os.getpid())

    def send_email(self, msg, tag=None):
        """
        Send an e-mail before completing the shutdown.
        Returns 0 if success.
        """
        try:
            return self._send_email(msg, tag)
        except:
            self.exceptions.append(straceback())
            return -2

    def _send_email(self, msg, tag):
        if self.mailto is None:
            return -1

        header = msg.splitlines()
        app = header.append

        app("Submitted on: %s" % time.ctime(self.start_time))
        app("Completed on: %s" % time.asctime())
        app("Elapsed time: %s" % str(self.get_delta_etime()))
        app("Number of errored tasks: %d" % self.flow.num_errored_tasks)
        app("Number of unconverged tasks: %d" % self.flow.num_unconverged_tasks)

        strio = cStringIO()
        strio.writelines("\n".join(header) + 4 * "\n")

        # Add the status of the flow.
        self.flow.show_status(stream=strio)

        if self.exceptions:
            # Report the list of exceptions.
            strio.writelines(self.exceptions)

        if tag is None:
            tag = " [ALL OK]" if self.flow.all_ok else " [WARNING]"

        return sendmail(subject=self.flow.name + tag, text=strio.getvalue(), mailto=self.mailto)
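
The Args block in __init__ above effectively documents the scheduler.yml format consumed by from_file/from_user_config. A minimal configuration exercising a few of those options, fed through from_string (the values are only illustrative):

# Sketch: the YAML keys mirror the options documented in __init__;
# every value below is an arbitrary example.
conf = """
seconds: 30                # wake up the callback every 30 seconds
mailto: user@example.com   # enables the reminder / report e-mails
max_njobs_inqueue: 100
max_nlaunches: 10
rmflow: no
killjobs_if_errors: yes
"""

sched = PyFlowScheduler.from_string(conf)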
Exemple #52
0
class AlertSchedulerHandler():
    TYPE_PORT = 'PORT'
    TYPE_METRIC = 'METRIC'
    TYPE_AMS = 'AMS'
    TYPE_SCRIPT = 'SCRIPT'
    TYPE_WEB = 'WEB'
    TYPE_RECOVERY = 'RECOVERY'

    def __init__(self, initializer_module, in_minutes=True):
        self.initializer_module = initializer_module
        self.cachedir = initializer_module.config.alerts_cachedir
        self.stacks_dir = initializer_module.config.stacks_dir
        self.common_services_dir = initializer_module.config.common_services_dir
        self.extensions_dir = initializer_module.config.extensions_dir
        self.host_scripts_dir = initializer_module.config.host_scripts_dir
        self.configuration_builder = initializer_module.configuration_builder

        self._cluster_configuration = initializer_module.configurations_cache
        self.alert_definitions_cache = initializer_module.alert_definitions_cache

        self.config = initializer_module.config

        # the amount of time, in seconds, that an alert can run after its scheduled time
        alert_grace_period = int(
            self.config.get('agent', 'alert_grace_period', 5))

        apscheduler_standalone = False

        self.APS_CONFIG = {
            'apscheduler.threadpool.core_threads': 3,
            'apscheduler.coalesce': True,
            'apscheduler.standalone': apscheduler_standalone,
            'apscheduler.misfire_grace_time': alert_grace_period,
            'apscheduler.threadpool.context_injector':
                self._job_context_injector if not apscheduler_standalone else None,
            'apscheduler.threadpool.agent_config': self.config
        }

        self._collector = AlertCollector()
        self.__scheduler = Scheduler(self.APS_CONFIG)
        self.__in_minutes = in_minutes
        self.recovery_manger = initializer_module.recovery_manager

        # register python exit handler
        ExitHelper().register(self.exit_handler)

    def _job_context_injector(self, config):
        """
    apscheduler hack to inject monkey-patching, context and configuration into all jobs
    inside the scheduler in case the scheduler is running in embedded mode

    Please note, this function is called in the job context, so all injections should be optimized for run time

    :type config AmbariConfig.AmbariConfig
    """
        if not config.use_system_proxy_setting():
            from ambari_commons.network import reconfigure_urllib2_opener
            reconfigure_urllib2_opener(ignore_system_proxy=True)

    def exit_handler(self):
        """
    Exit handler
    """
        self.stop()

    def update_definitions(self, event_type):
        """
    Updates the persisted alert definitions JSON.
    :return:
    """
        # prune out things we don't want to store
        alert_definitions = []
        for cluster_id, command in self.alert_definitions_cache.iteritems():
            command_copy = Utils.get_mutable_copy(command)
            alert_definitions.append(command_copy)

        if event_type == "CREATE":
            # reschedule all jobs, creating new instances
            self.reschedule_all()
        else:
            # reschedule only the jobs that have changed
            self.reschedule()

    def __make_function(self, alert_def):
        return lambda: alert_def.collect()

    def start(self):
        """ loads definitions from file and starts the scheduler """

        if self.__scheduler is None:
            return

        if self.__scheduler.running:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(self.APS_CONFIG)

        alert_callables = self.__load_definitions()

        # schedule each definition
        for _callable in alert_callables:
            self.schedule_definition(_callable)

        logger.info(
            "[AlertScheduler] Starting {0}; currently running: {1}".format(
                str(self.__scheduler), str(self.__scheduler.running)))

        self.__scheduler.start()

    def stop(self):
        if not self.__scheduler is None:
            self.__scheduler.shutdown(wait=False)
            self.__scheduler = Scheduler(self.APS_CONFIG)

        logger.info("[AlertScheduler] Stopped the alert scheduler.")

    def reschedule(self):
        """
    Removes jobs that are scheduled where their UUID no longer is valid.
    Schedules jobs where the definition UUID is not currently scheduled.
    """
        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        self.initializer_module.alert_status_reporter.reported_alerts.clear()

        # for every scheduled job, see if its UUID is still valid
        for scheduled_job in scheduled_jobs:
            uuid_valid = False

            for definition in definitions:
                definition_uuid = definition.get_uuid()
                if scheduled_job.name == definition_uuid:
                    uuid_valid = True
                    break

            # jobs without valid UUIDs should be unscheduled
            if uuid_valid is False:
                jobs_removed += 1
                logger.info("[AlertScheduler] Unscheduling {0}".format(
                    scheduled_job.name))
                self._collector.remove_by_uuid(scheduled_job.name)
                self.__scheduler.unschedule_job(scheduled_job)

        # for every definition, determine if there is a scheduled job
        for definition in definitions:
            definition_scheduled = False
            for scheduled_job in scheduled_jobs:
                definition_uuid = definition.get_uuid()
                if definition_uuid == scheduled_job.name:
                    definition_scheduled = True
                    break

            # if no jobs are found with the definitions UUID, schedule it
            if definition_scheduled is False:
                jobs_scheduled += 1
                self.schedule_definition(definition)

        logger.info(
            "[AlertScheduler] Reschedule Summary: {0} rescheduled, {1} unscheduled"
            .format(str(jobs_scheduled), str(jobs_removed)))

    def reschedule_all(self):
        """
    Unschedules every currently scheduled job and then schedules a new job
    for each alert definition.
    """
        logger.info("[AlertScheduler] Rescheduling all jobs...")

        jobs_scheduled = 0
        jobs_removed = 0

        definitions = self.__load_definitions()
        scheduled_jobs = self.__scheduler.get_jobs()

        # unschedule all scheduled jobs
        for scheduled_job in scheduled_jobs:
            jobs_removed += 1
            logger.info("[AlertScheduler] Unscheduling {0}".format(
                scheduled_job.name))
            self._collector.remove_by_uuid(scheduled_job.name)
            self.__scheduler.unschedule_job(scheduled_job)

        # for every definition, schedule a job
        for definition in definitions:
            jobs_scheduled += 1
            self.schedule_definition(definition)

        logger.info(
            "[AlertScheduler] Reschedule Summary: {0} unscheduled, {0} rescheduled"
            .format(str(jobs_removed), str(jobs_scheduled)))

    def collector(self):
        """ gets the collector for reporting to the server """
        return self._collector

    def __load_definitions(self):
        """
    Loads all alert definitions from a file. All clusters are stored in
    a single file. This will also populate the cluster-to-hash dictionary.
    :return:
    """
        definitions = []
        for cluster_id, command_json in self.alert_definitions_cache.iteritems():
            clusterName = '' if 'clusterName' not in command_json else command_json['clusterName']
            hostName = '' if 'hostName' not in command_json else command_json['hostName']
            publicHostName = '' if 'publicHostName' not in command_json else command_json['publicHostName']
            clusterHash = None if 'hash' not in command_json else command_json['hash']

            # cache the cluster and cluster hash after loading the JSON
            if clusterName != '' and clusterHash is not None:
                logger.info(
                    '[AlertScheduler] Caching cluster {0} with alert hash {1}'.
                    format(clusterName, clusterHash))

            for definition in command_json['alertDefinitions']:
                alert = self.__json_to_callable(
                    clusterName, hostName, publicHostName,
                    Utils.get_mutable_copy(definition))

                if alert is None:
                    continue

                alert.set_helpers(self._collector, self._cluster_configuration,
                                  self.configuration_builder)

                definitions.append(alert)

        return definitions

    def __json_to_callable(self, clusterName, hostName, publicHostName,
                           json_definition):
        """
    converts the JSON that represents all aspects of a definition
    and makes an object that extends BaseAlert that is used for individual alerts
    """
        alert = None

        try:
            source = json_definition['source']
            source_type = source.get('type', '')

            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "[AlertScheduler] Creating job type {0} with {1}".format(
                        source_type, str(json_definition)))

            if source_type == AlertSchedulerHandler.TYPE_METRIC:
                alert = MetricAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_AMS:
                alert = AmsAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_PORT:
                alert = PortAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
                source['stacks_directory'] = self.stacks_dir
                source['common_services_directory'] = self.common_services_dir
                source['extensions_directory'] = self.extensions_dir
                source['host_scripts_directory'] = self.host_scripts_dir
                alert = ScriptAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_WEB:
                alert = WebAlert(json_definition, source, self.config)
            elif source_type == AlertSchedulerHandler.TYPE_RECOVERY:
                alert = RecoveryAlert(json_definition, source, self.config,
                                      self.recovery_manger)

            if alert is not None:
                alert.set_cluster(clusterName, json_definition['clusterId'],
                                  hostName, publicHostName)

        except Exception, exception:
            logger.exception(
                "[AlertScheduler] Unable to load an invalid alert definition. It will be skipped."
            )

        return alert
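
The two rescheduling paths above differ only in how they decide what to touch: reschedule() diffs the scheduled job names against the definition UUIDs, while reschedule_all() clears everything and schedules again from scratch. A minimal, standalone sketch of that diffing idea follows; the helper name and sample UUIDs are illustrative and not part of the Ambari code.

# Hypothetical sketch of the reschedule() diff: unschedule jobs whose name
# (a definition UUID) is no longer valid, schedule definitions with no job yet.
def diff_schedule(valid_uuids, scheduled_names):
    to_unschedule = [name for name in scheduled_names if name not in valid_uuids]
    to_schedule = [uuid for uuid in valid_uuids if uuid not in scheduled_names]
    return to_unschedule, to_schedule

# Example: job "b" is stale, definition "c" has no job yet.
print(diff_schedule({"a", "c"}, ["a", "b"]))  # (['b'], ['c'])
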
Exemple #53
0
class bakCron(object):
    def __init__(self):
        self.sched = Scheduler()
        self.sched.daemonic = False
        self.sched.start()

        self.assign_jobs()
        self.assign_monitor()

    def get_fileconfig(self):
        '''
        Get the paths of the configuration files; the directory is fixed when
        the software is installed.
        '''
        policyfile = os.path.dirname(nsccdbbak.__file__) + "/conf/Policy.conf"
        serverfile = os.path.dirname(nsccdbbak.__file__) + "/conf/Server.conf"

        policys = []
        PolicyConfig = ConfigParser.ConfigParser(allow_no_value=True)
        PolicyConfig.read(policyfile)

        for section in PolicyConfig.sections():
            dictTmp = {}
            colon = section.find(':')
            key, value = section[:colon], section[colon + 1:]
            dictTmp[key] = value
            for key, value in PolicyConfig.items(section):
                if 'pass' in key:
                    dictTmp[key] = base64.decodestring(value)
                else:
                    dictTmp[key] = value

            policys.append(dictTmp.copy())
            dictTmp.clear()

        servers = []
        ServerConfig = ConfigParser.ConfigParser(allow_no_value=True)
        ServerConfig.read(serverfile)

        for section in ServerConfig.sections():
            dictTmp = {}
            colon = section.find(':')
            key, value = section[:colon], section[colon + 1:]
            dictTmp[key] = value
            for key, value in ServerConfig.items(section):
                if 'pass' in key:
                    dictTmp[key] = base64.decodestring(value)
                else:
                    dictTmp[key] = value

            servers.append(dictTmp.copy())
            dictTmp.clear()

        return policys, servers

    def assign_jobs(self):
        '''
        Read the configuration files, obtain the backup policy for each
        database and set up the backup jobs.
        '''
        (policys, servers) = self.get_fileconfig()

        for dictTmp in policys:
            if dictTmp['flag'] == '1':
                for dict in servers:
                    if dict['server'] == dictTmp['server']:
                        serverInfo = dict
                for key in dictTmp.keys():
                    if dictTmp[key] == '':
                        dictTmp[key] = None
                glob_bak_name = 'glob_bak_' + dictTmp['server']
                print [serverInfo, dictTmp['bakcon']]
                self.sched.add_cron_job(self.glob_bak,
                                        args=[serverInfo, dictTmp['bakcon']],
                                        month=dictTmp['globmonth'],
                                        day=dictTmp['globday'],
                                        day_of_week=dictTmp['globweekday'],
                                        hour=dictTmp['globhour'],
                                        minute=dictTmp['globminute'],
                                        second='*/3',
                                        name=glob_bak_name)
                incr_bak_name = 'incr_bak_' + dictTmp['server']
                self.sched.add_cron_job(self.incr_bak,
                                        month=dictTmp['incmonth'],
                                        day=dictTmp['incday'],
                                        day_of_week=dictTmp['incweekday'],
                                        hour=dictTmp['inchour'],
                                        minute=dictTmp['incminute'],
                                        name=incr_bak_name)
        print self.sched.get_jobs()
        print 'assign jobs finished!'

    def assign_monitor(self):
        '''
        Set up the file-monitoring job.
        '''
        self.sched.add_interval_job(self.monitorfile, name='monitorDaemon')
        print self.sched.get_jobs()
        print 'assign monitor finished'

    def filechange(self, monitor, file1, file2, evt_type):
        '''
        When the backup policy file changes, unschedule every job except the
        file monitor, then set up the backup jobs again.
        '''
        if evt_type == gio.FILE_MONITOR_EVENT_CHANGED:
            print 'file changed'
            for job in self.sched.get_jobs():
                print job
                if job.name != 'monitorDaemon':
                    self.sched.unschedule_job(job)

            self.assign_jobs()

    def monitorfile(self):
        '''
        Start the file-monitoring loop and set up the multi-threaded runtime.
        '''
        gfile = gio.File(self.filepath)
        monitor = gfile.monitor_file(gio.FILE_MONITOR_NONE, None)
        monitor.connect("changed", self.filechange)
        gobject.threads_init()
        gml = gobject.MainLoop()
        gml.run()

    def glob_bak(self, serConf, bakcontainer):
        '''
        Perform one full (global) backup and upload the backup file to cloud
        storage.
        '''
        timestr = time.strftime(r"%Y-%m-%d_%H-%M-%S", time.localtime())
        print timestr
        conndb = ConnDatabase(serConf)
        connStor = ConnStorage(serConf)
        (result, bakfilepath) = conndb.conn.glob_bak()
        if result:
            connStor.upload_file(bakcontainer, bakfilepath)
        else:
            print 'global backup error!'

    def incr_bak(self, serConf, bakcontainer):
        '''
        Perform one incremental backup and upload the backup file to cloud
        storage.
        '''
        conndb = ConnDatabase(serConf)
        connStor = ConnStorage(serConf)
        (result, bakfilepath) = conndb.conn.incr_bak()
        if result:
            connStor.upload_file(bakcontainer, bakfilepath)
        else:
            print 'incremental backup error!'
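
bakCron above relies on the legacy APScheduler 2.x API (Scheduler, add_cron_job, add_interval_job). A minimal sketch of that pattern, stripped of the backup logic, is shown below; the job function and cron fields are illustrative, and the legacy apscheduler.scheduler module is assumed to be installed.

# Minimal APScheduler 2.x cron sketch mirroring bakCron's usage (assumption:
# the legacy 2.x API is available; in 3.x these calls were replaced by add_job).
from apscheduler.scheduler import Scheduler

def nightly_backup():
    print('running backup...')

sched = Scheduler()
sched.daemonic = False  # keep the process alive for the scheduler thread
sched.start()
sched.add_cron_job(nightly_backup, hour='2', minute='30', name='glob_bak_example')
print(sched.get_jobs())
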
Exemple #54
0
class Sched(Basemodule):
    # ################################################################################
    # initialization of module and optional load of config files
    # ################################################################################
    def __init__(self, instance_queue, global_queue):
        #
        # "sched|port|command or action"
        #
        self.logger = logging.getLogger('Hasip.sched')
        self.sched = Scheduler()
        self.items = ConfigItemReader()
        self.jobs_config = ConfigJobReader()
        self.mod_list = self.items.get_items_dict()  # getting module list from item file
        self.queue_identifier = 'sched'  # this is the 'module address'
        self.instance_queue = instance_queue  # worker queue to receive jobs
        self.global_queue = global_queue  # queue to communicate back to main thread
        self.jobstore = {}
        self.sched.start()

        # read jobs configuration
        self.jobstore = self.jobs_config.get_jobs_dict()
        for job in self.jobstore.keys():
            self.logger.debug(self.jobstore[job][2])
            self.sched.add_cron_job(self.send_msg,
                                    name=job,
                                    year=self.jobstore[job][2],
                                    month=self.jobstore[job][3],
                                    day=self.jobstore[job][4],
                                    week=self.jobstore[job][5],
                                    day_of_week=self.jobstore[job][6],
                                    hour=self.jobstore[job][7],
                                    minute=self.jobstore[job][8],
                                    second=self.jobstore[job][9],
                                    args=(self.jobstore[job][0], self.jobstore[job][1]))

    # ################################################################################
    # Main loop of this module; runs in the background and constantly checks the worker queue for new tasks.
    # ################################################################################
    def worker(self):
        while True:
            instance_queue_element = self.instance_queue.get(True)
            _senderport = instance_queue_element.get("module_from_port")
            _sender = instance_queue_element.get("module_from")
            _port = instance_queue_element.get("module_addr")
            _action = instance_queue_element.get("cmd")
            _optargs = instance_queue_element.get("opt_args")
            options = {
                "create": self.create,
                "delete": self.delete,
                "list_jobs": self.list_jobs
            }
            options[_action](_sender, _senderport, _port, _optargs)

    # ################################################################################
    #
    # "private" methods from here on...
    #
    # ################################################################################
    # Function to put jobs into the running scheduler's job queue and store them persistently

    def create(self, sender, senderport, port, optargs):
        if optargs.get('name') in self.jobstore:
            self.delete(sender, senderport, port, optargs.get('name'))
        self.sched.add_cron_job(self.send_msg,
                                name=optargs.get('name'),
                                year=optargs.get('year'),
                                month=optargs.get('month'),
                                day=optargs.get('day'),
                                week=optargs.get('week'),
                                day_of_week=optargs.get('day_of_week'),
                                hour=optargs.get('hour'),
                                minute=optargs.get('minute'),
                                second=optargs.get('second'),
                                args=(optargs.get('module'), optargs.get('action')))
        self.jobstore.update(
            {
                optargs.get('name'): [
                    optargs.get('module'),
                    optargs.get('action'),
                    optargs.get('year'),
                    optargs.get('month'),
                    optargs.get('day'),
                    optargs.get('week'),
                    optargs.get('day_of_week'),
                    optargs.get('hour'),
                    optargs.get('minute'),
                    optargs.get('second')
                ]
            }
        )
        self.write_to_config()

    def delete(self, sender, senderport, port, optargs):
        for job in self.sched.get_jobs():
            if job.name == optargs:
                self.sched.unschedule_job(job)
                del self.jobstore[job.name]
                self.write_to_config()

    def list_jobs(self, sender, senderport, port, optargs):
        dict1 = {}
        string = []
        for job in self.jobstore.keys():
            dict1['jobname'] = job
            dict1['device'] = self.jobstore[job][0]
            dict1['action'] = self.jobstore[job][1]
            dict1['year'] = self.jobstore[job][2]
            dict1['month'] = self.jobstore[job][3]
            dict1['day'] = self.jobstore[job][4]
            dict1['week'] = self.jobstore[job][5]
            dict1['day_of_week'] = self.jobstore[job][6]
            dict1['hour'] = self.jobstore[job][7]
            dict1['min'] = self.jobstore[job][8]
            dict1['sec'] = self.jobstore[job][9]
            string.append(dict1.copy())
            dict1.clear()
        self.logger.debug("help")
        queue_msg = {
            'module_from_port': str(port),
            'module_from': self.queue_identifier,
            'module_rcpt': sender,
            'module_addr': senderport,
            'cmd': 'reply',
            'opt_args': string
        }
        self.global_queue.put(queue_msg)

    def send_msg(self, module, action):  # ########################################
        if module in self.mod_list.keys():  # checking existence of requested module
            rcpt = self.mod_list[module][0]  # setting receiving module from item file
            mid = self.mod_list[module][1]  # setting module id from item file
            msg = {  # creating queue message
                     'module_from_port': 0,  # ########################################
                     'module_from': 'sched',
                     'module_rcpt': rcpt,
                     'module_addr': mid,
                     'cmd': action,
                     'opt_args': ''
            }
            self.global_queue.put(msg)
    
    def write_to_config(self):
        with open('/home/hasip/hasip/config/jobs/example.jobs', 'w') as f:
            conf = ConfigParser()
            for job in self.jobstore.keys():
                try:
                    # section name must match the key used by conf.set() below
                    conf.add_section(job.upper())
                except Exception:
                    pass
                conf.set(job.upper(), 'module', self.jobstore[job][0])
                conf.set(job.upper(), 'action', self.jobstore[job][1])
                conf.set(job.upper(), 'year', self.jobstore[job][2])
                conf.set(job.upper(), 'month', self.jobstore[job][3])
                # jobstore layout: [4] is day and [5] is week (see create()/list_jobs())
                conf.set(job.upper(), 'day', self.jobstore[job][4])
                conf.set(job.upper(), 'week', self.jobstore[job][5])
                conf.set(job.upper(), 'day_of_week', self.jobstore[job][6])
                conf.set(job.upper(), 'hour', self.jobstore[job][7])
                conf.set(job.upper(), 'minute', self.jobstore[job][8])
                conf.set(job.upper(), 'second', self.jobstore[job][9])
            conf.write(f)
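
The create/delete/list_jobs handlers above are driven entirely by messages taken off instance_queue in worker(). A hypothetical example of such a message follows; the key names match worker() and create(), while the sender, module address and cron values are made up for illustration.

# Hypothetical queue message asking this module to create a cron job.
# 'module' must be a key in the item file; all other values are illustrative.
create_msg = {
    'module_from_port': 0,
    'module_from': 'webgui',            # assumed sender module
    'module_addr': 'sched',             # assumed address of this module
    'cmd': 'create',
    'opt_args': {
        'name': 'LIGHT_ON_EVENING',
        'module': 'light_livingroom',   # assumed item-file entry
        'action': 'on',
        'year': None, 'month': None, 'day': None, 'week': None,
        'day_of_week': 'mon-fri', 'hour': '19', 'minute': '30', 'second': '0',
    },
}
# instance_queue.put(create_msg)  # worker() would dispatch this to create()
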