Example #1
import pickle
import time
from datetime import datetime

import redis
from rq import Connection, get_failed_queue

# Config, Results, WorkloadTracker, JobState, and WorkloadEventType are
# project-local modules assumed to be importable alongside this class.


class Validator(object):
    """
    Validates the state of all job status records and re-queues any jobs
    that ran into processing problems.
    """

    def __init__(self, logger, redis_host, redis_port):
        """
        Class constructor.

        :param logger logger: the logger
        :param str redis_host: Redis host where the Redis Q is running
        :param int redis_port: Redis port where the Redis Q is running
        """
        self.logger = logger
        self.config = Config()
        self.redis_host = redis_host
        self.redis_port = redis_port
        self.results = Results(logger, redis_host, redis_port)
        self.workload_tracker = WorkloadTracker(self.logger)

    def check_failed_queue(self, redis_conn):
        """
        Requeues all jobs in the failed job queue.

        :param object redis_conn: Redis connection object
        """
        with Connection(redis_conn):
            failed_queue = get_failed_queue()

            # TODO: Need to keep track of the number of times a job is requeued
            for job in failed_queue.get_jobs():
                failed_queue.requeue(job.id)
                self.logger.info("Requeued: %s", job.id)
    
    def requeue_job(self, job_id):
        """
        Requeues a job for processing

        :param str job_id: Id of the job that needs to be re-processed
        """
        # TODO: Requeue job

    def validate_job_health(self, job_key, redis_conn):
        """
        Validates the health of a job based on the job state and life timespan.

        :param str job_key: Key to retrieve job status
        :param object redis_conn: Redis connection object
        """
        # get the job status record from redis
        job_status_serialized = redis_conn.get(job_key)

        if job_status_serialized is not None:
            job_status = pickle.loads(job_status_serialized)
            # check to see if processing started on the job; if the job is
            # still in the queued state, do nothing and wait for a worker
            # to pick it up
            if job_status.job_state in (JobState.processing, JobState.processed):
                # get the lifespan of the job
                lifespan = datetime.utcnow() - job_status.created

                # if the lifespan is greater than the configured threshold,
                # requeue the job; total_seconds() is used instead of the
                # .seconds attribute, which wraps around after one day
                if lifespan.total_seconds() > self.config.job_processing_max_time_sec:
                    # requeue job for processing
                    self.requeue_job(job_status.job_id)

    def get_active_jobs(self, redis_conn):
        """
        Gets all active jobs from the job status collection.

        :param object redis_conn: Redis connection object
        :return list keys: keys of all active job status records
        """
        # get all job status keys from redis; if a key exists, it is an active job
        keys = redis_conn.keys(self.config.job_status_key_prefix + '*')
        return keys

    def run(self):
        """
        Executes the validator - gets all jobs in process and validates their state.

        :return float perc: returns the percentage of jobs consolidated
        """
        self.logger.info('Validator using redis host: %s:%s', self.redis_host, self.redis_port)

        pool = redis.ConnectionPool(host=self.redis_host, port=self.redis_port)
        redis_conn = redis.Redis(connection_pool=pool)

        self.logger.info('Starting validator')

        # wait until a redis connection can be established
        while True:
            try:
                redis_conn.ping()
                self.logger.info("Validator redis connection successful.")
                break
            except redis.exceptions.ConnectionError:
                self.logger.info("Validator - redis isn't running, sleeping for 5 seconds.")
                time.sleep(5)

        with Connection(redis_conn):
            active_jobs = self.get_active_jobs(redis_conn)
            for job_key in active_jobs:
                # validate job processing health using the job status collection
                self.validate_job_health(job_key, redis_conn)
            
            # record the number of processed jobs
            total_scheduled_jobs = int(redis_conn.get(self.config.scheduled_jobs_count_redis_key))
            remaining_jobs = total_scheduled_jobs - len(active_jobs)
            # guard against division by zero when no jobs have been scheduled
            perc = float(remaining_jobs) / total_scheduled_jobs if total_scheduled_jobs else 0.0
            status_msg = "Jobs Successfully Processed (%): {0:.2f} ... {1}/{2}".format(perc * 100, remaining_jobs, total_scheduled_jobs)
            self.workload_tracker.write(WorkloadEventType.WORKLOAD_PROCESSING_STATUS, status_msg)
            self.logger.info(status_msg)
        
        # requeue jobs in the Failed job queue
        self.check_failed_queue(redis_conn)

        # consolidate any completed results
        self.results.consolidate_results()
        perc = self.results.get_total_jobs_consolidated_status()

        # record the number of consolidated results
        status_msg = "Jobs Consolidated (%): {0:.2f}".format(perc)
        self.workload_tracker.write(WorkloadEventType.WORKLOAD_CONSOLIDATION_STATUS, status_msg)
        self.logger.info(status_msg)

        return perc
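
Below is a minimal sketch of how the Validator above might be driven on a schedule. The logger setup, 30-second polling interval, and the completion threshold are illustrative assumptions, not part of the original source.

import logging
import time


def main():
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("validator")

    # assumes Redis and the project-local modules are available locally
    validator = Validator(logger, 'localhost', 6379)

    # poll until every job has been consolidated; this assumes run() reports
    # a fraction in [0, 1], and both the threshold and the sleep interval
    # are illustrative choices rather than values from the source
    while True:
        perc = validator.run()
        if perc >= 1.0:
            break
        time.sleep(30)


if __name__ == "__main__":
    main()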
Example #2
import logging
import socket
import time

# Results is a project-local class; this import path is assumed.
from results import Results

logger = logging.getLogger(__name__)


def initLogging():
    """Configures the module logger to write to the console."""
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s test.py ' +
                                  socket.gethostname() +
                                  ' %(levelname)-5s %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)


if __name__ == "__main__":
    initLogging()

    results = Results(logger, 'localhost', 6379)

    results_count = results.count_consolidated_results()

    print("Results count: " + str(results_count))

    for x in range(100):
        results.write_result(str(x))
        print("Created results blob #" + str(x))

    results_count = results.count_consolidated_results()

    print("Results count: " + str(results_count))

    # sleep for 5 seconds to allow blob consistency to catch up
    time.sleep(5)

    consolidated_results = results.consolidate_results()

    print("Consolidated results: " + str(consolidated_results))