Example #1
0
    def run(self):
        """Run the worker loop for ``self._service``; does not return normally.

        Installs ``graceful_exit`` as the handler for SIGTERM and SIGINT, then
        repeats forever:

        1. Pop at most one task id from the work queue with
           ``task.work_unqueue`` and hand it to ``self._advance_task``.
           A ``RuntimeWarning`` is logged as a lock-acquisition failure and
           the task id is pushed back with ``task.work_queue``. Any other
           exception is logged, stored as the task log and the task is
           terminated with phase ``launch_error`` (both under the task lock).
        2. Once the cycle counter exceeds ``self._refresh_counter``, and if
           the ``queued:<service>`` key exists, call
           ``self._select_best_task_to_process`` for this service.
        3. Sleep ``self._work_cycle`` seconds.
        """
        for signum in (signal.SIGTERM, signal.SIGINT):
            signal.signal(signum, graceful_exit)
        self._logger.info('Starting...')

        cycles = 0

        while True:
            # Handle a single work-queue entry per cycle.
            pending_id = task.work_unqueue(self._redis, self._service)
            if pending_id is not None:
                try:
                    self._advance_task(pending_id)
                except RuntimeWarning:
                    # Reported as a lock failure: put the task back for a
                    # later attempt.
                    self._logger.warning(
                        '%s: failed to acquire a lock, retrying', pending_id)
                    task.work_queue(self._redis, pending_id, self._service)
                except Exception as err:
                    reason = str(err)
                    self._logger.error('%s: %s', pending_id, reason)
                    with self._redis.acquire_lock(pending_id):
                        task.set_log(self._redis, self._taskfile_dir,
                                     pending_id, reason)
                        task.terminate(self._redis, pending_id,
                                       phase="launch_error")
                    # Traceback is emitted only after the task was terminated.
                    self._logger.info(traceback.format_exc())

            # Periodic pass: once more than refresh_counter cycles elapsed,
            # look for queued tasks that could be started.
            refresh_due = cycles > self._refresh_counter
            if refresh_due:
                queued_key = 'queued:%s' % self._service
                if self._redis.exists(queued_key):
                    self._logger.debug('checking processes on : %s',
                                       self._service)
                    service_obj = self._services[self._service]
                    self._select_best_task_to_process(service_obj)
                cycles = 0

            cycles += 1
            time.sleep(self._work_cycle)
Example #2
0
    def run(self):
        """Run the worker loop over all of ``self._services``; never returns.

        Subscribes to the ``__keyspace@0__:beat:*`` and
        ``__keyspace@0__:queue:*`` pub/sub patterns, then loops forever with a
        0.01 s sleep per iteration.

        When a pub/sub message is available and its data is ``'expired'``:

        * on a ``beat:`` channel, the task whose service is one of
          ``self._services`` is terminated with phase ``expired`` under the
          task lock;
        * on a ``queue:`` channel, such a task is pushed to the work queue
          with ``task.work_queue``.

        When no message is available, for each service one task id is popped
        with ``task.work_unqueue`` and passed to ``self._advance_task``
        (``RuntimeWarning`` -> re-queue; other exceptions -> log, store the
        message as task log and terminate with phase ``launch_error``).
        If nothing was popped and the counter exceeded
        ``self._refresh_counter``, the service resources are scanned and
        ``self._service_unqueue`` is called when an unreserved free resource
        is found first.
        """
        beat_prefix = '__keyspace@0__:beat:'
        queue_prefix = '__keyspace@0__:queue:'

        self._logger.info('Starting worker')

        # Subscribe to keyspace events for the beat and queue keys.
        pubsub = self._redis.pubsub()
        pubsub.psubscribe('__keyspace@0__:beat:*')
        pubsub.psubscribe('__keyspace@0__:queue:*')
        counter = 0

        while True:
            message = pubsub.get_message()
            if message:
                channel = message['channel']
                data = message['data']
                if data == 'expired':
                    self._logger.warning('received expired event on channel %s', channel)
                    if channel.startswith(beat_prefix):
                        # The task id is whatever follows the channel prefix.
                        task_id = channel[len(beat_prefix):]
                        service = self._redis.hget('task:'+task_id, 'service')
                        if service in self._services:
                            self._logger.info('%s: task expired', task_id)
                            with self._redis.acquire_lock(task_id):
                                task.terminate(self._redis, task_id, phase='expired')
                    elif channel.startswith(queue_prefix):
                        task_id = channel[len(queue_prefix):]
                        service = self._redis.hget('task:'+task_id, 'service')
                        if service in self._services:
                            task.work_queue(self._redis, task_id, service)
            else:
                for service in self._services:
                    task_id = task.work_unqueue(self._redis, service)
                    if task_id is not None:
                        try:
                            self._advance_task(task_id)
                        except RuntimeWarning:
                            self._logger.warning(
                                '%s: failed to acquire a lock, retrying', task_id)
                            task.work_queue(self._redis, task_id, service)
                        except Exception as e:
                            self._logger.error('%s: %s', task_id, str(e))
                            with self._redis.acquire_lock(task_id):
                                task.set_log(self._redis, task_id, str(e))
                                task.terminate(self._redis, task_id, phase="launch_error")
                    elif counter > self._refresh_counter:
                        resources = self._services[service].list_resources()
                        for resource in resources:
                            keyr = 'resource:%s:%s' % (service, resource)
                            key_busy = 'busy:%s:%s' % (service, resource)
                            key_reserved = 'reserved:%s:%s' % (service, resource)
                            if not self._redis.exists(key_busy) and self._redis.hlen(keyr) < resources[resource]:
                                # Read the reservation once. The key may
                                # disappear between an EXISTS and a later GET,
                                # and 'queue:' + None would raise TypeError
                                # and stop the whole loop.
                                reserved_task = self._redis.get(key_reserved)
                                if reserved_task is not None:
                                    queue_key = 'queue:' + reserved_task
                                    # Shorten a long remaining TTL to 5 s;
                                    # presumably so the reserved task returns
                                    # to the work queue sooner - TODO confirm.
                                    if self._redis.ttl(queue_key) > 10:
                                        self._redis.expire(queue_key, 5)
                                        break
                        if self._redis.exists('queued:%s' % service):
                            resources = self._services[service].list_resources()
                            self._logger.debug('checking processes on : %s', service)
                            availableResource = False
                            for resource in resources:
                                keyr = 'resource:%s:%s' % (service, resource)
                                key_busy = 'busy:%s:%s' % (service, resource)
                                key_reserved = 'reserved:%s:%s' % (service, resource)
                                if not self._redis.exists(key_busy) and self._redis.hlen(keyr) < resources[resource]:
                                    # Only the first non-busy resource with
                                    # spare capacity is considered; it counts
                                    # as available only if it is not reserved.
                                    if not self._redis.exists(key_reserved):
                                        availableResource = True
                                    break
                            if availableResource:
                                self._logger.debug('resources available on %s - trying dequeuing', service)
                                self._service_unqueue(self._services[service])
                # Reset only after every service had its refresh pass.
                if counter > self._refresh_counter:
                    counter = 0
            counter += 1
            time.sleep(0.01)
Example #3
0
def post_log(task_id):
    """Store the raw request body as the log of ``task_id``.

    The body is passed to ``task.set_log`` together with ``taskfile_dir`` and
    ``max_log_size``; its result is then handed to the ``'POST/task/log'``
    hook through ``post_function``. The response is always
    ``flask.jsonify(200)``.
    """
    raw_body = flask.request.get_data()
    stored = task.set_log(redis, taskfile_dir, task_id, raw_body, max_log_size)
    # The hook result is unpacked as a pair but not used afterwards.
    task_id, stored = post_function('POST/task/log', task_id, stored)
    return flask.jsonify(200)
Example #4
0
    def run(self):
        """Run the worker loop for ``self._service``; leaves only via ``sys.exit``.

        Subscribes to the ``__keyspace@0__:beat:*`` and
        ``__keyspace@0__:queue:*`` pub/sub patterns and then, on every
        iteration (0.01 s sleep each):

        * each time the beat counter passes 1000: refresh ``beat_time`` and
          the 1200 s expiry on the ``self._worker_id`` key, or exit with
          status 0 if that key no longer exists;
        * whenever the beat counter is a multiple of 100: run
          ``workeradmin.process`` and exit with status 0 if the ``timestamp``
          field of the ``default`` hash differs from
          ``self._default_config_timestamp``;
        * handle at most one pub/sub message (``expired`` events on beat or
          queue keys);
        * pop at most one task id from the work queue and advance it;
        * once the refresh counter exceeds ``self._refresh_counter``, call
          ``self._service_unqueue`` if the ``queued:<service>`` key exists.
        """
        beat_prefix = '__keyspace@0__:beat:'
        queue_prefix = '__keyspace@0__:queue:'

        self._logger.info('Starting worker')

        # Listen for keyspace events on beat and queue keys.
        pubsub = self._redis.pubsub()
        pubsub.psubscribe('__keyspace@0__:beat:*')
        pubsub.psubscribe('__keyspace@0__:queue:*')
        refresh_ticks = 0
        # Start at the threshold so the first iteration performs a beat.
        beat_ticks = 1000

        while True:
            beat_ticks += 1

            # Worker heartbeat: keep our own key alive, or stop if it is gone.
            if beat_ticks > 1000:
                beat_ticks = 0
                if not self._redis.exists(self._worker_id):
                    self._logger.info('stopped by key expiration/removal')
                    sys.exit(0)
                self._redis.hset(self._worker_id, "beat_time", time.time())
                self._redis.expire(self._worker_id, 1200)

            # Administration commands and configuration-change detection.
            if beat_ticks % 100 == 0:
                workeradmin.process(self._logger, self._redis, self._service)
                known_stamp = self._default_config_timestamp
                if known_stamp:
                    current_stamp = self._redis.hget('default', 'timestamp')
                    if current_stamp != known_stamp:
                        self._logger.info(
                            'stopped by default configuration change')
                        sys.exit(0)

            # Consume a single pub/sub message, if any.
            message = pubsub.get_message()
            if message:
                channel = message['channel']
                if message['data'] == 'expired':
                    if channel.startswith(beat_prefix):
                        # Beat key expired: terminate the task.
                        expired_id = channel[len(beat_prefix):]
                        owner = self._redis.hget('task:' + expired_id,
                                                 'service')
                        if owner in self._services:
                            self._logger.info('%s: task expired', expired_id)
                            with self._redis.acquire_lock(expired_id):
                                task.terminate(self._redis, expired_id,
                                               phase='expired')
                    elif channel.startswith(queue_prefix):
                        # Queue key expired: hand the task to the work queue.
                        expired_id = channel[len(queue_prefix):]
                        owner = self._redis.hget('task:' + expired_id,
                                                 'service')
                        if owner in self._services:
                            self._logger.info('%s: move to work queue',
                                              expired_id)
                            task.work_queue(self._redis, expired_id, owner)

            # Handle a single work-queue entry.
            pending_id = task.work_unqueue(self._redis, self._service)
            if pending_id is not None:
                try:
                    self._advance_task(pending_id)
                except RuntimeWarning:
                    self._logger.warning(
                        '%s: failed to acquire a lock, retrying', pending_id)
                    task.work_queue(self._redis, pending_id, self._service)
                except Exception as err:
                    reason = str(err)
                    self._logger.error('%s: %s', pending_id, reason)
                    with self._redis.acquire_lock(pending_id):
                        task.set_log(self._redis, self._taskfile_dir,
                                     pending_id, reason)
                        task.terminate(self._redis, pending_id,
                                       phase="launch_error")

            # Periodic pass over queued tasks.
            if refresh_ticks > self._refresh_counter:
                queued_key = 'queued:%s' % self._service
                if self._redis.exists(queued_key):
                    self._logger.debug('checking processes on : %s',
                                       self._service)
                    self._service_unqueue(self._services[self._service])
                refresh_ticks = 0

            refresh_ticks += 1
            time.sleep(0.01)
Example #5
0
def post_log(task_id):
    """Store the raw request body as the log of ``task_id``.

    The body is passed unchanged to ``task.set_log``; the response is always
    ``flask.jsonify(200)``.
    """
    task.set_log(redis, task_id, flask.request.get_data())
    return flask.jsonify(200)