Beispiel #1
0
    def redispatch(self, name, sid, srl, service, reason, now):
        """Send the file `sid`/`srl` to `service` again unless already handled.

        `name` is the name of a per-stage attribute on the tracked entry; it
        is consulted together with `entry.completed_services` to decide
        whether the service is already accounted for at its stage. `reason`
        is only used as the prefix of the informational log line, and `now`
        is passed straight through to `self.dispatch`.

        Returns True when `self.dispatch` was called without raising (its own
        return value is ignored). Returns False when no entry is tracked for
        `sid`/`srl`, when the service is already listed for its stage, or when
        anything raised along the way -- in the latter case an error response
        is pushed onto `self.storage_queue`.
        """
        try:
            entry = self.entries[sid][srl]
        except KeyError:
            # Nothing is tracked for this sid/srl pair.
            return False

        try:
            svc_name = service.name
            stage = self.service_manager.stage_by_name(svc_name)
            tracked = getattr(entry, name)[stage]
            finished = entry.completed_services[stage]

            if svc_name in finished:
                return False
            # The per-stage container may be empty/None, so test it first.
            if tracked and svc_name in tracked:
                return False

            log.info("%s for %s: %s/%s", reason, svc_name, sid, srl)
            self.dispatch(service, entry, now)
        except Exception as ex:  #pylint: disable=W0703
            trace = get_stacktrace_info(ex)
            failed_service = service.name
            log.error("Couldn't redispatch to %s for %s/%s: %s",
                      failed_service, sid, srl, trace)

            # Report the failure as a non-recoverable error for this service.
            failure = Task(deepcopy(entry.task.raw))
            failure.watermark(failed_service, '')
            failure.nonrecoverable_failure(trace)
            error_message = {
                'type': 'error',
                'name': failed_service,
                'response': failure,
            }
            self.storage_queue.push(error_message)
            return False

        return True
Beispiel #2
0
    def dispatch(self, service, entry, now):
        """Try to hand `entry.task` to `service`; return True only if it was sent.

        Every call increments both `self.queue_size[service.name]` and
        `entry.retries[service.name]`, whether or not the task ends up being
        sent. The task is refused (an error response is pushed onto
        `self.storage_queue` and False is returned) when the service is
        reported down or the retry limit is exceeded; the depth and file
        count limits are only checked on the first attempt. If
        `service.skip(task)` is true, a success response is sent through `q`
        and False is returned. Otherwise the task is stamped with an ack
        timeout and send time, executed through `service.proxy`, and a
        `Timeout` record is appended to `self.ack_timeout`.
        """
        task = entry.task
        sid, srl = task.sid, task.srl
        name = service.name

        # Bump the per-service counters before any decision is made.
        backlog = self.queue_size.get(name, 0) + 1
        self.queue_size[name] = backlog
        attempt = entry.retries.get(name, -1) + 1
        entry.retries[name] = attempt

        if task.profile:
            if not attempt:
                log.info('%s Graph: "%s" -> "%s/%s";', sid, srl, srl, name)
                log.info('%s Graph: "%s/%s" [label=%s];', sid, srl, name, name)
            else:
                log.info('%s Graph: "%s" -> "%s/%s" [label=%d];', sid, srl,
                         srl, name, attempt)

        file_count = len(self.entries[sid]) + len(self.completed[sid])

        # Warning: Please do not change the text of the error messages below.
        refusal = None
        if self._service_is_down(service, now):
            refusal = 'Service down.'
        else:
            limits = config.core.dispatcher.max
            if attempt > limits.retries:
                refusal = 'Max retries exceeded.'
            elif attempt >= 1:
                # A retry within limits skips the depth and file count checks.
                log.debug("Retry sending %s/%s to %s", sid, srl, name)
            elif task.depth > limits.depth:
                refusal = 'Max depth exceeded.'
            elif file_count > limits.files:
                refusal = 'Max files exceeded.'

        if refusal:
            detail = "Not sending %s/%s to %s." % (sid, srl, name)
            log.debug(' '.join([refusal, detail]))

            failure = Task(deepcopy(task.raw))
            failure.watermark(name, '')
            failure.nonrecoverable_failure(refusal)
            error_message = {
                'type': 'error',
                'name': name,
                'response': failure,
            }
            self.storage_queue.push(error_message)
            return False

        if service.skip(task):
            # The service opted out of this task: answer with a success.
            skipped = Task(deepcopy(task.raw))
            skipped.watermark(name, '')
            skipped.success()
            q.send_raw(skipped.as_dispatcher_response())
            return False

        # Setup an ack timeout; it scales with the backlog, capped at 7200.
        seconds = min(service.timeout * (backlog + 5), 7200)
        task.ack_timeout = seconds
        task.sent = now

        service.proxy.execute(task.priority, task.as_service_request(name))

        # Add the timeout to the end of its respective list.
        deadline = Timeout(sid, srl, name, now + seconds)
        self.ack_timeout.setdefault(seconds, []).append(deadline)

        return True
Beispiel #3
0
logger.info("Monitoring the following service queues: %s", threshold)

# Poll the service queues forever. For every queue whose length exceeds its
# configured (non-zero) threshold, pull the excess messages back off the queue
# and answer each one with a non-recoverable 'Service busy.' error.
while True:
    for name, size in get_service_queue_lengths().iteritems():
        limit = threshold.get(name, 0)
        # Queues with no threshold (missing or falsy) are never trimmed.
        if not limit or size <= limit:
            continue

        for msg in get_queue(name).unpush(size - limit):
            # Best effort: a failure on one message must not stop the
            # monitor. Only `Exception` is caught so that KeyboardInterrupt
            # and SystemExit can still terminate the loop.
            # noinspection PyBroadException
            try:
                t = Task(msg)

                t.watermark(name, '')
                t.nonrecoverable_failure('Service busy.')
                t.cache_key = store.save_error(name, None, None, t)

                dispatch_queue.send_raw(t.as_dispatcher_response())
                logger.info("%s is too busy to process %s.", name, t.srl)
            except Exception:  # pylint:disable=W0703
                logger.exception('Problem sending response:')

    time.sleep(config.system.update_interval)