Example 1
    def send_raw(self, raw, shards=None):
        if not shards:
            config = forge.get_config()
            shards = config.core.dispatcher.shards

        task = Task(raw)
        self.send(task, shards)
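A minimal usage sketch (the task fields here are invented for illustration;
`forge.get_dispatch_queue()` appears in Example 6):

    # Hypothetical call site: push a raw task dict to the dispatchers,
    # letting the shard count fall back to the configured default.
    dispatch_queue = forge.get_dispatch_queue()
    dispatch_queue.send_raw({'sid': 'example-sid', 'srl': 'example-srl'})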
Example 2
    def redispatch(self, name, sid, srl, service, reason, now):
        entry = None
        try:
            entry = self.entries[sid][srl]
        except KeyError:
            return False

        try:
            stage = self.service_manager.stage_by_name(service.name)
            d = getattr(entry, name)[stage]  # per-stage collection selected by `name`
            c = entry.completed_services[stage]  # services already completed at this stage
            if service.name in c or (d and service.name in d):
                return False
            log.info("%s for %s: %s/%s", reason, service.name, sid, srl)
            self.dispatch(service, entry, now)
            return True
        except Exception as ex:  #pylint: disable=W0703
            trace = get_stacktrace_info(ex)
            log.error("Couldn't redispatch to %s for %s/%s: %s", service.name,
                      sid, srl, trace)
            response = Task(deepcopy(entry.task.raw))
            response.watermark(service.name, '')
            response.nonrecoverable_failure(trace)
            self.storage_queue.push({
                'type': 'error',
                'name': service.name,
                'response': response,
            })
            return False
Example 3
def ingester():  # df node def # pylint:disable=R0912
    datastore = forge.get_datastore()
    user_groups = {}

    # Move from ingest to unique and waiting queues.
    # While there are entries in the ingest queue we consume chunk_size
    # entries at a time and move unique entries to uniqueq / queued and
    # duplicates to their own queues / waiting.
    while running:
        while True:
            result = completeq.pop(blocking=False)  # df pull pop
            if not result:
                break

            completed(Task(result))  # df push calls

        entry = ingestq.pop(timeout=1)  # df pull pop
        if not entry:
            continue

        trafficq.push(entry)  # df push push

        sha256 = entry.get('sha256', '')
        if not sha256 or len(sha256) != 64:
            logger.error("Invalid sha256: %s", entry)
            continue

        entry['md5'] = entry.get('md5', '').lower()
        entry['sha1'] = entry.get('sha1', '').lower()
        entry['sha256'] = sha256.lower()

        ingest(datastore, user_groups, entry)  # df push calls

    datastore.close()
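The queues referenced above (`ingestq`, `completeq`, `trafficq`) are
module-level globals in the original source. A minimal sketch of how they
might be constructed, assuming they are backed by the same NamedQueue class
used in Examples 4 and 7 (the queue names here are invented):

    # Hypothetical setup; the actual names and construction are not shown
    # in this excerpt.
    ingestq = NamedQueue('m-ingest')
    completeq = NamedQueue('m-complete')
    trafficq = NamedQueue('m-traffic')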
Example 4
    def _send_control_queue_call(cls, shard, state, **kw):
        name = reply_queue_name(state)
        kw.update({
            'state': state,
            'watch_queue': name,
        })
        t = Task({}, **kw)
        forge.get_control_queue('control-queue-' + str(shard)).push(t.raw)
        nq = NamedQueue(name)
        return nq.pop(timeout=5)
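Example 7 below inlines this same request/reply pattern. A hypothetical call
site (the class name `Dispatcher` is an assumption; only the method itself
appears in this excerpt):

    # Ask dispatcher shard 0 for its system time, waiting up to 5 seconds
    # for the reply (pop returns None on timeout). Assumes the method is
    # exposed as a classmethod.
    reply = Dispatcher._send_control_queue_call(0, 'get_system_time')
    if reply is None:
        pass  # timed out waiting on the reply queue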
Example 5
    def _do_work(self, raw_task):
        """ Complete an incoming work item.

        Note: This will block while a service is executing the task.
        For some services this could be many seconds or even minutes.
        """
        assert not isinstance(raw_task, list)
        task = Task(raw_task)
        # noinspection PyProtectedMember
        self.service._handle_task(task)
        self.work_count.value += 1
Example 6
    def _drain(self):
        """Report any in-flight work items back to the dispatcher as
        recoverable failures so they can be rescheduled elsewhere."""
        with self._current_work_items_lock:
            if not self._current_work_items:
                self.log.info('EXIT_DRAIN:0')
                return

            result_store = forge.get_datastore()
            dispatch_queue = forge.get_dispatch_queue()
            self.log.info('EXIT_DRAIN:%s', len(self._current_work_items))
            for item in self._current_work_items:
                work = Task(item)
                task = Task({})
                task.sid = work.sid
                task.srl = work.srl
                task.dispatch_queue = work.dispatch_queue
                task.classification = work.classification
                self.log.info("DRAIN: %s/%s", task.sid, task.srl)
                task.watermark(self.service_cls.SERVICE_NAME, None)
                task.recoverable_failure(
                    'Task was pre-empted (shutdown, vm revert or cull)')
                task.cache_key = result_store.save_error(
                    self.service_cls.SERVICE_NAME, None, None, task)
                dispatch_queue.send_raw(task.as_dispatcher_response())
Example 7
    def _check_time_drift(self):
        dispatcher = '0'
        name = reply_queue_name('cli_get_time')
        t = Task({}, **{
            'state': 'get_system_time',
            'watch_queue': name,
        })
        forge.get_control_queue('control-queue-' + dispatcher).push(t.raw)
        nq = NamedQueue(name)
        r = nq.pop(timeout=5)
        if r is None or 'time' not in r:
            self.log.warning("Timed out trying to determine the dispatcher's clock.")
            return

        clock_difference = abs(r['time'] - time.time())
        if clock_difference > 600:
            self.log.info(
                "Dispatcher's clock is %s seconds away from ours. "
                "Clocks are not set correctly.", clock_difference)
        else:
            self.log.debug('Clock drift from dispatcher: %s.',
                           clock_difference)
Example 8
    def dispatch(self, service, entry, now):
        task = entry.task
        sid = task.sid
        srl = task.srl
        name = service.name

        queue_size = self.queue_size[name] = self.queue_size.get(name, 0) + 1
        entry.retries[name] = entry.retries.get(name, -1) + 1

        if task.profile:
            if entry.retries[name]:
                log.info('%s Graph: "%s" -> "%s/%s" [label=%d];', sid, srl,
                         srl, name, entry.retries[name])
            else:
                log.info('%s Graph: "%s" -> "%s/%s";', sid, srl, srl, name)
                log.info('%s Graph: "%s/%s" [label=%s];', sid, srl, name, name)

        file_count = len(self.entries[sid]) + len(self.completed[sid])

        # Warning: Please do not change the text of the error messages below.
        msg = None
        if self._service_is_down(service, now):
            msg = 'Service down.'
        elif entry.retries[name] > config.core.dispatcher.max.retries:
            msg = 'Max retries exceeded.'
        elif entry.retries[name] >= 1:
            log.debug("Retry sending %s/%s to %s", sid, srl, name)
        elif task.depth > config.core.dispatcher.max.depth:
            msg = 'Max depth exceeded.'
        elif file_count > config.core.dispatcher.max.files:
            msg = 'Max files exceeded.'

        if msg:
            log.debug("%s Not sending %s/%s to %s.", msg, sid, srl, name)
            response = Task(deepcopy(task.raw))
            response.watermark(name, '')
            response.nonrecoverable_failure(msg)
            self.storage_queue.push({
                'type': 'error',
                'name': name,
                'response': response,
            })
            return False

        if service.skip(task):
            response = Task(deepcopy(task.raw))
            response.watermark(name, '')
            response.success()
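            # 'q' is presumably a module-level dispatch queue; its
            # definition is not shown in this excerpt.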
            q.send_raw(response.as_dispatcher_response())
            return False

        # Setup an ack timeout.
        seconds = min(service.timeout * (queue_size + 5), 7200)

        task.ack_timeout = seconds
        task.sent = now

        service.proxy.execute(task.priority, task.as_service_request(name))

        # Add the timeout to the end of its respective list.
        ack_timeout = self.ack_timeout
        lst = ack_timeout.get(seconds, [])
        lst.append(Timeout(sid, srl, name, now + seconds))
        ack_timeout[seconds] = lst

        return True
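For intuition, a worked example of the ack-timeout formula above (the values
are illustrative only):

    # A service with a 60 s timeout and 10 tasks already queued gets
    #   min(60 * (10 + 5), 7200) = 900 seconds
    # before its ack times out; the 7200 s ceiling caps the wait at two
    # hours no matter how deep the queue grows.
    seconds = min(60 * (10 + 5), 7200)  # -> 900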
Example 9
def init():
    datastore = forge.get_datastore()
    datastore.commit_index('submission')

    sids = [
        x['submission.sid'] for x in datastore.stream_search(
            'submission',
            'state:submitted AND times.submitted:[NOW-1DAY TO *] '
            'AND submission.metadata.type:* '
            'AND NOT submission.description:Resubmit*')
    ]

    submissions = {}
    submitted = {}
    for submission in datastore.get_submissions(sids):
        task = Task(submission)

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            continue

        if forge.determine_ingest_queue(task.root_sha256) != ingestq_name:
            continue

        scan_key = task.scan_key
        submissions[task.sid] = submission
        submitted[scan_key] = task.sid

    # Outstanding is the set of things Riak believes are being scanned.
    outstanding = set(submitted.keys())

    # Keys is the set of things middleman believes are being scanned.
    keys = set(scanning.keys())

    # Inflight is the set of submissions middleman and Riak agree are inflight.
    inflight = outstanding.intersection(keys)

    # Missing is the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    missing = keys.difference(inflight)

    # Process the set of submissions Riak believes are incomplete but
    # middleman doesn't know about.
    for scan_key in outstanding.difference(inflight):
        sid = submitted.get(scan_key, None)

        if not sid:
            logger.info("Init: No sid found for incomplete")
            continue

        submission = submissions[sid]
        task = Task(submission)

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            logger.info("Init: Missing original_selected, root_sha256 or scan_key")
            continue

        if not task.metadata:
            logger.info("Init: Incomplete submission is not one of ours: %s",
                        sid)
            continue

        stype = None
        try:
            stype = task.metadata.get('type', None)
        except:  # pylint: disable=W0702
            logger.exception(
                "Init: Incomplete submission has malformed metadata: %s", sid)

        if not stype:
            logger.info("Init: Incomplete submission missing type: %s", sid)
            continue

        raw = {
            'metadata': task.metadata,
            'overrides': get_submission_overrides(task, overrides),
            'sha256': task.root_sha256,
            'type': stype,
        }
        raw['overrides']['selected'] = task.original_selected

        reinsert(datastore, " (incomplete)", Notice(raw), logger)

    r = redis.StrictRedis(persistent['host'], persistent['port'],
                          persistent['db'])

    # Duplicates is the set of sha256s where a duplicate queue exists.
    duplicates = [
        x.replace(dup_prefix, '', 1) for x in r.keys(dup_prefix + '*')
    ]

    # Process the set of duplicates where no scanning or riak entry exists.
    for scan_key in set(duplicates).difference(outstanding.union(keys)):
        raw = dupq.pop(dup_prefix + scan_key, blocking=False)
        if not raw:
            logger.warning("Init: Couldn't pop off dup queue (%s)", scan_key)
            dupq.delete(dup_prefix + scan_key)
            continue

        reinsert(datastore, " (missed duplicate)", Notice(raw), logger)

    while True:
        res = completeq.pop(blocking=False)
        if not res:
            break

        scan_key = completed(Task(res))
        try:
            missing.remove(scan_key)
        except:  # pylint: disable=W0702
            pass

    # Process the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    for scan_key in missing:
        raw = scanning.pop(scan_key)
        if raw:
            reinsert(datastore, '', Notice(raw), logger, retry_all=False)

    # Set up time outs for all inflight submissions.
    expiry_time = now(max_time)
    for scan_key in inflight:
        # No need to lock. We're the only thing running at this point.
        timeouts.append(Timeout(scan_key, expiry_time))

    signal.signal(signal.SIGINT, interrupt)
    signal.signal(signal.SIGTERM, interrupt)

    datastore.close()
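For clarity, a toy illustration of the reconciliation set algebra performed
above (the names mirror the code; the values are made up):

    outstanding = {'a', 'b', 'c'}        # Riak: being scanned
    keys = {'b', 'c', 'd'}               # middleman: being scanned
    inflight = outstanding & keys        # {'b', 'c'}: both agree, each gets a Timeout
    missing = keys - inflight            # {'d'}: middleman-only, reinserted from scanning
    riak_only = outstanding - inflight   # {'a'}: Riak-only, reinserted as " (incomplete)"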
Example 10
    def _do_work(self, work):
        """Batch variant of _do_work: hand a list of raw tasks to the
        service in a single call."""
        tasks = [Task(raw) for raw in work]
        # noinspection PyProtectedMember
        self.service._handle_task_batch(tasks)
Example 11
logger.info("Monitoring the following service queues: %s", threshold)

while True:
    queue_lengths = get_service_queue_lengths()

    over = {
        k: v for k, v in queue_lengths.iteritems() if v > (threshold.get(k, 0) or v)
    }

    for name, size in over.iteritems():
        excess = size - threshold.get(name, size)
        if excess <= 0:
            continue

        for msg in get_queue(name).unpush(excess):
            # noinspection PyBroadException
            try:
                t = Task(msg)

                t.watermark(name, '')
                t.nonrecoverable_failure('Service busy.')
                t.cache_key = store.save_error(name, None, None, t)

                dispatch_queue.send_raw(t.as_dispatcher_response())
                logger.info("%s is too busy to process %s.", name, t.srl)
            except:  # pylint:disable=W0702
                logger.exception('Problem sending response:')

    time.sleep(config.system.update_interval)
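The `threshold.get(k, 0) or v` guard above is terse; a quick illustration of
its three cases (values invented):

    threshold = {'extract': 100, 'yara': 0}
    v = 150
    v > (threshold.get('extract', 0) or v)   # 150 > 100 -> True: over threshold
    v > (threshold.get('yara', 0) or v)      # 0 is falsy, falls back to v: False
    v > (threshold.get('unknown', 0) or v)   # missing key -> 0 -> v: False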