Example no. 1
def retry(raw, scan_key, sha256, ex):  # df node def
    current_time = now()

    notice = Notice(raw)
    retries = notice.get('retries', 0) + 1

    if retries > max_retries:
        trace = ''
        if ex and type(ex) != FileStoreException:
            trace = ': ' + get_stacktrace_info(ex)
        logger.error('Max retries exceeded for %s%s', sha256, trace)
        dupq.delete(dup_prefix + scan_key)
    elif expired(current_time - seconds(notice.get('ts', current_time)), 0):
        logger.info('No point retrying expired submission for %s', sha256)
        dupq.delete(dup_prefix + scan_key)  # df pull delete
    else:
        logger.info('Requeuing %s (%s)', sha256, ex or 'unknown')
        notice.set('retries', retries)
        notice.set('retry_at', now(retry_delay))  # due retry_delay seconds from now

        retryq.push(notice.raw)  # df push push
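
The retry path, like the other examples, goes through the Notice wrapper. A minimal sketch of what such a wrapper could look like, assuming it is just a thin get/set view over the raw ingestion dict (the real class is likely richer):

class Notice(object):
    """Illustrative accessor over the raw ingestion message."""

    def __init__(self, raw):
        self.raw = raw or {}

    def get(self, key, default=None):
        # Return the stored value, or the caller-supplied default.
        return self.raw.get(key, default)

    def set(self, key, value):
        # Mutate the underlying dict in place so notice.raw stays current.
        self.raw[key] = value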
Example no. 2
def dropper():  # df node def
    datastore = forge.get_datastore()

    while running:
        raw = dropq.pop(timeout=1)  # df pull pop
        if not raw:
            continue

        notice = Notice(raw)

        send_notification(notice)

        c12n = notice.get('classification',
                          config.core.middleman.classification)
        expiry = now_as_iso(86400)  # keep the file record for 24 hours
        sha256 = notice.get('sha256')

        datastore.save_or_freshen_file(sha256, {'sha256': sha256}, expiry,
                                       c12n)

    datastore.close()
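
dropper() assumes a queue whose pop blocks for up to `timeout` seconds and returns None when nothing arrives, so the `while running` loop can re-check the shutdown flag about once a second. Below is a minimal in-process stand-in with the same single-argument push / timed pop contract used here by dropq (and by retryq and submissionq); the production queues are presumably shared and persistent, so this is only a sketch:

import Queue  # Python 2 standard-library queue


class SimpleNoticeQueue(object):
    def __init__(self):
        self._q = Queue.Queue()

    def push(self, raw):
        # Enqueue a raw notice dict.
        self._q.put(raw)

    def pop(self, timeout=1):
        # Block up to `timeout` seconds; return None instead of raising
        # when the queue stays empty, matching the loop above.
        try:
            return self._q.get(timeout=timeout)
        except Queue.Empty:
            return None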
Example no. 3
def ingest(datastore, user_groups, raw):  # df node def
    notice = Notice(raw)

    ignore_size = notice.get('ignore_size', False)
    never_drop = notice.get('never_drop', False)
    sha256 = notice.get('sha256')
    size = notice.get('size', 0)

    # Make sure we have a submitter ...
    user = notice.get('submitter', None)
    if user is None:
        user = config.submissions.user
        notice.set('submitter', user)

    # ... and groups.
    groups = notice.get('groups', None)
    if groups is None:
        groups = user_groups.get(user, None)
        if groups is None:
            ruser = datastore.get_user(user)
            if not ruser:
                return
            groups = ruser.get('groups', [])
            user_groups[user] = groups
        notice.set('groups', groups)

    selected = notice.get('selected', None)
    if not selected:
        selected = selected_initial
        notice.set('selected', selected)
        notice.set('resubmit_to', ['Dynamic Analysis'])

    resubmit_to = notice.get('resubmit_to', None)
    if resubmit_to is None:
        notice.set('resubmit_to', [])

    ingester_counts.increment('ingest.bytes_ingested', int(size))
    ingester_counts.increment('ingest.submissions_ingested')

    if not sha256:
        send_notification(notice,
                          failure="Invalid sha256",
                          logfunc=logger.warning)
        return

    c12n = notice.get('classification', '')
    if not Classification.is_valid(c12n):
        send_notification(notice,
                          failure="Invalid classification %s" % c12n,
                          logfunc=logger.warning)
        return

    metadata = notice.get('metadata', {})
    if isinstance(metadata, dict):
        to_delete = []
        for k, v in metadata.iteritems():
            # Use a separate name so the file size read above is not clobbered.
            value_size = sys.getsizeof(v, -1)
            if isinstance(v, basestring):
                value_size = len(v)
            if value_size > config.core.middleman.max_value_size:
                to_delete.append(k)
            elif value_size < 0:
                to_delete.append(k)
        if to_delete:
            logger.info('Removing %s from %s', to_delete, notice.raw)
            for k in to_delete:
                metadata.pop(k, None)

    if size > config.submissions.max.size and not ignore_size and not never_drop:
        notice.set(
            'failure',
            "File too large (%d > %d)" % (size, config.submissions.max.size))
        dropq.push(notice.raw)  # df push push
        ingester_counts.increment('ingest.skipped')
        return

    pprevious, previous, score = None, False, None
    if not notice.get('ignore_cache', False):
        pprevious, previous, score, _ = check(datastore, notice)

    # Assign priority.
    low_priority = is_low_priority(notice)

    priority = notice.get('priority')
    if priority is None:
        priority = priority_value['medium']

        if score is not None:
            priority = priority_value['low']
            for level in ('critical', 'high'):
                if score >= threshold_value[level]:
                    priority = priority_value[level]
                    break
        elif low_priority:
            priority = priority_value['low']

    # Reduce the priority by an order of magnitude for very old files.
    current_time = now()
    if priority and \
            expired(current_time - seconds(notice.get('ts', current_time)), 0):
        priority = (priority / 10) or 1

    notice.set('priority', priority)

    # Do this after priority has been assigned.
    # (So we don't end up dropping the resubmission).
    if previous:
        ingester_counts.increment('ingest.duplicates')
        finalize(pprevious, previous, score, notice)  # df push calls
        return

    if drop(notice):  # df push calls
        return

    if is_whitelisted(notice):  # df push calls
        return

    uniqueq.push(priority, notice.raw)  # df push push
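
The priority assignment in ingest() relies on two lookup tables: priority_value maps a named level to a numeric priority, and threshold_value maps 'critical' and 'high' to minimum scores. The values below are illustrative assumptions, not taken from the configuration; they only show how a cached score is promoted upward from 'low':

priority_value = {'low': 100, 'medium': 200, 'high': 300, 'critical': 400}
threshold_value = {'high': 500, 'critical': 800}


def priority_for(score):
    # Mirror the loop in ingest(): start at 'low' and promote to the
    # highest level whose threshold the score meets.
    priority = priority_value['low']
    for level in ('critical', 'high'):
        if score >= threshold_value[level]:
            priority = priority_value[level]
            break
    return priority


assert priority_for(900) == priority_value['critical']
assert priority_for(600) == priority_value['high']
assert priority_for(10) == priority_value['low']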
Example no. 4
def submitter():  # df node def
    client = forge.get_submission_service()
    datastore = forge.get_datastore()

    while running:
        try:
            raw = submissionq.pop(timeout=1)  # df pull pop
            if not raw:
                continue

            # noinspection PyBroadException
            try:
                sha256 = raw['sha256']
            except Exception:  # pylint: disable=W0703
                logger.exception("Malformed entry on submission queue:")
                continue

            if not sha256:
                logger.error("Malformed entry on submission queue: %s", raw)
                continue

            notice = Notice(raw)
            if drop(notice):  # df push calls
                continue

            if is_whitelisted(notice):  # df push calls
                continue

            # scan_key stays None when the cache check is skipped.
            scan_key = None
            pprevious, previous, score = None, False, None
            if not notice.get('ignore_cache', False):
                pprevious, previous, score, scan_key = check(datastore, notice)

            if previous:
                if not notice.get('resubmit_to', []) and not pprevious:
                    logger.warning(
                        "No psid for what looks like a resubmission of %s: %s",
                        sha256, scan_key)
                finalize(pprevious, previous, score, notice)  # df push calls
                continue

            with ScanLock(scan_key):
                if scanning.exists(scan_key):
                    logger.debug('Duplicate %s', sha256)
                    ingester_counts.increment('ingest.duplicates')
                    dupq.push(dup_prefix + scan_key,
                              notice.raw)  # df push push
                    continue

                scanning.add(scan_key, notice.raw)  # df push add

            ex = return_exception(submit, client, notice)
            if not ex:
                continue

            ingester_counts.increment('ingest.error')

            should_retry = True
            tex = type(ex)
            if tex == FileStoreException:
                ex = tex("Problem with file: %s" % sha256)
            elif tex == CorruptedFileStoreException:
                logger.error(
                    "Submission failed due to corrupted filestore: %s",
                    ex.message)
                should_retry = False
            else:
                trace = get_stacktrace_info(ex)
                logger.error("Submission failed: %s", trace)

            raw = scanning.pop(scan_key)
            if not raw:
                logger.error('No scanning entry for %s', sha256)
                continue

            if not should_retry:
                continue

            retry(raw, scan_key, sha256, ex)

            if tex == riak.RiakError:
                raise ex  # pylint: disable=E0702

        except Exception:  # pylint:disable=W0703
            logger.exception("Unexpected error")