Example 1
def init_notice(raw, logger):
    # Lazy import: bind the module-level Notice once, on first use.
    global Notice  # pylint: disable=W0603
    if Notice is None:
        from assemblyline.al.common.notice import Notice

    logger.info('Sending alert: %s', str(raw))

    return Notice(raw)
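
The lazy import above binds the module-level Notice once, on first use, so
later calls skip the import machinery. A self-contained sketch of the same
pattern (the stand-in import is hypothetical, chosen only so the snippet
runs):

_Notice = None

def make_notice(raw):
    global _Notice
    if _Notice is None:
        # First call pays the import cost; later calls reuse the global.
        from json import loads as _Notice
    return _Notice(raw)

print(make_notice('{"sha256": "abc"}')['sha256'])  # abc
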
Example 2
def dropper():  # df node def
    datastore = forge.get_datastore()

    while running:
        raw = dropq.pop(timeout=1)  # df pull pop
        if not raw:
            continue

        notice = Notice(raw)

        send_notification(notice)

        c12n = notice.get('classification',
                          config.core.middleman.classification)
        expiry = now_as_iso(86400)
        sha256 = notice.get('sha256')

        datastore.save_or_freshen_file(sha256, {'sha256': sha256}, expiry,
                                       c12n)

    datastore.close()
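
now_as_iso(86400) above appears to produce an ISO-8601 expiry timestamp one
day in the future. A minimal stand-in under that assumption (not the real
AssemblyLine helper):

from datetime import datetime, timedelta

def now_as_iso(offset_seconds=0):
    # ISO-8601 UTC timestamp shifted by the given number of seconds.
    return (datetime.utcnow() + timedelta(seconds=offset_seconds)).isoformat() + 'Z'

print(now_as_iso(86400))  # expiry 24 hours from now
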
Example 3
def retry(raw, scan_key, sha256, ex):  # df node def
    current_time = now()

    notice = Notice(raw)
    retries = notice.get('retries', 0) + 1

    if retries > max_retries:
        trace = ''
        if ex and type(ex) != FileStoreException:
            trace = ': ' + get_stacktrace_info(ex)
        logger.error('Max retries exceeded for %s%s', sha256, trace)
        dupq.delete(dup_prefix + scan_key)
    elif expired(current_time - seconds(notice.get('ts', current_time)), 0):
        logger.info('No point retrying expired submission for %s', sha256)
        dupq.delete(dup_prefix + scan_key)  # df pull delete
    else:
        logger.info('Requeuing %s (%s)', sha256, ex or 'unknown')
        notice.set('retries', retries)
        notice.set('retry_at', now(retry_delay))

        retryq.push(notice.raw)  # df push push
Example 4
def init():
    datastore = forge.get_datastore()
    datastore.commit_index('submission')

    sids = [
        x['submission.sid'] for x in datastore.stream_search(
            'submission',
            'state:submitted AND times.submitted:[NOW-1DAY TO *] '
            'AND submission.metadata.type:* '
            'AND NOT submission.description:Resubmit*')
    ]

    submissions = {}
    submitted = {}
    for submission in datastore.get_submissions(sids):
        task = Task(submission)

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            continue

        if forge.determine_ingest_queue(task.root_sha256) != ingestq_name:
            continue

        scan_key = task.scan_key
        submissions[task.sid] = submission
        submitted[scan_key] = task.sid

    # Outstanding is the set of things Riak believes are being scanned.
    outstanding = set(submitted.keys())

    # Keys is the set of things middleman believes are being scanned.
    keys = set(scanning.keys())

    # Inflight is the set of submissions middleman and Riak agree are inflight.
    inflight = outstanding.intersection(keys)

    # Missing is the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    missing = keys.difference(inflight)

    # Process the set of submissions Riak believes are incomplete but
    # middleman doesn't know about.
    for scan_key in outstanding.difference(inflight):
        sid = submitted.get(scan_key, None)

        if not sid:
            logger.info("Init: No sid found for incomplete")
            continue

        submission = submissions[sid]

        task = Task(submission)

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            logger.info("Init: Not root_sha256 or original_selected")
            continue

        if not task.metadata:
            logger.info("Init: Incomplete submission is not one of ours: %s",
                        sid)
            continue

        stype = None
        try:
            stype = task.metadata.get('type', None)
        except:  # pylint: disable=W0702
            logger.exception(
                "Init: Incomplete submission has malformed metadata: %s", sid)

        if not stype:
            logger.info("Init: Incomplete submission missing type: %s", sid)
            continue

        raw = {
            'metadata': task.metadata,
            'overrides': get_submission_overrides(task, overrides),
            'sha256': task.root_sha256,
            'type': stype,
        }
        raw['overrides']['selected'] = task.original_selected

        reinsert(datastore, " (incomplete)", Notice(raw), logger)

    r = redis.StrictRedis(persistent['host'], persistent['port'],
                          persistent['db'])

    # Duplicates is the set of sha256s where a duplicate queue exists.
    duplicates = [
        x.replace(dup_prefix, '', 1) for x in r.keys(dup_prefix + '*')
    ]

    # Process the set of duplicates where no scanning or riak entry exists.
    for scan_key in set(duplicates).difference(outstanding.union(keys)):
        raw = dupq.pop(dup_prefix + scan_key, blocking=False)
        if not raw:
            logger.warning("Init: Couldn't pop off dup queue (%s)", scan_key)
            dupq.delete(dup_prefix + scan_key)
            continue

        reinsert(datastore, " (missed duplicate)", Notice(raw), logger)

    while True:
        res = completeq.pop(blocking=False)
        if not res:
            break

        scan_key = completed(Task(res))
        try:
            missing.remove(scan_key)
        except:  # pylint: disable=W0702
            pass

    # Process the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    for scan_key in missing:
        raw = scanning.pop(scan_key)
        if raw:
            reinsert(datastore, '', Notice(raw), logger, retry_all=False)

    # Set up time outs for all inflight submissions.
    expiry_time = now(max_time)
    for scan_key in inflight:
        # No need to lock. We're the only thing running at this point.
        timeouts.append(Timeout(scan_key, expiry_time))

    signal.signal(signal.SIGINT, interrupt)
    signal.signal(signal.SIGTERM, interrupt)

    datastore.close()
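
The reconciliation in init() boils down to three set operations. A toy
illustration with made-up scan keys:

outstanding = {'a', 'b', 'c'}       # Riak believes these are being scanned
keys = {'b', 'c', 'd'}              # middleman believes these are being scanned

inflight = outstanding & keys       # {'b', 'c'}: both agree; set timeouts
missing = keys - inflight           # {'d'}: middleman-only; pop and reinsert
riak_only = outstanding - inflight  # {'a'}: Riak-only; rebuild notice, reinsert
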
Example 5
def ingest(datastore, user_groups, raw):  # df node def
    notice = Notice(raw)

    ignore_size = notice.get('ignore_size', False)
    never_drop = notice.get('never_drop', False)
    sha256 = notice.get('sha256')
    size = notice.get('size', 0)

    # Make sure we have a submitter ...
    user = notice.get('submitter', None)
    if user is None:
        user = config.submissions.user
        notice.set('submitter', user)

    # ... and groups.
    groups = notice.get('groups', None)
    if groups is None:
        groups = user_groups.get(user, None)
        if groups is None:
            ruser = datastore.get_user(user)
            if not ruser:
                return
            groups = ruser.get('groups', [])
            user_groups[user] = groups
        notice.set('groups', groups)

    selected = notice.get('selected', None)
    if not selected:
        selected = selected_initial
        notice.set('selected', selected)
        notice.set('resubmit_to', ['Dynamic Analysis'])

    resubmit_to = notice.get('resubmit_to', None)
    if resubmit_to is None:
        notice.set('resubmit_to', [])

    ingester_counts.increment('ingest.bytes_ingested', int(size))
    ingester_counts.increment('ingest.submissions_ingested')

    if not sha256:
        send_notification(notice,
                          failure="Invalid sha256",
                          logfunc=logger.warning)
        return

    c12n = notice.get('classification', '')
    if not Classification.is_valid(c12n):
        send_notification(notice,
                          failure="Invalid classification %s" % c12n,
                          logfunc=logger.warning)
        return

    metadata = notice.get('metadata', {})
    if isinstance(metadata, dict):
        to_delete = []
        for k, v in metadata.iteritems():
            # Use a separate name so the submission size (above) isn't clobbered.
            value_size = sys.getsizeof(v, -1)
            if isinstance(v, basestring):
                value_size = len(v)
            if value_size > config.core.middleman.max_value_size:
                to_delete.append(k)
            elif value_size < 0:
                to_delete.append(k)
        if to_delete:
            logger.info('Removing %s from %s', to_delete, notice.raw)
            for k in to_delete:
                metadata.pop(k, None)

    if size > config.submissions.max.size and not ignore_size and not never_drop:
        notice.set(
            'failure',
            "File too large (%d > %d)" % (size, config.submissions.max.size))
        dropq.push(notice.raw)  # df push push
        ingester_counts.increment('ingest.skipped')
        return

    pprevious, previous, score = None, False, None
    if not notice.get('ignore_cache', False):
        pprevious, previous, score, _ = check(datastore, notice)

    # Assign priority.
    low_priority = is_low_priority(notice)

    priority = notice.get('priority')
    if priority is None:
        priority = priority_value['medium']

        if score is not None:
            priority = priority_value['low']
            for level in ('critical', 'high'):
                if score >= threshold_value[level]:
                    priority = priority_value[level]
                    break
        elif low_priority:
            priority = priority_value['low']

    # Reduce the priority by an order of magnitude for very old files.
    current_time = now()
    if priority and \
            expired(current_time - seconds(notice.get('ts', current_time)), 0):
        priority = (priority / 10) or 1

    notice.set('priority', priority)

    # Do this after priority has been assigned.
    # (So we don't end up dropping the resubmission).
    if previous:
        ingester_counts.increment('ingest.duplicates')
        finalize(pprevious, previous, score, notice)  # df push calls
        return

    if drop(notice):  # df push calls
        return

    if is_whitelisted(notice):  # df push calls
        return

    uniqueq.push(priority, notice.raw)  # df push push
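
The score-to-priority logic in ingest() is easiest to see with concrete
numbers. The dictionaries below are illustrative assumptions; only the level
names come from the code above:

priority_value = {'low': 100, 'medium': 200, 'high': 300, 'critical': 400}
threshold_value = {'high': 500, 'critical': 1000}

def priority_for(score):
    # Mirror of the loop in ingest(): start at low, promote to the first
    # level (checked highest first) whose threshold the score meets.
    priority = priority_value['low']
    for level in ('critical', 'high'):
        if score >= threshold_value[level]:
            priority = priority_value[level]
            break
    return priority

assert priority_for(1200) == priority_value['critical']
assert priority_for(600) == priority_value['high']
assert priority_for(10) == priority_value['low']
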
Example 6
def completed(task):  # df node def
    sha256 = task.root_sha256

    psid = task.psid
    score = task.score
    sid = task.sid

    scan_key = task.scan_key

    with ScanLock(scan_key):
        # Remove the entry from the hash of submissions in progress.
        raw = scanning.pop(scan_key)  # df pull pop
        if not raw:
            logger.warning("Untracked submission (score=%d) for: %s %s",
                           int(score), sha256, str(task.metadata))

            # Not a result we care about. We are notified for every
            # submission that completes. Some submissions will not be ours.
            if task.metadata:
                stype = None
                try:
                    stype = task.metadata.get('type', None)
                except:  # pylint: disable=W0702
                    logger.exception("Malformed metadata: %s:", sid)

                if not stype:
                    return scan_key

                if (task.description or '').startswith(default_prefix):
                    raw = {
                        'metadata': task.metadata,
                        'overrides': get_submission_overrides(task, overrides),
                        'sha256': sha256,
                        'type': stype,
                    }

                    finalize(psid, sid, score, Notice(raw))
            return scan_key

        errors = task.raw.get('error_count', 0)
        file_count = task.raw.get('file_count', 0)
        ingester_counts.increment('ingest.submissions_completed')
        ingester_counts.increment('ingest.files_completed', file_count)
        ingester_counts.increment('ingest.bytes_completed', int(task.size
                                                                or 0))

        notice = Notice(raw)

        with cache_lock:
            _add(scan_key, psid, sid, score, errors, now())

        finalize(psid, sid, score, notice)  # df push calls

        def exhaust():
            while True:
                res = dupq.pop(  # df pull pop
                    dup_prefix + scan_key, blocking=False)
                if res is None:
                    break
                yield res

        # You may be tempted to remove the assignment to dups and use the
        # value directly in the for loop below. That would be a mistake.
        # The function finalize may push on the duplicate queue which we
        # are pulling off and so condensing those two lines creates a
        # potential infinite loop.
        dups = [dup for dup in exhaust()]
        for dup in dups:
            finalize(psid, sid, score, Notice(dup))

    return scan_key
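
The comment in completed() about not condensing the two lines warns against a
real hazard; in miniature, snapshot the queue before iterating because the
handler may refill it:

from collections import deque

q = deque([1, 2])

def handle(item):
    q.append(item)  # like finalize(): may push back onto the queue we drain

snapshot = list(q)     # snapshot first ...
q.clear()
for item in snapshot:  # ... then iterate; re-pushed items wait for a later pass
    handle(item)
# Popping and handling in a single loop here would never terminate.
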
Example 7
def submitter():  # df node def
    client = forge.get_submission_service()
    datastore = forge.get_datastore()

    while running:
        try:
            raw = submissionq.pop(timeout=1)  # df pull pop
            if not raw:
                continue

            # noinspection PyBroadException
            try:
                sha256 = raw['sha256']
            except Exception:  # pylint: disable=W0703
                logger.exception("Malformed entry on submission queue:")
                continue

            if not sha256:
                logger.error("Malformed entry on submission queue: %s", raw)
                continue

            notice = Notice(raw)
            if drop(notice):  # df push calls
                continue

            if is_whitelisted(notice):  # df push calls
                continue

            # Initialize scan_key too, so it is bound even when check() is
            # skipped for ignore_cache submissions.
            pprevious, previous, score, scan_key = None, False, None, None
            if not notice.get('ignore_cache', False):
                pprevious, previous, score, scan_key = check(datastore, notice)

            if previous:
                if not notice.get('resubmit_to', []) and not pprevious:
                    logger.warning(
                        "No psid for what looks like a resubmission of %s: %s",
                        sha256, scan_key)
                finalize(pprevious, previous, score, notice)  # df push calls
                continue

            with ScanLock(scan_key):
                if scanning.exists(scan_key):
                    logger.debug('Duplicate %s', sha256)
                    ingester_counts.increment('ingest.duplicates')
                    dupq.push(dup_prefix + scan_key,
                              notice.raw)  # df push push
                    continue

                scanning.add(scan_key, notice.raw)  # df push add

            ex = return_exception(submit, client, notice)
            if not ex:
                continue

            ingester_counts.increment('ingest.error')

            should_retry = True
            tex = type(ex)
            if tex == FileStoreException:
                ex = tex("Problem with file: %s" % sha256)
            elif tex == CorruptedFileStoreException:
                logger.error(
                    "Submission failed due to corrupted filestore: %s" %
                    ex.message)
                should_retry = False
            else:
                trace = get_stacktrace_info(ex)
                logger.error("Submission failed: %s", trace)

            raw = scanning.pop(scan_key)
            if not raw:
                logger.error('No scanning entry for %s', sha256)
                continue

            if not should_retry:
                continue

            retry(raw, scan_key, sha256, ex)

            if tex == riak.RiakError:
                raise ex  # pylint: disable=E0702

        except Exception:  # pylint:disable=W0703
            logger.exception("Unexpected error")