Ejemplo n.º 1
0
def get_file_tree(sid, **kwargs):
    """
    Get the file hierarchy of a given Submission ID. This is
    an N deep recursive process but is limited to the max depth
    set in the system settings.

    Variables:
    sid         => Submission ID to get the tree for

    Arguments:
    None

    Data Block:
    None

    API call example:
    /api/v4/submission/tree/12345678-1234-1234-1234-1234567890AB/

    Result example:
    {                                # Dictionary of file blocks
     "1f...11": {                    # File sha256 (sha256)
       "score": 923,                 # Score for the file
       "name": ["file.exe",...]      # List of possible names for the file
       "children": {...}             # Dictionary of children file blocks
       }, ...

    """
    requester = kwargs['user']

    submission = STORAGE.submission.get(sid, as_obj=False)
    if submission is None:
        # Unknown submission ID
        return make_api_response(
            "", "Submission ID %s does not exists." % sid, 404)

    # The requester's classification is only read when both the submission
    # and the user are truthy (short-circuit evaluation).
    can_view = submission and requester and Classification.is_accessible(
        requester['classification'], submission['classification'])
    if not can_view:
        return make_api_response(
            "", "You are not allowed to view the data of this submission", 403)

    # NOTE(review): the payload is whatever get_or_create_file_tree returns;
    # confirm that its shape matches the "Result example" above.
    depth_limit = config.submission.max_extraction_depth
    file_tree = STORAGE.get_or_create_file_tree(
        submission,
        depth_limit,
        cl_engine=Classification,
        user_classification=requester['classification'])
    return make_api_response(file_tree)
Ejemplo n.º 2
0
def get_report(submission_id, **kwargs):
    """
    Create a report for a submission based on its ID.

    The report is the submission record enriched with its file tree, the
    attack matrix, heuristics and tags taken from the submission summary,
    the file information of the first submitted file and the list of
    "important" files (files referenced by an attack matrix entry, a
    heuristic or a reported tag, excluding the first submitted file).

    Variables:
    submission_id   ->   ID of the submission to create the report for

    Arguments:
    None

    Data Block:
    None

    Result example:
    { <THE REPORT> }
    """
    user = kwargs['user']
    submission = STORAGE.submission.get(submission_id, as_obj=False)
    if submission is None:
        return make_api_response(
            "", "Submission ID %s does not exists." % submission_id, 404)

    if not (user and Classification.is_accessible(
            user['classification'], submission['classification'])):
        return make_api_response(
            "", "You are not allowed to view the data of this submission", 403)

    if submission['state'] != 'completed':
        return make_api_response(
            "", f"It is too early to generate the report. "
            f"Submission ID {submission_id} is incomplete.", 425)

    # Collected as a set while building the report, converted to a list
    # right before the response is produced.
    important_files = set()
    submission['important_files'] = important_files
    submission['report_filtered'] = False

    tree = STORAGE.get_or_create_file_tree(
        submission,
        config.submission.max_extraction_depth,
        cl_engine=Classification,
        user_classification=user['classification'])
    submission['file_tree'] = tree['tree']
    submission['classification'] = Classification.max_classification(
        submission['classification'], tree['classification'])
    if tree['filtered']:
        submission['report_filtered'] = True

    # Error keys are split on '.' and the second component is reported as
    # the name of the service in error. The submission may have no 'errors'
    # entry at all, and keys without a second component are skipped instead
    # of failing the whole report.
    error_keys = submission.pop('errors', None) or []
    split_keys = (key.split('.') for key in error_keys)
    submission['params']['services']['errors'] = list(
        {parts[1] for parts in split_keys if len(parts) > 1})

    def recurse_get_names(data):
        """Map every sha256 of the (nested) file tree to all of its names."""
        output = {}
        for key, val in data.items():
            output.setdefault(key, []).extend(val['name'])
            for c_key, c_names in recurse_get_names(val['children']).items():
                output.setdefault(c_key, []).extend(c_names)
        return output

    name_map = recurse_get_names(tree['tree'])

    def add_file_refs(files, sha256):
        """Append a (name, sha256) pair to `files` for each known name of
        the file, skipping pairs already present, and flag the file as
        important. Files absent from the tree are listed under their
        sha256."""
        for name in name_map.get(sha256, [sha256]):
            ref = (name, sha256)
            if ref not in files:
                files.append(ref)
            important_files.add(sha256)

    summary = get_or_create_summary(submission_id,
                                    submission.pop('results', []),
                                    user['classification'])
    tags = summary['tags']
    attack_matrix = summary['attack_matrix']
    heuristics = summary['heuristics']
    submission['classification'] = Classification.max_classification(
        submission['classification'], summary['classification'])
    if summary['filtered']:
        submission['report_filtered'] = True

    if summary['partial']:
        submission['report_partial'] = True

    submission['attack_matrix'] = {}
    submission['heuristics'] = {}
    submission['tags'] = {}

    # Process attack matrix: category -> attack name -> {h_type, files}
    for item in attack_matrix:
        # The first 64 characters of a key are the sha256 of the file
        sha256 = item['key'][:64]
        for cat in item['categories']:
            entry = submission['attack_matrix'].setdefault(
                cat, {}).setdefault(item['name'], {
                    'h_type': item['h_type'],
                    'files': []
                })
            add_file_refs(entry['files'], sha256)

    # Process heuristics: heuristic type -> heuristic name -> files
    for h_type, items in heuristics.items():
        by_name = submission['heuristics'].setdefault(h_type, {})
        for item in items:
            sha256 = item['key'][:64]
            add_file_refs(by_name.setdefault(item['name'], []), sha256)

    # Process tags: summary type -> tag type -> tag value -> {h_type, files}
    tag_types = config.submission.tag_types
    for t in tags:
        if t["type"] in tag_types.behavior:
            summary_type = 'behaviors'
        elif t["type"] in tag_types.attribution:
            summary_type = 'attributions'
        elif t["type"] in tag_types.ioc:
            summary_type = 'indicators_of_compromise'
        else:
            summary_type = None

        # Only non-empty tags belonging to a reported category are kept
        if t['value'] == "" or summary_type is None:
            continue

        sha256 = t["key"][:64]
        entry = submission['tags'].setdefault(summary_type, {}).setdefault(
            t['type'], {}).setdefault(t['value'], {
                'h_type': t['h_type'],
                'files': []
            })
        add_file_refs(entry['files'], sha256)

    submitted_sha256 = submission['files'][0]['sha256']
    submission["file_info"] = STORAGE.file.get(submitted_sha256,
                                               as_obj=False)
    # The first submitted file is never listed among the important files
    important_files.discard(submitted_sha256)

    submission['important_files'] = list(important_files)

    return make_api_response(submission)
Ejemplo n.º 3
0
def get_full_results(sid, **kwargs):
    """
    Get the full results for a given Submission ID. The difference
    between this and the get results API is that this one gets the
    actual values of the result and error keys instead of listing
    the keys.

    Variables:
    sid         => Submission ID to get the full results for

    Arguments:
    None

    Data Block:
    None

    Result example:
    {"classification": "UNRESTRICTED",  # Access control for the submission
     "error_count": 0,                  # Number of errors in this submission
     "errors": [],                      # List of error blocks (see Get Service Error)
     "file_count": 4,                   # Number of files in this submission
     "files": [                         # List of submitted files
       ["FNAME", "sha256"], ...],              # Each file = List of name/sha256
     "file_infos": {                    # Dictionary of file info blocks
       "234...235": <<FILE_INFO>>,          # File info block
       ...},                                # Keyed by file's sha256
     "file_tree": {                     # File tree of the submission
       "333...7a3": {                       # File tree item
        "children": {},                         # Recursive children of file tree item
        "name": ["file.exe",...]                # List of possible names for the file
        "score": 0                              # Score of the file
       }, ...},                             # Keyed by file's sha256
     "missing_error_keys": [],          # Errors that could not be fetched from the datastore
     "missing_file_keys": [],           # Files that could not be fetched from the datastore
     "missing_result_keys": [],         # Results that could not be fetched from the datastore
     "results": [],                     # List of Results Blocks (see Get Service Result)
     "services": {                      # Service Block
       "selected": ["mcafee"],              # List of selected services
       "params": {},                        # Service specific parameters
       "excluded": []                       # List of excluded services
       },
     "state": "completed",              # State of the submission
     "submission": {                    # Submission Block
       "profile": true,                     # Should keep stats about execution?
       "description": "",                   # Submission description
       "ttl": 30,                           # Submission days to live
       "ignore_filtering": false,           # Ignore filtering services?
       "priority": 1000,                    # Submission priority, higher = faster
       "ignore_cache": true,                # Force reprocess even if result exists?
       "groups": ["group", ...],            # List of groups with access
       "sid": "ab9...956",                  # Submission ID
       "submitter": "user",                 # Uname of the submitter
       "max_score": 1422, },                # Score of highest scoring file
     "times": {                         # Timing block
       "completed": "2014-...",             # Completed time
       "submitted": "2014-..."              # Submitted time
       }
    }
    """
    max_retry = 10

    def wait_before(retry):
        # No wait before the first attempt; afterwards the delay doubles on
        # every retry, from 2**-6 seconds up to 2**2 seconds.
        if retry:
            time.sleep(2**(retry - 7))

    def get_results(keys):
        """Fetch the result blocks for `keys` and keep only those whose file
        info is available and which `format_result` does not blank out."""
        res = {}
        retry = 0
        while keys and retry < max_retry:
            wait_before(retry)
            res.update(
                STORAGE.get_multiple_results(keys,
                                             Classification,
                                             as_obj=False))
            keys = [x for x in keys if x not in res]
            retry += 1

        results = {}
        for k, v in res.items():
            # The first 64 characters of a result key are the file's sha256
            file_info = data['file_infos'].get(k[:64], None)
            if file_info:
                v = format_result(user['classification'], v,
                                  file_info['classification'])
                if v:
                    results[k] = v

        return {"results": results, "missing_result_keys": keys}

    def multiget_with_retry(collection, keys, label):
        """Fetch `keys` from `collection`, retrying keys that were neither
        returned nor reported missing.

        Returns a (found, missing) tuple where `missing` holds the keys
        still unresolved after the last attempt followed by the keys the
        datastore reported as missing.
        """
        found = {}
        missing = []
        retry = 0
        while keys and retry < max_retry:
            wait_before(retry)
            try:
                found.update(collection.multiget(keys, as_obj=False))
            except MultiKeyError as e:
                LOGGER.warning(
                    f"Trying to get multiple {label} but some are missing: {str(e.keys)}"
                )
                found.update(e.partial_output)
                missing.extend(e.keys)
            known_missing = set(missing)
            keys = [
                x for x in keys if x not in found and x not in known_missing
            ]
            retry += 1

        return found, list(keys or []) + missing

    def get_errors(keys):
        """Fetch the error blocks for `keys`."""
        err, missing = multiget_with_retry(STORAGE.error, keys, "errors")
        return {"errors": err, "missing_error_keys": missing}

    def get_file_infos(keys):
        """Fetch the file info blocks for `keys`; returns (infos, missing)."""
        return multiget_with_retry(STORAGE.file, keys, "files")

    def recursive_flatten_tree(tree):
        """Return the distinct sha256 keys found at any depth of `tree`."""
        sha256s = set()
        for key, val in tree.items():
            sha256s.add(key)
            sha256s.update(recursive_flatten_tree(val.get('children', {})))
        return list(sha256s)

    user = kwargs['user']
    data = STORAGE.submission.get(sid, as_obj=False)
    if data is None:
        return make_api_response("", "Submission ID %s does not exists." % sid,
                                 404)

    if not (data and user and Classification.is_accessible(
            user['classification'], data['classification'])):
        return make_api_response(
            "", "You are not allowed to view the data of this submission", 403)

    # Keep the key lists: 'results' and 'errors' are replaced below by the
    # actual blocks.
    res_keys = data.get("results", [])
    err_keys = data.get("errors", [])

    data['file_tree'] = STORAGE.get_or_create_file_tree(
        data,
        config.submission.max_extraction_depth,
        cl_engine=Classification,
        user_classification=user['classification'])['tree']
    # File infos must be loaded before the results: get_results() reads
    # data['file_infos'] to decide which results are kept.
    data['file_infos'], data['missing_file_keys'] = get_file_infos(
        recursive_flatten_tree(data['file_tree']))
    data.update(get_results(res_keys))
    data.update(get_errors(err_keys))

    # The response is classified at least as high as everything it embeds
    for r in data['results'].values():
        data['classification'] = Classification.max_classification(
            data['classification'], r['classification'])

    for f in data['file_infos'].values():
        data['classification'] = Classification.max_classification(
            data['classification'], f['classification'])

    return make_api_response(data)