Code example #1
def decode_file(original_path, fileinfo, identify):
    extracted_path = None
    hdr = {}
    with open(original_path, 'rb') as original_file:
        # Only CaRT-encoded files (identified by their magic header) get decoded
        if is_cart(original_file.read(256)):
            original_file.seek(0)

            # Read the CaRT header without extracting the payload yet
            _, hdr, _ = _unpack_header(original_file)
            al_type = flatten(hdr).get('al.type', None)
            # Only unpack carts whose header does not declare an 'al.type'
            if not al_type:
                original_file.seek(0)

                extracted_fd, extracted_path = tempfile.mkstemp()
                extracted_file = os.fdopen(extracted_fd, 'wb')

                cart_extracted = False
                try:
                    hdr, _ = unpack_stream(original_file, extracted_file)
                    cart_extracted = True

                except Exception:
                    # Unreadable carts are flagged as corrupted for reporting
                    extracted_path = None
                    hdr = {}
                    fileinfo['type'] = 'corrupted/cart'

                finally:
                    extracted_file.close()

                if cart_extracted and extracted_path:
                    # Re-identify the file based on its decoded content
                    fileinfo = identify.fileinfo(extracted_path)

    return extracted_path, fileinfo, hdr
Code example #2
    def parse_link(self, parent_res, path):
        with open(path, "rb") as fh:
            metadata = decode_lnk(fh.read())

        if metadata is None:
            return False

        body_output = {
            build_key(k): v
            for k, v in flatten(metadata).items() if v
        }
        res = ResultSection("Metadata extracted by parse_lnk",
                            body_format=BODY_FORMAT.KEY_VALUE,
                            body=json.dumps(body_output),
                            parent=parent_res)

        bp = metadata.get("BasePath", "").strip()
        rp = metadata.get("RELATIVE_PATH", "").strip()
        nn = metadata.get("NetName", "").strip()
        cla = metadata.get("COMMAND_LINE_ARGUMENTS", "").strip()
        s = BAD_LINK_RE.search(cla.lower())
        if s:
            res.set_heuristic(1)
        res.add_tag(tag_type="file.name.extracted",
                    value=(bp or rp or nn).rsplit("\\")[-1])
        res.add_tag(tag_type="dynamic.process.command_line",
                    value=f"{(rp or bp or nn)} {cla}".strip())

        for k, v in body_output.items():
            tag_type = TAG_MAP.get("LNK", {}).get(k, None) or \
                       TAG_MAP.get(None, {}).get(k, None)
            if tag_type:
                res.add_tag(tag_type, v)

        return True
Code example #3
    def _create_random_section(self):
        # choose a random body format
        body_format = random.choice(FORMAT_LIST)

        # create a section with a random title
        section = ResultSection(get_random_phrase(3, 7), body_format=body_format)

        # choose a random number of lines for the body
        for _ in range(random.randint(1, 5)):
            # generate random line
            section.add_line(get_random_phrase(5, 10))

        # add a random set of tags
        tags = flatten(get_random_tags())
        for key, val in tags.items():
            for v in val:
                section.add_tag(key, v)

        # set a heuristic a third of the time
        if random.choice([False, False, True]):
            section.set_heuristic(random.randint(1, 4))

        # Create random sub-sections
        if random.choice([False, False, True]):
            section.add_subsection(self._create_random_section())

        return section
Code example #4
def test_compat_tag_map():
    flatten_map = flatten(tag_map)
    for _ in range(10):
        random_key = random.choice(list(v3_lookup_map.keys()))
        try:
            assert random_key in flatten_map[v3_lookup_map[random_key]]
        except KeyError:
            assert random_key in UNUSED
Code example #5
def fix_section_data(section):
    if section['body_format'] in JSON_SECTIONS and isinstance(
            section['body'], str):
        # Loading JSON formatted sections
        try:
            section['body'] = json.loads(section['body'])
        except ValueError:
            pass

    # Change the tag dictionary into a list; the boolean flags safelisted tags
    section['tags'] = tag_dict_to_list(flatten(section['tags']), False)
    section['tags'] += tag_dict_to_list(section.pop('safelisted_tags', {}), True)
    return section
Code example #6
def test_dict_flatten():
    src = {
        "a": {
            "b": {
                "c": 1
            }
        },
        "b": {
            "d": {
                2
            }
        }
    }

    flat_src = flatten(src)
    assert src == unflatten(flat_src)
    assert list(flat_src.keys()) == ["a.b.c", "b.d"]
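For reference, a minimal sketch of a flatten/unflatten pair consistent with this test, assuming dot-separated keys and recursion into dicts only (Assemblyline's real implementation may differ in such details):

def flatten_sketch(data, parent_key=""):
    # Collapse nested dicts into dot-separated keys; anything that is
    # not a dict (including the set {2} above) stays a leaf value
    items = {}
    for key, value in data.items():
        new_key = f"{parent_key}.{key}" if parent_key else key
        if isinstance(value, dict):
            items.update(flatten_sketch(value, new_key))
        else:
            items[new_key] = value
    return items


def unflatten_sketch(data):
    # Rebuild the nested structure by splitting each key on '.'
    out = {}
    for key, value in data.items():
        parts = key.split(".")
        current = out
        for part in parts[:-1]:
            current = current.setdefault(part, {})
        current[parts[-1]] = value
    return out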
Code example #7
def generalize_result(result):
    # At first we were comparing the full result and removing the random/unpredictable information.
    # Now we are only keeping the strict minimum to compare with.
    # supplementary/extracted sha256 + heuristics heur_id + tags
    trimed_result = {}
    if "response" in result:
        trimed_result["response"] = {}
        if "supplementary" in result["response"]:
            trimed_result["response"]["supplementary"] = sorted(
                [x["sha256"] for x in result["response"]["supplementary"]])
        if "extracted" in result["response"]:
            trimed_result["response"]["extracted"] = sorted(
                [{
                    "name": x["name"],
                    "sha256": x["sha256"]
                } for x in result["response"]["extracted"]],
                key=lambda x: x["sha256"],
            )

    if "result" in result:
        trimed_result["result"] = {}
        if "sections" in result["result"]:
            trimed_result["result"] = {"heuristics": [], "tags": {}}
            for section in result["result"]["sections"]:
                if "heuristic" in section:
                    if section["heuristic"] is not None:
                        if "heur_id" in section["heuristic"]:
                            trimed_result["result"]["heuristics"].append(
                                section["heuristic"]["heur_id"])
                if "tags" in section:
                    if section["tags"]:
                        for k, v in flatten(section["tags"]).items():
                            if k in trimed_result["result"]["tags"]:
                                trimed_result["result"]["tags"][k].extend(v)
                            else:
                                trimed_result["result"]["tags"][k] = v

            # Sort the heur_id and tags lists so they always appear in the same order even if
            # the result sections were moved around.
            trimed_result["result"]["heuristics"] = sorted(
                trimed_result["result"]["heuristics"])
            for k, v in trimed_result["result"]["tags"].items():
                trimed_result["result"]["tags"][k] = sorted(v)

    return trimed_result
Code example #8
    def get_tag_list_from_keys(self, keys):
        if len(keys) == 0:
            return []
        keys = [x for x in list(keys) if not x.endswith(".e")]
        items = self.result.multiget(keys, as_obj=False)

        out = []
        for key, item in items.items():
            for section in item.get('result', {}).get('sections', []):
                for tag_type, tags in flatten(section.get('tags', {})).items():
                    if tags is not None:
                        for tag in tags:
                            out.append({
                                'type': tag_type,
                                'short_type': tag_type.rsplit(".", 1)[-1],
                                'value': tag,
                                'key': key
                            })

        return out
Code example #9
File: tagging.py Project: malvidin/assemblyline-base
def tag_dict_to_list(tag_dict: Dict) -> List[Dict]:
    return [
        {'type': k, 'value': t, 'short_type': k.rsplit(".", 1)[-1]}
        for k, v in flatten(tag_dict).items() if v is not None
        for t in v
    ]
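To illustrate the shape of the output, a hypothetical tag dictionary and the list it would produce:

# Hypothetical input: one nested tag key with a single value
tags = {"network": {"static": {"ip": ["127.0.0.1"]}}}

tag_dict_to_list(tags)
# -> [{'type': 'network.static.ip', 'value': '127.0.0.1', 'short_type': 'ip'}]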
Code example #10
    def get_summary_from_keys(self, keys):
        out = {
            "tags": [],
            "attack_matrix": [],
            "heuristics": {
                "info": [],
                "suspicious": [],
                "malicious": []
            }
        }
        done_map = {"heuristics": set(), "attack": set(), "tags": set()}

        if len(keys) == 0:
            return out

        keys = [x for x in list(keys) if not x.endswith(".e")]
        items = self.result.multiget(keys, as_obj=False)

        for key, item in items.items():
            for section in item.get('result', {}).get('sections', []):
                h_type = "info"

                if section.get('heuristic', False):
                    # Get the heuristics data
                    if section['heuristic']['score'] < 100:
                        h_type = "info"
                    elif section['heuristic']['score'] < 1000:
                        h_type = "suspicious"
                    else:
                        h_type = "malicious"

                    cache_key = f"{section['heuristic']['heur_id']}_{key}"
                    if cache_key not in done_map['heuristics']:
                        out['heuristics'][h_type].append({
                            'heur_id': section['heuristic']['heur_id'],
                            'name': section['heuristic']['name'],
                            'key': key
                        })
                        done_map['heuristics'].add(cache_key)

                    if section['heuristic'].get('attack_id', False):
                        # Get attack matrix data
                        attack_id = section['heuristic']['attack_id']

                        cache_key = f"{attack_id}_{key}"
                        if cache_key not in done_map['attack']:
                            out['attack_matrix'].append({
                                "key": key,
                                "attack_id": attack_id,
                                "h_type": h_type,
                                "name": section['heuristic']['attack_pattern'],
                                "categories": section['heuristic']['attack_categories']
                            })
                            done_map['attack'].add(cache_key)

                # Get tagging data
                for tag_type, tags in flatten(section.get('tags', {})).items():
                    if tags is not None:
                        for tag in tags:
                            cache_key = f"{tag_type}_{tag}_{key}"

                            if cache_key not in done_map['tags']:
                                out['tags'].append({
                                    'type': tag_type,
                                    'h_type': h_type,
                                    'short_type': tag_type.rsplit(".", 1)[-1],
                                    'value': tag,
                                    'key': key
                                })
                                done_map['tags'].add(cache_key)

        return out
Code example #11
def ingest_single_file(**kwargs):
    """
    Ingest a single file, sha256 or URL in the system

        Note 1:
            If you are submitting a sha256 or a URL, you must use the application/json encoding and one of
            the sha256 or url parameters must be included in the data block.

        Note 2:
            If you are submitting a file directly, you have to use multipart/form-data encoding; this
            was done to reduce the memory footprint and speed up file transfers.
             ** Read the documentation of the MIME multipart standard if your library does not support it **

            The multipart/form-data for sending binary data has two parts:
                - The first part contains a JSON dump of the optional params and uses the name 'json'
                - The last part contains the file binary, uses the name 'bin' and includes a filename

        Note 3:
            The ingest API uses the user's default settings to submit files to the system
            unless these settings are overridden in the 'params' field. There are, however,
            exceptions to that rule: the deep_scan, ignore_filtering and ignore_cache fields are
            reset to False because they lead to dangerous behavior in the system.

    Variables:
    None

    Arguments:
    None

    Data Block (SHA256 or URL):
    {
     //REQUIRED VALUES: One of the following
     "sha256": "1234...CDEF"         # SHA256 hash of the file
     "url": "http://...",            # Url to fetch the file from

     //OPTIONAL VALUES
     "name": "file.exe",             # Name of the file

     "metadata": {                   # Submission Metadata
         "key": val,                    # Key/Value pair for metadata parameters
         },

     "params": {                     # Submission parameters
         "key": val,                    # Key/Value pair for params that differ from the user's defaults
         },                                 # DEFAULT: /api/v3/user/submission_params/<user>/

     "generate_alert": False,        # Generate an alert in our alerting system or not
     "notification_queue": None,     # Name of the notification queue
     "notification_threshold": None, # Threshold for notification
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"      <-- JSON data blob part (only previous optional values valid)

    {"params": {"ignore_cache": true}, "generate_alert": true}
    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODED>

    --0b34a3c50d3c02dd804a172329a0b2aa--             <-- End of HTTP transmission

    Result example:
    { "ingest_id": <ID OF THE INGESTED FILE> }
    """
    user = kwargs['user']
    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())
    extracted_path = original_file = None
    try:
        # Get data block and binary blob
        if 'multipart/form-data' in request.content_type:
            if 'json' in request.values:
                data = json.loads(request.values['json'])
            else:
                data = {}
            binary = request.files['bin']
            name = data.get("name", binary.filename)
            sha256 = None
            url = None
        elif 'application/json' in request.content_type:
            data = request.json
            binary = None
            sha256 = data.get('sha256', None)
            url = data.get('url', None)
            name = data.get("name",
                            None) or sha256 or os.path.basename(url) or None
        else:
            return make_api_response({}, "Invalid content type", 400)

        if not data:
            return make_api_response({}, "Missing data block", 400)

        # Get notification queue parameters
        notification_queue = data.get('notification_queue', None)
        notification_threshold = data.get('notification_threshold', None)
        if notification_threshold and not isinstance(notification_threshold, int):
            return make_api_response(
                {}, "notification_threshold should be an int", 400)

        # Get file name
        if not name:
            return make_api_response({}, "Filename missing", 400)

        name = safe_str(os.path.basename(name))
        if not name:
            return make_api_response({}, "Invalid filename", 400)

        try:
            os.makedirs(out_dir)
        except Exception:
            pass
        original_file = out_file = os.path.join(out_dir, name)

        # Prepare variables
        extra_meta = {}
        fileinfo = None
        do_upload = True
        al_meta = {}

        # Load default user params
        s_params = ui_to_submission_params(load_user_settings(user))

        # Reset dangerous user settings to safe values
        s_params.update({
            'deep_scan': False,
            "priority": 150,
            "ignore_cache": False,
            "ignore_dynamic_recursion_prevention": False,
            "ignore_filtering": False,
            "type": "INGEST"
        })

        # Apply provided params
        s_params.update(data.get("params", {}))

        # Load file
        if not binary:
            if sha256:
                fileinfo = STORAGE.file.get_if_exists(
                    sha256,
                    as_obj=False,
                    archive_access=config.datastore.ilm.update_archive)
                if FILESTORE.exists(sha256):
                    if fileinfo:
                        if not Classification.is_accessible(
                                user['classification'],
                                fileinfo['classification']):
                            # Intentionally hide the file's existence from users without access
                            return make_api_response(
                                {}, "SHA256 does not exist in our datastore",
                                404)
                        else:
                            # File's classification must be applied at a minimum
                            s_params['classification'] = Classification.max_classification(
                                s_params['classification'], fileinfo['classification'])
                    else:
                        # File is in storage and the DB no need to upload anymore
                        do_upload = False
                    # File exists in the filestore and the user has appropriate file access
                    FILESTORE.download(sha256, out_file)
                else:
                    return make_api_response(
                        {}, "SHA256 does not exist in our datastore", 404)
            else:
                if url:
                    if not config.ui.allow_url_submissions:
                        return make_api_response(
                            {}, "URL submissions are disabled in this system",
                            400)

                    try:
                        safe_download(url, out_file)
                        extra_meta['submitted_url'] = url
                    except FileTooBigException:
                        return make_api_response({},
                                                 "File too big to be scanned.",
                                                 400)
                    except InvalidUrlException:
                        return make_api_response({},
                                                 "Url provided is invalid.",
                                                 400)
                    except ForbiddenLocation:
                        return make_api_response(
                            {}, "Hostname in this URL cannot be resolved.",
                            400)
                else:
                    return make_api_response(
                        {},
                        "Missing file to scan. No binary, sha256 or url provided.",
                        400)
        else:
            binary.save(out_file)

        if do_upload and os.path.getsize(out_file) == 0:
            return make_api_response({},
                                     err="File empty. Ingestion failed",
                                     status_code=400)

        # Apply group params if not specified
        if 'groups' not in s_params:
            s_params['groups'] = user['groups']

        # Get generate alert parameter
        generate_alert = data.get('generate_alert',
                                  s_params.get('generate_alert', False))
        if not isinstance(generate_alert, bool):
            return make_api_response({}, "generate_alert should be a boolean",
                                     400)

        # Override final parameters
        s_params.update({
            'generate_alert': generate_alert,
            'max_extracted': config.core.ingester.default_max_extracted,
            'max_supplementary': config.core.ingester.default_max_supplementary,
            'priority': min(s_params.get("priority", 150), config.ui.ingest_max_priority),
            'submitter': user['uname']
        })

        # Enforce maximum DTL
        if config.submission.max_dtl > 0:
            s_params['ttl'] = min(int(s_params['ttl']), config.submission.max_dtl) \
                if int(s_params['ttl']) else config.submission.max_dtl

        # No need to re-calculate fileinfo if we have it already
        if not fileinfo:
            # Calculate file digest
            fileinfo = IDENTIFY.fileinfo(out_file)

            # Validate file size
            if fileinfo['size'] > MAX_SIZE and not s_params.get(
                    'ignore_size', False):
                msg = f"File too large ({fileinfo['size']} > {MAX_SIZE}). Ingestion failed"
                return make_api_response({}, err=msg, status_code=413)
            elif fileinfo['size'] == 0:
                return make_api_response({},
                                         err="File empty. Ingestion failed",
                                         status_code=400)

            # Decode cart if needed
            extracted_path, fileinfo, al_meta = decode_file(
                out_file, fileinfo, IDENTIFY)
            if extracted_path:
                out_file = extracted_path

        # Alter filename and classification based on CaRT output
        meta_classification = al_meta.pop('classification',
                                          s_params['classification'])
        if meta_classification != s_params['classification']:
            try:
                s_params['classification'] = Classification.max_classification(
                    meta_classification, s_params['classification'])
            except InvalidClassification as ic:
                return make_api_response(
                    {},
                    "The classification found inside the cart file cannot be merged with "
                    f"the classification the file was submitted as: {str(ic)}",
                    400)
        name = al_meta.pop('name', name)

        # Validate ingest classification
        if not Classification.is_accessible(user['classification'],
                                            s_params['classification']):
            return make_api_response(
                {}, "You cannot start a submission with a higher "
                "classification than you're allowed to see", 400)

        # Freshen file object
        expiry = now_as_iso(s_params['ttl'] * 24 * 60 *
                            60) if s_params.get('ttl', None) else None
        STORAGE.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry,
                                     s_params['classification'])

        # Save the file to the filestore if need be
        # (no need to test for existence before uploading; upload already does that)
        if do_upload:
            FILESTORE.upload(out_file, fileinfo['sha256'], location='far')

        # Setup notification queue if needed
        if notification_queue:
            notification_params = {
                "queue": notification_queue,
                "threshold": notification_threshold
            }
        else:
            notification_params = {}

        # Load metadata, setup some default values if they are missing and append the cart metadata
        ingest_id = get_random_id()
        metadata = flatten(data.get("metadata", {}))
        metadata['ingest_id'] = ingest_id
        metadata['type'] = s_params['type']
        metadata.update(al_meta)
        if 'ts' not in metadata:
            metadata['ts'] = now_as_iso()
        metadata.update(extra_meta)

        # Set description if it does not exist
        s_params['description'] = s_params['description'] or \
            f"[{s_params['type']}] Inspection of file: {name}"

        # Create submission object
        try:
            submission_obj = Submission({
                "sid": ingest_id,
                "files": [{
                    'name': name,
                    'sha256': fileinfo['sha256'],
                    'size': fileinfo['size']
                }],
                "notification": notification_params,
                "metadata": metadata,
                "params": s_params
            })
        except (ValueError, KeyError) as e:
            return make_api_response({}, err=str(e), status_code=400)

        # Send submission object for processing
        ingest.push(submission_obj.as_primitives())
        submission_received(submission_obj)

        return make_api_response({"ingest_id": ingest_id})

    finally:
        # Cleanup files on disk
        try:
            if original_file and os.path.exists(original_file):
                os.unlink(original_file)
        except Exception:
            pass

        try:
            if extracted_path and os.path.exists(extracted_path):
                os.unlink(extracted_path)
        except Exception:
            pass

        try:
            if os.path.exists(out_dir):
                shutil.rmtree(out_dir, ignore_errors=True)
        except Exception:
            pass
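As a companion to the docstring above, here is a sketch of client calls matching the two encodings it describes; the host, endpoint path and authentication headers are illustrative assumptions, not values confirmed by this code:

import json

import requests

# Hypothetical deployment values; adjust for your own instance
HOST = "https://assemblyline.example.com"
HEADERS = {"x-user": "admin", "x-apikey": "devkey:admin"}  # assumed auth scheme

# Ingesting by sha256 or URL uses the application/json encoding
resp = requests.post(f"{HOST}/api/v4/ingest/", headers=HEADERS,
                     json={"sha256": "1234...CDEF", "notification_queue": "my_queue"})
print(resp.json())  # e.g. {"ingest_id": "..."}

# Ingesting a file directly uses multipart/form-data with a 'json' part and a 'bin' part
with open("sample.bin", "rb") as fh:
    resp = requests.post(f"{HOST}/api/v4/ingest/", headers=HEADERS,
                         files={"json": (None, json.dumps({"generate_alert": True})),
                                "bin": ("sample.bin", fh)})
print(resp.json())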
Code example #12

UNUSED = [
    'BASE64_ALPHABET',
    'DISPLAY_SEARCH_STRING',
    'DYNAMIC_MALICIOUSNESS',
    'DYNAMIC_MALWARE_PATTERN',
    'FILE_ATTRIBUTION',
    'FILE_EXTENSION',
    'FILE_MIMETYPE',
    'HEURISTIC',
    'REQUEST_SCORE',
    'REQUEST_USERNAME',
    'SERVICE_DESCRIPTION',
    'SERVICE_DISPLAY_NAME',
    'SERVICE_NAME',
]


def reverse_map(data: Dict) -> Dict:
    # Invert a one-to-many mapping: every value in each list becomes a key
    # pointing back to the dotted key it was listed under
    output = {}
    for k, v in data.items():
        for x in v:
            output[x] = k

    return output


v3_lookup_map = reverse_map(flatten(tag_map))
v3_lookup_map.update({k: None for k in UNUSED})
Code example #13
    def service_finished(self, sid: str, result_key: str, result: Result,
                         temporary_data: Optional[Dict[str, Any]] = None):
        """Notifies the dispatcher of service completion, and possible new files to dispatch."""
        # Make sure the dispatcher knows we were working on this task
        task_key = ServiceTask.make_key(sid=sid, service_name=result.response.service_name, sha=result.sha256)
        task = self.running_tasks.pop(task_key)
        if not task:
            self.log.warning(f"[{sid}/{result.sha256}] {result.response.service_name} could not find the specified "
                             f"task in its set of running tasks while processing successful results.")
            return
        task = ServiceTask(task)

        # Save or freshen the result, the CONTENT of the result shouldn't change, but we need to keep the
        # most distant expiry time to prevent pulling it out from under another submission too early
        if result.is_empty():
            # Empty Result will not be archived therefore result.archive_ts drives their deletion
            self.ds.emptyresult.save(result_key, {"expiry_ts": result.archive_ts})
        else:
            while True:
                old, version = self.ds.result.get_if_exists(
                    result_key, archive_access=self.config.datastore.ilm.update_archive, version=True)
                if old:
                    if old.expiry_ts and result.expiry_ts:
                        result.expiry_ts = max(result.expiry_ts, old.expiry_ts)
                    else:
                        result.expiry_ts = None
                try:
                    self.ds.result.save(result_key, result, version=version)
                    break
                except VersionConflictException as vce:
                    self.log.info(f"Retrying to save results due to version conflict: {str(vce)}")

        # Send the result key to any watching systems
        msg = {'status': 'OK', 'cache_key': result_key}
        for w in self._get_watcher_list(task.sid).members():
            NamedQueue(w, host=self.redis).push(msg)

        # Save the tags
        tags = []
        for section in result.result.sections:
            tags.extend(tag_dict_to_list(flatten(section.tags.as_primitives())))

        # Pull out file names if we have them
        file_names = {}
        for extracted_data in result.response.extracted:
            if extracted_data.name:
                file_names[extracted_data.sha256] = extracted_data.name

        # Push a summary of the result to the dispatcher's result queue
        dispatcher = task.metadata['dispatcher__']
        result_queue = self._get_queue_from_cache(DISPATCH_RESULT_QUEUE + dispatcher)
        ex_ts = result.expiry_ts.strftime(DATEFORMAT) if result.expiry_ts else result.archive_ts.strftime(DATEFORMAT)
        result_queue.push({
            # 'service_task': task.as_primitives(),
            # 'result': result.as_primitives(),
            'sid': task.sid,
            'sha256': result.sha256,
            'service_name': task.service_name,
            'service_version': result.response.service_version,
            'service_tool_version': result.response.service_tool_version,
            'archive_ts': result.archive_ts.strftime(DATEFORMAT),
            'expiry_ts': ex_ts,
            'result_summary': {
                'key': result_key,
                'drop': result.drop_file,
                'score': result.result.score,
                'children': [r.sha256 for r in result.response.extracted],
            },
            'tags': tags,
            'extracted_names': file_names,
            'temporary_data': temporary_data
        })
Code example #14
    def _handle_task_result(self, exec_time: int, task: ServiceTask,
                            result: Dict[str, Any], client_id, service_name,
                            freshen: bool, metric_factory):
        def freshen_file(file_info_list, item):
            file_info = file_info_list.get(item['sha256'], None)
            if file_info is None or not self.filestore.exists(item['sha256']):
                return True
            else:
                file_info['archive_ts'] = archive_ts
                file_info['expiry_ts'] = expiry_ts
                file_info['classification'] = item['classification']
                self.datastore.save_or_freshen_file(
                    item['sha256'],
                    file_info,
                    file_info['expiry_ts'],
                    file_info['classification'],
                    is_section_image=item.get('is_section_image', False))
            return False

        archive_ts = now_as_iso(self.config.datastore.ilm.days_until_archive *
                                24 * 60 * 60)
        if task.ttl:
            expiry_ts = now_as_iso(task.ttl * 24 * 60 * 60)
        else:
            expiry_ts = None

        # Check if all files are in the filestore
        if freshen:
            missing_files = []
            hashes = list({
                f['sha256']
                for f in result['response']['extracted'] + result['response']['supplementary']
            })
            file_infos = self.datastore.file.multiget(hashes,
                                                      as_obj=False,
                                                      error_on_missing=False)

            with elasticapm.capture_span(
                    name="handle_task_result.freshen_files",
                    span_type="tasking_client"):
                with concurrent.futures.ThreadPoolExecutor(
                        max_workers=5) as executor:
                    res = {
                        f['sha256']: executor.submit(freshen_file, file_infos, f)
                        for f in result['response']['extracted'] + result['response']['supplementary']
                    }
                for k, v in res.items():
                    if v.result():
                        missing_files.append(k)

            if missing_files:
                return missing_files

        # Add scores to the heuristics, if any section set a heuristic
        with elasticapm.capture_span(
                name="handle_task_result.process_heuristics",
                span_type="tasking_client"):
            total_score = 0
            for section in result['result']['sections']:
                zeroize_on_sig_safe = section.pop('zeroize_on_sig_safe', True)
                section['tags'] = flatten(section['tags'])
                if section.get('heuristic'):
                    heur_id = f"{service_name.upper()}.{str(section['heuristic']['heur_id'])}"
                    section['heuristic']['heur_id'] = heur_id
                    try:
                        section['heuristic'], new_tags = \
                            self.heuristic_handler.service_heuristic_to_result_heuristic(
                                section['heuristic'], self.heuristics, zeroize_on_sig_safe)
                        for tag in new_tags:
                            section['tags'].setdefault(tag[0], [])
                            if tag[1] not in section['tags'][tag[0]]:
                                section['tags'][tag[0]].append(tag[1])
                        total_score += section['heuristic']['score']
                    except InvalidHeuristicException:
                        section['heuristic'] = None

        # Update the total score of the result
        result['result']['score'] = total_score

        # Add timestamps for creation, archive and expiry
        result['created'] = now_as_iso()
        result['archive_ts'] = archive_ts
        result['expiry_ts'] = expiry_ts

        # Pop the temporary submission data
        temp_submission_data = result.pop('temp_submission_data', None)
        if temp_submission_data:
            old_submission_data = {
                row.name: row.value
                for row in task.temporary_submission_data
            }
            temp_submission_data = {
                k: v
                for k, v in temp_submission_data.items()
                if k not in old_submission_data or v != old_submission_data[k]
            }
            big_temp_data = {
                k: len(str(v))
                for k, v in temp_submission_data.items()
                if len(str(v)) > self.config.submission.max_temp_data_length
            }
            if big_temp_data:
                big_data_sizes = [f"{k}={v}" for k, v in big_temp_data.items()]
                self.log.warning(
                    f"[{task.sid}] The following temporary submission keys were ignored because they are "
                    "bigger than the maximum data size allowed "
                    f"[{self.config.submission.max_temp_data_length}]: {' | '.join(big_data_sizes)}"
                )
                temp_submission_data = {
                    k: v
                    for k, v in temp_submission_data.items()
                    if k not in big_temp_data
                }

        # Process the tag values
        with elasticapm.capture_span(name="handle_task_result.process_tags",
                                     span_type="tasking_client"):
            for section in result['result']['sections']:
                # Perform tag safelisting
                tags, safelisted_tags = self.tag_safelister.get_validated_tag_map(
                    section['tags'])
                section['tags'] = unflatten(tags)
                section['safelisted_tags'] = safelisted_tags

                section['tags'], dropped = construct_safe(
                    Tagging, section.get('tags', {}))

                # Set section score to zero and lower total score if service is set to zeroize score
                # and all tags were safelisted
                if section.pop('zeroize_on_tag_safe', False) and \
                        section.get('heuristic') and \
                        len(tags) == 0 and \
                        len(safelisted_tags) != 0:
                    result['result']['score'] -= section['heuristic']['score']
                    section['heuristic']['score'] = 0

                if dropped:
                    self.log.warning(
                        f"[{task.sid}] Invalid tag data from {service_name}: {dropped}"
                    )

        result = Result(result)
        result_key = result.build_key(
            service_tool_version=result.response.service_tool_version,
            task=task)
        self.dispatch_client.service_finished(task.sid, result_key, result,
                                              temp_submission_data)

        # Metrics
        if result.result.score > 0:
            metric_factory.increment('scored')
        else:
            metric_factory.increment('not_scored')

        self.log.info(
            f"[{task.sid}] {client_id} - {service_name} "
            f"successfully completed task{f' in {exec_time}ms' if exec_time else ''}"
        )

        self.status_table.set(
            client_id, (service_name, ServiceStatus.Idle, time.time() + 5))
Code example #15
    def get_summary_from_keys(self,
                              keys,
                              cl_engine=forge.get_classification(),
                              user_classification=None):
        out = {
            "tags": [],
            "attack_matrix": [],
            "heuristics": {
                "info": [],
                "suspicious": [],
                "malicious": []
            },
            "classification": cl_engine.UNRESTRICTED,
            "filtered": False
        }
        done_map = {"heuristics": set(), "attack": set(), "tags": set()}

        if len(keys) == 0:
            return out

        keys = [x for x in list(keys) if not x.endswith(".e")]
        file_keys = list(set([x[:64] for x in keys]))
        try:
            items = self.result.multiget(keys, as_obj=False)
        except MultiKeyError as e:
            # Generate partial summaries even if results are missing
            log.warning(
                f"Trying to generate summary but we are missing result(s): {str(e.keys)}"
            )
            items = e.partial_output
            out['missing_results'] = e.keys
        try:
            files = self.file.multiget(file_keys, as_obj=False)
        except MultiKeyError as e:
            # Generate partial summaries even if results are missing
            log.warning(
                f"Trying to generate summary but we are missing file(s): {str(e.keys)}"
            )
            files = e.partial_output
            out['missing_files'] = e.keys

        for key, item in items.items():
            for section in item.get('result', {}).get('sections', []):
                file_classification = files.get(key[:64], {}).get(
                    'classification', section['classification'])
                if user_classification:
                    if not cl_engine.is_accessible(user_classification,
                                                   section['classification']):
                        out["filtered"] = True
                        continue
                    if not cl_engine.is_accessible(user_classification,
                                                   file_classification):
                        out["filtered"] = True
                        continue

                out["classification"] = cl_engine.max_classification(
                    out["classification"], section['classification'])
                out["classification"] = cl_engine.max_classification(
                    out["classification"], file_classification)

                h_type = "info"

                if section.get('heuristic', False):
                    # Get the heuristics data
                    if section['heuristic']['score'] < 100:
                        h_type = "info"
                    elif section['heuristic']['score'] < 1000:
                        h_type = "suspicious"
                    else:
                        h_type = "malicious"

                    cache_key = f"{section['heuristic']['heur_id']}_{key}"
                    if cache_key not in done_map['heuristics']:
                        out['heuristics'][h_type].append({
                            'heur_id': section['heuristic']['heur_id'],
                            'name': section['heuristic']['name'],
                            'key': key
                        })
                        done_map['heuristics'].add(cache_key)

                    for attack in section['heuristic'].get('attack', []):
                        # Get attack matrix data
                        attack_id = attack['attack_id']

                        cache_key = f"{attack_id}_{key}"
                        if cache_key not in done_map['attack']:
                            out['attack_matrix'].append({
                                "key": key,
                                "attack_id": attack_id,
                                "h_type": h_type,
                                "name": attack['pattern'],
                                "categories": attack['categories']
                            })
                            done_map['attack'].add(cache_key)

                # Get tagging data
                for tag_type, tags in flatten(section.get('tags', {})).items():
                    if tags is not None:
                        for tag in tags:
                            cache_key = f"{tag_type}_{tag}_{key}"

                            if cache_key not in done_map['tags']:
                                out['tags'].append({
                                    'type': tag_type,
                                    'h_type': h_type,
                                    'short_type': tag_type.rsplit(".", 1)[-1],
                                    'value': tag,
                                    'key': key
                                })
                                done_map['tags'].add(cache_key)

        return out
Code example #16
def validate_tags(tag_map):
    tag_map, _ = construct_safe(Tagging, unflatten(tag_map))
    tag_map = flatten(tag_map.as_primitives(strip_null=True))
    return tag_map
Code example #17
def submit(**kwargs):
    """
    Submit a single file, sha256 or url for analysis

        Note 1:
            If you are submitting a sha256 or a URL, you must use the application/json encoding and one of
            the sha256 or url parameters must be included in the data block.

        Note 2:
            If you are submitting a file directly, you have to use multipart/form-data encoding; this
            was done to reduce the memory footprint and speed up file transfers.
             ** Read the documentation of the MIME multipart standard if your library does not support it **

            The multipart/form-data for sending binary data has two parts:
                - The first part contains a JSON dump of the optional params and uses the name 'json'
                - The last part contains the file binary, uses the name 'bin' and includes a filename

    Variables:
    None
    
    Arguments: 
    None
    
    Data Block (SHA256 or URL):
    {
      // REQUIRED: One of the two following
      "sha256": "123...DEF",      # SHA256 hash of the file already in the datastore
      "url": "http://...",        # Url to fetch the file from

      // OPTIONAL VALUES
      "name": "file.exe",         # Name of the file to scan otherwise the sha256 or base file of the url

      "metadata": {               # Submission metadata
        "key": val,                 # Key/Value pair metadata values
      },

      "params": {                 # Submission parameters
        "key": val,                 # Key/Value pair for params that different then defaults
      },                            # Default params can be fetch at /api/v3/user/submission_params/<user>/
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"      <-- JSON data blob part (only previous optional values valid)

    {"metadata": {"hello": "world"}}
    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODED>

    --0b34a3c50d3c02dd804a172329a0b2aa--             <-- End of HTTP transmission


    Result example:
    <Submission message object as a json dictionary>
    """
    user = kwargs['user']
    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())

    with forge.get_filestore() as f_transport:
        try:
            # Get data block and binary blob
            if 'multipart/form-data' in request.content_type:
                if 'json' in request.values:
                    data = json.loads(request.values['json'])
                else:
                    data = {}
                binary = request.files['bin']
                name = data.get("name", binary.filename)
                sha256 = None
                url = None
            elif 'application/json' in request.content_type:
                data = request.json
                binary = None
                sha256 = data.get('sha256', None)
                url = data.get('url', None)
                name = data.get("name", None) or sha256 or (os.path.basename(url) if url else None)
            else:
                return make_api_response({}, "Invalid content type", 400)

            if data is None:
                return make_api_response({}, "Missing data block", 400)

            if not name:
                return make_api_response({}, "Filename missing", 400)

            name = os.path.basename(name)
            if not name:
                return make_api_response({}, "Invalid filename", 400)

            # Create task object
            if "ui_params" in data:
                s_params = ui_to_submission_params(data['ui_params'])
            else:
                s_params = ui_to_submission_params(
                    STORAGE.user_settings.get(user['uname'], as_obj=False))

            if not s_params:
                s_params = get_default_user_settings(user)

            s_params.update(data.get("params", {}))
            if 'groups' not in s_params:
                s_params['groups'] = user['groups']

            s_params['quota_item'] = True
            s_params['submitter'] = user['uname']
            if not s_params['description']:
                s_params['description'] = "Inspection of file: %s" % name

            if not Classification.is_accessible(user['classification'],
                                                s_params['classification']):
                return make_api_response(
                    {}, "You cannot start a scan with a higher "
                    "classification than you're allowed to see", 400)

            # Prepare the output directory
            try:
                os.makedirs(out_dir)
            except Exception:
                pass
            out_file = os.path.join(out_dir, name)

            # Get the output file
            extra_meta = {}
            if not binary:
                if sha256:
                    if f_transport.exists(sha256):
                        f_transport.download(sha256, out_file)
                    else:
                        return make_api_response(
                            {}, "SHA256 does not exist in our datastore", 404)
                else:
                    if url:
                        if not config.ui.allow_url_submissions:
                            return make_api_response(
                                {},
                                "URL submissions are disabled in this system",
                                400)

                        try:
                            safe_download(url, out_file)
                            extra_meta['submitted_url'] = url
                        except FileTooBigException:
                            return make_api_response(
                                {}, "File too big to be scanned.", 400)
                        except InvalidUrlException:
                            return make_api_response(
                                {}, "Url provided is invalid.", 400)
                        except ForbiddenLocation:
                            return make_api_response(
                                {}, "Hostname in this URL cannot be resolved.",
                                400)
                    else:
                        return make_api_response(
                            {},
                            "Missing file to scan. No binary, sha256 or url provided.",
                            400)
            else:
                with open(out_file, "wb") as my_file:
                    my_file.write(binary.read())

            try:
                metadata = flatten(data.get('metadata', {}))
                metadata.update(extra_meta)

                submission_obj = Submission({
                    "files": [],
                    "metadata": metadata,
                    "params": s_params
                })
            except (ValueError, KeyError) as e:
                return make_api_response("", err=str(e), status_code=400)

            # Submit the task to the system
            try:
                result = SubmissionClient(datastore=STORAGE,
                                          filestore=f_transport,
                                          config=config).submit(
                                              submission_obj,
                                              local_files=[out_file],
                                              cleanup=False)
            except SubmissionException as e:
                return make_api_response("", err=str(e), status_code=400)

            return make_api_response(result.as_primitives())

        finally:
            try:
                # noinspection PyUnboundLocalVariable
                os.unlink(out_file)
            except Exception:
                pass

            try:
                shutil.rmtree(out_dir, ignore_errors=True)
            except Exception:
                pass
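Mirroring the docstring above, a sketch of the application/json variant of this call; the host, endpoint path and headers are again illustrative assumptions:

import requests

# Hypothetical deployment values; adjust for your own instance
resp = requests.post(
    "https://assemblyline.example.com/api/v4/submit/",
    headers={"x-user": "admin", "x-apikey": "devkey:admin"},  # assumed auth scheme
    json={
        "url": "http://example.com/sample.exe",
        "metadata": {"hello": "world"},
    },
)
print(resp.json())  # <Submission message object as a json dictionary>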
Code example #18
    def submit(self,
               submission_obj: SubmissionObject,
               local_files: List = None,
               completed_queue=None):
        """Submit several files in a single submission.

        After this method runs, there should be no local copies of the file left.
        """
        if local_files is None:
            local_files = []

        if len(submission_obj.files) == 0 and len(local_files) == 0:
            raise SubmissionException("No files found to submit...")

        if submission_obj.params.ttl:
            expiry = epoch_to_iso(submission_obj.time.timestamp() +
                                  submission_obj.params.ttl * 24 * 60 * 60)
        else:
            expiry = None
        max_size = self.config.submission.max_file_size

        for local_file in local_files:
            # Upload/download, extract, analyze files
            original_classification = str(submission_obj.params.classification)
            file_hash, size, new_metadata = self._ready_file(
                local_file, expiry, original_classification)
            new_name = new_metadata.pop('name', safe_str(os.path.basename(local_file)))
            meta_classification = new_metadata.pop('classification', original_classification)
            if meta_classification != original_classification:
                try:
                    submission_obj.params.classification = Classification.max_classification(
                        meta_classification, original_classification)
                except InvalidClassification as ic:
                    raise SubmissionException(
                        "The classification found inside the cart file cannot be merged with "
                        f"the classification the file was submitted as: {str(ic)}"
                    )

            submission_obj.metadata.update(**flatten(new_metadata))

            # Check that after we have resolved exactly what to pass on, that it
            # remains a valid target for scanning
            if size > max_size and not submission_obj.params.ignore_size:
                msg = "File too large (%d > %d). Submission failed" % (
                    size, max_size)
                raise SubmissionException(msg)
            elif size == 0:
                msg = "File empty. Submission failed"
                raise SubmissionException(msg)

            submission_obj.files.append(
                File({
                    'name': new_name,
                    'size': size,
                    'sha256': file_hash,
                }))

        # Clearing runtime_excluded on initial submit or resubmit
        submission_obj.params.services.runtime_excluded = []

        # We should now have all the information we need to construct a submission object
        sub = Submission(
            dict(
                archive_ts=now_as_iso(
                    self.config.datastore.ilm.days_until_archive * 24 * 60 *
                    60),
                classification=submission_obj.params.classification,
                error_count=0,
                errors=[],
                expiry_ts=expiry,
                file_count=len(submission_obj.files),
                files=submission_obj.files,
                max_score=0,
                metadata=submission_obj.metadata,
                params=submission_obj.params,
                results=[],
                sid=submission_obj.sid,
                state='submitted',
                scan_key=submission_obj.scan_key,
            ))

        if self.config.ui.allow_malicious_hinting and submission_obj.params.malicious:
            sub.verdict = {"malicious": [submission_obj.params.submitter]}

        self.datastore.submission.save(sub.sid, sub)

        self.log.debug("Submission complete. Dispatching: %s", sub.sid)
        self.dispatcher.dispatch_submission(sub,
                                            completed_queue=completed_queue)

        return sub
Code example #19
    def submit(self, submission_obj: SubmissionObject, local_files: List = None, cleanup=True, completed_queue=None):
        """Submit several files in a single submission.

        After this method runs, there should be no local copies of the file left.
        """
        if local_files is None:
            local_files = []

        try:
            expiry = now_as_iso(submission_obj.params.ttl * 24 * 60 * 60) if submission_obj.params.ttl else None
            max_size = self.config.submission.max_file_size

            if len(submission_obj.files) == 0:
                if len(local_files) == 0:
                    raise SubmissionException("No files found to submit...")

                for local_file in local_files:
                    # Upload/download, extract, analyze files
                    file_hash, size, new_metadata = self._ready_file(local_file, expiry,
                                                                     str(submission_obj.params.classification),
                                                                     cleanup, upload=True)
                    new_name = new_metadata.pop('name', safe_str(os.path.basename(local_file)))
                    submission_obj.params.classification = new_metadata.pop('classification',
                                                                            submission_obj.params.classification)
                    submission_obj.metadata.update(**flatten(new_metadata))

                    # Check that after we have resolved exactly what to pass on, that it
                    # remains a valid target for scanning
                    if size > max_size and not submission_obj.params.ignore_size:
                        msg = "File too large (%d > %d). Submission failed" % (size, max_size)
                        raise SubmissionException(msg)
                    elif size == 0:
                        msg = "File empty. Submission failed"
                        raise SubmissionException(msg)

                    submission_obj.files.append(File({
                        'name': new_name,
                        'size': size,
                        'sha256': file_hash,
                    }))
            else:
                for f in submission_obj.files:
                    temporary_path = None
                    try:
                        fd, temporary_path = tempfile.mkstemp(prefix="submission.submit")
                        os.close(fd)  # We don't need the file descriptor open
                        self.filestore.download(f.sha256, temporary_path)
                        file_hash, size, new_metadata = self._ready_file(temporary_path, expiry,
                                                                         str(submission_obj.params.classification),
                                                                         cleanup, sha256=f.sha256)

                        new_name = new_metadata.pop('name', f.name)
                        submission_obj.params.classification = new_metadata.pop('classification',
                                                                                submission_obj.params.classification)
                        submission_obj.metadata.update(**flatten(new_metadata))

                        # Check that after we have resolved exactly what to pass on, that it
                        # remains a valid target for scanning
                        if size > max_size and not submission_obj.params.ignore_size:
                            msg = "File too large (%d > %d). Submission failed" % (size, max_size)
                            raise SubmissionException(msg)
                        elif size == 0:
                            msg = "File empty. Submission failed"
                            raise SubmissionException(msg)

                        if f.size is None:
                            f.size = size

                        f.name = new_name
                        f.sha256 = file_hash

                    finally:
                        if temporary_path:
                            if os.path.exists(temporary_path):
                                os.unlink(temporary_path)

            # Initialize the temporary data from the submission parameter
            if submission_obj.params.initial_data:
                try:
                    temp_hash_name = get_temporary_submission_data_name(submission_obj.sid,
                                                                        submission_obj.files[0].sha256)
                    temporary_submission_data = ExpiringHash(temp_hash_name, host=self.redis)
                    temporary_submission_data.multi_set(json.loads(submission_obj.params.initial_data))
                except ValueError as err:
                    self.log.warning(f"[{submission_obj.sid}] could not process initialization data: {err}")

            # Clearing runtime_excluded on initial submit or resubmit
            submission_obj.params.services.runtime_excluded = []

            # We should now have all the information we need to construct a submission object
            sub = Submission(dict(
                archive_ts=now_as_iso(self.config.datastore.ilm.days_until_archive * 24 * 60 * 60),
                classification=submission_obj.params.classification,
                error_count=0,
                errors=[],
                expiry_ts=expiry,
                file_count=len(submission_obj.files),
                files=submission_obj.files,
                max_score=0,
                metadata=submission_obj.metadata,
                params=submission_obj.params,
                results=[],
                sid=submission_obj.sid,
                state='submitted'
            ))
            self.datastore.submission.save(sub.sid, sub)

            self.log.debug("Submission complete. Dispatching: %s", sub.sid)
            self.dispatcher.dispatch_submission(sub, completed_queue=completed_queue)

            return sub
        finally:
            # Just in case this method fails, clean up local files
            if cleanup:
                for path in local_files:
                    if path and os.path.exists(path):
                        # noinspection PyBroadException
                        try:
                            os.unlink(path)
                        except Exception:
                            self.log.error("Couldn't delete dangling file %s", path)