Exemple #1
0
    def handle_artefacts(artefact_list: list,
                         request: ServiceRequest) -> ResultSection:
        """
        Attaches every artefact to the request and builds the "Sandbox Artefacts" section

        Positional arguments:
        artefact_list -- list of dictionaries that each represent an artefact
        request -- service request that each artefact is added to, as an
                   extracted file when the artefact is flagged to_be_extracted
                   and as a supplementary file otherwise

        Returns the "Sandbox Artefacts" section if it ended up with at least
        one subsection, otherwise None
        """
        # The raw dictionaries are first passed through SandboxOntology._validate_artefacts
        checked_artefacts = SandboxOntology._validate_artefacts(artefact_list)
        summary_section = ResultSection("Sandbox Artefacts")

        for item in checked_artefacts:
            SandboxOntology._handle_artefact(item, summary_section)

            if not item.to_be_extracted:
                request.add_supplementary(item.path, item.name,
                                          item.description)
                continue

            try:
                request.add_extracted(item.path, item.name, item.description)
            except MaxExtractedExceeded:
                # Hitting the extraction limit is tolerated on purpose: the
                # artefact is simply not extracted and processing carries on
                pass

        if summary_section.subsections:
            return summary_section
        return None
    def extract_powershell(self, parameter: str, section: ResultSection,
                           request: ServiceRequest) -> None:
        """Searches parameter for PowerShell, adds as extracted if found

        Sets self.found_powershell when at least one match is found and records
        the SHA256 of every extracted command in self.file_hashes so the same
        command is only extracted once.

        Args:
            parameter: String to be searched
            section: Section to be modified if PowerShell found
            request: Service request the PowerShell files are added to as extracted files
        """

        matches = find_powershell_strings(parameter.encode())

        if not matches:
            return

        self.found_powershell = True

        for match in matches:
            powershell_command = get_powershell_command(match.value)
            sha256hash = hashlib.sha256(powershell_command).hexdigest()
            # Add PowerShell code as extracted, account for duplicates
            if sha256hash in self.file_hashes:
                continue

            powershell_filename = f"{sha256hash[0:10]}.ps1"
            # The preview is a fixed-size slice of raw bytes: it may end in the middle of a
            # multi-byte character (or not be valid UTF-8 at all), so undecodable bytes are
            # dropped instead of letting a UnicodeDecodeError abort the whole analysis
            preview = powershell_command[:100].decode(errors="ignore")
            ResultSection(
                "Discovered PowerShell code in parameter.",
                parent=section,
                body=preview + f"... see [{powershell_filename}]",
            )
            powershell_file_path = os.path.join(self.working_directory,
                                                powershell_filename)
            with open(powershell_file_path, "wb") as f:
                f.write(powershell_command)
            request.add_extracted(
                powershell_file_path, powershell_filename,
                "Discovered PowerShell code in parameter")
            self.file_hashes.append(sha256hash)
    def dexray(self, request: ServiceRequest, local: str):
        """Iterate through quarantine decrypt methods and attach what they recover.

        Args:
            request: AL request object.
            local: File path of AL sample.
        Returns:
            The metadata returned by the first extract method that produced either
            extracted files or metadata. If no method produced anything, the value
            from the last method tried (or an empty dict when there are no methods).
        Raises:
            MaxExtractedExceeded: Re-raised with a descriptive message when adding
                an extracted file exceeds the request's extraction limit.
        """
        encoding = request.file_type.replace("quarantine/", "")
        extracted, metadata = [], {}

        # Stop at the first extraction method that yields anything
        for method in self.extract_methods:
            # noinspection PyArgumentList
            extracted, metadata = method(local, self.sha,
                                         self.working_directory, encoding)
            if extracted or metadata:
                break

        remaining = len(extracted)
        # safe_str the file name before handing the entries to the request
        sanitized = [[path, safe_str(name), description]
                     for path, name, description in extracted]
        for path, name, description in sanitized:
            try:
                was_added = request.add_extracted(path, name, description)
            except MaxExtractedExceeded:
                raise MaxExtractedExceeded(
                    f"This file contains {remaining} extracted files, exceeding the "
                    f"maximum of {request.max_extracted} extracted files allowed. "
                    "None of the files were extracted.")
            # A file that was not successfully added no longer counts as extracted
            if not was_added:
                remaining -= 1

        return metadata
Exemple #4
0
    def _handle_subanalyses(self, request: ServiceRequest, sha256: str,
                            analysis_id: str, file_verdict_map: Dict[str, str],
                            parent_section: ResultSection) -> None:
        """
        This method handles the subanalyses for a given analysis ID.

        For every sub analysis worth reporting, a key-value section is added
        under parent_section, optionally with code reuse and family details.
        Files whose hash differs from the submitted file get a heuristic based
        on their verdict and are downloaded and added as extracted files. Once
        all sub analyses are handled, a process tree section is added to
        parent_section if it has a body.

        :param request: The service request object
        :param sha256: The hash of the given file
        :param analysis_id: The ID for the analysis which we will be retrieving
        :param file_verdict_map: A map of sha256s representing a file's
        contents, and the verdict for that file
        :param parent_section: The result section that the sub analysis
        sections and the process tree section will be added to, if applicable
        :return: None
        """
        so = SandboxOntology()

        # This boolean is used to determine if we should try to download another file
        can_we_download_files = True

        # These sets will be used as we work through the process trees
        process_path_set = set()
        command_line_set = set()

        # Now let's get into the subanalyses for this sample
        sub_analyses = self.client.get_sub_analyses_by_id(analysis_id)

        for sub in sub_analyses:
            sub_analysis_id = sub["sub_analysis_id"]

            # Get the extraction info, which is basically the details of how the subanalysis object came to be
            extraction_info = sub.pop("extraction_info", None)

            # Processes is only present when the sample has undergone dynamic execution
            if extraction_info and "processes" not in extraction_info:
                extraction_info = None

            code_reuse = self.client.get_sub_analysis_code_reuse_by_id(
                analysis_id, sub_analysis_id)

            # "families" is removed from code_reuse so that it is not part of
            # the "Code reuse detected" key-value section built further down
            if code_reuse:
                families = code_reuse.pop("families", [])
            else:
                families = []

            if not families and not extraction_info:
                # Otherwise, boring!
                continue

            # Families are only kept if at least one of them reuses more than one gene
            if families and not any(family["reused_gene_count"] > 1
                                    for family in families):
                # Most likely a false positive
                continue

            ###
            # If we have gotten to this point, then the sub analysis is worth reporting
            ###

            extraction_method = sub["source"].replace("_", " ")

            # The extraction method is only mentioned in the title when it is not "root"
            if extraction_method != "root":
                sub_kv_section = ResultKeyValueSection(
                    f"Subanalysis report for {sub['sha256']}, extracted via {extraction_method}"
                )
            else:
                sub_kv_section = ResultKeyValueSection(
                    f"Subanalysis report for {sub['sha256']}")

            metadata = self.client.get_sub_analysis_metadata_by_id(
                analysis_id, sub_analysis_id)
            # A copy is handed over, so the fetched metadata itself is not the object being processed
            processed_subanalysis = self._process_details(
                metadata.copy(), UNINTERESTING_SUBANALYSIS_KEYS)
            sub_kv_section.update_items(processed_subanalysis)
            parent_section.add_subsection(sub_kv_section)

            if code_reuse:
                code_reuse_kv_section = ResultKeyValueSection(
                    "Code reuse detected")
                code_reuse_kv_section.update_items(code_reuse)
                sub_kv_section.add_subsection(code_reuse_kv_section)

            sub_sha256 = sub["sha256"]
            if families:
                self._process_families(families, sub_sha256, file_verdict_map,
                                       sub_kv_section)

            # The process details are accumulated across all sub analyses into
            # the shared sets and the SandboxOntology instance
            if extraction_info:
                self._process_extraction_info(extraction_info["processes"],
                                              process_path_set,
                                              command_line_set, so)

            # Setting a heuristic here or downloading the file would be redundant if the hash matched the original file
            if sub_sha256 != sha256:
                self._set_heuristic_by_verdict(
                    sub_kv_section, file_verdict_map.get(sub_sha256))

                if can_we_download_files:
                    file_was_downloaded = self.client.download_file_by_sha256(
                        sub_sha256, self.working_directory)
                    if file_was_downloaded:
                        path = f"{self.working_directory}/{sub_sha256}.sample"
                        request.add_extracted(
                            path,
                            f"{sub_sha256}.sample",
                            f"Extracted via {extraction_method}",
                        )
                        self.log.debug(
                            f"Added {sub_sha256}.sample as an extracted file.")
                    else:
                        # After the first failed download, no further downloads
                        # are attempted for the remaining sub analyses
                        can_we_download_files = False

        # Tag the process tree with everything collected from the sub analyses;
        # the section is only attached when it actually has a body
        process_tree_section = so.get_process_tree_result_section()
        for process_path in process_path_set:
            process_tree_section.add_tag("dynamic.process.file_name",
                                         process_path)
        for command_line in command_line_set:
            process_tree_section.add_tag("dynamic.process.command_line",
                                         command_line)
        if process_tree_section.body:
            parent_section.add_subsection(process_tree_section)
    def execute(self, request: ServiceRequest) -> None:
        """Run ViperMonkey on the submitted file and report what it recorded.

        The file (or, for BOM-prefixed content typed as XML, a re-encoded temporary
        copy) is passed to the Python 2 ViperMonkey wrapper script. The JSON line
        found in the wrapper's stdout provides the recorded actions, the external
        VBA functions called and the temporary IOCs, which are turned into result
        sections. Files in the artifact directory are added as extracted files and
        the ViperMonkey log is added as a supplementary file when there are results
        or an error.

        Args:
            request: Service request for the file being analysed; the result,
                extracted files and supplementary files are attached to it.
        """
        # Imported locally so that this method does not depend on a module-level import
        import shlex

        self.result = Result()
        request.result = self.result

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions: List[str] = []
        external_functions: List[str] = []
        tmp_iocs: List[str] = []
        output_results: Dict[str, Any] = {}
        potential_base64: Set[str] = set()
        # Declared before the try block so that the finally clause can always clean it up
        input_file_obj: Optional[IO] = None

        # Running ViperMonkey
        try:
            file_contents = request.file_contents
            input_file: str = request.file_path
            # Typical start to XML files
            if not file_contents.startswith(
                    b"<?") and request.file_type == "code/xml":
                # Default encoding/decoding if BOM not found
                encoding: Optional[str] = None
                decoding: Optional[str] = None
                # Remove potential BOMs from contents
                if file_contents.startswith(BOM_UTF8):
                    encoding = "utf-8"
                    decoding = "utf-8-sig"
                elif file_contents.startswith(BOM_UTF16):
                    encoding = "utf-16"
                    decoding = "utf-16"
                if encoding and decoding:
                    input_file_obj = tempfile.NamedTemporaryFile(
                        "w+", encoding=encoding)
                    input_file_obj.write(
                        file_contents.decode(decoding, errors="ignore"))
                    # The subprocess opens the file by name, so the buffered
                    # content has to be on disk before it is started
                    input_file_obj.flush()
                    input_file = input_file_obj.name
                else:
                    # If the file_type was detected as XML, it's probably buried within but not actually an XML file
                    # Give no response as ViperMonkey can't process this kind of file
                    return

            # An argument list (no shell) keeps paths containing whitespace or shell
            # metacharacters intact. The interpreter setting is split so that a value
            # carrying its own arguments keeps working as it did through the shell.
            cmd = [
                *shlex.split(PYTHON2_INTERPRETER),
                os.path.join(os.path.dirname(__file__),
                             "vipermonkey_compat.py2"),
                input_file,
                self.working_directory,
            ]
            p = subprocess.run(cmd, capture_output=True)
            stdout = p.stdout

            # Add artifacts
            artifact_dir = os.path.join(
                self.working_directory,
                os.path.basename(input_file) + "_artifacts")
            if os.path.exists(artifact_dir):
                for file in os.listdir(artifact_dir):
                    try:
                        file_path = os.path.join(artifact_dir, file)
                        # Only non-empty regular files are extracted
                        if os.path.isfile(file_path) and os.path.getsize(
                                file_path):
                            request.add_extracted(
                                file_path, file,
                                "File extracted by ViperMonkey during analysis"
                            )
                    except os.error as e:
                        self.log.warning(e)

            # Read output
            if stdout:
                # The first line that looks like a JSON object holds the results
                for line in stdout.splitlines():
                    if line.startswith(b"{") and line.endswith(b"}"):
                        try:
                            output_results = json.loads(line)
                        except UnicodeDecodeError:
                            output_results = json.loads(
                                line.decode("utf-8", "replace"))
                        break

                # Checking for tuple in case vmonkey return is None
                # If no macros found, return is [][][], if error, return is None
                # vmonkey_err can still happen if return is [][][], log as warning instead of error
                if isinstance(output_results.get("vmonkey_values"), dict):
                    # Structure of variable "actions" is as follows:
                    # [action, parameters, description]
                    # action: 'Found Entry Point', 'Execute Command', etc...
                    # parameters: Parameters for function
                    # description: 'Shell Function', etc...
                    #
                    # external_functions is a list of built-in VBA functions
                    # that were called
                    actions = output_results["vmonkey_values"]["actions"]
                    external_functions = output_results["vmonkey_values"][
                        "external_funcs"]
                    tmp_iocs = output_results["vmonkey_values"]["tmp_iocs"]
                    if output_results["vmonkey_err"]:
                        vmonkey_err = True
                        self.log.warning(output_results["vmonkey_err"])
                else:
                    vmonkey_err = True
            else:
                vmonkey_err = True

        except Exception:
            self.log.exception(
                f"Vipermonkey failed to analyze file {request.sha256}")
        finally:
            # Close (and thereby remove) the temporary input file on every path,
            # including when the analysis above raised
            if input_file_obj is not None:
                try:
                    input_file_obj.close()
                except OSError as e:
                    self.log.warning(
                        f"Unable to clean up temporary input file: {e}")

        if actions:
            # Creating action section
            action_section = ResultSection("Recorded Actions:",
                                           parent=self.result)
            action_section.add_tag("technique.macro", "Contains VBA Macro(s)")
            sub_action_sections: Dict[str, ResultSection] = {}
            for action, parameters, description in actions:  # Creating action sub-sections for each action
                if not description:  # For actions with no description, just use the type of action
                    description = action

                if description not in sub_action_sections:
                    # Action's description will be the sub-section name
                    sub_action_section = ResultSection(description,
                                                       parent=action_section)
                    sub_action_sections[description] = sub_action_section
                    if description == "Shell function":
                        sub_action_section.set_heuristic(2)
                else:
                    # Reuse existing section
                    sub_action_section = sub_action_sections[description]
                    if sub_action_section.heuristic:
                        sub_action_section.heuristic.increment_frequency()

                # Parameters are sometimes stored as a list, account for this
                if isinstance(parameters, list):
                    for item in parameters:
                        # Parameters includes more than strings (booleans for example)
                        if isinstance(item, str):
                            # Check for PowerShell
                            self.extract_powershell(item, sub_action_section,
                                                    request)
                    # Join list items into single string
                    param = ", ".join(str(p) for p in parameters)

                elif isinstance(parameters, str):
                    param = parameters
                    self.extract_powershell(param, sub_action_section,
                                            request)
                else:
                    # Parameters includes more than strings (booleans for example).
                    # They are stringified because everything below (the base64
                    # check in particular, which calls .encode()) expects text.
                    param = str(parameters)

                # If the description field was empty, re-organize result section for this case
                if description == action:
                    sub_action_section.add_line(param)
                else:
                    sub_action_section.add_line(
                        f"Action: {action}, Parameters: {param}")

                # Check later for base64
                potential_base64.add(param)

                # Add urls/ips found in parameter to respective lists
                self.find_ip(param)
        # Check tmp_iocs
        res_temp_iocs = ResultSection("Runtime temporary IOCs")
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs, request)
            potential_base64.add(ioc)
            self.find_ip(ioc)

        # The section is only reported if something was attached to it
        if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add PowerShell score/tag if found
        if self.found_powershell:
            ResultSection("Discovered PowerShell code in file",
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Check parameters and temp_iocs for base64
        base64_section = ResultSection("Possible Base64 found",
                                       heuristic=Heuristic(5, frequency=0))
        for param in potential_base64:
            self.check_for_b64(param, base64_section, request,
                               request.file_contents)
        if base64_section.body:
            self.result.add_section(base64_section)

        # Add url/ip tags
        self.add_ip_tags()

        # Create section for built-in VBA functions called
        if len(external_functions) > 0:
            external_func_section = ResultSection(
                "VBA functions called",
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=self.result)
            for func in external_functions:
                if func in vba_builtins:
                    external_func_section.add_line(func + ": " +
                                                   vba_builtins[func])
                else:
                    external_func_section.add_line(func)

        # Add vmonkey log as a supplemental file if we have results
        if "stdout" in output_results and (vmonkey_err
                                           or request.result.sections):
            temp_log_copy = os.path.join(
                tempfile.gettempdir(), f"{request.sid}_vipermonkey_output.log")
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results["stdout"])

            request.add_supplementary(temp_log_copy, "vipermonkey_output.log",
                                      "ViperMonkey log output")
            if vmonkey_err is True:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1),
                )
    def check_for_b64(self, data: str, section: ResultSection,
                      request: ServiceRequest, file_contents: bytes) -> bool:
        """Search and decode base64 strings in sample data.

        Base64 strings that already appear verbatim in the original file are
        skipped. Decoded content smaller than FILE_PARAMETER_SIZE is shown inline;
        larger content is written to the working directory and added to the
        request as extracted file(s), and the report refers to the file instead.

        Args:
            data: Data to be parsed
            section: base64 subsection, must have heuristic set
            request: Service request that large decoded content is added to as extracted files
            file_contents: Contents of the original file, used to skip base64 that is not new

        Returns:
            decoded: Boolean which is true if base64 found
        """
        assert section.heuristic

        encoded_data = data.encode()
        # (start, end, replacement text) for every base64 span that was decoded.
        # start/end are offsets into encoded_data, exactly as reported by find_base64.
        replacements = []

        for content, start, end in find_base64(encoded_data):
            if encoded_data[start:end] in file_contents:
                # Present in original file, not an intermediate IoC
                continue
            try:
                # Powershell base64 will be utf-16
                content = content.decode("utf-16").encode()
            except UnicodeDecodeError:
                pass
            try:
                if len(content) < FILE_PARAMETER_SIZE:
                    replacement = " " + content.decode(errors="ignore")
                else:
                    b64hash = ""
                    pe_files = find_pe_files(content)
                    for pe_file in pe_files:
                        b64hash = hashlib.sha256(pe_file).hexdigest()
                        pe_path = os.path.join(self.working_directory, b64hash)
                        with open(pe_path, "wb") as f:
                            f.write(pe_file)
                        request.add_extracted(
                            pe_path, b64hash,
                            "PE file found in base64 encoded parameter")
                        section.heuristic.add_signature_id("pe_file")
                    if not pe_files:
                        b64hash = hashlib.sha256(content).hexdigest()
                        content_path = os.path.join(self.working_directory,
                                                    b64hash)
                        with open(content_path, "wb") as f:
                            f.write(content)
                        request.add_extracted(
                            content_path, b64hash,
                            "Large base64 encoded parameter")
                        section.heuristic.add_signature_id("possible_file")
                    replacement = f"[See extracted file {b64hash}]"
                replacements.append((start, end, replacement))
            except Exception as e:
                # Best effort: a candidate that cannot be processed is skipped,
                # but the reason is kept in the log rather than silently dropped
                self.log.debug(
                    f"Unable to process base64 candidate at offset {start}: {e}")

        decoded = bool(replacements)

        if decoded:
            # Assemble the decoded view from the ORIGINAL bytes. The offsets refer to
            # encoded_data, so applying them to a string that has already been modified
            # (or that contains multi-byte characters) would replace the wrong span.
            pieces = []
            cursor = 0
            for start, end, replacement in sorted(replacements):
                if start < cursor:
                    # Overlaps a span that has already been replaced
                    continue
                pieces.append(encoded_data[cursor:start].decode(errors="ignore"))
                pieces.append(replacement)
                cursor = end
            pieces.append(encoded_data[cursor:].decode(errors="ignore"))
            decoded_param = "".join(pieces)

            section.heuristic.increment_frequency()
            section.add_line(
                f"Possible Base64 {truncate(data)} decoded: {decoded_param}")
            self.find_ip(decoded_param)

        return decoded
Exemple #7
0
    def execute(self, request: ServiceRequest) -> None:
        """Iteratively de-obfuscate the submitted script and report the outcome.

        IOCs are collected from the original file first. For file types matching
        a code extraction pattern, the scripts are extracted as a first layer.
        The de-obfuscation techniques are then applied repeatedly, each successful
        one adding a layer, until a pass adds no layer or the attempt limit is
        exceeded, after which the final pass techniques are run. The steps taken,
        the start of the final layer, URIs that only appear after de-obfuscation
        and any files extracted along the way are added to the result.

        Args:
            request: Service request for the file being analysed; the result and
                extracted files are attached to it.
        """
        # --- Setup ----------------------------------------------------------------------------------------------
        request.result = Result()
        patterns = PatternMatch()

        if request.deep_scan:
            max_attempts = 100
        else:
            max_attempts = 10

        self.files_extracted = set()
        self.hashes = set()

        # --- Pre-Processing --------------------------------------------------------------------------------------
        # Get all IOCs prior to de-obfuscation
        pat_values = patterns.ioc_match(request.file_contents,
                                        bogon_ip=True,
                                        just_network=False)
        if pat_values and request.get_param('extract_original_iocs'):
            ioc_res = ResultSection(
                "The following IOCs were found in the original file",
                parent=request.result,
                body_format=BODY_FORMAT.MEMORY_DUMP)
            for k, val in pat_values.items():
                for v in val:
                    if ioc_res:
                        ioc_res.add_line(
                            f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}"
                        )
                        ioc_res.add_tag(k, v)

        # --- Prepare Techniques ----------------------------------------------------------------------------------
        techniques = [
            ('MSOffice Embedded script', self.msoffice_embedded_script_string),
            ('CHR and CHRB decode', self.chr_decode),
            ('String replace', self.string_replace),
            ('Powershell carets', self.powershell_carets),
            ('Array of strings', self.array_of_strings),
            ('Fake array vars', self.vars_of_fake_arrays),
            ('Reverse strings', self.str_reverse),
            ('B64 Decode', self.b64decode_str),
            ('Simple XOR function', self.simple_xor_function),
        ]
        second_pass = [('Concat strings', self.concat_strings),
                       ('MSWord macro vars', self.mswordmacro_vars),
                       ('Powershell vars', self.powershell_vars),
                       ('Charcode hex', self.charcode_hex)]
        final_pass = [
            ('Charcode', self.charcode),
        ]

        code_extracts = [('.*html.*', "HTML scripts extraction",
                          self.extract_htmlscript)]

        layers_list: List[Tuple[str, bytes]] = []
        layer = request.file_contents

        # --- Stage 1: Script Extraction --------------------------------------------------------------------------
        for pattern, name, func in code_extracts:
            if regex.match(regex.compile(pattern), request.task.file_type):
                extracted_parts = func(request.file_contents)
                layer = b"\n".join(extracted_parts).strip()
                layers_list.append((name, layer))
                break

        # --- Stage 2: Deobsfucation ------------------------------------------------------------------------------
        idx = 0
        first_pass_len = len(techniques)
        layers_count = len(layers_list)
        while True:
            if idx > max_attempts:
                # Attempt limit exceeded: run everything one last time, sequentially, then stop
                final_pass.extend(techniques)
                for name, technique in final_pass:
                    res = technique(layer)
                    if res:
                        layers_list.append((name, res))
                break
            with ThreadPoolExecutor() as executor:
                threads = [
                    executor.submit(technique, layer)
                    for _, technique in techniques
                ]
                results = [thread.result() for thread in threads]
                # Results are in submission order, so they pair up with techniques
                for (name, _), result in zip(techniques, results):
                    if result:
                        layers_list.append((name, result))
                        # Looks like it worked, restart with new layer
                        layer = result
            # If the layers haven't changed in a passing, break
            if layers_count == len(layers_list):
                if len(techniques) != first_pass_len:
                    # The second pass techniques were already added and still nothing
                    # changed: run the final pass and stop
                    final_pass.extend(techniques)
                    with ThreadPoolExecutor() as executor:
                        threads = [
                            executor.submit(technique, layer)
                            for _, technique in final_pass
                        ]
                        results = [thread.result() for thread in threads]
                        # These results come from final_pass, so the layer name must be
                        # taken from final_pass as well (not from techniques, whose
                        # entries sit at different positions)
                        for (name, _), result in zip(final_pass, results):
                            if result:
                                layers_list.append((name, result))
                    break
                # First pass without progress: bring in the second pass techniques
                for x in second_pass:
                    techniques.insert(0, x)
            layers_count = len(layers_list)
            idx += 1

        # --- Compiling results ----------------------------------------------------------------------------------
        if len(layers_list) > 0:
            extract_file = False
            num_layers = len(layers_list)

            # Compute heuristic
            if num_layers < 5:
                heur_id = 1
            elif num_layers < 10:
                heur_id = 2
            elif num_layers < 50:
                heur_id = 3
            elif num_layers < 100:
                heur_id = 4
            else:  # num_layers >= 100
                heur_id = 5

            # Cleanup final layer
            clean = self.clean_up_final_layer(layers_list[-1][1])
            if clean != request.file_contents:
                # Check for new IOCs
                pat_values = patterns.ioc_match(clean,
                                                bogon_ip=True,
                                                just_network=False)
                diff_tags: Dict[str, List[bytes]] = {}

                for uri in pat_values.get('network.static.uri', []):
                    # Compare URIs without query string
                    uri = uri.split(b'?', 1)[0]
                    if uri not in request.file_contents:
                        diff_tags.setdefault('network.static.uri', [])
                        diff_tags['network.static.uri'].append(uri)

                if request.deep_scan or (len(clean) > 1000
                                         and heur_id >= 4) or diff_tags:
                    extract_file = True

                # Display obfuscation steps
                mres = ResultSection(
                    "De-obfuscation steps taken by DeobsfuScripter",
                    parent=request.result)
                if heur_id:
                    mres.set_heuristic(heur_id)

                lcount = Counter([x[0] for x in layers_list])
                for l, c in lcount.items():
                    mres.add_line(f"{l}, {c} time(s).")

                # Display final layer
                byte_count = 5000
                if extract_file:
                    # Save extracted file
                    # The full layer is available as a file, so the preview is shortened
                    byte_count = 500
                    file_name = f"{os.path.basename(request.file_name)}_decoded_final"
                    file_path = os.path.join(self.working_directory, file_name)
                    # Ensure directory exists before write
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    with open(file_path, 'wb+') as f:
                        f.write(clean)
                        self.log.debug(
                            f"Submitted dropped file for analysis: {file_path}"
                        )
                    request.add_extracted(file_path, file_name,
                                          "Final deobfuscation layer")

                ResultSection(f"First {byte_count} bytes of the final layer:",
                              body=safe_str(clean[:byte_count]),
                              body_format=BODY_FORMAT.MEMORY_DUMP,
                              parent=request.result)

                # Display new IOCs from final layer
                if len(diff_tags) > 0:
                    ioc_new = ResultSection(
                        "New IOCs found after de-obfustcation",
                        parent=request.result,
                        body_format=BODY_FORMAT.MEMORY_DUMP)
                    has_network_heur = False
                    for ty, val in diff_tags.items():
                        for v in val:
                            if "network" in ty:
                                has_network_heur = True
                            ioc_new.add_line(
                                f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}"
                            )
                            ioc_new.add_tag(ty, v)

                    if has_network_heur:
                        ioc_new.set_heuristic(7)
                    else:
                        ioc_new.set_heuristic(6)

                if len(self.files_extracted) > 0:
                    ext_file_res = ResultSection(
                        "The following files were extracted during the deobfuscation",
                        heuristic=Heuristic(8),
                        parent=request.result)
                    for extracted in self.files_extracted:
                        file_name = os.path.basename(extracted)
                        ext_file_res.add_line(file_name)
                        request.add_extracted(
                            extracted, file_name,
                            "File of interest deobfuscated from sample")
Exemple #8
0
    def execute(self, request: ServiceRequest) -> None:
        """Collect entropy and metadata for the submitted file into ``request.result``.

        The method runs five independent stages:

        1. Compute the overall and per-partition entropy and attach it as a
           colormap graph section.
        2. For everything except ``shortcut/windows`` files, run the hachoir
           parser and report the metadata it extracts (tagging values that
           have an entry in ``TAG_MAP``).
        3. Run ``exiftool -j`` on the file and report its metadata, skipping
           the generic file-system keys.
        4. For ``shortcut/windows`` files, parse the LNK with LnkParse3, raise
           heuristics on risky targets / icons / timestamps / machine ids /
           appended data, extract embedded cmd or PowerShell code, and attach
           the full feature dump as a supplementary ``features.json``.
        5. For ``shortcut/web`` files, read the INI content and report the
           ``InternetShortcut`` section, tagging http(s) URLs and raising a
           heuristic on ``file:`` URLs.

        Args:
            request: The service request; the file at ``request.file_path`` is
                read and ``request.result`` is replaced by a new ``Result``.
        """
        request.result = Result()

        # 1. Calculate entropy map
        with open(request.file_path, "rb") as fin:
            (entropy, part_entropies) = calculate_partition_entropy(fin)

        entropy_graph_data = {"type": "colormap", "data": {"domain": [0, 8], "values": part_entropies}}

        ResultSection(
            f"File entropy: {round(entropy, 3)}",
            parent=request.result,
            body_format=BODY_FORMAT.GRAPH_DATA,
            body=json.dumps(entropy_graph_data, allow_nan=False),
        )

        if request.file_type != "shortcut/windows":
            # 2. Get hachoir metadata
            parser = createParser(request.file_path)
            if parser is not None:
                with parser:
                    parser_tags = parser.getParserTags()
                    parser_id = parser_tags.get("id", "unknown")

                    # Parser-specific tag mapping first, generic (None) mapping as fallback.
                    parser_tag_map = TAG_MAP.get(parser_id, {})
                    default_tag_map = TAG_MAP.get(None, {})

                    # Do basic metadata extraction
                    metadata = extractMetadata(parser, 1)

                    if metadata:
                        kv_body: Dict[str, Union[str, List[str]]] = {}
                        tags: List[Tuple[str, str]] = []
                        for m in metadata:
                            if m.key == "comment":
                                # Comments are split into a (key, value) pair by get_type_val.
                                for v in m.values:
                                    key, val = get_type_val(v.text, "comment")
                                    if not val:
                                        continue

                                    kv_body[key] = val

                                    tag_type = parser_tag_map.get(key, None) or default_tag_map.get(key, None)
                                    if tag_type is not None:
                                        tags.append((tag_type, val))
                            elif m.key in ["mime_type"]:
                                # Deliberately not reported.
                                pass
                            else:
                                values = [v.text for v in m.values]
                                # A single non-empty value is stored as a scalar, otherwise as a list.
                                if len(values) == 1 and values[0]:
                                    kv_body[m.key] = values[0]
                                elif values:
                                    kv_body[m.key] = values

                                tag_type = parser_tag_map.get(m.key, None) or default_tag_map.get(m.key, None)
                                if tag_type is not None:
                                    for v in values:
                                        tags.append((tag_type, v))

                        if kv_body:
                            res = ResultSection(
                                f"Metadata extracted by hachoir-metadata [Parser: {parser_id}]",
                                body=json.dumps(kv_body, allow_nan=False),
                                body_format=BODY_FORMAT.KEY_VALUE,
                                parent=request.result,
                            )

                            for t_type, t_val in tags:
                                res.add_tag(t_type, t_val)

        # 3. Get Exiftool Metadata
        import math

        exif = subprocess.run(["exiftool", "-j", request.file_path], capture_output=True, check=False)
        if exif.stdout:
            exif_data = json.loads(exif.stdout.decode("utf-8", errors="ignore"))
            # Guard against an empty JSON array so indexing cannot raise.
            res_data = exif_data[0] if exif_data else {}
            if "Error" not in res_data:
                # Keys describing the file on disk rather than its content.
                ignored_exif_keys = (
                    "SourceFile",
                    "ExifToolVersion",
                    "FileName",
                    "Directory",
                    "FileSize",
                    "FileModifyDate",
                    "FileAccessDate",
                    "FileInodeChangeDate",
                    "FilePermissions",
                    "FileType",
                    "FileTypeExtension",
                    "MIMEType",
                    "Warning",
                )
                exif_body = {}
                for k, v in res_data.items():
                    if v and k not in ignored_exif_keys:
                        # inf/-inf/NaN cannot be serialised with allow_nan=False. NaN never
                        # compares equal to itself, so a membership test against a list
                        # containing float("nan") would miss it; test finiteness instead and
                        # re-query the single field as text.
                        if isinstance(v, float) and not math.isfinite(v):
                            exif_field = subprocess.run(
                                ["exiftool", f"-{k}", "-T", request.file_path], capture_output=True, check=False
                            )
                            v = exif_field.stdout.decode("utf-8", errors="ignore").strip()
                        exif_body[build_key(k)] = v
                if exif_body:
                    e_res = ResultSection(
                        "Metadata extracted by ExifTool",
                        body=json.dumps(exif_body, allow_nan=False),
                        body_format=BODY_FORMAT.KEY_VALUE,
                        parent=request.result,
                    )
                    # File-type-specific tag mapping first, generic (None) mapping as fallback.
                    ext_tag_map = TAG_MAP.get(res_data.get("FileTypeExtension", "UNK").upper(), {})
                    default_exif_tag_map = TAG_MAP.get(None, {})
                    for k, v in exif_body.items():
                        tag_type = ext_tag_map.get(k, None) or default_exif_tag_map.get(k, None)
                        if tag_type:
                            e_res.add_tag(tag_type, v)

        # 4. Lnk management.
        if request.file_type == "shortcut/windows":
            with open(request.file_path, "rb") as indata:
                lnk = LnkParse3.lnk_file(indata)

            features = lnk.get_json(get_all=True)

            lnk_result_section = ResultSection(
                "Extra metadata extracted by LnkParse3",
                parent=request.result,
            )

            # Items collected here are reported together under Heuristic(1) further down.
            heur_1_items = {}
            risky_executable = ["rundll32.exe", "powershell.exe", "cmd.exe", "mshta.exe"]

            if "command_line_arguments" in features["data"]:
                if any(x in features["data"]["command_line_arguments"].lower() for x in risky_executable):
                    heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]
                elif " && " in features["data"]["command_line_arguments"]:
                    heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]

            lbp = ""
            if "local_base_path" in features["link_info"]:
                lbp = features["link_info"]["local_base_path"]
                if "common_path_suffix" in features["link_info"]:
                    lbp = f"{lbp}{features['link_info']['common_path_suffix']}"
                if any(x in lbp.lower() for x in risky_executable):
                    heur_1_items["local_base_path"] = features["link_info"]["local_base_path"]

            if "relative_path" in features["data"]:
                if any(x in features["data"]["relative_path"].lower() for x in risky_executable):
                    heur_1_items["relative_path"] = features["data"]["relative_path"]

            target = ""
            if "target" in features:
                import ntpath

                if "items" in features["target"]:
                    # Rebuild the target path from the primary name of each item.
                    last_item = None
                    for item in features["target"]["items"]:
                        if "primary_name" in item:
                            last_item = item
                            target = ntpath.join(target, item["primary_name"])

                    # A target whose last named item is a directory is discarded.
                    if last_item and last_item["flags"] == "Is directory":
                        target = ""

                    if any(x in target.lower() for x in risky_executable):
                        heur_1_items["target_file_dosname"] = target

            if "icon_location" in features["data"]:
                deceptive_icons = ["wordpad.exe", "shell32.dll"]

                lnk_result_section.add_tag(
                    tag_type="file.shortcut.icon_location", value=features["data"]["icon_location"]
                )
                if any(
                    features["data"]["icon_location"].lower().strip('"').strip("'").endswith(x) for x in deceptive_icons
                ):
                    heur = Heuristic(4)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    heur_section.set_item("icon_location", features["data"]["icon_location"])

            timestamps = []
            if features["header"]["creation_time"]:
                timestamps.append(("creation_time", features["header"]["creation_time"]))
            if features["header"]["modified_time"]:
                timestamps.append(("modified_time", features["header"]["modified_time"]))

            # Timestamp heuristics are only evaluated when the task depth is non-zero.
            if request.task.depth != 0:
                heur2_earliest_ts = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
                    days=self.config.get("heur2_flag_more_recent_than_days", 3)
                )
                heur2_latest_ts = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=2)
                recent_timestamps = []
                future_timestamps = []
                for k, timestamp in timestamps:
                    # Older than the configured window: not interesting.
                    if timestamp < heur2_earliest_ts:
                        continue
                    # More than two days ahead of now: reported separately.
                    if timestamp > heur2_latest_ts:
                        future_timestamps.append((k, timestamp))
                        continue
                    recent_timestamps.append((k, timestamp))

                if recent_timestamps:
                    heur = Heuristic(2)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    for k, timestamp in recent_timestamps:
                        heur_section.set_item(k, timestamp.isoformat())
                if future_timestamps:
                    heur = Heuristic(3)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    for k, timestamp in future_timestamps:
                        heur_section.set_item(k, timestamp.isoformat())

            if "DISTRIBUTED_LINK_TRACKER_BLOCK" in features["extra"]:
                if "machine_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                    machine_id = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["machine_identifier"]
                    lnk_result_section.add_tag("file.shortcut.machine_id", machine_id)
                    if machine_id.lower().startswith("desktop-"):
                        heur = Heuristic(5)
                        heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                        heur_section.set_item("machine_identifier", machine_id)
                # The last 12 characters of the droid identifier are formatted as
                # colon-separated pairs and tagged as the tracker MAC.
                if "droid_file_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                    mac = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["droid_file_identifier"][-12:]
                    mac = ":".join(a + b for a, b in zip(mac[::2], mac[1::2]))
                    lnk_result_section.add_tag("file.shortcut.tracker_mac", mac)
                elif "birth_droid_file_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                    mac = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["birth_droid_file_identifier"][-12:]
                    mac = ":".join(a + b for a, b in zip(mac[::2], mac[1::2]))
                    lnk_result_section.add_tag("file.shortcut.tracker_mac", mac)

            # Adapted code from previous logic. May be best replaced by new heuristics and logic.
            bp = str(lbp).strip()
            rp = str(features["data"].get("relative_path", "")).strip()
            nn = str(features["data"].get("net_name", "")).strip()
            t = str(target).strip().rsplit("\\")[-1].strip()
            cla = str(features["data"].get("command_line_arguments", "")).strip()

            # Last path component of the first non-empty candidate; the tag keeps the
            # un-stripped component while comparisons below use the stripped form.
            raw_filename = (bp or rp or t or nn).rsplit("\\")[-1]
            filename_extracted = raw_filename.strip()
            if filename_extracted:
                lnk_result_section.add_tag(tag_type="file.name.extracted", value=raw_filename)

            process_cmdline = f"{(rp or bp or t or nn)} {cla}".strip()
            if process_cmdline:
                lnk_result_section.add_tag(tag_type="file.shortcut.command_line", value=process_cmdline)

            # (code bytes, file extension) of the script to extract, if the target is cmd/PowerShell.
            cmd_code = None
            if filename_extracted in ["cmd", "cmd.exe"]:
                cmd_code = (get_cmd_command(f"{filename_extracted} {cla}".encode()), "bat")
                if "rundll32 " in cla:  # We are already checking for rundll32.exe as part of risky_executable
                    heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]
            elif filename_extracted in ["powershell", "powershell.exe"]:
                cmd_code = (get_powershell_command(f"{filename_extracted} {cla}".encode()), "ps1")

            if heur_1_items:
                heur = Heuristic(1)
                heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                heur_section.update_items(heur_1_items)

            if cmd_code:
                # File is named after the first 10 hex characters of the code's SHA-256.
                sha256hash = hashlib.sha256(cmd_code[0]).hexdigest()
                cmd_filename = f"{sha256hash[0:10]}.{cmd_code[1]}"
                cmd_file_path = os.path.join(self.working_directory, cmd_filename)
                with open(cmd_file_path, "wb") as cmd_f:
                    cmd_f.write(cmd_code[0])
                request.add_extracted(
                    cmd_file_path,
                    cmd_filename,
                    "Extracted LNK execution code",
                )

            def _datetime_to_str(obj):
                # json.dump fallback: datetimes become ISO-8601 strings, anything else is returned as-is.
                if isinstance(obj, datetime.datetime):
                    return obj.isoformat()
                return obj

            temp_path = os.path.join(self.working_directory, "features.json")
            with open(temp_path, "w") as f:
                json.dump(features, f, default=_datetime_to_str)
            request.add_supplementary(temp_path, "features.json", "Features extracted from the LNK file")

            if lnk.appended_data:
                sha256hash = hashlib.sha256(lnk.appended_data).hexdigest()
                appended_data_path = os.path.join(self.working_directory, sha256hash)
                with open(appended_data_path, "wb") as appended_data_f:
                    appended_data_f.write(lnk.appended_data)
                request.add_extracted(
                    appended_data_path,
                    sha256hash,
                    "Additional data at the end of the LNK file",
                )
                heur = Heuristic(6)
                heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                heur_section.set_item("Length", len(lnk.appended_data))

        # 5. URL file management
        if request.file_type == "shortcut/web":
            # Interpolation is disabled: values are raw data (URLs routinely contain
            # '%' escapes) and the default BasicInterpolation raises on them.
            config = ConfigParser(interpolation=None)
            config.read(request.file_path)

            res = ResultKeyValueSection("Metadata extracted by Ini Reader", parent=request.result)
            # A file without an [InternetShortcut] section simply yields no items.
            if config.has_section("InternetShortcut"):
                for k, v in config["InternetShortcut"].items():
                    res.set_item(k, v)

                    if k == "url":
                        if v.startswith("http://") or v.startswith("https://"):
                            res.add_tag("network.static.uri", v)
                        elif v.startswith("file:"):
                            heur = Heuristic(1)
                            heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=res)
                            heur_section.set_item("url", v)

            # Any section other than InternetShortcut is only listed by name.
            config.pop("InternetShortcut", None)
            if config.sections():
                extra_res = ResultKeyValueSection("Extra sections", parent=res)
                extra_res.set_item("Names", ", ".join(config.sections()))