Example #1
0
    def recurse_add_res(self, file_res, res_list, new_files, parent=None):
        """Build ResultSections from a list of result dictionaries.

        Entries whose condition passes are turned into sections with a
        heuristic, tags and an optional body; any listed files are queued
        in new_files for resubmission. Root-level sections (no parent)
        are attached directly to file_res.
        """
        for entry in res_list:
            # Skip entries whose (optional) condition is not satisfied
            if not self.pass_condition(entry.get("condition", None)):
                continue

            section = ResultSection(
                entry['title_text'],
                classification=entry.get('classification', Classification.UNRESTRICTED),
                parent=parent,
                body_format=entry.get('body_format', BODY_FORMAT.TEXT))
            section.set_heuristic(
                self.heuristic_alteration(entry.get('score_condition', None),
                                          entry['heur_id']))

            # Attach tags
            for tag_pair in entry.get('tags', []):
                section.add_tag(tag_pair[0], tag_pair[1])

            # Attach the body text when one is provided
            body = entry.get('body', None)
            if body:
                section.set_body(body)

            # Queue files for resubmission; tuples carry the path second
            for candidate in entry.get('files', []):
                new_files.append(candidate[1] if isinstance(candidate, tuple) else candidate)

            # Root-level sections go straight onto the file result
            if parent is None:
                file_res.add_section(section)
    def _create_random_section(self):
        """Generate a ResultSection populated with random content.

        The section gets a random body format, a random title, 1-5 random
        body lines and random tags; a heuristic is set and a subsection is
        nested roughly a third of the time each.
        """
        # choose a random body format
        body_format = random.choice(FORMAT_LIST)

        # create a section with a random title
        section = ResultSection(get_random_phrase(3, 7), body_format=body_format)

        # choose random amount of lines in the body
        # BUG FIX: range(1, 5) always produced exactly 4 lines even though
        # the intent (per the comment) was a random amount — pick 1-5.
        for _ in range(random.randint(1, 5)):
            # generate random line
            section.add_line(get_random_phrase(5, 10))

        # add every generated random tag
        tags = flatten(get_random_tags())
        for key, val in tags.items():
            for v in val:
                section.add_tag(key, v)

        # set a heuristic a third of the time
        if random.choice([False, False, True]):
            section.set_heuristic(random.randint(1, 4))

        # Create random sub-sections (recursive) a third of the time
        if random.choice([False, False, True]):
            section.add_subsection(self._create_random_section())

        return section
    def execute(self, request):
        """Run the XLM macro extractor/deobfuscator over the submitted file
        and attach the combined results to the request."""
        result = Result()
        request.result = result
        file_path = request.file_path
        password = request.get_param('password')
        start_point = request.get_param('start point')

        try:
            # First pass: raw extraction only, no deobfuscation
            data = process_file(
                file=file_path,
                password=password,
                noninteractive=True,
                no_indent=True,
                output_level=0,
                return_deobfuscated=True,
                extract_only=True)

            # Second pass: full deobfuscation with cell-addressed formulas
            data_deobfuscated = process_file(
                file=file_path,
                password=password,
                start_point=start_point,
                noninteractive=True,
                no_indent=True,
                output_level=0,
                output_formula_format='[[CELL-ADDR]]: [[INT-FORMULA]]',
                return_deobfuscated=True)
        except Exception as e:
            # Analysis failed: report the error; decryption failures get
            # their own heuristic.
            failure = ResultSection('Failed to analyze', parent=request.result)
            failure.add_line(str(e))
            if str(e).startswith('Failed to decrypt'):
                failure.set_heuristic(6)
            return

        add_results(result, data, data_deobfuscated)
Example #4
0
    def parse_link(self, parent_res, path):
        """Decode a Windows .lnk file and report its metadata.

        Builds a KEY_VALUE section with the flattened metadata, flags
        known-bad command lines, and tags the target name, command line
        and any mapped metadata fields.

        Returns True when metadata was extracted, False otherwise.
        """
        with open(path, "rb") as fh:
            metadata = decode_lnk(fh.read())

        if metadata is None:
            return False

        # Keep only truthy values, keyed by their normalized names
        body_output = {}
        for key, value in flatten(metadata).items():
            if value:
                body_output[build_key(key)] = value

        res = ResultSection("Metadata extracted by parse_lnk",
                            body_format=BODY_FORMAT.KEY_VALUE,
                            body=json.dumps(body_output),
                            parent=parent_res)

        base_path = metadata.get("BasePath", "").strip()
        rel_path = metadata.get("RELATIVE_PATH", "").strip()
        net_name = metadata.get("NetName", "").strip()
        cmd_args = metadata.get("COMMAND_LINE_ARGUMENTS", "").strip()

        # Flag command lines matching known-bad patterns
        if BAD_LINK_RE.search(cmd_args.lower()):
            res.set_heuristic(1)

        res.add_tag(tag_type="file.name.extracted",
                    value=(base_path or rel_path or net_name).rsplit("\\")[-1])
        res.add_tag(tag_type="dynamic.process.command_line",
                    value=f"{(rel_path or base_path or net_name)} {cmd_args}".strip())

        # Tag any metadata field that has a known AL tag mapping
        for key, value in body_output.items():
            tag_type = (TAG_MAP.get("LNK", {}).get(key, None)
                        or TAG_MAP.get(None, {}).get(key, None))
            if tag_type:
                res.add_tag(tag_type, value)

        return True
Example #5
0
 def dump_invalid_properties(self, parent_res):
     """Report properties whose IDs are outside the known set (1, 2, 5)."""
     if not self.invalid_properties_count:
         return
     section = ResultSection(
         f"We've found {self.invalid_properties_count} properties with IDs different than "
         f"1 (storage), 2 (stream) and 5 (root)",
         parent=parent_res)
     section.set_heuristic(50)
def get_result_subsection(result, title, heuristic):
    """Return the subsection of *result* titled *title*, creating it if needed.

    When several subsections share the title, the last one wins. A newly
    created subsection is attached to *result* and given *heuristic*;
    an existing one is returned unchanged.
    """
    match = None
    # Scan every subsection; keep scanning so the last title match wins
    for candidate in result.subsections:
        if candidate.title_text == title:
            match = candidate
    # Nothing found: create, attach and score a fresh subsection
    if not match:
        match = ResultSection(title)
        result.add_subsection(match)
        match.set_heuristic(heuristic)
    return match
Example #7
0
 def test_parse_results(response, correct_res_secs,
                        metadefender_class_instance):
     """Check MetaDefender.parse_results against the expected sections.

     Builds the expected Result from the correct_res_secs fixture and
     compares it section-by-section with the parsed response.
     (Fix: removed a stray debug print() left in the test body.)
     """
     from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT, Heuristic
     metadefender_class_instance.blocklist = ["a"]
     metadefender_class_instance.sig_score_revision_map = {}
     metadefender_class_instance.kw_score_revision_map = {}
     metadefender_class_instance.current_node = "http://blah"
     metadefender_class_instance.nodes[
         metadefender_class_instance.current_node] = {
             "engine_map": {
                 "z": {
                     "version": "blah",
                     "def_time": "blah"
                 },
                 "y": {
                     "version": "blah",
                     "def_time": "blah"
                 }
             },
             "queue_times": [],
             "file_count": 0
         }
     correct_result = Result()
     for correct_res_sec in correct_res_secs:
         section = ResultSection(
             correct_res_sec["title_text"],
             body_format=BODY_FORMAT.TEXT if
             not correct_res_sec.get("body_format") else BODY_FORMAT.JSON,
             body=correct_res_sec.get("body"))
         for subsec in correct_res_sec.get("subsections", []):
             subsection = ResultSection(
                 subsec["title_text"],
                 body=subsec["body"],
                 body_format=BODY_FORMAT.KEY_VALUE,
                 tags=subsec.get("tags"),
             )
             if subsec.get("heuristic"):
                 subsection.set_heuristic(subsec["heuristic"]["heur_id"])
                 # Attach every expected signature to the heuristic
                 for key in subsec["heuristic"]["signatures"].keys():
                     subsection.heuristic.add_signature_id(key)
             section.add_subsection(subsection)
         correct_result.add_section(section)
     actual_result = metadefender_class_instance.parse_results(response)
     for index, section in enumerate(actual_result.sections):
         assert check_section_equality(section,
                                       correct_result.sections[index])
Example #8
0
    def dump_properties(self, parent_res):
        """Walk the OLE2 property tree and report every property.

        Properties reachable from root id 0 are dumped first; orphaned
        directories and streams (entries never visited by the tree walk)
        are then reported under an -ORPHAN- pseudo-path, and invalid
        stream accesses are flagged with a heuristic.
        """
        # 1. start with id 0 and naviguate the tree from there.
        self.dump_dir('0', '\\', parent_res, False)

        # 2. any missing properties, look for dir first?
        while len(self.parent) > 0:
            cur_dir = list(self.parent.items())[0][0]
            if self.property_dict[cur_dir][1]:
                del self.parent[cur_dir]
            else:
                # Climb to the highest not-yet-dumped ancestor before dumping
                while cur_dir in self.parent and self.property_dict[
                        self.parent[cur_dir]][1] is False:
                    cur_dir = self.parent[cur_dir]
                self.dump_dir(cur_dir, '\\-ORPHAN-\\', parent_res, True)

        for (p_id, field_struct) in self.property_dict.items():
            if field_struct[1] is False and field_struct[0][
                    'type'].display == 'storage':
                self.dump_dir(p_id, '\\-ORPHAN-\\', parent_res, True)

        if len(self.invalid_streams) > 0:
            res_error = ResultSection(
                "Trying to access stream content from the short block, but root[0] doesn't "
                "even exist.  This file is either corrupted, patched or exploiting a "
                "vulnerability.",
                parent=parent_res)
            # BUG FIX: the f-string previously interpolated the tuple
            # ('', ''.join(...)) — rendering as "('', '...')" — instead of
            # joining the stream names with ", ".
            res_error.add_line(
                f"Unable to access the following stream(s): {', '.join(self.invalid_streams)}"
            )
            res_error.set_heuristic(40)

        # 3. any missing properties, with no parent?
        orphans = {}
        for (p_id, field_struct) in self.property_dict.items():
            if field_struct[1] is False and field_struct[0]['name'].value != '':
                orphans[p_id] = field_struct

        if len(orphans) > 0:
            res = ResultSection("OLE2 STORAGE: \\-ORPHAN-")
            for (p_id, field_struct) in orphans.items():
                self.dump_property(field_struct[0], '\\-ORPHAN-', p_id, res,
                                   parent_res, True)

            if len(res.subsections) > 0:
                parent_res.add_subsection(res)
Example #9
0
    def _set_heuristic_by_verdict(self, result_section: ResultSection,
                                  verdict: Optional[str]) -> None:
        """
        This method sets the heuristic of the result section based on the verdict
        :param result_section: The result section that will have its heuristic set
        :param verdict: The verdict of the file
        :return: None
        """
        if not verdict:
            return

        # A verdict is "known" when it is listed as either interesting
        # or uninteresting
        known_verdict = (verdict in Verdicts.INTERESTING_VERDICTS.value
                         or verdict in Verdicts.UNINTERESTING_VERDICTS.value)

        if not known_verdict:
            self.log.debug(f"{verdict} was spotted. Is this useful?")
        elif verdict in Verdicts.MALICIOUS_VERDICTS.value:
            result_section.set_heuristic(1)
        elif verdict in Verdicts.SUSPICIOUS_VERDICTS.value:
            result_section.set_heuristic(2)
        elif verdict in Verdicts.TRUSTED_VERDICTS.value:
            self.log.debug(
                f"The verdict was {verdict}. Can we do something with this?")
	def execute(self, request):
		"""Extract the embedded AutoIt script from the submission, if any,
		report it in a section and attach it as an extracted file."""
		result = Result()
		file = request.file_path

		with open(file, "rb") as f:
			file_content = f.read()

		content_list = autoit_ripper.extract(data=file_content)

		if content_list:
			content = content_list[0][1].decode("utf-8")

			text_section = ResultSection('[DUMP RESULT]')
			text_section.add_line(content)
			text_section.set_heuristic(1)
			result.add_section(text_section)

			# BUG FIX: the script path was built with plain string
			# concatenation (working_directory + "script.au3"), which drops
			# the path separator and writes outside the working directory.
			import os
			script_path = os.path.join(self.working_directory, "script.au3")
			with open(script_path, "w") as f:
				f.write(content)
			request.add_extracted(script_path, 'script.au3', 'This is the unpacked script')

		request.result = result
Example #11
0
    def execute(self, request):
        """Scan the submitted file with clamscan and report its output.

        A "FOUND" verdict on the summary line triggers heuristic 1; the
        full report is added line by line to the result section.
        """
        result = Result()
        file_path = request.file_path

        # SECURITY FIX: the command was previously built as a single shell
        # string with the file path concatenated in (shell=True), which is
        # vulnerable to shell injection and breaks on paths containing
        # spaces or metacharacters. Pass an argument list instead.
        p1 = subprocess.Popen(
            ["clamscan", "-a", "-z", "--detect-pua", "--alert-macros", file_path],
            stdout=subprocess.PIPE)
        p1.wait()
        stdout = p1.communicate()[0].decode("utf-8")

        # One report entry per line, empty lines dropped
        report = stdout.split("\n")
        report = list(filter(None, report))

        text_section = ResultSection("Successfully scanned the file")
        # The first line carries the per-file verdict ("... FOUND" on a hit);
        # guard against an empty report to avoid an IndexError.
        if report and "FOUND" in report[0]:
            text_section.set_heuristic(1)

        for line in report:
            text_section.add_line(line)

        result.add_section(text_section)
        request.result = result
Example #12
0
    def test_init(mocker):
        """Verify AvHitSection builds the expected ResultSection for each
        combination of engine info, heuristic id and score-revision maps.

        The five near-identical expected-section constructions are factored
        into the local expected_section() helper.
        """
        from json import dumps
        from assemblyline_v4_service.common.result import BODY_FORMAT, ResultSection
        mocker.patch("assemblyline_v4_service.common.api.ServiceAPIError")
        from metadefender import AvHitSection
        av_name = "blah"
        virus_name = "blah"
        sig_score_rev_map = {}
        kw_score_rev_map = {}
        safelist_match = []

        def expected_section(heur_id, scan_result, engine_version,
                             engine_def_time, sig_score=None):
            # Build the ResultSection that AvHitSection is expected to produce
            section = ResultSection(
                f"{av_name} identified the file as {virus_name}")
            section.set_heuristic(heur_id)
            if sig_score is None:
                section.heuristic.add_signature_id(f"{av_name}.{virus_name}")
            else:
                section.heuristic.add_signature_id(f"{av_name}.{virus_name}",
                                                   sig_score)
            section.add_tag("av.virus_name", virus_name)
            section.set_body(
                dumps({
                    "av_name": av_name,
                    "virus_name": virus_name,
                    "scan_result": scan_result,
                    "engine_version": engine_version,
                    "engine_definition_time": engine_def_time
                }), BODY_FORMAT.KEY_VALUE)
            return section

        # No engine info, heuristic 1 -> "infected" with unknown engine details
        actual_res_sec = AvHitSection(av_name, virus_name, {}, 1,
                                      sig_score_rev_map, kw_score_rev_map,
                                      safelist_match)
        assert check_section_equality(
            actual_res_sec,
            expected_section(1, "infected", "unknown", "unknown"))

        # Engine info, heuristic 2, safelisted keyword -> signature score 0
        engine = {"version": "blah", "def_time": 1}
        safelist_match = ["blah"]
        actual_res_sec = AvHitSection(av_name, virus_name, engine, 2,
                                      sig_score_rev_map, kw_score_rev_map,
                                      safelist_match)
        assert check_section_equality(
            actual_res_sec, expected_section(2, "suspicious", "blah", 1, 0))

        # Keyword score revision map with one matching keyword
        kw_score_rev_map = {"bla": 1}
        actual_res_sec = AvHitSection(av_name, virus_name, engine, 2,
                                      sig_score_rev_map, kw_score_rev_map,
                                      safelist_match)
        assert check_section_equality(
            actual_res_sec, expected_section(2, "suspicious", "blah", 1, 1))

        # Multiple keyword matches -> the highest revision wins
        kw_score_rev_map = {"bla": 1, "h": 2}
        actual_res_sec = AvHitSection(av_name, virus_name, engine, 2,
                                      sig_score_rev_map, kw_score_rev_map,
                                      safelist_match)
        assert check_section_equality(
            actual_res_sec, expected_section(2, "suspicious", "blah", 1, 2))

        # Signature score revision map overrides keyword scoring
        sig_score_rev_map = {f"{av_name}.{virus_name}": 10}
        actual_res_sec = AvHitSection(av_name, virus_name, engine, 2,
                                      sig_score_rev_map, kw_score_rev_map,
                                      safelist_match)
        assert check_section_equality(
            actual_res_sec, expected_section(2, "suspicious", "blah", 1, 10))
Example #13
0
    def execute(self, request):
        """Submit the PCAP to the running Suricata instance and build results.

        The PCAP is stripped of frame headers, handed to Suricata over its
        unix socket, and the eve.json output is parsed into extracted
        files, discovered IOC sections (domains/IPs/URLs/emails/TLS) and
        per-signature alert sections.
        (Fix: the extracted-file log line was an f-string with no
        placeholder; it now logs the actual filename.)
        """
        file_path = request.file_path
        result = Result()

        # Report the version of suricata as the service context
        request.set_service_context(
            f"Suricata version: {self.get_suricata_version()}")

        # restart Suricata if we need to
        self.start_suricata_if_necessary()

        # Strip frame headers from the PCAP, since Suricata sometimes has trouble parsing strange PCAPs
        stripped_filepath = self.strip_frame_headers(file_path)

        # Check to make sure the size of the stripped file isn't 0 - this happens on pcapng files
        # TODO: there's probably a better way to do this - don't event strip it if it's pcapng
        if os.stat(stripped_filepath).st_size == 0:
            stripped_filepath = file_path

        # Switch stdout and stderr so we don't get our logs polluted
        mystdout = StringIO()
        old_stdout = sys.stdout
        sys.stdout = mystdout

        mystderr = StringIO()
        old_stderr = sys.stderr
        sys.stderr = mystderr

        # Pass the pcap file to Suricata via the socket
        ret = self.suricata_sc.send_command(
            "pcap-file", {
                "filename": stripped_filepath,
                "output-dir": self.working_directory
            })

        if not ret or ret["return"] != "OK":
            self.log.exception(
                f"Failed to submit PCAP for processing: {ret['message']}")

        # Wait for the socket finish processing our PCAP
        while True:
            time.sleep(1)
            try:
                ret = self.suricata_sc.send_command("pcap-current")
                if ret and ret["message"] == "None":
                    break
            except ConnectionResetError as e:
                raise RecoverableError(e)

        # Bring back stdout and stderr
        sys.stdout = old_stdout
        sys.stderr = old_stderr
        # NOTE: for now we will ignore content of mystdout and mystderr but we have them just in case...

        alerts, signatures, domains, ips, urls, email_addresses, tls_dict, extracted_files, reverse_lookup = self.parse_suricata_output(
        ).values()

        file_extracted_section = ResultSection("File(s) extracted by Suricata")
        # Parse the json results of the service
        if request.get_param("extract_files"):
            for file in extracted_files:
                sha256, filename, extracted_file_path = file.values()
                # BUG FIX: this was f"extracted file (unknown)" — an f-string
                # with no placeholder that logged a useless constant.
                self.log.info(f"extracted file {filename}")
                try:
                    if request.add_extracted(
                            extracted_file_path,
                            filename,
                            "Extracted by Suricata",
                            safelist_interface=self.api_interface):
                        file_extracted_section.add_line(filename)
                        if filename != sha256:
                            file_extracted_section.add_tag(
                                'file.name.extracted', filename)
                except FileNotFoundError as e:
                    # An intermittent issue, just try again
                    raise RecoverableError(e)
                except MaxExtractedExceeded:
                    # We've hit our limit
                    pass

        # Report a null score to indicate that files were extracted. If no sigs hit, it's not clear
        # where the extracted files came from
        if file_extracted_section.body:
            result.add_section(file_extracted_section)

        # Add tags for the domains, urls, and IPs we've discovered
        root_section = ResultSection("Discovered IOCs", parent=result)
        if domains:
            domain_section = ResultSection("Domains", parent=root_section)
            for domain in domains:
                domain_section.add_line(domain)
                domain_section.add_tag('network.dynamic.domain', domain)
        if ips:
            ip_section = ResultSection("IP Addresses", parent=root_section)
            for ip in ips:
                # Make sure it's not a local IP
                if not (ip.startswith("127.") or ip.startswith("192.168.")
                        or ip.startswith("10.") or
                        (ip.startswith("172.")
                         and 16 <= int(ip.split(".")[1]) <= 31)):
                    ip_section.add_line(ip)
                    ip_section.add_tag('network.dynamic.ip', ip)

        if urls:
            url_section = ResultSection("URLs", parent=root_section)
            for url in urls:
                url_section.add_line(url)
                url_section.add_tag('network.dynamic.uri', url)
        if email_addresses:
            email_section = ResultSection("Email Addresses",
                                          parent=root_section)
            for eml in email_addresses:
                email_section.add_line(eml)
                email_section.add_tag('network.email.address', eml)

        # Map between suricata key names and AL tag types
        tls_mappings = {
            "subject": 'cert.subject',
            "issuerdn": 'cert.issuer',
            "version": 'cert.version',
            "notbefore": 'cert.valid.start',
            "notafter": 'cert.valid.end',
            "fingerprint": 'cert.thumbprint',
            "sni": 'network.tls.sni'
        }

        if tls_dict:
            tls_section = ResultSection("TLS Information",
                                        parent=root_section,
                                        body_format=BODY_FORMAT.JSON)
            kv_body = {}
            for tls_type, tls_values in tls_dict.items():
                if tls_type == "fingerprint":
                    # make sure the cert fingerprint/thumbprint matches other values,
                    # like from PEFile
                    tls_values = [
                        v.replace(":", "").lower() for v in tls_values
                    ]

                if tls_type in tls_mappings:
                    kv_body[tls_type] = tls_values

                    tag_type = tls_mappings[tls_type]
                    if tag_type is not None:
                        for tls_value in tls_values:
                            tls_section.add_tag(tag_type, tls_value)

                elif tls_type == "ja3":
                    kv_body.setdefault('ja3_hash', [])
                    kv_body.setdefault('ja3_string', [])

                    for ja3_entry in tls_values:
                        ja3_hash = ja3_entry.get("hash")
                        ja3_string = ja3_entry.get("string")
                        if ja3_hash:
                            kv_body['ja3_hash'].append(ja3_hash)
                            tls_section.add_tag('network.tls.ja3_hash',
                                                ja3_hash)
                        if ja3_string:
                            kv_body['ja3_string'].append(ja3_string)
                            tls_section.add_tag('network.tls.ja3_string',
                                                ja3_string)

                else:
                    kv_body[tls_type] = tls_values
                    # stick a message in the logs about a new TLS type found in suricata logs
                    self.log.info(
                        f"Found new TLS type {tls_type} with values {tls_values}"
                    )
            tls_section.set_body(json.dumps(kv_body))

        # Create the result sections if there are any hits
        if len(alerts) > 0:
            for signature_id, signature_details in signatures.items():
                signature = signature_details['signature']
                attributes = signature_details['attributes']
                section = ResultSection(f'{signature_id}: {signature}')
                heur_id = 3
                if any(x in signature for x in self.config.get("sure_score")):
                    heur_id = 1
                elif any(x in signature
                         for x in self.config.get("vhigh_score")):
                    heur_id = 2

                section.set_heuristic(heur_id)
                if signature_details['al_signature']:
                    section.add_tag("file.rule.suricata",
                                    signature_details['al_signature'])
                for timestamp, src_ip, src_port, dest_ip, dest_port in alerts[
                        signature_id][:10]:
                    section.add_line(
                        f"{timestamp} {src_ip}:{src_port} -> {dest_ip}:{dest_port}"
                    )
                if len(alerts[signature_id]) > 10:
                    section.add_line(
                        f'And {len(alerts[signature_id]) - 10} more flows')

                # Tag IPs/Domains/URIs associated to signature
                for flow in alerts[signature_id]:
                    dest_ip = flow[3]
                    section.add_tag('network.dynamic.ip', dest_ip)
                    if dest_ip in reverse_lookup.keys():
                        section.add_tag('network.dynamic.domain',
                                        reverse_lookup[dest_ip])
                    [
                        section.add_tag('network.dynamic.uri', uri)
                        for uri in urls
                        if dest_ip in uri or (reverse_lookup.get(dest_ip) and
                                              reverse_lookup[dest_ip] in uri)
                    ]

                # Add a tag for the signature id and the message
                section.add_tag('network.signature.signature_id',
                                str(signature_id))
                section.add_tag('network.signature.message', signature)
                [
                    section.add_tag('network.static.uri', attr['uri'])
                    for attr in attributes if attr.get('uri')
                ]
                # Tag malware_family
                for malware_family in signature_details['malware_family']:
                    section.add_tag('attribution.family', malware_family)

                result.add_section(section)
                self.ontology.add_result_part(
                    Signature,
                    data=dict(
                        name=signature_details['al_signature'],
                        type="SURICATA",
                        malware_families=signature_details['malware_family']
                        or None,
                        attributes=attributes))

            # Add the original Suricata output as a supplementary file in the result
            request.add_supplementary(
                os.path.join(self.working_directory, 'eve.json'),
                'SuricataEventLog.json', 'json')

        # Add the stats.log to the result, which can be used to determine service success
        if os.path.exists(os.path.join(self.working_directory, 'stats.log')):
            request.add_supplementary(
                os.path.join(self.working_directory, 'stats.log'), 'stats.log',
                'log')

        request.result = result
 def _report_embedded_xdp(self, file_res, chunk_number, binary, leftover):
     # Report a PDF embedded inside an XDP wrapper: add a heuristic-1
     # section with a file.behavior tag to the file result.
     # NOTE(review): `binary` and `leftover` are unused here — confirm
     # whether they are required by the caller's signature only.
     # NOTE(review): the title is passed as a list of two strings —
     # presumably a legacy title form; confirm ResultSection accepts it.
     res_section = ResultSection([f"Found {chunk_number}", "Embedded PDF (in XDP)"])
     res_section.set_heuristic(1)
     res_section.add_tag('file.behavior', "Embedded PDF (in XDP)")
     file_res.add_section(res_section)
def _tag_ioc_subsections(result_ioc, ioc_dict, heuristics, formulas_subsection,
                         title_prefix=''):
    """Create/extend one IoC subsection per title and tag the formulas section.

    Each entry of ioc_dict maps a tag name to a list of details where index 0
    is the IoC value, index 1 the subsection title and index 2 selects which
    heuristic from `heuristics` applies.
    """
    for ioc_tag, values in ioc_dict.items():
        for ioc_details in values:
            ioc = ioc_details[0]
            title = title_prefix + ioc_details[1]
            heuristic = heuristics[ioc_details[2]]

            ioc_subsection = get_result_subsection(result_ioc, title,
                                                   heuristic)
            ioc_subsection.add_tag(ioc_tag, ioc)
            # Only add the IoC line once per subsection body.
            dedup_re = re.compile('(\\n|^)' + re.escape(ioc) + '(\\n|$)')
            if ioc_subsection.body is None or not dedup_re.search(
                    ioc_subsection.body):
                ioc_subsection.add_line(ioc)

            formulas_subsection.add_tag(ioc_tag, ioc)


def _add_formula_lines(subsection, formulas):
    """Add 'cell: formula' lines, skipping partial FORMULA( fragments whose
    referenced cell is itself listed (those are shown via the complete one)."""
    for cell, formula in formulas.items():
        # Only add complete formulas
        if "FORMULA(" in formula:
            cell_referenced = formula.rsplit(',', 1)[1][:-1]
            if cell_referenced not in formulas:
                subsection.add_line(cell + ": " + formula)
        else:
            subsection.add_line(cell + ": " + formula)


def tag_data(data, data_deobfuscated, result_ioc, result_formula):
    """Tag IoCs and suspicious formulas found before and after deobfuscation.

    `data` / `data_deobfuscated` are iterables of extractor output lines;
    IoC subsections are attached to `result_ioc` and formula subsections to
    `result_formula`.  Findings that appear both before and after
    deobfuscation are only reported once (as non-deobfuscated).
    """
    pattern = PatternMatch()

    # Get all IoCs without deobfuscation
    ioc_dict = {}
    formulas = collections.OrderedDict()
    for line in data:
        if line[:4] == 'CELL':
            split_value = line.split(',', 1)
            cell = split_value[0].split(':')[1].strip()
            formula = split_value[1].rsplit(',', 1)[0].strip()

            # Add formula to list of formulas if it contains IoC(s)
            if pattern.ioc_match(formula, cell, ioc_dict):
                formulas[cell] = formula

    # Get all IoCs after deobfuscation
    ioc_deobfuscated_dict = {}
    formulas_deobfuscated = collections.OrderedDict()
    for line in data_deobfuscated:
        split_value = line.split(':', 1)
        cell = split_value[0].strip()
        formula = split_value[1].strip()

        # Add formula to list of deobfuscated formulas if it contains IoC(s)
        if pattern.ioc_match(formula, cell, ioc_deobfuscated_dict):
            formulas_deobfuscated[cell] = formula

    # Remove duplicate IoCs (found both before AND after deobfuscation)
    for ioc_tag, values in ioc_deobfuscated_dict.copy().items():
        for ioc_details in values.copy():
            if ioc_tag in ioc_dict and ioc_details in ioc_dict[ioc_tag]:
                ioc_deobfuscated_dict[ioc_tag].remove(ioc_details)
                # Remove ioc_tag if no IoCs are associated with it
                if len(ioc_deobfuscated_dict[ioc_tag]) == 0:
                    del ioc_deobfuscated_dict[ioc_tag]

    # Remove duplicate formulas from the same cell (found both before AND after deobfuscation)
    for cell, formula in formulas_deobfuscated.copy().items():
        # NOTE(review): substring containment, not equality — presumably
        # intentional so re-emitted fragments are also dropped; confirm.
        if cell in formulas and formula in formulas[cell]:
            del formulas_deobfuscated[cell]

    # Create the appropriate result subsections for formulas
    formulas_subsection = ResultSection('Formulas')
    formulas_deobfuscated_subsection = ResultSection('Deobfuscated Formulas')
    formulas_deobfuscated_subsection.set_heuristic(5)
    if formulas:
        result_formula.add_subsection(formulas_subsection)
    if formulas_deobfuscated:
        result_formula.add_subsection(formulas_deobfuscated_subsection)

    # Generate result subsections for IoCs found without deobfuscation
    _tag_ioc_subsections(result_ioc, ioc_dict, [1, 2], formulas_subsection)

    # Generate result subsections for deobfuscated IoCs
    _tag_ioc_subsections(result_ioc, ioc_deobfuscated_dict, [3, 4],
                         formulas_deobfuscated_subsection,
                         title_prefix='Deobfuscated ')

    # Populate 'Formulas' result subsection with all suspicious formulas found without deobfuscation
    _add_formula_lines(formulas_subsection, formulas)

    # Populate 'Deobfuscated Formulas' result subsection with all deobfuscated suspicious formulas
    _add_formula_lines(formulas_deobfuscated_subsection, formulas_deobfuscated)
    def execute(self, request):
        """Main Module. See README for details.

        Analyzes the PDF with PDFId (unless the file exceeds the configured
        size limit and no deep scan was requested), then re-analyzes every
        extracted ObjStm as a standalone PDF.  All analysis errors are
        collected into a single "Errors Analyzing PDF" section.
        """
        max_size = self.config.get('MAX_PDF_SIZE', 3000000)
        request.result = result = Result()
        # Only analyze files within the size limit unless deep scan is on.
        if os.path.getsize(request.file_path) < max_size or request.deep_scan:
            path = request.file_path
            working_dir = self.working_directory

            # CALL PDFID and identify all suspicious keyword streams
            additional_keywords = self.config.get('ADDITIONAL_KEYS', [])
            heur = deepcopy(self.config.get('HEURISTICS', []))
            all_errors = set()

            res_txt = "Main Document Results"
            res, contains_objstms, errors = self.analyze_pdf(
                request, res_txt, path, working_dir, heur, additional_keywords)
            result.add_section(res)
            all_errors.update(errors)

            #  ObjStms: Treat all ObjStms like a standalone PDF document
            if contains_objstms:
                objstm_files = self.analyze_objstm(path, working_dir,
                                                   request.deep_scan)
                # These plugins would always fire on the PDFs this service
                # itself generates from ObjStms, so drop them once up front
                # (the filter is loop-invariant; the original recomputed it
                # on every iteration).
                heur = [
                    x for x in heur
                    if 'plugin_suspicious_properties' not in x
                    and 'plugin_embeddedfile' not in x
                    and 'plugin_nameobfuscation' not in x
                ]
                for obj_cnt, osf in enumerate(objstm_files, 1):
                    parent_obj = os.path.basename(osf).split("_")[1]
                    res_txt = "ObjStream Object {0} from Parent Object {1}".format(
                        obj_cnt, parent_obj)

                    res, _, errors = self.analyze_pdf(
                        request,
                        res_txt,
                        osf,
                        working_dir,
                        heur,
                        additional_keywords,
                        get_malform=False)

                    # BUGFIX: ObjStm analysis errors were captured but never
                    # added to all_errors, so they were silently dropped.
                    all_errors.update(errors)
                    result.add_section(res)

            if len(all_errors) > 0:
                erres = ResultSection(title_text="Errors Analyzing PDF")
                for e in all_errors:
                    erres.add_line(e)
                result.add_section(erres)

        else:
            # Too big to analyze: report a heuristic-only section instead.
            section = ResultSection(
                "PDF Analysis of the file was skipped because the file is too big (limit is 3 MB)."
            )
            section.set_heuristic(10)
            result.add_section(section)
Example #17
0
    def cache_fields(self, field, parent_res):
        """Force hachoir to parse every sub-field of *field*, reporting failures.

        Iterating a hachoir field set triggers the library's lazy parsing of
        all sub-fields.  Each recognized parsing failure (corrupted, patched
        or exploit-bearing file) is recorded as a heuristic ResultSection
        under *parent_res*.  Up to 15 attempts are made.
        """
        num_of_attempt = 15
        keep_trying = True
        previous_parser_error = None
        failed_again = False

        while keep_trying:
            # noinspection PyBroadException
            try:
                if field.is_field_set and field._getCurrentLength() > 0:
                    # Touching every sub-field forces hachoir's lazy parsing.
                    for _ in field:
                        pass

            except MissingField as e:
                res = ResultSection(
                    f"Hachoir lib COULD NOT get field '{e.key}' from "
                    f"'{e.field.path}'.  This file is either corrupted, "
                    f"patched or exploiting a vulnerability.",
                    parent=parent_res)

                res.set_heuristic(42)
            except ParserError as e:
                # Report each distinct error message once.  BUGFIX: the
                # original test (`previous_parser_error is None and
                # previous_parser_error != str(e)`) made the second clause
                # dead code, so only the very first ParserError could ever
                # be reported.
                if previous_parser_error != str(e):
                    previous_parser_error = str(e)
                    if str(e).startswith(
                            "OLE2: Unable to parse property of type "):
                        res = ResultSection(
                            f"Hachoir lib DID NOT successfully "
                            f"parse one of the property [{str(e)}].  This "
                            f"file is either corrupted, patched or exploiting a vulnerability.",
                            parent=parent_res)

                        res.set_heuristic(43)
                    elif str(e).startswith('Unable to add ') and str(
                            e).endswith(" is too large"):
                        res = ResultSection(
                            f"Hachoir lib determined that a field "
                            f"is overflowing the file [{str(e)}].  This "
                            f"file is either corrupted, patched or exploiting a vulnerability.",
                            parent=parent_res)

                        res.set_heuristic(44)
                    elif str(e).endswith(" is too large!"):
                        res = ResultSection(
                            f"Hachoir lib COULD NOT access a field "
                            f"[{str(e)}].  This file is either corrupted,"
                            f" patched or exploiting a vulnerability.",
                            parent=parent_res)

                        res.set_heuristic(45)
                    elif str(e).startswith("Seek above field set end"):
                        res = ResultSection(
                            f"Hachoir lib determined that a field is "
                            f"overflowing the file [{str(e)}].  This "
                            f"file is either corrupted, patched or exploiting a vulnerability.",
                            parent=parent_res)

                        res.set_heuristic(44)
                    elif "FAT chain: Found a loop" in str(e):
                        # Message prefix distinguishes the big FAT from the
                        # short FAT.
                        if str(e).startswith('B'):
                            fat = 'BFAT'
                        else:
                            fat = 'SFAT'
                        res = ResultSection(
                            f"Hachoir lib found a loop when navigating "
                            f"through the {fat} [{str(e)}].  This file "
                            f"is either corrupted, patched or exploiting a vulnerability.",
                            parent=parent_res)

                        res.set_heuristic(46)
                    elif "FAT chain: Invalid block index" in str(e):
                        if str(e).startswith('B'):
                            fat = 'BFAT'
                        else:
                            fat = 'SFAT'
                        res = ResultSection(
                            f"Hachoir lib found an invalid block index "
                            f"in the {fat} [{str(e)}].  This file is "
                            f"either corrupted, patched or exploiting a vulnerability.",
                            parent=parent_res)

                        res.set_heuristic(47)
                    elif str(e).startswith("OLE2: Invalid endian value"):
                        res = ResultSection(
                            f"The stream endian field is not valid "
                            f"[{str(e)}].  This file is either "
                            f"corrupted, patched or exploiting a vulnerability.",
                            parent=parent_res)

                        res.set_heuristic(48)
                    else:
                        # Unrecognized parser error: log the backtrace too.
                        res = ResultSection(
                            f"Hachoir lib DID NOT successfully parse the entire file ... "
                            f"odd [{str(e)}].",
                            parent=parent_res)

                        res.set_heuristic(49)
                        backtrace = getBacktrace(None)
                        self.log.info(
                            f"{self.task.sid}/{self.task.sha256}\n{backtrace}")

            except Exception:
                # Unknown failure: report once on the first attempt, and once
                # more if it happens again on a retry.
                if num_of_attempt == 15:
                    res = ResultSection(
                        "Hachoir lib DID NOT successfully parse the entire file ... odd.",
                        parent=parent_res)
                    res.set_heuristic(49)
                    backtrace = getBacktrace(None)
                    self.log.info(
                        f"{self.task.sid}/{self.task.sha256}\n{backtrace}")
                elif failed_again is False:
                    failed_again = True
                    ResultSection(
                        "Hachoir failed to parse the entire file after retrying.",
                        parent=parent_res)
                    backtrace = getBacktrace(None)
                    self.log.info(
                        f"{self.task.sid}/{self.task.sha256}\n{backtrace}")

            # NOTE(review): the loop always runs all 15 attempts, even after a
            # clean parse — presumably cheap on re-iteration; confirm intent.
            num_of_attempt -= 1
            keep_trying = num_of_attempt > 0
Example #18
0
    def dump_property(self, field, path, index, res, parent_res, is_orphan):
        """Render one OLE2 directory property as a KEY_VALUE subsection of *res*.

        Maps the property's data to a file offset (walking the root entry
        chain for short-sector streams), records its metadata, CLSID and
        timestamps, flags a malformed \\1CompObj entry, and runs any
        additional parser registered for the computed address.
        """
        if field['name'].value != '':
            # display is quoted; strip the surrounding quote characters.
            name = field['name'].display[1:-1]
            p_type = field['type'].value

            if path[-1:] == '\\':
                abs_name = f"{path}{name}"
            else:
                abs_name = f"{path}\\{name}"

            prop_res = ResultSection(f"Property: {abs_name}",
                                     body_format=BODY_FORMAT.KEY_VALUE,
                                     body={})

            # if type is not: 1- storage, 2- stream an not 5- root, that is weird.
            if p_type != 1 and p_type != 2 and p_type != 5:
                self.invalid_properties_count += 1

            # for properties not storage (which should be seen like a folder)
            if p_type != 1:
                size = field['size'].value
            else:
                size = 0

            address = 0
            if size > 0:
                if field['size'].value < self.ole2parser[
                        'header/threshold'].value and index != '0':
                    # we first get the offset from the short block but then we need
                    # to map it back to the file, which is from root[X].
                    offset = field['start'].value * self.ole2parser.ss_size
                    keep_looping = True
                    root_index = 0
                    while keep_looping:
                        try:
                            current_root = self.ole2parser[
                                f"root[{root_index}]"]

                            if offset == 0 or current_root.size > offset:
                                address = current_root.address + offset
                                keep_looping = False
                            else:
                                offset -= current_root.size
                                root_index += 1

                        except MissingField:
                            # Ran out of root entries: the stream cannot be
                            # mapped back into the file.
                            keep_looping = False
                            address = None
                            if not is_orphan:
                                self.invalid_streams.append(
                                    field['name'].display)
                else:
                    address = HEADER_SIZE + field[
                        'start'].value * self.ole2parser.sector_size
            else:
                address = 0

            # BUGFIX: address is None when the stream could not be mapped; on
            # Python 3 `None >= 0` raises TypeError (it was False on Python 2),
            # which crashed before reaching the "could not map" branch.
            if address is not None and address >= 0:
                prop_res.body['property_meta'] = \
                    f"offset: {hex(address // 8)} size: {hex(size)} / {field['type'].display} / " \
                    f"{field['decorator'].display} / id={index} left={field['left'].display} " \
                    f"right={field['right'].display} child={field['child'].display}"
            else:
                prop_res.body['property_meta'] = \
                    f"offset: could not map.. size: {hex(size)} / {field['type'].display} / " \
                    f"{field['decorator'].display} / id={index} left={field['left'].display} " \
                    f"right={field['right'].display} child={field['child'].display}"

            # for root or storage
            if p_type == 5 or p_type == 1:
                if field[
                        'clsid'].display != "Null GUID: 00000000-0000-0000-0000-000000000000":
                    clsid_desc = self.GUID_DESC.get(field['clsid'].display,
                                                    "unknown clsid")
                    prop_res.body[
                        "clsid"] = f"{field['clsid'].display} ({clsid_desc})"
                    prop_res.add_tag('file.ole.clsid', field['clsid'].display)
                if field['creation'].display != "1601-01-01 00:00:00":
                    prop_res.body["creation_date"] = field['creation'].display
                    prop_res.add_tag('file.date.creation',
                                     field['creation'].display)
                if field['lastmod'].display != "1601-01-01 00:00:00":
                    prop_res.body["last_modified_date"] = field[
                        'lastmod'].display
                    prop_res.add_tag('file.date.last_modified',
                                     field['lastmod'].display)

            # fixes up a bug:
            if name == '\\1CompObj':
                if p_type != 2:
                    res_error = ResultSection(
                        f"\\1CompObj type is '{p_type}' and it should be 2 (stream) "
                        f"... really suspicious.")
                    res_error.set_heuristic(41)
                    prop_res.add_subsection(res_error)
                    size = field['size'].value

                # Apparently, we can get to this point and have office_root_entry_parser set to None.
                # Not sure what we should do about that but trying to use that member variable seems
                # like a bad idea...
                if self.office_root_entry_parser is not None:
                    temp_field = None
                    for f in self.office_root_entry_parser.createFields():
                        if f.name.startswith('compobj'):
                            temp_field = f

                    # cache all the sub-fields....
                    # NOTE(review): assumes a 'compobj*' field always exists;
                    # if none is found temp_field stays None and this raises.
                    for _ in temp_field:
                        pass

                    self.parse_field(temp_field, prop_res,
                                     self.PARSING_MODE_DISPLAY, parent_res)

            if size > 0 and index != '0':
                field_with_other_parser = self.additional_parsing_fields.get(
                    address, None)

                if field_with_other_parser:
                    # noinspection PyTypeChecker
                    self.parse_field(field_with_other_parser, prop_res,
                                     self.PARSING_MODE_DISPLAY, parent_res)

            # Only attach when more than just 'property_meta' was recorded.
            if len(prop_res.body) > 1:
                prop_res.body = json.dumps(prop_res.body)
                res.add_subsection(prop_res)
    def execute(self, request):
        """Run ViperMonkey (via a Python 2 subprocess) and report its findings.

        Parses the JSON line that vipermonkey_compat.py2 prints, builds result
        sections for recorded actions, runtime IOCs, PowerShell discoveries
        and called VBA built-ins, and attaches the raw log as a supplementary.
        """
        self.result = Result()
        request.result = self.result
        self.request = request

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions = []
        external_functions = []
        tmp_iocs = []
        output_results = {}

        # Running ViperMonkey under the separate Python 2 interpreter.
        # NOTE(review): the command goes through the shell; the path comes
        # from the service itself, but an argv list with shell=False would
        # be safer and would survive paths containing spaces.
        cmd = " ".join([
            PYTHON2_INTERPRETER,
            os.path.join(os.path.dirname(__file__),
                         'vipermonkey_compat.py2'), request.file_path
        ])
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
        stdout, _ = p.communicate()

        # Read output: the JSON payload is the single stdout line that is
        # wrapped in braces.
        if stdout:
            for line in stdout.splitlines():
                if line.startswith(b"{") and line.endswith(b"}"):
                    try:
                        output_results = json.loads(line)
                    except UnicodeDecodeError:
                        # Retry after replacing undecodable bytes.
                        output_results = json.loads(
                            line.decode("utf-8", "replace"))
                    break

            # Checking for tuple in case vmonkey return is None
            # If no macros found, return is [][], if error, return is None
            if isinstance(output_results.get('vmonkey_values'), dict):
                '''
                Structure of variable "actions" is as follows:
                [action, description, parameter]
                action: 'Found Entry Point', 'Execute Command', etc...
                parameter: Parameters for function
                description: 'Shell Function', etc...

                external_functions is a list of built-in VBA functions
                that were called
                '''
                actions = output_results['vmonkey_values']['actions']
                external_functions = output_results['vmonkey_values'][
                    'external_funcs']
                tmp_iocs = output_results['vmonkey_values']['tmp_iocs']
            else:
                vmonkey_err = True
        else:
            vmonkey_err = True

        # Add vmonkey log as a supplemental file
        if 'stdout' in output_results:
            temp_log_copy = os.path.join(
                tempfile.gettempdir(), f'{request.sid}_vipermonkey_output.log')
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results['stdout'])

            self.request.add_supplementary(temp_log_copy,
                                           'vipermonkey_output.log',
                                           'ViperMonkey log output')
            if vmonkey_err is True:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1))

        if len(actions) > 0:
            # Creating action section
            action_section = ResultSection('Recorded Actions:',
                                           parent=self.result)
            action_section.add_tag('technique.macro', 'Contains VBA Macro(s)')
            for action in actions:  # Creating action sub-sections for each action
                cur_action = action[0]
                cur_description = action[2] if action[2] else cur_action

                # Entry point actions have an empty description field, re-organize result section for this case
                if cur_action == 'Found Entry Point':
                    sub_action_section = ResultSection('Found Entry Point',
                                                       parent=action_section)
                    sub_action_section.add_line(action[1])
                else:
                    # Action's description will be the sub-section name
                    sub_action_section = ResultSection(cur_description,
                                                       parent=action_section)
                    if cur_description == 'Shell function':
                        sub_action_section.set_heuristic(2)

                    # Parameters are sometimes stored as a list, account for this
                    if isinstance(action[1], list):
                        for item in action[1]:
                            # Parameters includes more than strings (booleans for example)
                            if isinstance(item, str):
                                # Check for PowerShell
                                self.extract_powershell(
                                    item, sub_action_section)
                        # Join list items into single string
                        param = ', '.join(str(a) for a in action[1])

                    else:
                        param = action[1]
                        # Parameters includes more than strings (booleans for example)
                        if isinstance(param, str):
                            self.extract_powershell(param, sub_action_section)

                    sub_action_section.add_line(f'Action: {cur_action}')
                    sub_action_section.add_line(f'Parameters: {param}')

                    # If decoded is true, possible base64 string has been found
                    self.check_for_b64(param, sub_action_section)

                    # Add urls/ips found in parameter to respective lists
                    self.find_ip(param)

        # Check tmp_iocs
        res_temp_iocs = ResultSection('Runtime temporary IOCs')
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs)
            self.check_for_b64(ioc, res_temp_iocs)
            self.find_ip(ioc)

        if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add PowerShell score/tag if found
        if self.found_powershell:
            ResultSection('Discovered PowerShell code in file',
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Add url/ip tags
        self.add_ip_tags()

        # Create section for built-in VBA functions called
        if len(external_functions) > 0:
            vba_builtin_dict = {}
            dict_path = os.path.join(os.path.dirname(__file__),
                                     'VBA_built_ins.txt')
            # File format: 'name; description' per line, '#' starts a comment.
            with open(dict_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if re.search(r'^#', line):
                        continue
                    if line:
                        line = line.split(';')
                        vba_builtin_dict[line[0].strip()] = line[1].strip()

            external_func_section = ResultSection(
                'VBA functions called',
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=self.result)
            for func in external_functions:
                if func in vba_builtin_dict:
                    external_func_section.add_line(func + ': ' +
                                                   vba_builtin_dict[func])
                else:
                    external_func_section.add_line(func)
Example #20
0
    def section_builder(self, parser, field_dict, result, parsertype="MWCP"):
        """Build a ResultSection for one parser's decoded output and add it to result.

        Looks up the parser's malware attribution metadata (for MWCP parsers),
        sets the category heuristic and attribution tags when any decoder
        output exists, then attaches subsections for each decoded field.
        """
        json_body = {}
        malware_name = ''
        malware_types = []
        mitre_group = ''
        mitre_att = ''
        category = 'malware'
        # get malware names from parser objects
        if parsertype == "RATDecoder":
            malware_name = parser
        if parsertype == "MWCP":
            # Only the parser objects matter here, not their mapping keys.
            for obj in self.file_parsers.values():
                if parser in obj.parser_list:
                    malware_name = obj.malware
                    malware_types = obj.malware_types
                    mitre_att = obj.mitre_att
                    mitre_group = obj.mitre_group
                    category = obj.category
                    # Copy whichever descriptive attributes are set into the
                    # section's JSON body.
                    for item in [
                            'classification', 'mitre_group', 'mitre_att',
                            'malware', 'malware_types', 'category'
                    ]:
                        val = getattr(obj, item, None)
                        if val:
                            json_body[item] = val
                    break
        parser_section = ResultSection(f"{parsertype} : {parser}")

        parser_section = classification_checker(parser_section, parser,
                                                self.file_parsers)
        if len(field_dict) > 0:  # if any decoder output exists raise heuristic
            parser_section.set_body(json.dumps(json_body),
                                    body_format=BODY_FORMAT.KEY_VALUE)
            parser_section.set_heuristic(HEURISTICS_MAP.get(category, 1),
                                         attack_id=mitre_att)
            parser_section.add_tag("source", parsertype)

            if malware_name:
                parser_section.add_tag('attribution.implant',
                                       malware_name.upper())
            if mitre_group:
                parser_section.add_tag('attribution.actor',
                                       mitre_group.upper())
            for malware_type in malware_types:
                parser_section.add_tag('attribution.family',
                                       malware_type.upper())
        # Create subsections and attach them to the main parser_section
        subsection_builder(parser_section, field_dict)

        other_key = "other"
        if other_key in field_dict:
            # "other" holds unmapped metadata; show it as its own section.
            other_content = field_dict[other_key]
            other_section = ResultSection("Other metadata found",
                                          body_format=BODY_FORMAT.KEY_VALUE,
                                          body=json.dumps(other_content))
            parser_section.add_subsection(other_section)

        for field in field_dict:
            if field != other_key and field not in FIELD_TAG_MAP:
                self.log.debug(f"{field} does not exist in FIELD_TAG_MAP")
        result.add_section(parser_section)
    def execute(self, request: ServiceRequest) -> None:
        self.result = Result()
        request.result = self.result

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions: List[str] = []
        external_functions: List[str] = []
        tmp_iocs: List[str] = []
        output_results: Dict[str, Any] = {}
        potential_base64: Set[str] = set()

        # Running ViperMonkey
        try:
            file_contents = request.file_contents
            input_file: str = request.file_path
            input_file_obj: Optional[IO] = None
            # Typical start to XML files
            if not file_contents.startswith(
                    b"<?") and request.file_type == "code/xml":
                # Default encoding/decoding if BOM not found
                encoding: Optional[str] = None
                decoding: Optional[str] = None
                # Remove potential BOMs from contents
                if file_contents.startswith(BOM_UTF8):
                    encoding = "utf-8"
                    decoding = "utf-8-sig"
                elif file_contents.startswith(BOM_UTF16):
                    encoding = "utf-16"
                    decoding = "utf-16"
                if encoding and decoding:
                    input_file_obj = tempfile.NamedTemporaryFile(
                        "w+", encoding=encoding)
                    input_file_obj.write(
                        file_contents.decode(decoding, errors="ignore"))
                    input_file = input_file_obj.name
                else:
                    # If the file_type was detected as XML, it's probably buried within but not actually an XML file
                    # Give no response as ViperMonkey can't process this kind of file
                    return
            cmd = " ".join([
                PYTHON2_INTERPRETER,
                os.path.join(os.path.dirname(__file__),
                             "vipermonkey_compat.py2"),
                input_file,
                self.working_directory,
            ])
            p = subprocess.run(cmd, capture_output=True, shell=True)
            stdout = p.stdout

            # Close file
            if input_file_obj and os.path.exists(input_file_obj.name):
                input_file_obj.close()

            # Add artifacts
            artifact_dir = os.path.join(
                self.working_directory,
                os.path.basename(input_file) + "_artifacts")
            if os.path.exists(artifact_dir):
                for file in os.listdir(artifact_dir):
                    try:
                        file_path = os.path.join(artifact_dir, file)
                        if os.path.isfile(file_path) and os.path.getsize(
                                file_path):
                            request.add_extracted(
                                file_path, file,
                                "File extracted by ViperMonkey during analysis"
                            )
                    except os.error as e:
                        self.log.warning(e)

            # Read output
            if stdout:
                for line in stdout.splitlines():
                    if line.startswith(b"{") and line.endswith(b"}"):
                        try:
                            output_results = json.loads(line)
                        except UnicodeDecodeError:
                            output_results = json.loads(
                                line.decode("utf-8", "replace"))
                        break

                # Checking for tuple in case vmonkey return is None
                # If no macros found, return is [][][], if error, return is None
                # vmonkey_err can still happen if return is [][][], log as warning instead of error
                if isinstance(output_results.get("vmonkey_values"), dict):
                    """
                    Structure of variable "actions" is as follows:
                    [action, parameters, description]
                    action: 'Found Entry Point', 'Execute Command', etc...
                    parameters: Parameters for function
                    description: 'Shell Function', etc...

                    external_functions is a list of built-in VBA functions
                    that were called
                    """
                    actions = output_results["vmonkey_values"]["actions"]
                    external_functions = output_results["vmonkey_values"][
                        "external_funcs"]
                    tmp_iocs = output_results["vmonkey_values"]["tmp_iocs"]
                    if output_results["vmonkey_err"]:
                        vmonkey_err = True
                        self.log.warning(output_results["vmonkey_err"])
                else:
                    vmonkey_err = True
            else:
                vmonkey_err = True

        except Exception:
            self.log.exception(
                f"Vipermonkey failed to analyze file {request.sha256}")

        if actions:
            # Creating action section
            action_section = ResultSection("Recorded Actions:",
                                           parent=self.result)
            action_section.add_tag("technique.macro", "Contains VBA Macro(s)")
            sub_action_sections: Dict[str, ResultSection] = {}
            for action, parameters, description in actions:  # Creating action sub-sections for each action
                if not description:  # For actions with no description, just use the type of action
                    description = action

                if description not in sub_action_sections:
                    # Action's description will be the sub-section name
                    sub_action_section = ResultSection(description,
                                                       parent=action_section)
                    sub_action_sections[description] = sub_action_section
                    if description == "Shell function":
                        sub_action_section.set_heuristic(2)
                else:
                    # Reuse existing section
                    sub_action_section = sub_action_sections[description]
                    if sub_action_section.heuristic:
                        sub_action_section.heuristic.increment_frequency()

                # Parameters are sometimes stored as a list, account for this
                if isinstance(parameters, list):
                    for item in parameters:
                        # Parameters includes more than strings (booleans for example)
                        if isinstance(item, str):
                            # Check for PowerShell
                            self.extract_powershell(item, sub_action_section,
                                                    request)
                    # Join list items into single string
                    param = ", ".join(str(p) for p in parameters)

                else:
                    param = parameters
                    # Parameters includes more than strings (booleans for example)
                    if isinstance(param, str):
                        self.extract_powershell(param, sub_action_section,
                                                request)

                # If the description field was empty, re-organize result section for this case
                if description == action:
                    sub_action_section.add_line(param)
                else:
                    sub_action_section.add_line(
                        f"Action: {action}, Parameters: {param}")

                # Check later for base64
                potential_base64.add(param)

                # Add urls/ips found in parameter to respective lists
                self.find_ip(param)
        # Check tmp_iocs
        res_temp_iocs = ResultSection("Runtime temporary IOCs")
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs, request)
            potential_base64.add(ioc)
            self.find_ip(ioc)

        if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add PowerShell score/tag if found
        if self.found_powershell:
            ResultSection("Discovered PowerShell code in file",
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Check parameters and temp_iocs for base64
        base64_section = ResultSection("Possible Base64 found",
                                       heuristic=Heuristic(5, frequency=0))
        for param in potential_base64:
            self.check_for_b64(param, base64_section, request,
                               request.file_contents)
        if base64_section.body:
            self.result.add_section(base64_section)

        # Add url/ip tags
        self.add_ip_tags()

        # Create section for built-in VBA functions called
        if len(external_functions) > 0:
            external_func_section = ResultSection(
                "VBA functions called",
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=self.result)
            for func in external_functions:
                if func in vba_builtins:
                    external_func_section.add_line(func + ": " +
                                                   vba_builtins[func])
                else:
                    external_func_section.add_line(func)

        # Add vmonkey log as a supplemental file if we have results
        if "stdout" in output_results and (vmonkey_err
                                           or request.result.sections):
            temp_log_copy = os.path.join(
                tempfile.gettempdir(), f"{request.sid}_vipermonkey_output.log")
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results["stdout"])

            request.add_supplementary(temp_log_copy, "vipermonkey_output.log",
                                      "ViperMonkey log output")
            if vmonkey_err is True:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1),
                )
    def execute(self, request):
        """Entry point: called every time the service receives a file to scan.

        This demo service always drops three embedded files (two of which
        generate random results and one empty results). In a real service you
        would never branch on specific hashes like this; it is done here so
        the unit tests can exercise every feature of the report generator.

        :param request: the service request; ``request.result`` is set to the
            generated :class:`Result` before returning.
        """
        if request.sha256 not in ['d729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                                  '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                                  'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
            # Main file: demonstrate every result section type, tagging,
            # heuristics and the file upload functions.
            self._demo_results(request)
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Empty results file: an empty Result object is a valid response.
            request.result = Result()
        else:
            # Randomized results file: content does not matter, everything
            # interesting was already shown for the main file.
            request.result = self._random_results()

    def _demo_results(self, request):
        """Build the full demonstration result for the main file.

        Creates one section of every supported body format, attaches the
        extracted and supplementary files, then saves the result back into
        the request. All section scores are summed in the service result and
        the Result classification is the highest one found in the sections.
        """
        result = Result()
        result.add_section(self._text_section())
        result.add_section(self._color_map_section())
        url_section, urls = self._url_section()
        result.add_section(url_section)
        memdump_section, data = self._memdump_section()
        result.add_section(memdump_section)
        result.add_section(self._kv_section())
        json_section, json_body = self._json_section()
        result.add_section(json_section)
        result.add_section(self._process_tree_section())
        result.add_section(self._table_section())
        self._add_extracted_files(request, data)
        self._add_supplementary_files(request, urls, json_body)
        # Wrap-up: save the result object back into the request.
        request.result = result

    def _text_section(self):
        """Standard TEXT section (BODY_FORMAT.TEXT is the default).

        Also demonstrates heuristics, attack ids and signatures.
        """
        text_section = ResultSection('Example of a default section')
        # Lines can be added one at a time...
        text_section.add_line(get_random_phrase())
        # ...or in bulk from a list (here, a random amount of random lines).
        text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])
        # A heuristic makes the section affect the file score; a signature
        # name can be associated with the heuristic at the same time.
        text_section.set_heuristic(3, signature="sig_one")
        # Attack ids can be attached to a heuristic after it was defined.
        text_section.heuristic.add_attack_id("T1066")
        # Signatures can also be added after the fact, optionally with a
        # frequency; calling add_signature_id twice for the same signature
        # effectively increases its frequency.
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=2)
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=3)
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_four", score=0)
        # The resulting heuristic therefore has:
        #   1. 1 attack ID: T1066
        #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
        #   3. Cumulative frequencies: sig_one=1, sig_two=5, sig_three=2, sig_four=1
        #   4. Score priority is: signature_score_map > add_signature_id score
        #      > heuristic default score, giving:
        #      sig_one=10 (default), sig_two=20 (add_signature_id),
        #      sig_three=30 (map), sig_four=40 (map beats the function score)
        #   5. Total section score: 1x10 + 5x20 + 2x30 + 1x40 = 210
        return text_section

    def _color_map_section(self):
        """GRAPH_DATA section: a color map bar over a min/max domain.

        e.g. this kind of section is used to display the entropy
        distribution in some services.
        """
        cmap_min = 0
        cmap_max = 20
        color_map_data = {
            'type': 'colormap',
            'data': {
                'domain': [cmap_min, cmap_max],
                'values': [random.random() * cmap_max for _ in range(50)]
            }
        }
        # The classification of a section can be set to any valid
        # classification for your system.
        return ResultSection("Example of colormap result section", body_format=BODY_FORMAT.GRAPH_DATA,
                             body=json.dumps(color_map_data), classification=cl_engine.RESTRICTED)

    def _url_section(self):
        """URL sections: clickable urls from a json encoded body.

        :return: ``(url_section, urls)`` — the section (with a multi-link
            sub-section attached) and the list of url dicts so the caller can
            also save them as a supplementary file.
        """
        # The body of a section can be set directly instead of line by line.
        random_host = get_random_host()
        url_section = ResultSection('Example of a simple url section', body_format=BODY_FORMAT.URL,
                                    body=json.dumps({"name": "Random url!", "url": f"https://{random_host}/"}))
        # Urls are very important features: tag them (type + value) so they
        # are easy to find in the system.
        url_section.add_tag("network.static.domain", random_host)

        # A list of urls can also be provided; with no name given, the url
        # link itself will be displayed.
        host1 = get_random_host()
        host2 = get_random_host()
        ip1 = get_random_ip()
        ip2 = get_random_ip()
        ip3 = get_random_ip()
        urls = [
            {"url": f"https://{host1}/"},
            {"url": f"https://{host2}/"},
            {"url": f"https://{ip1}/"},
            {"url": f"https://{ip2}/"},
            {"url": f"https://{ip3}/"}]

        # A heuristic can fire more than once without being associated to a
        # signature, via its frequency.
        url_heuristic = Heuristic(4, frequency=len(urls))

        url_sub_section = ResultSection('Example of a url section with multiple links',
                                        body=json.dumps(urls), body_format=BODY_FORMAT.URL,
                                        heuristic=url_heuristic)
        url_sub_section.add_tag("network.static.ip", ip1)
        url_sub_section.add_tag("network.static.ip", ip2)
        url_sub_section.add_tag("network.static.ip", ip3)
        url_sub_section.add_tag("network.static.domain", host1)
        url_sub_section.add_tag("network.dynamic.domain", host2)
        # Attach as a sub-section of url_section, not to the main result.
        url_section.add_subsection(url_sub_section)
        return url_section, urls

    def _memdump_section(self):
        """MEMORY_DUMP section: content dumped into a <pre/> html tag so the
        hexdump formatting is preserved.

        :return: ``(memdump_section, data)`` — the section and the hexdump
            string so the caller can reuse it for the extracted files.
        """
        data = hexdump(b"This is some random text that we will format as an hexdump and you'll see "
                       b"that the hexdump formatting will be preserved by the memory dump section!")
        memdump_section = ResultSection('Example of a memory dump section', body_format=BODY_FORMAT.MEMORY_DUMP,
                                        body=data)
        memdump_section.set_heuristic(random.randint(1, 4))
        return memdump_section, data

    def _kv_section(self):
        """KEY_VALUE section: key/value pairs displayed in the UI while
        staying easy to parse for automated tools.

        NB: definitely use this over a JSON body type since this one is
        displayed correctly in the UI for the user. The body must be a json
        dumps of a dictionary (only str, int, and booleans are allowed).
        """
        kv_body = {
            "a_str": "Some string",
            "a_bool": False,
            "an_int": 102,
        }
        return ResultSection('Example of a KEY_VALUE section', body_format=BODY_FORMAT.KEY_VALUE,
                             body=json.dumps(kv_body))

    def _json_section(self):
        """JSON section: tree view of JSON results, re-using the JSON editor
        from administration (https://github.com/josdejong/jsoneditor).

        NB: Use this sparingly! Important results should be their own
        result sections. The body must be a json dump of a python dict.

        :return: ``(json_section, json_body)`` — the section and the dict so
            the caller can also save it as a supplementary file.
        """
        json_body = {
            "a_str": "Some string",
            "a_list": ["a", "b", "c"],
            "a_bool": False,
            "an_int": 102,
            "a_dict": {
                "list_of_dict": [
                    {"d1_key": "val", "d1_key2": "val2"},
                    {"d2_key": "val", "d2_key2": "val2"}
                ],
                "bool": True
            }
        }
        json_section = ResultSection('Example of a JSON section', body_format=BODY_FORMAT.JSON,
                                     body=json.dumps(json_body))
        return json_section, json_body

    def _process_tree_section(self):
        """PROCESS_TREE section: nested dictionaries displayed as a tree.

        Each dict represents a process and must have the format:
        {"process_pid": int, "process_name": str, "command_line": str,
         "children": []}  where "children" is either empty or contains more
        dicts of the same structure.
        """
        nc_body = [
            {
                "process_pid": 123,
                "process_name": "evil.exe",
                "command_line": "C:\\evil.exe",
                "signatures": {},
                "children": [
                    {
                        "process_pid": 321,
                        "process_name": "takeovercomputer.exe",
                        "command_line": "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff",
                        "signatures": {"one": 250},
                        "children": [
                            {
                                "process_pid": 456,
                                "process_name": "evenworsethanbefore.exe",
                                "command_line": "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                                "signatures": {"one": 10, "two": 10, "three": 10},
                                "children": []
                            },
                            {
                                "process_pid": 234,
                                "process_name": "badfile.exe",
                                "command_line": "C:\\badfile.exe -k nothing_to_see_here",
                                "signatures": {"one": 1000, "two": 10, "three": 10, "four": 10, "five": 10},
                                "children": []
                            }
                        ]
                    },
                    {
                        "process_pid": 345,
                        "process_name": "benignexe.exe",
                        "command_line": "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                        "signatures": {"one": 2000},
                        "children": []
                    }
                ]
            },
            {
                "process_pid": 987,
                "process_name": "runzeroday.exe",
                "command_line": "C:\\runzeroday.exe -f insert_bad_spelling",
                "signatures": {},
                "children": []
            }
        ]
        return ResultSection('Example of a PROCESS_TREE section',
                             body_format=BODY_FORMAT.PROCESS_TREE,
                             body=json.dumps(nc_body))

    def _table_section(self):
        """TABLE section: a list of dict rows rendered as a table in the UI.

        A row value may itself be a flat dict, which is displayed as a
        nested table within a cell.
        """
        table_body = [
            {
                "a_str": "Some string1",
                "extra_column_here": "confirmed",
                "a_bool": False,
                "an_int": 101,
            },
            {
                "a_str": "Some string2",
                "a_bool": True,
                "an_int": 102,
            },
            {
                "a_str": "Some string3",
                "a_bool": False,
                "an_int": 103,
            },
            {
                "a_str": "Some string4",
                "a_bool": None,
                "an_int": -1000000000000000000,
                "extra_column_there": "confirmed",
                "nested_table": {
                    "a_str": "Some string3",
                    "a_bool": False,
                    "nested_table_thats_too_deep": {
                        "a_str": "Some string3",
                        "a_bool": False,
                        "an_int": 103,
                    },
                },
            },
        ]
        return ResultSection('Example of a TABLE section',
                             body_format=BODY_FORMAT.TABLE,
                             body=json.dumps(table_body))

    def _add_extracted_files(self, request, data):
        """Re-submit files to the system: extracted files are resubmitted
        for analysis.

        :param data: the hexdump string produced for the memory dump section.
        """
        # This file will generate random results on the next run.
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(data.encode())
        request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

        # Embedded files can also have their own classification!
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"CLASSIFIED!!!__" + data.encode())
        request.add_extracted(temp_path, "classified.doc", "Classified file ... don't look",
                              classification=cl_engine.RESTRICTED)

        # This file will generate empty results on the next run.
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"EMPTY")
        request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

    def _add_supplementary_files(self, request, urls, json_body):
        """Supplementary files: saved on the datastore for future reference
        but never reprocessed.
        """
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(urls))
        request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")
        # Like embedded files, more than one supplementary file can be added.
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(json_body))
        request.add_supplementary(temp_path, "json_body.json", "This is the json_body as a JSON file")

    def _random_results(self):
        """Completely randomized result for the 'random' embedded files."""
        embedded_result = Result()
        # Always adds exactly two sections — range(1, 3) yields two
        # iterations. (The sections themselves are random.)
        for _ in range(1, 3):
            embedded_result.add_section(self._create_random_section())
        return embedded_result
Example #23
0
    def execute(self, request: ServiceRequest) -> None:
        # --- Setup ----------------------------------------------------------------------------------------------
        request.result = Result()
        patterns = PatternMatch()

        if request.deep_scan:
            max_attempts = 100
        else:
            max_attempts = 10

        self.files_extracted = set()
        self.hashes = set()

        # --- Pre-Processing --------------------------------------------------------------------------------------
        # Get all IOCs prior to de-obfuscation
        pat_values = patterns.ioc_match(request.file_contents,
                                        bogon_ip=True,
                                        just_network=False)
        if pat_values and request.get_param('extract_original_iocs'):
            ioc_res = ResultSection(
                "The following IOCs were found in the original file",
                parent=request.result,
                body_format=BODY_FORMAT.MEMORY_DUMP)
            for k, val in pat_values.items():
                for v in val:
                    if ioc_res:
                        ioc_res.add_line(
                            f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}"
                        )
                        ioc_res.add_tag(k, v)

        # --- Prepare Techniques ----------------------------------------------------------------------------------
        techniques = [
            ('MSOffice Embedded script', self.msoffice_embedded_script_string),
            ('CHR and CHRB decode', self.chr_decode),
            ('String replace', self.string_replace),
            ('Powershell carets', self.powershell_carets),
            ('Array of strings', self.array_of_strings),
            ('Fake array vars', self.vars_of_fake_arrays),
            ('Reverse strings', self.str_reverse),
            ('B64 Decode', self.b64decode_str),
            ('Simple XOR function', self.simple_xor_function),
        ]
        second_pass = [('Concat strings', self.concat_strings),
                       ('MSWord macro vars', self.mswordmacro_vars),
                       ('Powershell vars', self.powershell_vars),
                       ('Charcode hex', self.charcode_hex)]
        final_pass = [
            ('Charcode', self.charcode),
        ]

        code_extracts = [('.*html.*', "HTML scripts extraction",
                          self.extract_htmlscript)]

        layers_list: List[Tuple[str, bytes]] = []
        layer = request.file_contents

        # --- Stage 1: Script Extraction --------------------------------------------------------------------------
        for pattern, name, func in code_extracts:
            if regex.match(regex.compile(pattern), request.task.file_type):
                extracted_parts = func(request.file_contents)
                layer = b"\n".join(extracted_parts).strip()
                layers_list.append((name, layer))
                break

        # --- Stage 2: Deobsfucation ------------------------------------------------------------------------------
        idx = 0
        first_pass_len = len(techniques)
        layers_count = len(layers_list)
        while True:
            if idx > max_attempts:
                final_pass.extend(techniques)
                for name, technique in final_pass:
                    res = technique(layer)
                    if res:
                        layers_list.append((name, res))
                break
            with ThreadPoolExecutor() as executor:
                threads = [
                    executor.submit(technique, layer)
                    for name, technique in techniques
                ]
                results = [thread.result() for thread in threads]
                for i in range(len(results)):
                    result = results[i]
                    if result:
                        layers_list.append((techniques[i][0], result))
                        # Looks like it worked, restart with new layer
                        layer = result
            # If the layers haven't changed in a passing, break
            if layers_count == len(layers_list):
                if len(techniques) != first_pass_len:
                    final_pass.extend(techniques)
                    with ThreadPoolExecutor() as executor:
                        threads = [
                            executor.submit(technique, layer)
                            for name, technique in final_pass
                        ]
                        results = [thread.result() for thread in threads]
                        for i in range(len(results)):
                            result = results[i]
                            if result:
                                layers_list.append((techniques[i][0], result))
                    break
                for x in second_pass:
                    techniques.insert(0, x)
            layers_count = len(layers_list)
            idx += 1

        # --- Compiling results ----------------------------------------------------------------------------------
        if len(layers_list) > 0:
            extract_file = False
            num_layers = len(layers_list)

            # Compute heuristic
            if num_layers < 5:
                heur_id = 1
            elif num_layers < 10:
                heur_id = 2
            elif num_layers < 50:
                heur_id = 3
            elif num_layers < 100:
                heur_id = 4
            else:  # num_layers >= 100
                heur_id = 5

            # Cleanup final layer
            clean = self.clean_up_final_layer(layers_list[-1][1])
            if clean != request.file_contents:
                # Check for new IOCs
                pat_values = patterns.ioc_match(clean,
                                                bogon_ip=True,
                                                just_network=False)
                diff_tags: Dict[str, List[bytes]] = {}

                for uri in pat_values.get('network.static.uri', []):
                    # Compare URIs without query string
                    uri = uri.split(b'?', 1)[0]
                    if uri not in request.file_contents:
                        diff_tags.setdefault('network.static.uri', [])
                        diff_tags['network.static.uri'].append(uri)

                if request.deep_scan or (len(clean) > 1000
                                         and heur_id >= 4) or diff_tags:
                    extract_file = True

                # Display obfuscation steps
                mres = ResultSection(
                    "De-obfuscation steps taken by DeobsfuScripter",
                    parent=request.result)
                if heur_id:
                    mres.set_heuristic(heur_id)

                lcount = Counter([x[0] for x in layers_list])
                for l, c in lcount.items():
                    mres.add_line(f"{l}, {c} time(s).")

                # Display final layer
                byte_count = 5000
                if extract_file:
                    # Save extracted file
                    byte_count = 500
                    file_name = f"{os.path.basename(request.file_name)}_decoded_final"
                    file_path = os.path.join(self.working_directory, file_name)
                    # Ensure directory exists before write
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    with open(file_path, 'wb+') as f:
                        f.write(clean)
                        self.log.debug(
                            f"Submitted dropped file for analysis: {file_path}"
                        )
                    request.add_extracted(file_path, file_name,
                                          "Final deobfuscation layer")

                ResultSection(f"First {byte_count} bytes of the final layer:",
                              body=safe_str(clean[:byte_count]),
                              body_format=BODY_FORMAT.MEMORY_DUMP,
                              parent=request.result)

                # Display new IOCs from final layer
                if len(diff_tags) > 0:
                    ioc_new = ResultSection(
                        "New IOCs found after de-obfustcation",
                        parent=request.result,
                        body_format=BODY_FORMAT.MEMORY_DUMP)
                    has_network_heur = False
                    for ty, val in diff_tags.items():
                        for v in val:
                            if "network" in ty:
                                has_network_heur = True
                            ioc_new.add_line(
                                f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}"
                            )
                            ioc_new.add_tag(ty, v)

                    if has_network_heur:
                        ioc_new.set_heuristic(7)
                    else:
                        ioc_new.set_heuristic(6)

                if len(self.files_extracted) > 0:
                    ext_file_res = ResultSection(
                        "The following files were extracted during the deobfuscation",
                        heuristic=Heuristic(8),
                        parent=request.result)
                    for extracted in self.files_extracted:
                        file_name = os.path.basename(extracted)
                        ext_file_res.add_line(file_name)
                        request.add_extracted(
                            extracted, file_name,
                            "File of interest deobfuscated from sample")
Example #24
0
    def execute(self, request) -> None:
        """Generate a demonstration result exercising every report feature.

        Depending on ``request.sha256`` this builds one of three results:
        the full set of example sections (main file), an empty ``Result``
        (one known embedded file), or a fully randomized ``Result`` built
        from ``self._create_random_section()`` (the other embedded file).

        Args:
            request: The service request being processed (presumably an
                Assemblyline ``ServiceRequest`` — it exposes ``sha256``,
                ``add_extracted``, ``add_supplementary`` and ``result``).
                Its ``result`` attribute is always set before returning.
        """
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results ...

        # You should run your code here...

        # ==================================================================
        # Check if we're scanning an embedded file
        #   This service always drops two embedded files: one generates random results, the other empty results.
        #   We're making a check to see if we're scanning one of those embedded files.
        #   In a normal service this is not something you would do at all but since we are using this
        #   service in our unit test to test all features of our report generator, we have to do this
        if request.sha256 not in [
                'd729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06'
        ]:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be saved to
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dump the text to the screen...
            #     All section scores will be SUMmed in the service result
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())
            # Or you can add them from a list
            #   Here we will generate a random amount of random lines
            text_section.add_lines(
                [get_random_phrase() for _ in range(random.randint(1, 5))])
            # If the section needs to affect the score of the file you need to set a heuristic
            #   Here we will pick one at random
            #     In addition to adding a heuristic, we will associate a signature with the heuristic;
            #     we do this by adding the signature name to the heuristic (here we generate a random name)
            text_section.set_heuristic(random.randint(1, 4),
                                       signature=get_random_phrase(
                                           1, 4).lower().replace(" ", "_"))
            # Make sure you add your section to the result
            result.add_section(text_section)

            # ==================================================================
            # Color map Section: BODY_FORMAT.GRAPH_DATA
            #     Creates a color map bar using a minimum and maximum domain
            #     e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            color_map_data = {
                'type': 'colormap',
                'data': {
                    'domain': [cmap_min, cmap_max],
                    'values': [random.random() * cmap_max for _ in range(50)]
                }
            }
            section_color_map = ResultSection(
                "Example of colormap result section",
                body_format=BODY_FORMAT.GRAPH_DATA,
                body=json.dumps(color_map_data))
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format
            #     As you can see here, the body of the section can be set directly instead of line by line
            random_host = get_random_host()
            url_section = ResultSection('Example of a simple url section',
                                        body_format=BODY_FORMAT.URL,
                                        body=json.dumps({
                                            "name":
                                            "Random url!",
                                            "url":
                                            f"https://{random_host}/"
                                        }))

            # Since urls are very important features we can tag those features in the system so they are easy to find
            #   Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of urls!
            #   Also, no need to provide a name, the url link will be displayed
            host1 = get_random_host()
            host2 = get_random_host()
            ip1 = get_random_ip()
            urls = [{
                "url": f"https://{host1}/"
            }, {
                "url": f"https://{host2}/"
            }, {
                "url": f"https://{ip1}/"
            }]
            url_sub_section = ResultSection(
                'Example of a url section with multiple links',
                body_format=BODY_FORMAT.URL,
                body=json.dumps(urls))
            url_sub_section.set_heuristic(random.randint(1, 4))
            url_sub_section.add_tag("network.static.ip", ip1)
            url_sub_section.add_tag("network.static.domain", host1)
            url_sub_section.add_tag("network.dynamic.domain", host2)
            # Since url_sub_section is a sub-section of url_section
            # we will add it as a sub-section of url_section, not to the main result itself
            url_section.add_subsection(url_sub_section)
            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
            data = hexdump(
                b"This is some random text that we will format as an hexdump and you'll see "
                b"that the hexdump formatting will be preserved by the memory dump section!"
            )
            memdump_section = ResultSection(
                'Example of a memory dump section',
                body_format=BODY_FORMAT.MEMORY_DUMP,
                body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
            #     while also providing easy to parse data for automated tools.
            #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
            #         in the UI for the user
            #     The body argument must be a json dumps of a dictionary (only str, int, and booleans are allowed)
            kv_body = {
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            }
            kv_section = ResultSection('Example of a KEY_VALUE section',
                                       body_format=BODY_FORMAT.KEY_VALUE,
                                       body=json.dumps(kv_body))
            result.add_section(kv_section)

            # ==================================================================
            # JSON section:
            #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
            #     to display a tree view of JSON results.
            #     NB: Use this sparingly! As a service developer you should do your best to include important
            #     results as their own result sections.
            #     The body argument must be a json dump of a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [{
                        "d1_key": "val",
                        "d1_key2": "val2"
                    }, {
                        "d2_key": "val",
                        "d2_key2": "val2"
                    }],
                    "bool":
                    True
                }
            }
            json_section = ResultSection('Example of a JSON section',
                                         body_format=BODY_FORMAT.JSON,
                                         body=json.dumps(json_body))
            result.add_section(json_section)

            # ==================================================================
            # Re-Submitting files to the system
            #     Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt",
                                  "Extracted by some magic!")

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt",
                                  "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #     Adding supplementary files will save them on the datastore for future
            #      reference but won't reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(urls))
            request.add_supplementary(temp_path, "urls.json",
                                      "These are urls as a JSON file")
            # like embedded files, you can add more than one supplementary file
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json",
                                      "This is the json_body as a JSON file")

            # ==================================================================
            # Wrap-up:
            #     Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Create an empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized results file, we will completely randomize the results
            #   The content of those results does not matter since we've already showed you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # Add two randomly generated sections
            #   NOTE(review): range(1, 3) always iterates exactly twice — the section
            #   *contents* are random, not the section count
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result
    def peepdf_analysis(self, temp_filename, file_content, request):
        file_res = Result()
        try:
            res_list = []
            # js_stream = []
            f_list = []
            js_dump = []

            pdf_parser = PDFParser()
            ret, pdf_file = pdf_parser.parse(temp_filename, True, False, file_content)
            if ret == 0:
                stats_dict = pdf_file.getStats()

                if ", ".join(stats_dict['Errors']) == "Bad PDF header, %%EOF not found, PDF sections not found, No " \
                                                      "indirect objects found in the body":
                    # Not a PDF
                    return

                json_body = dict(
                    version=stats_dict['Version'],
                    binary=stats_dict['Binary'],
                    linearized=stats_dict['Linearized'],
                    encrypted=stats_dict['Encrypted'],
                )

                if stats_dict['Encryption Algorithms']:
                    temp = []
                    for algorithmInfo in stats_dict['Encryption Algorithms']:
                        temp.append(f"{algorithmInfo[0]} {str(algorithmInfo[1])} bits")
                    json_body["encryption_algorithms"] = temp

                json_body.update(dict(
                    updates=stats_dict['Updates'],
                    objects=stats_dict['Objects'],
                    streams=stats_dict['Streams'],
                    comments=stats_dict['Comments'],
                    errors={True: ", ".join(stats_dict['Errors']),
                            False: "None"}[len(stats_dict['Errors']) != 0]
                ))
                res = ResultSection("PDF File Information", body_format=BODY_FORMAT.KEY_VALUE,
                                    body=json.dumps(json_body))

                for version in range(len(stats_dict['Versions'])):
                    stats_version = stats_dict['Versions'][version]
                    v_json_body = dict(
                        catalog=stats_version['Catalog'] or "no",
                        info=stats_version['Info'] or "no",
                        objects=self.list_first_x(stats_version['Objects'][1]),
                    )

                    if stats_version['Compressed Objects'] is not None:
                        v_json_body['compressed_objects'] = self.list_first_x(stats_version['Compressed Objects'][1])

                    if stats_version['Errors'] is not None:
                        v_json_body['errors'] = self.list_first_x(stats_version['Errors'][1])

                    v_json_body['streams'] = self.list_first_x(stats_version['Streams'][1])

                    if stats_version['Xref Streams'] is not None:
                        v_json_body['xref_streams'] = self.list_first_x(stats_version['Xref Streams'][1])

                    if stats_version['Object Streams'] is not None:
                        v_json_body['object_streams'] = self.list_first_x(stats_version['Object Streams'][1])

                    if int(stats_version['Streams'][0]) > 0:
                        v_json_body['encoded'] = self.list_first_x(stats_version['Encoded'][1])
                        if stats_version['Decoding Errors'] is not None:
                            v_json_body['decoding_errors'] = self.list_first_x(stats_version['Decoding Errors'][1])

                    if stats_version['Objects with JS code'] is not None:
                        v_json_body['objects_with_js_code'] = \
                            self.list_first_x(stats_version['Objects with JS code'][1])
                        # js_stream.extend(stats_version['Objects with JS code'][1])

                    res_version = ResultSection(f"Version {str(version)}", parent=res,
                                                body_format=BODY_FORMAT.KEY_VALUE, body=json.dumps(v_json_body))

                    actions = stats_version['Actions']
                    events = stats_version['Events']
                    vulns = stats_version['Vulns']
                    elements = stats_version['Elements']
                    is_suspicious = False
                    if events is not None or actions is not None or vulns is not None or elements is not None:
                        res_suspicious = ResultSection('Suspicious elements', parent=res_version)
                        if events is not None:
                            for event in events:
                                res_suspicious.add_line(f"{event}: {self.list_first_x(events[event])}")
                            is_suspicious = True
                        if actions is not None:
                            for action in actions:
                                res_suspicious.add_line(f"{action}: {self.list_first_x(actions[action])}")
                            is_suspicious = True
                        if vulns is not None:
                            for vuln in vulns:
                                if vuln in vulnsDict:
                                    temp = [vuln, ' (']
                                    for vulnCVE in vulnsDict[vuln]:
                                        if len(temp) != 2:
                                            temp.append(',')
                                            vulnCVE = "".join(vulnCVE) if isinstance(vulnCVE, list) else vulnCVE
                                            temp.append(vulnCVE)
                                            cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                            if cve_found:
                                                res_suspicious.add_tag('attribution.exploit',
                                                                       vulnCVE[cve_found.start():cve_found.end()])
                                                res_suspicious.add_tag('file.behavior',
                                                                       vulnCVE[cve_found.start():cve_found.end()])
                                    temp.append('): ')
                                    temp.append(str(vulns[vuln]))
                                    res_suspicious.add_line(temp)
                                else:
                                    res_suspicious.add_line(f"{vuln}: {str(vulns[vuln])}")
                                is_suspicious = True
                        if elements is not None:
                            for element in elements:
                                if element in vulnsDict:
                                    temp = [element, ' (']
                                    for vulnCVE in vulnsDict[element]:
                                        if len(temp) != 2:
                                            temp.append(',')
                                        vulnCVE = "".join(vulnCVE) if isinstance(vulnCVE, list) else vulnCVE
                                        temp.append(vulnCVE)
                                        cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                        if cve_found:
                                            res_suspicious.add_tag('attribution.exploit',
                                                                   vulnCVE[cve_found.start():cve_found.end()])
                                            res_suspicious.add_tag('file.behavior',
                                                                   vulnCVE[cve_found.start():cve_found.end()])
                                    temp.append('): ')
                                    temp.append(str(elements[element]))
                                    res_suspicious.add_line(temp)
                                    is_suspicious = True
                                else:
                                    res_suspicious.add_line(f"\t\t{element}: {str(elements[element])}")
                                    is_suspicious = True
                    res_suspicious.set_heuristic(8) if is_suspicious else None

                    urls = stats_version['URLs']
                    if urls is not None:
                        res.add_line("")
                        res_url = ResultSection('Found URLs', parent=res)
                        for url in urls:
                            res_url.add_line(f"\t\t{url}")
                            res_url.set_heuristic(9)

                    for obj in stats_version['Objects'][1]:
                        cur_obj = pdf_file.getObject(obj, version)

                        if cur_obj.containsJScode:
                            cur_res = ResultSection(f"Object [{obj} {version}] contains {len(cur_obj.JSCode)} "
                                                    f"block of JavaScript")
                            score_modifier = 0

                            js_idx = 0
                            for js in cur_obj.JSCode:
                                sub_res = ResultSection('Block of JavaScript', parent=cur_res)
                                js_idx += 1
                                js_score = 0
                                js_code, unescaped_bytes, _, _, _ = analyseJS(js)

                                js_dump += [x for x in js_code]

                                # Malicious characteristics
                                big_buffs = self.get_big_buffs("".join(js_code))
                                if len(big_buffs) == 1:
                                    js_score += 500 * len(big_buffs)
                                if len(big_buffs) > 0:
                                    js_score += 500 * len(big_buffs)
                                has_eval, has_unescape = self.check_dangerous_func("".join(js_code))
                                if has_unescape:
                                    js_score += 100
                                if has_eval:
                                    js_score += 100

                                js_cmt = ""
                                if has_eval or has_unescape or len(big_buffs) > 0:
                                    score_modifier += js_score
                                    js_cmt = "Suspiciously malicious "
                                    cur_res.add_tag('file.behavior', "Suspicious JavaScript in PDF")
                                    sub_res.set_heuristic(7)
                                js_res = ResultSection(f"{js_cmt}JavaScript Code (block: {js_idx})", parent=sub_res)

                                if js_score > 0:
                                    temp_js_outname = f"object{obj}-{version}_{js_idx}.js"
                                    temp_js_path = os.path.join(self.working_directory, temp_js_outname)
                                    temp_js_bin = "".join(js_code).encode("utf-8")
                                    f = open(temp_js_path, "wb")
                                    f.write(temp_js_bin)
                                    f.close()
                                    f_list.append(temp_js_path)

                                    js_res.add_line(f"The JavaScript block was saved as {temp_js_outname}")
                                    if has_eval or has_unescape:
                                        analysis_res = ResultSection("[Suspicious Functions]", parent=js_res)
                                        if has_eval:
                                            analysis_res.add_line("eval: This JavaScript block uses eval() function "
                                                                  "which is often used to launch deobfuscated "
                                                                  "JavaScript code.")
                                            analysis_res.set_heuristic(3)
                                        if has_unescape:
                                            analysis_res.add_line("unescape: This JavaScript block uses unescape() "
                                                                  "function. It may be legitimate but it is definitely "
                                                                  "suspicious since malware often use this to "
                                                                  "deobfuscate code blocks.")
                                            analysis_res.set_heuristic(3)

                                    buff_idx = 0
                                    for buff in big_buffs:
                                        buff_idx += 1
                                        error, new_buff = unescape(buff)
                                        if error == 0:
                                            buff = new_buff

                                        if buff not in unescaped_bytes:
                                            temp_path_name = None
                                            if ";base64," in buff[:100] and "data:" in buff[:100]:
                                                temp_path_name = f"obj{obj}_unb64_{buff_idx}.buff"
                                                try:
                                                    buff = b64decode(buff.split(";base64,")[1].strip())
                                                    temp_path = os.path.join(self.working_directory, temp_path_name)
                                                    f = open(temp_path, "wb")
                                                    f.write(buff)
                                                    f.close()
                                                    f_list.append(temp_path)
                                                except Exception:
                                                    self.log.error("Found 'data:;base64, ' buffer "
                                                                   "but failed to base64 decode.")
                                                    temp_path_name = None

                                            if temp_path_name is not None:
                                                buff_cond = f" and was resubmitted as {temp_path_name}"
                                            else:
                                                buff_cond = ""
                                            buff_res = ResultSection(
                                                f"A {len(buff)} bytes buffer was found in the JavaScript "
                                                f"block{buff_cond}. Here are the first 256 bytes.",
                                                parent=js_res, body=hexdump(bytes(buff[:256], "utf-8")),
                                                body_format=BODY_FORMAT.MEMORY_DUMP)
                                            buff_res.set_heuristic(2)

                                processed_sc = []
                                sc_idx = 0
                                for sc in unescaped_bytes:
                                    if sc not in processed_sc:
                                        sc_idx += 1
                                        processed_sc.append(sc)

                                        try:
                                            sc = sc.decode("hex")
                                        except Exception:
                                            pass

                                        shell_score = 500
                                        temp_path_name = f"obj{obj}_unescaped_{sc_idx}.buff"

                                        shell_res = ResultSection(f"Unknown unescaped {len(sc)} bytes JavaScript "
                                                                  f"buffer (id: {sc_idx}) was resubmitted as "
                                                                  f"{temp_path_name}. Here are the first 256 bytes.",
                                                                  parent=js_res)
                                        shell_res.set_body(hexdump(sc[:256]), body_format=BODY_FORMAT.MEMORY_DUMP)

                                        temp_path = os.path.join(self.working_directory, temp_path_name)
                                        f = open(temp_path, "wb")
                                        f.write(sc)
                                        f.close()
                                        f_list.append(temp_path)

                                        cur_res.add_tag('file.behavior', "Unescaped JavaScript Buffer")
                                        shell_res.set_heuristic(6)
                                        score_modifier += shell_score

                            if score_modifier > 0:
                                res_list.append(cur_res)

                        elif cur_obj.type == "stream":
                            if cur_obj.isEncodedStream and cur_obj.filter is not None:
                                data = cur_obj.decodedStream
                                encoding = cur_obj.filter.value.replace("[", "").replace("]", "").replace("/",
                                                                                                          "").strip()
                                val = cur_obj.rawValue
                                otype = cur_obj.elements.get("/Type", None)
                                sub_type = cur_obj.elements.get("/Subtype", None)
                                length = cur_obj.elements.get("/Length", None)

                            else:
                                data = cur_obj.rawStream
                                encoding = None
                                val = cur_obj.rawValue
                                otype = cur_obj.elements.get("/Type", None)
                                sub_type = cur_obj.elements.get("/Subtype", None)
                                length = cur_obj.elements.get("/Length", None)

                            if otype:
                                otype = otype.value.replace("/", "").lower()
                            if sub_type:
                                sub_type = sub_type.value.replace("/", "").lower()
                            if length:
                                length = length.value

                            if otype == "embeddedfile":
                                if len(data) > 4096:
                                    if encoding is not None:
                                        temp_encoding_str = f"_{encoding}"
                                    else:
                                        temp_encoding_str = ""

                                    cur_res = ResultSection(
                                        f'Embedded file found ({length} bytes) [obj: {obj} {version}] '
                                        f'and dumped for analysis {f"(Type: {otype}) " if otype is not None else ""}'
                                        f'{f"(SubType: {sub_type}) " if sub_type is not None else ""}'
                                        f'{f"(Encoded with {encoding})" if encoding is not None else ""}'
                                    )

                                    temp_path_name = f"EmbeddedFile_{obj}{temp_encoding_str}.obj"
                                    temp_path = os.path.join(self.working_directory, temp_path_name)
                                    f = open(temp_path, "wb")
                                    f.write(data)
                                    f.close()
                                    f_list.append(temp_path)

                                    cur_res.add_line(f"The EmbeddedFile object was saved as {temp_path_name}")
                                    res_list.append(cur_res)

                            elif otype not in BANNED_TYPES:
                                cur_res = ResultSection(
                                    f'Unknown stream found [obj: {obj} {version}] '
                                    f'{f"(Type: {otype}) " if otype is not None else ""}'
                                    f'{f"(SubType: {sub_type}) " if sub_type is not None else ""}'
                                    f'{f"(Encoded with {encoding})" if encoding is not None else ""}'
                                )
                                for line in val.splitlines():
                                    cur_res.add_line(line)

                                emb_res = ResultSection('First 256 bytes', parent=cur_res)
                                first_256 = data[:256]
                                if isinstance(first_256, str):
                                    first_256 = first_256.encode()
                                emb_res.set_body(hexdump(first_256), BODY_FORMAT.MEMORY_DUMP)
                                res_list.append(cur_res)
                        else:
                            pass

                file_res.add_section(res)

                for results in res_list:
                    file_res.add_section(results)

                if js_dump:
                    js_dump_res = ResultSection('Full JavaScript dump')

                    temp_js_dump = "javascript_dump.js"
                    temp_js_dump_path = os.path.join(self.working_directory, temp_js_dump)
                    try:
                        temp_js_dump_bin = "\n\n----\n\n".join(js_dump).encode("utf-8")
                    except UnicodeDecodeError:
                        temp_js_dump_bin = "\n\n----\n\n".join(js_dump)
                    temp_js_dump_sha1 = hashlib.sha1(temp_js_dump_bin).hexdigest()
                    f = open(temp_js_dump_path, "wb")
                    f.write(temp_js_dump_bin)
                    f.flush()
                    f.close()
                    f_list.append(temp_js_dump_path)

                    js_dump_res.add_line(f"The JavaScript dump was saved as {temp_js_dump}")
                    js_dump_res.add_line(f"The SHA-1 for the JavaScript dump is {temp_js_dump_sha1}")

                    js_dump_res.add_tag('file.pdf.javascript.sha1', temp_js_dump_sha1)
                    file_res.add_section(js_dump_res)

                for filename in f_list:
                    request.add_extracted(filename, os.path.basename(filename),
                                          f"Dumped from {os.path.basename(temp_filename)}")

            else:
                res = ResultSection("ERROR: Could not parse file with PeePDF.")
                file_res.add_section(res)
        finally:
            request.result = file_res
            try:
                del pdf_file
            except Exception:
                pass

            try:
                del pdf_parser
            except Exception:
                pass

            gc.collect()
    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results ...

        # You should run your code here...

        # ==================================================================
        # Check if we're scanning an embedded file
        #   This service always drops 3 embedded files, two of which generate random results and the other empty results
        #   We're making a check to see if we're scanning the embedded file.
        #   In a normal service this is not something you would do at all but since we are using this
        #   service in our unit test to test all features of our report generator, we have to do this
        if request.sha256 not in [
                'd729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06'
        ]:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be saved to
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dumps the text to the screen...
            #     All sections scores will be SUMed in the service result
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultTextSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())
            # Or your can add them from a list
            #   Here we will generate random amount of random lines
            text_section.add_lines(
                [get_random_phrase() for _ in range(random.randint(1, 5))])
            # You can tag data to a section, tagging is used to quickly find defining information about a file
            text_section.add_tag("attribution.implant", "ResultSample")
            # If the section needs to affect the score of the file you need to set a heuristics
            #   Here we will pick one at random
            #     In addition to add a heuristic, we will associated a signature with the heuristic,
            #     we're doing this by adding the signature name to the heuristic. (Here we generating a random name)
            text_section.set_heuristic(3, signature="sig_one")
            # You can attach attack ids to heuristics after they were defined
            text_section.heuristic.add_attack_id(
                random.choice(list(software_map.keys())))
            text_section.heuristic.add_attack_id(
                random.choice(list(attack_map.keys())))
            text_section.heuristic.add_attack_id(
                random.choice(list(group_map.keys())))
            text_section.heuristic.add_attack_id(
                random.choice(list(revoke_map.keys())))
            # Same thing for the signatures, they can be added to heuristic after the fact and you can even say how
            #   many time the signature fired by setting its frequency. If you call add_signature_id twice with the
            #   same signature, this will effectively increase the frequency of the signature.
            text_section.heuristic.add_signature_id("sig_two",
                                                    score=20,
                                                    frequency=2)
            text_section.heuristic.add_signature_id("sig_two",
                                                    score=20,
                                                    frequency=3)
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_four", score=0)
            # The heuristic for text_section should have the following properties
            #   1. 1 attack ID: T1066
            #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
            #   3. Signature frequencies are cumulative, therefore they will be as follows:
            #      - sig_one = 1
            #      - sig_two = 5
            #      - sig_three = 2
            #      - sig_four = 1
            #   4. The score used by each heuristic is driven by the following rules: signature_score_map has the
            #      highest priority, then the score value from add_signature_id is in second place, and finally the
            #      default heuristic score is used. Therefore the scores used to calculate the total score for the
            #      text_section are as follows:
            #      - sig_one: 10    -> heuristic default score
            #      - sig_two: 20    -> score provided by the function add_signature_id
            #      - sig_three: 30  -> score provided by the heuristic map
            #      - sig_four: 40   -> score provided by the heuristic map because it's higher priority than the
            #                          function score
            #    5. Total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210
            # Make sure you add your section to the result
            result.add_section(text_section)

            # Even if the section was added to the results you can still modify it by adding a subsection for example
            ResultSection(
                "Example of sub-section without a body added later in processing",
                parent=text_section)

            # ==================================================================
            # Color map Section: BODY_FORMAT.GRAPH_DATA
            #     Creates a color map bar using a minimum and maximum domain
            #     e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            cmap_values = [random.random() * cmap_max for _ in range(50)]
            # The classification of a section can be set to any valid classification for your system
            section_color_map = ResultGraphSection(
                "Example of colormap result section",
                classification=cl_engine.RESTRICTED)
            section_color_map.set_colormap(cmap_min, cmap_max, cmap_values)
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format
            #     As you can see here, the body of the section can be set directly instead of line by line
            random_host = get_random_host()
            url_section = ResultURLSection('Example of a simple url section')
            url_section.add_url(f"https://{random_host}/", name="Random url!")

            # Since urls are very important features we can tag those features in the system so they are easy to find
            #   Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of url!
            #   Also, No need to provide a name, the url link will be displayed
            hosts = [get_random_host() for _ in range(2)]

            # A heuristic can fire more than once without being associated with a signature
            url_heuristic = Heuristic(4, frequency=len(hosts))

            url_sub_section = ResultURLSection(
                'Example of a url sub-section with multiple links',
                heuristic=url_heuristic,
                classification=cl_engine.RESTRICTED)
            for host in hosts:
                url_sub_section.add_url(f"https://{host}/")
                url_sub_section.add_tag("network.static.domain", host)

            # You can keep nesting sections if you really need to
            ips = [get_random_ip() for _ in range(3)]
            url_sub_sub_section = ResultURLSection(
                'Exemple of a two level deep sub-section')
            for ip in ips:
                url_sub_sub_section.add_url(f"https://{ip}/")
                url_sub_sub_section.add_tag("network.static.ip", ip)

            # Since url_sub_sub_section is a sub-section of url_sub_section
            # we will add it as a sub-section of url_sub_section not to the main result itself
            url_sub_section.add_subsection(url_sub_sub_section)

            # Invalid sections will be ignored, and an error will appear in the logs
            # Sub-sections of invalid sections will be ignored too
            invalid_section = ResultSection("")
            ResultSection(
                "I won't make it to the report because my parent is invalid :(",
                parent=invalid_section)
            url_sub_section.add_subsection(invalid_section)

            # Since url_sub_section is a sub-section of url_section
            # we will add it as a sub-section of url_section not to the main result itself
            url_section.add_subsection(url_sub_section)

            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
            data = hexdump(
                b"This is some random text that we will format as an hexdump and you'll see "
                b"that the hexdump formatting will be preserved by the memory dump section!"
            )
            memdump_section = ResultMemoryDumpSection(
                'Example of a memory dump section', body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
            #     while also providing easy-to-parse data for automated tools.
            #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
            #         in the UI for the user
            #     The body argument must be a dictionary (only str, int, and booleans are allowed)
            kv_section = ResultKeyValueSection(
                'Example of a KEY_VALUE section')
            # You can add items individually
            kv_section.set_item('key', "value")
            # Or simply add them in bulk
            kv_section.update_items({
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            })
            result.add_section(kv_section)

            # ==================================================================
            # ORDERED_KEY_VALUE section:
            #     This section provides the same functionality as the KEY_VALUE section except the order of the fields
            #     are guaranteed to be preserved in the order in which the fields are added to the section. Also with
            #     this section, you can repeat the same key name multiple times
            oredered_kv_section = ResultOrderedKeyValueSection(
                'Example of an ORDERED_KEY_VALUE section')
            # You can add items individually
            for x in range(random.randint(3, 6)):
                oredered_kv_section.add_item(f'key{x}', f"value{x}")

            result.add_section(oredered_kv_section)

            # ==================================================================
            # JSON section:
            #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
            #     to display a tree view of JSON results.
            #     NB: Use this sparingly! As a service developer you should do your best to include important
            #     results as their own result sections.
            #     The body argument must be a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [{
                        "d1_key": "val",
                        "d1_key2": "val2"
                    }, {
                        "d2_key": "val",
                        "d2_key2": "val2"
                    }],
                    "bool":
                    True
                }
            }
            json_section = ResultJSONSection('Example of a JSON section')
            # You can set the json result to a specific value
            json_section.set_json(json_body)
            # You can also update specific parts after the fact
            json_section.update_json({
                'an_int': 1000,
                'updated_key': 'updated_value'
            })

            result.add_section(json_section)

            # ==================================================================
            # PROCESS_TREE section:
            #     This section allows the service writer to list a bunch of dictionary objects that have nested lists
            #     of dictionaries to be displayed in the UI. Each dictionary object represents a process, and therefore
            #     each dictionary must be of the following format:
            #     {
            #       "process_pid": int,
            #       "process_name": str,
            #       "command_line": str,
            #       "signatures": {}  This dict has the signature name as a key and the score as it's value
            #       "children": []    NB: This list either is empty or contains more dictionaries that have the same
            #                             structure
            #     }
            process_tree_section = ResultProcessTreeSection(
                'Example of a PROCESS_TREE section')
            # You can use the ProcessItem class to create the processes to add to the result section
            evil_process = ProcessItem(123, "evil.exe", "c:\\evil.exe")
            evil_process_child_1 = ProcessItem(
                321, "takeovercomputer.exe",
                "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff")
            # You can add child processes to the ProcessItem objects
            evil_process_child_1.add_child_process(
                ProcessItem(
                    456,
                    "evenworsethanbefore.exe",
                    "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                    signatures={
                        "one": 10,
                        "two": 10,
                        "three": 10
                    }))
            evil_process_child_1.add_child_process(
                ProcessItem(234,
                            "badfile.exe",
                            "C:\\badfile.exe -k nothing_to_see_here",
                            signatures={
                                "one": 1000,
                                "two": 10,
                                "three": 10,
                                "four": 10,
                                "five": 10
                            }))

            # You can add signatures that hit on a ProcessItem Object
            evil_process_child_1.add_signature('one', 250)

            # Or even directly create the ProcessItem object with the signature in it
            evil_process_child_2 = ProcessItem(
                345,
                "benignexe.exe",
                "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                signatures={"one": 2000})

            # You can also add counts for network, file and registry events to a ProcessItem object
            evil_process_child_2.add_network_events(4)
            evil_process_child_2.add_file_events(7000)
            evil_process_child_2.add_registry_events(10)

            # You can also indicate if the process tree item has been safelisted
            benign_process = ProcessItem(678, "trustme.exe", "C:\\trustme.exe")
            benign_process.safelist()

            evil_process.add_child_process(evil_process_child_1)
            evil_process.add_child_process(evil_process_child_2)

            # Add your processes to the result section via the add_process function
            process_tree_section.add_process(evil_process)
            process_tree_section.add_process(
                ProcessItem(987, "runzeroday.exe",
                            "C:\\runzeroday.exe -f insert_bad_spelling"))
            process_tree_section.add_process(benign_process)

            result.add_section(process_tree_section)

            # ==================================================================
            # TABLE section:
            #     This section allows the service writer to have their content displayed in a table format in the UI
            #     The body argument must be a list [] of dict {} objects. A dict object can have a key value pair
            #     where the value is a flat nested dictionary, and this nested dictionary will be displayed as a nested
            #     table within a cell.
            table_section = ResultTableSection('Example of a TABLE section')
            # Use the TableRow class to help adding row to the Table section
            table_section.add_row(
                TableRow(a_str="Some string1",
                         extra_column_here="confirmed",
                         a_bool=False,
                         an_int=101))
            table_section.add_row(
                TableRow(
                    {
                        "a_str": "Some string2",
                        "a_bool": True,
                        "an_int": "to_be_overriden_by_kwargs"
                    },
                    an_int=102))
            table_section.add_row(
                TableRow(a_str="Some string3", a_bool=False, an_int=103))
            # Valid values for the items in the TableRow are: str, int, bool, None, or dict of those values
            table_section.add_row(
                TableRow(
                    {
                        "a_str": "Some string4",
                        "a_bool": None,
                        "an_int": -1000000000000000000
                    }, {
                        "extra_column_there": "confirmed",
                        "nested_key_value_pair": {
                            "a_str": "Some string3",
                            "a_bool": False,
                            "nested_kv_thats_too_deep": {
                                "a_str": "Some string3",
                                "a_bool": False,
                                "an_int": 103,
                            },
                        }
                    }))
            result.add_section(table_section)

            # ==================================================================
            # Re-Submitting files to the system
            #     Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt",
                                  "Extracted by some magic!")

            # Embedded files can also have their own classification!
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"CLASSIFIED!!!__" + data.encode())
            request.add_extracted(temp_path,
                                  "classified.doc",
                                  "Classified file ... don't look",
                                  classification=cl_engine.RESTRICTED)

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt",
                                  "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #     Adding supplementary files will save them on the datastore for future
            #      reference but won't reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(url_sub_section.body)
            request.add_supplementary(temp_path, "urls.json",
                                      "These are urls as a JSON file")
            # Like embedded files, you can add more than one supplementary file
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json",
                                      "This is the json_body as a JSON file")

            # ==================================================================
            # Zeroize on safe tags
            #     When this feature is turned on, the section will get its score set to zero if all its tags
            #     were safelisted by the safelisting engine
            zero_section = ResultSection('Example of zeroize-able section',
                                         zeroize_on_tag_safe=True)
            zero_section.set_heuristic(2)
            zero_section.add_line(
                "This section will have a zero score if all tags are safelisted."
            )
            zero_section.add_tag('network.static.ip', '127.0.0.1')
            result.add_section(zero_section)

            # ==================================================================
            # Auto-collapse
            #     When this feature is turned on, the section will be collapsed when first displayed
            collapse_section = ResultSection(
                'Example of auto-collapse section', auto_collapse=True)
            collapse_section.set_heuristic(2)
            collapse_section.add_line(
                "This section was collapsed when first loaded in the UI")
            result.add_section(collapse_section)

            # ==================================================================
            # Image Section
            #     This type of section allows the service writer to display images to the user
            image_section = ResultImageSection(request,
                                               'Example of Image section')
            for x in range(6):
                image_section.add_image(f'data/000{x+1}.jpg',
                                        f'000{x+1}.jpg',
                                        f'ResultSample screenshot 000{x+1}',
                                        ocr_heuristic_id=6)
            result.add_section(image_section)

            # ==================================================================
            # Multi Section
            #     This type of section allows the service writer to display multiple section types
            #     in the same result section. Here's a concrete example of this:
            multi_section = ResultMultiSection(
                'Example of Multi-typed section')
            multi_section.add_section_part(
                TextSectionBody(
                    body="We have detected very high entropy multiple sections "
                    "of your file, this section is most-likely packed or "
                    "encrypted.\n\nHere are affected sections:"))
            section_count = random.randint(1, 4)
            for x in range(section_count):
                multi_section.add_section_part(
                    KVSectionBody(section_name=f".UPX{x}",
                                  offset=f'0x00{8+x}000',
                                  size='4196 bytes'))
                graph_part = GraphSectionBody()
                graph_part.set_colormap(
                    0, 8, [7 + random.random() for _ in range(20)])
                multi_section.add_section_part(graph_part)
                if x != section_count - 1:
                    multi_section.add_section_part(DividerSectionBody())
                multi_section.add_tag("file.pe.sections.name", f".UPX{x}")

            multi_section.set_heuristic(5)
            result.add_section(multi_section)

            # ==================================================================
            # Propagate temporary submission data to other services
            #   Sometimes two services can work in tandem where one extracts a piece of information that the
            #   other one uses to do its work. This is how a service can set temporary data that other
            #   services can subscribe to and use.
            request.temp_submission_data['kv_section'] = kv_section.body
            request.temp_submission_data[
                'process_tree_section'] = process_tree_section.body
            request.temp_submission_data['url_section'] = url_sub_section.body

            # ==================================================================
            # Wrap-up:
            #     Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Creating an empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized results file, we will completely randomize the results
            #   The content of those results do not matter since we've already showed you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # random number of sections
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result
    def analyze_pdf(self,
                    request,
                    res_txt,
                    path,
                    working_dir,
                    heur,
                    additional_keywords,
                    get_malform=True):
        """Extract metadata, keyword objects and content of interest from a PDF sample using PDFId, PDFId plugins,
        and PDF Parser.

        Args:
            request: AL request object.
            res_txt: Header string for AL result section title.
            path: Original PDF sample path.
            working_dir: AL working directory.
            heur: List of plugins to run on PDFId results (provided in service configuration).
            additional_keywords: List of additional keywords to be searched (provided in service configuration).
            get_malform: Extract malformed objects from PDF.

        Returns:
            AL result object, AL heuristics list to add to result, list of object streams (objstms), and an errors list.
        """
        triage_keywords = set()
        all_errors = set()
        embed_present = False
        objstms = False
        res = ResultSection(title_text=res_txt)
        carved_extracted_shas = set()

        if request.deep_scan:
            run_pdfparse = True
        else:
            run_pdfparse = False

        # Run PDFId
        try:
            pdfid_result, errors = self.get_pdfid(path, additional_keywords,
                                                  heur, request.deep_scan)
        except Exception as e:
            raise NonRecoverableError(e)
        # Parse PDFId results
        pdfidres = ResultSection(title_text="PDFID Results", parent=res)
        if len(pdfid_result) == 0:
            pdfidres.add_line(
                "No results generated for file. Please see errors.")
        else:
            # Do not run for objstms, which are being analyzed when get_malform == False
            if get_malform:
                version = pdfid_result.get("PDFID", None)
                if version:
                    pdfidres.add_line(version[0])
                properties = pdfid_result.get("Properties", None)
                if properties:
                    pres = ResultSection(title_text="PDF Properties",
                                         parent=pdfidres)
                    for plist in properties:
                        pres.add_line("{0}: {1}".format(plist[0], plist[1]))
                        if plist[0] == "/ModDate":
                            pres.add_tag('file.pdf.date.modified', plist[1])
                        elif plist[0] == "/CreationDate":
                            pres.add_tag('file.date.creation', plist[1])
                        elif plist[0] == "/LastModified":
                            pres.add_tag('file.date.last_modified', plist[1])
                        elif plist[0] == "/SourceModified":
                            pres.add_tag('file.pdf.date.source_modified',
                                         plist[1])
                        elif plist[0] == "/pdfx":
                            pres.add_tag('file.pdf.date.pdfx', plist[1])
                entropy = pdfid_result.get("Entropy", None)
                if entropy:
                    enres = ResultSection(title_text="Entropy",
                                          parent=pdfidres)
                    for enlist in entropy:
                        enres.add_line("{0}: {1}, ({2})".format(
                            enlist[0], enlist[1], enlist[2]))
            flags = pdfid_result.get("Flags", None)
            if flags:
                fres = ResultSection(title_text="PDF Keyword Flags",
                                     parent=pdfidres)
                for flist in flags:
                    if flist[0] == "/ObjStm":
                        objstms = True
                    if len(flist) == 3:
                        fres.add_line(
                            "{0}:Count: {1}, Hex-Encoded Count: {2}".format(
                                flist[0], flist[1], flist[2]))
                    else:
                        fres.add_line("{0}:Count: {1}".format(
                            flist[0], flist[1]))
                    fres.add_tag('file.string.extracted',
                                 flist[0].replace("/", "", 1))
                    if flist[0] in additional_keywords:
                        triage_keywords.add(flist[0].replace("/", "", 1))

            plugin = pdfid_result.get("Plugin", [])

            # If any plugin results, or flagged keywords found, run PDF Parser
            if plugin or len(triage_keywords) > 0:
                run_pdfparse = True

            for pllist in plugin:
                pl_name, pl_heur, pl_text = pllist
                pl_heur = int(pl_heur)
                pl_text = pl_text[14:]
                if not pl_text or pl_text == "None":
                    continue

                if pl_name in ['EmbeddedFile', 'Name Obfuscation']:
                    modres = ResultSection(title_text=pl_text, parent=pdfidres)

                    if pl_heur > 0:
                        modres.set_heuristic(pl_heur)

                    if pl_name == 'EmbeddedFile':
                        embed_present = True

                elif pl_name in ['Triage', 'Suspicious Properties']:
                    javascript_found = False
                    for line in pl_text.splitlines():
                        lineres = ResultSection(title_text=line)
                        # Triage results
                        if '/JavaScript' in line:
                            triage_keywords.add('JavaScript')
                            if not javascript_found:
                                lineres.set_heuristic(19)
                                javascript_found = True
                        elif '/JS' in line:
                            triage_keywords.add('JS')
                            if not javascript_found:
                                lineres.set_heuristic(19)
                                javascript_found = True
                        elif '/JBIG2Decode' in line:
                            triage_keywords.add('JBIG2Decode')
                            lineres.set_heuristic(3)
                        elif '/Colors > 2^24' in line:
                            triage_keywords.add('Colors > 2^24')
                            lineres.set_heuristic(20)
                        elif '/AA' in line:
                            triage_keywords.add('AA')
                            lineres.set_heuristic(1)
                        elif '/Launch' in line:
                            triage_keywords.add('Launch')
                            lineres.set_heuristic(1)
                        elif '/OpenAction' in line:
                            triage_keywords.add('OpenAction')
                            lineres.set_heuristic(1)
                        elif '/GoToE' in line:
                            triage_keywords.add('GoToE')
                            lineres.set_heuristic(21)
                        elif '/GoToR' in line:
                            triage_keywords.add('GoToR')
                            lineres.set_heuristic(22)
                        elif '/Encrypt' in line:
                            triage_keywords.add('Encrypt')
                            lineres.set_heuristic(11)
                        elif '/AcroForm' in line:
                            triage_keywords.add('AcroForm')
                            lineres.set_heuristic(4)
                        elif '/RichMedia' in line:
                            triage_keywords.add('RichMedia')
                            lineres.set_heuristic(5)
                        elif '/XFA' in line:
                            triage_keywords.add('XFA')
                            lineres.set_heuristic(23)
                        elif '/Annot' in line:
                            triage_keywords.add('Annot')
                            lineres.set_heuristic(25)
                        elif '/ObjStm' in line:
                            triage_keywords.add('ObjStm')
                            lineres.set_heuristic(7)
                        elif '/URI' in line:
                            triage_keywords.add('URI')
                            lineres.set_heuristic(24)

                        # Suspicious properties results
                        elif "eof2" in line:
                            lineres.set_heuristic(2)
                        elif "eof5" in line:
                            lineres.set_heuristic(17)
                        elif "page" in line:
                            lineres.set_heuristic(26)
                        elif "entropy" in line:
                            lineres.set_heuristic(12)
                        elif "obj/endobj" in line:
                            lineres.set_heuristic(13)
                        elif "stream/endstream" in line:
                            lineres.set_heuristic(14)

                        if lineres.heuristic is not None:
                            pdfidres.add_subsection(lineres)

        for e in errors:
            all_errors.add(e)
            if e.startswith('Error running plugin'):
                self.log.warn(e)

        if run_pdfparse:
            # CALL PDF parser and extract further information
            pdf_parserres = ResultSection(title_text="PDF Parser Results")
            # STATISTICS
            # Do not run for objstms, which are being analyzed when get_malform == False
            if get_malform:
                options = {
                    "stats": True,
                }
                pdf_parser_result, errors = self.get_pdf_parser(
                    path, working_dir, options)

                if pdf_parser_result:
                    if len(pdf_parser_result) == 0:
                        pdf_parserres.add_line(
                            "No statistical results generated for file. Please see errors."
                        )
                    else:
                        version = pdf_parser_result.get("version", None)
                        if version and version[0] != '0':
                            pdf_parserres.add_line(version[0])
                        stats = pdf_parser_result.get("stats", None)
                        if stats:
                            sres = ResultSection(
                                title_text="PDF Statistcs",
                                parent=pdf_parserres,
                                body_format=BODY_FORMAT.MEMORY_DUMP)
                            for p in stats:
                                sres.add_line(p)
                    for e in errors:
                        all_errors.add(e)

            # Triage plugin -- search sample for keywords and carve content or extract object (if it contains a stream)
            carved_content = {}  # Format: { "objnum": [{keyword: content}, ...] }
            obj_extract_triage = set()
            jbig_objs = set()

            for keyword in triage_keywords:
                # ObjStms handled differently
                if keyword == 'ObjStm':
                    continue

                options = {
                    "search": keyword,
                }
                pdf_parser_result, errors = self.get_pdf_parser(
                    path, working_dir, options)

                if pdf_parser_result:
                    for p in pdf_parser_result['parts']:
                        content = ""
                        references = []
                        # The trailer will be extracted anyway; try to grab all references -- will be messy
                        if p.startswith("trailer:"):
                            # Grab the content after the keyword
                            # Check that keyword actually in content
                            if "/{}".format(keyword) in p:
                                try:
                                    content = p.split(keyword, 1)[1].replace(
                                        '>>++>>', '').split("/", 1)[0].strip()
                                    references = re.findall(
                                        "[0-9]* [0-9]* R", content)
                                except Exception:
                                    continue
                        # If not trailer, should be object
                        elif 'Referencing:' in p:
                            # Grab the content after the keyword
                            if '>>++>>' in p:
                                try:
                                    content = p.split(keyword, 1)[1].replace(
                                        '>>++>>', '').strip()
                                except Exception:
                                    try:
                                        content = p.split("\n", 3)[3]
                                    except Exception:
                                        content = p
                            else:
                                try:
                                    content = p.split("\n", 3)[3]
                                except Exception:
                                    content = p
                            # Sometimes the content is the same keyword with references (i.e. "/URI /URI 10 0 R")
                            if content.startswith("/{}".format(keyword)):
                                try:
                                    content = re.sub("/{}[ ]*".format(keyword),
                                                     "", content, 1)
                                except Exception:
                                    pass
                            try:
                                references = p.split("\n", 3)[2].replace(
                                    'Referencing:', '').strip().split(", ")
                            except Exception:
                                pass
                        # Only extract JBIG2Decode objects with deep scan, but always report on their presence
                        if keyword == "JBIG2Decode" and "/Filter" in p and "Contains stream" in p:
                            try:
                                objnum = p.split("\n", 1)[0].split(" ")[1]
                                if request.deep_scan:
                                    obj_extract_triage.add(objnum)
                                jbig_objs.add(objnum)
                                continue
                            except Exception as e:
                                self.log.debug(e)
                                continue
                        # If no content, then keyword likely points to reference objects, so grab those
                        if content == '':
                            if len(references) > 0:
                                content = references
                            else:
                                # Something is wrong, drop it.
                                continue
                        else:
                            while True:
                                # Multiple references might be in a list, i.e. /Annot # # R vs. /Annots [# # R # # R]
                                islist = re.match(
                                    r"[s]?[ ]?\[([0-9]* [0-9]* R[ \\rn]{0,8})*\]",
                                    content)
                                if islist:
                                    content = re.sub(
                                        r"[\[\]]", "",
                                        islist.group(0).replace(
                                            "s ", '').replace("R ",
                                                              "R,")).split(",")
                                    break
                                # References might be with instructions, i.e. [# # R /FitH null]
                                withinst = re.match(
                                    r"[s]?[ \\']{0,3}\[[ ]?([0-9]* [0-9]* R)[ \\rn]{1,8}"
                                    r"[/a-zA-Z0-9 ]*[ ]?\]", content)
                                if withinst:
                                    content = [withinst.group(1)]
                                    break
                                content = [content]
                                break
                        for c in content:
                            # If keyword = Javascript and content starts with '/JS', disregard as 'JS' will be extracted
                            if "JS" in triage_keywords and keyword == "JavaScript" and "/JS" in c[
                                    0:5]:
                                continue
                            if c in references or re.match(
                                    "[0-9]* [0-9]* R", c):
                                try:
                                    ref_obj = c.split(" ", 1)[0]
                                    options = {
                                        "object": ref_obj,
                                        "get_object_detail": True
                                    }
                                    pdf_parser_subresult, err = self.get_pdf_parser(
                                        path, working_dir, options)

                                    if pdf_parser_subresult:
                                        for sub_p in pdf_parser_subresult[
                                                'parts']:
                                            sub_references = sub_p.split("\n", 3)[2].replace('Referencing:', '')\
                                                .strip().split(", ")
                                            ptyp = sub_p.split(
                                                "\n", 2)[1].replace(
                                                    'Type:',
                                                    '').strip().replace(
                                                        "/", "")
                                            # If the object contains a stream, extract the object.
                                            if "Contains stream" in sub_p:
                                                try:
                                                    objnum = sub_p.split(
                                                        "\n",
                                                        1)[0].split(" ")[1]
                                                    obj_extract_triage.add(
                                                        objnum)
                                                except Exception:
                                                    pass
                                            # Or if the object Type is the keyword, grab all referenced objects.
                                            elif sub_references[0] != '' and len(sub_references) >= 1 \
                                                    and ptyp == keyword:
                                                for sr in sub_references:
                                                    try:
                                                        objnum = sr.split(
                                                            " ", 1)[0]
                                                        obj_extract_triage.add(
                                                            objnum)
                                                    except Exception:
                                                        pass
                                            # If not, extract object detail in to carved output
                                            elif pdf_parser_subresult[
                                                    'obj_details'] != "":
                                                try:
                                                    objnum = sub_p.split(
                                                        "\n",
                                                        1)[0].split(" ")[1]
                                                    if objnum in carved_content:
                                                        carved_content[objnum]\
                                                            .append({keyword: pdf_parser_subresult['obj_details']})
                                                    else:
                                                        carved_content[objnum] = \
                                                            [{keyword: pdf_parser_subresult['obj_details']}]
                                                except Exception:
                                                    continue

                                    for e in err:
                                        errors.add(e)
                                except Exception:
                                    # If none of that work, just extract the original object for examination.
                                    try:
                                        objnum = p.split("\n",
                                                         1)[0].split(" ")[1]
                                        obj_extract_triage.add(objnum)
                                    except Exception:
                                        pass
                            # If content does not look like a reference:
                            else:
                                if p.startswith("trailer:"):
                                    continue
                                objnum = p.split("\n", 1)[0].split(" ")[1]
                                # If the object contains a stream extract the object
                                if p.split("\n", 4)[3] == "Contains stream":
                                    obj_extract_triage.add(objnum)
                                else:
                                    # Or just carve the content
                                    if objnum in carved_content:
                                        carved_content[objnum].append(
                                            {keyword: c})
                                    else:
                                        carved_content[objnum] = [{keyword: c}]

                    for e in errors:
                        all_errors.add(e)

            # Add carved content to result output
            show_content_of_interest = False
            if len(carved_content) > 0 or len(jbig_objs) > 0:
                carres = ResultSection(title_text="Content of Interest")
            else:
                carres = None

            if len(jbig_objs) > 0:
                jbigres = ResultSection(
                    title_text=
                    "The following Object IDs are JBIG2DECODE streams:",
                    body_format=BODY_FORMAT.MEMORY_DUMP,
                    parent=carres)
                jbigres.add_line(', '.join(map(str, jbig_objs)))
                show_content_of_interest = True

            if len(carved_content) > 0:
                for k, l in sorted(carved_content.items()):
                    for d in l:
                        for keyw, con in d.items():
                            subres = ResultSection(
                                title_text="Object {0}: Hits for Keyword '{1}':"
                                .format(k, keyw))
                            subres.set_heuristic(8)

                            con_bytes = con.encode()
                            if len(con) < 500:
                                subres.body_format = BODY_FORMAT.MEMORY_DUMP
                                subres.add_line(con)

                                # Check for IOC content
                                patterns = PatternMatch()
                                st_value = patterns.ioc_match(con_bytes,
                                                              bogon_ip=True)
                                if len(st_value) > 0:
                                    carres.add_subsection(subres)
                                    show_content_of_interest = True
                                    for ty, val in st_value.items():
                                        if val == "":
                                            asc_asc = unicodedata.normalize(
                                                'NFKC',
                                                val).encode('ascii', 'ignore')
                                            subres.add_tag(ty, asc_asc)
                                        else:
                                            ulis = list(set(val))
                                            for v in ulis:
                                                subres.add_tag(ty, v)
                            else:
                                crv_sha = hashlib.sha256(con_bytes).hexdigest()

                                if crv_sha not in carved_extracted_shas:
                                    f_name = "carved_content_obj_{}_{}".format(
                                        k, crv_sha[0:7])
                                    subres.add_lines([
                                        "Content over 500 bytes it will be extracted for analysis",
                                        "Name: {} - SHA256: {}".format(
                                            f_name, crv_sha)
                                    ])
                                    carres.add_subsection(subres)
                                    show_content_of_interest = True
                                    crvf = os.path.join(
                                        self.working_directory, f_name)
                                    with open(crvf, 'wb') as f:
                                        f.write(con_bytes)
                                    request.add_extracted(
                                        crvf, os.path.basename(crvf),
                                        "Extracted content from object {}".
                                        format(k))
                                    carved_extracted_shas.add(crv_sha)

            if show_content_of_interest:
                pdf_parserres.add_subsection(carres)

            # ELEMENTS
            # Do not show for objstms
            if get_malform:
                if request.deep_scan:
                    options = {
                        "verbose": True,
                        "nocanonicalizedoutput": True,
                        "get_malform": get_malform
                    }
                elif embed_present:
                    options = {
                        "verbose": True,
                        "elements": "ctsi",
                        "type": "/EmbeddedFile",
                        "get_malform": get_malform
                    }
                else:
                    options = {
                        "verbose": True,
                        "elements": "cst",
                        "get_malform": get_malform
                    }
                pdf_parser_result, errors = self.get_pdf_parser(
                    path, working_dir, options)

                embed_extracted = set()
                if pdf_parser_result:
                    if len(pdf_parser_result) == 0:
                        pdf_parserres.add_line(
                            "No structure information generated for file. Please see errors."
                        )
                    else:
                        # PDF Parser will write any malformed content over 100 bytes to a file
                        files = pdf_parser_result.get("files", None)
                        if files:
                            for f, l in files.items():
                                if f == 'malformed':
                                    if len(l) > 0:
                                        pdf_parserres.set_heuristic(6)
                                    for i in l:
                                        request.add_extracted(
                                            i, os.path.basename(i),
                                            "Extracted malformed content in PDF Parser Analysis."
                                        )

                        parts = pdf_parser_result.get("parts", None)
                        # Extract service will extract the sample's embedded files.
                        # However we want to make note of them so that they are not extracted again below
                        if parts:
                            for p in sorted(parts):
                                if "Type: /EmbeddedFile" in p:
                                    getobj = p.split("\n", 1)[0].split(" ")[1]
                                    embed_extracted.add(getobj)

                # Extract objects collected from above analysis
                obj_to_extract = obj_extract_triage - embed_extracted - jbig_objs

                if len(obj_to_extract) > 0:
                    options = {
                        "filter": True,
                        "object": obj_to_extract,
                        "dump": "extracted_obj_",
                    }
                    pdf_parser_result, errors = self.get_pdf_parser(
                        path, working_dir, options)

                    if pdf_parser_result:
                        files = pdf_parser_result.get("files", None)
                        extracted_files = []
                        if files:
                            for f, l in files.items():
                                if f == 'embedded':
                                    for i in l:
                                        f_name = os.path.basename(i)
                                        obj_id = f_name.replace(
                                            "extracted_obj_", "")
                                        extracted_files.append(
                                            "Extracted object {} as {}".format(
                                                obj_id, f_name))
                                        request.add_extracted(
                                            i, f_name,
                                            "Object {} extracted in PDF Parser Analysis."
                                            .format(obj_id))
                        for e in errors:
                            all_errors.add(e)

                        if extracted_files:
                            extract_res = ResultSection(
                                title_text="Extracted embedded objects",
                                parent=pdf_parserres)
                            extract_res.set_heuristic(9)
                            extract_res.add_lines(extracted_files)

                # Extract jbig2decode objects in deep scan mode
                if request.deep_scan and len(jbig_objs) > 0:
                    options = {
                        "object": jbig_objs,
                        "dump": "extracted_jb_obj_",
                    }
                    pdf_parser_result, errors = self.get_pdf_parser(
                        path, working_dir, options)

                    if pdf_parser_result:
                        extracted_jb = []
                        files = pdf_parser_result.get("files", None)
                        if files:
                            for f, l in files.items():
                                if f == 'embedded':
                                    for i in l:
                                        f_name = os.path.basename(i)
                                        obj_id = f_name.replace(
                                            "extracted_jb_obj_", "")
                                        extracted_jb.append(
                                            "JBIG2DECODE object {} extracted as {}"
                                            .format(obj_id, f_name))
                                        request.add_extracted(
                                            i, f_name,
                                            "JBIG2DECODE object {} extracted in PDF Parser Analysis."
                                            .format(obj_id))

                        for e in errors:
                            all_errors.add(e)

                        if extracted_jb:
                            jbig_extract_res = ResultSection(
                                title_text="Extracted JBIG2Decode objects",
                                parent=pdf_parserres)
                            jbig_extract_res.set_heuristic(9)
                            jbig_extract_res.add_lines(extracted_jb)

            if len(pdf_parserres.subsections) > 0:
                res.add_subsection(pdf_parserres)

        return res, objstms, all_errors
    def test_process_ttps(intezer_static_class_instance,
                          dummy_api_interface_class, mocker):
        """Exercise _process_ttps against empty, failing, and signature-bearing API responses."""
        from intezer_static import ALIntezerApi
        from intezer_sdk.api import IntezerApi
        from intezer_sdk.errors import UnsupportedOnPremiseVersion
        from assemblyline_v4_service.common.result import ResultSection, ResultTableSection, TableRow
        from requests import HTTPError
        mocker.patch.object(intezer_static_class_instance,
                            "get_api_interface",
                            return_value=dummy_api_interface_class)
        intezer_static_class_instance.start()
        parent_section = ResultSection("blah")

        # No TTPs returned: nothing should be attached to the parent section.
        mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[])
        intezer_static_class_instance._process_ttps("blah", parent_section)
        assert parent_section.subsections == []

        # An HTTP error from the API must be handled without adding results.
        mocker.patch.object(IntezerApi,
                            "get_dynamic_ttps",
                            side_effect=HTTPError("FORBIDDEN"))
        intezer_static_class_instance._process_ttps("blah", parent_section)
        assert parent_section.subsections == []

        # An unsupported on-premise version is likewise handled quietly.
        mocker.patch.object(IntezerApi,
                            "get_dynamic_ttps",
                            side_effect=UnsupportedOnPremiseVersion())
        intezer_static_class_instance._process_ttps("blah", parent_section)
        assert parent_section.subsections == []

        # A generic signature maps to heuristic 4.
        mocker.patch.object(ALIntezerApi,
                            "get_dynamic_ttps",
                            return_value=[{"name": "blah",
                                           "description": "blah",
                                           "data": [],
                                           "severity": 1}])
        intezer_static_class_instance._process_ttps("blah", parent_section)
        expected = ResultSection("Signature: blah", "blah")
        expected.set_heuristic(4)
        expected.heuristic.add_signature_id("blah", 10)
        assert check_section_equality(
            parent_section.subsections[0].subsections[0], expected)

        # A known injection signature maps to heuristic 7 with ATT&CK T1055.
        parent_section = ResultSection("blah")
        mocker.patch.object(ALIntezerApi,
                            "get_dynamic_ttps",
                            return_value=[{"name": "InjectionInterProcess",
                                           "description": "blah",
                                           "data": [],
                                           "severity": 1}])
        intezer_static_class_instance._process_ttps("blah", parent_section)
        expected = ResultSection("Signature: InjectionInterProcess",
                                 "blah")
        expected.set_heuristic(7)
        expected.heuristic.add_signature_id("InjectionInterProcess", 10)
        expected.heuristic.add_attack_id("T1055")
        assert check_section_equality(
            parent_section.subsections[0].subsections[0], expected)

        # Process-enumeration signature maps to heuristic 8 with ATT&CK T1057;
        # unrecognized data keys do not add lines to the section.
        parent_section = ResultSection("blah")
        mocker.patch.object(ALIntezerApi,
                            "get_dynamic_ttps",
                            return_value=[{"name": "enumerates_running_processes",
                                           "description": "blah",
                                           "data": [{"wow": "print me!"}],
                                           "severity": 1}])
        intezer_static_class_instance._process_ttps("blah", parent_section)
        expected = ResultSection(
            "Signature: enumerates_running_processes", "blah")
        expected.set_heuristic(8)
        expected.heuristic.add_signature_id(
            "enumerates_running_processes", 10)
        expected.heuristic.add_attack_id("T1057")
        assert check_section_equality(
            parent_section.subsections[0].subsections[0], expected)

        # Signature marks containing an IP yield a line, a tag, and an IOC
        # table subsection.
        parent_section = ResultSection("blah")
        mocker.patch.object(ALIntezerApi,
                            "get_dynamic_ttps",
                            return_value=[{"name": "blah",
                                           "description": "blah",
                                           "data": [{"IP": "blah 2.2.2.2 blah"}],
                                           "severity": 1}])
        intezer_static_class_instance._process_ttps("blah", parent_section)
        expected = ResultSection("Signature: blah", "blah")
        expected.add_line("\tIP: blah 2.2.2.2 blah")
        expected.set_heuristic(4)
        expected.heuristic.add_signature_id("blah", 10)
        expected_iocs = ResultTableSection(
            "IOCs found in signature marks")
        expected_iocs.add_row(TableRow(ioc_type="ip", ioc="2.2.2.2"))
        expected_iocs.add_tag("network.dynamic.ip", "2.2.2.2")
        expected.add_subsection(expected_iocs)
        assert check_section_equality(
            parent_section.subsections[0].subsections[0], expected)
    def execute(self, request) -> None:
        """
        Service entry point: iteratively de-obfuscate the submitted file.

        Runs a battery of de-obfuscation techniques over the file contents in
        repeated passes until no technique makes further progress (or
        ``max_attempts`` passes elapse), then reports the steps taken, any
        IOCs that appeared only after de-obfuscation, and extracts the final
        layer and any files produced along the way.

        Args:
            request: AL ServiceRequest for the current task.

        Returns:
            None. Findings are attached to ``request.result`` and extracted
            files are registered via ``request.add_extracted``.
        """
        # --- Setup ----------------------------------------------------------------------------------------------
        request.result = Result()
        patterns = PatternMatch()

        # Deep scans are allowed many more de-obfuscation passes.
        if request.deep_scan:
            max_attempts = 100
        else:
            max_attempts = 10

        self.files_extracted = set()
        self.hashes = set()
        # (tag_type, value) IOC pairs seen BEFORE de-obfuscation; used later
        # to report only the IOCs that de-obfuscation newly revealed.
        before = set()

        # --- Pre-Processing --------------------------------------------------------------------------------------
        # Get all IOCs prior to de-obfuscation
        pat_values = patterns.ioc_match(request.file_contents,
                                        bogon_ip=True,
                                        just_network=False)
        if pat_values:
            if request.get_param('extract_original_iocs'):
                ioc_res = ResultSection(
                    "The following IOCs were found in the original file",
                    parent=request.result,
                    body_format=BODY_FORMAT.MEMORY_DUMP)
            else:
                # IOCs are still collected into `before` even when the
                # original-IOC section is not requested.
                ioc_res = None
            # NOTE(review): each value appears to be either a single string or
            # an iterable of matches — confirm against PatternMatch.ioc_match.
            for k, val in pat_values.items():
                if val == "":
                    # An empty-string value is normalized to b"" and is still
                    # tagged/recorded as-is — presumably intentional; verify.
                    asc_asc = unicodedata.normalize('NFKC', val).encode(
                        'ascii', 'ignore')
                    if ioc_res:
                        ioc_res.add_line(
                            f"Found {k.upper().replace('.', ' ')}: {safe_str(asc_asc)}"
                        )
                        ioc_res.add_tag(k, asc_asc)
                    before.add((k, asc_asc))
                else:
                    for v in val:
                        if ioc_res:
                            ioc_res.add_line(
                                f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}"
                            )
                            ioc_res.add_tag(k, v)
                        before.add((k, v))

        # --- Prepare Techniques ----------------------------------------------------------------------------------
        # First-pass techniques: (display name, callable) pairs tried on every
        # iteration of the de-obfuscation loop below.
        techniques = [
            ('MSOffice Embedded script', self.msoffice_embedded_script_string),
            ('CHR and CHRB decode', self.chr_decode),
            ('String replace', self.string_replace),
            ('Powershell carets', self.powershell_carets),
            ('Array of strings', self.array_of_strings),
            ('Fake array vars', self.vars_of_fake_arrays),
            ('Reverse strings', self.str_reverse),
            ('B64 Decode', self.b64decode_str),
            ('Simple XOR function', self.simple_xor_function),
        ]
        # Promoted to the front of `techniques` once a pass makes no progress.
        second_pass = [('Concat strings', self.concat_strings),
                       ('MSWord macro vars', self.mswordmacro_vars),
                       ('Powershell vars', self.powershell_vars),
                       ('Charcode hex', self.charcode_hex)]
        # Run exactly once at the very end, together with `techniques`.
        final_pass = [
            ('Charcode', self.charcode),
        ]

        # (file-type regex, layer name, extractor) for pre-loop extraction.
        code_extracts = [('.*html.*', "HTML scripts extraction",
                          self.extract_htmlscript)]

        layers_list = []
        layer = request.file_contents

        # --- Stage 1: Script Extraction --------------------------------------------------------------------------
        # For matching file types (e.g. HTML), start from the extracted
        # scripts rather than the raw file contents.
        for pattern, name, func in code_extracts:
            if re.match(re.compile(pattern), request.task.file_type):
                extracted_parts = func(request.file_contents)
                layer = b"\n".join(extracted_parts).strip()
                layers_list.append((name, layer))
                break

        # --- Stage 2: De-obfuscation -----------------------------------------------------------------------------
        idx = 0
        first_pass_len = len(techniques)
        layers_count = len(layers_list)
        while True:
            # Safety valve: after max_attempts passes, run the final pass
            # (final_pass + current techniques) once and stop.
            if idx > max_attempts:
                final_pass.extend(techniques)
                for name, technique in final_pass:
                    res = technique(layer)
                    if res:
                        layers_list.append((name, res))
                break
            for name, technique in techniques:
                res = technique(layer)
                if res:
                    layers_list.append((name, res))
                    # Looks like it worked, restart with new layer
                    layer = res
            # If the layers haven't changed in a pass, either finish (the
            # second-pass techniques were already added) or promote them and
            # keep going.
            if layers_count == len(layers_list):
                if len(techniques) != first_pass_len:
                    final_pass.extend(techniques)
                    for name, technique in final_pass:
                        res = technique(layer)
                        if res:
                            layers_list.append((name, res))
                    break
                else:
                    # NOTE(review): insert(0, x) adds second_pass in reverse
                    # order — confirm the ordering is intentional.
                    for x in second_pass:
                        techniques.insert(0, x)
            layers_count = len(layers_list)
            idx += 1

        # --- Compiling results ----------------------------------------------------------------------------------
        if len(layers_list) > 0:
            extract_file = False
            num_layers = len(layers_list)
            heur_id = None

            # Compute heuristic: severity scales with the number of
            # de-obfuscation layers that were peeled off.
            if num_layers < 5:
                heur_id = 1
            elif num_layers < 10:
                heur_id = 2
            elif num_layers < 50:
                heur_id = 3
            elif num_layers < 100:
                heur_id = 4
            elif num_layers >= 100:
                heur_id = 5

            # Cleanup final layer
            clean = self.clean_up_final_layer(layers_list[-1][1])
            if clean != request.file_contents:
                # Check for new IOCs, keeping only the (type, value) pairs
                # that were not already present before de-obfuscation.
                pat_values = patterns.ioc_match(clean,
                                                bogon_ip=True,
                                                just_network=False)
                diff_tags = {}

                for k, val in pat_values.items():
                    if val == "":
                        asc_asc = unicodedata.normalize('NFKC', val).encode(
                            'ascii', 'ignore')
                        if (k, asc_asc) not in before:
                            diff_tags.setdefault(k, [])
                            diff_tags[k].append(asc_asc)
                    else:
                        for v in val:
                            if (k, v) not in before:
                                diff_tags.setdefault(k, [])
                                diff_tags[k].append(v)

                # Extract the final layer when deep scanning, when it is both
                # sizable and heavily obfuscated, or when new IOCs appeared.
                if request.deep_scan or \
                        (len(clean) > 1000 and heur_id >= 4) or diff_tags:
                    extract_file = True

                # Display obfuscation steps
                mres = ResultSection(
                    "De-obfuscation steps taken by DeobsfuScripter",
                    parent=request.result)
                if heur_id:
                    mres.set_heuristic(heur_id)

                # Summarize how many times each technique fired.
                lcount = Counter([x[0] for x in layers_list])
                for l, c in lcount.items():
                    mres.add_line(f"{l}, {c} time(s).")

                # Display final layer
                byte_count = 5000
                if extract_file:
                    # Save extracted file; show a shorter preview since the
                    # full content is attached as an extracted file.
                    byte_count = 500
                    fn = f"{request.file_name}_decoded_final"
                    fp = os.path.join(self.working_directory, fn)
                    with open(fp, 'wb') as dcf:
                        dcf.write(clean)
                        self.log.debug(
                            f"Submitted dropped file for analysis: {fp}")
                    request.add_extracted(fp, fn, "Final deobfuscation layer")

                ResultSection(f"First {byte_count} bytes of the final layer:",
                              body=safe_str(clean[:byte_count]),
                              body_format=BODY_FORMAT.MEMORY_DUMP,
                              parent=request.result)

                # Display new IOCs from final layer
                if len(diff_tags) > 0:
                    ioc_new = ResultSection(
                        "New IOCs found after de-obfustcation",
                        parent=request.result,
                        body_format=BODY_FORMAT.MEMORY_DUMP)
                    has_network_heur = False
                    for ty, val in diff_tags.items():
                        for v in val:
                            if "network" in ty:
                                has_network_heur = True
                            ioc_new.add_line(
                                f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}"
                            )
                            ioc_new.add_tag(ty, v)

                    # Network IOCs are scored higher than other new IOCs.
                    if has_network_heur:
                        ioc_new.set_heuristic(7)
                    else:
                        ioc_new.set_heuristic(6)

                # Report and attach any files dropped during de-obfuscation.
                if len(self.files_extracted) > 0:
                    ext_file_res = ResultSection(
                        "The following files were extracted during the deobfuscation",
                        heuristic=Heuristic(8),
                        parent=request.result)
                    for f in self.files_extracted:
                        ext_file_res.add_line(os.path.basename(f))
                        request.add_extracted(
                            f, os.path.basename(f),
                            "File of interest deobfuscated from sample")
Example #30
0
    def _add_resultinfo_for_match(self, result: Result, match):
        """
        Convert a single Yara rule match into a ResultSection on the AL result.

        Builds the section title from rule, implant, actor, and behavior
        metadata, applies the heuristic for the rule's category (skipping
        NOISY rules outside deep scan), and adds every relevant AL tag
        (implants, families, actors, exploits, techniques, behaviors, ...).

        Args:
            result: AL ResultSection object.
            match: Yara rules Match object item.

        Returns:
            None.
        """
        meta = YaraMetadata(match)
        self._normalize_metadata(meta)

        rule_id = f'{match.namespace}.{match.rule}'
        section = ResultSection('', classification=meta.classification)
        # NOISY rules only contribute a heuristic during deep scans.
        if self.deep_scan or meta.al_status != "NOISY":
            heur = self.YARA_HEURISTICS_MAP.get(meta.category, 1)
            section.set_heuristic(heur,
                                  signature=rule_id,
                                  attack_id=meta.mitre_att)
        section.add_tag(f'file.rule.{self.name.lower()}', rule_id)

        title_parts = [f"[{match.namespace}] {match.rule}"]

        if meta.actor_type:
            section.add_tag('attribution.actor', meta.actor_type)

        for tag in meta.tags:
            section.add_tag(tag['type'], tag['value'])

        # Malware / implant metadata
        implant_parts = []
        for implant_name, implant_family in meta.malwares:
            if implant_name:
                implant_parts.append(implant_name)
                section.add_tag('attribution.implant', implant_name)
            if implant_family:
                implant_parts.append(implant_family)
                section.add_tag('attribution.family', implant_family)
        if implant_parts:
            title_parts.append(
                f"- Implant(s): {', '.join(implant_parts)}")

        # Threat actor metadata
        for actor in meta.actors:
            title_parts.append(actor)
            section.add_tag('attribution.actor', actor)

        # Exploit / CVE metadata
        if meta.exploits:
            title_parts.append(
                f"- Exploit(s): {', '.join(meta.exploits)}")
        for exploit in meta.exploits:
            section.add_tag('attribution.exploit', exploit)

        # Technique and info entries contribute both tags and entries in the
        # behavior summary.
        for category, name in meta.techniques:
            descriptor = self.TECHNIQUE_DESCRIPTORS.get(category, None)
            if descriptor:
                technique_type, technique_description = descriptor
                section.add_tag(technique_type, name)
                meta.behavior.add(technique_description)

        for category, name in meta.infos:
            descriptor = self.INFO_DESCRIPTORS.get(category, None)
            if descriptor:
                info_type, info_description = descriptor
                section.add_tag(info_type, name)
                meta.behavior.add(info_description)

        # Summaries
        if meta.behavior:
            title_parts.append(f"- Behavior: {', '.join(meta.behavior)}")
        for element in meta.behavior:
            section.add_tag('file.behavior', element)

        section.title_text = " ".join(title_parts)

        # KEY_VALUE body: rule name plus any non-empty metadata fields.
        json_body = {'name': match.rule}
        for item in ('id', 'version', 'author', 'description', 'source',
                     'malware', 'info', 'technique', 'tool', 'exploit',
                     'actor', 'category', 'mitre_att'):
            val = vars(meta).get(item, None)
            if val:
                json_body[item] = val

        string_hits = self._add_string_match_data(match)
        if string_hits:
            json_body['string_hits'] = string_hits

        section.set_body(json.dumps(json_body),
                         body_format=BODY_FORMAT.KEY_VALUE)

        result.add_section(section)