Example #1
def stack_result(section: List[bytes]) -> Optional[ResultSection]:
    """ Generates a ResultSection from floss stacked strings output section """
    result = ResultSection('FLARE FLOSS Stacked Strings',
                           body_format=BODY_FORMAT.MEMORY_DUMP,
                           heuristic=Heuristic(3))
    assert result.heuristic
    strings = section[1:]

    if not strings:
        return None

    groups = group_strings(s.decode() for s in strings)
    for group in groups:
        res = ResultSection(
            f"Group: '{min(group, key=len)}' Strings: {len(group)}",
            body='\n'.join(group),
            body_format=BODY_FORMAT.MEMORY_DUMP)
        for string in group:
            ioc_tag(string.encode(), res, just_network=len(group) > 1000)
        result.add_subsection(res)

    if any(res.tags for res in result.subsections):
        result.heuristic.add_signature_id('stacked_ioc')

    return result
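A minimal sketch of how stack_result might be driven, assuming FLOSS output has already been split into per-heading sections of raw bytes (the sample data and the Result import are illustrative, not from the source):

from assemblyline_v4_service.common.result import Result

# Hypothetical FLOSS output section: a heading line followed by the strings
floss_section = [b'FLOSS STACKED STRINGS', b'http://example.com/a', b'http://example.com/b']

result = Result()
section = stack_result(floss_section)
if section is not None:  # stack_result returns None when the section held no strings
    result.add_section(section)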
Example #2
    def _create_random_section(self):
        # choose a random body format
        body_format = random.choice(FORMAT_LIST)

        # create a section with a random title
        section = ResultSection(get_random_phrase(3, 7), body_format=body_format)

        # add a random number of lines to the body
        for _ in range(random.randint(1, 5)):
            # generate random line
            section.add_line(get_random_phrase(5, 10))

        # choose random amount of tags
        tags = flatten(get_random_tags())
        for key, val in tags.items():
            for v in val:
                section.add_tag(key, v)

        # set a heuristic a third of the time
        if random.choice([False, False, True]):
            section.set_heuristic(random.randint(1, 4))

        # Create random sub-sections
        if random.choice([False, False, True]):
            section.add_subsection(self._create_random_section())

        return section
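get_random_tags() and flatten are helpers from the surrounding test utilities. A hedged sketch of a flatten of the shape used above (the nesting format is an assumption), collapsing a nested tag dictionary into the dotted {tag_type: [values]} form that add_tag expects:

def flatten(tags: dict, prefix: str = "") -> dict:
    """Collapse {'network': {'static': {'ip': [...]}}} into {'network.static.ip': [...]}."""
    flat = {}
    for key, value in tags.items():
        dotted = f"{prefix}.{key}" if prefix else key
        if isinstance(value, dict):
            # recurse into nested tag categories
            flat.update(flatten(value, dotted))
        else:
            flat[dotted] = value
    return flat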
Example #3
    def parse_results(response: Dict[str, Any]) -> Result:
        res = Result()
        response = response['data']

        url_section = ResultSection('VirusTotal report permalink',
                                    body_format=BODY_FORMAT.URL,
                                    body=json.dumps(
                                        {"url": response['links']['self']}))
        res.add_section(url_section)
        response = response['attributes']
        scans = response['last_analysis_results']
        av_hits = ResultSection('Anti-Virus Detections')
        av_hits.add_line(
            f'Found {response["last_analysis_stats"]["malicious"]} AV hit(s) from '
            f'{len(response["last_analysis_results"])} scan(s).')
        for majorkey, subdict in sorted(scans.items()):
            if subdict['category'] == "malicious":
                virus_name = subdict['result']
                av_hit_section = AvHitSection(majorkey, virus_name)
                av_hit_section.set_heuristic(
                    1, signature=f'{majorkey}.{virus_name}')
                av_hit_section.add_tag('av.virus_name', virus_name)
                av_hits.add_subsection(av_hit_section)

        res.add_section(av_hits)

        return res
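AvHitSection is defined elsewhere in the service. A plausible minimal shape, assumed purely for illustration, is a ResultSection subclass that formats one engine's detection:

from assemblyline_v4_service.common.result import ResultSection

class AvHitSection(ResultSection):
    def __init__(self, av_name: str, virus_name: str) -> None:
        # title summarizing a single engine's verdict; the caller attaches
        # the heuristic, signature ID, and av.virus_name tag
        super().__init__(f"{av_name} identified the file as {virus_name}")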
Example #4
def test_section_traverser(tags, correct_tags):
    from assemblyline_v4_service.common.section_reducer import _section_traverser
    from assemblyline_v4_service.common.result import ResultSection
    section = ResultSection("blah")
    subsection = ResultSection("subblah")
    subsection.tags = tags
    section.add_subsection(subsection)
    assert _section_traverser(section).subsections[0].tags == correct_tags
Example #5
def test_section_traverser(tags, correct_tags):
    from assemblyline_v4_service.common.section_reducer import _section_traverser
    from assemblyline_v4_service.common.result import ResultSection
    section = ResultSection("blah")
    subsection = ResultSection("subblah")
    for t_type, t_values in tags.items():
        for t_value in t_values:
            subsection.add_tag(t_type, t_value)
    section.add_subsection(subsection)
    assert _section_traverser(section).subsections[0].tags == correct_tags
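Both tests exercise _section_traverser from section_reducer, which walks a section tree and reduces its tags. A hedged sketch of the idea (not the library implementation):

def _section_traverser_sketch(section):
    # depth-first: reduce children before the parent
    for subsection in section.subsections:
        _section_traverser_sketch(subsection)
    if section.tags:
        # deduplicate each tag type's values while preserving order
        section.tags = {t_type: list(dict.fromkeys(values))
                        for t_type, values in section.tags.items()}
    return section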
Example #6
def test_parse_results(response, correct_res_secs,
                       metadefender_class_instance):
    from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT, Heuristic
    metadefender_class_instance.blocklist = ["a"]
    metadefender_class_instance.sig_score_revision_map = {}
    metadefender_class_instance.kw_score_revision_map = {}
    metadefender_class_instance.current_node = "http://blah"
    metadefender_class_instance.nodes[
        metadefender_class_instance.current_node] = {
            "engine_map": {
                "z": {
                    "version": "blah",
                    "def_time": "blah"
                },
                "y": {
                    "version": "blah",
                    "def_time": "blah"
                }
            },
            "queue_times": [],
            "file_count": 0
        }
    correct_result = Result()
    for correct_res_sec in correct_res_secs:
        section = ResultSection(
            correct_res_sec["title_text"],
            body_format=BODY_FORMAT.JSON
            if correct_res_sec.get("body_format") else BODY_FORMAT.TEXT,
            body=correct_res_sec.get("body"))
        for subsec in correct_res_sec.get("subsections", []):
            subsection = ResultSection(
                subsec["title_text"],
                body=subsec["body"],
                body_format=BODY_FORMAT.KEY_VALUE,
                tags=subsec.get("tags"),
            )
            if subsec.get("heuristic"):
                subsection.set_heuristic(subsec["heuristic"]["heur_id"])
                for key in subsec["heuristic"]["signatures"]:
                    subsection.heuristic.add_signature_id(key)
            section.add_subsection(subsection)
        correct_result.add_section(section)
    actual_result = metadefender_class_instance.parse_results(response)
    for index, section in enumerate(actual_result.sections):
        assert check_section_equality(section,
                                      correct_result.sections[index])
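check_section_equality is a shared test helper. A minimal sketch of one plausible implementation (the exact fields compared are an assumption), checking the visible attributes and recursing into subsections:

from assemblyline_v4_service.common.result import ResultSection

def check_section_equality(this: ResultSection, that: ResultSection) -> bool:
    if (this.title_text, this.body, this.body_format) != \
            (that.title_text, that.body, that.body_format):
        return False
    # heuristics must either both be absent or agree on the heuristic ID
    if (this.heuristic is None) != (that.heuristic is None):
        return False
    if this.heuristic and this.heuristic.heur_id != that.heuristic.heur_id:
        return False
    if this.tags != that.tags or len(this.subsections) != len(that.subsections):
        return False
    return all(check_section_equality(a, b)
               for a, b in zip(this.subsections, that.subsections))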
Example #7
    def _process_iocs(
        self,
        analysis_id: str,
        file_verdict_map: Dict[str, str],
        parent_result_section: ResultSection,
    ) -> None:
        """
        This method retrieves and parses IOCs for an analysis
        :param analysis_id: The ID for the analysis which we will be retrieving
        :param file_verdict_map: A map of sha256s representing a file's
        contents, and the verdict for that file
        :param parent_result_section: The result section that the network
        result section will be added to, if applicable
        :return: None
        """
        iocs = self.client.get_iocs(analysis_id)
        file_iocs = iocs["files"]
        network_iocs = iocs["network"]

        if file_iocs:
            for file in file_iocs:
                file_verdict_map[file["sha256"]] = file["verdict"]

        if network_iocs:
            network_section = ResultTextSection(
                "Network Communication Observed")
            for network in network_iocs:
                ioc = network["ioc"]
                ioc_type = network["type"]
                if ioc_type == NetworkIOCTypes.IP.value:
                    network_section.add_tag("network.dynamic.ip", ioc)
                elif ioc_type == NetworkIOCTypes.DOMAIN.value:
                    network_section.add_tag("network.dynamic.domain", ioc)
                elif ioc_type not in NetworkIOCTypes.TYPES.value:
                    self.log.debug(
                        f"The network IOC type of {ioc_type} is not in "
                        f"{NetworkIOCTypes.TYPES.value}. Network item: {network}"
                    )
                network_section.add_line(f"IOC: {ioc}")
            parent_result_section.add_subsection(network_section)
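NetworkIOCTypes is imported from the service's constants; the comparisons above imply an enum whose TYPES member lists every handled value. An assumed shape for illustration:

from enum import Enum

class NetworkIOCTypes(Enum):
    IP = "ip"
    DOMAIN = "domain"
    # collection of all handled types, used by the unknown-type debug log above
    TYPES = ["ip", "domain"]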
Example #8
def subsection_builder(parent_section: ResultSection = None,
                       fields: Optional[Dict[str, Any]] = None):
    # use None instead of a mutable default argument
    fields = fields or {}
    for mwcp_field, mwcp_field_data in fields.items():
        if mwcp_field in FIELD_TAG_MAP:
            tag = FIELD_TAG_MAP[mwcp_field]
            table_body = []
            table_section = ResultSection(
                f"Extracted {mwcp_field.capitalize()}")
            # Tag everything that we can
            if tag:
                for x in mwcp_field_data:
                    table_section.add_tag(tag, x)
            # Add data to section body
            for line in mwcp_field_data:
                if isinstance(line, str):
                    table_body.append({mwcp_field: line})
                elif isinstance(line, list):
                    for item in line:
                        table_body.append({mwcp_field: item})
            table_section.set_body(body_format=BODY_FORMAT.TABLE,
                                   body=json.dumps(table_body))

            parent_section.add_subsection(table_section)
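FIELD_TAG_MAP maps MWCP metadata fields to Assemblyline tag types; fields mapped to a falsy value are shown in the table body but never tagged. Illustrative entries (field names and tag types assumed):

FIELD_TAG_MAP = {
    "c2_url": "network.dynamic.uri",  # tagged and added to the table body
    "mutex": "dynamic.mutex",         # tagged and added to the table body
    "other": None,                    # table body only, no tag
}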
Example #9
    def _handle_artefact(artefact: Artefact = None,
                         artefacts_result_section: ResultSection = None):
        if artefact is None:
            raise Exception("Artefact cannot be None")

        # This is a dict whose key-value pairs follow the format {regex: result_section_title}
        artefact_map = {
            HOLLOWSHUNTER_EXE_REGEX:
            "HollowsHunter Injected Portable Executable",
            HOLLOWSHUNTER_SHC_REGEX: "HollowsHunter Shellcode",
            HOLLOWSHUNTER_DLL_REGEX: "HollowsHunter DLL",
        }
        artefact_result_section = None

        for regex, title in artefact_map.items():
            pattern = compile(regex)
            if pattern.match(artefact.name):
                artefact_result_section = ResultSection(title)
                artefact_result_section.add_tag("dynamic.process.file_name",
                                                artefact.path)

        if artefact_result_section is not None:
            artefacts_result_section.add_subsection(artefact_result_section)
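The HOLLOWSHUNTER_*_REGEX constants live at the service's module scope. Illustrative values (assumed, not taken from the source) of the kind of dump names they would match:

# hypothetical patterns for HollowsHunter dump file names
HOLLOWSHUNTER_EXE_REGEX = r"hollowshunter/hh_process_[0-9]+_[a-zA-Z0-9]+\.exe$"
HOLLOWSHUNTER_SHC_REGEX = r"hollowshunter/hh_process_[0-9]+_[a-zA-Z0-9]+\.shc$"
HOLLOWSHUNTER_DLL_REGEX = r"hollowshunter/hh_process_[0-9]+_[a-zA-Z0-9]+\.dll$"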
Example #10
    def execute(self, request: ServiceRequest) -> None:
        result = Result()
        self.hits = {}  # clear the hits dict
        path = request.file_path
        file_name = request.file_name
        self.log.info(f"Executing {file_name}")
        self.log.info(f"Number of rules {len(self.sigma_parser.rules)}")
        self.sigma_parser.register_callback(self.sigma_hit)
        self.sigma_parser.check_logfile(path)
        if len(self.hits) > 0:
            hit_section = ResultSection('Events detected as suspicious')
            # group alerts together
            for rule_id, events in self.hits.items():
                title = self.sigma_parser.rules[rule_id].title
                section = SigmaHitSection(title, events)
                tags = self.sigma_parser.rules[rule_id].tags
                attack_id = None
                if tags:
                    for tag in tags:
                        # strip the "attack." prefix to get the technique/group/software ID
                        name = tag[7:]
                        if name.startswith(('t', 'g', 's')):
                            attack_id = name.upper()
                source = events[0]['signature_source']
                if attack_id:
                    section.set_heuristic(get_heur_id(events[0]['score']),
                                          attack_id=attack_id,
                                          signature=f"{source}.{title}")
                else:
                    section.set_heuristic(get_heur_id(events[0]['score']),
                                          signature=f"{source}.{title}")
                section.add_tag(f"file.rule.{source}", f"{source}.{title}")
                for event in events:
                    # add the event data as a subsection
                    section.add_subsection(EventDataSection(event))
                hit_section.add_subsection(section)
            result.add_section(hit_section)
        request.result = result
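get_heur_id maps a Sigma rule's score onto one of the service's heuristic IDs. A hedged sketch of a mapping of that shape (score names and IDs are assumptions):

def get_heur_id(score: str) -> int:
    # assumed score-to-heuristic mapping; unknown scores fall back to the lowest severity
    return {"critical": 1, "high": 2, "medium": 3, "low": 4}.get(score, 4)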
Example #11
    def _process_families(self, families: List[Dict[str, str]],
                          sub_sha256: str, file_verdict_map: Dict[str, str],
                          parent_section: ResultSection) -> None:
        """
        This method handles the "families" list, cutting out boring details and assigning verdicts
        :param families: A list of details for families
        :param sub_sha256: The hash of the sub analysis file
        :param file_verdict_map: A map of sha256s representing a file's
        contents, and the verdict for that file
        :param parent_section: The result section that the family details
        result section will be added to, if applicable
        :return: None
        """
        family_section = ResultTableSection("Family Details")
        for family in families:
            processed_family = self._process_details(
                family.copy(), UNINTERESTING_FAMILY_KEYS)
            family_section.add_row(TableRow(**processed_family))
            family_type = family["family_type"]
            if family_type not in FAMILIES_TO_NOT_TAG:
                family_section.add_tag("attribution.family",
                                       family["family_name"])

            # Overwrite value if not malicious
            if family_type in MALICIOUS_FAMILY_TYPES and (
                    sub_sha256 not in file_verdict_map or
                    file_verdict_map[sub_sha256] != Verdicts.MALICIOUS.value):
                file_verdict_map[sub_sha256] = Verdicts.MALICIOUS.value

            # Only overwrite value if value is not already malicious
            elif family_type in SUSPICIOUS_FAMILY_TYPES and (
                    sub_sha256 not in file_verdict_map
                    or file_verdict_map[sub_sha256]
                    not in Verdicts.MALICIOUS_VERDICTS.value):
                file_verdict_map[sub_sha256] = Verdicts.SUSPICIOUS.value

        if family_section.body:
            parent_section.add_subsection(family_section)
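The Verdicts comparisons above imply an enum whose MALICIOUS_VERDICTS member is a collection of verdict strings. An assumed minimal shape:

from enum import Enum

class Verdicts(Enum):
    MALICIOUS = "malicious"
    SUSPICIOUS = "suspicious"
    # grouping member used by the "not already malicious" check above
    MALICIOUS_VERDICTS = ["malicious"]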
Example #12
    def _process_ttps(
        self,
        analysis_id: str,
        parent_result_section: ResultSection,
    ) -> None:
        """
        This method retrieves and parses TTPs for an analysis
        :param analysis_id: The ID for the analysis which we will be retrieving
        :param file_verdict_map: A map of sha256s representing a file's
        contents, and the verdict for that file
        :param parent_result_section: The result section that the network
        result section will be added to, if applicable
        :return: None
        """
        # Note: These TTPs are essentially signatures
        ttps = self.client.get_dynamic_ttps(analysis_id)

        if not ttps:
            return

        sigs_res = ResultSection("Signatures")
        for ttp in ttps:
            sig_name = ttp['name']
            sig_res = ResultTextSection(f"Signature: {sig_name}")
            sig_res.add_line(ttp['description'])

            heur_id = get_heur_id_for_signature_name(sig_name)
            if heur_id == GENERIC_HEURISTIC_ID:
                self.log.debug(
                    f"{sig_name} does not have a category assigned to it")

            sig_res.set_heuristic(heur_id)
            sig_res.heuristic.add_signature_id(
                sig_name, TTP_SEVERITY_TRANSLATION[ttp['severity']])

            for aid in get_attack_ids_for_signature_name(sig_name):
                sig_res.heuristic.add_attack_id(aid)

            if sig_name in SILENT_SIGNATURES:
                sigs_res.add_subsection(sig_res)
                continue

            ioc_table = ResultTableSection("IOCs found in signature marks")
            self._process_ttp_data(ttp['data'], sig_res, ioc_table)

            if ioc_table.body:
                sig_res.add_subsection(ioc_table)

            sigs_res.add_subsection(sig_res)

        if sigs_res.subsections:
            parent_result_section.add_subsection(sigs_res)
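TTP_SEVERITY_TRANSLATION supplies the second argument of add_signature_id, i.e. the score attached to the signature. An illustrative mapping (severity levels and scores are assumptions):

# assumed severity-to-score translation for add_signature_id
TTP_SEVERITY_TRANSLATION = {
    1: 10,    # informational
    2: 100,   # suspicious
    3: 250,   # highly suspicious
    4: 1000,  # malicious
}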
Example #13
    def analyze_pdf(self,
                    request,
                    res_txt,
                    path,
                    working_dir,
                    heur,
                    additional_keywords,
                    get_malform=True):
        """Extract metadata, keyword objects and content of interest from a PDF sample using PDFId, PDFId plugins,
        and PDF Parser.

        Args:
            request: AL request object.
            res_txt: Header string for AL result section title.
            path: Original PDF sample path.
            working_dir: AL working directory.
            heur: List of plugins to run on PDFId results (provided in service configuration).
            additional_keywords: List of additional keywords to be searched (provided in service configuration).
            get_malform: Extract malformed objects from PDF.

        Returns:
            AL result object, AL heuristics list to add to result, list of object streams (objstms), and an errors list.
        """
        triage_keywords = set()
        all_errors = set()
        embed_present = False
        objstms = False
        res = ResultSection(title_text=res_txt)
        carved_extracted_shas = set()

        if request.deep_scan:
            run_pdfparse = True
        else:
            run_pdfparse = False

        # Run PDFId
        try:
            pdfid_result, errors = self.get_pdfid(path, additional_keywords,
                                                  heur, request.deep_scan)
        except Exception as e:
            raise NonRecoverableError(e)
        # Parse PDFId results
        pdfidres = ResultSection(title_text="PDFID Results", parent=res)
        if len(pdfid_result) == 0:
            pdfidres.add_line(
                "No results generated for file. Please see errors.")
        else:
            # Do not run for objstms, which are being analyzed when get_malform == False
            if get_malform:
                version = pdfid_result.get("PDFID", None)
                if version:
                    pdfidres.add_line(version[0])
                properties = pdfid_result.get("Properties", None)
                if properties:
                    pres = ResultSection(title_text="PDF Properties",
                                         parent=pdfidres)
                    for plist in properties:
                        pres.add_line("{0}: {1}".format(plist[0], plist[1]))
                        if plist[0] == "/ModDate":
                            pres.add_tag('file.pdf.date.modified', plist[1])
                        elif plist[0] == "/CreationDate":
                            pres.add_tag('file.date.creation', plist[1])
                        elif plist[0] == "/LastModified":
                            pres.add_tag('file.date.last_modified', plist[1])
                        elif plist[0] == "/SourceModified":
                            pres.add_tag('file.pdf.date.source_modified',
                                         plist[1])
                        elif plist[0] == "/pdfx":
                            pres.add_tag('file.pdf.date.pdfx', plist[1])
                entropy = pdfid_result.get("Entropy", None)
                if entropy:
                    enres = ResultSection(title_text="Entropy",
                                          parent=pdfidres)
                    for enlist in entropy:
                        enres.add_line("{0}: {1}, ({2})".format(
                            enlist[0], enlist[1], enlist[2]))
            flags = pdfid_result.get("Flags", None)
            if flags:
                fres = ResultSection(title_text="PDF Keyword Flags",
                                     parent=pdfidres)
                for flist in flags:
                    if flist[0] == "/ObjStm":
                        objstms = True
                    if len(flist) == 3:
                        fres.add_line(
                            "{0}:Count: {1}, Hex-Encoded Count: {2}".format(
                                flist[0], flist[1], flist[2]))
                    else:
                        fres.add_line("{0}:Count: {1}".format(
                            flist[0], flist[1]))
                    fres.add_tag('file.string.extracted',
                                 flist[0].replace("/", "", 1))
                    if flist[0] in additional_keywords:
                        triage_keywords.add(flist[0].replace("/", "", 1))

            plugin = pdfid_result.get("Plugin", [])

            # If any plugin results, or flagged keywords found, run PDF Parser
            if plugin or len(triage_keywords) > 0:
                run_pdfparse = True

            for pllist in plugin:
                pl_name, pl_heur, pl_text = pllist
                pl_heur = int(pl_heur)
                pl_text = pl_text[14:]
                if not pl_text or pl_text == "None":
                    continue

                if pl_name in ['EmbeddedFile', 'Name Obfuscation']:
                    modres = ResultSection(title_text=pl_text, parent=pdfidres)

                    if pl_heur > 0:
                        modres.set_heuristic(pl_heur)

                    if pl_name == 'EmbeddedFile':
                        embed_present = True

                elif pl_name in ['Triage', 'Suspicious Properties']:
                    javascript_found = False
                    for line in pl_text.splitlines():
                        lineres = ResultSection(title_text=line)
                        # Triage results
                        if '/JavaScript' in line:
                            triage_keywords.add('JavaScript')
                            if not javascript_found:
                                lineres.set_heuristic(19)
                                javascript_found = True
                        elif '/JS' in line:
                            triage_keywords.add('JS')
                            if not javascript_found:
                                lineres.set_heuristic(19)
                                javascript_found = True
                        elif '/JBIG2Decode' in line:
                            triage_keywords.add('JBIG2Decode')
                            lineres.set_heuristic(3)
                        elif '/Colors > 2^24' in line:
                            triage_keywords.add('Colors > 2^24')
                            lineres.set_heuristic(20)
                        elif '/AA' in line:
                            triage_keywords.add('AA')
                            lineres.set_heuristic(1)
                        elif '/Launch' in line:
                            triage_keywords.add('Launch')
                            lineres.set_heuristic(1)
                        elif '/OpenAction' in line:
                            triage_keywords.add('OpenAction')
                            lineres.set_heuristic(1)
                        elif '/GoToE' in line:
                            triage_keywords.add('GoToE')
                            lineres.set_heuristic(21)
                        elif '/GoToR' in line:
                            triage_keywords.add('GoToR')
                            lineres.set_heuristic(22)
                        elif '/Encrypt' in line:
                            triage_keywords.add('Encrypt')
                            lineres.set_heuristic(11)
                        elif '/AcroForm' in line:
                            triage_keywords.add('AcroForm')
                            lineres.set_heuristic(4)
                        elif '/RichMedia' in line:
                            triage_keywords.add('RichMedia')
                            lineres.set_heuristic(5)
                        elif '/XFA' in line:
                            triage_keywords.add('XFA')
                            lineres.set_heuristic(23)
                        elif '/Annot' in line:
                            triage_keywords.add('Annot')
                            lineres.set_heuristic(25)
                        elif '/ObjStm' in line:
                            triage_keywords.add('ObjStm')
                            lineres.set_heuristic(7)
                        elif '/URI' in line:
                            triage_keywords.add('URI')
                            lineres.set_heuristic(24)

                        # Suspicious properties results
                        elif "eof2" in line:
                            lineres.set_heuristic(2)
                        elif "eof5" in line:
                            lineres.set_heuristic(17)
                        elif "page" in line:
                            lineres.set_heuristic(26)
                        elif "entropy" in line:
                            lineres.set_heuristic(12)
                        elif "obj/endobj" in line:
                            lineres.set_heuristic(13)
                        elif "stream/endstream" in line:
                            lineres.set_heuristic(14)

                        if lineres.heuristic is not None:
                            pdfidres.add_subsection(lineres)

        for e in errors:
            all_errors.add(e)
            if e.startswith('Error running plugin'):
                self.log.warning(e)

        if run_pdfparse:
            # CALL PDF parser and extract further information
            pdf_parserres = ResultSection(title_text="PDF Parser Results")
            # STATISTICS
            # Do not run for objstms, which are being analyzed when get_malform == False
            if get_malform:
                options = {
                    "stats": True,
                }
                pdf_parser_result, errors = self.get_pdf_parser(
                    path, working_dir, options)

                if pdf_parser_result:
                    if len(pdf_parser_result) == 0:
                        pdf_parserres.add_line(
                            "No statistical results generated for file. Please see errors."
                        )
                    else:
                        version = pdf_parser_result.get("version", None)
                        if version and version[0] != '0':
                            pdf_parserres.add_line(version[0])
                        stats = pdf_parser_result.get("stats", None)
                        if stats:
                            sres = ResultSection(
                                title_text="PDF Statistcs",
                                parent=pdf_parserres,
                                body_format=BODY_FORMAT.MEMORY_DUMP)
                            for p in stats:
                                sres.add_line(p)
                    for e in errors:
                        all_errors.add(e)

            # Triage plugin -- search sample for keywords and carve content or extract object (if it contains a stream)
            carved_content = {}  # Format: {"objnum": [{keyword: content}, ...]}
            obj_extract_triage = set()
            jbig_objs = set()

            for keyword in triage_keywords:
                # ObjStms handled differently
                if keyword == 'ObjStm':
                    continue

                options = {
                    "search": keyword,
                }
                pdf_parser_result, errors = self.get_pdf_parser(
                    path, working_dir, options)

                if pdf_parser_result:
                    for p in pdf_parser_result['parts']:
                        content = ""
                        references = []
                        # The trailer is always extracted; try to grab all references -- will be messy
                        if p.startswith("trailer:"):
                            # Grab the content after the keyword
                            # Check that keyword actually in content
                            if "/{}".format(keyword) in p:
                                try:
                                    content = p.split(keyword, 1)[1].replace(
                                        '>>++>>', '').split("/", 1)[0].strip()
                                    references = re.findall(
                                        "[0-9]* [0-9]* R", content)
                                except Exception:
                                    continue
                        # If not trailer, should be object
                        elif 'Referencing:' in p:
                            # Grab the content after the keyword
                            if '>>++>>' in p:
                                try:
                                    content = p.split(keyword, 1)[1].replace(
                                        '>>++>>', '').strip()
                                except Exception:
                                    try:
                                        content = p.split("\n", 3)[3]
                                    except Exception:
                                        content = p
                            else:
                                try:
                                    content = p.split("\n", 3)[3]
                                except Exception:
                                    content = p
                            # Sometimes the content is the same keyword with references (i.e. "/URI /URI 10 0 R")
                            if content.startswith("/{}".format(keyword)):
                                try:
                                    content = re.sub("/{}[ ]*".format(keyword),
                                                     "", content, 1)
                                except Exception:
                                    pass
                            try:
                                references = p.split("\n", 3)[2].replace(
                                    'Referencing:', '').strip().split(", ")
                            except Exception:
                                pass
                        # Only extract JBIG2Decode objects with deep scan, but always report on their presence
                        if keyword == "JBIG2Decode" and "/Filter" in p and "Contains stream" in p:
                            try:
                                objnum = p.split("\n", 1)[0].split(" ")[1]
                                if request.deep_scan:
                                    obj_extract_triage.add(objnum)
                                jbig_objs.add(objnum)
                                continue
                            except Exception as e:
                                self.log.debug(e)
                                continue
                        # If no content, then keyword likely points to reference objects, so grab those
                        if content == '':
                            if len(references) > 0:
                                content = references
                            else:
                                # Something is wrong, drop it.
                                continue
                        else:
                            while True:
                                # Multiple references might be in a list, i.e. /Annot # # R vs. /Annots [# # R # # R]
                                islist = re.match(
                                    r"[s]?[ ]?\[([0-9]* [0-9]* R[ \\rn]{0,8})*\]",
                                    content)
                                if islist:
                                    content = re.sub(
                                        r"[\[\]]", "",
                                        islist.group(0).replace(
                                            "s ", '').replace("R ",
                                                              "R,")).split(",")
                                    break
                                # References might be with instructions, i.e. [# # R /FitH null]
                                withinst = re.match(
                                    r"[s]?[ \\']{0,3}\[[ ]?([0-9]* [0-9]* R)[ \\rn]{1,8}"
                                    r"[/a-zA-Z0-9 ]*[ ]?\]", content)
                                if withinst:
                                    content = [withinst.group(1)]
                                    break
                                content = [content]
                                break
                        for c in content:
                            # If keyword = Javascript and content starts with '/JS', disregard as 'JS' will be extracted
                            if "JS" in triage_keywords and keyword == "JavaScript" and "/JS" in c[
                                    0:5]:
                                continue
                            if c in references or re.match(
                                    "[0-9]* [0-9]* R", c):
                                try:
                                    ref_obj = c.split(" ", 1)[0]
                                    options = {
                                        "object": ref_obj,
                                        "get_object_detail": True
                                    }
                                    pdf_parser_subresult, err = self.get_pdf_parser(
                                        path, working_dir, options)

                                    if pdf_parser_subresult:
                                        for sub_p in pdf_parser_subresult[
                                                'parts']:
                                            sub_references = sub_p.split("\n", 3)[2].replace('Referencing:', '')\
                                                .strip().split(", ")
                                            ptyp = sub_p.split(
                                                "\n", 2)[1].replace(
                                                    'Type:',
                                                    '').strip().replace(
                                                        "/", "")
                                            # If the object contains a stream, extract the object.
                                            if "Contains stream" in sub_p:
                                                try:
                                                    objnum = sub_p.split(
                                                        "\n",
                                                        1)[0].split(" ")[1]
                                                    obj_extract_triage.add(
                                                        objnum)
                                                except Exception:
                                                    pass
                                            # Or if the object Type is the keyword, grab all referenced objects.
                                            elif sub_references[0] != '' and len(sub_references) >= 1 \
                                                    and ptyp == keyword:
                                                for sr in sub_references:
                                                    try:
                                                        objnum = sr.split(
                                                            " ", 1)[0]
                                                        obj_extract_triage.add(
                                                            objnum)
                                                    except Exception:
                                                        pass
                                            # If not, extract object detail in to carved output
                                            elif pdf_parser_subresult[
                                                    'obj_details'] != "":
                                                try:
                                                    objnum = sub_p.split(
                                                        "\n",
                                                        1)[0].split(" ")[1]
                                                    if objnum in carved_content:
                                                        carved_content[objnum]\
                                                            .append({keyword: pdf_parser_subresult['obj_details']})
                                                    else:
                                                        carved_content[objnum] = \
                                                            [{keyword: pdf_parser_subresult['obj_details']}]
                                                except Exception:
                                                    continue

                                    for e in err:
                                        errors.add(e)
                                except Exception:
                                    # If none of that works, just extract the original object for examination.
                                    try:
                                        objnum = p.split("\n",
                                                         1)[0].split(" ")[1]
                                        obj_extract_triage.add(objnum)
                                    except Exception:
                                        pass
                            # If content does not look like a reference:
                            else:
                                if p.startswith("trailer:"):
                                    continue
                                objnum = p.split("\n", 1)[0].split(" ")[1]
                                # If the object contains a stream extract the object
                                if p.split("\n", 4)[3] == "Contains stream":
                                    obj_extract_triage.add(objnum)
                                else:
                                    # Or just carve the content
                                    if objnum in carved_content:
                                        carved_content[objnum].append(
                                            {keyword: c})
                                    else:
                                        carved_content[objnum] = [{keyword: c}]

                    for e in errors:
                        all_errors.add(e)

            # Add carved content to result output
            show_content_of_interest = False
            if len(carved_content) > 0 or len(jbig_objs) > 0:
                carres = ResultSection(title_text="Content of Interest")
            else:
                carres = None

            if len(jbig_objs) > 0:
                jbigres = ResultSection(
                    title_text=
                    "The following Object IDs are JBIG2DECODE streams:",
                    body_format=BODY_FORMAT.MEMORY_DUMP,
                    parent=carres)
                jbigres.add_line(', '.join(map(str, jbig_objs)))
                show_content_of_interest = True

            if len(carved_content) > 0:
                for k, l in sorted(carved_content.items()):
                    for d in l:
                        for keyw, con in d.items():
                            subres = ResultSection(
                                title_text="Object {0}: Hits for Keyword '{1}':"
                                .format(k, keyw))
                            subres.set_heuristic(8)

                            con_bytes = con.encode()
                            if len(con) < 500:
                                subres.body_format = BODY_FORMAT.MEMORY_DUMP
                                subres.add_line(con)

                                # Check for IOC content
                                patterns = PatternMatch()
                                st_value = patterns.ioc_match(con_bytes,
                                                              bogon_ip=True)
                                if len(st_value) > 0:
                                    carres.add_subsection(subres)
                                    show_content_of_interest = True
                                    for ty, val in st_value.items():
                                        if val == "":
                                            asc_asc = unicodedata.normalize(
                                                'NFKC',
                                                val).encode('ascii', 'ignore')
                                            subres.add_tag(ty, asc_asc)
                                        else:
                                            ulis = list(set(val))
                                            for v in ulis:
                                                subres.add_tag(ty, v)
                            else:
                                crv_sha = hashlib.sha256(con_bytes).hexdigest()

                                if crv_sha not in carved_extracted_shas:
                                    f_name = "carved_content_obj_{}_{}".format(
                                        k, crv_sha[0:7])
                                    subres.add_lines([
                                        "Content over 500 bytes it will be extracted for analysis",
                                        "Name: {} - SHA256: {}".format(
                                            f_name, crv_sha)
                                    ])
                                    carres.add_subsection(subres)
                                    show_content_of_interest = True
                                    crvf = os.path.join(
                                        self.working_directory, f_name)
                                    with open(crvf, 'wb') as f:
                                        f.write(con_bytes)
                                    request.add_extracted(
                                        crvf, os.path.basename(crvf),
                                        "Extracted content from object {}".
                                        format(k))
                                    carved_extracted_shas.add(crv_sha)

            if show_content_of_interest:
                pdf_parserres.add_subsection(carres)

            # ELEMENTS
            # Do not show for objstms
            if get_malform:
                if request.deep_scan:
                    options = {
                        "verbose": True,
                        "nocanonicalizedoutput": True,
                        "get_malform": get_malform
                    }
                elif embed_present:
                    options = {
                        "verbose": True,
                        "elements": "ctsi",
                        "type": "/EmbeddedFile",
                        "get_malform": get_malform
                    }
                else:
                    options = {
                        "verbose": True,
                        "elements": "cst",
                        "get_malform": get_malform
                    }
                pdf_parser_result, errors = self.get_pdf_parser(
                    path, working_dir, options)

                embed_extracted = set()
                if pdf_parser_result:
                    if len(pdf_parser_result) == 0:
                        pdf_parserres.add_line(
                            "No structure information generated for file. Please see errors."
                        )
                    else:
                        # PDF Parser will write any malformed content over 100 bytes to a file
                        files = pdf_parser_result.get("files", None)
                        if files:
                            for f, l in files.items():
                                if f == 'malformed':
                                    if len(l) > 0:
                                        pdf_parserres.set_heuristic(6)
                                    for i in l:
                                        request.add_extracted(
                                            i, os.path.basename(i),
                                            "Extracted malformed content in PDF Parser Analysis."
                                        )

                        parts = pdf_parser_result.get("parts", None)
                        # Extract service will extract the sample's embedded files.
                        # However we want to make note of them so that they are not extracted again below
                        if parts:
                            for p in sorted(parts):
                                if "Type: /EmbeddedFile" in p:
                                    getobj = p.split("\n", 1)[0].split(" ")[1]
                                    embed_extracted.add(getobj)

                # Extract objects collected from above analysis
                obj_to_extract = obj_extract_triage - embed_extracted - jbig_objs

                if len(obj_to_extract) > 0:
                    options = {
                        "filter": True,
                        "object": obj_to_extract,
                        "dump": "extracted_obj_",
                    }
                    pdf_parser_result, errors = self.get_pdf_parser(
                        path, working_dir, options)

                    if pdf_parser_result:
                        files = pdf_parser_result.get("files", None)
                        extracted_files = []
                        if files:
                            for f, l in files.items():
                                if f == 'embedded':
                                    for i in l:
                                        f_name = os.path.basename(i)
                                        obj_id = f_name.replace(
                                            "extracted_obj_", "")
                                        extracted_files.append(
                                            "Extracted object {} as {}".format(
                                                obj_id, f_name))
                                        request.add_extracted(
                                            i, f_name,
                                            "Object {} extracted in PDF Parser Analysis."
                                            .format(obj_id))
                        for e in errors:
                            all_errors.add(e)

                        if extracted_files:
                            extract_res = ResultSection(
                                title_text="Extracted embedded objects",
                                parent=pdf_parserres)
                            extract_res.set_heuristic(9)
                            extract_res.add_lines(extracted_files)

                # Extract jbig2decode objects in deep scan mode
                if request.deep_scan and len(jbig_objs) > 0:
                    options = {
                        "object": jbig_objs,
                        "dump": "extracted_jb_obj_",
                    }
                    pdf_parser_result, errors = self.get_pdf_parser(
                        path, working_dir, options)

                    if pdf_parser_result:
                        extracted_jb = []
                        files = pdf_parser_result.get("files", None)
                        if files:
                            for f, l in files.items():
                                if f == 'embedded':
                                    for i in l:
                                        f_name = os.path.basename(i)
                                        obj_id = f_name.replace(
                                            "extracted_jb_obj_", "")
                                        extracted_jb.append(
                                            "JBIG2DECODE object {} extracted as {}"
                                            .format(obj_id, f_name))
                                        request.add_extracted(
                                            i, f_name,
                                            "JBIG2DECODE object {} extracted in PDF Parser Analysis."
                                            .format(obj_id))

                        for e in errors:
                            all_errors.add(e)

                        if extracted_jb:
                            jbig_extract_res = ResultSection(
                                title_text="Extracted JBIG2Decode objects",
                                parent=pdf_parserres)
                            jbig_extract_res.set_heuristic(9)
                            jbig_extract_res.add_lines(extracted_jb)

            if len(pdf_parserres.subsections) > 0:
                res.add_subsection(pdf_parserres)

        return res, objstms, all_errors
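The reference-list handling around the islist regex above is easy to misread; a standalone illustration (regexes copied from the function, sample content assumed) of how an /Annots-style list is split into individual indirect references:

import re

content = "s [10 0 R 11 0 R]"
islist = re.match(r"[s]?[ ]?\[([0-9]* [0-9]* R[ \\rn]{0,8})*\]", content)
if islist:
    # strip the leading 's ', turn 'R ' separators into commas, drop the brackets
    refs = re.sub(r"[\[\]]", "",
                  islist.group(0).replace("s ", "").replace("R ", "R,")).split(",")
    print(refs)  # ['10 0 R', '11 0 R']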
Example #14
    def LSB_chisquare(self):
        pixels = self.binary_pixels

        x_points = []
        y_points = []

        # Use image if not in AL
        if self.request is None:
            plt.switch_backend('agg')
            plt.axis([0, self.pixel_count / 8, -0.1, 1.1])
            plt.title('Chi Square Test')
            plt.grid(True)

        index = 0
        success = False

        try:
            # If greyscale, only one set of pixels to process
            if self.channels_to_process == 1:
                while len(pixels) != 0:
                    self.log.debug(len(pixels))
                    # In bytes
                    x_location = (self.chunk *
                                  self.channels_to_process) * index / 8
                    x_points.append(x_location)

                    obs_pixel_set = []
                    exp_pixel_set = []
                    # Let's grab some PoVs!!! Yay!!!
                    for i in range(0, 255, 2):
                        # Get counts
                        v1 = pixels[:self.chunk].count(
                            str('{0:08b}').format(i))
                        v2 = pixels[:self.chunk].count(
                            str('{0:08b}').format(i + 1))
                        # Add observed values
                        if v1 == 0 and v2 == 0:
                            continue
                        obs_pixel_set.append(v1)
                        obs_pixel_set.append(v2)
                        # Calculate expected values of pairs
                        expected = float((v1 + v2) * 0.5)
                        exp_pixel_set.extend([expected] * 2)

                    if len(obs_pixel_set) == 0:
                        y_points.append(0)
                    else:
                        y_points.append(
                            round(
                                chisquare(np.array(obs_pixel_set),
                                          f_exp=np.array(exp_pixel_set))[1],
                                4))

                    # advance to the next chunk so the while loop terminates
                    # (mirrors the multi-channel branch below)
                    index += 1
                    pixels = pixels[self.chunk:]
                    success = True

            else:
                # If not greyscale, test each colour channel separately per chunk and then average
                while len(pixels) != 0:
                    x_location = (self.chunk *
                                  self.channels_to_process) * index / 8
                    x_points.append(x_location)

                    # Grab channel (i.e. R,G,B) pixels
                    colours = self.get_colours(pixels[:self.chunk])
                    counts = []
                    lsb_counts = []

                    for c, pixels_flat in iter(colours.items()):
                        obs_pixel_set = []
                        exp_pixel_set = []
                        # Let's grab some PoVs!!! Yay!!!
                        for i in range(0, 255, 2):
                            # Get counts
                            v1 = pixels_flat[:self.chunk].count(
                                str('{0:08b}').format(i))
                            v2 = pixels_flat[:self.chunk].count(
                                str('{0:08b}').format(i + 1))
                            # Add observed values
                            if v1 == 0 and v2 == 0:
                                continue
                            obs_pixel_set.append(v1)
                            obs_pixel_set.append(v2)
                            # Calculate expected values of pairs
                            expected = float((v1 + v2) * 0.5)
                            exp_pixel_set.extend([expected] * 2)

                        if len(obs_pixel_set) == 0:
                            counts.append(0)
                            if self.request is None:
                                plt.scatter(x_location,
                                            0,
                                            color=c,
                                            marker='^',
                                            s=50)

                        else:
                            chi = round(
                                chisquare(np.array(obs_pixel_set),
                                          f_exp=np.array(exp_pixel_set))[1], 6)
                            counts.append(chi)
                            if self.request is None:
                                plt.scatter(x_location,
                                            chi,
                                            color=c,
                                            marker='^',
                                            s=50)
                        # Additionally, collect the LSBs for additional randomness testing.
                        # Idea from http://guillermito2.net/stegano/tools/
                        lsb = []
                        for pbyte in pixels_flat:
                            lsb.append(float(pbyte[-1]))
                        lsb_avg_value = float(round(sum(lsb) / len(lsb), 1))
                        if self.request is None:
                            plt.scatter(x_location,
                                        lsb_avg_value,
                                        color='k',
                                        marker='.',
                                        s=10)
                        lsb_counts.append(lsb_avg_value)

                    # Average significance counts for the colours and round to 2 decimals
                    y_points.append(
                        round(sum(counts) / self.channels_to_process, 2))

                    index += 1
                    pixels = pixels[self.chunk:]
                    success = True
        except Exception:
            success = False

        if success:
            if self.request is None:
                plt.plot(x_points, y_points, 'm--', linewidth=1.0)
                lsb_chi_path = path.join(self.working_directory,
                                         "LSB_chisquare_attack.png")
                plt.savefig(lsb_chi_path, bbox_inches='tight')
                plt.show()
            else:
                chi_graph_data = {
                    'type': 'colormap',
                    'data': {
                        'domain': [0, 100],
                        'values': [y * 100 for y in y_points]
                    }
                }

                chires = ResultSection('LSB Chi Square Analysis')

                chires.add_subsection(
                    ResultSection('Colour Map. 0 == Not random, '
                                  '100 == Random',
                                  body_format=BODY_FORMAT.GRAPH_DATA,
                                  body=json.dumps(chi_graph_data)))

                pval_res = self.detect_sig_changes(y_points)
                if pval_res:
                    chires.add_subsection(pval_res)
                self.working_result.add_subsection(chires)

        return
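The pairs-of-values chi-square test used above can be shown in isolation; a self-contained sketch (assumes scipy and numpy) over a flat list of byte values, where a p-value close to 1 suggests the near-equal pair counts typical of LSB embedding:

import numpy as np
from scipy.stats import chisquare

def pov_chisquare(byte_values):
    observed, expected = [], []
    for i in range(0, 255, 2):
        v1, v2 = byte_values.count(i), byte_values.count(i + 1)
        if v1 == 0 and v2 == 0:
            continue
        observed += [v1, v2]
        # under the embedding hypothesis each pair's counts are equal
        expected += [(v1 + v2) / 2.0] * 2
    if not observed:
        return 0.0
    return round(chisquare(np.array(observed), f_exp=np.array(expected))[1], 4)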
Example #15
    def execute(self, request):
        """Main Module. See README for details."""
        request.result = Result()
        self.result = request.result
        wrk_dir = self.working_directory
        ipa_path = request.file_path
        self.known_keys = None
        self.reported_keys = {}

        # Determine if PK container has IPA content to parse
        try:
            ipa_file = zipfile.ZipFile(ipa_path)
        except zipfile.BadZipfile:
            # Return if files cannot be extracted
            return
        # isipa returns False if Info.plist not found, or returns Info.plist path
        name_list, isipa = self.isipa(ipa_file)

        if not isipa:
            return

        # Extract Files of interest using 7zip (some files can be AES encrypted which standard zipfile library does not
        # support)
        extract_success = False
        try:
            self.extract_archive(ipa_path)
            extract_success = True
        except Exception as e:
            self.log.error(f"Could not extract IPA file due to 7zip error {e}")

        if not extract_success:
            return

        with open(os.path.join(os.path.dirname(__file__), "keys.json"), 'r') as f:
            keys_dict = json.load(f)
            self.known_keys = keys_dict['glossary']

        patterns = PatternMatch()

        # Info.plist
        main_exe = None
        res = ResultSection("Info.plist")
        info_plist_path = os.path.join(wrk_dir, isipa)

        isempty, plist_dict = self.gen_plist_extract(info_plist_path, patterns)

        if plist_dict is None:
            res.add_line("Info.plist in sample cannot be parsed. Sample may be corrupt.")

        elif isempty:
            res.add_line("Empty Info.plist file. Archive contents may be encrypted.")

        else:
            # Grab the main executable name
            if plist_dict.get("CFBundleExecutable", None):
                i = plist_dict["CFBundleExecutable"]
                try:
                    main_exe = (i, f"Name of bundle's main executable file: {i}")
                    res.add_line(main_exe[1])
                except UnicodeEncodeError:
                    i = i.encode('utf8', 'replace')
                    main_exe = (i, f"Name of bundle's main executable file: {i}")
                    res.add_line(main_exe[1])

            iden_key_res, unk_key_res = self.parse_plist(plist_dict)
            if iden_key_res:
                res.add_subsection(iden_key_res)
            if unk_key_res:
                res.add_subsection(unk_key_res)
            request.result.add_section(res)

        # PkgInfo file
        pkg_types = {
            'APPL': 'application',
            'FMWK': 'frameworks',
            'BNDL': 'loadable bundle'
        }
        pattern = re.compile(r'Payload/[^/]*\.app/PkgInfo')
        for fn in name_list:
            m = pattern.match(fn)
            if m is not None:
                res = ResultSection("PkgInfo Details")
                pkg_info_path = os.path.join(wrk_dir, m.group())
                with open(pkg_info_path, 'r') as f:
                    pkg_info = f.read()
                if pkg_info == "":
                    res.add_line("Empty PkgInfo file. Archive contents may be encrypted.")
                elif len(pkg_info) == 8:
                    # noinspection PyBroadException
                    try:
                        pkgtype = pkg_info[0:4]
                        if pkgtype in pkg_types:
                            pkgtype = pkg_types[pkgtype]
                        creator_code = pkg_info[4:]
                        res = ResultSection("PkgInfo Details")
                        res.add_line(f"Package Type: {pkgtype}; Application Signature: {creator_code}")
                    except Exception:
                        continue
                request.result.add_section(res)

        if main_exe:
            main_exe_reg = (rf'.*{main_exe[0]}$', f"Main executable file {main_exe[0]}")
        else:
            main_exe_reg = ('$', 'Placeholder for missing main executable name.')

        fextract_regs = [
            main_exe_reg,
            (r'Payload.*\.(?:crt|cer|der|key|p12|p7b|p7c|pem|pfx)$', "Certificate or key file"),
            (r'Payload.*libswift[^\/]*\.dylib$', "Swift code library files"),
            (r'Payload\/META-INF\/.*ZipMetadata.plist$', "IPA archive content info"),
            (r'Payload.*mobileprovision$', "Provisioning profile for limiting app uploads"),
            (r'.*plist$', "Plist information file"),
        ]

        empty_file_msg = "Empty file. Archive contents may be encrypted."
        int_files = {}
        plist_res = ResultSection("Other Plist File Information (displaying new key-value pairs only)")
        for root, dirs, files in os.walk(wrk_dir):
            for name in files:
                full_path = safe_str(os.path.join(root, name))
                if os.path.getsize(full_path) == 0:
                    int_files.setdefault(empty_file_msg, []).append(full_path)
                else:
                    for p, desc in fextract_regs:
                        pattern = re.compile(p)
                        m = pattern.match(full_path)
                        if m is not None:
                            # Main executable file was already identified above
                            if not desc.startswith("Main executable file "):
                                if desc.startswith("Plist"):
                                    pres = ResultSection(f"{full_path.replace(wrk_dir, '')}")
                                    isempty, plist_parsed = self.gen_plist_extract(full_path, patterns)
                                    if not isempty and plist_parsed:
                                        iden_key_res, unk_key_res = self.parse_plist(plist_parsed)
                                        # If all keys have already been reported, skip this plist
                                        if not iden_key_res and not unk_key_res:
                                            continue
                                        if iden_key_res:
                                            pres.add_subsection(iden_key_res)
                                        if unk_key_res:
                                            pres.add_subsection(unk_key_res)
                                        plist_res.add_subsection(pres)
                                else:
                                    int_files.setdefault(desc, []).append(full_path)
                            break

        if len(plist_res.subsections) > 0:
            request.result.add_section(plist_res)

        if len(int_files) > 0:
            intf_sec = ResultSection("Files of interest", parent=res)
            for intf_d, intf_p in int_files.items():
                intf_subsec = ResultSection(intf_d, parent=intf_sec)
                for f in intf_p:
                    intf_subsec.add_line(f.replace(f"{wrk_dir}/", ""))
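# A minimal standalone sketch of the plist parsing that gen_plist_extract
# above presumably wraps, using only Python's built-in plistlib (the
# service's own helper also runs pattern matching and may differ):
import plistlib

def read_plist(path):
    """Return the plist as a dict, or None if it cannot be parsed."""
    with open(path, "rb") as fh:
        try:
            return plistlib.load(fh)
        except plistlib.InvalidFileException:
            return None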
    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results ...

        # You should run your code here...

        # ==================================================================
        # Check if we're scanning an embedded file
        #   This service always drops 3 embedded files: two generate random results and the other empty results
        #   We're making a check to see if we're scanning one of the embedded files.
        #   In a normal service this is not something you would do at all, but since we are using this
        #   service in our unit tests to test all features of our report generator, we have to do this
        if request.sha256 not in ['d729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                                  '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                                  'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be saved to
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dump the text to the screen...
            #     All section scores will be summed in the service result
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())
            # Or you can add them from a list
            #   Here we will generate a random number of random lines
            text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])
            # If the section needs to affect the score of the file you need to set a heuristic
            #   Here we will pick one at random
            #     In addition to adding a heuristic, we will associate a signature with the heuristic;
            #     we do this by adding the signature name to the heuristic. (Here we generate a random name)
            text_section.set_heuristic(3, signature="sig_one")
            # You can attach attack ids to heuristics after they were defined
            text_section.heuristic.add_attack_id("T1066")
            # Same thing for the signatures: they can be added to the heuristic after the fact and you can even say how
            #   many times the signature fired by setting its frequency. If you call add_signature_id twice with the
            #   same signature, this will effectively increase the frequency of the signature.
            text_section.heuristic.add_signature_id("sig_two", score=20, frequency=2)
            text_section.heuristic.add_signature_id("sig_two", score=20, frequency=3)
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_four", score=0)
            # The heuristic for text_section should have the following properties
            #   1. 1 attack ID: T1066
            #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
            #   3. Signature frequencies are cumulative, therefore they will be as follows:
            #      - sig_one = 1
            #      - sig_two = 5
            #      - sig_three = 2
            #      - sig_four = 1
            #   4. The score used by each heuristic is driven by the following rules: signature_score_map has the
            #      highest priority, then the score value passed to add_signature_id, and finally the default
            #      heuristic score is used. Therefore the scores used to calculate the total score for the
            #      text_section are as follows:
            #      - sig_one: 10    -> heuristic default score
            #      - sig_two: 20    -> score provided by the function add_signature_id
            #      - sig_three: 30  -> score provided by the heuristic map
            #      - sig_four: 40   -> score provided by the heuristic map because it's higher priority than the
            #                          function score
            #    5. Total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210
            # Make sure you add your section to the result
            result.add_section(text_section)
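            # A quick sanity check of the arithmetic described above, in plain
            # Python (illustrative names only; nothing here calls the framework):
            expected_frequencies = {"sig_one": 1, "sig_two": 5, "sig_three": 2, "sig_four": 1}
            expected_scores = {"sig_one": 10, "sig_two": 20, "sig_three": 30, "sig_four": 40}
            assert sum(expected_frequencies[s] * expected_scores[s]
                       for s in expected_frequencies) == 210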

            # ==================================================================
            # Color map Section: BODY_FORMAT.GRAPH_DATA
            #     Creates a color map bar using a minimum and maximum domain
            #     e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            color_map_data = {
                'type': 'colormap',
                'data': {
                    'domain': [cmap_min, cmap_max],
                    'values': [random.random() * cmap_max for _ in range(50)]
                }
            }
            # The classification of a section can be set to any valid classification for your system
            section_color_map = ResultSection("Example of colormap result section", body_format=BODY_FORMAT.GRAPH_DATA,
                                              body=json.dumps(color_map_data), classification=cl_engine.RESTRICTED)
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format
            #     As you can see here, the body of the section can be set directly instead of line by line
            random_host = get_random_host()
            url_section = ResultSection('Example of a simple url section', body_format=BODY_FORMAT.URL,
                                        body=json.dumps({"name": "Random url!", "url": f"https://{random_host}/"}))

            # Since urls are very important features we can tag those features in the system so they are easy to find
            #   Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of urls!
            #   Also, no need to provide a name; the url link will be displayed
            host1 = get_random_host()
            host2 = get_random_host()
            ip1 = get_random_ip()
            ip2 = get_random_ip()
            ip3 = get_random_ip()
            urls = [
                {"url": f"https://{host1}/"},
                {"url": f"https://{host2}/"},
                {"url": f"https://{ip1}/"},
                {"url": f"https://{ip2}/"},
                {"url": f"https://{ip3}/"}]

            # A heuristic can fire more than once without being associated with a signature
            url_heuristic = Heuristic(4, frequency=len(urls))

            url_sub_section = ResultSection('Example of a url section with multiple links',
                                            body=json.dumps(urls), body_format=BODY_FORMAT.URL,
                                            heuristic=url_heuristic)
            url_sub_section.add_tag("network.static.ip", ip1)
            url_sub_section.add_tag("network.static.ip", ip2)
            url_sub_section.add_tag("network.static.ip", ip3)
            url_sub_section.add_tag("network.static.domain", host1)
            url_sub_section.add_tag("network.dynamic.domain", host2)
            # Since url_sub_section is a sub-section of url_section
            # we will add it as a sub-section of url_section not to the main result itself
            url_section.add_subsection(url_sub_section)
            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
            data = hexdump(b"This is some random text that we will format as an hexdump and you'll see "
                           b"that the hexdump formatting will be preserved by the memory dump section!")
            memdump_section = ResultSection('Example of a memory dump section', body_format=BODY_FORMAT.MEMORY_DUMP,
                                            body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
            #     while also providing easy to parse data for automated tools.
            #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
            #         in the UI for the user
            #     The body argument must be a json dump of a dictionary (only str, int, and booleans are allowed)
            kv_body = {
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            }
            kv_section = ResultSection('Example of a KEY_VALUE section', body_format=BODY_FORMAT.KEY_VALUE,
                                       body=json.dumps(kv_body))
            result.add_section(kv_section)

            # ==================================================================
            # JSON section:
            #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
            #     to display a tree view of JSON results.
            #     NB: Use this sparingly! As a service developer you should do your best to include important
            #     results as their own result sections.
            #     The body argument must be a json dump of a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [
                        {"d1_key": "val", "d1_key2": "val2"},
                        {"d2_key": "val", "d2_key2": "val2"}
                    ],
                    "bool": True
                }
            }
            json_section = ResultSection('Example of a JSON section', body_format=BODY_FORMAT.JSON,
                                         body=json.dumps(json_body))
            result.add_section(json_section)

            # ==================================================================
            # PROCESS_TREE section:
            #     This section allows the service writer to list a bunch of dictionary objects that have nested lists
            #     of dictionaries to be displayed in the UI. Each dictionary object represents a process, and therefore
            #     each dictionary must be of the following format:
            #     {
            #       "process_pid": int,
            #       "process_name": str,
            #       "command_line": str,
            #       "signatures": {} NB: This dict maps signature names to the scores they contribute
            #       "children": [] NB: This list either is empty or contains more dictionaries that have the same
            #                          structure
            #     }
            nc_body = [
                {
                    "process_pid": 123,
                    "process_name": "evil.exe",
                    "command_line": "C:\\evil.exe",
                    "signatures": {},
                    "children": [
                        {
                            "process_pid": 321,
                            "process_name": "takeovercomputer.exe",
                            "command_line": "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff",
                            "signatures": {"one":250},
                            "children": [
                                {
                                    "process_pid": 456,
                                    "process_name": "evenworsethanbefore.exe",
                                    "command_line": "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                                    "signatures": {"one":10, "two":10, "three":10},
                                    "children": []
                                },
                                {
                                    "process_pid": 234,
                                    "process_name": "badfile.exe",
                                    "command_line": "C:\\badfile.exe -k nothing_to_see_here",
                                    "signatures": {"one":1000, "two":10, "three":10, "four":10, "five":10},
                                    "children": []
                                }
                            ]
                        },
                        {
                            "process_pid": 345,
                            "process_name": "benignexe.exe",
                            "command_line": "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                            "signatures": {"one": 2000},
                            "children": []
                        }
                    ]
                },
                {
                    "process_pid": 987,
                    "process_name": "runzeroday.exe",
                    "command_line": "C:\\runzeroday.exe -f insert_bad_spelling",
                    "signatures": {},
                    "children": []
                }
            ]
            nc_section = ResultSection('Example of a PROCESS_TREE section',
                                       body_format=BODY_FORMAT.PROCESS_TREE,
                                       body=json.dumps(nc_body))
            result.add_section(nc_section)
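            # A small, hypothetical helper (not part of the service API) showing
            # how one might sanity-check that a body matches the structure above:
            def validate_process_node(node):
                assert isinstance(node["process_pid"], int)
                assert isinstance(node["process_name"], str)
                assert isinstance(node["command_line"], str)
                for child in node["children"]:
                    validate_process_node(child)

            for process in nc_body:
                validate_process_node(process)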
            
            # ==================================================================
            # TABLE section:
            #     This section allows the service writer to have their content displayed in a table format in the UI
            #     The body argument must be a list [] of dict {} objects. A dict object can have a key value pair
            #     where the value is a flat nested dictionary, and this nested dictionary will be displayed as a nested
            #     table within a cell.
            table_body = [
                {
                    "a_str": "Some string1",
                    "extra_column_here": "confirmed",
                    "a_bool": False,
                    "an_int": 101,
                },
                {
                    "a_str": "Some string2",
                    "a_bool": True,
                    "an_int": 102,
                },
                {
                    "a_str": "Some string3",
                    "a_bool": False,
                    "an_int": 103,
                },
                {
                    "a_str": "Some string4",
                    "a_bool": None,
                    "an_int": -1000000000000000000,
                    "extra_column_there": "confirmed",
                    "nested_table": {
                        "a_str": "Some string3",
                        "a_bool": False,
                        "nested_table_thats_too_deep": {
                            "a_str": "Some string3",
                            "a_bool": False,
                            "an_int": 103,
                        },
                    },
                },
            ]
            table_section = ResultSection('Example of a TABLE section',
                                          body_format=BODY_FORMAT.TABLE,
                                          body=json.dumps(table_body))
            result.add_section(table_section)

            # ==================================================================
            # Re-Submitting files to the system
            #     Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

            # Embedded files can also have their own classification!
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"CLASSIFIED!!!__"+data.encode())
            request.add_extracted(temp_path, "classified.doc", "Classified file ... don't look",
                                  classification=cl_engine.RESTRICTED)

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #     Adding supplementary files will save them on the datastore for future
            #      reference but won't reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(urls))
            request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")
            # Like embedded files, you can add more than one supplementary file
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json", "This is the json_body as a JSON file")

            # ==================================================================
            # Wrap-up:
            #     Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Creating an empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized results file, we will completely randomize the results
            #   The content of those results do not matter since we've already showed you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # Add a couple of random sections
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result
Example #17
    def LSB_averages(self):
        # Collect the LSBs for additional randomness testing.
        # Idea from http://guillermito2.net/stegano/tools/
        # Right now only supports AL
        if not self.request:
            return

        pixels = self.binary_pixels
        lsb_points = []
        success = False

        try:
            # If greyscale, only one set of pixels to process
            if self.channels_to_process == 1:
                while len(pixels) != 0:
                    lsb = []
                    for pbyte in pixels:
                        lsb.append(float(pbyte[-1]))
                    lsb_avg_value = round(sum(lsb) / len(lsb), 1)
                    lsb_points.append(lsb_avg_value)
                    pixels = pixels[self.chunk:]
                    success = True

            else:
                lsb_points_channels = {}
                # If not greyscale, test each colour channel separately per chunk and then average
                while len(pixels) != 0:
                    # Grab channel (i.e. R,G,B) pixels
                    colours = self.get_colours(pixels[:self.chunk])
                    lsb_counts = []

                    for c, pixels_flat in iter(colours.items()):
                        lsb = []
                        for pbyte in pixels_flat:
                            lsb.append(float(pbyte[-1]))
                        lsb_avg_value = float(round(sum(lsb) / len(lsb), 1))
                        lsb_counts.append(lsb_avg_value)
                        lsb_points_channels.setdefault(c, []).append(lsb_avg_value)

                    # Average lsb counts for the colours and round to 2 decimals
                    lsb_points.append(
                        round(sum(lsb_counts) / self.channels_to_process, 2))

                    pixels = pixels[self.chunk:]
                    success = True
        except Exception:
            success = False

        if success:
            lsb_graph_data = {
                'type': 'colormap',
                'data': {
                    'domain': [0, 100],
                    'values': [y * 100 for y in lsb_points]
                }
            }

            lsbres = ResultSection('LSB Average Value Analysis')

            lsbres.add_subsection(
                ResultSection('Overall. Chunk size: {}. '
                              'Closer to 50==Random, '
                              'closer to 0/100==Not random.'.format(
                                  self.chunk_bytes),
                              body_format=BODY_FORMAT.GRAPH_DATA,
                              body=json.dumps(lsb_graph_data)))

            pval_res = self.detect_sig_changes(lsb_points, thr_counter=0.80)
            if pval_res:
                lsbres.add_subsection(pval_res)

            self.working_result.add_subsection(lsbres)

        return
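# A minimal, self-contained sketch of the same LSB-average idea as above,
# assuming plain 0-255 channel values instead of this class's binary-string
# pixels (illustrative only):
def lsb_average(channel_values):
    """Average of the least significant bits; ~0.5 suggests random-looking LSBs."""
    if not channel_values:
        return 0.0
    bits = [value & 1 for value in channel_values]
    return sum(bits) / len(bits)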
    def parse_results(self, response: Dict[str, Any]) -> Result:
        """
        This method parses the response JSON containing the scan results so that it will be displayed nicely in
        Assemblyline
        :param response: The raw results from the MetaDefender scan
        :return: The Result object to be used when displaying in Assemblyline
        """
        res = Result()
        scan_results = response.get('scan_results', response)
        virus_name = ""
        process_results = response.get('process_info', response)
        hit = False
        fail = False
        processed = {}
        if scan_results is not None and scan_results.get(
                'progress_percentage') == 100:
            no_threat_detected = []
            av_hits = ResultSection('AV Detections as Infected or Suspicious')
            av_fails = ResultSection('Failed to Scan or No Threats Detected')

            scans = scan_results.get('scan_details', scan_results)
            av_scan_times = []
            modified_scans = {
                key: value
                for key, value in scans.items()
                if key not in ["progress_percentage"]
            }
            for majorkey, subdict in sorted(modified_scans.items()):
                if majorkey in self.blocklist:
                    continue
                heur_id = None
                if subdict['scan_result_i'] == 1:  # File is infected
                    virus_name = subdict['threat_found']
                    if virus_name:
                        heur_id = 1
                elif subdict['scan_result_i'] == 2:  # File is suspicious
                    virus_name = subdict['threat_found']
                    if virus_name:
                        heur_id = 2
                elif subdict['scan_result_i'] in (3, 10):  # File was not scanned or failed
                    # noinspection PyBroadException
                    try:
                        engine = self.nodes[self.current_node]['engine_map'][
                            self._format_engine_name(majorkey)]
                    except Exception:
                        engine = None
                    fail = True
                    av_fails.add_subsection(AvErrorSection(majorkey, engine))
                elif subdict['scan_result_i'] == 0:  # No threat detected
                    no_threat_detected.append(majorkey)
                    fail = True

                if heur_id is not None:
                    virus_name = virus_name.replace("a variant of ", "")
                    engine = self.nodes[self.current_node]['engine_map'][
                        self._format_engine_name(majorkey)]
                    av_hit_section = AvHitSection(majorkey, virus_name, engine,
                                                  heur_id,
                                                  self.sig_score_revision_map,
                                                  self.kw_score_revision_map,
                                                  self.safelist_match)
                    av_hits.add_subsection(av_hit_section)
                    hit = True

                av_scan_times.append(self._format_engine_name(majorkey))
                av_scan_times.append(subdict['scan_time'])

            if hit:
                res.add_section(av_hits)

            # Only create a result section for "No Threat Detected" if there was at least one hit
            if hit and fail:
                if no_threat_detected:
                    ResultSection(
                        "No Threat Detected by AV Engine(s)",
                        body_format=BODY_FORMAT.KEY_VALUE,
                        body=json.dumps(
                            dict(no_threat_detected=no_threat_detected)),
                        parent=av_fails)

                res.add_section(av_fails)

            file_size = response['file_info']['file_size']
            queue_time = response['process_info']['queue_time']
            processing_time = response['process_info']['processing_time']
            self.log.info(
                f"File successfully scanned by node ({self.current_node}). File size: {file_size} B."
                f"Queue time: {queue_time} ms. Processing time: {processing_time} ms. "
                f"AV scan times: {str(av_scan_times)}")

            # Add the queue time to a list, which will be later used to calculate average queue time
            self.nodes[self.current_node]['queue_times'].append(queue_time)
            self.nodes[self.current_node]['file_count'] += 1
        if process_results is not None and process_results.get(
                'progress_percentage') == 100:
            hit = False
            fail = False
            processed = process_results.get('post_processing', process_results)
            if processed['actions_failed']:
                fail = True
            elif processed['actions_ran']:
                hit = True
        # Add the CDR post-processing results as a JSON section
        if hit:
            cdr_json_section = ResultSection('CDR Successfully Executed',
                                             body_format=BODY_FORMAT.JSON,
                                             body=json.dumps(processed))
            res.add_section(cdr_json_section)
        if fail:
            cdr_fails = ResultSection('CDR Failed or No Malicious Files Found')
            res.add_section(cdr_fails)

        return res
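# A minimal sketch of the average-queue-time bookkeeping the comment above
# alludes to (assumes the same self.nodes structure; the real computation
# lives elsewhere in the service):
def average_queue_time(node):
    """Average of the recorded queue times for one MetaDefender node."""
    times = node['queue_times']
    return sum(times) / len(times) if times else 0.0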
Example #19
    def _handle_subanalyses(self, request: ServiceRequest, sha256: str,
                            analysis_id: str, file_verdict_map: Dict[str, str],
                            parent_section: ResultSection) -> None:
        """
        This method handles the subanalyses for a given analysis ID
        :param request: The service request object
        :param sha256: The hash of the given file
        :param analysis_id: The ID for the analysis which we will be retrieving
        :param file_verdict_map: A map of sha256s representing a file's
        contents, and the verdict for that file
        :param parent_section: The result section that the network
        result section will be added to, if applicable
        :return: None
        """
        so = SandboxOntology()

        # This boolean is used to determine if we should try to download another file
        can_we_download_files = True

        # These sets will be used as we work through the process trees
        process_path_set = set()
        command_line_set = set()

        # Now let's get into the subanalyses for this sample
        sub_analyses = self.client.get_sub_analyses_by_id(analysis_id)

        for sub in sub_analyses:
            sub_analysis_id = sub["sub_analysis_id"]

            # Get the extraction info, which is basically the details of how the subanalysis object came to be
            extraction_info = sub.pop("extraction_info", None)

            # Processes is only present when the sample has undergone dynamic execution
            if extraction_info and "processes" not in extraction_info:
                extraction_info = None

            code_reuse = self.client.get_sub_analysis_code_reuse_by_id(
                analysis_id, sub_analysis_id)

            if code_reuse:
                families = code_reuse.pop("families", [])
            else:
                families = []

            if not families and not extraction_info:
                # Otherwise, boring!
                continue

            if families and not any(family["reused_gene_count"] > 1
                                    for family in families):
                # Most likely a false positive
                continue

            ###
            # If we have gotten to this point, then the sub analysis is worth reporting
            ###

            extraction_method = sub["source"].replace("_", " ")

            if extraction_method != "root":
                sub_kv_section = ResultKeyValueSection(
                    f"Subanalysis report for {sub['sha256']}, extracted via {extraction_method}"
                )
            else:
                sub_kv_section = ResultKeyValueSection(
                    f"Subanalysis report for {sub['sha256']}")

            metadata = self.client.get_sub_analysis_metadata_by_id(
                analysis_id, sub_analysis_id)
            processed_subanalysis = self._process_details(
                metadata.copy(), UNINTERESTING_SUBANALYSIS_KEYS)
            sub_kv_section.update_items(processed_subanalysis)
            parent_section.add_subsection(sub_kv_section)

            if code_reuse:
                code_reuse_kv_section = ResultKeyValueSection(
                    "Code reuse detected")
                code_reuse_kv_section.update_items(code_reuse)
                sub_kv_section.add_subsection(code_reuse_kv_section)

            sub_sha256 = sub["sha256"]
            if families:
                self._process_families(families, sub_sha256, file_verdict_map,
                                       sub_kv_section)

            if extraction_info:
                self._process_extraction_info(extraction_info["processes"],
                                              process_path_set,
                                              command_line_set, so)

            # Setting a heuristic here or downloading the file would be redundant if the hash matched the original file
            if sub_sha256 != sha256:
                self._set_heuristic_by_verdict(
                    sub_kv_section, file_verdict_map.get(sub_sha256))

                if can_we_download_files:
                    file_was_downloaded = self.client.download_file_by_sha256(
                        sub_sha256, self.working_directory)
                    if file_was_downloaded:
                        path = f"{self.working_directory}/{sub_sha256}.sample"
                        request.add_extracted(
                            path,
                            f"{sub_sha256}.sample",
                            f"Extracted via {extraction_method}",
                        )
                        self.log.debug(
                            f"Added {sub_sha256}.sample as an extracted file.")
                    else:
                        can_we_download_files = False

        process_tree_section = so.get_process_tree_result_section()
        for process_path in process_path_set:
            process_tree_section.add_tag("dynamic.process.file_name",
                                         process_path)
        for command_line in command_line_set:
            process_tree_section.add_tag("dynamic.process.command_line",
                                         command_line)
        if process_tree_section.body:
            parent_section.add_subsection(process_tree_section)
Example #20
    def dump_property(self, field, path, index, res, parent_res, is_orphan):
        if field['name'].value != '':
            name = field['name'].display[1:-1]
            p_type = field['type'].value

            if path[-1:] == '\\':
                abs_name = f"{path}{name}"
            else:
                abs_name = f"{path}\\{name}"

            prop_res = ResultSection(f"Property: {abs_name}",
                                     body_format=BODY_FORMAT.KEY_VALUE,
                                     body={})

            # If the type is not 1 (storage), 2 (stream) or 5 (root), that is weird.
            if p_type not in (1, 2, 5):
                self.invalid_properties_count += 1

            # For properties that are not storage (a storage should be treated like a folder)
            if p_type != 1:
                size = field['size'].value
            else:
                size = 0

            address = 0
            if size > 0:
                if field['size'].value < self.ole2parser[
                        'header/threshold'].value and index != '0':
                    # we first get the offset from the short block but then we need
                    # to map it back to the file, which is from root[X].
                    offset = field['start'].value * self.ole2parser.ss_size
                    keep_looping = True
                    root_index = 0
                    while keep_looping:
                        try:
                            current_root = self.ole2parser[
                                f"root[{root_index}]"]

                            if offset == 0 or current_root.size > offset:
                                address = current_root.address + offset
                                keep_looping = False
                            else:
                                offset -= current_root.size
                                root_index += 1

                        except MissingField:
                            keep_looping = False
                            address = None
                            if not is_orphan:
                                self.invalid_streams.append(
                                    field['name'].display)
                else:
                    address = HEADER_SIZE + field[
                        'start'].value * self.ole2parser.sector_size
            else:
                address = 0

            if address >= 0:
                prop_res.body['property_meta'] = \
                    f"offset: {hex(address // 8)} size: {hex(size)} / {field['type'].display} / " \
                    f"{field['decorator'].display} / id={index} left={field['left'].display} " \
                    f"right={field['right'].display} child={field['child'].display}"
            else:
                prop_res.body['property_meta'] = \
                    f"offset: could not map.. size: {hex(size)} / {field['type'].display} / " \
                    f"{field['decorator'].display} / id={index} left={field['left'].display} " \
                    f"right={field['right'].display} child={field['child'].display}"

            # for root or storage
            if p_type in (1, 5):
                if field[
                        'clsid'].display != "Null GUID: 00000000-0000-0000-0000-000000000000":
                    clsid_desc = self.GUID_DESC.get(field['clsid'].display,
                                                    "unknown clsid")
                    prop_res.body[
                        "clsid"] = f"{field['clsid'].display} ({clsid_desc})"
                    prop_res.add_tag('file.ole.clsid', field['clsid'].display)
                if field['creation'].display != "1601-01-01 00:00:00":
                    prop_res.body["creation_date"] = field['creation'].display
                    prop_res.add_tag('file.date.creation',
                                     field['creation'].display)
                if field['lastmod'].display != "1601-01-01 00:00:00":
                    prop_res.body["last_modified_date"] = field[
                        'lastmod'].display
                    prop_res.add_tag('file.date.last_modified',
                                     field['lastmod'].display)

            # fixes up a bug:
            if name == '\\1CompObj':
                if p_type != 2:
                    res_error = ResultSection(
                        f"\\1CompObj type is '{p_type}' and it should be 2 (stream) "
                        f"... really suspicious.")
                    res_error.set_heuristic(41)
                    prop_res.add_subsection(res_error)
                    size = field['size'].value

                # Apparently, we can get to this point and have office_root_entry_parser set to None.
                # Not sure what we should do about that but trying to use that member variable seems
                # like a bad idea...
                if self.office_root_entry_parser is not None:
                    temp_field = None
                    for f in self.office_root_entry_parser.createFields():
                        if f.name.startswith('compobj'):
                            temp_field = f

                    # cache all the sub-fields....
                    for _ in temp_field:
                        pass

                    self.parse_field(temp_field, prop_res,
                                     self.PARSING_MODE_DISPLAY, parent_res)

            if size > 0 and index != '0':
                field_with_other_parser = self.additional_parsing_fields.get(
                    address, None)

                if field_with_other_parser:
                    # noinspection PyTypeChecker
                    self.parse_field(field_with_other_parser, prop_res,
                                     self.PARSING_MODE_DISPLAY, parent_res)

            if len(prop_res.body) > 1:
                prop_res.body = json.dumps(prop_res.body)
                res.add_subsection(prop_res)
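# A simplified sketch of the mini-stream offset mapping performed above,
# assuming root fragments expose .address and .size the way hachoir fields
# do (illustrative only):
def map_ministream_offset(root_fragments, offset):
    """Walk root[0..n] until the remaining offset falls inside a fragment."""
    for fragment in root_fragments:
        if offset < fragment.size:
            return fragment.address + offset
        offset -= fragment.size
    return None  # ran out of fragments: the stream offset is invalid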
Example #21
    def execute(self, request):
        request.result = Result()
        request.set_service_context(self.get_tool_version())
        temp_filename = request.file_path
        filename = os.path.basename(temp_filename)
        extract_dir = os.path.join(self.working_directory, f"{filename}_extracted")
        decompiled_dir = os.path.join(self.working_directory, f"{filename}_decompiled")
        file_res = request.result
        new_files = []
        supplementary_files = []
        imp_res_list = []
        res_list = []

        if request.file_type == "java/jar":
            self.decompile_jar(temp_filename, decompiled_dir)
            if self.jar_extract(temp_filename, extract_dir):
                # Analysis properties
                self.classloader_found = 0
                self.security_found = 0
                self.url_found = 0
                self.runtime_found = 0
                self.applet_found = 0

                self.manifest_tags = []
                self.signature_block_certs = []

                def analyze_file(root, cf, file_res, imp_res_list, supplementary_files, decompiled_dir, extract_dir):
                    cur_file_path = os.path.join(root.decode('utf-8'), cf.decode('utf-8'))
                    with open(cur_file_path, "rb") as cur_file:
                        start_bytes = cur_file.read(24)

                        ##############################
                        # Executables in JAR
                        ##############################
                        cur_ext = os.path.splitext(cf)[1][1:].upper()
                        if start_bytes[:2] == b"MZ":
                            mz_res = dict(
                                title_text=f"Embedded executable file found: {cf} "
                                "There may be a malicious intent.",
                                heur_id=1,
                                tags=[('file.behavior', "Embedded PE")],
                                score_condition=APPLET_MZ,
                            )
                            imp_res_list.append(mz_res)

                        ##############################
                        # Launchable in JAR
                        ##############################
                        elif cur_ext in G_LAUNCHABLE_EXTENSIONS:
                            l_res = dict(
                                title_text=f"Launch-able file type found: {cf}"
                                "There may be a malicious intent.",
                                heur_id=2,
                                tags=[('file.behavior', "Launch-able file in JAR")],
                                score_condition=APPLET_MZ,
                            )
                            imp_res_list.append(l_res)

                        if cur_file_path.upper().endswith('.CLASS'):
                            self.analyse_class_file(file_res, cf, cur_file, cur_file_path,
                                                    start_bytes, imp_res_list, supplementary_files,
                                                    decompiled_dir, extract_dir)

                for root, _, files in os.walk(extract_dir.encode('utf-8')):
                    logging.info(f"Extracted: {root} - {files}")

                    # if the META-INF folder is encountered
                    if root.upper().endswith(b'META-INF'):  # only top level meta
                        self.analyse_meta_information(file_res, root, supplementary_files, extract_dir)
                        continue

                    with ThreadPoolExecutor() as executor:
                        for cf in files:
                            executor.submit(analyze_file, root, cf, file_res, imp_res_list,
                                            supplementary_files, decompiled_dir, extract_dir)

                res = ResultSection("Analysis of the JAR file")

                res_meta = ResultSection("[Meta Information]", parent=res)
                if len(self.manifest_tags) > 0:
                    res_manifest = ResultSection("Manifest File Information Extract",
                                                 parent=res_meta)
                    for tag, val in self.manifest_tags:
                        res_manifest.add_tag(tag, val)

                for res_cert in self.signature_block_certs:
                    res_meta.add_subsection(res_cert)

                if self.runtime_found > 0 \
                        or self.applet_found > 0 \
                        or self.classloader_found > 0 \
                        or self.security_found > 0 \
                        or self.url_found > 0:
                    res.add_line("All suspicious class files were saved as supplementary files.")

                res_class = ResultSection("[Suspicious classes]", parent=res)

                if self.runtime_found > 0:
                    ResultSection("Runtime Found",
                                  body=f"java/lang/Runtime: {self.runtime_found}",
                                  heuristic=Heuristic(10),
                                  parent=res_class)

                if self.applet_found > 0:
                    ResultSection("Applet Found",
                                  body=f"java/applet/Applet: {self.applet_found}",
                                  heuristic=Heuristic(6),
                                  parent=res_class)

                if self.classloader_found > 0:
                    ResultSection("Classloader Found",
                                  body=f"java/lang/ClassLoader: {self.classloader_found}",
                                  heuristic=Heuristic(7),
                                  parent=res_class)

                if self.security_found > 0:
                    ResultSection("Security Found",
                                  body=f"java/security/*: {self.security_found}",
                                  heuristic=Heuristic(8),
                                  parent=res_class)

                if self.url_found > 0:
                    ResultSection("URL Found",
                                  body=f"java/net/URL: {self.url_found}",
                                  heuristic=Heuristic(9),
                                  parent=res_class)

                res_list.append(res)

        # Add results if any
        self.recurse_add_res(file_res, imp_res_list, new_files)
        for res in res_list:
            file_res.add_section(res)

        # Submit embedded files
        if len(new_files) > 0:
            new_files = sorted(list(set(new_files)))
            txt = f"Extracted from 'JAR' file {filename}"
            for embed in new_files:
                request.add_extracted(embed, embed.replace(extract_dir + "/", "").replace(decompiled_dir + "/", ""),
                                      txt, safelist_interface=self.api_interface)

        if len(supplementary_files) > 0:
            supplementary_files = sorted(list(set(supplementary_files)))
            for path, name, desc in supplementary_files:
                request.add_supplementary(path, name, desc)
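# A tiny standalone version of the magic-byte check used in analyze_file
# above: PE executables begin with the two-byte DOS magic b"MZ".
def looks_like_pe(path):
    """Return True if the file starts with the PE/DOS 'MZ' magic bytes."""
    with open(path, "rb") as fh:
        return fh.read(2) == b"MZ"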
Example #22
    def section_builder(self, parser, field_dict, result, parsertype="MWCP"):
        json_body = {}
        malware_name = ''
        malware_types = []
        mitre_group = ''
        mitre_att = ''
        category = 'malware'
        # get malware names from parser objects
        if parsertype == "RATDecoder":
            malware_name = parser
        if parsertype == "MWCP":
            for name, obj in self.file_parsers.items():
                if parser in obj.parser_list:
                    malware_name = obj.malware
                    malware_types = obj.malware_types
                    mitre_att = obj.mitre_att
                    mitre_group = obj.mitre_group
                    category = obj.category
                    for item in [
                            'classification', 'mitre_group', 'mitre_att',
                            'malware', 'malware_types', 'category'
                    ]:
                        val = getattr(obj, item, None)
                        if val:
                            json_body[item] = val
                    break
        parser_section = ResultSection(f"{parsertype} : {parser}")

        parser_section = classification_checker(parser_section, parser,
                                                self.file_parsers)
        if len(field_dict) > 0:  # if any decoder output exists, raise a heuristic
            parser_section.set_body(json.dumps(json_body),
                                    body_format=BODY_FORMAT.KEY_VALUE)
            parser_section.set_heuristic(HEURISTICS_MAP.get(category, 1),
                                         attack_id=mitre_att)
            parser_section.add_tag("source", parsertype)

            if malware_name:
                parser_section.add_tag('attribution.implant',
                                       malware_name.upper())
            if mitre_group:
                parser_section.add_tag('attribution.actor',
                                       mitre_group.upper())
            for malware_type in malware_types:
                parser_section.add_tag('attribution.family',
                                       malware_type.upper())
        # Create subsections and attach them to the main parser_section
        subsection_builder(parser_section, field_dict)

        other_key = "other"
        if other_key in field_dict:
            other_content = field_dict[other_key]
            other_section = ResultSection(f"Other metadata found",
                                          body_format=BODY_FORMAT.KEY_VALUE,
                                          body=json.dumps(other_content))
            parser_section.add_subsection(other_section)

        for field in field_dict:
            if field != other_key and field not in FIELD_TAG_MAP:
                self.log.debug(f"{field} does not exist in FIELD_TAG_MAP")
        result.add_section(parser_section)
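# A hypothetical sketch of what a FIELD_TAG_MAP-driven subsection builder
# could look like (the real subsection_builder is defined elsewhere in this
# service; the names below are assumptions):
def subsection_builder_sketch(parent_section, field_dict):
    for field, values in field_dict.items():
        tag_type = FIELD_TAG_MAP.get(field)
        if not tag_type:
            continue
        sub = ResultSection(f"Extracted {field}")
        for value in values:
            sub.add_line(str(value))
            sub.add_tag(tag_type, str(value))
        parent_section.add_subsection(sub)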
    def test_process_ttps(intezer_static_class_instance,
                          dummy_api_interface_class, mocker):
        from intezer_static import ALIntezerApi
        from intezer_sdk.api import IntezerApi
        from intezer_sdk.errors import UnsupportedOnPremiseVersion
        from assemblyline_v4_service.common.result import ResultSection, ResultTableSection, TableRow
        from requests import HTTPError
        mocker.patch.object(intezer_static_class_instance,
                            "get_api_interface",
                            return_value=dummy_api_interface_class)
        intezer_static_class_instance.start()
        parent_res_sec = ResultSection("blah")

        mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[])
        intezer_static_class_instance._process_ttps("blah", parent_res_sec)
        assert parent_res_sec.subsections == []

        mocker.patch.object(IntezerApi,
                            "get_dynamic_ttps",
                            side_effect=HTTPError("FORBIDDEN"))
        intezer_static_class_instance._process_ttps("blah", parent_res_sec)
        assert parent_res_sec.subsections == []

        mocker.patch.object(IntezerApi,
                            "get_dynamic_ttps",
                            side_effect=UnsupportedOnPremiseVersion())
        intezer_static_class_instance._process_ttps("blah", parent_res_sec)
        assert parent_res_sec.subsections == []

        mocker.patch.object(ALIntezerApi,
                            "get_dynamic_ttps",
                            return_value=[{
                                "name": "blah",
                                "description": "blah",
                                "data": [],
                                "severity": 1
                            }])
        intezer_static_class_instance._process_ttps("blah", parent_res_sec)
        correct_res_sec = ResultSection("Signature: blah", "blah")
        correct_res_sec.set_heuristic(4)
        correct_res_sec.heuristic.add_signature_id("blah", 10)
        assert check_section_equality(
            parent_res_sec.subsections[0].subsections[0], correct_res_sec)

        parent_res_sec = ResultSection("blah")
        mocker.patch.object(ALIntezerApi,
                            "get_dynamic_ttps",
                            return_value=[{
                                "name": "InjectionInterProcess",
                                "description": "blah",
                                "data": [],
                                "severity": 1
                            }])
        intezer_static_class_instance._process_ttps("blah", parent_res_sec)
        correct_res_sec = ResultSection("Signature: InjectionInterProcess",
                                        "blah")
        correct_res_sec.set_heuristic(7)
        correct_res_sec.heuristic.add_signature_id("InjectionInterProcess", 10)
        correct_res_sec.heuristic.add_attack_id("T1055")
        assert check_section_equality(
            parent_res_sec.subsections[0].subsections[0], correct_res_sec)

        parent_res_sec = ResultSection("blah")
        mocker.patch.object(ALIntezerApi,
                            "get_dynamic_ttps",
                            return_value=[{
                                "name": "enumerates_running_processes",
                                "description": "blah",
                                "data": [{
                                    "wow": "print me!"
                                }],
                                "severity": 1
                            }])
        intezer_static_class_instance._process_ttps("blah", parent_res_sec)
        correct_res_sec = ResultSection(
            "Signature: enumerates_running_processes", "blah")
        correct_res_sec.set_heuristic(8)
        correct_res_sec.heuristic.add_signature_id(
            "enumerates_running_processes", 10)
        correct_res_sec.heuristic.add_attack_id("T1057")
        assert check_section_equality(
            parent_res_sec.subsections[0].subsections[0], correct_res_sec)

        parent_res_sec = ResultSection("blah")
        mocker.patch.object(ALIntezerApi,
                            "get_dynamic_ttps",
                            return_value=[{
                                "name":
                                "blah",
                                "description":
                                "blah",
                                "data": [
                                    {
                                        "IP": "blah 2.2.2.2 blah"
                                    },
                                ],
                                "severity":
                                1
                            }])
        intezer_static_class_instance._process_ttps("blah", parent_res_sec)
        correct_res_sec = ResultSection("Signature: blah", "blah")
        correct_res_sec.add_line("\tIP: blah 2.2.2.2 blah")
        correct_res_sec.set_heuristic(4)
        correct_res_sec.heuristic.add_signature_id("blah", 10)
        correct_ioc_res_sec = ResultTableSection(
            "IOCs found in signature marks")
        correct_ioc_res_sec.add_row(TableRow(ioc_type="ip", ioc="2.2.2.2"))
        correct_ioc_res_sec.add_tag("network.dynamic.ip", "2.2.2.2")
        correct_res_sec.add_subsection(correct_ioc_res_sec)
        assert check_section_equality(
            parent_res_sec.subsections[0].subsections[0], correct_res_sec)
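
These tests compare sections through a check_section_equality() helper defined elsewhere in the test suite. A rough sketch of what such a recursive comparison could look like, based on the public ResultSection/Heuristic attributes (the real helper may compare more fields):

def check_section_equality(this, that) -> bool:
    # Sketch only: compare the visible attributes, then recurse into subsections
    same_heuristic = (this.heuristic is None and that.heuristic is None) or (
        this.heuristic is not None and that.heuristic is not None
        and this.heuristic.heur_id == that.heuristic.heur_id
        and this.heuristic.attack_ids == that.heuristic.attack_ids
        and this.heuristic.signatures == that.heuristic.signatures)
    return (this.title_text == that.title_text
            and this.body == that.body
            and this.tags == that.tags
            and same_heuristic
            and len(this.subsections) == len(that.subsections)
            and all(check_section_equality(a, b)
                    for a, b in zip(this.subsections, that.subsections)))
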
Example #24
0
    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results ...

        # You should run your code here...

        # ==================================================================
        # Check if we're scanning an embedded file
        #   This service always drops two embedded files: one generates random results
        #   and the other generates empty results. Here we check whether we're scanning
        #   one of those embedded files. In a normal service this is not something you
        #   would do at all, but since we use this service in our unit tests to exercise
        #   all the features of our report generator, we have to do this.
        if request.sha256 not in [
                'd729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06'
        ]:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be stored
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dump the text to the screen...
            #     All section scores will be summed in the service result
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())
            # Or you can add them from a list
            #   Here we will generate a random number of random lines
            text_section.add_lines(
                [get_random_phrase() for _ in range(random.randint(1, 5))])
            # If the section needs to affect the score of the file, you need to set a heuristic
            #   Here we will pick one at random
            #     In addition to adding a heuristic, we will associate a signature with it
            #     by adding the signature name to the heuristic. (Here we generate a random name)
            text_section.set_heuristic(random.randint(1, 4),
                                       signature=get_random_phrase(
                                           1, 4).lower().replace(" ", "_"))
            # Make sure you add your section to the result
            result.add_section(text_section)

            # ==================================================================
            # Color map Section: BODY_FORMAT.GRAPH_DATA
            #     Creates a color map bar using a minimum and maximum domain
            #     e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            color_map_data = {
                'type': 'colormap',
                'data': {
                    'domain': [cmap_min, cmap_max],
                    'values': [random.random() * cmap_max for _ in range(50)]
                }
            }
            section_color_map = ResultSection(
                "Example of colormap result section",
                body_format=BODY_FORMAT.GRAPH_DATA,
                body=json.dumps(color_map_data))
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable URLs using a JSON-encoded format
            #     As you can see here, the body of the section can be set directly instead of line by line
            random_host = get_random_host()
            url_section = ResultSection('Example of a simple url section',
                                        body_format=BODY_FORMAT.URL,
                                        body=json.dumps({
                                            "name": "Random url!",
                                            "url": f"https://{random_host}/"
                                        }))

            # Since URLs are very important features, we can tag them in the system so they are easy to find
            #   Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of URLs!
            #   Also, no need to provide a name; the URL link itself will be displayed
            host1 = get_random_host()
            host2 = get_random_host()
            ip1 = get_random_ip()
            urls = [
                {"url": f"https://{host1}/"},
                {"url": f"https://{host2}/"},
                {"url": f"https://{ip1}/"}
            ]
            url_sub_section = ResultSection(
                'Example of a url section with multiple links',
                body_format=BODY_FORMAT.URL,
                body=json.dumps(urls))
            url_sub_section.set_heuristic(random.randint(1, 4))
            url_sub_section.add_tag("network.static.ip", ip1)
            url_sub_section.add_tag("network.static.domain", host1)
            url_sub_section.add_tag("network.dynamic.domain", host2)
            # Since url_sub_section is a sub-section of url_section,
            # we add it to url_section rather than to the main result itself
            url_section.add_subsection(url_sub_section)
            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
            data = hexdump(
                b"This is some random text that we will format as an hexdump and you'll see "
                b"that the hexdump formatting will be preserved by the memory dump section!"
            )
            memdump_section = ResultSection(
                'Example of a memory dump section',
                body_format=BODY_FORMAT.MEMORY_DUMP,
                body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
            #     while also providing easy-to-parse data for automated tools.
            #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
            #         in the UI for the user
            #     The body argument must be a JSON dump of a dictionary (only str, int, and bool values are allowed)
            kv_body = {
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            }
            kv_section = ResultSection('Example of a KEY_VALUE section',
                                       body_format=BODY_FORMAT.KEY_VALUE,
                                       body=json.dumps(kv_body))
            result.add_section(kv_section)

            # ==================================================================
            # JSON section:
            #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
            #     to display a tree view of JSON results.
            #     NB: Use this sparingly! As a service developer you should do your best to include important
            #     results as their own result sections.
            #     The body argument must be a JSON dump of a Python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [{
                        "d1_key": "val",
                        "d1_key2": "val2"
                    }, {
                        "d2_key": "val",
                        "d2_key2": "val2"
                    }],
                    "bool":
                    True
                }
            }
            json_section = ResultSection('Example of a JSON section',
                                         body_format=BODY_FORMAT.JSON,
                                         body=json.dumps(json_body))
            result.add_section(json_section)

            # ==================================================================
            # Re-Submitting files to the system
            #     Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt",
                                  "Extracted by some magic!")

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt",
                                  "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #     Adding supplementary files will save them in the datastore for future
            #     reference but won't reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(urls))
            request.add_supplementary(temp_path, "urls.json",
                                      "These are urls as a JSON file")
            # Like extracted files, you can add more than one supplementary file
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json",
                                      "This is the json_body as a JSON file")

            # ==================================================================
            # Wrap-up:
            #     Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Creating an empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized results file, we will completely randomize the results
            #   The content of those results does not matter since we've already shown you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # add a random number of sections
            for _ in range(random.randint(1, 3)):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result
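
Example #24 formats its memory dump body with a hexdump() helper imported from elsewhere in the service. If you need a stand-in, a minimal implementation (assumed here, not the exact helper the sample service imports) looks like this:

def hexdump(data: bytes, width: int = 16) -> str:
    # Render bytes as 'offset  hex bytes  printable chars', one row per `width` bytes
    lines = []
    for offset in range(0, len(data), width):
        chunk = data[offset:offset + width]
        hex_part = ' '.join(f'{b:02x}' for b in chunk).ljust(width * 3 - 1)
        ascii_part = ''.join(chr(b) if 32 <= b < 127 else '.' for b in chunk)
        lines.append(f'{offset:08x}  {hex_part}  {ascii_part}')
    return '\n'.join(lines)

# e.g. print(hexdump(b'Hello, Assemblyline!'))
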