def stack_result(section: List[bytes]) -> Optional[ResultSection]:
    """Generates a ResultSection from the FLOSS stacked strings output section"""
    result = ResultSection('FLARE FLOSS Stacked Strings',
                           body_format=BODY_FORMAT.MEMORY_DUMP,
                           heuristic=Heuristic(3))
    assert result.heuristic
    strings = section[1:]
    if not strings:
        return None
    groups = group_strings(s.decode() for s in strings)
    for group in groups:
        res = ResultSection(f"Group: '{min(group, key=len)}' Strings: {len(group)}",
                            body='\n'.join(group),
                            body_format=BODY_FORMAT.MEMORY_DUMP)
        for string in group:
            ioc_tag(string.encode(), res, just_network=len(group) > 1000)
        result.add_subsection(res)
    if any(res.tags for res in result.subsections):
        result.heuristic.add_signature_id('stacked_ioc')
    return result
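# A minimal usage sketch for stack_result (hedged: the shape of `floss_output`
# below is an assumption — a FLOSS output section where the first element is the
# section header and the rest are the recovered strings; `group_strings` and
# `ioc_tag` are this service's own helpers and must be importable).
floss_output = [b'FLARE FLOSS STACKED STRINGS', b'http://example.com/beacon', b'cmd.exe /c whoami']
stacked_section = stack_result(floss_output)
if stacked_section is not None:
    print(stacked_section.title_text)  # FLARE FLOSS Stacked Strings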
def _create_random_section(self):
    # choose a random body format
    body_format = random.choice(FORMAT_LIST)

    # create a section with a random title
    section = ResultSection(get_random_phrase(3, 7), body_format=body_format)

    # add a few randomly generated lines to the body
    for _ in range(1, 5):
        section.add_line(get_random_phrase(5, 10))

    # add a random set of tags
    tags = flatten(get_random_tags())
    for key, val in tags.items():
        for v in val:
            section.add_tag(key, v)

    # set a heuristic a third of the time
    if random.choice([False, False, True]):
        section.set_heuristic(random.randint(1, 4))

    # create random sub-sections a third of the time
    if random.choice([False, False, True]):
        section.add_subsection(self._create_random_section())

    return section
def parse_results(response: Dict[str, Any]):
    res = Result()
    response = response['data']

    url_section = ResultSection('VirusTotal report permalink',
                                body_format=BODY_FORMAT.URL,
                                body=json.dumps({"url": response['links']['self']}))
    res.add_section(url_section)

    response = response['attributes']
    scans = response['last_analysis_results']
    av_hits = ResultSection('Anti-Virus Detections')
    av_hits.add_line(f'Found {response["last_analysis_stats"]["malicious"]} AV hit(s) from '
                     f'{len(response["last_analysis_results"])} engines.')
    for majorkey, subdict in sorted(scans.items()):
        if subdict['category'] == "malicious":
            virus_name = subdict['result']
            av_hit_section = AvHitSection(majorkey, virus_name)
            av_hit_section.set_heuristic(1, signature=f'{majorkey}.{virus_name}')
            av_hit_section.add_tag('av.virus_name', virus_name)
            av_hits.add_subsection(av_hit_section)
    res.add_section(av_hits)

    return res
def test_section_traverser(tags, correct_tags):
    from assemblyline_v4_service.common.section_reducer import _section_traverser
    from assemblyline_v4_service.common.result import ResultSection
    section = ResultSection("blah")
    subsection = ResultSection("subblah")
    subsection.tags = tags
    section.add_subsection(subsection)
    assert _section_traverser(section).subsections[0].tags == correct_tags
def test_section_traverser(tags, correct_tags):
    from assemblyline_v4_service.common.section_reducer import _section_traverser
    from assemblyline_v4_service.common.result import ResultSection
    section = ResultSection("blah")
    subsection = ResultSection("subblah")
    for t_type, t_values in tags.items():
        for t_value in t_values:
            subsection.add_tag(t_type, t_value)
    section.add_subsection(subsection)
    assert _section_traverser(section).subsections[0].tags == correct_tags
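# A hedged sketch of how these tests might be parametrized (the real fixtures
# are not shown in the source; the tag type and the dedup expectation below are
# illustrative assumptions about what section_reducer does to duplicate tags).
import pytest

@pytest.mark.parametrize("tags, correct_tags", [
    ({}, {}),
    ({"network.static.domain": ["blah.ca", "blah.ca"]},
     {"network.static.domain": ["blah.ca"]}),
])
def test_section_traverser_example(tags, correct_tags):
    ...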
def test_parse_results(response, correct_res_secs, metadefender_class_instance):
    from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT, Heuristic
    metadefender_class_instance.blocklist = ["a"]
    metadefender_class_instance.sig_score_revision_map = {}
    metadefender_class_instance.kw_score_revision_map = {}
    metadefender_class_instance.current_node = "http://blah"
    metadefender_class_instance.nodes[metadefender_class_instance.current_node] = {
        "engine_map": {
            "z": {"version": "blah", "def_time": "blah"},
            "y": {"version": "blah", "def_time": "blah"},
        },
        "queue_times": [],
        "file_count": 0,
    }
    correct_result = Result()
    for correct_res_sec in correct_res_secs:
        section = ResultSection(
            correct_res_sec["title_text"],
            body_format=BODY_FORMAT.TEXT if not correct_res_sec.get("body_format") else BODY_FORMAT.JSON,
            body=correct_res_sec.get("body"))
        for subsec in correct_res_sec.get("subsections", []):
            subsection = ResultSection(
                subsec["title_text"],
                body=subsec["body"],
                body_format=BODY_FORMAT.KEY_VALUE,
                tags=subsec.get("tags"),
            )
            if subsec.get("heuristic"):
                subsection.set_heuristic(subsec["heuristic"]["heur_id"])
                for key in subsec["heuristic"]["signatures"]:
                    subsection.heuristic.add_signature_id(key)
            section.add_subsection(subsection)
        correct_result.add_section(section)
    actual_result = metadefender_class_instance.parse_results(response)
    for index, section in enumerate(actual_result.sections):
        assert check_section_equality(section, correct_result.sections[index])
def _process_iocs(
    self,
    analysis_id: str,
    file_verdict_map: Dict[str, str],
    parent_result_section: ResultSection,
) -> None:
    """
    This method retrieves and parses IOCs for an analysis
    :param analysis_id: The ID for the analysis which we will be retrieving
    :param file_verdict_map: A map of sha256s representing a file's contents, and the verdict for that file
    :param parent_result_section: The result section that the network result section will be added to, if applicable
    :return: None
    """
    iocs = self.client.get_iocs(analysis_id)
    file_iocs = iocs["files"]
    network_iocs = iocs["network"]

    if file_iocs:
        for file in file_iocs:
            file_verdict_map[file["sha256"]] = file["verdict"]

    if network_iocs:
        network_section = ResultTextSection("Network Communication Observed")
        for network in network_iocs:
            ioc = network["ioc"]
            ioc_type = network["type"]
            if ioc_type == NetworkIOCTypes.IP.value:
                network_section.add_tag("network.dynamic.ip", ioc)
            elif ioc_type == NetworkIOCTypes.DOMAIN.value:
                network_section.add_tag("network.dynamic.domain", ioc)
            elif ioc_type not in NetworkIOCTypes.TYPES.value:
                self.log.debug(
                    f"The network IOC type of {ioc_type} is not in {NetworkIOCTypes.TYPES.value}. "
                    f"Network item: {network}")
            network_section.add_line(f"IOC: {ioc}")
        parent_result_section.add_subsection(network_section)
def subsection_builder(parent_section: ResultSection = None, fields: dict = {}):
    for mwcp_field, mwcp_field_data in fields.items():
        if mwcp_field in FIELD_TAG_MAP:
            tag = FIELD_TAG_MAP[mwcp_field]
            table_body = []
            table_section = ResultSection(f"Extracted {mwcp_field.capitalize()}")
            if tag:
                # Tag everything that we can
                for x in mwcp_field_data:
                    table_section.add_tag(tag, x)
            # Add data to section body
            for line in mwcp_field_data:
                if isinstance(line, str):
                    table_body.append({mwcp_field: line})
                elif isinstance(line, list):
                    for item in line:
                        table_body.append({mwcp_field: item})
            table_section.set_body(body_format=BODY_FORMAT.TABLE, body=json.dumps(table_body))
            parent_section.add_subsection(table_section)
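# Illustrative call (hedged: the FIELD_TAG_MAP entry shown in the comment is an
# assumption for this sketch; the real mapping lives in the service's module).
# e.g. FIELD_TAG_MAP = {"address": "network.dynamic.ip", "url": "network.static.uri"}
parent = ResultSection("Extracted configuration")
subsection_builder(parent, {"address": ["10.0.0.1", "10.0.0.2"]})
# -> parent now holds an "Extracted Address" TABLE subsection, with each address tagged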
def _handle_artefact(artefact: Artefact = None, artefacts_result_section: ResultSection = None):
    if artefact is None:
        raise Exception("Artefact cannot be None")

    # This is a dict whose key-value pairs follow the format {regex: result_section_title}
    artefact_map = {
        HOLLOWSHUNTER_EXE_REGEX: "HollowsHunter Injected Portable Executable",
        HOLLOWSHUNTER_SHC_REGEX: "HollowsHunter Shellcode",
        HOLLOWSHUNTER_DLL_REGEX: "HollowsHunter DLL",
    }
    artefact_result_section = None

    for regex, title in artefact_map.items():
        pattern = compile(regex)
        if pattern.match(artefact.name):
            artefact_result_section = ResultSection(title)
            artefact_result_section.add_tag("dynamic.process.file_name", artefact.path)

    if artefact_result_section is not None:
        artefacts_result_section.add_subsection(artefact_result_section)
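# Example invocation (hedged: Artefact is assumed to expose `name` and `path`
# attributes, matching the accesses above, and the file name is assumed to
# match HOLLOWSHUNTER_EXE_REGEX).
artefacts_section = ResultSection("HollowsHunter Artefacts")
hh_artefact = Artefact(name="hollowshunter_pe.exe", path="/tmp/hollowshunter_pe.exe")
_handle_artefact(hh_artefact, artefacts_section)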
def execute(self, request: ServiceRequest) -> None:
    result = Result()
    self.hits = {}  # clear the hits dict
    path = request.file_path
    file_name = request.file_name
    self.log.info(f"Executing {file_name}")
    self.log.info(f"Number of rules {len(self.sigma_parser.rules)}")
    self.sigma_parser.register_callback(self.sigma_hit)
    self.sigma_parser.check_logfile(path)
    if len(self.hits) > 0:
        hit_section = ResultSection('Events detected as suspicious')
        # group alerts together
        for rule_id, events in self.hits.items():
            title = self.sigma_parser.rules[rule_id].title
            section = SigmaHitSection(title, events)
            tags = self.sigma_parser.rules[rule_id].tags
            attack_id = None
            if tags:
                for tag in tags:
                    name = tag[7:]
                    if name.startswith(('t', 'g', 's')):
                        attack_id = name.upper()

            source = events[0]['signature_source']
            if attack_id:
                section.set_heuristic(get_heur_id(events[0]['score']),
                                      attack_id=attack_id,
                                      signature=f"{source}.{title}")
            else:
                section.set_heuristic(get_heur_id(events[0]['score']),
                                      signature=f"{source}.{title}")
            section.add_tag(f"file.rule.{source}", f"{source}.{title}")

            for event in events:
                # add the event data as a subsection
                section.add_subsection(EventDataSection(event))
            hit_section.add_subsection(section)
        result.add_section(hit_section)
    request.result = result
def _process_families(self, families: List[Dict[str, str]], sub_sha256: str,
                      file_verdict_map: Dict[str, str], parent_section: ResultSection) -> None:
    """
    This method handles the "families" list, cutting out boring details and assigning verdicts
    :param families: A list of details for families
    :param sub_sha256: The hash of the sub analysis file
    :param file_verdict_map: A map of sha256s representing a file's contents, and the verdict for that file
    :param parent_section: The result section that the family result section will be added to, if applicable
    :return: None
    """
    family_section = ResultTableSection("Family Details")
    for family in families:
        processed_family = self._process_details(family.copy(), UNINTERESTING_FAMILY_KEYS)
        family_section.add_row(TableRow(**processed_family))
        family_type = family["family_type"]
        if family_type not in FAMILIES_TO_NOT_TAG:
            family_section.add_tag("attribution.family", family["family_name"])

        # Overwrite the verdict if the current verdict is not already malicious
        if family_type in MALICIOUS_FAMILY_TYPES and (
                sub_sha256 not in file_verdict_map
                or file_verdict_map[sub_sha256] != Verdicts.MALICIOUS.value):
            file_verdict_map[sub_sha256] = Verdicts.MALICIOUS.value

        # Only overwrite the verdict if the current verdict is not already malicious
        elif family_type in SUSPICIOUS_FAMILY_TYPES and (
                sub_sha256 not in file_verdict_map
                or file_verdict_map[sub_sha256] not in Verdicts.MALICIOUS_VERDICTS.value):
            file_verdict_map[sub_sha256] = Verdicts.SUSPICIOUS.value

    if family_section.body:
        parent_section.add_subsection(family_section)
def _process_ttps(
    self,
    analysis_id: str,
    parent_result_section: ResultSection,
) -> None:
    """
    This method retrieves and parses TTPs for an analysis
    :param analysis_id: The ID for the analysis which we will be retrieving
    :param parent_result_section: The result section that the signatures result section will be added to,
                                  if applicable
    :return: None
    """
    # Note: These TTPs are essentially signatures
    ttps = self.client.get_dynamic_ttps(analysis_id)
    if not ttps:
        return

    sigs_res = ResultSection("Signatures")
    for ttp in ttps:
        sig_name = ttp['name']
        sig_res = ResultTextSection(f"Signature: {sig_name}")
        sig_res.add_line(ttp['description'])

        heur_id = get_heur_id_for_signature_name(sig_name)
        if heur_id == GENERIC_HEURISTIC_ID:
            self.log.debug(f"{sig_name} does not have a category assigned to it")

        sig_res.set_heuristic(heur_id)
        sig_res.heuristic.add_signature_id(sig_name, TTP_SEVERITY_TRANSLATION[ttp['severity']])

        for aid in get_attack_ids_for_signature_name(sig_name):
            sig_res.heuristic.add_attack_id(aid)

        if sig_name in SILENT_SIGNATURES:
            sigs_res.add_subsection(sig_res)
            continue

        ioc_table = ResultTableSection("IOCs found in signature marks")
        self._process_ttp_data(ttp['data'], sig_res, ioc_table)
        if ioc_table.body:
            sig_res.add_subsection(ioc_table)
        sigs_res.add_subsection(sig_res)

    if sigs_res.subsections:
        parent_result_section.add_subsection(sigs_res)
def analyze_pdf(self, request, res_txt, path, working_dir, heur, additional_keywords, get_malform=True):
    """Extract metadata, keyword objects and content of interest from a PDF sample using PDFId, PDFId plugins,
    and PDF Parser.

    Args:
        request: AL request object.
        res_txt: Header string for AL result section title.
        path: Original PDF sample path.
        working_dir: AL working directory.
        heur: List of plugins to run on PDFId results (provided in service configuration).
        additional_keywords: List of additional keywords to be searched (provided in service configuration).
        get_malform: Extract malformed objects from PDF.

    Returns:
        AL result object, whether object streams were found (objstms), and a set of all errors.
    """
    triage_keywords = set()
    all_errors = set()
    embed_present = False
    objstms = False
    res = ResultSection(title_text=res_txt)
    carved_extracted_shas = set()

    if request.deep_scan:
        run_pdfparse = True
    else:
        run_pdfparse = False

    # Run PDFId
    try:
        pdfid_result, errors = self.get_pdfid(path, additional_keywords, heur, request.deep_scan)
    except Exception as e:
        raise NonRecoverableError(e)

    # Parse PDFId results
    pdfidres = ResultSection(title_text="PDFID Results", parent=res)
    if len(pdfid_result) == 0:
        pdfidres.add_line("No results generated for file. Please see errors.")
    else:
        # Do not run for objstms, which are being analyzed when get_malform == False
        if get_malform:
            version = pdfid_result.get("PDFID", None)
            if version:
                pdfidres.add_line(version[0])
            properties = pdfid_result.get("Properties", None)
            if properties:
                pres = ResultSection(title_text="PDF Properties", parent=pdfidres)
                for plist in properties:
                    pres.add_line("{0}: {1}".format(plist[0], plist[1]))
                    if plist[0] == "/ModDate":
                        pres.add_tag('file.pdf.date.modified', plist[1])
                    elif plist[0] == "/CreationDate":
                        pres.add_tag('file.date.creation', plist[1])
                    elif plist[0] == "/LastModified":
                        pres.add_tag('file.date.last_modified', plist[1])
                    elif plist[0] == "/SourceModified":
                        pres.add_tag('file.pdf.date.source_modified', plist[1])
                    elif plist[0] == "/pdfx":
                        pres.add_tag('file.pdf.date.pdfx', plist[1])
            entropy = pdfid_result.get("Entropy", None)
            if entropy:
                enres = ResultSection(title_text="Entropy", parent=pdfidres)
                for enlist in entropy:
                    enres.add_line("{0}: {1}, ({2})".format(enlist[0], enlist[1], enlist[2]))
        flags = pdfid_result.get("Flags", None)
        if flags:
            fres = ResultSection(title_text="PDF Keyword Flags", parent=pdfidres)
            for flist in flags:
                if flist[0] == "/ObjStm":
                    objstms = True
                if len(flist) == 3:
                    fres.add_line("{0}:Count: {1}, Hex-Encoded Count: {2}".format(flist[0], flist[1], flist[2]))
                else:
                    fres.add_line("{0}:Count: {1}".format(flist[0], flist[1]))
                fres.add_tag('file.string.extracted', flist[0].replace("/", "", 1))
                if flist[0] in additional_keywords:
                    triage_keywords.add(flist[0].replace("/", "", 1))
        plugin = pdfid_result.get("Plugin", [])

        # If any plugin results, or flagged keywords found, run PDF Parser
        if plugin or len(triage_keywords) > 0:
            run_pdfparse = True

        for pllist in plugin:
            pl_name, pl_heur, pl_text = pllist
            pl_heur = int(pl_heur)
            pl_text = pl_text[14:]
            if not pl_text or pl_text == "None":
                continue

            if pl_name in ['EmbeddedFile', 'Name Obfuscation']:
                modres = ResultSection(title_text=pl_text, parent=pdfidres)
                if pl_heur > 0:
                    modres.set_heuristic(pl_heur)
                if pl_name == 'EmbeddedFile':
                    embed_present = True

            elif pl_name in ['Triage', 'Suspicious Properties']:
                javascript_found = False
                for line in pl_text.splitlines():
                    lineres = ResultSection(title_text=line)
                    # Triage results
                    if '/JavaScript' in line:
                        triage_keywords.add('JavaScript')
                        if not javascript_found:
                            lineres.set_heuristic(19)
                            javascript_found = True
                    elif '/JS' in line:
                        triage_keywords.add('JS')
                        if not javascript_found:
                            lineres.set_heuristic(19)
                            javascript_found = True
                    elif '/JBIG2Decode' in line:
                        triage_keywords.add('JBIG2Decode')
                        lineres.set_heuristic(3)
                    elif '/Colors > 2^24' in line:
                        triage_keywords.add('Colors > 2^24')
                        lineres.set_heuristic(20)
                    elif '/AA' in line:
                        triage_keywords.add('AA')
                        lineres.set_heuristic(1)
                    elif '/Launch' in line:
                        triage_keywords.add('Launch')
                        lineres.set_heuristic(1)
                    elif '/OpenAction' in line:
                        triage_keywords.add('OpenAction')
                        lineres.set_heuristic(1)
                    elif '/GoToE' in line:
                        triage_keywords.add('GoToE')
                        lineres.set_heuristic(21)
                    elif '/GoToR' in line:
                        triage_keywords.add('GoToR')
                        lineres.set_heuristic(22)
                    elif '/Encrypt' in line:
                        triage_keywords.add('Encrypt')
                        lineres.set_heuristic(11)
                    elif '/AcroForm' in line:
                        triage_keywords.add('AcroForm')
                        lineres.set_heuristic(4)
                    elif '/RichMedia' in line:
                        triage_keywords.add('RichMedia')
                        lineres.set_heuristic(5)
                    elif '/XFA' in line:
                        triage_keywords.add('XFA')
                        lineres.set_heuristic(23)
                    elif '/Annot' in line:
                        triage_keywords.add('Annot')
                        lineres.set_heuristic(25)
                    elif '/ObjStm' in line:
                        triage_keywords.add('ObjStm')
                        lineres.set_heuristic(7)
                    elif '/URI' in line:
                        triage_keywords.add('URI')
                        lineres.set_heuristic(24)
                    # Suspicious properties results
                    elif "eof2" in line:
                        lineres.set_heuristic(2)
                    elif "eof5" in line:
                        lineres.set_heuristic(17)
                    elif "page" in line:
                        lineres.set_heuristic(26)
                    elif "entropy" in line:
                        lineres.set_heuristic(12)
                    elif "obj/endobj" in line:
                        lineres.set_heuristic(13)
                    elif "stream/endstream" in line:
                        lineres.set_heuristic(14)

                    if lineres.heuristic is not None:
                        pdfidres.add_subsection(lineres)

    for e in errors:
        all_errors.add(e)
        if e.startswith('Error running plugin'):
            self.log.warn(e)

    if run_pdfparse:
        # CALL PDF parser and extract further information
        pdf_parserres = ResultSection(title_text="PDF Parser Results")

        # STATISTICS
        # Do not run for objstms, which are being analyzed when get_malform == False
        if get_malform:
            options = {
                "stats": True,
            }
            pdf_parser_result, errors = self.get_pdf_parser(path, working_dir, options)

            if pdf_parser_result:
                if len(pdf_parser_result) == 0:
                    pdf_parserres.add_line("No statistical results generated for file. Please see errors.")
                else:
                    version = pdf_parser_result.get("version", None)
                    if version and version[0] != '0':
                        pdf_parserres.add_line(version[0])
                    stats = pdf_parser_result.get("stats", None)
                    if stats:
                        sres = ResultSection(title_text="PDF Statistics",
                                             parent=pdf_parserres,
                                             body_format=BODY_FORMAT.MEMORY_DUMP)
                        for p in stats:
                            sres.add_line(p)
                for e in errors:
                    all_errors.add(e)

        # Triage plugin -- search sample for keywords and carve content or extract object (if it contains a stream)
        carved_content = {}  # Format: {objnum: [{keyword: content}]}
        obj_extract_triage = set()
        jbig_objs = set()

        for keyword in triage_keywords:
            # ObjStms handled differently
            if keyword == 'ObjStm':
                continue

            options = {
                "search": keyword,
            }
            pdf_parser_result, errors = self.get_pdf_parser(path, working_dir, options)

            if pdf_parser_result:
                for p in pdf_parser_result['parts']:
                    content = ""
                    references = []
                    # Trailer will be extracted anyways, try and grab all references anyways -- will be messy
                    if p.startswith("trailer:"):
                        # Grab the content after the keyword
                        # Check that the keyword is actually in the content
                        if "/{}".format(keyword) in p:
                            try:
                                content = p.split(keyword, 1)[1].replace('>>++>>', '').split("/", 1)[0].strip()
                                references = re.findall("[0-9]* [0-9]* R", content)
                            except Exception:
                                continue
                    # If not trailer, should be object
                    elif 'Referencing:' in p:
                        # Grab the content after the keyword
                        if '>>++>>' in p:
                            try:
                                content = p.split(keyword, 1)[1].replace('>>++>>', '').strip()
                            except Exception:
                                try:
                                    content = p.split("\n", 3)[3]
                                except Exception:
                                    content = p
                        else:
                            try:
                                content = p.split("\n", 3)[3]
                            except Exception:
                                content = p
                        # Sometimes the content is the same keyword with references (i.e. "/URI /URI 10 0 R")
                        if content.startswith("/{}".format(keyword)):
                            try:
                                content = re.sub("/{}[ ]*".format(keyword), "", content, 1)
                            except Exception:
                                pass
                        try:
                            references = p.split("\n", 3)[2].replace('Referencing:', '').strip().split(", ")
                        except Exception:
                            pass
                    # Only extract JBIG2Decode objects with deep scan, but always report on their presence
                    if keyword == "JBIG2Decode" and "/Filter" in p and "Contains stream" in p:
                        try:
                            objnum = p.split("\n", 1)[0].split(" ")[1]
                            if request.deep_scan:
                                obj_extract_triage.add(objnum)
                            jbig_objs.add(objnum)
                            continue
                        except Exception as e:
                            self.log.debug(e)
                            continue
                    # If no content, then keyword likely points to reference objects, so grab those
                    if content == '':
                        if len(references) > 0:
                            content = references
                        else:
                            # Something is wrong, drop it.
                            continue
                    else:
                        while True:
                            # Multiple references might be in a list, i.e. /Annot # # R vs. /Annots [# # R # # R]
                            islist = re.match(r"[s]?[ ]?\[([0-9]* [0-9]* R[ \\rn]{0,8})*\]", content)
                            if islist:
                                content = re.sub(r"[\[\]]", "",
                                                 islist.group(0).replace("s ", '')
                                                 .replace("R ", "R,")).split(",")
                                break
                            # References might be with instructions, i.e. [# # R /FitH null]
                            withinst = re.match(r"[s]?[ \\']{0,3}\[[ ]?([0-9]* [0-9]* R)[ \\rn]{1,8}"
                                                r"[/a-zA-Z0-9 ]*[ ]?\]", content)
                            if withinst:
                                content = [withinst.group(1)]
                                break
                            content = [content]
                            break
                    for c in content:
                        # If keyword = JavaScript and content starts with '/JS', disregard as 'JS' will be extracted
                        if "JS" in triage_keywords and keyword == "JavaScript" and "/JS" in c[0:5]:
                            continue
                        if c in references or re.match("[0-9]* [0-9]* R", c):
                            try:
                                ref_obj = c.split(" ", 1)[0]
                                options = {
                                    "object": ref_obj,
                                    "get_object_detail": True
                                }
                                pdf_parser_subresult, err = self.get_pdf_parser(path, working_dir, options)
                                if pdf_parser_subresult:
                                    for sub_p in pdf_parser_subresult['parts']:
                                        sub_references = sub_p.split("\n", 3)[2].replace('Referencing:', '')\
                                            .strip().split(", ")
                                        ptyp = sub_p.split("\n", 2)[1].replace('Type:', '').strip().replace("/", "")
                                        # If the object contains a stream, extract the object.
                                        if "Contains stream" in sub_p:
                                            try:
                                                objnum = sub_p.split("\n", 1)[0].split(" ")[1]
                                                obj_extract_triage.add(objnum)
                                            except Exception:
                                                pass
                                        # Or if the object Type is the keyword, grab all referenced objects.
                                        elif sub_references[0] != '' and len(sub_references) >= 1 \
                                                and ptyp == keyword:
                                            for sr in sub_references:
                                                try:
                                                    objnum = sr.split(" ", 1)[0]
                                                    obj_extract_triage.add(objnum)
                                                except Exception:
                                                    pass
                                        # If not, extract object detail into carved output
                                        elif pdf_parser_subresult['obj_details'] != "":
                                            try:
                                                objnum = sub_p.split("\n", 1)[0].split(" ")[1]
                                                if objnum in carved_content:
                                                    carved_content[objnum].append(
                                                        {keyword: pdf_parser_subresult['obj_details']})
                                                else:
                                                    carved_content[objnum] = \
                                                        [{keyword: pdf_parser_subresult['obj_details']}]
                                            except Exception:
                                                continue
                                    for e in err:
                                        errors.add(e)
                            except Exception:
                                # If none of that works, just extract the original object for examination.
                                try:
                                    objnum = p.split("\n", 1)[0].split(" ")[1]
                                    obj_extract_triage.add(objnum)
                                except Exception:
                                    pass
                        # If content does not look like a reference:
                        else:
                            if p.startswith("trailer:"):
                                continue
                            objnum = p.split("\n", 1)[0].split(" ")[1]
                            # If the object contains a stream, extract the object
                            if p.split("\n", 4)[3] == "Contains stream":
                                obj_extract_triage.add(objnum)
                            else:
                                # Or just carve the content
                                if objnum in carved_content:
                                    carved_content[objnum].append({keyword: c})
                                else:
                                    carved_content[objnum] = [{keyword: c}]

            for e in errors:
                all_errors.add(e)

        # Add carved content to result output
        show_content_of_interest = False
        if len(carved_content) > 0 or len(jbig_objs) > 0:
            carres = ResultSection(title_text="Content of Interest")
        else:
            carres = None

        if len(jbig_objs) > 0:
            jbigres = ResultSection(title_text="The following Object IDs are JBIG2DECODE streams:",
                                    body_format=BODY_FORMAT.MEMORY_DUMP,
                                    parent=carres)
            jbigres.add_line(', '.join(map(str, jbig_objs)))
            show_content_of_interest = True

        if len(carved_content) > 0:
            for k, l in sorted(carved_content.items()):
                for d in l:
                    for keyw, con in d.items():
                        subres = ResultSection(title_text="Object {0}: Hits for Keyword '{1}':".format(k, keyw))
                        subres.set_heuristic(8)
                        con_bytes = con.encode()
                        if len(con) < 500:
                            subres.body_format = BODY_FORMAT.MEMORY_DUMP
                            subres.add_line(con)
                            # Check for IOC content
                            patterns = PatternMatch()
                            st_value = patterns.ioc_match(con_bytes, bogon_ip=True)
                            if len(st_value) > 0:
                                carres.add_subsection(subres)
                                show_content_of_interest = True
                                for ty, val in st_value.items():
                                    if val == "":
                                        asc_asc = unicodedata.normalize('NFKC', val).encode('ascii', 'ignore')
                                        subres.add_tag(ty, asc_asc)
                                    else:
                                        ulis = list(set(val))
                                        for v in ulis:
                                            subres.add_tag(ty, v)
                        else:
                            crv_sha = hashlib.sha256(con_bytes).hexdigest()
                            if crv_sha not in carved_extracted_shas:
                                f_name = "carved_content_obj_{}_{}".format(k, crv_sha[0:7])
                                subres.add_lines(["Content over 500 bytes; it will be extracted for analysis",
                                                  "Name: {} - SHA256: {}".format(f_name, crv_sha)])
                                carres.add_subsection(subres)
                                show_content_of_interest = True
                                crvf = os.path.join(self.working_directory, f_name)
                                with open(crvf, 'wb') as f:
                                    f.write(con_bytes)
                                request.add_extracted(crvf, os.path.basename(crvf),
                                                      "Extracted content from object {}".format(k))
                                carved_extracted_shas.add(crv_sha)

        if show_content_of_interest:
            pdf_parserres.add_subsection(carres)

        # ELEMENTS
        # Do not show for objstms
        embed_extracted = set()
        if get_malform:
            if request.deep_scan:
                options = {
                    "verbose": True,
                    "nocanonicalizedoutput": True,
                    "get_malform": get_malform
                }
            elif embed_present:
                options = {
                    "verbose": True,
                    "elements": "ctsi",
                    "type": "/EmbeddedFile",
                    "get_malform": get_malform
                }
            else:
                options = {
                    "verbose": True,
                    "elements": "cst",
                    "get_malform": get_malform
                }
            pdf_parser_result, errors = self.get_pdf_parser(path, working_dir, options)

            if pdf_parser_result:
                if len(pdf_parser_result) == 0:
                    pdf_parserres.add_line("No structure information generated for file. Please see errors.")
                else:
                    # PDF Parser will write any malformed content over 100 bytes to a file
                    files = pdf_parser_result.get("files", None)
                    if files:
                        for f, l in files.items():
                            if f == 'malformed':
                                if len(l) > 0:
                                    pdf_parserres.set_heuristic(6)
                                for i in l:
                                    request.add_extracted(i, os.path.basename(i),
                                                          "Extracted malformed content in PDF Parser Analysis.")

                parts = pdf_parser_result.get("parts", None)
                # Extract service will extract the sample's embedded files.
                # However we want to make note of them so that they are not extracted again below
                if parts:
                    for p in sorted(parts):
                        if "Type: /EmbeddedFile" in p:
                            getobj = p.split("\n", 1)[0].split(" ")[1]
                            embed_extracted.add(getobj)

        # Extract objects collected from above analysis
        obj_to_extract = obj_extract_triage - embed_extracted - jbig_objs

        if len(obj_to_extract) > 0:
            options = {
                "filter": True,
                "object": obj_to_extract,
                "dump": "extracted_obj_",
            }
            pdf_parser_result, errors = self.get_pdf_parser(path, working_dir, options)
            if pdf_parser_result:
                files = pdf_parser_result.get("files", None)
                extracted_files = []
                if files:
                    for f, l in files.items():
                        if f == 'embedded':
                            for i in l:
                                f_name = os.path.basename(i)
                                obj_id = f_name.replace("extracted_obj_", "")
                                extracted_files.append("Extracted object {} as {}".format(obj_id, f_name))
                                request.add_extracted(i, f_name,
                                                      "Object {} extracted in PDF Parser Analysis.".format(obj_id))
                for e in errors:
                    all_errors.add(e)

                if extracted_files:
                    extract_res = ResultSection(title_text="Extracted embedded objects", parent=pdf_parserres)
                    extract_res.set_heuristic(9)
                    extract_res.add_lines(extracted_files)

        # Extract jbig2decode objects in deep scan mode
        if request.deep_scan and len(jbig_objs) > 0:
            options = {
                "object": jbig_objs,
                "dump": "extracted_jb_obj_",
            }
            pdf_parser_result, errors = self.get_pdf_parser(path, working_dir, options)
            if pdf_parser_result:
                extracted_jb = []
                files = pdf_parser_result.get("files", None)
                if files:
                    for f, l in files.items():
                        if f == 'embedded':
                            for i in l:
                                f_name = os.path.basename(i)
                                obj_id = f_name.replace("extracted_jb_obj_", "")
                                extracted_jb.append("JBIG2DECODE object {} extracted as {}".format(obj_id, f_name))
                                request.add_extracted(i, f_name,
                                                      "JBIG2DECODE object {} extracted in PDF Parser Analysis."
                                                      .format(obj_id))
                for e in errors:
                    all_errors.add(e)

                if extracted_jb:
                    jbig_extract_res = ResultSection(title_text="Extracted JBIG2Decode objects",
                                                     parent=pdf_parserres)
                    jbig_extract_res.set_heuristic(9)
                    jbig_extract_res.add_lines(extracted_jb)

        if len(pdf_parserres.subsections) > 0:
            res.add_subsection(pdf_parserres)

    return res, objstms, all_errors
def LSB_chisquare(self):
    pixels = self.binary_pixels
    x_points = []
    y_points = []
    # Use a matplotlib image if not running in AL
    if self.request is None:
        plt.switch_backend('agg')
        plt.axis([0, self.pixel_count / 8, -0.1, 1.1])
        plt.title('Chi Square Test')
        plt.grid(True)
    index = 0
    success = False
    try:
        # If greyscale, only one set of pixels to process
        if self.channels_to_process == 1:
            while len(pixels) != 0:
                self.log.debug(len(pixels))
                # In bytes
                x_location = (self.chunk * self.channels_to_process) * index / 8
                x_points.append(x_location)
                obs_pixel_set = []
                exp_pixel_set = []
                # Let's grab some PoVs!!! Yay!!!
                for i in range(0, 255, 2):
                    # Get counts
                    v1 = pixels[:self.chunk].count('{0:08b}'.format(i))
                    v2 = pixels[:self.chunk].count('{0:08b}'.format(i + 1))
                    # Add observed values
                    if v1 == 0 and v2 == 0:
                        continue
                    obs_pixel_set.append(v1)
                    obs_pixel_set.append(v2)
                    # Calculate expected values of pairs
                    expected = float((v1 + v2) * 0.5)
                    exp_pixel_set.extend([expected] * 2)
                if len(obs_pixel_set) == 0:
                    y_points.append(0)
                else:
                    y_points.append(round(chisquare(np.array(obs_pixel_set),
                                                    f_exp=np.array(exp_pixel_set))[1], 4))
                # Advance to the next chunk
                index += 1
                pixels = pixels[self.chunk:]
        else:
            # If not greyscale, test each colour channel separately per chunk and then average
            while len(pixels) != 0:
                x_location = (self.chunk * self.channels_to_process) * index / 8
                x_points.append(x_location)
                # Grab channel (i.e. R,G,B) pixels
                colours = self.get_colours(pixels[:self.chunk])
                counts = []
                lsb_counts = []
                for c, pixels_flat in iter(colours.items()):
                    obs_pixel_set = []
                    exp_pixel_set = []
                    # Let's grab some PoVs!!! Yay!!!
                    for i in range(0, 255, 2):
                        # Get counts
                        v1 = pixels_flat[:self.chunk].count('{0:08b}'.format(i))
                        v2 = pixels_flat[:self.chunk].count('{0:08b}'.format(i + 1))
                        # Add observed values
                        if v1 == 0 and v2 == 0:
                            continue
                        obs_pixel_set.append(v1)
                        obs_pixel_set.append(v2)
                        # Calculate expected values of pairs
                        expected = float((v1 + v2) * 0.5)
                        exp_pixel_set.extend([expected] * 2)
                    if len(obs_pixel_set) == 0:
                        counts.append(0)
                        if self.request is None:
                            plt.scatter(x_location, 0, color=c, marker='^', s=50)
                    else:
                        chi = round(chisquare(np.array(obs_pixel_set),
                                              f_exp=np.array(exp_pixel_set))[1], 6)
                        counts.append(chi)
                        if self.request is None:
                            plt.scatter(x_location, chi, color=c, marker='^', s=50)
                    # Additionally, collect the LSBs for additional randomness testing.
                    # Idea from http://guillermito2.net/stegano/tools/
                    lsb = []
                    for pbyte in pixels_flat:
                        lsb.append(float(pbyte[-1]))
                    lsb_avg_value = float(round(sum(lsb) / len(lsb), 1))
                    if self.request is None:
                        plt.scatter(x_location, lsb_avg_value, color='k', marker='.', s=10)
                    lsb_counts.append(lsb_avg_value)
                # Average the significance counts over the colours and round to 2 decimals
                y_points.append(round(sum(counts) / self.channels_to_process, 2))
                index += 1
                pixels = pixels[self.chunk:]
        success = True
    except Exception:
        success = False

    if success:
        if self.request is None:
            plt.plot(x_points, y_points, 'm--', linewidth=1.0)
            lsb_chi_path = path.join(self.working_directory, "LSB_chisquare_attack.png")
            plt.savefig(lsb_chi_path, bbox_inches='tight')
            plt.show()
        else:
            chi_graph_data = {
                'type': 'colormap',
                'data': {
                    'domain': [0, 100],
                    'values': [y * 100 for y in y_points]
                }
            }
            chires = ResultSection('LSB Chi Square Analysis.')
            chires.add_subsection(
                ResultSection('Colour Map ({} byte chunks): 0 == Not random, '
                              '100 == Random'.format(self.chunk_bytes),
                              body_format=BODY_FORMAT.GRAPH_DATA,
                              body=json.dumps(chi_graph_data)))
            pval_res = self.detect_sig_changes(y_points)
            if pval_res:
                chires.add_subsection(pval_res)
            self.working_result.add_subsection(chires)
    return
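# Standalone sketch of the pair-of-values (PoV) chi-square test used above,
# stripped of the service plumbing. Assumption: `data` is any byte sequence;
# LSB embedding tends to equalize the counts of each value pair (2k, 2k+1),
# which pushes the p-value toward 1.
import numpy as np
from scipy.stats import chisquare

def pov_chisquare_pvalue(data: bytes) -> float:
    observed, expected = [], []
    for i in range(0, 255, 2):
        v1, v2 = data.count(i), data.count(i + 1)
        if v1 == 0 and v2 == 0:
            continue  # skip absent pairs so expected frequencies stay non-zero
        observed.extend([v1, v2])
        expected.extend([(v1 + v2) / 2] * 2)
    if not observed:
        return 0.0
    return float(chisquare(np.array(observed), f_exp=np.array(expected))[1])

# A p-value near 1.0 means pair counts are suspiciously even (possible embedding)
print(pov_chisquare_pvalue(bytes(range(256)) * 4))  # 1.0 for perfectly even pairs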
def execute(self, request):
    """Main Module. See README for details."""
    request.result = Result()
    self.result = request.result
    wrk_dir = self.working_directory
    ipa_path = request.file_path
    self.known_keys = None
    self.reported_keys = {}

    # Determine if PK container has IPA content to parse
    try:
        ipa_file = zipfile.ZipFile(ipa_path)
    except zipfile.BadZipfile:
        # Return if files cannot be extracted
        return
    # isipa returns False if Info.plist is not found, or returns the Info.plist path
    name_list, isipa = self.isipa(ipa_file)

    if not isipa:
        return

    # Extract files of interest using 7zip (some files can be AES encrypted, which the standard zipfile
    # library does not support)
    extract_success = False
    try:
        self.extract_archive(ipa_path)
        extract_success = True
    except Exception as e:
        self.log.error(f"Could not extract IPA file due to 7zip error {e}")
    if not extract_success:
        return

    with open(os.path.join(os.path.dirname(__file__), "keys.json"), 'r') as f:
        keys_dict = json.load(f)
        self.known_keys = keys_dict['glossary']

    patterns = PatternMatch()

    # Info.plist
    main_exe = None
    res = ResultSection("Info.plist")
    info_plist_path = os.path.join(wrk_dir, isipa)

    isempty, plist_dict = self.gen_plist_extract(info_plist_path, patterns)

    if plist_dict is None:
        res.add_line("Info.plist in sample cannot be parsed. Sample may be corrupt.")
    elif isempty:
        res.add_line("Empty Info.plist file. Archive contents may be encrypted.")
    else:
        # Grab the main executable name
        if plist_dict.get("CFBundleExecutable", None):
            i = plist_dict["CFBundleExecutable"]
            try:
                main_exe = (i, f"Name of bundle's main executable file: {i}")
                res.add_line(main_exe[1])
            except UnicodeEncodeError:
                i = i.encode('utf8', 'replace')
                main_exe = (i, f"Name of bundle's main executable file: {i}")
                res.add_line(main_exe[1])

        iden_key_res, unk_key_res = self.parse_plist(plist_dict)
        if iden_key_res:
            res.add_subsection(iden_key_res)
        if unk_key_res:
            res.add_subsection(unk_key_res)

    request.result.add_section(res)

    # PkgInfo file
    pkg_types = {
        'APPL': 'application',
        'FMWK': 'frameworks',
        'BNDL': 'loadable bundle'
    }
    pattern = re.compile(r'Payload/[^/]*.app/PkgInfo')
    for fn in name_list:
        m = pattern.match(fn)
        if m is not None:
            res = ResultSection("PkgInfo Details")
            pkg_info_path = os.path.join(wrk_dir, m.group())
            with open(pkg_info_path, 'r') as f:
                pkg_info = f.read()
            if pkg_info == "":
                res.add_line("Empty PkgInfo file. Archive contents may be encrypted.")
            elif len(pkg_info) == 8:
                # noinspection PyBroadException
                try:
                    pkgtype = pkg_info[0:4]
                    if pkgtype in pkg_types:
                        pkgtype = pkg_types[pkgtype]
                    creator_code = pkg_info[4:]
                    res = ResultSection("PkgInfo Details")
                    res.add_line(f"Package Type: {pkgtype}; Application Signature: {creator_code}")
                except Exception:
                    continue
            request.result.add_section(res)

    if main_exe:
        main_exe_reg = (rf'.*{main_exe[0]}$', f"Main executable file {main_exe[0]}")
    else:
        main_exe_reg = ('$', 'Placeholder for missing main executable name.')

    fextract_regs = [
        main_exe_reg,
        (r'Payload.*\.(?:crt|cer|der|key|p12|p7b|p7c|pem|pfx)$', "Certificate or key file"),
        (r'Payload.*libswift[^\/]\.dylib$', "Swift code library files"),
        (r'Payload\/META-INF\/.*ZipMetadata.plist$', "IPA archive content info"),
        (r'Payload.*mobileprovision$', "Provisioning profile for limiting app uploads"),
        (r'.*plist$', "Plist information file"),
    ]

    empty_file_msg = "Empty file. Archive contents may be encrypted."
    int_files = {}
    plist_res = ResultSection("Other Plist File Information (displaying new key-value pairs only)")
    for root, dirs, files in os.walk(wrk_dir):
        for name in files:
            full_path = safe_str(os.path.join(root, name))
            if os.path.getsize(full_path) == 0:
                if int_files.get(empty_file_msg, None):
                    int_files[empty_file_msg].append(full_path)
                else:
                    int_files[empty_file_msg] = [full_path]
            else:
                for p, desc in fextract_regs:
                    pattern = re.compile(p)
                    m = pattern.match(full_path)
                    if m is not None:
                        # The main executable file was already identified above
                        if not desc.startswith("Main executable file "):
                            if desc.startswith("Plist"):
                                pres = ResultSection(f"{full_path.replace(wrk_dir, '')}")
                                isempty, plist_parsed = self.gen_plist_extract(full_path, patterns)
                                if not isempty and plist_parsed:
                                    iden_key_res, unk_key_res = self.parse_plist(plist_parsed)
                                    # If all keys have already been reported, skip this plist
                                    if not iden_key_res and not unk_key_res:
                                        continue
                                    if iden_key_res:
                                        pres.add_subsection(iden_key_res)
                                    if unk_key_res:
                                        pres.add_subsection(unk_key_res)
                                    plist_res.add_subsection(pres)
                            elif int_files.get(desc, None):
                                int_files[desc].append(full_path)
                            else:
                                int_files[desc] = [full_path]
                        break

    if len(plist_res.subsections) > 0:
        request.result.add_section(plist_res)

    if len(int_files) > 0:
        intf_sec = ResultSection("Files of interest", parent=res)
        for intf_d, intf_p in int_files.items():
            intf_subsec = ResultSection(intf_d, parent=intf_sec)
            for f in intf_p:
                intf_subsec.add_line(f.replace(f"{wrk_dir}/", ""))
def execute(self, request):
    # ==================================================================
    # Execute a request:
    #   Every time your service receives a new file to scan, the execute function is called.
    #   This is where you should execute your processing code.
    #   For the purpose of this example, we will only generate results...
    # You should run your code here...
    # ==================================================================

    # Check if we're scanning an embedded file
    # This service always drops 3 embedded files: two generate random results and the other generates
    # empty results.
    # We're making a check to see if we're scanning the embedded file.
    # In a normal service this is not something you would do at all but since we are using this
    # service in our unit tests to test all features of our report generator, we have to do this.
    if request.sha256 not in ['d729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                              '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                              'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
        # Main file results...

        # ==================================================================
        # Write the results:
        #   First, create a result object where all the result sections will be saved to
        result = Result()

        # ==================================================================
        # Standard text section: BODY_FORMAT.TEXT - DEFAULT
        #   Text sections basically just dump the text to the screen...
        #   All section scores will be SUMmed in the service result.
        #   The Result classification will be the highest classification found in the sections.
        text_section = ResultSection('Example of a default section')
        # You can add lines to your section one at a time
        #   Here we will generate a random line
        text_section.add_line(get_random_phrase())
        # Or you can add them from a list
        #   Here we will generate a random amount of random lines
        text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])
        # If the section needs to affect the score of the file you need to set a heuristic.
        #   Here we will pick one at random.
        #   In addition to adding a heuristic, we will associate a signature with the heuristic;
        #   we do this by adding the signature name to the heuristic. (Here we use a made-up name.)
        text_section.set_heuristic(3, signature="sig_one")
        # You can attach attack ids to heuristics after they were defined
        text_section.heuristic.add_attack_id("T1066")
        # Same thing for the signatures: they can be added to the heuristic after the fact and you can even say
        # how many times the signature fired by setting its frequency. If you call add_signature_id twice with
        # the same signature, this will effectively increase the frequency of the signature.
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=2)
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=3)
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_four", score=0)
        # The heuristic for text_section should have the following properties:
        #   1. 1 attack ID: T1066
        #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
        #   3. Signature frequencies are cumulative, therefore they will be as follows:
        #      - sig_one = 1
        #      - sig_two = 5
        #      - sig_three = 2
        #      - sig_four = 1
        #   4. The score used by each signature is driven by the following rules: signature_score_map has the
        #      highest priority, then the score value passed to add_signature_id, and finally the default
        #      heuristic score is used. Therefore the scores used to calculate the total score for the
        #      text_section are as follows:
        #      - sig_one: 10 -> heuristic default score
        #      - sig_two: 20 -> score provided by the function add_signature_id
        #      - sig_three: 30 -> score provided by the heuristic map
        #      - sig_four: 40 -> score provided by the heuristic map because it's higher priority than the
        #                        function score
        #   5. Total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210
        # Make sure you add your section to the result
        result.add_section(text_section)

        # ==================================================================
        # Color map section: BODY_FORMAT.GRAPH_DATA
        #   Creates a color map bar using a minimum and maximum domain
        #   e.g. We are using this section to display the entropy distribution in some services
        cmap_min = 0
        cmap_max = 20
        color_map_data = {
            'type': 'colormap',
            'data': {
                'domain': [cmap_min, cmap_max],
                'values': [random.random() * cmap_max for _ in range(50)]
            }
        }
        # The classification of a section can be set to any valid classification for your system
        section_color_map = ResultSection("Example of colormap result section",
                                          body_format=BODY_FORMAT.GRAPH_DATA,
                                          body=json.dumps(color_map_data),
                                          classification=cl_engine.RESTRICTED)
        result.add_section(section_color_map)

        # ==================================================================
        # URL section: BODY_FORMAT.URL
        #   Generates a list of clickable urls using a json encoded format.
        #   As you can see here, the body of the section can be set directly instead of line by line.
        random_host = get_random_host()
        url_section = ResultSection('Example of a simple url section',
                                    body_format=BODY_FORMAT.URL,
                                    body=json.dumps({"name": "Random url!",
                                                     "url": f"https://{random_host}/"}))

        # Since urls are very important features, we can tag those features in the system so they are easy
        # to find. Tags are defined by a type and a value.
        url_section.add_tag("network.static.domain", random_host)

        # You may also want to provide a list of urls!
        #   Also, no need to provide a name; the url link will be displayed.
        host1 = get_random_host()
        host2 = get_random_host()
        ip1 = get_random_ip()
        ip2 = get_random_ip()
        ip3 = get_random_ip()
        urls = [
            {"url": f"https://{host1}/"},
            {"url": f"https://{host2}/"},
            {"url": f"https://{ip1}/"},
            {"url": f"https://{ip2}/"},
            {"url": f"https://{ip3}/"}]

        # A heuristic can fire more than once without being associated to a signature
        url_heuristic = Heuristic(4, frequency=len(urls))

        url_sub_section = ResultSection('Example of a url section with multiple links',
                                        body=json.dumps(urls),
                                        body_format=BODY_FORMAT.URL,
                                        heuristic=url_heuristic)
        url_sub_section.add_tag("network.static.ip", ip1)
        url_sub_section.add_tag("network.static.ip", ip2)
        url_sub_section.add_tag("network.static.ip", ip3)
        url_sub_section.add_tag("network.static.domain", host1)
        url_sub_section.add_tag("network.dynamic.domain", host2)
        # Since url_sub_section is a sub-section of url_section,
        # we will add it as a sub-section of url_section, not to the main result itself
        url_section.add_subsection(url_sub_section)
        result.add_section(url_section)

        # ==================================================================
        # Memory dump section: BODY_FORMAT.MEMORY_DUMP
        #   Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
        data = hexdump(b"This is some random text that we will format as a hexdump and you'll see "
                       b"that the hexdump formatting will be preserved by the memory dump section!")
        memdump_section = ResultSection('Example of a memory dump section',
                                        body_format=BODY_FORMAT.MEMORY_DUMP,
                                        body=data)
        memdump_section.set_heuristic(random.randint(1, 4))
        result.add_section(memdump_section)

        # ==================================================================
        # KEY_VALUE section:
        #   This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
        #   while also providing easy-to-parse data for automated tools.
        #   NB: You should definitely use this over a JSON body type since this one will be displayed correctly
        #       in the UI for the user.
        #   The body argument must be a json dump of a dictionary (only str, int, and booleans are allowed)
        kv_body = {
            "a_str": "Some string",
            "a_bool": False,
            "an_int": 102,
        }
        kv_section = ResultSection('Example of a KEY_VALUE section',
                                   body_format=BODY_FORMAT.KEY_VALUE,
                                   body=json.dumps(kv_body))
        result.add_section(kv_section)

        # ==================================================================
        # JSON section:
        #   Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
        #   to display a tree view of JSON results.
        #   NB: Use this sparingly! As a service developer you should do your best to include important
        #       results as their own result sections.
        #   The body argument must be a json dump of a python dictionary.
        json_body = {
            "a_str": "Some string",
            "a_list": ["a", "b", "c"],
            "a_bool": False,
            "an_int": 102,
            "a_dict": {
                "list_of_dict": [
                    {"d1_key": "val", "d1_key2": "val2"},
                    {"d2_key": "val", "d2_key2": "val2"}
                ],
                "bool": True
            }
        }
        json_section = ResultSection('Example of a JSON section',
                                     body_format=BODY_FORMAT.JSON,
                                     body=json.dumps(json_body))
        result.add_section(json_section)

        # ==================================================================
        # PROCESS_TREE section:
        #   This section allows the service writer to list a bunch of dictionary objects that have nested lists
        #   of dictionaries to be displayed in the UI. Each dictionary object represents a process, and therefore
        #   each dictionary must be of the following format:
        #   {
        #     "process_pid": int,
        #     "process_name": str,
        #     "command_line": str,
        #     "children": []    NB: This list is either empty or contains more dictionaries that have the
        #                           same structure
        #   }
        nc_body = [
            {
                "process_pid": 123,
                "process_name": "evil.exe",
                "command_line": "C:\\evil.exe",
                "signatures": {},
                "children": [
                    {
                        "process_pid": 321,
                        "process_name": "takeovercomputer.exe",
                        "command_line": "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff",
                        "signatures": {"one": 250},
                        "children": [
                            {
                                "process_pid": 456,
                                "process_name": "evenworsethanbefore.exe",
                                "command_line": "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                                "signatures": {"one": 10, "two": 10, "three": 10},
                                "children": []
                            },
                            {
                                "process_pid": 234,
                                "process_name": "badfile.exe",
                                "command_line": "C:\\badfile.exe -k nothing_to_see_here",
                                "signatures": {"one": 1000, "two": 10, "three": 10, "four": 10, "five": 10},
                                "children": []
                            }
                        ]
                    },
                    {
                        "process_pid": 345,
                        "process_name": "benignexe.exe",
                        "command_line": "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                        "signatures": {"one": 2000},
                        "children": []
                    }
                ]
            },
            {
                "process_pid": 987,
                "process_name": "runzeroday.exe",
                "command_line": "C:\\runzeroday.exe -f insert_bad_spelling",
                "signatures": {},
                "children": []
            }
        ]
        nc_section = ResultSection('Example of a PROCESS_TREE section',
                                   body_format=BODY_FORMAT.PROCESS_TREE,
                                   body=json.dumps(nc_body))
        result.add_section(nc_section)

        # ==================================================================
        # TABLE section:
        #   This section allows the service writer to have their content displayed in a table format in the UI.
        #   The body argument must be a list [] of dict {} objects. A dict object can have a key/value pair
        #   where the value is a flat nested dictionary, and this nested dictionary will be displayed as a
        #   nested table within a cell.
        table_body = [
            {
                "a_str": "Some string1",
                "extra_column_here": "confirmed",
                "a_bool": False,
                "an_int": 101,
            },
            {
                "a_str": "Some string2",
                "a_bool": True,
                "an_int": 102,
            },
            {
                "a_str": "Some string3",
                "a_bool": False,
                "an_int": 103,
            },
            {
                "a_str": "Some string4",
                "a_bool": None,
                "an_int": -1000000000000000000,
                "extra_column_there": "confirmed",
                "nested_table": {
                    "a_str": "Some string3",
                    "a_bool": False,
                    "nested_table_thats_too_deep": {
                        "a_str": "Some string3",
                        "a_bool": False,
                        "an_int": 103,
                    },
                },
            },
        ]
        table_section = ResultSection('Example of a TABLE section',
                                      body_format=BODY_FORMAT.TABLE,
                                      body=json.dumps(table_body))
        result.add_section(table_section)

        # ==================================================================
        # Re-submitting files to the system:
        #   Adding extracted files will have them resubmitted to the system for analysis.

        # This file will generate random results on the next run
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(data.encode())
        request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

        # Embedded files can also have their own classification!
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"CLASSIFIED!!!__" + data.encode())
        request.add_extracted(temp_path, "classified.doc", "Classified file ... don't look",
                              classification=cl_engine.RESTRICTED)

        # This file will generate empty results on the next run
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"EMPTY")
        request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

        # ==================================================================
        # Supplementary files:
        #   Adding supplementary files will save them on the datastore for future
        #   reference but won't reprocess those files.
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(urls))
        request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")
        # Like embedded files, you can add more than one supplementary file
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(json_body))
        request.add_supplementary(temp_path, "json_body.json", "This is the json_body as a JSON file")

        # ==================================================================
        # Wrap-up:
        #   Save your result object back into the request
        request.result = result

    # ==================================================================
    # Empty results file
    elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
        # Create an empty result object
        request.result = Result()

    # ==================================================================
    # Randomized results file
    else:
        # For the randomized results file, we will completely randomize the results.
        # The content of those results does not matter since we've already showed you
        # all the different result sections, tagging, heuristics and file upload functions.
        embedded_result = Result()

        # random number of sections
        for _ in range(1, 3):
            embedded_result.add_section(self._create_random_section())

        request.result = embedded_result
def LSB_averages(self):
    # Additionally, collect the LSBs for additional randomness testing.
    # Idea from http://guillermito2.net/stegano/tools/
    # Right now only supports AL
    if not self.request:
        return
    pixels = self.binary_pixels
    lsb_points = []
    success = False
    try:
        # If greyscale, only one set of pixels to process
        if self.channels_to_process == 1:
            while len(pixels) != 0:
                lsb = []
                # Average the LSBs over the current chunk only
                for pbyte in pixels[:self.chunk]:
                    lsb.append(float(pbyte[-1]))
                lsb_avg_value = round(sum(lsb) / len(lsb), 1)
                lsb_points.append(lsb_avg_value)
                pixels = pixels[self.chunk:]
            success = True
        else:
            lsb_points_channels = {}
            # If not greyscale, test each colour channel separately per chunk and then average
            while len(pixels) != 0:
                # Grab channel (i.e. R,G,B) pixels
                colours = self.get_colours(pixels[:self.chunk])
                lsb_counts = []
                for c, pixels_flat in iter(colours.items()):
                    lsb = []
                    for pbyte in pixels_flat:
                        lsb.append(float(pbyte[-1]))
                    lsb_avg_value = float(round(sum(lsb) / len(lsb), 1))
                    lsb_counts.append(lsb_avg_value)
                    lsb_points_channels.setdefault(c, []).append(lsb_avg_value)
                # Average the lsb counts over the colours and round to 2 decimals
                lsb_points.append(round(sum(lsb_counts) / self.channels_to_process, 2))
                pixels = pixels[self.chunk:]
            success = True
    except Exception:
        success = False

    if success:
        lsb_graph_data = {
            'type': 'colormap',
            'data': {
                'domain': [0, 100],
                'values': [y * 100 for y in lsb_points]
            }
        }
        lsbres = ResultSection('LSB Average Value Analysis.')
        lsbres.add_subsection(
            ResultSection('Overall ({} byte chunks): closer to 0.5 == Random, '
                          'closer to 0/100 == Not Random.'.format(self.chunk_bytes),
                          body_format=BODY_FORMAT.GRAPH_DATA,
                          body=json.dumps(lsb_graph_data)))
        pval_res = self.detect_sig_changes(lsb_points, thr_counter=0.80)
        if pval_res:
            lsbres.add_subsection(pval_res)
        self.working_result.add_subsection(lsbres)
    return
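# Tiny standalone illustration of the LSB-average heuristic above: the mean
# LSB of natural image data usually drifts away from 0.5, while LSB-embedded
# (random-looking) data sits near 0.5.
def lsb_average(data: bytes) -> float:
    return sum(b & 1 for b in data) / len(data)

print(lsb_average(bytes([0, 2, 4, 8])))  # 0.0 -> LSB plane is highly non-random
print(lsb_average(bytes(range(256))))    # 0.5 -> consistent with embedded data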
def parse_results(self, response: Dict[str, Any]) -> Result:
    """
    This method parses the response JSON containing the scan results so that it will be displayed nicely in
    Assemblyline
    :param response: The raw results from the MetaDefender scan
    :return: The Result object to be used when displaying in Assemblyline
    """
    res = Result()
    scan_results = response.get('scan_results', response)
    virus_name = ""
    process_results = response.get('process_info', response)
    hit = False
    fail = False
    processed = {}

    if scan_results is not None and scan_results.get('progress_percentage') == 100:
        no_threat_detected = []
        av_hits = ResultSection('AV Detections as Infected or Suspicious')
        av_fails = ResultSection('Failed to Scan or No Threats Detected')

        scans = scan_results.get('scan_details', scan_results)
        av_scan_times = []
        modified_scans = {key: value for key, value in scans.items() if key not in ["progress_percentage"]}
        for majorkey, subdict in sorted(modified_scans.items()):
            if majorkey in self.blocklist:
                continue
            heur_id = None
            if subdict['scan_result_i'] == 1:  # File is infected
                virus_name = subdict['threat_found']
                if virus_name:
                    heur_id = 1
            elif subdict['scan_result_i'] == 2:  # File is suspicious
                virus_name = subdict['threat_found']
                if virus_name:
                    heur_id = 2
            elif subdict['scan_result_i'] == 10 or subdict['scan_result_i'] == 3:
                # File was not scanned or failed
                # noinspection PyBroadException
                try:
                    engine = self.nodes[self.current_node]['engine_map'][self._format_engine_name(majorkey)]
                except Exception:
                    engine = None
                fail = True
                av_fails.add_subsection(AvErrorSection(majorkey, engine))
            elif subdict['scan_result_i'] == 0:  # No threat detected
                no_threat_detected.append(majorkey)
                fail = True

            if heur_id is not None:
                virus_name = virus_name.replace("a variant of ", "")
                engine = self.nodes[self.current_node]['engine_map'][self._format_engine_name(majorkey)]
                av_hit_section = AvHitSection(majorkey, virus_name, engine, heur_id,
                                              self.sig_score_revision_map,
                                              self.kw_score_revision_map,
                                              self.safelist_match)
                av_hits.add_subsection(av_hit_section)
                hit = True

            av_scan_times.append(self._format_engine_name(majorkey))
            av_scan_times.append(subdict['scan_time'])

        if hit:
            res.add_section(av_hits)

        # Only create a result section for "No Threat Detected" if there was at least one hit
        if hit and fail:
            if no_threat_detected:
                ResultSection("No Threat Detected by AV Engine(s)",
                              body_format=BODY_FORMAT.KEY_VALUE,
                              body=json.dumps(dict(no_threat_detected=no_threat_detected)),
                              parent=av_fails)
            res.add_section(av_fails)

        file_size = response['file_info']['file_size']
        queue_time = response['process_info']['queue_time']
        processing_time = response['process_info']['processing_time']
        self.log.info(f"File successfully scanned by node ({self.current_node}). File size: {file_size} B. "
                      f"Queue time: {queue_time} ms. Processing time: {processing_time} ms. "
                      f"AV scan times: {str(av_scan_times)}")

        # Add the queue time to a list, which will later be used to calculate the average queue time
        self.nodes[self.current_node]['queue_times'].append(queue_time)
        self.nodes[self.current_node]['file_count'] += 1

    if process_results is not None and process_results.get('progress_percentage') == 100:
        hit = False
        fail = False
        processed = process_results.get('post_processing', process_results)
        if processed['actions_failed']:
            fail = True
        elif processed['actions_ran']:
            hit = True

    # Add the CDR json as extracted
    if hit:
        cdr_json_section = ResultSection('CDR Successfully Executed',
                                         body_format=BODY_FORMAT.JSON,
                                         body=json.dumps(processed))
        res.add_section(cdr_json_section)
    if fail:
        cdr_fails = ResultSection('CDR Failed or No Malicious Files Found')
        res.add_section(cdr_fails)

    return res
def _handle_subanalyses(self, request: ServiceRequest, sha256: str, analysis_id: str,
                        file_verdict_map: Dict[str, str],
                        parent_section: ResultSection) -> None:
    """
    This method handles the subanalyses for a given analysis ID
    :param request: The service request object
    :param sha256: The hash of the given file
    :param analysis_id: The ID for the analysis which we will be retrieving
    :param file_verdict_map: A map of sha256s representing a file's contents, and the verdict for that file
    :param parent_section: The result section that the subanalysis result sections will be added to, if applicable
    :return: None
    """
    so = SandboxOntology()

    # This boolean is used to determine if we should try to download another file
    can_we_download_files = True

    # These sets will be used as we work through the process trees
    process_path_set = set()
    command_line_set = set()

    # Now let's get into the subanalyses for this sample
    sub_analyses = self.client.get_sub_analyses_by_id(analysis_id)

    for sub in sub_analyses:
        sub_analysis_id = sub["sub_analysis_id"]

        # Get the extraction info, which describes how the subanalysis object came to be
        extraction_info = sub.pop("extraction_info", None)

        # "processes" is only present when the sample has undergone dynamic execution
        if extraction_info and "processes" not in extraction_info:
            extraction_info = None

        code_reuse = self.client.get_sub_analysis_code_reuse_by_id(analysis_id, sub_analysis_id)
        if code_reuse:
            families = code_reuse.pop("families", [])
        else:
            families = []

        if not families and not extraction_info:
            # Nothing interesting to report, skip it
            continue

        if families and not any(family["reused_gene_count"] > 1 for family in families):
            # Most likely a false positive
            continue

        ###
        # If we have gotten to this point, then the subanalysis is worth reporting
        ###
        extraction_method = sub["source"].replace("_", " ")
        if extraction_method != "root":
            sub_kv_section = ResultKeyValueSection(
                f"Subanalysis report for {sub['sha256']}, extracted via {extraction_method}")
        else:
            sub_kv_section = ResultKeyValueSection(f"Subanalysis report for {sub['sha256']}")

        metadata = self.client.get_sub_analysis_metadata_by_id(analysis_id, sub_analysis_id)
        processed_subanalysis = self._process_details(metadata.copy(), UNINTERESTING_SUBANALYSIS_KEYS)
        sub_kv_section.update_items(processed_subanalysis)
        parent_section.add_subsection(sub_kv_section)

        if code_reuse:
            code_reuse_kv_section = ResultKeyValueSection("Code reuse detected")
            code_reuse_kv_section.update_items(code_reuse)
            sub_kv_section.add_subsection(code_reuse_kv_section)

        sub_sha256 = sub["sha256"]
        if families:
            self._process_families(families, sub_sha256, file_verdict_map, sub_kv_section)

        if extraction_info:
            self._process_extraction_info(extraction_info["processes"], process_path_set,
                                          command_line_set, so)

        # Setting a heuristic here or downloading the file would be redundant if the hash
        # matched the original file
        if sub_sha256 != sha256:
            self._set_heuristic_by_verdict(sub_kv_section, file_verdict_map.get(sub_sha256))

            if can_we_download_files:
                file_was_downloaded = self.client.download_file_by_sha256(sub_sha256,
                                                                          self.working_directory)
                if file_was_downloaded:
                    path = f"{self.working_directory}/{sub_sha256}.sample"
                    request.add_extracted(
                        path,
                        f"{sub_sha256}.sample",
                        f"Extracted via {extraction_method}",
                    )
                    self.log.debug(f"Added {sub_sha256}.sample as an extracted file.")
                else:
                    can_we_download_files = False

    process_tree_section = so.get_process_tree_result_section()
    for process_path in process_path_set:
        process_tree_section.add_tag("dynamic.process.file_name", process_path)
    for command_line in command_line_set:
        process_tree_section.add_tag("dynamic.process.command_line", command_line)
    if process_tree_section.body:
        parent_section.add_subsection(process_tree_section)
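
# A minimal sketch of how _handle_subanalyses could be exercised in isolation with a
# mocked Intezer client, in the register of the pytest cases found elsewhere in this
# document. The MagicMock client, the hashes and the analysis ID are invented for
# illustration; only the method names mirror the calls made above.
from unittest.mock import MagicMock

def sketch_handle_subanalyses(service, request):
    service.client = MagicMock()
    # One subanalysis whose code reuse makes it worth reporting
    service.client.get_sub_analyses_by_id.return_value = [{
        "sub_analysis_id": "sub-1",
        "sha256": "b" * 64,  # differs from the parent hash, so the verdict/download logic runs
        "source": "static_extraction",
    }]
    service.client.get_sub_analysis_code_reuse_by_id.return_value = {
        "families": [{"family_name": "fake_family", "reused_gene_count": 5}],
    }
    service.client.get_sub_analysis_metadata_by_id.return_value = {"file_type": "executable"}
    service.client.download_file_by_sha256.return_value = False  # flips can_we_download_files off

    parent_section = ResultSection("Subanalyses")
    service._handle_subanalyses(request, "a" * 64, "analysis-1", {}, parent_section)
    assert parent_section.subsections  # the subanalysis key/value section was attached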
def dump_property(self, field, path, index, res, parent_res, is_orphan):
    if field['name'].value != '':
        name = field['name'].display[1:-1]
        p_type = field['type'].value
        if path[-1:] == '\\':
            abs_name = f"{path}{name}"
        else:
            abs_name = f"{path}\\{name}"

        prop_res = ResultSection(f"Property: {abs_name}",
                                 body_format=BODY_FORMAT.KEY_VALUE,
                                 body={})

        # If the type is not 1 (storage), 2 (stream) or 5 (root), that is weird.
        if p_type != 1 and p_type != 2 and p_type != 5:
            self.invalid_properties_count += 1

        # for properties that are not storage (a storage should be seen as a folder)
        if p_type != 1:
            size = field['size'].value
        else:
            size = 0

        address = 0
        if size > 0:
            if field['size'].value < self.ole2parser['header/threshold'].value and index != '0':
                # we first get the offset from the short block, but then we need
                # to map it back to the file, which is from root[X].
                offset = field['start'].value * self.ole2parser.ss_size
                keep_looping = True
                root_index = 0
                while keep_looping:
                    try:
                        current_root = self.ole2parser[f"root[{root_index}]"]
                        if offset == 0 or current_root.size > offset:
                            address = current_root.address + offset
                            keep_looping = False
                        else:
                            offset -= current_root.size
                            root_index += 1
                    except MissingField:
                        keep_looping = False
                        address = None
                        if not is_orphan:
                            self.invalid_streams.append(field['name'].display)
            else:
                address = HEADER_SIZE + field['start'].value * self.ole2parser.sector_size
        else:
            address = 0

        if address is not None and address >= 0:
            prop_res.body['property_meta'] = \
                f"offset: {hex(address // 8)} size: {hex(size)} / {field['type'].display} / " \
                f"{field['decorator'].display} / id={index} left={field['left'].display} " \
                f"right={field['right'].display} child={field['child'].display}"
        else:
            prop_res.body['property_meta'] = \
                f"offset: could not map.. size: {hex(size)} / {field['type'].display} / " \
                f"{field['decorator'].display} / id={index} left={field['left'].display} " \
                f"right={field['right'].display} child={field['child'].display}"

        # for root or storage
        if p_type == 5 or p_type == 1:
            if field['clsid'].display != "Null GUID: 00000000-0000-0000-0000-000000000000":
                clsid_desc = self.GUID_DESC.get(field['clsid'].display, "unknown clsid")
                prop_res.body["clsid"] = f"{field['clsid'].display} ({clsid_desc})"
                prop_res.add_tag('file.ole.clsid', field['clsid'].display)
            if field['creation'].display != "1601-01-01 00:00:00":
                prop_res.body["creation_date"] = field['creation'].display
                prop_res.add_tag('file.date.creation', field['creation'].display)
            if field['lastmod'].display != "1601-01-01 00:00:00":
                prop_res.body["last_modified_date"] = field['lastmod'].display
                prop_res.add_tag('file.date.last_modified', field['lastmod'].display)

        # '\1CompObj' should always be a stream (type 2); anything else is suspicious.
        if name == '\\1CompObj':
            if p_type != 2:
                res_error = ResultSection(
                    f"\\1CompObj type is '{p_type}' and it should be 2 (stream) "
                    f"... really suspicious.")
                res_error.set_heuristic(41)
                prop_res.add_subsection(res_error)
                size = field['size'].value

            # Apparently, we can get to this point and have office_root_entry_parser set to None.
            # Not sure what we should do about that but trying to use that member variable seems
            # like a bad idea...
            if self.office_root_entry_parser is not None:
                temp_field = None
                for f in self.office_root_entry_parser.createFields():
                    if f.name.startswith('compobj'):
                        temp_field = f

                # cache all the sub-fields
                for _ in temp_field:
                    pass

                self.parse_field(temp_field, prop_res, self.PARSING_MODE_DISPLAY, parent_res)

        if size > 0 and index != '0':
            field_with_other_parser = self.additional_parsing_fields.get(address, None)
            if field_with_other_parser:
                # noinspection PyTypeChecker
                self.parse_field(field_with_other_parser, prop_res,
                                 self.PARSING_MODE_DISPLAY, parent_res)

        if len(prop_res.body) > 1:
            prop_res.body = json.dumps(prop_res.body)
            res.add_subsection(prop_res)
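
# The short-block address mapping in dump_property is easy to lose track of, so here is
# the same walk pulled out as a standalone sketch. `root_chunks` stands in for the
# ole2parser's root[X] fields: each entry only needs an `address` and a `size`, and a
# return value of None mirrors the MissingField branch (an orphaned/invalid stream).
def map_short_block_offset(offset, root_chunks):
    for chunk in root_chunks:
        if offset == 0 or chunk.size > offset:
            # this chunk covers the target: rebase the remaining offset onto it
            return chunk.address + offset
        # otherwise skip past this chunk and keep walking
        offset -= chunk.size
    return None  # ran out of root chunks without covering the offset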
def execute(self, request):
    request.result = Result()
    request.set_service_context(self.get_tool_version())
    temp_filename = request.file_path
    filename = os.path.basename(temp_filename)
    extract_dir = os.path.join(self.working_directory, f"{filename}_extracted")
    decompiled_dir = os.path.join(self.working_directory, f"{filename}_decompiled")
    file_res = request.result
    new_files = []
    supplementary_files = []
    imp_res_list = []
    res_list = []

    if request.file_type == "java/jar":
        self.decompile_jar(temp_filename, decompiled_dir)
        if self.jar_extract(temp_filename, extract_dir):
            # Analysis properties
            self.classloader_found = 0
            self.security_found = 0
            self.url_found = 0
            self.runtime_found = 0
            self.applet_found = 0

            self.manifest_tags = []
            self.signature_block_certs = []

            def analyze_file(root, cf, file_res, imp_res_list, supplementary_files,
                             decompiled_dir, extract_dir):
                cur_file_path = os.path.join(root.decode('utf-8'), cf.decode('utf-8'))
                with open(cur_file_path, "rb") as cur_file:
                    start_bytes = cur_file.read(24)

                    ##############################
                    # Executables in JAR
                    ##############################
                    cur_ext = os.path.splitext(cf)[1][1:].upper()
                    if start_bytes[:2] == b"MZ":
                        mz_res = dict(
                            title_text=f"Embedded executable file found: {cf} "
                                       "There may be malicious intent.",
                            heur_id=1,
                            tags=[('file.behavior', "Embedded PE")],
                            score_condition=APPLET_MZ,
                        )
                        imp_res_list.append(mz_res)

                    ##############################
                    # Launchable in JAR
                    ##############################
                    elif cur_ext in G_LAUNCHABLE_EXTENSIONS:
                        l_res = dict(
                            title_text=f"Launch-able file type found: {cf} "
                                       "There may be malicious intent.",
                            heur_id=2,
                            tags=[('file.behavior', "Launch-able file in JAR")],
                            score_condition=APPLET_MZ,
                        )
                        imp_res_list.append(l_res)

                    if cur_file_path.upper().endswith('.CLASS'):
                        self.analyse_class_file(file_res, cf, cur_file, cur_file_path,
                                                start_bytes, imp_res_list,
                                                supplementary_files, decompiled_dir,
                                                extract_dir)

            for root, _, files in os.walk(extract_dir.encode('utf-8')):
                logging.info(f"Extracted: {root} - {files}")
                # if the top-level META-INF folder is encountered
                if root.upper().endswith(b'META-INF'):
                    self.analyse_meta_information(file_res, root, supplementary_files,
                                                  extract_dir)
                    continue
                with ThreadPoolExecutor() as executor:
                    for cf in files:
                        executor.submit(analyze_file, root, cf, file_res, imp_res_list,
                                        supplementary_files, decompiled_dir, extract_dir)

            res = ResultSection("Analysis of the JAR file")
            res_meta = ResultSection("[Meta Information]", parent=res)
            if len(self.manifest_tags) > 0:
                res_manifest = ResultSection("Manifest File Information Extract",
                                             parent=res_meta)
                for tag, val in self.manifest_tags:
                    res_manifest.add_tag(tag, val)
            for res_cert in self.signature_block_certs:
                res_meta.add_subsection(res_cert)

            if self.runtime_found > 0 \
                    or self.applet_found > 0 \
                    or self.classloader_found > 0 \
                    or self.security_found > 0 \
                    or self.url_found > 0:
                res.add_line("All suspicious class files were saved as supplementary files.")
                res_class = ResultSection("[Suspicious classes]", parent=res)
                if self.runtime_found > 0:
                    ResultSection("Runtime Found",
                                  body=f"java/lang/Runtime: {self.runtime_found}",
                                  heuristic=Heuristic(10),
                                  parent=res_class)
                if self.applet_found > 0:
                    ResultSection("Applet Found",
                                  body=f"java/applet/Applet: {self.applet_found}",
                                  heuristic=Heuristic(6),
                                  parent=res_class)
                if self.classloader_found > 0:
                    ResultSection("Classloader Found",
                                  body=f"java/lang/ClassLoader: {self.classloader_found}",
                                  heuristic=Heuristic(7),
                                  parent=res_class)
                if self.security_found > 0:
                    ResultSection("Security Found",
body=f"java/security/*: {self.security_found}", heuristic=Heuristic(8), parent=res_class) if self.url_found > 0: ResultSection("URL Found", body=f"java/net/URL: {self.url_found}", heuristic=Heuristic(9), parent=res_class) res_list.append(res) # Add results if any self.recurse_add_res(file_res, imp_res_list, new_files) for res in res_list: file_res.add_section(res) # Submit embedded files if len(new_files) > 0: new_files = sorted(list(set(new_files))) txt = f"Extracted from 'JAR' file {filename}" for embed in new_files: request.add_extracted(embed, embed.replace(extract_dir + "/", "").replace(decompiled_dir + "/", ""), txt, safelist_interface=self.api_interface) if len(supplementary_files) > 0: supplementary_files = sorted(list(set(supplementary_files))) for path, name, desc in supplementary_files: request.add_supplementary(path, name, desc)
def section_builder(self, parser, field_dict, result, parsertype="MWCP"):
    json_body = {}
    malware_name = ''
    malware_types = []
    mitre_group = ''
    mitre_att = ''
    category = 'malware'
    # get malware names from parser objects
    if parsertype == "RATDecoder":
        malware_name = parser
    if parsertype == "MWCP":
        for name, obj in self.file_parsers.items():
            if parser in obj.parser_list:
                malware_name = obj.malware
                malware_types = obj.malware_types
                mitre_att = obj.mitre_att
                mitre_group = obj.mitre_group
                category = obj.category
                for item in ['classification', 'mitre_group', 'mitre_att',
                             'malware', 'malware_types', 'category']:
                    val = getattr(obj, item, None)
                    if val:
                        json_body[item] = val
                break
    parser_section = ResultSection(f"{parsertype} : {parser}")
    parser_section = classification_checker(parser_section, parser, self.file_parsers)
    if len(field_dict) > 0:  # if any decoder output exists, raise a heuristic
        parser_section.set_body(json.dumps(json_body), body_format=BODY_FORMAT.KEY_VALUE)
        parser_section.set_heuristic(HEURISTICS_MAP.get(category, 1), attack_id=mitre_att)
        parser_section.add_tag("source", parsertype)
    if malware_name:
        parser_section.add_tag('attribution.implant', malware_name.upper())
    if mitre_group:
        parser_section.add_tag('attribution.actor', mitre_group.upper())
    for malware_type in malware_types:
        parser_section.add_tag('attribution.family', malware_type.upper())
    # Create subsections and attach them to the main parser_section
    subsection_builder(parser_section, field_dict)
    other_key = "other"
    if other_key in field_dict:
        other_content = field_dict[other_key]
        other_section = ResultSection("Other metadata found",
                                      body_format=BODY_FORMAT.KEY_VALUE,
                                      body=json.dumps(other_content))
        parser_section.add_subsection(other_section)
    for field in field_dict:
        if field != other_key and field not in FIELD_TAG_MAP:
            self.log.debug(f"{field} does not exist in FIELD_TAG_MAP")
    result.add_section(parser_section)
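
# A hedged usage sketch for section_builder: the parser name and field_dict contents are
# invented for illustration, and file_parsers/FIELD_TAG_MAP are assumed to be populated
# the way the method above expects.
def sketch_section_builder_usage(service):
    result = Result()
    field_dict = {
        "address": ["10.0.0.1"],        # assumed to be a key FIELD_TAG_MAP knows about
        "other": {"build_id": "1234"},  # lands in the "Other metadata found" subsection
    }
    service.section_builder("SomeDecoder", field_dict, result, parsertype="MWCP")
    # One parser section, carrying a heuristic (field_dict is non-empty) and the
    # "Other metadata found" subsection
    assert result.sections and result.sections[0].subsections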
def test_process_ttps(intezer_static_class_instance, dummy_api_interface_class, mocker): from intezer_static import ALIntezerApi from intezer_sdk.api import IntezerApi from intezer_sdk.errors import UnsupportedOnPremiseVersion from assemblyline_v4_service.common.result import ResultSection, ResultTableSection, TableRow from requests import HTTPError mocker.patch.object(intezer_static_class_instance, "get_api_interface", return_value=dummy_api_interface_class) intezer_static_class_instance.start() parent_res_sec = ResultSection("blah") mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[]) intezer_static_class_instance._process_ttps("blah", parent_res_sec) assert parent_res_sec.subsections == [] mocker.patch.object(IntezerApi, "get_dynamic_ttps", side_effect=HTTPError("FORBIDDEN")) intezer_static_class_instance._process_ttps("blah", parent_res_sec) assert parent_res_sec.subsections == [] mocker.patch.object(IntezerApi, "get_dynamic_ttps", side_effect=UnsupportedOnPremiseVersion()) intezer_static_class_instance._process_ttps("blah", parent_res_sec) assert parent_res_sec.subsections == [] mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[{ "name": "blah", "description": "blah", "data": [], "severity": 1 }]) intezer_static_class_instance._process_ttps("blah", parent_res_sec) correct_res_sec = ResultSection("Signature: blah", "blah") correct_res_sec.set_heuristic(4) correct_res_sec.heuristic.add_signature_id("blah", 10) assert check_section_equality( parent_res_sec.subsections[0].subsections[0], correct_res_sec) parent_res_sec = ResultSection("blah") mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[{ "name": "InjectionInterProcess", "description": "blah", "data": [], "severity": 1 }]) intezer_static_class_instance._process_ttps("blah", parent_res_sec) correct_res_sec = ResultSection("Signature: InjectionInterProcess", "blah") correct_res_sec.set_heuristic(7) correct_res_sec.heuristic.add_signature_id("InjectionInterProcess", 10) correct_res_sec.heuristic.add_attack_id("T1055") assert check_section_equality( parent_res_sec.subsections[0].subsections[0], correct_res_sec) parent_res_sec = ResultSection("blah") mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[{ "name": "enumerates_running_processes", "description": "blah", "data": [{ "wow": "print me!" 
}], "severity": 1 }]) intezer_static_class_instance._process_ttps("blah", parent_res_sec) correct_res_sec = ResultSection( "Signature: enumerates_running_processes", "blah") correct_res_sec.set_heuristic(8) correct_res_sec.heuristic.add_signature_id( "enumerates_running_processes", 10) correct_res_sec.heuristic.add_attack_id("T1057") assert check_section_equality( parent_res_sec.subsections[0].subsections[0], correct_res_sec) parent_res_sec = ResultSection("blah") mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[{ "name": "blah", "description": "blah", "data": [ { "IP": "blah 2.2.2.2 blah" }, ], "severity": 1 }]) intezer_static_class_instance._process_ttps("blah", parent_res_sec) correct_res_sec = ResultSection("Signature: blah", "blah") correct_res_sec.add_line("\tIP: blah 2.2.2.2 blah") correct_res_sec.set_heuristic(4) correct_res_sec.heuristic.add_signature_id("blah", 10) correct_ioc_res_sec = ResultTableSection( "IOCs found in signature marks") correct_ioc_res_sec.add_row(TableRow(ioc_type="ip", ioc="2.2.2.2")) correct_ioc_res_sec.add_tag("network.dynamic.ip", "2.2.2.2") correct_res_sec.add_subsection(correct_ioc_res_sec) assert check_section_equality( parent_res_sec.subsections[0].subsections[0], correct_res_sec)
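
# check_section_equality is used throughout these tests but defined outside this excerpt.
# A minimal sketch of what such a helper could look like, assuming equality means matching
# title, body, tags and heuristic ID, recursively over subsections.
def sketch_check_section_equality(this, that) -> bool:
    if (this.title_text, this.body, this.tags) != (that.title_text, that.body, that.tags):
        return False
    this_heur = this.heuristic.heur_id if this.heuristic else None
    that_heur = that.heuristic.heur_id if that.heuristic else None
    if this_heur != that_heur:
        return False
    if len(this.subsections) != len(that.subsections):
        return False
    return all(sketch_check_section_equality(a, b)
               for a, b in zip(this.subsections, that.subsections))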
def execute(self, request):
    # ==================================================================
    # Execute a request:
    #   Every time your service receives a new file to scan, the execute function is called
    #   This is where you should execute your processing code.
    #   For the purpose of this example, we will only generate results ...

    # You should run your code here...

    # ==================================================================
    # Check if we're scanning an embedded file
    #   This service always drops two embedded files: one generates random results and the
    #   other generates empty results.
    #   We check here to see if we're scanning one of those embedded files.
    #   In a normal service this is not something you would do at all but since we are using this
    #   service in our unit test to test all features of our report generator, we have to do this
    if request.sha256 not in ['d729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                              'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
        # Main file results...

        # ==================================================================
        # Write the results:
        #   First, create a result object where all the result sections will be saved to
        result = Result()

        # ==================================================================
        # Standard text section: BODY_FORMAT.TEXT - DEFAULT
        #   Text sections basically just dump the text to the screen...
        #     All section scores will be summed in the service result
        #     The Result classification will be the highest classification found in the sections
        text_section = ResultSection('Example of a default section')
        # You can add lines to your section one at a time
        #   Here we will generate a random line
        text_section.add_line(get_random_phrase())
        # Or you can add them from a list
        #   Here we will generate a random number of random lines
        text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])
        # If the section needs to affect the score of the file, you need to set a heuristic
        #   Here we will pick one at random
        #     In addition to adding a heuristic, we will associate a signature with it
        #     by adding the signature name to the heuristic. (Here we generate a random name)
        text_section.set_heuristic(random.randint(1, 4),
                                   signature=get_random_phrase(1, 4).lower().replace(" ", "_"))
        # Make sure you add your section to the result
        result.add_section(text_section)

        # ==================================================================
        # Color map Section: BODY_FORMAT.GRAPH_DATA
        #   Creates a color map bar using a minimum and maximum domain
        #   e.g. We are using this section to display the entropy distribution in some services
        cmap_min = 0
        cmap_max = 20
        color_map_data = {
            'type': 'colormap',
            'data': {
                'domain': [cmap_min, cmap_max],
                'values': [random.random() * cmap_max for _ in range(50)]
            }
        }
        section_color_map = ResultSection("Example of colormap result section",
                                          body_format=BODY_FORMAT.GRAPH_DATA,
                                          body=json.dumps(color_map_data))
        result.add_section(section_color_map)

        # ==================================================================
        # URL section: BODY_FORMAT.URL
        #   Generate a list of clickable urls using a json encoded format
        #   As you can see here, the body of the section can be set directly
        #   instead of line by line
        random_host = get_random_host()
        url_section = ResultSection('Example of a simple url section',
                                    body_format=BODY_FORMAT.URL,
                                    body=json.dumps({
                                        "name": "Random url!",
                                        "url": f"https://{random_host}/"
                                    }))

        # Since urls are very important features, we can tag those features in the system
        # so they are easy to find
        #   Tags are defined by a type and a value
        url_section.add_tag("network.static.domain", random_host)

        # You may also want to provide a list of urls!
        #   Also, no need to provide a name; the url link will be displayed
        host1 = get_random_host()
        host2 = get_random_host()
        ip1 = get_random_ip()
        urls = [{"url": f"https://{host1}/"},
                {"url": f"https://{host2}/"},
                {"url": f"https://{ip1}/"}]
        url_sub_section = ResultSection('Example of a url section with multiple links',
                                        body_format=BODY_FORMAT.URL,
                                        body=json.dumps(urls))
        url_sub_section.set_heuristic(random.randint(1, 4))
        url_sub_section.add_tag("network.static.ip", ip1)
        url_sub_section.add_tag("network.static.domain", host1)
        url_sub_section.add_tag("network.dynamic.domain", host2)
        # Since url_sub_section is a sub-section of url_section,
        # we will add it as a sub-section of url_section, not to the main result itself
        url_section.add_subsection(url_sub_section)
        result.add_section(url_section)

        # ==================================================================
        # Memory dump section: BODY_FORMAT.MEMORY_DUMP
        #   Dump whatever string content you have into a <pre/> html tag so you can do
        #   your own formatting
        data = hexdump(b"This is some random text that we will format as a hexdump and you'll see "
                       b"that the hexdump formatting will be preserved by the memory dump section!")
        memdump_section = ResultSection('Example of a memory dump section',
                                        body_format=BODY_FORMAT.MEMORY_DUMP,
                                        body=data)
        memdump_section.set_heuristic(random.randint(1, 4))
        result.add_section(memdump_section)

        # ==================================================================
        # KEY_VALUE section:
        #   This section allows the service writer to list a bunch of key/value pairs to be
        #   displayed in the UI while also providing easy-to-parse data for automated tools.
        #   NB: You should definitely use this over a JSON body type since this one will be
        #       displayed correctly in the UI for the user
        #   The body argument must be a json dump of a dictionary
        #   (only str, int, and booleans are allowed)
        kv_body = {
            "a_str": "Some string",
            "a_bool": False,
            "an_int": 102,
        }
        kv_section = ResultSection('Example of a KEY_VALUE section',
                                   body_format=BODY_FORMAT.KEY_VALUE,
                                   body=json.dumps(kv_body))
        result.add_section(kv_section)

        # ==================================================================
        # JSON section:
        #   Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
        #   to display a tree view of JSON results.
        #   NB: Use this sparingly! As a service developer you should do your best to include
        #       important results as their own result sections.
        #   The body argument must be a json dump of a python dictionary
        json_body = {
            "a_str": "Some string",
            "a_list": ["a", "b", "c"],
            "a_bool": False,
            "an_int": 102,
            "a_dict": {
                "list_of_dict": [
                    {"d1_key": "val", "d1_key2": "val2"},
                    {"d2_key": "val", "d2_key2": "val2"}
                ],
                "bool": True
            }
        }
        json_section = ResultSection('Example of a JSON section',
                                     body_format=BODY_FORMAT.JSON,
                                     body=json.dumps(json_body))
        result.add_section(json_section)

        # ==================================================================
        # Re-Submitting files to the system
        #   Adding extracted files will have them resubmitted to the system for analysis

        # This file will generate random results on the next run
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(data.encode())
        request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

        # This file will generate empty results on the next run
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"EMPTY")
        request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

        # ==================================================================
        # Supplementary files
        #   Adding supplementary files will save them on the datastore for future
        #   reference but won't reprocess those files.
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(urls))
        request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")
        # Like embedded files, you can add more than one supplementary file
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(json_body))
        request.add_supplementary(temp_path, "json_body.json",
                                  "This is the json_body as a JSON file")

        # ==================================================================
        # Wrap-up:
        #   Save your result object back into the request
        request.result = result

    # ==================================================================
    # Empty results file
    elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
        # Creating an empty result object
        request.result = Result()

    # ==================================================================
    # Randomized results file
    else:
        # For the randomized results file, we will completely randomize the results
        #   The content of those results does not matter since we've already shown you
        #   all the different result sections, tagging, heuristics and file upload functions
        embedded_result = Result()

        # random number of sections
        for _ in range(random.randint(1, 3)):
            embedded_result.add_section(self._create_random_section())

        request.result = embedded_result