def execute(self, request):
    temp_filename = request.file_path

    # Filter out large documents
    if os.path.getsize(temp_filename) > self.max_pdf_size:
        file_res = Result()
        res = ResultSection(
            f"PDF Analysis of the file was skipped because the "
            f"file is too big (limit is {self.max_pdf_size / 1000 / 1000} MB).")
        file_res.add_section(res)
        request.result = file_res
        return

    filename = os.path.basename(temp_filename)
    with open(temp_filename, 'rb') as f:
        file_content = f.read()

    if b'<xdp:xdp' in file_content:
        self.find_xdp_embedded(filename, file_content, request)

    self.peepdf_analysis(temp_filename, file_content, request)
def execute(self, request):
    result = Result()
    file_path = request.file_path
    file_type = request.file_type

    # StegExpose scans a directory, so copy the file into the working directory first
    shutil.copyfile(file_path, self.working_directory + "/analyzed")
    p1 = subprocess.Popen(
        "java -jar /var/lib/assemblyline/StegExpose/StegExpose.jar "
        + self.working_directory + " standard default "
        + self.working_directory + "/report.csv",
        shell=True)
    p1.wait()

    lsb_steg_results = self.read_csv(self.working_directory + "/report.csv")
    lsb_steg_results = self.beautify_dict(lsb_steg_results)

    kv_section = ResultSection("Result of the LSB steganalysis",
                               body_format=BODY_FORMAT.KEY_VALUE,
                               body=json.dumps(lsb_steg_results))
    result.add_section(kv_section)
    request.result = result
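# The read_csv/beautify_dict helpers used above are not shown in this snippet.
# Below is a minimal sketch of what they are assumed to do (illustrative only,
# not the service's actual implementation): load the StegExpose CSV report into
# a dict keyed on the first column, then flatten it into simple key/value pairs
# so the KEY_VALUE body renders cleanly.
import csv

def read_csv(self, csv_path):
    # {file_name: {column: value}}, keyed on the first CSV column
    results = {}
    with open(csv_path, newline='') as fh:
        reader = csv.reader(fh)
        header = next(reader, [])
        for row in reader:
            if row:
                results[row[0]] = dict(zip(header[1:], row[1:]))
    return results

def beautify_dict(self, results):
    # Flatten to "file (column): value" pairs for the KEY_VALUE section
    return {f"{name} ({column})": value
            for name, columns in results.items()
            for column, value in columns.items()}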
def execute(self, request): """Main Module. See README for details.""" result = Result() self.sha = request.sha256 local = request.file_path text_section = None kv_section = None extracted, metadata = self.dexray(request, local) num_extracted = len(request.extracted) if num_extracted != 0: text_section = ResultSection("DeXRAY found files:") for extracted in request.extracted: file_name = extracted.get('name') text_section.add_line( f"Resubmitted un-quarantined file as : {file_name}") if metadata: # Can contain live URLs to the original content source kv_section = ResultSection("DeXRAY Quarantine Metadata", body_format=BODY_FORMAT.JSON, body=json.dumps(metadata)) result.add_section(kv_section) for section in (text_section, kv_section): if section: result.add_section(section)
def parse_results(response: Dict[str, Any]):
    res = Result()
    response = response['data']

    url_section = ResultSection('VirusTotal report permalink',
                                body_format=BODY_FORMAT.URL,
                                body=json.dumps({"url": response['links']['self']}))
    res.add_section(url_section)

    response = response['attributes']
    scans = response['last_analysis_results']
    av_hits = ResultSection('Anti-Virus Detections')
    av_hits.add_line(f'Found {response["last_analysis_stats"]["malicious"]} AV hit(s) from '
                     f'{len(response["last_analysis_results"])}')
    for majorkey, subdict in sorted(scans.items()):
        if subdict['category'] == "malicious":
            virus_name = subdict['result']
            av_hit_section = AvHitSection(majorkey, virus_name)
            av_hit_section.set_heuristic(1, signature=f'{majorkey}.{virus_name}')
            av_hit_section.add_tag('av.virus_name', virus_name)
            av_hits.add_subsection(av_hit_section)
    res.add_section(av_hits)

    return res
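# AvHitSection is defined elsewhere in the service; it is assumed to be a thin
# ResultSection subclass along these lines (an illustrative sketch, not the
# service's actual class):
from assemblyline_v4_service.common.result import ResultSection

class AvHitSection(ResultSection):
    def __init__(self, av_name: str, virus_name: str):
        # One subsection per engine hit, titled with the engine and its label
        super().__init__(f"{av_name} identified the file as {virus_name}")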
def execute(self, request):
    result = Result()
    url = request.task.metadata.get('submitted_url')
    api_key = request.get_param("api_key")
    public = request.get_param("public")

    u = UrlScan(apikey=api_key, url=url, public=public)
    u.submit()

    # We need to wait for the API to process our request
    response = self.wait_processing(u)
    response_json = response.json()

    # We get the response parts that we want and merge them all together
    report = {
        **response_json["verdicts"]["overall"],
        **response_json["lists"],
        **response_json["page"]
    }

    # We convert the "certificates" section from a list of dictionaries to a dictionary of lists
    certificates = report.pop("certificates")
    certificates = {k: [dic[k] for dic in certificates] for k in certificates[0]}

    # We add the converted section to the report
    report = {**report, **certificates}

    # We create the KEY_VALUE section to add the report to the result page
    kv_section = ResultSection("Urlscan.io report",
                               body_format=BODY_FORMAT.KEY_VALUE,
                               body=json.dumps(report))
    for domain in report["domains"]:
        kv_section.add_tag("network.static.domain", domain.strip())
    result.add_section(kv_section)

    # We get the preview of the website
    screenshot = u.getScreenshot()
    with open(self.working_directory + "/preview.png", "wb") as ofile:
        ofile.write(screenshot)

    # Adding the preview on the result page
    url_section = ResultSection(
        'Urlscan.io website screenshot',
        body_format=BODY_FORMAT.URL,
        body=json.dumps({
            "name": "The preview is also available here!",
            "url": response_json["task"]["screenshotURL"]
        }))
    result.add_section(url_section)

    request.add_extracted(self.working_directory + "/preview.png", "preview.png",
                          "Here's the preview of the site")
    request.result = result
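# wait_processing is not shown here. urlscan.io returns HTTP 404 on the result
# endpoint until the scan completes, so the helper is assumed to poll along the
# lines of the sketch below. The result_url attribute is a hypothetical
# stand-in for however the wrapper exposes the API result endpoint.
import time
import requests

def wait_processing(self, submission, timeout=120, interval=5):
    deadline = time.time() + timeout
    while time.time() < deadline:
        response = requests.get(submission.result_url)  # hypothetical attribute
        if response.status_code == 200:
            return response
        time.sleep(interval)
    raise TimeoutError("urlscan.io scan did not finish in time")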
def test_reduce():
    from assemblyline_v4_service.common.section_reducer import reduce
    from assemblyline_v4_service.common.result import Result, ResultSection

    res = Result()
    result_section = ResultSection("blah")
    res.add_section(result_section)
    reduce(res)

    # Code coverage only
    assert True
def resubmit_dex2jar_output(self, apk_file: str, target: str, result: Result, request):
    dex = os.path.join(self.working_directory, "classes.dex")
    self.get_dex(apk_file, dex)
    if os.path.exists(dex):
        d2j = Popen([self.dex2jar, "--output", target, dex], stdout=PIPE, stderr=PIPE)
        d2j.communicate()
        if os.path.exists(target):
            res_sec = ResultSection("Classes.dex file was recompiled as a JAR and re-submitted for analysis")
            res_sec.add_line(f"JAR file resubmitted as: {os.path.basename(target)}")
            request.add_extracted(target, os.path.basename(target), "Dex2Jar output JAR file")
            result.add_section(res_sec)
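# get_dex is assumed to pull classes.dex out of the APK, which is just a zip
# archive; a minimal sketch under that assumption (not the service's actual
# implementation):
import zipfile

def get_dex(self, apk_file: str, dex_path: str) -> None:
    try:
        with zipfile.ZipFile(apk_file) as apk:
            with open(dex_path, "wb") as out:
                out.write(apk.read("classes.dex"))
    except (zipfile.BadZipFile, KeyError):
        # Not a valid zip, or no classes.dex entry; the caller checks os.path.exists
        pass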
def execute(self, request: ServiceRequest) -> None:
    sha256 = request.sha256
    result = Result()

    # First, let's get the analysis metadata, if it exists on the system
    main_api_result = self._get_analysis_metadata(request.get_param('analysis_id'), sha256)

    if not main_api_result:
        self.log.debug(f"SHA256 {sha256} is not on the system.")
        request.result = result
        return

    if main_api_result.get("verdict") in Verdicts.NOT_SUPPORTED_VERDICTS.value:
        self.log.debug(f"Unsupported file type: {request.file_type}")
        request.result = result
        return
    elif main_api_result.get("verdict") == AnalysisStatusCode.FAILED.value:
        self.log.warning("The Intezer server is not feeling well :(")
        request.result = result
        return

    analysis_id = main_api_result["analysis_id"]

    # Setup the main result section
    main_kv_section = ResultKeyValueSection("IntezerStatic analysis report")
    processed_main_api_result = self._process_details(main_api_result.copy(), UNINTERESTING_ANALYSIS_KEYS)
    main_kv_section.update_items(processed_main_api_result)
    if "family_name" in main_api_result:
        main_kv_section.add_tag("attribution.family", main_api_result["family_name"])

    # This file-verdict map will be used later on to assign heuristics to sub-analyses
    file_verdict_map = {}
    self._process_iocs(analysis_id, file_verdict_map, main_kv_section)
    if not self.config["is_on_premise"]:
        self._process_ttps(analysis_id, main_kv_section)
    self._handle_subanalyses(request, sha256, analysis_id, file_verdict_map, main_kv_section)

    # Setting heuristic here to avoid FPs
    if main_kv_section.subsections:
        self._set_heuristic_by_verdict(main_kv_section, main_api_result["verdict"])

    if main_kv_section.subsections or main_kv_section.heuristic:
        result.add_section(main_kv_section)
    request.result = result
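# _set_heuristic_by_verdict is assumed to translate Intezer's verdict string
# into a service heuristic; a minimal sketch (both the verdict strings and the
# heuristic IDs below are assumptions, not the service's actual mapping):
def _set_heuristic_by_verdict(self, section, verdict: str) -> None:
    verdict_to_heur = {"malicious": 1, "suspicious": 2}  # assumed mapping
    heur_id = verdict_to_heur.get(verdict)
    if heur_id:
        section.set_heuristic(heur_id)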
def test_parse_results(response, correct_res_secs, metadefender_class_instance):
    from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT, Heuristic

    metadefender_class_instance.blocklist = ["a"]
    metadefender_class_instance.sig_score_revision_map = {}
    metadefender_class_instance.kw_score_revision_map = {}
    metadefender_class_instance.current_node = "http://blah"
    metadefender_class_instance.nodes[metadefender_class_instance.current_node] = {
        "engine_map": {
            "z": {"version": "blah", "def_time": "blah"},
            "y": {"version": "blah", "def_time": "blah"}
        },
        "queue_times": [],
        "file_count": 0
    }

    correct_result = Result()
    for correct_res_sec in correct_res_secs:
        section = ResultSection(
            correct_res_sec["title_text"],
            body_format=BODY_FORMAT.TEXT if not correct_res_sec.get("body_format") else BODY_FORMAT.JSON,
            body=correct_res_sec.get("body"))
        for subsec in correct_res_sec.get("subsections", []):
            subsection = ResultSection(
                subsec["title_text"],
                body=subsec["body"],
                body_format=BODY_FORMAT.KEY_VALUE,
                tags=subsec.get("tags"),
            )
            if subsec.get("heuristic"):
                subsection.set_heuristic(subsec["heuristic"]["heur_id"])
                for key in subsec["heuristic"]["signatures"]:
                    subsection.heuristic.add_signature_id(key)
            section.add_subsection(subsection)
        correct_result.add_section(section)

    actual_result = metadefender_class_instance.parse_results(response)
    for index, section in enumerate(actual_result.sections):
        assert check_section_equality(section, correct_result.sections[index])
def execute(self, request: ServiceRequest) -> Optional[Dict[str, Any]]:
    result = Result()
    request.result = result

    # Get AV labels from previous services
    av_labels = request.task.tags.get('av.virus_name')
    if not av_labels:
        return

    # Extract AVclass tags
    av_tags = self._get_avclass_tags(request.md5, request.sha1, request.sha256, av_labels)
    if av_tags is None:
        return

    # Build results
    section = self._get_result_section(av_tags.family, av_tags.is_pup)
    for tag_section in self._get_category_sections(av_tags.tags):
        section.add_subsection(tag_section)

    result.add_section(section)
def execute(self, request):
    result = Result()
    file = request.file_path
    with open(file, "rb") as f:
        file_content = f.read()

    content_list = autoit_ripper.extract(data=file_content)

    if content_list:
        content = content_list[0][1].decode("utf-8")

        text_section = ResultSection('[DUMP RESULT]')
        text_section.add_line(content)
        text_section.set_heuristic(1)
        result.add_section(text_section)

        # os.path.join avoids the missing path separator in the original concatenation
        script_path = os.path.join(self.working_directory, 'script.au3')
        with open(script_path, "w") as f:
            f.write(content)
        request.add_extracted(script_path, 'script.au3', 'This is the unpacked script')

    request.result = result
def execute(self, request):
    qr = xqrcode.decode_from_file(request.file_path)
    if len(qr) > 0:
        result_url = qr[0]['data']
        result = Result()

        text_section = ResultSection('QR Code')
        text_section.add_line(result_url)
        result.add_section(text_section)

        url_section = ResultSection('url extracted',
                                    body_format=BODY_FORMAT.URL,
                                    body=json.dumps({
                                        "name": "QR Code Url",
                                        "url": f"{result_url}"
                                    }))
        url_section.add_tag("network.static.domain", result_url)
        result.add_section(url_section)

        request.result = result
    else:
        request.result = Result()
def execute(self, request):
    result = Result()
    file_path = request.file_path

    p1 = subprocess.Popen("clamscan -a -z --detect-pua --alert-macros " + file_path,
                          shell=True, stdout=subprocess.PIPE)
    p1.wait()
    stdout = p1.communicate()[0].decode("utf-8")

    report = stdout.split("\n")
    report = list(filter(None, report))

    text_section = ResultSection("Successfully scanned the file")
    if "FOUND" in report[0]:
        text_section.set_heuristic(1)
    for line in report:
        text_section.add_line(line)

    result.add_section(text_section)
    request.result = result
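# A note on the Popen call above: building the command by string concatenation
# with shell=True lets a crafted file path inject shell syntax. clamscan also
# encodes its outcome in the exit code (0 = clean, 1 = signature matched,
# 2 = error), so an equivalent, safer sketch of the same scan would be:
completed = subprocess.run(
    ["clamscan", "-a", "-z", "--detect-pua", "--alert-macros", file_path],
    capture_output=True, text=True)
report = [line for line in completed.stdout.split("\n") if line]
if completed.returncode == 1:  # a signature matched
    text_section.set_heuristic(1)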
def execute(self, request: ServiceRequest) -> None:
    result = Result()
    self.hits = {}  # clear the hits dict
    path = request.file_path
    file_name = request.file_name

    self.log.info(f" Executing {file_name}")
    self.log.info(f"Number of rules {len(self.sigma_parser.rules)}")

    self.sigma_parser.register_callback(self.sigma_hit)
    self.sigma_parser.check_logfile(path)

    if len(self.hits) > 0:
        hit_section = ResultSection('Events detected as suspicious')
        # group alerts together
        for rule_id, events in self.hits.items():
            title = self.sigma_parser.rules[rule_id].title
            section = SigmaHitSection(title, events)
            tags = self.sigma_parser.rules[rule_id].tags
            source = events[0]['signature_source']

            # Look for an ATT&CK technique/group/software ID among the rule tags
            attack_id = None
            if tags:
                for tag in tags:
                    name = tag[7:]
                    if name.startswith(('t', 'g', 's')):
                        attack_id = name.upper()

            if attack_id:
                section.set_heuristic(get_heur_id(events[0]['score']),
                                      attack_id=attack_id,
                                      signature=f"{source}.{title}")
            else:
                section.set_heuristic(get_heur_id(events[0]['score']),
                                      signature=f"{source}.{title}")
            section.add_tag(f"file.rule.{source}", f"{source}.{title}")

            for event in events:
                # add the event data as a subsection
                section.add_subsection(EventDataSection(event))
            hit_section.add_subsection(section)
        result.add_section(hit_section)
    request.result = result
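# get_heur_id is defined elsewhere in the service; it is assumed to map the
# Sigma rule level carried in the event's 'score' field onto heuristic IDs,
# roughly like this sketch (the exact level names and IDs are assumptions):
def get_heur_id(level: str) -> int:
    return {"critical": 1, "high": 2, "medium": 3, "low": 4}.get(level, 4)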
def parse_results(self, response: Dict[str, Any]) -> Result:
    """
    This method parses the response JSON containing the scan results so that it will be
    displayed nicely in Assemblyline
    :param response: The raw results from the MetaDefender scan
    :return: The Result object to be used when displaying in Assemblyline
    """
    res = Result()
    scan_results = response.get('scan_results', response)
    virus_name = ""
    process_results = response.get('process_info', response)
    hit = False
    fail = False
    processed = {}

    if scan_results is not None and scan_results.get('progress_percentage') == 100:
        no_threat_detected = []
        av_hits = ResultSection('AV Detections as Infected or Suspicious')
        av_fails = ResultSection('Failed to Scan or No Threats Detected')

        scans = scan_results.get('scan_details', scan_results)
        av_scan_times = []
        modified_scans = {key: value for key, value in scans.items()
                          if key not in ["progress_percentage"]}
        for majorkey, subdict in sorted(modified_scans.items()):
            if majorkey in self.blocklist:
                continue
            heur_id = None
            if subdict['scan_result_i'] == 1:  # File is infected
                virus_name = subdict['threat_found']
                if virus_name:
                    heur_id = 1
            elif subdict['scan_result_i'] == 2:  # File is suspicious
                virus_name = subdict['threat_found']
                if virus_name:
                    heur_id = 2
            elif subdict['scan_result_i'] == 10 or subdict['scan_result_i'] == 3:
                # File was not scanned or failed
                # noinspection PyBroadException
                try:
                    engine = self.nodes[self.current_node]['engine_map'][self._format_engine_name(majorkey)]
                except Exception:
                    engine = None
                fail = True
                av_fails.add_subsection(AvErrorSection(majorkey, engine))
            elif subdict['scan_result_i'] == 0:  # No threat detected
                no_threat_detected.append(majorkey)
                fail = True

            if heur_id is not None:
                virus_name = virus_name.replace("a variant of ", "")
                engine = self.nodes[self.current_node]['engine_map'][self._format_engine_name(majorkey)]
                av_hit_section = AvHitSection(majorkey, virus_name, engine, heur_id,
                                              self.sig_score_revision_map,
                                              self.kw_score_revision_map,
                                              self.safelist_match)
                av_hits.add_subsection(av_hit_section)
                hit = True

            av_scan_times.append(self._format_engine_name(majorkey))
            av_scan_times.append(subdict['scan_time'])

        if hit:
            res.add_section(av_hits)

        # Only create a result section for "No Threat Detected" if there was at least one hit
        if hit and fail:
            if no_threat_detected:
                ResultSection("No Threat Detected by AV Engine(s)",
                              body_format=BODY_FORMAT.KEY_VALUE,
                              body=json.dumps(dict(no_threat_detected=no_threat_detected)),
                              parent=av_fails)
            res.add_section(av_fails)

        file_size = response['file_info']['file_size']
        queue_time = response['process_info']['queue_time']
        processing_time = response['process_info']['processing_time']
        self.log.info(f"File successfully scanned by node ({self.current_node}). File size: {file_size} B. "
                      f"Queue time: {queue_time} ms. Processing time: {processing_time} ms. "
                      f"AV scan times: {str(av_scan_times)}")

        # Add the queue time to a list, which will be later used to calculate average queue time
        self.nodes[self.current_node]['queue_times'].append(queue_time)
        self.nodes[self.current_node]['file_count'] += 1

    if process_results is not None and process_results.get('progress_percentage') == 100:
        hit = False
        fail = False
        processed = process_results.get('post_processing', process_results)
        if processed['actions_failed']:
            fail = True
        elif processed['actions_ran']:
            hit = True

        # add cdr json extracted
        if hit:
            cdr_json_section = ResultSection('CDR Successfully Executed',
                                             body_format=BODY_FORMAT.JSON,
                                             body=json.dumps(processed))
            res.add_section(cdr_json_section)
        if fail:
            cdr_fails = ResultSection('CDR Failed or No Malicious Files Found')
            res.add_section(cdr_fails)

    return res
def run_badging_analysis(self, apk_file: str, result: Result):
    badging_args = ['d', 'badging', apk_file]
    badging, errors = self.run_appt(badging_args)
    if not badging:
        return

    res_badging = ResultSection("Android application details")
    libs = []
    permissions = []
    components = []
    features = []
    pkg_version = None
    for line in badging.splitlines():
        if line.startswith("package:"):
            pkg_name = line.split("name='")[1].split("'")[0]
            pkg_version = line.split("versionCode='")[1].split("'")[0]
            res_badging.add_line(f"Package: {pkg_name} v.{pkg_version}")
            res_badging.add_tag('file.apk.pkg_name', pkg_name)
            res_badging.add_tag('file.apk.app.version', pkg_version)

        if line.startswith("sdkVersion:"):
            min_sdk = line.split(":'")[1][:-1]
            res_badging.add_line(f"Min SDK: {min_sdk}")
            res_badging.add_tag('file.apk.sdk.min', min_sdk)

        if line.startswith("targetSdkVersion:"):
            target_sdk = line.split(":'")[1][:-1]
            res_badging.add_line(f"Target SDK: {target_sdk}")
            res_badging.add_tag('file.apk.sdk.target', target_sdk)

        if line.startswith("application-label:"):
            label = line.split(":'")[1][:-1]
            res_badging.add_line(f"Default Label: {label}")
            res_badging.add_tag('file.apk.app.label', label)

        if line.startswith("launchable-activity:"):
            launch = line.split("name='")[1].split("'")[0]
            res_badging.add_line(f"Launchable activity: {launch}")
            res_badging.add_tag('file.apk.activity', launch)

        if line.startswith("uses-library-not-required:"):
            lib = line.split(":'")[1][:-1]
            if lib not in libs:
                libs.append(lib)

        if line.startswith("uses-permission:") or line.startswith("uses-implied-permission:"):
            perm = line.split("name='")[1].split("'")[0]
            if perm not in permissions:
                permissions.append(perm)

        if line.startswith("provides-component:"):
            component = line.split(":'")[1][:-1]
            if component not in components:
                components.append(component)

        if "uses-feature:" in line or "uses-implied-feature:" in line:
            feature = line.split("name='")[1].split("'")[0]
            if feature not in features:
                features.append(feature)

    if pkg_version is not None:
        pkg_version = int(pkg_version)
        if pkg_version < 15:
            ResultSection("Package version is suspiciously low", parent=res_badging,
                          heuristic=Heuristic(17))
        elif pkg_version > 999999999:
            ResultSection("Package version is suspiciously high", parent=res_badging,
                          heuristic=Heuristic(17))

    if libs:
        res_lib = ResultSection("Libraries used", parent=res_badging)
        for lib in libs:
            res_lib.add_line(lib)
            res_lib.add_tag('file.apk.used_library', lib)

    if permissions:
        res_permissions = ResultSection("Permissions used", parent=res_badging)
        dangerous_permissions = []
        unknown_permissions = []
        for perm in permissions:
            if perm in ALL_ANDROID_PERMISSIONS:
                if 'dangerous' in ALL_ANDROID_PERMISSIONS[perm]:
                    dangerous_permissions.append(perm)
                else:
                    res_permissions.add_line(perm)
                    res_permissions.add_tag('file.apk.permission', perm)
            else:
                unknown_permissions.append(perm)

        if len(set(permissions)) < len(permissions):
            ResultSection("Some permissions are defined more than once", parent=res_badging,
                          heuristic=Heuristic(18))

        if dangerous_permissions:
            res_dangerous_perm = ResultSection("Dangerous permissions used", parent=res_badging,
                                               heuristic=Heuristic(4))
            for perm in dangerous_permissions:
                res_dangerous_perm.add_line(perm)
                res_dangerous_perm.add_tag('file.apk.permission', perm)

        if unknown_permissions:
            res_unknown_perm = ResultSection("Unknown permissions used", parent=res_badging,
                                             heuristic=Heuristic(5))
            for perm in unknown_permissions:
                res_unknown_perm.add_line(perm)
                res_unknown_perm.add_tag('file.apk.permission', perm)

    if features:
        res_features = ResultSection("Features used", parent=res_badging)
        for feature in features:
            res_features.add_line(feature)
            res_features.add_tag('file.apk.feature', feature)

    if components:
        res_components = ResultSection("Components provided", parent=res_badging)
        for component in components:
            res_components.add_line(component)
            res_components.add_tag('file.apk.provides_component', component)

    result.add_section(res_badging)
def peepdf_analysis(self, temp_filename, file_content, request):
    file_res = Result()
    try:
        res_list = []
        # js_stream = []
        f_list = []
        js_dump = []

        pdf_parser = PDFParser()
        ret, pdf_file = pdf_parser.parse(temp_filename, True, False, file_content)
        if ret == 0:
            stats_dict = pdf_file.getStats()

            if ", ".join(stats_dict['Errors']) == "Bad PDF header, %%EOF not found, PDF sections not found, No " \
                                                  "indirect objects found in the body":
                # Not a PDF
                return

            json_body = dict(
                version=stats_dict['Version'],
                binary=stats_dict['Binary'],
                linearized=stats_dict['Linearized'],
                encrypted=stats_dict['Encrypted'],
            )

            if stats_dict['Encryption Algorithms']:
                temp = []
                for algorithmInfo in stats_dict['Encryption Algorithms']:
                    temp.append(f"{algorithmInfo[0]} {str(algorithmInfo[1])} bits")
                json_body["encryption_algorithms"] = temp

            json_body.update(dict(
                updates=stats_dict['Updates'],
                objects=stats_dict['Objects'],
                streams=stats_dict['Streams'],
                comments=stats_dict['Comments'],
                errors={True: ", ".join(stats_dict['Errors']),
                        False: "None"}[len(stats_dict['Errors']) != 0]
            ))
            res = ResultSection("PDF File Information", body_format=BODY_FORMAT.KEY_VALUE,
                                body=json.dumps(json_body))

            for version in range(len(stats_dict['Versions'])):
                stats_version = stats_dict['Versions'][version]
                v_json_body = dict(
                    catalog=stats_version['Catalog'] or "no",
                    info=stats_version['Info'] or "no",
                    objects=self.list_first_x(stats_version['Objects'][1]),
                )

                if stats_version['Compressed Objects'] is not None:
                    v_json_body['compressed_objects'] = self.list_first_x(stats_version['Compressed Objects'][1])

                if stats_version['Errors'] is not None:
                    v_json_body['errors'] = self.list_first_x(stats_version['Errors'][1])

                v_json_body['streams'] = self.list_first_x(stats_version['Streams'][1])

                if stats_version['Xref Streams'] is not None:
                    v_json_body['xref_streams'] = self.list_first_x(stats_version['Xref Streams'][1])

                if stats_version['Object Streams'] is not None:
                    v_json_body['object_streams'] = self.list_first_x(stats_version['Object Streams'][1])

                if int(stats_version['Streams'][0]) > 0:
                    v_json_body['encoded'] = self.list_first_x(stats_version['Encoded'][1])
                    if stats_version['Decoding Errors'] is not None:
                        v_json_body['decoding_errors'] = self.list_first_x(stats_version['Decoding Errors'][1])

                if stats_version['Objects with JS code'] is not None:
                    v_json_body['objects_with_js_code'] = \
                        self.list_first_x(stats_version['Objects with JS code'][1])
                    # js_stream.extend(stats_version['Objects with JS code'][1])

                res_version = ResultSection(f"Version {str(version)}", parent=res,
                                            body_format=BODY_FORMAT.KEY_VALUE,
                                            body=json.dumps(v_json_body))

                actions = stats_version['Actions']
                events = stats_version['Events']
                vulns = stats_version['Vulns']
                elements = stats_version['Elements']
                is_suspicious = False
                if events is not None or actions is not None or vulns is not None or elements is not None:
                    res_suspicious = ResultSection('Suspicious elements', parent=res_version)
                    if events is not None:
                        for event in events:
                            res_suspicious.add_line(f"{event}: {self.list_first_x(events[event])}")
                        is_suspicious = True
                    if actions is not None:
                        for action in actions:
                            res_suspicious.add_line(f"{action}: {self.list_first_x(actions[action])}")
                        is_suspicious = True
                    if vulns is not None:
                        for vuln in vulns:
                            if vuln in vulnsDict:
                                temp = [vuln, ' (']
                                for vulnCVE in vulnsDict[vuln]:
                                    if len(temp) != 2:
                                        temp.append(',')
                                    vulnCVE = "".join(vulnCVE) if isinstance(vulnCVE, list) else vulnCVE
                                    temp.append(vulnCVE)
                                    cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                    if cve_found:
                                        res_suspicious.add_tag('attribution.exploit',
                                                               vulnCVE[cve_found.start():cve_found.end()])
                                        res_suspicious.add_tag('file.behavior',
                                                               vulnCVE[cve_found.start():cve_found.end()])
                                temp.append('): ')
                                temp.append(str(vulns[vuln]))
                                res_suspicious.add_line(temp)
                            else:
                                res_suspicious.add_line(f"{vuln}: {str(vulns[vuln])}")
                            is_suspicious = True
                    if elements is not None:
                        for element in elements:
                            if element in vulnsDict:
                                temp = [element, ' (']
                                for vulnCVE in vulnsDict[element]:
                                    if len(temp) != 2:
                                        temp.append(',')
                                    vulnCVE = "".join(vulnCVE) if isinstance(vulnCVE, list) else vulnCVE
                                    temp.append(vulnCVE)
                                    cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                    if cve_found:
                                        res_suspicious.add_tag('attribution.exploit',
                                                               vulnCVE[cve_found.start():cve_found.end()])
                                        res_suspicious.add_tag('file.behavior',
                                                               vulnCVE[cve_found.start():cve_found.end()])
                                temp.append('): ')
                                temp.append(str(elements[element]))
                                res_suspicious.add_line(temp)
                            else:
                                res_suspicious.add_line(f"\t\t{element}: {str(elements[element])}")
                            is_suspicious = True
                    if is_suspicious:
                        res_suspicious.set_heuristic(8)

                urls = stats_version['URLs']
                if urls is not None:
                    res.add_line("")
                    res_url = ResultSection('Found URLs', parent=res)
                    for url in urls:
                        res_url.add_line(f"\t\t{url}")
                    res_url.set_heuristic(9)

                for obj in stats_version['Objects'][1]:
                    cur_obj = pdf_file.getObject(obj, version)

                    if cur_obj.containsJScode:
                        cur_res = ResultSection(f"Object [{obj} {version}] contains {len(cur_obj.JSCode)} "
                                                f"block(s) of JavaScript")
                        score_modifier = 0

                        js_idx = 0
                        for js in cur_obj.JSCode:
                            sub_res = ResultSection('Block of JavaScript', parent=cur_res)
                            js_idx += 1
                            js_score = 0
                            js_code, unescaped_bytes, _, _, _ = analyseJS(js)

                            js_dump.extend(js_code)

                            # Malicious characteristics
                            big_buffs = self.get_big_buffs("".join(js_code))
                            if len(big_buffs) > 0:
                                js_score += 500 * len(big_buffs)
                            has_eval, has_unescape = self.check_dangerous_func("".join(js_code))
                            if has_unescape:
                                js_score += 100
                            if has_eval:
                                js_score += 100

                            js_cmt = ""
                            if has_eval or has_unescape or len(big_buffs) > 0:
                                score_modifier += js_score
                                js_cmt = "Suspiciously malicious "
                                cur_res.add_tag('file.behavior', "Suspicious JavaScript in PDF")
                                sub_res.set_heuristic(7)
                            js_res = ResultSection(f"{js_cmt}JavaScript Code (block: {js_idx})", parent=sub_res)

                            if js_score > 0:
                                temp_js_outname = f"object{obj}-{version}_{js_idx}.js"
                                temp_js_path = os.path.join(self.working_directory, temp_js_outname)
                                temp_js_bin = "".join(js_code).encode("utf-8")
                                with open(temp_js_path, "wb") as f:
                                    f.write(temp_js_bin)
                                f_list.append(temp_js_path)
                                js_res.add_line(f"The JavaScript block was saved as {temp_js_outname}")

                                if has_eval or has_unescape:
                                    analysis_res = ResultSection("[Suspicious Functions]", parent=js_res)
                                    if has_eval:
                                        analysis_res.add_line("eval: This JavaScript block uses eval() function "
                                                              "which is often used to launch deobfuscated "
                                                              "JavaScript code.")
                                        analysis_res.set_heuristic(3)
                                    if has_unescape:
                                        analysis_res.add_line("unescape: This JavaScript block uses unescape() "
                                                              "function. It may be legitimate but it is definitely "
                                                              "suspicious since malware often use this to "
                                                              "deobfuscate code blocks.")
                                        analysis_res.set_heuristic(3)

                                buff_idx = 0
                                for buff in big_buffs:
                                    buff_idx += 1
                                    error, new_buff = unescape(buff)
                                    if error == 0:
                                        buff = new_buff

                                    if buff not in unescaped_bytes:
                                        temp_path_name = None
                                        if ";base64," in buff[:100] and "data:" in buff[:100]:
                                            temp_path_name = f"obj{obj}_unb64_{buff_idx}.buff"
                                            try:
                                                buff = b64decode(buff.split(";base64,")[1].strip())
                                                temp_path = os.path.join(self.working_directory, temp_path_name)
                                                with open(temp_path, "wb") as f:
                                                    f.write(buff)
                                                f_list.append(temp_path)
                                            except Exception:
                                                self.log.error("Found 'data:;base64, ' buffer "
                                                               "but failed to base64 decode.")
                                                temp_path_name = None

                                        if temp_path_name is not None:
                                            buff_cond = f" and was resubmitted as {temp_path_name}"
                                        else:
                                            buff_cond = ""
                                        # buff may be str (original) or bytes (after b64decode)
                                        first_256 = buff[:256]
                                        if isinstance(first_256, str):
                                            first_256 = first_256.encode()
                                        buff_res = ResultSection(
                                            f"A {len(buff)} bytes buffer was found in the JavaScript "
                                            f"block{buff_cond}. Here are the first 256 bytes.",
                                            parent=js_res, body=hexdump(first_256),
                                            body_format=BODY_FORMAT.MEMORY_DUMP)
                                        buff_res.set_heuristic(2)

                            processed_sc = []
                            sc_idx = 0
                            for sc in unescaped_bytes:
                                if sc not in processed_sc:
                                    sc_idx += 1
                                    processed_sc.append(sc)

                                    try:
                                        # Python 2 remnant: on Python 3 this always raises and is ignored
                                        sc = sc.decode("hex")
                                    except Exception:
                                        pass

                                    shell_score = 500
                                    temp_path_name = f"obj{obj}_unescaped_{sc_idx}.buff"

                                    shell_res = ResultSection(f"Unknown unescaped {len(sc)} bytes JavaScript "
                                                              f"buffer (id: {sc_idx}) was resubmitted as "
                                                              f"{temp_path_name}. Here are the first 256 bytes.",
                                                              parent=js_res)
                                    shell_res.set_body(hexdump(sc[:256]), body_format=BODY_FORMAT.MEMORY_DUMP)

                                    temp_path = os.path.join(self.working_directory, temp_path_name)
                                    with open(temp_path, "wb") as f:
                                        f.write(sc)
                                    f_list.append(temp_path)

                                    cur_res.add_tag('file.behavior', "Unescaped JavaScript Buffer")
                                    shell_res.set_heuristic(6)
                                    score_modifier += shell_score

                        if score_modifier > 0:
                            res_list.append(cur_res)

                    elif cur_obj.type == "stream":
                        if cur_obj.isEncodedStream and cur_obj.filter is not None:
                            data = cur_obj.decodedStream
                            encoding = cur_obj.filter.value.replace("[", "").replace("]", "").replace("/", "").strip()
                            val = cur_obj.rawValue
                            otype = cur_obj.elements.get("/Type", None)
                            sub_type = cur_obj.elements.get("/Subtype", None)
                            length = cur_obj.elements.get("/Length", None)
                        else:
                            data = cur_obj.rawStream
                            encoding = None
                            val = cur_obj.rawValue
                            otype = cur_obj.elements.get("/Type", None)
                            sub_type = cur_obj.elements.get("/Subtype", None)
                            length = cur_obj.elements.get("/Length", None)

                        if otype:
                            otype = otype.value.replace("/", "").lower()
                        if sub_type:
                            sub_type = sub_type.value.replace("/", "").lower()
                        if length:
                            length = length.value

                        if otype == "embeddedfile":
                            if len(data) > 4096:
                                if encoding is not None:
                                    temp_encoding_str = f"_{encoding}"
                                else:
                                    temp_encoding_str = ""

                                cur_res = ResultSection(
                                    f'Embedded file found ({length} bytes) [obj: {obj} {version}] '
                                    f'and dumped for analysis {f"(Type: {otype}) " if otype is not None else ""}'
                                    f'{f"(SubType: {sub_type}) " if sub_type is not None else ""}'
                                    f'{f"(Encoded with {encoding})" if encoding is not None else ""}')

                                temp_path_name = f"EmbeddedFile_{obj}{temp_encoding_str}.obj"
                                temp_path = os.path.join(self.working_directory, temp_path_name)
                                with open(temp_path, "wb") as f:
                                    f.write(data)
                                f_list.append(temp_path)
                                cur_res.add_line(f"The EmbeddedFile object was saved as {temp_path_name}")
                                res_list.append(cur_res)

                        elif otype not in BANNED_TYPES:
                            cur_res = ResultSection(
                                f'Unknown stream found [obj: {obj} {version}] '
                                f'{f"(Type: {otype}) " if otype is not None else ""}'
                                f'{f"(SubType: {sub_type}) " if sub_type is not None else ""}'
                                f'{f"(Encoded with {encoding})" if encoding is not None else ""}')
                            for line in val.splitlines():
                                cur_res.add_line(line)

                            emb_res = ResultSection('First 256 bytes', parent=cur_res)
                            first_256 = data[:256]
                            if isinstance(first_256, str):
                                first_256 = first_256.encode()
                            emb_res.set_body(hexdump(first_256), BODY_FORMAT.MEMORY_DUMP)
                            res_list.append(cur_res)
                    else:
                        pass

            file_res.add_section(res)

            for results in res_list:
                file_res.add_section(results)

            if js_dump:
                js_dump_res = ResultSection('Full JavaScript dump')

                temp_js_dump = "javascript_dump.js"
                temp_js_dump_path = os.path.join(self.working_directory, temp_js_dump)
                try:
                    temp_js_dump_bin = "\n\n----\n\n".join(js_dump).encode("utf-8")
                except UnicodeDecodeError:
                    temp_js_dump_bin = "\n\n----\n\n".join(js_dump)
                temp_js_dump_sha1 = hashlib.sha1(temp_js_dump_bin).hexdigest()
                with open(temp_js_dump_path, "wb") as f:
                    f.write(temp_js_dump_bin)
                    f.flush()
                f_list.append(temp_js_dump_path)

                js_dump_res.add_line(f"The JavaScript dump was saved as {temp_js_dump}")
                js_dump_res.add_line(f"The SHA-1 for the JavaScript dump is {temp_js_dump_sha1}")

                js_dump_res.add_tag('file.pdf.javascript.sha1', temp_js_dump_sha1)
                file_res.add_section(js_dump_res)

            for filename in f_list:
                request.add_extracted(filename, os.path.basename(filename),
                                      f"Dumped from {os.path.basename(temp_filename)}")

        else:
            res = ResultSection("ERROR: Could not parse file with PeePDF.")
            file_res.add_section(res)
    finally:
        request.result = file_res
        try:
            del pdf_file
        except Exception:
            pass

        try:
            del pdf_parser
        except Exception:
            pass

        gc.collect()
def execute(self, request):
    file_path = request.file_path
    result = Result()

    # Report the version of suricata as the service context
    request.set_service_context(f"Suricata version: {self.get_suricata_version()}")

    # restart Suricata if we need to
    self.start_suricata_if_necessary()

    # Strip frame headers from the PCAP, since Suricata sometimes has trouble parsing strange PCAPs
    stripped_filepath = self.strip_frame_headers(file_path)

    # Check to make sure the size of the stripped file isn't 0 - this happens on pcapng files
    # TODO: there's probably a better way to do this - don't even strip it if it's pcapng
    if os.stat(stripped_filepath).st_size == 0:
        stripped_filepath = file_path

    # Switch stdout and stderr so we don't get our logs polluted
    mystdout = StringIO()
    old_stdout = sys.stdout
    sys.stdout = mystdout

    mystderr = StringIO()
    old_stderr = sys.stderr
    sys.stderr = mystderr

    # Pass the pcap file to Suricata via the socket
    ret = self.suricata_sc.send_command("pcap-file", {
        "filename": stripped_filepath,
        "output-dir": self.working_directory
    })

    if not ret or ret["return"] != "OK":
        # Guard the message lookup in case the socket returned nothing at all
        self.log.exception(f"Failed to submit PCAP for processing: "
                           f"{ret['message'] if ret else 'no response'}")

    # Wait for the socket to finish processing our PCAP
    while True:
        time.sleep(1)
        try:
            ret = self.suricata_sc.send_command("pcap-current")
            if ret and ret["message"] == "None":
                break
        except ConnectionResetError as e:
            raise RecoverableError(e)

    # Bring back stdout and stderr
    sys.stdout = old_stdout
    sys.stderr = old_stderr

    # NOTE: for now we will ignore content of mystdout and mystderr but we have them just in case...

    alerts, signatures, domains, ips, urls, email_addresses, tls_dict, extracted_files, reverse_lookup = \
        self.parse_suricata_output().values()

    file_extracted_section = ResultSection("File(s) extracted by Suricata")
    # Parse the json results of the service
    if request.get_param("extract_files"):
        for file in extracted_files:
            sha256, filename, extracted_file_path = file.values()
            self.log.info(f"extracted file {filename}")
            try:
                if request.add_extracted(extracted_file_path, filename, "Extracted by Suricata",
                                         safelist_interface=self.api_interface):
                    file_extracted_section.add_line(filename)
                    if filename != sha256:
                        file_extracted_section.add_tag('file.name.extracted', filename)
            except FileNotFoundError as e:
                # An intermittent issue, just try again
                raise RecoverableError(e)
            except MaxExtractedExceeded:
                # We've hit our limit
                pass

    # Report a null score to indicate that files were extracted. If no sigs hit, it's not clear
    # where the extracted files came from
    if file_extracted_section.body:
        result.add_section(file_extracted_section)

    # Add tags for the domains, urls, and IPs we've discovered
    root_section = ResultSection("Discovered IOCs", parent=result)
    if domains:
        domain_section = ResultSection("Domains", parent=root_section)
        for domain in domains:
            domain_section.add_line(domain)
            domain_section.add_tag('network.dynamic.domain', domain)
    if ips:
        ip_section = ResultSection("IP Addresses", parent=root_section)
        for ip in ips:
            # Make sure it's not a local IP
            if not (ip.startswith("127.")
                    or ip.startswith("192.168.")
                    or ip.startswith("10.")
                    or (ip.startswith("172.") and 16 <= int(ip.split(".")[1]) <= 31)):
                ip_section.add_line(ip)
                ip_section.add_tag('network.dynamic.ip', ip)

    if urls:
        url_section = ResultSection("URLs", parent=root_section)
        for url in urls:
            url_section.add_line(url)
            url_section.add_tag('network.dynamic.uri', url)
    if email_addresses:
        email_section = ResultSection("Email Addresses", parent=root_section)
        for eml in email_addresses:
            email_section.add_line(eml)
            email_section.add_tag('network.email.address', eml)

    # Map between suricata key names and AL tag types
    tls_mappings = {
        "subject": 'cert.subject',
        "issuerdn": 'cert.issuer',
        "version": 'cert.version',
        "notbefore": 'cert.valid.start',
        "notafter": 'cert.valid.end',
        "fingerprint": 'cert.thumbprint',
        "sni": 'network.tls.sni'
    }

    if tls_dict:
        tls_section = ResultSection("TLS Information", parent=root_section,
                                    body_format=BODY_FORMAT.JSON)
        kv_body = {}
        for tls_type, tls_values in tls_dict.items():
            if tls_type == "fingerprint":
                # make sure the cert fingerprint/thumbprint matches other values,
                # like from PEFile
                tls_values = [v.replace(":", "").lower() for v in tls_values]

            if tls_type in tls_mappings:
                kv_body[tls_type] = tls_values

                tag_type = tls_mappings[tls_type]
                if tag_type is not None:
                    for tls_value in tls_values:
                        tls_section.add_tag(tag_type, tls_value)

            elif tls_type == "ja3":
                kv_body.setdefault('ja3_hash', [])
                kv_body.setdefault('ja3_string', [])

                for ja3_entry in tls_values:
                    ja3_hash = ja3_entry.get("hash")
                    ja3_string = ja3_entry.get("string")

                    if ja3_hash:
                        kv_body['ja3_hash'].append(ja3_hash)
                        tls_section.add_tag('network.tls.ja3_hash', ja3_hash)
                    if ja3_string:
                        kv_body['ja3_string'].append(ja3_string)
                        tls_section.add_tag('network.tls.ja3_string', ja3_string)

            else:
                kv_body[tls_type] = tls_values
                # stick a message in the logs about a new TLS type found in suricata logs
                self.log.info(f"Found new TLS type {tls_type} with values {tls_values}")

        tls_section.set_body(json.dumps(kv_body))

    # Create the result sections if there are any hits
    if len(alerts) > 0:
        for signature_id, signature_details in signatures.items():
            signature = signature_details['signature']
            attributes = signature_details['attributes']
            section = ResultSection(f'{signature_id}: {signature}')
            heur_id = 3
            if any(x in signature for x in self.config.get("sure_score")):
                heur_id = 1
            elif any(x in signature for x in self.config.get("vhigh_score")):
                heur_id = 2

            section.set_heuristic(heur_id)
            if signature_details['al_signature']:
                section.add_tag("file.rule.suricata", signature_details['al_signature'])

            for timestamp, src_ip, src_port, dest_ip, dest_port in alerts[signature_id][:10]:
                section.add_line(f"{timestamp} {src_ip}:{src_port} -> {dest_ip}:{dest_port}")
            if len(alerts[signature_id]) > 10:
                section.add_line(f'And {len(alerts[signature_id]) - 10} more flows')

            # Tag IPs/Domains/URIs associated to signature
            for flow in alerts[signature_id]:
                dest_ip = flow[3]
                section.add_tag('network.dynamic.ip', dest_ip)
                if dest_ip in reverse_lookup:
                    section.add_tag('network.dynamic.domain', reverse_lookup[dest_ip])
                for uri in urls:
                    if dest_ip in uri or (reverse_lookup.get(dest_ip) and reverse_lookup[dest_ip] in uri):
                        section.add_tag('network.dynamic.uri', uri)

            # Add a tag for the signature id and the message
            section.add_tag('network.signature.signature_id', str(signature_id))
            section.add_tag('network.signature.message', signature)
            for attr in attributes:
                if attr.get('uri'):
                    section.add_tag('network.static.uri', attr['uri'])

            # Tag malware_family
            for malware_family in signature_details['malware_family']:
                section.add_tag('attribution.family', malware_family)

            result.add_section(section)
            self.ontology.add_result_part(Signature, data=dict(
                name=signature_details['al_signature'],
                type="SURICATA",
                malware_families=signature_details['malware_family'] or None,
                attributes=attributes))

    # Add the original Suricata output as a supplementary file in the result
    request.add_supplementary(os.path.join(self.working_directory, 'eve.json'),
                              'SuricataEventLog.json', 'json')

    # Add the stats.log to the result, which can be used to determine service success
    if os.path.exists(os.path.join(self.working_directory, 'stats.log')):
        request.add_supplementary(os.path.join(self.working_directory, 'stats.log'),
                                  'stats.log', 'log')

    request.result = result
def execute(self, request):
    # ==================================================================
    # Execute a request:
    #   Every time your service receives a new file to scan, the execute function is called
    #   This is where you should execute your processing code.
    #   For the purpose of this example, we will only generate results ...

    # You should run your code here...

    # ==================================================================
    # Check if we're scanning an embedded file
    #   This service always drops 3 embedded files: two generate random results and the
    #   other generates empty results
    #   We're making a check to see if we're scanning the embedded file.
    #   In a normal service this is not something you would do at all but since we are using this
    #   service in our unit test to test all features of our report generator, we have to do this
    if request.sha256 not in ['d729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                              '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                              'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
        # Main file results...

        # ==================================================================
        # Write the results:
        #   First, create a result object where all the result sections will be saved to
        result = Result()

        # ==================================================================
        # Standard text section: BODY_FORMAT.TEXT - DEFAULT
        #   Text sections basically just dump the text to the screen...
        #     All sections scores will be SUMed in the service result
        #     The Result classification will be the highest classification found in the sections
        text_section = ResultSection('Example of a default section')
        # You can add lines to your section one at a time
        #   Here we will generate a random line
        text_section.add_line(get_random_phrase())
        # Or you can add them from a list
        #   Here we will generate a random amount of random lines
        text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])
        # If the section needs to affect the score of the file you need to set a heuristic
        #   Here we will pick one at random
        #     In addition to adding a heuristic, we will associate a signature with the heuristic;
        #     we're doing this by adding the signature name to the heuristic. (Here we're generating a random name)
        text_section.set_heuristic(3, signature="sig_one")
        # You can attach attack ids to heuristics after they were defined
        text_section.heuristic.add_attack_id("T1066")
        # Same thing for the signatures, they can be added to the heuristic after the fact and you can even say
        # how many times the signature fired by setting its frequency. If you call add_signature_id twice with
        # the same signature, this will effectively increase the frequency of the signature.
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=2)
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=3)
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_four", score=0)
        # The heuristic for text_section should have the following properties
        #   1. 1 attack ID: T1066
        #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
        #   3. Signature frequencies are cumulative, therefore they will be as follows:
        #      - sig_one = 1
        #      - sig_two = 5
        #      - sig_three = 2
        #      - sig_four = 1
        #   4. The score used by each heuristic is driven by the following rules: signature_score_map has the
        #      highest priority, then the score value passed to add_signature_id, and finally the default
        #      heuristic score. Therefore the scores used to calculate the total score for the text_section
        #      are as follows:
        #      - sig_one: 10 -> heuristic default score
        #      - sig_two: 20 -> score provided by the function add_signature_id
        #      - sig_three: 30 -> score provided by the heuristic map
        #      - sig_four: 40 -> score provided by the heuristic map because it's higher priority than the
        #                        function score
        #   5. Total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210
        # Make sure you add your section to the result
        result.add_section(text_section)

        # ==================================================================
        # Color map Section: BODY_FORMAT.GRAPH_DATA
        #     Creates a color map bar using a minimum and maximum domain
        #     e.g. We are using this section to display the entropy distribution in some services
        cmap_min = 0
        cmap_max = 20
        color_map_data = {
            'type': 'colormap',
            'data': {
                'domain': [cmap_min, cmap_max],
                'values': [random.random() * cmap_max for _ in range(50)]
            }
        }
        # The classification of a section can be set to any valid classification for your system
        section_color_map = ResultSection("Example of colormap result section",
                                          body_format=BODY_FORMAT.GRAPH_DATA,
                                          body=json.dumps(color_map_data),
                                          classification=cl_engine.RESTRICTED)
        result.add_section(section_color_map)

        # ==================================================================
        # URL section: BODY_FORMAT.URL
        #   Generate a list of clickable urls using a json encoded format
        #     As you can see here, the body of the section can be set directly instead of line by line
        random_host = get_random_host()
        url_section = ResultSection('Example of a simple url section',
                                    body_format=BODY_FORMAT.URL,
                                    body=json.dumps({"name": "Random url!",
                                                     "url": f"https://{random_host}/"}))

        # Since urls are very important features we can tag those features in the system so they are easy to find
        #   Tags are defined by a type and a value
        url_section.add_tag("network.static.domain", random_host)

        # You may also want to provide a list of urls!
        #   Also, no need to provide a name, the url link will be displayed
        host1 = get_random_host()
        host2 = get_random_host()
        ip1 = get_random_ip()
        ip2 = get_random_ip()
        ip3 = get_random_ip()
        urls = [
            {"url": f"https://{host1}/"},
            {"url": f"https://{host2}/"},
            {"url": f"https://{ip1}/"},
            {"url": f"https://{ip2}/"},
            {"url": f"https://{ip3}/"}]

        # A heuristic can fire more than once without being associated to a signature
        url_heuristic = Heuristic(4, frequency=len(urls))

        url_sub_section = ResultSection('Example of a url section with multiple links',
                                        body=json.dumps(urls),
                                        body_format=BODY_FORMAT.URL,
                                        heuristic=url_heuristic)
        url_sub_section.add_tag("network.static.ip", ip1)
        url_sub_section.add_tag("network.static.ip", ip2)
        url_sub_section.add_tag("network.static.ip", ip3)
        url_sub_section.add_tag("network.static.domain", host1)
        url_sub_section.add_tag("network.dynamic.domain", host2)
        # Since url_sub_section is a sub-section of url_section
        # we will add it as a sub-section of url_section, not to the main result itself
        url_section.add_subsection(url_sub_section)
        result.add_section(url_section)

        # ==================================================================
        # Memory dump section: BODY_FORMAT.MEMORY_DUMP
        #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
        data = hexdump(b"This is some random text that we will format as an hexdump and you'll see "
                       b"that the hexdump formatting will be preserved by the memory dump section!")
        memdump_section = ResultSection('Example of a memory dump section',
                                        body_format=BODY_FORMAT.MEMORY_DUMP,
                                        body=data)
        memdump_section.set_heuristic(random.randint(1, 4))
        result.add_section(memdump_section)

        # ==================================================================
        # KEY_VALUE section:
        #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
        #     while also providing easy to parse data for automated tools.
        #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
        #         in the UI for the user
        #     The body argument must be a json dumps of a dictionary (only str, int, and booleans are allowed)
        kv_body = {
            "a_str": "Some string",
            "a_bool": False,
            "an_int": 102,
        }
        kv_section = ResultSection('Example of a KEY_VALUE section',
                                   body_format=BODY_FORMAT.KEY_VALUE,
                                   body=json.dumps(kv_body))
        result.add_section(kv_section)

        # ==================================================================
        # JSON section:
        #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
        #     to display a tree view of JSON results.
        #     NB: Use this sparingly! As a service developer you should do your best to include important
        #         results as their own result sections.
        #     The body argument must be a json dump of a python dictionary
        json_body = {
            "a_str": "Some string",
            "a_list": ["a", "b", "c"],
            "a_bool": False,
            "an_int": 102,
            "a_dict": {
                "list_of_dict": [
                    {"d1_key": "val", "d1_key2": "val2"},
                    {"d2_key": "val", "d2_key2": "val2"}
                ],
                "bool": True
            }
        }
        json_section = ResultSection('Example of a JSON section',
                                     body_format=BODY_FORMAT.JSON,
                                     body=json.dumps(json_body))
        result.add_section(json_section)

        # ==================================================================
        # PROCESS_TREE section:
        #     This section allows the service writer to list a bunch of dictionary objects that have nested lists
        #     of dictionaries to be displayed in the UI. Each dictionary object represents a process, and therefore
        #     each dictionary must be of the following format:
        #     {
        #       "process_pid": int,
        #       "process_name": str,
        #       "command_line": str,
        #       "children": []    NB: This list either is empty or contains more dictionaries that have the same
        #                             structure
        #     }
        nc_body = [
            {
                "process_pid": 123,
                "process_name": "evil.exe",
                "command_line": "C:\\evil.exe",
                "signatures": {},
                "children": [
                    {
                        "process_pid": 321,
                        "process_name": "takeovercomputer.exe",
                        "command_line": "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff",
                        "signatures": {"one": 250},
                        "children": [
                            {
                                "process_pid": 456,
                                "process_name": "evenworsethanbefore.exe",
                                "command_line": "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                                "signatures": {"one": 10, "two": 10, "three": 10},
                                "children": []
                            },
                            {
                                "process_pid": 234,
                                "process_name": "badfile.exe",
                                "command_line": "C:\\badfile.exe -k nothing_to_see_here",
                                "signatures": {"one": 1000, "two": 10, "three": 10, "four": 10, "five": 10},
                                "children": []
                            }
                        ]
                    },
                    {
                        "process_pid": 345,
                        "process_name": "benignexe.exe",
                        "command_line": "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                        "signatures": {"one": 2000},
                        "children": []
                    }
                ]
            },
            {
                "process_pid": 987,
                "process_name": "runzeroday.exe",
                "command_line": "C:\\runzeroday.exe -f insert_bad_spelling",
                "signatures": {},
                "children": []
            }
        ]
        nc_section = ResultSection('Example of a PROCESS_TREE section',
                                   body_format=BODY_FORMAT.PROCESS_TREE,
                                   body=json.dumps(nc_body))
        result.add_section(nc_section)

        # ==================================================================
        # TABLE section:
        #     This section allows the service writer to have their content displayed in a table format in the UI
        #     The body argument must be a list [] of dict {} objects. A dict object can have a key value pair
        #     where the value is a flat nested dictionary, and this nested dictionary will be displayed as a
        #     nested table within a cell.
        table_body = [
            {
                "a_str": "Some string1",
                "extra_column_here": "confirmed",
                "a_bool": False,
                "an_int": 101,
            },
            {
                "a_str": "Some string2",
                "a_bool": True,
                "an_int": 102,
            },
            {
                "a_str": "Some string3",
                "a_bool": False,
                "an_int": 103,
            },
            {
                "a_str": "Some string4",
                "a_bool": None,
                "an_int": -1000000000000000000,
                "extra_column_there": "confirmed",
                "nested_table": {
                    "a_str": "Some string3",
                    "a_bool": False,
                    "nested_table_thats_too_deep": {
                        "a_str": "Some string3",
                        "a_bool": False,
                        "an_int": 103,
                    },
                },
            },
        ]
        table_section = ResultSection('Example of a TABLE section',
                                      body_format=BODY_FORMAT.TABLE,
                                      body=json.dumps(table_body))
        result.add_section(table_section)

        # ==================================================================
        # Re-Submitting files to the system
        #     Adding extracted files will have them resubmitted to the system for analysis

        # This file will generate random results on the next run
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(data.encode())
        request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

        # Embedded files can also have their own classification!
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"CLASSIFIED!!!__" + data.encode())
        request.add_extracted(temp_path, "classified.doc", "Classified file ... don't look",
                              classification=cl_engine.RESTRICTED)

        # This file will generate empty results on the next run
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"EMPTY")
        request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

        # ==================================================================
        # Supplementary files
        #     Adding supplementary files will save them on the datastore for future
        #     reference but won't reprocess those files.
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(urls))
        request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")
        # like embedded files, you can add more than one supplementary file
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(json_body))
        request.add_supplementary(temp_path, "json_body.json", "This is the json_body as a JSON file")

        # ==================================================================
        # Wrap-up:
        #     Save your result object back into the request
        request.result = result

    # ==================================================================
    # Empty results file
    elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
        # Creating an empty result object
        request.result = Result()

    # ==================================================================
    # Randomized results file
    else:
        # For the randomized results file, we will completely randomize the results
        #   The content of those results does not matter since we've already showed you
        #   all the different result sections, tagging, heuristics and file upload functions
        embedded_result = Result()

        # random number of sections
        for _ in range(1, 3):
            embedded_result.add_section(self._create_random_section())

        request.result = embedded_result
class ViperMonkey(ServiceBase): def __init__(self, config: Optional[Dict] = None) -> None: super().__init__(config) self.ip_list: List[str] = [] self.url_list: List[str] = [] self.found_powershell = False self.file_hashes: List[str] = [] self.result: Optional[Result] = None def start(self) -> None: self.log.debug("ViperMonkey service started") def execute(self, request: ServiceRequest) -> None: self.result = Result() request.result = self.result self.ip_list = [] self.url_list = [] self.found_powershell = False self.file_hashes = [] vmonkey_err = False actions: List[str] = [] external_functions: List[str] = [] tmp_iocs: List[str] = [] output_results: Dict[str, Any] = {} potential_base64: Set[str] = set() # Running ViperMonkey try: file_contents = request.file_contents input_file: str = request.file_path input_file_obj: Optional[IO] = None # Typical start to XML files if not file_contents.startswith( b"<?") and request.file_type == "code/xml": # Default encoding/decoding if BOM not found encoding: Optional[str] = None decoding: Optional[str] = None # Remove potential BOMs from contents if file_contents.startswith(BOM_UTF8): encoding = "utf-8" decoding = "utf-8-sig" elif file_contents.startswith(BOM_UTF16): encoding = "utf-16" decoding = "utf-16" if encoding and decoding: input_file_obj = tempfile.NamedTemporaryFile( "w+", encoding=encoding) input_file_obj.write( file_contents.decode(decoding, errors="ignore")) input_file = input_file_obj.name else: # If the file_type was detected as XML, it's probably buried within but not actually an XML file # Give no response as ViperMonkey can't process this kind of file return cmd = " ".join([ PYTHON2_INTERPRETER, os.path.join(os.path.dirname(__file__), "vipermonkey_compat.py2"), input_file, self.working_directory, ]) p = subprocess.run(cmd, capture_output=True, shell=True) stdout = p.stdout # Close file if input_file_obj and os.path.exists(input_file_obj.name): input_file_obj.close() # Add artifacts artifact_dir = os.path.join( self.working_directory, os.path.basename(input_file) + "_artifacts") if os.path.exists(artifact_dir): for file in os.listdir(artifact_dir): try: file_path = os.path.join(artifact_dir, file) if os.path.isfile(file_path) and os.path.getsize( file_path): request.add_extracted( file_path, file, "File extracted by ViperMonkey during analysis" ) except os.error as e: self.log.warning(e) # Read output if stdout: for line in stdout.splitlines(): if line.startswith(b"{") and line.endswith(b"}"): try: output_results = json.loads(line) except UnicodeDecodeError: output_results = json.loads( line.decode("utf-8", "replace")) break # Checking for tuple in case vmonkey return is None # If no macros found, return is [][][], if error, return is None # vmonkey_err can still happen if return is [][][], log as warning instead of error if isinstance(output_results.get("vmonkey_values"), dict): """ Structure of variable "actions" is as follows: [action, parameters, description] action: 'Found Entry Point', 'Execute Command', etc... parameters: Parameters for function description: 'Shell Function', etc... 
        external_functions is a list of built-in VBA functions that were called """
                    actions = output_results["vmonkey_values"]["actions"]
                    external_functions = output_results["vmonkey_values"]["external_funcs"]
                    tmp_iocs = output_results["vmonkey_values"]["tmp_iocs"]
                    if output_results["vmonkey_err"]:
                        vmonkey_err = True
                        self.log.warning(output_results["vmonkey_err"])
                else:
                    vmonkey_err = True
            else:
                vmonkey_err = True

        except Exception:
            self.log.exception(f"Vipermonkey failed to analyze file {request.sha256}")

        if actions:
            # Create the action section
            action_section = ResultSection("Recorded Actions:", parent=self.result)
            action_section.add_tag("technique.macro", "Contains VBA Macro(s)")
            sub_action_sections: Dict[str, ResultSection] = {}
            for action, parameters, description in actions:
                # Create an action sub-section for each distinct description
                if not description:
                    # For actions with no description, just use the type of action
                    description = action

                if description not in sub_action_sections:
                    # The action's description will be the sub-section name
                    sub_action_section = ResultSection(description, parent=action_section)
                    sub_action_sections[description] = sub_action_section
                    if description == "Shell function":
                        sub_action_section.set_heuristic(2)
                else:
                    # Reuse the existing section
                    sub_action_section = sub_action_sections[description]
                    if sub_action_section.heuristic:
                        sub_action_section.heuristic.increment_frequency()

                # Parameters are sometimes stored as a list, account for this
                if isinstance(parameters, list):
                    for item in parameters:
                        # Parameters include more than strings (booleans, for example)
                        if isinstance(item, str):
                            # Check for PowerShell
                            self.extract_powershell(item, sub_action_section, request)
                    # Join the list items into a single string
                    param = ", ".join(str(p) for p in parameters)
                else:
                    param = parameters

                # Parameters include more than strings (booleans, for example)
                if isinstance(param, str):
                    self.extract_powershell(param, sub_action_section, request)

                # If the description field was empty, re-organize the result section for this case
                if description == action:
                    sub_action_section.add_line(param)
                else:
                    sub_action_section.add_line(f"Action: {action}, Parameters: {param}")

                # Checked later for base64
                potential_base64.add(param)

                # Add urls/ips found in the parameter to their respective lists
                self.find_ip(param)

        # Check tmp_iocs
        res_temp_iocs = ResultSection("Runtime temporary IOCs")
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs, request)
            potential_base64.add(ioc)
            self.find_ip(ioc)

        if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add a PowerShell score/tag if found
        if self.found_powershell:
            ResultSection("Discovered PowerShell code in file",
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Check parameters and temp_iocs for base64
        # frequency=0 so that the first increment_frequency() call counts as one hit
        base64_section = ResultSection("Possible Base64 found",
                                       heuristic=Heuristic(5, frequency=0))
        for param in potential_base64:
            self.check_for_b64(param, base64_section, request, request.file_contents)
        if base64_section.body:
            self.result.add_section(base64_section)

        # Add url/ip tags
        self.add_ip_tags()

        # Create a section for the built-in VBA functions called
        if len(external_functions) > 0:
            external_func_section = ResultSection("VBA functions called",
                                                  body_format=BODY_FORMAT.MEMORY_DUMP,
                                                  parent=self.result)
            for func in external_functions:
                if func in vba_builtins:
                    external_func_section.add_line(func + ": " + vba_builtins[func])
                else:
                    external_func_section.add_line(func)

        # Add the vmonkey log as a supplementary file if we have results
        if "stdout" in output_results and (vmonkey_err or request.result.sections):
            temp_log_copy = os.path.join(tempfile.gettempdir(),
                                         f"{request.sid}_vipermonkey_output.log")
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results["stdout"])
            request.add_supplementary(temp_log_copy, "vipermonkey_output.log",
                                      "ViperMonkey log output")

            if vmonkey_err is True:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1),
                )

    def extract_powershell(self, parameter: str, section: ResultSection,
                           request: ServiceRequest) -> None:
        """Searches parameter for PowerShell and adds it as an extracted file if found.

        Args:
            parameter: String to be searched
            section: Section to be modified if PowerShell is found
            request: AL request object, used to add the extracted file
        """
        matches = find_powershell_strings(parameter.encode())

        if not matches:
            return

        self.found_powershell = True

        for match in matches:
            powershell_command = get_powershell_command(match.value)
            sha256hash = hashlib.sha256(powershell_command).hexdigest()
            # Add the PowerShell code as an extracted file, accounting for duplicates
            if sha256hash not in self.file_hashes:
                powershell_filename = f"{sha256hash[0:10]}.ps1"
                ResultSection(
                    "Discovered PowerShell code in parameter.",
                    parent=section,
                    body=powershell_command[:100].decode() + f"... see [{powershell_filename}]",
                )
                powershell_file_path = os.path.join(self.working_directory, powershell_filename)
                with open(powershell_file_path, "wb") as f:
                    f.write(powershell_command)
                request.add_extracted(powershell_file_path, powershell_filename,
                                      "Discovered PowerShell code in parameter")
                self.file_hashes.append(sha256hash)

    def find_ip(self, parameter: str) -> None:
        """Parses the parameter for urls/ip addresses and adds them to their respective lists.

        Args:
            parameter: String to be searched
        """
        url_list = re.findall(r"https?://(?:[-\w.]|(?:[\da-zA-Z/?=%&]))+", parameter)
        ip_list = re.findall(R_IP, parameter)

        for url in url_list:
            url_strip = url.strip()
            if url_strip:
                self.url_list.append(url_strip)
        for ip in ip_list:
            ip_strip = ip.strip()
            if ip_strip:
                self.ip_list.append(ip_strip)

    def add_ip_tags(self) -> None:
        """Adds tags for the urls and ip addresses collected in self.url_list and self.ip_list."""
        if self.url_list or self.ip_list:
            sec_iocs = ResultSection("ViperMonkey has found the following IOCs:",
                                     parent=self.result,
                                     heuristic=Heuristic(4))

            # Add urls
            for url in set(self.url_list):
                sec_iocs.add_line(url)
                sec_iocs.add_tag("network.static.uri", url)
                try:
                    parsed = urlparse(url)
                    if parsed.hostname and not re.match(IP_ONLY_REGEX, parsed.hostname):
                        sec_iocs.add_tag("network.static.domain", parsed.hostname)
                except Exception:
                    pass

            # Add IPs
            for ip in set(self.ip_list):
                sec_iocs.add_line(ip)
                # If a port was found with the IP, add the corresponding tags
                if ":" in ip:
                    net_ip, net_port = ip.split(":")
                    sec_iocs.add_tag("network.static.ip", net_ip)
                    sec_iocs.add_tag("network.port", net_port)
                else:
                    sec_iocs.add_tag("network.static.ip", ip)

    def check_for_b64(self, data: str, section: ResultSection, request: ServiceRequest,
                      file_contents: bytes) -> bool:
        """Searches for and decodes base64 strings in the sample data.

        Args:
            data: Data to be parsed
            section: Base64 subsection, must have its heuristic set
            request: AL request object, used to add extracted files
            file_contents: Raw contents of the original file

        Returns:
            decoded: True if base64 was found
        """
        assert section.heuristic
        decoded_param = data
        decoded = False

        encoded_data = data.encode()
        for content, start, end in find_base64(encoded_data):
            if encoded_data[start:end] in file_contents:
                # Present in the original file, not an intermediate IOC
                continue
            try:
                # PowerShell base64 will be utf-16
                content = content.decode("utf-16").encode()
            except UnicodeDecodeError:
                pass
            try:
                if len(content) < FILE_PARAMETER_SIZE:
                    decoded_param = (decoded_param[:start] + " "
                                     + content.decode(errors="ignore") + decoded_param[end:])
                else:
                    b64hash = ""
                    pe_files = find_pe_files(content)
                    for pe_file in pe_files:
                        b64hash = hashlib.sha256(pe_file).hexdigest()
                        pe_path = os.path.join(self.working_directory, b64hash)
                        with open(pe_path, "wb") as f:
                            f.write(pe_file)
                        request.add_extracted(pe_path, b64hash,
                                              "PE file found in base64 encoded parameter")
                        section.heuristic.add_signature_id("pe_file")
                    if not pe_files:
                        b64hash = hashlib.sha256(content).hexdigest()
                        content_path = os.path.join(self.working_directory, b64hash)
                        with open(content_path, "wb") as f:
                            f.write(content)
                        request.add_extracted(content_path, b64hash,
                                              "Large base64 encoded parameter")
                        section.heuristic.add_signature_id("possible_file")
                    decoded_param = (decoded_param[:start]
                                     + f"[See extracted file {b64hash}]" + decoded_param[end:])
                decoded = True
            except Exception:
                pass

        if decoded:
            section.heuristic.increment_frequency()
            section.add_line(f"Possible Base64 {truncate(data)} decoded: {decoded_param}")
            self.find_ip(decoded_param)

        return decoded
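
# check_for_b64 above leans on a find_base64() helper imported from the service's support
# library, which is not shown in this excerpt. A minimal, self-contained sketch of the same
# scan-and-splice idea, with a hypothetical regex-based stand-in for find_base64 (not the
# real helper's internals):
import binascii
import re
from typing import Iterator, Tuple


def find_base64(data: bytes) -> Iterator[Tuple[bytes, int, int]]:
    # Yield (decoded_bytes, start, end) for each plausible base64 run in the data
    for match in re.finditer(rb"[A-Za-z0-9+/]{16,}={0,2}", data):
        try:
            yield binascii.a2b_base64(match.group()), match.start(), match.end()
        except binascii.Error:
            continue


encoded = b"cmd /c powershell -enc aHR0cHM6Ly9leGFtcGxlLmNvbS9wYXlsb2Fk"
for content, start, end in find_base64(encoded):
    # Splice the decoded value back into the parameter, as check_for_b64 does
    print(encoded[:start] + b" " + content + encoded[end:])
    # b'cmd /c powershell -enc  https://example.com/payload'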
    def _add_resultinfo_for_match(self, result: Result, match):
        """Parse a Yara signature match and add the information to the overall AL service result.

        This module determines the result score and identifies any AL tags that should be added
        (i.e. IMPLANT_NAME, THREAT_ACTOR, etc.).

        Args:
            result: AL Result object.
            match: Yara rules Match object item.

        Returns:
            None.
        """
        almeta = YaraMetadata(match)
        self._normalize_metadata(almeta)

        section = ResultSection('', classification=almeta.classification)
        if self.deep_scan or almeta.al_status != "NOISY":
            section.set_heuristic(self.YARA_HEURISTICS_MAP.get(almeta.category, 1),
                                  signature=f'{match.namespace}.{match.rule}',
                                  attack_id=almeta.mitre_att)
        section.add_tag(f'file.rule.{self.name.lower()}', f'{match.namespace}.{match.rule}')

        title_elements = [f"[{match.namespace}] {match.rule}"]

        if almeta.actor_type:
            section.add_tag('attribution.actor', almeta.actor_type)

        for tag in almeta.tags:
            section.add_tag(tag['type'], tag['value'])

        # Malware tags
        implant_title_elements = []
        for (implant_name, implant_family) in almeta.malwares:
            if implant_name:
                implant_title_elements.append(implant_name)
                section.add_tag('attribution.implant', implant_name)
            if implant_family:
                implant_title_elements.append(implant_family)
                section.add_tag('attribution.family', implant_family)
        if implant_title_elements:
            title_elements.append(f"- Implant(s): {', '.join(implant_title_elements)}")

        # Threat actor metadata
        for actor in almeta.actors:
            title_elements.append(actor)
            section.add_tag('attribution.actor', actor)

        # Exploit / CVE metadata
        if almeta.exploits:
            title_elements.append(f"- Exploit(s): {', '.join(almeta.exploits)}")
        for exploit in almeta.exploits:
            section.add_tag('attribution.exploit', exploit)

        # Include technique descriptions in the section behavior
        for (category, name) in almeta.techniques:
            descriptor = self.TECHNIQUE_DESCRIPTORS.get(category)
            if descriptor:
                technique_type, technique_description = descriptor
                section.add_tag(technique_type, name)
                almeta.behavior.add(technique_description)

        for (category, name) in almeta.infos:
            descriptor = self.INFO_DESCRIPTORS.get(category)
            if descriptor:
                info_type, info_description = descriptor
                section.add_tag(info_type, name)
                almeta.behavior.add(info_description)

        # Summaries
        if almeta.behavior:
            title_elements.append(f"- Behavior: {', '.join(almeta.behavior)}")
        for element in almeta.behavior:
            section.add_tag('file.behavior', element)

        section.title_text = " ".join(title_elements)

        json_body = dict(name=match.rule)
        for item in ['id', 'version', 'author', 'description', 'source', 'malware', 'info',
                     'technique', 'tool', 'exploit', 'actor', 'category', 'mitre_att']:
            val = almeta.__dict__.get(item, None)
            if val:
                json_body[item] = val

        string_match_data = self._add_string_match_data(match)
        if string_match_data:
            json_body['string_hits'] = string_match_data

        section.set_body(json.dumps(json_body), body_format=BODY_FORMAT.KEY_VALUE)

        result.add_section(section)
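
# For a sense of how the title assembly above composes, here is a small, self-contained
# sketch with hand-rolled stand-ins for the match metadata (all the values below are
# hypothetical sample data, not the service's real classes):
malwares = [("SNOWGLOBE", "implantfamilyX"), ("", "familyY")]
actors = ["ACTOR_A"]
exploits = ["CVE-2017-0199"]
behavior = {"dropper"}

title_elements = ["[my_namespace] my_rule"]

implant_title_elements = [name for pair in malwares for name in pair if name]
if implant_title_elements:
    title_elements.append(f"- Implant(s): {', '.join(implant_title_elements)}")
title_elements.extend(actors)
if exploits:
    title_elements.append(f"- Exploit(s): {', '.join(exploits)}")
if behavior:
    title_elements.append(f"- Behavior: {', '.join(sorted(behavior))}")

print(" ".join(title_elements))
# [my_namespace] my_rule - Implant(s): SNOWGLOBE, implantfamilyX, familyY ACTOR_A
# - Exploit(s): CVE-2017-0199 - Behavior: dropper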
class ViperMonkey(ServiceBase):
    def __init__(self, config=None):
        super(ViperMonkey, self).__init__(config)
        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        self.request = None
        self.result = None

    def start(self):
        self.log.debug('ViperMonkey service started')

    def execute(self, request):
        self.result = Result()
        request.result = self.result
        self.request = request

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions = []
        external_functions = []
        tmp_iocs = []
        output_results = {}

        # Running ViperMonkey
        try:
            cmd = " ".join([
                PYTHON2_INTERPRETER,
                os.path.join(os.path.dirname(__file__), 'vipermonkey_compat.py2'),
                request.file_path
            ])
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
            stdout, _ = p.communicate()

            # Read the output
            if stdout:
                for l in stdout.splitlines():
                    if l.startswith(b"{") and l.endswith(b"}"):
                        try:
                            output_results = json.loads(l)
                        except UnicodeDecodeError:
                            output_results = json.loads(l.decode("utf-8", "replace"))
                        break

                # Check for a dict in case the vmonkey return is None
                # If no macros are found the return is [][]; on error the return is None
                if isinstance(output_results.get('vmonkey_values'), dict):
                    '''
                    Structure of variable "actions" is as follows:
                    [action, parameters, description]
                    action: 'Found Entry Point', 'Execute Command', etc...
                    parameters: Parameters for the function
                    description: 'Shell function', etc...

                    external_functions is a list of built-in VBA functions that were called
                    '''
                    actions = output_results['vmonkey_values']['actions']
                    external_functions = output_results['vmonkey_values']['external_funcs']
                    tmp_iocs = output_results['vmonkey_values']['tmp_iocs']
                else:
                    vmonkey_err = True
            else:
                vmonkey_err = True

        except Exception:
            raise

        # Add the vmonkey log as a supplementary file
        if 'stdout' in output_results:
            temp_log_copy = os.path.join(tempfile.gettempdir(),
                                         f'{request.sid}_vipermonkey_output.log')
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results['stdout'])

            self.request.add_supplementary(temp_log_copy, 'vipermonkey_output.log',
                                           'ViperMonkey log output')

        if vmonkey_err is True:
            ResultSection(
                'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                parent=self.result,
                heuristic=Heuristic(1))

        if len(actions) > 0:
            # Create the action section
            action_section = ResultSection('Recorded Actions:', parent=self.result)
            action_section.add_tag('technique.macro', 'Contains VBA Macro(s)')
            for action in actions:
                # Create action sub-sections for each action
                cur_action = action[0]
                cur_description = action[2] if action[2] else cur_action

                # Entry point actions have an empty description field, re-organize the result
                # section for this case
                if cur_action == 'Found Entry Point':
                    sub_action_section = ResultSection('Found Entry Point',
                                                       parent=action_section)
                    sub_action_section.add_line(action[1])
                else:
                    # The action's description will be the sub-section name
                    sub_action_section = ResultSection(cur_description, parent=action_section)
                    if cur_description == 'Shell function':
                        sub_action_section.set_heuristic(2)

                    # Parameters are sometimes stored as a list, account for this
                    if isinstance(action[1], list):
                        for item in action[1]:
                            # Parameters include more than strings (booleans, for example)
                            if isinstance(item, str):
                                # Check for PowerShell
                                self.extract_powershell(item, sub_action_section)
                        # Join the list items into a single string
                        param = ', '.join(str(a) for a in action[1])
                    else:
                        param = action[1]

                    # Parameters include more than strings (booleans, for example)
                    if isinstance(param, str):
                        self.extract_powershell(param, sub_action_section)

                    sub_action_section.add_line(f'Action: {cur_action}')
                    sub_action_section.add_line(f'Parameters: {param}')

                    # If decoded is true, a possible base64 string has been found
                    self.check_for_b64(param, sub_action_section)

                    # Add urls/ips found in the parameter to their respective lists
                    self.find_ip(param)

        # Check tmp_iocs
        res_temp_iocs = ResultSection('Runtime temporary IOCs')
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs)
            self.check_for_b64(ioc, res_temp_iocs)
            self.find_ip(ioc)

        if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add a PowerShell score/tag if found
        if self.found_powershell:
            ResultSection('Discovered PowerShell code in file',
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Add url/ip tags
        self.add_ip_tags()

        # Create a section for the built-in VBA functions called
        if len(external_functions) > 0:
            vba_builtin_dict = {}
            dict_path = os.path.join(os.path.dirname(__file__), 'VBA_built_ins.txt')
            with open(dict_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if re.search(r'^#', line):
                        continue
                    if line:
                        line = line.split(';')
                        vba_builtin_dict[line[0].strip()] = line[1].strip()

            external_func_section = ResultSection('VBA functions called',
                                                  body_format=BODY_FORMAT.MEMORY_DUMP,
                                                  parent=self.result)
            for func in external_functions:
                if func in vba_builtin_dict:
                    external_func_section.add_line(func + ': ' + vba_builtin_dict[func])
                else:
                    external_func_section.add_line(func)

    def extract_powershell(self, parameter, section):
        """Searches parameter for PowerShell and adds it as an extracted file if found.

        Args:
            parameter: String to be searched
            section: Section to be modified if PowerShell is found
        """
        if re.findall(r'(?:powershell)|(?:pwsh)', parameter, re.IGNORECASE):
            self.found_powershell = True
            if isinstance(parameter, str):
                # Unicode-objects must be encoded before hashing
                sha256hash = hashlib.sha256(parameter.encode()).hexdigest()
            else:
                sha256hash = hashlib.sha256(parameter).hexdigest()
            ResultSection('Discovered PowerShell code in parameter.', parent=section)

            # Add the PowerShell code as an extracted file, accounting for duplicates
            if sha256hash not in self.file_hashes:
                self.file_hashes.append(sha256hash)
                powershell_filename = f'{sha256hash[0:25]}_extracted_powershell'
                powershell_file_path = os.path.join(self.working_directory,
                                                    powershell_filename)
                with open(powershell_file_path, 'w') as f:
                    f.write(parameter)
                self.request.add_extracted(powershell_file_path, powershell_filename,
                                           'Discovered PowerShell code in parameter')

    def find_ip(self, parameter):
        """Parses the parameter for urls/ip addresses and adds them to their respective lists.

        Args:
            parameter: String to be searched
        """
        url_list = re.findall(r'https?://(?:[-\w.]|(?:[\da-zA-Z/?=%&]))+', parameter)
        ip_list = re.findall(R_IP, parameter)

        for url in url_list:
            url_strip = url.strip()
            if url_strip:
                self.url_list.append(url_strip)
        for ip in ip_list:
            ip_strip = ip.strip()
            if ip_strip:
                self.ip_list.append(ip_strip)

    def add_ip_tags(self):
        """Adds tags for the urls and ip addresses collected in self.url_list and self.ip_list."""
        if self.url_list or self.ip_list:
            sec_iocs = ResultSection("ViperMonkey has found the following IOCs:",
                                     parent=self.result,
                                     heuristic=Heuristic(4))

            # Add urls
            for url in set(self.url_list):
                sec_iocs.add_line(url)
                sec_iocs.add_tag('network.static.uri', url)
                try:
                    parsed = urlparse(url)
                    if parsed.hostname and not re.match(IP_ONLY_REGEX, parsed.hostname):
                        sec_iocs.add_tag('network.static.domain', parsed.hostname)
                except Exception:
                    pass

            # Add IPs
            for ip in set(self.ip_list):
                sec_iocs.add_line(ip)
                # If a port was found with the IP, add the corresponding tags
                if ":" in ip:
                    net_ip, net_port = ip.split(':')
                    sec_iocs.add_tag('network.static.ip', net_ip)
                    sec_iocs.add_tag('network.port', net_port)
                else:
                    sec_iocs.add_tag('network.static.ip', ip)

    def check_for_b64(self, data, section):
        """Search for and decode base64 strings in the sample data.

        Args:
            data: Data to be parsed
            section: Sub-section to be modified if base64 is found

        Returns:
            decoded: True if base64 was found
        """
        b64_matches = []
        # b64_matches_raw will be used for replacing in case b64_matches are modified
        b64_matches_raw = []
        decoded_param = data
        decoded = False

        for b64_match in re.findall('([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}[\r]?[\n]?){2,})',
                                    re.sub('\x3C\x00\x20{2}\x00', '', data)):
            b64 = b64_match.replace('\n', '').replace('\r', '').replace(' ', '').replace('<', '')
            uniq_char = ''.join(set(b64))
            if len(uniq_char) > 6:
                if len(b64) >= 16 and len(b64) % 4 == 0:
                    b64_matches.append(b64)
                    b64_matches_raw.append(b64_match)

        for b64_string, b64_string_raw in zip(b64_matches, b64_matches_raw):
            try:
                base64data = binascii.a2b_base64(b64_string)
                # Decode the base64 bytes (utf-16, then drop any non-ascii characters); add a
                # space to the beginning as it may be stripped off while using regex
                base64data_decoded = ' ' + base64data.decode('utf-16').encode(
                    'ascii', 'ignore').decode('ascii')
                # Replace the base64 in the param with the decoded string
                # (escape it: the raw match is literal text, not a regex)
                decoded_param = re.sub(re.escape(b64_string_raw), base64data_decoded,
                                       decoded_param)
                decoded = True
            except Exception:
                pass

        if decoded:
            decoded_section = ResultSection('Possible Base64 found',
                                            parent=section,
                                            heuristic=Heuristic(5))
            decoded_section.add_line(f'Possible Base64 Decoded Parameters: {decoded_param}')
            self.find_ip(decoded_param)

        return decoded
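
# The candidate filter in check_for_b64 above (more than 6 distinct characters, length of at
# least 16, and a length divisible by 4) is what keeps obvious non-base64 runs out of the
# decode loop. A quick standalone illustration of that filter on sample strings:
def looks_like_b64(candidate: str) -> bool:
    # Mirrors the filter in check_for_b64: enough distinct characters, a minimum
    # length, and a length that base64 decoding will accept
    stripped = candidate.replace('\n', '').replace('\r', '').replace(' ', '')
    return (len(set(stripped)) > 6
            and len(stripped) >= 16
            and len(stripped) % 4 == 0)


for s in ("AAAAAAAAAAAAAAAA",        # False: too few distinct characters
          "cG93ZXJzaGVsbC5leGU=",    # True: plausible, decodes to "powershell.exe"
          "c2hvcnQ="):               # False: too short
    print(s, looks_like_b64(s))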
    def execute(self, request):
        parser = eml_parser.eml_parser.EmlParser(include_raw_body=True,
                                                 include_attachment_data=True)

        # Validate URLs in the sample, stripping out [] if found
        content_str = request.file_contents.decode(errors="ignore")
        content_str, retry = self.validate_urls(content_str)
        while retry:
            content_str, retry = self.validate_urls(content_str)

        parsed_eml = parser.decode_email_bytes(content_str.encode())
        result = Result()
        header = parsed_eml['header']

        if "from" in header:
            all_uri = set()

            for body_counter, body in enumerate(parsed_eml['body']):
                if request.get_param('extract_body_text'):
                    fd, path = mkstemp()
                    with open(path, 'w') as f:
                        f.write(body['content'])
                    os.close(fd)
                    request.add_extracted(path, "body_" + str(body_counter), "Body text")
                if "uri" in body:
                    for uri in body['uri']:
                        all_uri.add(uri)

            kv_section = ResultSection('Email Headers',
                                       body_format=BODY_FORMAT.KEY_VALUE,
                                       parent=result)

            # Basic tags
            kv_section.add_tag("network.email.address", header['from'].strip())
            for to in header['to']:
                kv_section.add_tag("network.email.address", to)
            kv_section.add_tag("network.email.date", str(header['date']).strip())
            kv_section.add_tag("network.email.subject", header['subject'].strip())

            # Add CCs to the body and tags
            if 'cc' in header:
                for cc in header['cc']:
                    kv_section.add_tag("network.email.address", cc.strip())

            # Add the Message ID to the body and tags
            if 'message-id' in header['header']:
                kv_section.add_tag("network.email.msg_id",
                                   header['header']['message-id'][0].strip())

            # Add tags for received IPs
            if 'received_ip' in header:
                for ip in header['received_ip']:
                    kv_section.add_tag('network.static.ip', ip.strip())

            # Add tags for received domains
            if 'received_domain' in header:
                for dom in header['received_domain']:
                    kv_section.add_tag('network.static.domain', dom.strip())

            # If we've found URIs, add them to a section
            if len(all_uri) > 0:
                uri_section = ResultSection('URIs Found:', parent=result)
                for uri in all_uri:
                    uri_section.add_line(uri)
                    uri_section.add_tag('network.static.uri', uri.strip())
                    parsed_url = urlparse(uri)
                    if parsed_url.hostname and re.match(IP_ONLY_REGEX, parsed_url.hostname):
                        uri_section.add_tag('network.static.ip', parsed_url.hostname)
                    elif parsed_url.hostname:
                        uri_section.add_tag('network.static.domain', parsed_url.hostname)

            # Bring all the headers together...
            extra_header = header.pop('header', {})
            header.pop('received', None)
            header.update(extra_header)
            kv_section.body = json.dumps(header, default=self.json_serial)

            if "attachment" in parsed_eml:
                for attachment in parsed_eml['attachment']:
                    fd, path = mkstemp()
                    with open(path, 'wb') as f:
                        f.write(base64.b64decode(attachment['raw']))
                    os.close(fd)
                    request.add_extracted(path, attachment['filename'], "Attachment")
                ResultSection('Extracted Attachments:',
                              body="\n".join([x['filename'] for x in parsed_eml['attachment']]),
                              parent=result)

            if request.get_param('save_emlparser_output'):
                fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
                with os.fdopen(fd, "w") as myfile:
                    myfile.write(json.dumps(parsed_eml, default=self.json_serial))
                request.add_supplementary(
                    temp_path, "parsing.json",
                    "These are the raw results of running GOVCERT-LU's eml_parser")
        else:
            text_section = ResultSection('EML parsing results')
            text_section.add_line("Could not parse EML")
            result.add_section(text_section)

        request.result = result
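
# json.dumps needs help with the datetime objects eml_parser returns, which is what the
# json_serial hook passed as default= above is for. Its implementation isn't part of this
# excerpt; a typical version (a sketch, not a verbatim copy of the service's helper) is:
import datetime
import json


def json_serial(obj):
    # JSON serializer for objects the default encoder can't handle
    if isinstance(obj, (datetime.datetime, datetime.date)):
        return obj.isoformat()
    # Fall back to a string representation rather than raising
    return str(obj)


print(json.dumps({"date": datetime.datetime(2020, 1, 1)}, default=json_serial))
# {"date": "2020-01-01T00:00:00"}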
    def execute(self, request: ServiceRequest) -> None:
        """Main module, see README for details."""
        start = time.time()
        result = Result()
        request.result = result
        file_path = request.file_path

        if request.deep_scan:
            # Maximum size of submitted file to run this service:
            max_size = 200000
            # String length maximum, used in basic ASCII and UNICODE modules:
            max_length = 1000000
            # String list maximum size
            # List produced by basic ASCII and UNICODE module results and will determine
            # if patterns.py will only evaluate network IOC patterns:
            st_max_size = 100000
            # Minimum string size for encoded/stacked string modules:
            enc_min_length = 7
            stack_min_length = 7
        else:
            max_size = self.config.get('max_size', 85000)
            max_length = self.config.get('max_length', 5000)
            st_max_size = self.config.get('st_max_size', 0)
            enc_min_length = self.config.get('enc_min_length', 7)
            stack_min_length = self.config.get('stack_min_length', 7)

        timeout = self.service_attributes.timeout - 50

        if len(request.file_contents) > max_size:
            return

        # Pass each flag and its value as separate argv entries so they are not fused
        # into a single token
        stack_args = [FLOSS, '-n', str(stack_min_length), '--no-decoded-strings', file_path]
        decode_args = [FLOSS, '-n', str(enc_min_length), '-x', '--no-static-strings',
                       '--no-stack-strings', file_path]

        with Popen(stack_args, stdout=PIPE, stderr=PIPE) as stack, \
                Popen(decode_args, stdout=PIPE, stderr=PIPE) as decode:
            stack_out, _, timed_out = self.handle_process(
                stack, timeout + start - time.time(), ' '.join(stack_args))
            if timed_out:
                result.add_section(ResultSection('FLARE FLOSS stacked strings timed out'))
                self.log.warning(f'floss stacked strings timed out for sample {request.sha256}')

            dec_out, dec_err, timed_out = self.handle_process(
                decode, timeout + start - time.time(), ' '.join(decode_args))
            if timed_out:
                result.add_section(ResultSection('FLARE FLOSS decoded strings timed out'))
                self.log.warning(f'floss decoded strings timed out for sample {request.sha256}')

        if stack_out:
            sections = [[y for y in x.splitlines() if y] for x in stack_out.split(b'\n\n')]
            for section in sections:
                if not section:  # skip empty sections
                    continue
                if re.match(rb'FLOSS static\s+.*\s+strings', section[0]):
                    result_section = static_result(section, max_length, st_max_size)
                    if result_section:
                        result.add_section(result_section)
                    continue
                if re.match(rb'.*\d+ stackstring.*', section[0]):
                    result_section = stack_result(section)
                    if result_section:
                        result.add_section(result_section)
                    continue

        # Process decoded strings results
        if dec_out:
            result_section = decoded_result(dec_out)
            if result_section:
                if dec_err:
                    result_section.add_line(
                        "FLARE FLOSS generated error messages while analyzing:")
                    result_section.add_line(safe_str(dec_err))
                result.add_section(result_section)
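
# handle_process is not part of this excerpt. A minimal sketch of the wait-with-deadline
# behaviour the calls above rely on, with the return shape (stdout, stderr, timed_out)
# inferred from the call sites (an assumption, not the service's actual code):
import subprocess
from typing import Optional, Tuple


def handle_process(proc: subprocess.Popen, timeout: float,
                   cmdline: str) -> Tuple[Optional[bytes], Optional[bytes], bool]:
    # Wait for `proc` up to `timeout` seconds; output is None when the deadline hits
    try:
        stdout, stderr = proc.communicate(timeout=max(timeout, 0))
        return stdout, stderr, False
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.communicate()  # reap the killed process
        return None, None, True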
    def execute(self, request):
        """Main Module. See README for details."""
        result = Result()
        request.result = result
        self.sha = request.sha256
        infile = request.file_path
        run_steg = request.get_param('run_steg')
        imginfo = None
        decloak = False

        # Run image-specific modules
        supported_images = re.compile('image/(bmp|gif|jpeg|jpg|png)')
        if re.match(supported_images, request.file_type):
            # Extract img info using Pillow (already available in steg.py) and determine
            # if the steg modules should be run
            if self.config['run_steg_auto'] or run_steg:
                decloak = True
            try:
                imginfo = ImageInfo(infile, request, result, self.working_directory, self.log)
            except NotSupported:
                decloak = False

        # Run Tesseract on the sample
        # Process the command and save the output in the result object
        usable_out = None
        orig_outfile = os.path.join(self.working_directory, 'outfile')
        outfile = orig_outfile
        stdout, stderr = self.tesseract_call(infile, orig_outfile)
        if stdout or stderr:
            # Assess Tesseract warnings
            if b"pix too small" in stderr:
                # Make the image larger with the convert command, then retry OCR
                # on the enlarged copy
                enlrg_img = os.path.join(self.working_directory, 'enlrg_img')
                c_stdout, c_stderr = self.convert_img(infile, enlrg_img)
                if c_stdout and not c_stderr:
                    c_outfile = os.path.join(self.working_directory, 'c_outfile')
                    stdout, stderr = self.tesseract_call(enlrg_img, c_outfile)
                    if stdout and not stderr:
                        outfile = c_outfile
            if stderr:
                self.log.debug("Tesseract errored/warned on sample {}. Error:{}"
                               .format(self.sha, stderr))

            usable_out = self.assess_output(outfile, request)

        if usable_out:
            ores = ResultSection("OCR Engine detected strings in image",
                                 body_format=BODY_FORMAT.MEMORY_DUMP)
            ores.add_line("Text preview (up to 500 bytes):\n")
            ores.add_line("{}".format(usable_out[0:500]))
            result.add_section(ores)

        # Find attached data
        additional_content = self.find_additional_content(infile)
        if additional_content:
            ares = ResultSection("Possible Appended Content Found",
                                 body_format=BODY_FORMAT.MEMORY_DUMP)
            ares.add_line("{} Bytes of content found at end of image file"
                          .format(len(additional_content)))
            ares.add_line("Text preview (up to 500 bytes):\n")
            ares.add_line("{}".format(safe_str(additional_content)[0:500]))
            ares.set_heuristic(2)
            result.add_section(ares)
            file_name = "{}_appended_img_content".format(
                hashlib.sha256(additional_content).hexdigest()[0:10])
            file_path = os.path.join(self.working_directory, file_name)
            # Write the carved content before registering it as an extracted file
            with open(file_path, 'wb') as unibu_file:
                unibu_file.write(additional_content)
            request.add_extracted(file_path, file_name,
                                  "Carved content found at end of image.")

        # Steganography modules
        if decloak and imginfo and request.deep_scan:
            imginfo.decloak()
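
# find_additional_content is not shown in this excerpt. One plausible way to carve trailing
# bytes (a sketch under assumed behaviour, not the service's actual implementation) is to
# locate the image's end-of-stream marker and return whatever follows, e.g. for JPEGs:
def find_appended_jpeg_content(file_path: str) -> bytes:
    # Return any bytes that follow the JPEG end-of-image marker (FF D9).
    # Simplified: the real service supports several image formats, each with its own
    # notion of where the image data ends, and embedded thumbnails carry their own
    # EOI marker, which can confuse this naive single-marker approach.
    with open(file_path, 'rb') as f:
        data = f.read()
    eoi = data.rfind(b'\xff\xd9')
    if eoi == -1:
        return b''  # not a (complete) JPEG
    return data[eoi + 2:]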
    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called.
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results...
        # You should run your code here...
        # ==================================================================

        # Check if we're scanning an embedded file
        #   This service always drops two embedded files: one generates random results and the
        #   other generates empty results. We check here whether we're scanning one of those
        #   embedded files. In a normal service this is not something you would do at all, but
        #   since we are using this service in our unit tests to test all the features of our
        #   report generator, we have to do this.
        if request.sha256 not in [
                'd729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be saved
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dump the text to the screen...
            #     All section scores will be SUMmed in the service result.
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())

            # Or you can add them from a list
            #   Here we will generate a random number of random lines
            text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])

            # If the section needs to affect the score of the file you need to set a heuristic.
            #   Here we will pick one at random.
            #   In addition to adding a heuristic, we will associate a signature with it by
            #   adding the signature name to the heuristic. (Here we generate a random name.)
            text_section.set_heuristic(random.randint(1, 4),
                                       signature=get_random_phrase(1, 4).lower().replace(" ", "_"))

            # Make sure you add your section to the result
            result.add_section(text_section)

            # ==================================================================
            # Color map section: BODY_FORMAT.GRAPH_DATA
            #   Creates a color map bar using a minimum and maximum domain
            #   e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            color_map_data = {
                'type': 'colormap',
                'data': {
                    'domain': [cmap_min, cmap_max],
                    'values': [random.random() * cmap_max for _ in range(50)]
                }
            }
            section_color_map = ResultSection("Example of colormap result section",
                                              body_format=BODY_FORMAT.GRAPH_DATA,
                                              body=json.dumps(color_map_data))
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format.
            #   As you can see here, the body of the section can be set directly
            #   instead of line by line
            random_host = get_random_host()
            url_section = ResultSection('Example of a simple url section',
                                        body_format=BODY_FORMAT.URL,
                                        body=json.dumps({"name": "Random url!",
                                                         "url": f"https://{random_host}/"}))

            # Since urls are very important features, we can tag them in the system
            # so they are easy to find. Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of urls!
            #   Also, no need to provide a name; the url link will be displayed
            host1 = get_random_host()
            host2 = get_random_host()
            ip1 = get_random_ip()
            urls = [{"url": f"https://{host1}/"},
                    {"url": f"https://{host2}/"},
                    {"url": f"https://{ip1}/"}]
            url_sub_section = ResultSection('Example of a url section with multiple links',
                                            body_format=BODY_FORMAT.URL,
                                            body=json.dumps(urls))
            url_sub_section.set_heuristic(random.randint(1, 4))
            url_sub_section.add_tag("network.static.ip", ip1)
            url_sub_section.add_tag("network.static.domain", host1)
            url_sub_section.add_tag("network.dynamic.domain", host2)
            # Since url_sub_section is a sub-section of url_section,
            # we add it as a sub-section of url_section, not to the main result itself
            url_section.add_subsection(url_sub_section)
            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #   Dump whatever string content you have into a <pre/> html tag so you can
            #   do your own formatting
            data = hexdump(b"This is some random text that we will format as a hexdump and "
                           b"you'll see that the hexdump formatting will be preserved by the "
                           b"memory dump section!")
            memdump_section = ResultSection('Example of a memory dump section',
                                            body_format=BODY_FORMAT.MEMORY_DUMP,
                                            body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #   This section allows the service writer to list a bunch of key/value pairs to be
            #   displayed in the UI while also providing easy-to-parse data for automated tools.
            #   NB: You should definitely use this over a JSON body type since this one will be
            #       displayed correctly in the UI for the user.
            #   The body argument must be a json dump of a dictionary
            #   (only str, int, and booleans are allowed)
            kv_body = {
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            }
            kv_section = ResultSection('Example of a KEY_VALUE section',
                                       body_format=BODY_FORMAT.KEY_VALUE,
                                       body=json.dumps(kv_body))
            result.add_section(kv_section)

            # ==================================================================
            # JSON section:
            #   Re-use the JSON editor we use for administration
            #   (https://github.com/josdejong/jsoneditor) to display a tree view of JSON results.
            #   NB: Use this sparingly! As a service developer you should do your best to include
            #       important results as their own result sections.
            #   The body argument must be a json dump of a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [
                        {"d1_key": "val", "d1_key2": "val2"},
                        {"d2_key": "val", "d2_key2": "val2"}
                    ],
                    "bool": True
                }
            }
            json_section = ResultSection('Example of a JSON section',
                                         body_format=BODY_FORMAT.JSON,
                                         body=json.dumps(json_body))
            result.add_section(json_section)

            # ==================================================================
            # Re-submitting files to the system
            #   Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #   Adding supplementary files will save them on the datastore for future
            #   reference but won't reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(urls))
            request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")
            # Like embedded files, you can add more than one supplementary file
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json",
                                      "This is the json_body as a JSON file")

            # ==================================================================
            # Wrap-up:
            #   Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Create an empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized results file, we will completely randomize the results.
            #   The content of those results does not matter since we've already shown you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # Random number of sections
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result
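
# The GRAPH_DATA body used above is just a JSON convention. A small helper like this
# (hypothetical, not part of the service library) makes the expected shape explicit
# and reusable:
import json


def colormap_body(domain_min: float, domain_max: float, values: list) -> str:
    # Build the JSON body expected by BODY_FORMAT.GRAPH_DATA colormap sections
    return json.dumps({
        'type': 'colormap',
        'data': {
            'domain': [domain_min, domain_max],
            'values': values,
        }
    })


# e.g. per-block entropy of a file, scaled to 0..8 bits
print(colormap_body(0, 8, [1.2, 7.9, 7.8, 3.3]))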
    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called.
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results...
        # You should run your code here...
        # ==================================================================

        # Check if we're scanning an embedded file
        #   This service always drops three embedded files: two generate random results and the
        #   other generates empty results. We check here whether we're scanning one of those
        #   embedded files. In a normal service this is not something you would do at all, but
        #   since we are using this service in our unit tests to test all the features of our
        #   report generator, we have to do this.
        if request.sha256 not in [
                'd729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be saved
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dump the text to the screen...
            #     All section scores will be SUMmed in the service result.
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultTextSection('Example of a default section')

            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())

            # Or you can add them from a list
            #   Here we will generate a random number of random lines
            text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])

            # You can tag data to a section; tagging is used to quickly find defining
            # information about a file
            text_section.add_tag("attribution.implant", "ResultSample")

            # If the section needs to affect the score of the file you need to set a heuristic.
            #   In addition to adding a heuristic, we will associate a signature with it by
            #   adding the signature name to the heuristic.
            text_section.set_heuristic(3, signature="sig_one")

            # You can attach attack ids to heuristics after they were defined
            text_section.heuristic.add_attack_id(random.choice(list(software_map.keys())))
            text_section.heuristic.add_attack_id(random.choice(list(attack_map.keys())))
            text_section.heuristic.add_attack_id(random.choice(list(group_map.keys())))
            text_section.heuristic.add_attack_id(random.choice(list(revoke_map.keys())))

            # Same thing for the signatures: they can be added to the heuristic after the fact,
            # and you can even say how many times the signature fired by setting its frequency.
            # If you call add_signature_id twice with the same signature, this will effectively
            # increase the frequency of the signature.
            text_section.heuristic.add_signature_id("sig_two", score=20, frequency=2)
            text_section.heuristic.add_signature_id("sig_two", score=20, frequency=3)
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_four", score=0)

            # The heuristic for text_section should have the following properties:
            #   1. Attack IDs: one drawn from each of the software, attack, group and revoke maps
            #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
            #   3. Signature frequencies are cumulative, therefore they will be as follows:
            #      - sig_one = 1
            #      - sig_two = 5
            #      - sig_three = 2
            #      - sig_four = 1
            #   4. The score used by each signature is driven by the following rules: the
            #      signature_score_map has the highest priority, then the score value passed to
            #      add_signature_id, and finally the default heuristic score. Therefore the
            #      scores used to calculate the total score for text_section are as follows:
            #      - sig_one: 10 -> heuristic default score
            #      - sig_two: 20 -> score provided by the add_signature_id function
            #      - sig_three: 30 -> score provided by the heuristic map
            #      - sig_four: 40 -> score provided by the heuristic map because it's higher
            #                        priority than the function score
            #   5. The total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210

            # Make sure you add your section to the result
            result.add_section(text_section)

            # Even if the section was added to the results, you can still modify it,
            # by adding a subsection for example
            ResultSection("Example of sub-section without a body added later in processing",
                          parent=text_section)

            # ==================================================================
            # Color map section: BODY_FORMAT.GRAPH_DATA
            #   Creates a color map bar using a minimum and maximum domain
            #   e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            cmap_values = [random.random() * cmap_max for _ in range(50)]
            # The classification of a section can be set to any valid classification
            # for your system
            section_color_map = ResultGraphSection("Example of colormap result section",
                                                   classification=cl_engine.RESTRICTED)
            section_color_map.set_colormap(cmap_min, cmap_max, cmap_values)
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format.
            #   As you can see here, the body of the section can be set directly
            #   instead of line by line
            random_host = get_random_host()
            url_section = ResultURLSection('Example of a simple url section')
            url_section.add_url(f"https://{random_host}/", name="Random url!")

            # Since urls are very important features, we can tag them in the system
            # so they are easy to find. Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of urls!
            #   Also, no need to provide a name; the url link will be displayed
            hosts = [get_random_host() for _ in range(2)]

            # A heuristic can fire more than once without being associated to a signature
            url_heuristic = Heuristic(4, frequency=len(hosts))

            url_sub_section = ResultURLSection('Example of a url sub-section with multiple links',
                                               heuristic=url_heuristic,
                                               classification=cl_engine.RESTRICTED)
            for host in hosts:
                url_sub_section.add_url(f"https://{host}/")
                url_sub_section.add_tag("network.static.domain", host)

            # You can keep nesting sections if you really need to
            ips = [get_random_ip() for _ in range(3)]
            url_sub_sub_section = ResultURLSection('Example of a two-level-deep sub-section')
            for ip in ips:
                url_sub_sub_section.add_url(f"https://{ip}/")
                url_sub_sub_section.add_tag("network.static.ip", ip)

            # Since url_sub_sub_section is a sub-section of url_sub_section,
            # we add it as a sub-section of url_sub_section, not to the main result itself
            url_sub_section.add_subsection(url_sub_sub_section)

            # Invalid sections will be ignored, and an error will appear in the logs.
            # Sub-sections of invalid sections will be ignored too.
            invalid_section = ResultSection("")
            ResultSection("I won't make it to the report because my parent is invalid :(",
                          parent=invalid_section)
            url_sub_section.add_subsection(invalid_section)

            # Since url_sub_section is a sub-section of url_section,
            # we add it as a sub-section of url_section, not to the main result itself
            url_section.add_subsection(url_sub_section)

            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #   Dump whatever string content you have into a <pre/> html tag so you can
            #   do your own formatting
            data = hexdump(b"This is some random text that we will format as a hexdump and "
                           b"you'll see that the hexdump formatting will be preserved by the "
                           b"memory dump section!")
            memdump_section = ResultMemoryDumpSection('Example of a memory dump section',
                                                      body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #   This section allows the service writer to list a bunch of key/value pairs to be
            #   displayed in the UI while also providing easy-to-parse data for automated tools.
            #   NB: You should definitely use this over a JSON body type since this one will be
            #       displayed correctly in the UI for the user.
            #   The body argument must be a dictionary (only str, int, and booleans are allowed)
            kv_section = ResultKeyValueSection('Example of a KEY_VALUE section')
            # You can add items individually
            kv_section.set_item('key', "value")
            # Or simply add them in bulk
            kv_section.update_items({
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            })
            result.add_section(kv_section)

            # ==================================================================
            # ORDERED_KEY_VALUE section:
            #   This section provides the same functionality as the KEY_VALUE section except
            #   that the fields are guaranteed to be preserved in the order in which they are
            #   added to the section. Also, with this section you can repeat the same key name
            #   multiple times.
            ordered_kv_section = ResultOrderedKeyValueSection(
                'Example of an ORDERED_KEY_VALUE section')
            # You can add items individually
            for x in range(random.randint(3, 6)):
                ordered_kv_section.add_item(f'key{x}', f"value{x}")
            result.add_section(ordered_kv_section)

            # ==================================================================
            # JSON section:
            #   Re-use the JSON editor we use for administration
            #   (https://github.com/josdejong/jsoneditor) to display a tree view of JSON results.
            #   NB: Use this sparingly! As a service developer you should do your best to include
            #       important results as their own result sections.
            #   The body argument must be a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [
                        {"d1_key": "val", "d1_key2": "val2"},
                        {"d2_key": "val", "d2_key2": "val2"}
                    ],
                    "bool": True
                }
            }
            json_section = ResultJSONSection('Example of a JSON section')
            # You can set the json result to a specific value
            json_section.set_json(json_body)
            # You can also update specific parts after the fact
            json_section.update_json({'an_int': 1000, 'updated_key': 'updated_value'})
            result.add_section(json_section)

            # ==================================================================
            # PROCESS_TREE section:
            #   This section allows the service writer to list a bunch of dictionary objects
            #   that have nested lists of dictionaries to be displayed in the UI. Each dictionary
            #   object represents a process, and therefore each dictionary must be of the
            #   following format:
            #   {
            #     "process_pid": int,
            #     "process_name": str,
            #     "command_line": str,
            #     "signatures": {}  This dict has the signature name as a key and the score
            #                       as its value
            #     "children": []    NB: This list is either empty or contains more dictionaries
            #                           that have the same structure
            #   }
            process_tree_section = ResultProcessTreeSection('Example of a PROCESS_TREE section')
            # You can use the ProcessItem class to create the processes to add to the result section
            evil_process = ProcessItem(123, "evil.exe", "c:\\evil.exe")
            evil_process_child_1 = ProcessItem(
                321, "takeovercomputer.exe", "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff")
            # You can add child processes to the ProcessItem objects
            evil_process_child_1.add_child_process(
                ProcessItem(456, "evenworsethanbefore.exe",
                            "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                            signatures={"one": 10, "two": 10, "three": 10}))
            evil_process_child_1.add_child_process(
                ProcessItem(234, "badfile.exe", "C:\\badfile.exe -k nothing_to_see_here",
                            signatures={"one": 1000, "two": 10, "three": 10,
                                        "four": 10, "five": 10}))

            # You can add signatures that hit on a ProcessItem object
            evil_process_child_1.add_signature('one', 250)

            # Or even directly create the ProcessItem object with the signature in it
            evil_process_child_2 = ProcessItem(
                345, "benignexe.exe", "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                signatures={"one": 2000})

            # You can also add counts for network, file and registry events
            # to a ProcessItem object
            evil_process_child_2.add_network_events(4)
            evil_process_child_2.add_file_events(7000)
            evil_process_child_2.add_registry_events(10)

            # You can also indicate if the process tree item has been safelisted
            benign_process = ProcessItem(678, "trustme.exe", "C:\\trustme.exe")
            benign_process.safelist()

            evil_process.add_child_process(evil_process_child_1)
            evil_process.add_child_process(evil_process_child_2)

            # Add your processes to the result section via the add_process function
            process_tree_section.add_process(evil_process)
            process_tree_section.add_process(
                ProcessItem(987, "runzeroday.exe", "C:\\runzeroday.exe -f insert_bad_spelling"))
            process_tree_section.add_process(benign_process)

            result.add_section(process_tree_section)

            # ==================================================================
            # TABLE section:
            #   This section allows the service writer to have their content displayed in a
            #   table format in the UI. The body argument must be a list [] of dict {} objects.
            #   A dict object can have a key/value pair where the value is a flat nested
            #   dictionary, and this nested dictionary will be displayed as a nested table
            #   within a cell.
            table_section = ResultTableSection('Example of a TABLE section')
            # Use the TableRow class to help add rows to the Table section
            table_section.add_row(TableRow(a_str="Some string1", extra_column_here="confirmed",
                                           a_bool=False, an_int=101))
            table_section.add_row(TableRow({"a_str": "Some string2", "a_bool": True,
                                            "an_int": "to_be_overriden_by_kwargs"}, an_int=102))
            table_section.add_row(TableRow(a_str="Some string3", a_bool=False, an_int=103))
            # Valid values for the items in the TableRow are:
            # str, int, bool, None, or a dict of those values
            table_section.add_row(TableRow(
                {"a_str": "Some string4", "a_bool": None, "an_int": -1000000000000000000},
                {"extra_column_there": "confirmed",
                 "nested_key_value_pair": {
                     "a_str": "Some string3",
                     "a_bool": False,
                     "nested_kv_thats_too_deep": {
                         "a_str": "Some string3",
                         "a_bool": False,
                         "an_int": 103,
                     },
                 }}))
            result.add_section(table_section)

            # ==================================================================
            # Re-submitting files to the system
            #   Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

            # Embedded files can also have their own classification!
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"CLASSIFIED!!!__" + data.encode())
            request.add_extracted(temp_path, "classified.doc", "Classified file ... don't look",
                                  classification=cl_engine.RESTRICTED)

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #   Adding supplementary files will save them on the datastore for future
            #   reference but won't reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(url_sub_section.body)
            request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")

            # Like embedded files, you can add more than one supplementary file
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json",
                                      "This is the json_body as a JSON file")

            # ==================================================================
            # Zeroize on safe tags
            #   When this feature is turned on, the section will get its score set to zero if
            #   all its tags were safelisted by the safelisting engine
            zero_section = ResultSection('Example of zeroize-able section',
                                         zeroize_on_tag_safe=True)
            zero_section.set_heuristic(2)
            zero_section.add_line("This section will have a zero score if all tags are safelisted.")
            zero_section.add_tag('network.static.ip', '127.0.0.1')
            result.add_section(zero_section)

            # ==================================================================
            # Auto-collapse
            #   When this feature is turned on, the section will be collapsed when first displayed
            collapse_section = ResultSection('Example of auto-collapse section',
                                             auto_collapse=True)
            collapse_section.set_heuristic(2)
            collapse_section.add_line("This section was collapsed when first loaded in the UI")
            result.add_section(collapse_section)

            # ==================================================================
            # Image section
            #   This type of section allows the service writer to display images to the user
            image_section = ResultImageSection(request, 'Example of Image section')
            for x in range(6):
                image_section.add_image(f'data/000{x + 1}.jpg', f'000{x + 1}.jpg',
                                        f'ResultSample screenshot 000{x + 1}',
                                        ocr_heuristic_id=6)
            result.add_section(image_section)

            # ==================================================================
            # Multi section
            #   This type of section allows the service writer to display multiple section types
            #   in the same result section. Here's a concrete example of this:
            multi_section = ResultMultiSection('Example of Multi-typed section')
            multi_section.add_section_part(
                TextSectionBody(body="We have detected very high entropy in multiple sections "
                                     "of your file; this file is most likely packed or "
                                     "encrypted.\n\nHere are the affected sections:"))
            section_count = random.randint(1, 4)
            for x in range(section_count):
                multi_section.add_section_part(KVSectionBody(section_name=f".UPX{x}",
                                                             offset=f'0x00{8 + x}000',
                                                             size='4196 bytes'))
                graph_part = GraphSectionBody()
                graph_part.set_colormap(0, 8, [7 + random.random() for _ in range(20)])
                multi_section.add_section_part(graph_part)
                if x != section_count - 1:
                    multi_section.add_section_part(DividerSectionBody())
                multi_section.add_tag("file.pe.sections.name", f".UPX{x}")
            multi_section.set_heuristic(5)
            result.add_section(multi_section)

            # ==================================================================
            # Propagate temporary submission data to other services
            #   Sometimes two services can work in tandem, where one extracts a piece of
            #   information that the other uses to do its work. This is how a service can set
            #   temporary data that subscribing services can use.
            request.temp_submission_data['kv_section'] = kv_section.body
            request.temp_submission_data['process_tree_section'] = process_tree_section.body
            request.temp_submission_data['url_section'] = url_sub_section.body

            # ==================================================================
            # Wrap-up:
            #   Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Create an empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized results file, we will completely randomize the results.
            #   The content of those results does not matter since we've already shown you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # Random number of sections
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result
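
# _create_random_section() is referenced above but not shown in this excerpt. A hypothetical
# sketch of what such a helper might look like, reusing ResultSection and get_random_phrase
# from the service's own imports (an illustration, not the sample service's actual code):
import random


def create_random_section(depth: int = 0) -> ResultSection:
    # Build a section with a random title and a few random lines
    section = ResultSection(get_random_phrase())
    section.add_lines([get_random_phrase() for _ in range(random.randint(1, 4))])
    # Score the section about half of the time
    if random.random() > 0.5:
        section.set_heuristic(random.randint(1, 4))
    # Nest a child section about half of the time, capped to avoid deep trees
    if depth < 2 and random.random() > 0.5:
        section.add_subsection(create_random_section(depth + 1))
    return section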