def run_strings_analysis(self, apk_file, result: Result):
    """Run 'aapt d strings' on the APK and report on its string pool.

    Flags an empty string pool (heuristic 6), a low string count
    (heuristic 7) and a single-language build (heuristic 8), and tags
    every locale found via 'aapt d configurations'.

    :param apk_file: Path to the APK being analysed
    :param result: Result object the sections are attached to
    """
    string_args = ['d', 'strings', apk_file]
    strings, _ = self.run_appt(string_args)
    # NOTE: "unitialized" is aapt's own spelling in its output -- do not "fix" it here.
    if not strings or strings == "String pool is unitialized.\n":
        ResultSection("No strings found in APK",
                      body="This is highly unlikely and most-likely malicious.",
                      parent=result, heuristic=Heuristic(6))
    else:
        res_strings = ResultSection("Strings Analysis", parent=result)

        # Collect and tag locales declared in the APK configurations
        config_args = ['d', 'configurations', apk_file]
        configs, _ = self.run_appt(config_args)
        languages = []
        for line in configs.splitlines():
            config = line.upper()
            if config in ISO_LOCALES:
                languages.append(config)
                res_strings.add_tag('file.apk.locale', config)

        # First output line contains "... N entries ... M styles ..." --
        # pull both counts out of it (assumes aapt's format; TODO confirm)
        data_line = strings.split("\n", 1)[0]
        count = int(data_line.split(" entries")[0].rsplit(" ", 1)[1])
        styles = int(data_line.split(" styles")[0].rsplit(" ", 1)[1])

        if count < 50:
            ResultSection("Low volume of strings, this is suspicious.",
                          parent=res_strings, body_format=BODY_FORMAT.MEMORY_DUMP,
                          body=safe_str(strings), heuristic=Heuristic(7))

        if len(languages) < 2:
            ResultSection("This app is not built for multiple languages. This is unlikely.",
                          parent=res_strings, heuristic=Heuristic(8))

        res_strings.add_line(f"Total string count: {count}")
        res_strings.add_line(f"Total styles: {styles}")
        if languages:
            res_strings.add_line(f"Languages: {', '.join(languages)}")
def manage_threat_level(self, data, result):
    """Map the reported threat level to a heuristic and add a result section.

    BUG FIX: the original code left `threat_section` unbound (NameError on
    `result.add_section`) whenever data['threat_level'] was not one of the
    three known values. Unknown levels are now silently skipped.

    :param data: Report dict containing a 'threat_level' key
    :param result: Result object the section is attached to
    """
    heuristic_by_level = {
        'Low Risk': 1,
        'Moderate Risk': 2,
        'High Risk': 3,
    }
    threat_level = data['threat_level']
    heur_id = heuristic_by_level.get(threat_level)
    if heur_id is None:
        # Unknown/unsupported threat level: nothing to report
        return
    result.add_section(ResultSection("threat level : {0}".format(threat_level),
                                     heuristic=Heuristic(heur_id)))
def _get_category_section(self, category: str, tags: Iterator[AVClassTag]) -> ResultSection:
    """
    Gets a `ResultSection` for a list of tags from a single category.

    Result contains table with AVclass tag information in descending
    order by rank.

    :param category: Category of tags
    :param tags: Tags belonging to category
    :return: `ResultSection`
    """
    ranked = sorted(tags, key=lambda avtag: avtag.rank, reverse=True)
    category_name, heur_id, tag_type = AVCLASS_CATEGORY[category]

    # Build one table row per tag, highest rank first
    rows = []
    for avtag in ranked:
        rows.append({
            'name': avtag.name,
            'category': category_name,
            'path': avtag.path,
            'rank': avtag.rank,
        })

    heuristic = None
    if heur_id is not None:
        heuristic = Heuristic(heur_id)

    section = ResultSection(f'AVclass extracted {len(ranked)} {category_name} tags',
                            body=json.dumps(rows),
                            body_format=BODY_FORMAT.TABLE,
                            heuristic=heuristic)
    if tag_type is not None:
        for avtag in ranked:
            section.add_tag(tag_type, avtag.name)
    return section
def additional_parsing(self, file_path: str) -> Optional[ResultSection]:
    """Extract link-annotation URIs from a PDF and report them as IOCs.

    Walks every page's /Annots entries, keeps /Link annotations whose /A
    action carries a /URI matching FULL_URI, then tags all IOCs found in
    the collected URLs.

    :param file_path: Path to the PDF file
    :return: ResultSection with heuristic 27 (signature 'one_page' for
             single-page documents), or None when no URLs are found or
             pikepdf fails to parse the file
    """
    urls = set()
    try:
        with pikepdf.open(file_path) as pdf:
            num_pages = len(pdf.pages)
            for page in pdf.pages:
                if '/Annots' not in page:
                    continue
                for annot in page['/Annots'].as_list():
                    if annot.get('/Subtype') == '/Link':
                        if '/A' not in annot:
                            continue
                        _url = annot['/A'].get('/URI')
                        # Skip pikepdf objects with no usable string form
                        if not hasattr(_url, '__str__'):
                            continue
                        url = str(_url)
                        # Only keep fully-qualified URIs
                        if re.match(FULL_URI, url):
                            urls.add(url)
        if not urls:
            return None
        patterns = PatternMatch()
        body = '\n'.join(urls)
        tags: dict[str, set[bytes]] = patterns.ioc_match(body.encode())
        result = ResultSection(
            'URL in Annotations',
            heuristic=Heuristic(
                27, signature='one_page' if num_pages == 1 else None),
            body=body)
        for ty, vals in tags.items():
            for val in vals:
                result.add_tag(ty, val)
        return result
    except Exception as e:
        # Best effort: malformed PDFs are logged, not fatal
        self.log.warning(f'pikepdf failed to parse sample: {e}')
        return None
def stack_result(section: List[bytes]) -> Optional[ResultSection]:
    """Generates a ResultSection from floss stacked strings output section.

    Fixes: corrected the user-facing title typo ("Sacked" -> "Stacked") and
    moved the empty-input check before section construction so no section
    object is built when None is returned.

    :param section: Raw floss output lines; the first entry is the header
    :return: Populated ResultSection, or None when there are no strings
    """
    strings = section[1:]  # first entry is the section header
    if not strings:
        return None

    result = ResultSection('FLARE FLOSS Stacked Strings',
                           body_format=BODY_FORMAT.MEMORY_DUMP,
                           heuristic=Heuristic(3))
    assert result.heuristic

    # One subsection per group of similar strings
    groups = group_strings(s.decode() for s in strings)
    for group in groups:
        res = ResultSection(
            f"Group: '{min(group, key=len)}' Strings: {len(group)}",
            body='\n'.join(group),
            body_format=BODY_FORMAT.MEMORY_DUMP)
        for string in group:
            # For very large groups only tag network IOCs to limit noise
            ioc_tag(string.encode(), res, just_network=len(group) > 1000)
        result.add_subsection(res)

    if any(res.tags for res in result.subsections):
        result.heuristic.add_signature_id('stacked_ioc')
    return result
def decoded_result(text: bytes) -> Optional[ResultSection]:
    """Generates a ResultSection from floss decoded strings output section.

    Fixes: guard against empty input -- the original raised IndexError on
    `lines[0]` when `text` had no lines.

    :param text: Raw floss decoded-strings output
    :return: Populated ResultSection, or None when nothing was decoded
    """
    lines = text.splitlines()
    if not lines:
        return None
    # Replace floss's header with a stable title line
    lines[0] = b'Most likely decoding functions:'
    body = b'\n'.join(lines[:-1])

    # Decoded entries look like "[TYPE]  0xADDR  string"
    strings = re.findall(rb'^\[[A-Z]+\]\s+0x[0-9A-F]+\s+(.+)', body, flags=re.M)
    if not strings:
        return None

    result = ResultSection('FLARE FLOSS Decoded Strings',
                           body_format=BODY_FORMAT.MEMORY_DUMP,
                           heuristic=Heuristic(1))
    assert result.heuristic
    ioc = False
    for string in strings:
        # For very large outputs only tag network IOCs to limit noise
        ioc = ioc_tag(string, result, just_network=len(strings) > 1000) or ioc
        result.add_tag('file.string.decoded', string[:75])
    if ioc:
        result.heuristic.add_signature_id('decoded_ioc')
    result.add_line(body.decode())
    return result
def add_ip_tags(self):
    """Adds tags for urls and ip addresses from given lists."""
    if not (self.url_list or self.ip_list):
        return

    sec_iocs = ResultSection("ViperMonkey has found the following IOCs:",
                             parent=self.result, heuristic=Heuristic(4))

    # URLs: tag the full URI, plus the hostname when it is not a bare IP
    for url in set(self.url_list):
        sec_iocs.add_line(url)
        sec_iocs.add_tag('network.static.uri', url)
        try:
            host = urlparse(url).hostname
            if not re.match(IP_ONLY_REGEX, host):
                sec_iocs.add_tag('network.static.domain', host)
        except Exception:
            # Unparseable URL / missing hostname: URI tag already added
            pass

    # IPs: split off the port when one is present
    for ip in set(self.ip_list):
        sec_iocs.add_line(ip)
        if ':' in ip:
            net_ip, net_port = ip.split(':')
            sec_iocs.add_tag('network.static.ip', net_ip)
            sec_iocs.add_tag('network.port', net_port)
        else:
            sec_iocs.add_tag('network.static.ip', ip)
def find_scripts_and_exes(apktool_out_dir: str, result: Result):
    """Walk the apktool output and report shell scripts, ELF executables and
    embedded APKs found inside the package.

    BUG FIX: files typed "android/apk" were appended to `executables`
    instead of `apks`, so the "Other APKs" section (heuristic 19) could
    never be produced.

    :param apktool_out_dir: Root of the apktool extraction
    :param result: Result object the sections are attached to
    """
    scripts = []
    executables = []
    apks = []
    # We are gonna do the full apktool output dir here but in case we want to do less,
    # you can edit the test_path list
    test_paths = [apktool_out_dir]
    for path in test_paths:
        for root, _, files in os.walk(path):
            for f in files:
                if f.endswith(".smali"):
                    continue
                cur_file = os.path.join(root, f)
                file_type = fileinfo(cur_file)['type']
                if "code/sh" in file_type:
                    scripts.append(cur_file.replace(apktool_out_dir, ''))
                elif "executable/linux" in file_type:
                    executables.append(cur_file.replace(apktool_out_dir, ''))
                elif "android/apk" in file_type:
                    apks.append(cur_file.replace(apktool_out_dir, ''))

    _add_file_listing(result, "Shell script(s) found inside APK", 1, scripts)
    _add_file_listing(result, "Executable(s) found inside APK", 2, executables)
    _add_file_listing(result, "Other APKs where found inside the APK", 19, apks)


def _add_file_listing(result: Result, title: str, heur_id: int, paths):
    """Add a section listing up to 20 of the given paths under `title`."""
    if not paths:
        return
    section = ResultSection(title, parent=result, heuristic=Heuristic(heur_id))
    for p in sorted(paths)[:20]:
        section.add_line(p)
    if len(paths) > 20:
        section.add_line(f"and {len(paths) - 20} more...")
def check_file_name_anomalies(self, filename):
    """Filename anomalies detection"""
    is_double_ext, f_ext = self.fna_check_double_extension(filename)
    is_empty_filename = self.fna_check_empty_filename(filename, f_ext)
    too_many_whitespaces = self.fna_check_filename_ws(filename, f_ext)
    has_unicode_ext_hiding_ctrls = self.fna_check_unicode_bidir_ctrls(filename, f_ext)

    file_res = Result()
    anomaly_found = (too_many_whitespaces or is_double_ext
                     or has_unicode_ext_hiding_ctrls or is_empty_filename)
    if anomaly_found:
        res = ResultSection(title_text="File Name Anomalies", parent=file_res)

        # Tag filename as it might be of interest
        res.add_tag("file.name.extracted", filename)

        # Strip Unicode bidirectional controls so the "actual" name is readable
        bidir_controls = {"\u202E", "\u202B", "\u202D", "\u202A", "\u200E", "\u200F"}
        fn_no_controls = "".join(c for c in filename if c not in bidir_controls)
        res.add_line(f"Actual file name: {wrap_bidir_unicode_string(fn_no_controls)}")

        # (flag, section title, heuristic id, anomaly tag, behavior tag)
        anomaly_table = [
            (too_many_whitespaces, "Too many whitespaces", 1,
             "TOO_MANY_WHITESPACES", "File name has too many whitespaces"),
            (is_double_ext, "Double file extension", 2,
             "DOUBLE_FILE_EXTENSION", "Double file extension"),
            (has_unicode_ext_hiding_ctrls, "Hidden launchable file extension", 3,
             "UNICODE_EXTENSION_HIDING", "Real file extension hidden using unicode trickery"),
            (is_empty_filename, "Empty Filename", 4,
             "FILENAME_EMPTY_OR_ALL_SPACES", "File name is empty or all whitespaces"),
        ]
        for triggered, title, heur_id, anomaly_tag, behavior_tag in anomaly_table:
            if triggered:
                sec = ResultSection(title, parent=res, heuristic=Heuristic(heur_id))
                sec.add_tag("file.name.anomaly", anomaly_tag)
                sec.add_tag("file.behavior", behavior_tag)
    return file_res
def bbcrack_results(self, request: ServiceRequest) -> Optional[ResultSection]:
    """
    Balbuzard's bbcrack XOR'd strings to find embedded patterns/PE files of interest

    Args:
        request: AL request object with result section

    Returns:
        The result section (with request.result as its parent) if one is created
    """
    x_res = (ResultSection("BBCrack XOR'd Strings:",
                           body_format=BODY_FORMAT.MEMORY_DUMP,
                           heuristic=Heuristic(2)))
    # Deep scans use the more aggressive (slower) bbcrack level
    if request.deep_scan:
        xresult = bbcrack(request.file_contents, level=2)
    else:
        xresult = bbcrack(request.file_contents, level=1)

    xformat_string = '%-20s %-7s %-7s %-50s'
    xor_al_results = []
    xindex = 0
    for transform, regex, offset, score, smatch in xresult:
        if regex == 'EXE_HEAD':
            # XOR'd PE header: dump the decoded bytes and try to extract a PE
            xindex += 1
            xtemp_file = os.path.join(self.working_directory,
                                      f"EXE_HEAD_{xindex}_{offset}_{score}.unXORD")
            with open(xtemp_file, 'wb') as xdata:
                xdata.write(smatch)
            pe_extracted = self.pe_dump(request, xtemp_file, offset,
                                        file_string="xorpe_decoded",
                                        msg="Extracted xor file during FrakenStrings analysis.")
            if pe_extracted:
                xor_al_results.append(xformat_string % (str(transform), offset, score,
                                                        "[PE Header Detected. "
                                                        "See Extracted files]"))
        else:
            # Tag the decoded match unless it is one of the PE-related patterns
            if not regex.startswith("EXE_"):
                x_res.add_tag(self.BBCRACK_TO_TAG.get(regex, regex), smatch)
            xor_al_results.append(xformat_string % (str(transform), offset, score,
                                                    safe_str(smatch)))

    # Result Graph:
    if len(xor_al_results) > 0:
        xcolumn_names = ('Transform', 'Offset', 'Score', 'Decoded String')
        x_res.add_line(xformat_string % xcolumn_names)
        x_res.add_line(xformat_string % tuple('-' * len(s) for s in xcolumn_names))
        x_res.add_lines(xor_al_results)
        request.result.add_section(x_res)
        return x_res
    return None
def add_image(self, path: str, name: str, description: str,
              classification: Optional[Classification] = None,
              ocr_heuristic_id: Optional[int] = None) -> dict:
    """
    Add a image file to be viewed in the result section.

    :param path: Complete path to the image file
    :param name: Display name of the image file
    :param description: Descriptive text about the image file
    :param classification: Classification of the image file (default: service classification)
    :param ocr_heuristic_id: Heuristic to raise when OCR finds suspicious strings
    :return: dict with 'img' and 'thumb' upload info (name/description/sha256),
             plus an 'ocr_section' entry when OCR produced detections
    """
    with tempfile.NamedTemporaryFile(dir=self._working_directory, delete=False) as outtmp:
        with tempfile.NamedTemporaryFile(dir=self._working_directory, delete=False) as thumbtmp:
            # Load Image
            img = Image.open(path)

            # Force image format switch to prevent exploit to cross-over
            img_format = 'WEBP'
            if img.format == img_format:
                img_format = 'PNG'

            if img_format == "WEBP" and (img.height > WEBP_MAX_SIZE or img.width > WEBP_MAX_SIZE):
                # Maintain aspect ratio
                # NOTE(review): Image.ANTIALIAS is deprecated and removed in
                # Pillow >= 10 -- migrate to Image.LANCZOS (same filter).
                img.thumbnail((WEBP_MAX_SIZE, WEBP_MAX_SIZE), Image.ANTIALIAS)

            # Save and upload new image
            img.save(outtmp.name, format=img_format)
            img_res = self.task.add_supplementary(outtmp.name, name, description,
                                                  classification, is_section_image=True)

            # Save and upload thumbnail
            img.thumbnail((128, 128))
            img.save(thumbtmp.name, format=img_format, optimize=True)
            thumb_res = self.task.add_supplementary(thumbtmp.name, f"{name}.thumb",
                                                    f"{description} (thumbnail)",
                                                    classification, is_section_image=True)

    # Only expose the fields the UI needs
    data = {'img': {k: v for k, v in img_res.items() if k in ['name', 'description', 'sha256']},
            'thumb': {k: v for k, v in thumb_res.items() if k in ['name', 'description', 'sha256']}}

    if ocr_heuristic_id:
        try:
            detections = ocr_detections(path)
            if detections:
                # One signature per detection category, weighted by hit count
                heuristic = Heuristic(ocr_heuristic_id,
                                      signatures={k: len(v) for k, v in detections.items()})
                ocr_section = ResultKeyValueSection(
                    f'Suspicious strings found during OCR analysis on file {name}')
                ocr_section.set_heuristic(heuristic)
                for k, v in detections.items():
                    ocr_section.set_item(k, v)
                data['ocr_section'] = ocr_section
        except ImportError as e:
            # OCR backend unavailable -- best effort, just log it
            self.log.warning(str(e))

    return data
def generate_results(presults, result, analysis_results, request):
    """Turn UNPACME analysis output into result sections and extracted files.

    Fixes: corrected the user-facing typo "Successully" -> "Successfully",
    dropped the placeholder-less f-string prefix, and removed the redundant
    `len(...) > 0` guard (iterating an empty list is a no-op).

    :param presults: Dict with 'unpacked' flag and 'unpacked_samples' list
    :param result: Result object sections are added to
    :param analysis_results: Raw analysis output; 'results' is embedded as JSON
    :param request: Service request used to register extracted files
    :return: The (result, request) pair, updated in place
    """
    if presults['unpacked']:
        result.add_section(
            ResultSection("Successfully unpacked binary.", heuristic=Heuristic(1)))

    for r in presults['unpacked_samples']:
        # One section per malware identification on the unpacked sample
        for rm in r['malware_id']:
            section = ResultSection("{} - {}".format(r['sha256'], rm['name']),
                                    heuristic=Heuristic(2))
            section.add_line("Details: {}".format(rm['reference']))
            result.add_section(section)
        request.add_extracted(r['data_path'], r['sha256'],
                              f'Unpacked from {request.sha256}')

    result.add_section(
        ResultSection("UNPACME Detailed Results",
                      body_format=BODY_FORMAT.JSON,
                      body=json.dumps(analysis_results['results'])))
    return result, request
def check_for_b64(self, data, section):
    """Search and decode base64 strings in sample data.

    Args:
        data: Data to be parsed (str)
        section: Sub-section to be modified if base64 found

    Returns:
        decoded: Boolean which is true if base64 found

    Fixes:
        * The original concatenated str + bytes (TypeError on Python 3), so
          decoding always failed silently; the decoded bytes are now turned
          back into str.
        * b64_string_raw was passed to re.sub as a *pattern*, where base64's
          '+' is a regex metacharacter; a literal str.replace is used instead.
    """
    b64_matches = []
    # b64_matches_raw will be used for replacing in case b64_matches are modified
    b64_matches_raw = []
    decoded_param = data
    decoded = False
    for b64_match in re.findall(
            '([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}[\r]?[\n]?){2,})',
            re.sub('\x3C\x00\x20{2}\x00', '', data)):
        b64 = b64_match.replace('\n', '').replace('\r', '').replace(
            ' ', '').replace('<', '')
        uniq_char = ''.join(set(b64))
        # Require some entropy and a plausible base64 length
        if len(uniq_char) > 6 and len(b64) >= 16 and len(b64) % 4 == 0:
            b64_matches.append(b64)
            b64_matches_raw.append(b64_match)

    for b64_string, b64_string_raw in zip(b64_matches, b64_matches_raw):
        try:
            base64data = binascii.a2b_base64(b64_string)
            # Decode base64 bytes, add a space to beginning as it may be
            # stripped off while using regex; re-decode after the ascii
            # round-trip so the result is a str, not bytes
            base64data_decoded = ' ' + base64data.decode('utf-16').encode(
                'ascii', 'ignore').decode('ascii')
            # Replace base64 in param with decoded string (literal replace)
            decoded_param = decoded_param.replace(b64_string_raw, base64data_decoded)
            decoded = True
        except Exception:
            # Not valid base64/UTF-16 after all -- leave the param untouched
            pass

    if decoded:
        decoded_section = ResultSection('Possible Base64 found',
                                        parent=section, heuristic=Heuristic(5))
        decoded_section.add_line(
            f'Possible Base64 Decoded Parameters: {decoded_param}')
        self.find_ip(decoded_param)
    return decoded
def embedded_pe_results(
        self, request: ServiceRequest) -> Optional[ResultSection]:
    """
    Finds, extracts and reports embedded executables

    Args:
        request: AL request object with result section

    Returns:
        The result section (with request.result as its parent) if one is created
    """
    # PE Strings
    dos_marker = rb'(?s)This program cannot be run in DOS mode'
    header_pattern = rb'(?s)MZ.{32,1024}PE\000\000.+'

    found_pe = False
    # Search from offset 1 -- presumably so a sample that *is* a PE does not
    # match itself (TODO confirm)
    for candidate in re.findall(header_pattern, request.file_contents[1:]):
        if not re.search(dos_marker, candidate):
            continue
        digest = hashlib.sha256(candidate).hexdigest()
        dump_path = os.path.join(self.working_directory,
                                 "EXE_TEMP_{}".format(digest))
        with open(dump_path, 'wb') as handle:
            handle.write(candidate)
        # Note the short-circuit: once a PE has been dumped successfully,
        # later candidates are not passed to pe_dump
        found_pe = found_pe or self.pe_dump(
            request, dump_path, offset=0, file_string="embed_pe",
            msg="PE header strings discovered in sample",
            fail_on_except=True)

    # Report embedded PEs if any are found
    if not found_pe:
        return None
    return ResultSection(
        "Embedded PE header discovered in sample. See extracted files.",
        heuristic=Heuristic(3), parent=request.result)
def execute(self, request):
    """Main service entrypoint: decompile and extract a JAR, analyse its
    content, and register result sections plus extracted/supplementary files.

    :param request: Service request for the submitted file
    """
    request.result = Result()
    request.set_service_context(self.get_tool_version())
    temp_filename = request.file_path
    filename = os.path.basename(temp_filename)
    extract_dir = os.path.join(self.working_directory, f"(unknown)_extracted")
    decompiled_dir = os.path.join(self.working_directory, f"(unknown)_decompiled")
    file_res = request.result
    new_files = []
    supplementary_files = []
    imp_res_list = []
    res_list = []

    if request.file_type == "java/jar":
        self.decompile_jar(temp_filename, decompiled_dir)
        if self.jar_extract(temp_filename, extract_dir):
            # Analysis properties -- reset per request since they live on self
            self.classloader_found = 0
            self.security_found = 0
            self.url_found = 0
            self.runtime_found = 0
            self.applet_found = 0
            self.manifest_tags = []
            self.signature_block_certs = []

            def analyze_file(root, cf, file_res, imp_res_list, supplementary_files,
                             decompiled_dir, extract_dir):
                # root/cf are bytes because os.walk below is given a bytes path
                cur_file_path = os.path.join(root.decode('utf-8'), cf.decode('utf-8'))
                with open(cur_file_path, "rb") as cur_file:
                    start_bytes = cur_file.read(24)

                    ##############################
                    # Executables in JAR
                    ##############################
                    cur_ext = os.path.splitext(cf)[1][1:].upper()
                    if start_bytes[:2] == b"MZ":
                        mz_res = dict(
                            title_text=f"Embedded executable file found: {cf} "
                                       "There may be a malicious intent.",
                            heur_id=1,
                            tags=[('file.behavior', "Embedded PE")],
                            score_condition=APPLET_MZ,
                        )
                        imp_res_list.append(mz_res)

                    ##############################
                    # Launchable in JAR
                    ##############################
                    elif cur_ext in G_LAUNCHABLE_EXTENSIONS:
                        l_res = dict(
                            title_text=f"Launch-able file type found: {cf}"
                                       "There may be a malicious intent.",
                            heur_id=2,
                            tags=[('file.behavior', "Launch-able file in JAR")],
                            score_condition=APPLET_MZ,
                        )
                        imp_res_list.append(l_res)

                    # Class files get the deeper bytecode analysis
                    if cur_file_path.upper().endswith('.CLASS'):
                        self.analyse_class_file(file_res, cf, cur_file, cur_file_path,
                                                start_bytes, imp_res_list,
                                                supplementary_files,
                                                decompiled_dir, extract_dir)

            for root, _, files in os.walk(extract_dir.encode('utf-8')):
                logging.info(f"Extracted: {root} - {files}")
                # if the META-INF folder is encountered
                if root.upper().endswith(b'META-INF'):  # only top level meta
                    self.analyse_meta_information(file_res, root,
                                                  supplementary_files, extract_dir)
                    continue
                # Analyse the files of one directory concurrently
                with ThreadPoolExecutor() as executor:
                    for cf in files:
                        executor.submit(analyze_file, root, cf, file_res,
                                        imp_res_list, supplementary_files,
                                        decompiled_dir, extract_dir)

            res = ResultSection("Analysis of the JAR file")
            res_meta = ResultSection("[Meta Information]", parent=res)
            if len(self.manifest_tags) > 0:
                res_manifest = ResultSection("Manifest File Information Extract",
                                             parent=res_meta)
                for tag, val in self.manifest_tags:
                    res_manifest.add_tag(tag, val)
            for res_cert in self.signature_block_certs:
                res_meta.add_subsection(res_cert)

            # Summarize suspicious class usage counted during analyze_file
            if self.runtime_found > 0 \
                    or self.applet_found > 0 \
                    or self.classloader_found > 0 \
                    or self.security_found > 0 \
                    or self.url_found > 0:
                res.add_line("All suspicious class files were saved as supplementary files.")
                res_class = ResultSection("[Suspicious classes]", parent=res)
                if self.runtime_found > 0:
                    ResultSection("Runtime Found",
                                  body=f"java/lang/Runtime: {self.runtime_found}",
                                  heuristic=Heuristic(10), parent=res_class)
                if self.applet_found > 0:
                    ResultSection("Applet Found",
                                  body=f"java/applet/Applet: {self.applet_found}",
                                  heuristic=Heuristic(6), parent=res_class)
                if self.classloader_found > 0:
                    ResultSection("Classloader Found",
                                  body=f"java/lang/ClassLoader: {self.classloader_found}",
                                  heuristic=Heuristic(7), parent=res_class)
                if self.security_found > 0:
                    ResultSection("Security Found",
                                  body=f"java/security/*: {self.security_found}",
                                  heuristic=Heuristic(8), parent=res_class)
                if self.url_found > 0:
                    ResultSection("URL Found",
                                  body=f"java/net/URL: {self.url_found}",
                                  heuristic=Heuristic(9), parent=res_class)
            res_list.append(res)

    # Add results if any
    self.recurse_add_res(file_res, imp_res_list, new_files)
    for res in res_list:
        file_res.add_section(res)

    # Submit embedded files
    if len(new_files) > 0:
        new_files = sorted(list(set(new_files)))
        txt = f"Extracted from 'JAR' file (unknown)"
        for embed in new_files:
            request.add_extracted(embed,
                                  embed.replace(extract_dir + "/", "").replace(decompiled_dir + "/", ""),
                                  txt, safelist_interface=self.api_interface)

    if len(supplementary_files) > 0:
        supplementary_files = sorted(list(set(supplementary_files)))
        for path, name, desc in supplementary_files:
            request.add_supplementary(path, name, desc)
def validate_certs(apktool_out_dir: str, result: Result):
    """Inspect the APK signing certificate(s) with `keytool -printcert` and
    raise heuristics for suspicious properties (self-signed, missing country,
    implausible validity window, unconventional file name, unsigned APK).

    :param apktool_out_dir: Root of the apktool extraction
    :param result: Result object the sections are attached to
    """
    has_cert = False
    for root, _, files in os.walk(os.path.join(apktool_out_dir, "original", "META-INF")):
        for f in files:
            cur_file = os.path.join(root, f)
            stdout, stderr = Popen(["keytool", "-printcert", "-file", cur_file],
                                   stderr=PIPE, stdout=PIPE).communicate()
            stdout = safe_str(stdout)
            if stdout:
                if "keytool error" not in stdout:
                    has_cert = True
                    issuer = ""
                    owner = ""
                    country = ""
                    valid_from = ""
                    valid_to = ""
                    valid_year_end = 0
                    valid_year_start = 0
                    valid_until_date = time.time()
                    # Google Play requires certificates valid until at least this date
                    play_store_min = 'Sat Oct 22 00:00:00 2033'
                    play_store_min_valid_date = time.mktime(
                        time.strptime(play_store_min, "%a %b %d %H:%M:%S %Y"))

                    # Parse keytool's human-readable output line by line
                    for line in stdout.splitlines():
                        if "Owner:" in line:
                            owner = line.split(": ", 1)[1]
                            country = owner.split("C=")
                            if len(country) != 1:
                                country = country[1]
                            else:
                                country = ""

                        if "Issuer:" in line:
                            issuer = line.split(": ", 1)[1]

                        if "Valid from:" in line:
                            valid_from = line.split(": ", 1)[1].split(" until:")[0]
                            valid_to = line.rsplit(": ", 1)[1]
                            valid_from_splitted = valid_from.split(" ")
                            valid_to_splitted = valid_to.split(" ")
                            valid_year_start = int(valid_from_splitted[-1])
                            valid_year_end = int(valid_to_splitted[-1])
                            # Drop the timezone token so strptime can parse it
                            valid_until = " ".join(valid_to_splitted[:-2] + valid_to_splitted[-1:])
                            valid_until_date = time.mktime(
                                time.strptime(valid_until, "%a %b %d %H:%M:%S %Y"))

                    res_cert = ResultSection("Certificate Analysis", body=safe_str(stdout),
                                             parent=result,
                                             body_format=BODY_FORMAT.MEMORY_DUMP)
                    res_cert.add_tag('cert.valid.start', valid_from)
                    res_cert.add_tag('cert.valid.end', valid_to)
                    # NOTE(review): tag name 'cert.issues' looks like a typo for
                    # 'cert.issuer' -- confirm against the tag ontology
                    res_cert.add_tag('cert.issues', issuer)
                    res_cert.add_tag('cert.owner', owner)

                    if owner == issuer:
                        ResultSection("Certificate is self-signed", parent=res_cert,
                                      heuristic=Heuristic(10))

                    if not country:
                        ResultSection("Certificate owner has no country", parent=res_cert,
                                      heuristic=Heuristic(11))

                    if valid_year_start < 2008:
                        ResultSection("Certificate valid before first android release",
                                      parent=res_cert, heuristic=Heuristic(12))

                    if valid_year_start > valid_year_end:
                        ResultSection("Certificate expires before validity date starts",
                                      parent=res_cert, heuristic=Heuristic(16))

                    if (valid_year_end - valid_year_start) > 30:
                        ResultSection("Certificate valid more then 30 years",
                                      parent=res_cert, heuristic=Heuristic(13))

                    if valid_until_date < play_store_min_valid_date:
                        ResultSection("Certificate not valid until minimum valid playstore date",
                                      parent=res_cert, heuristic=Heuristic(20))

                    if country:
                        # Country codes are two non-numeric characters
                        try:
                            int(country)
                            is_int_country = True
                        except Exception:
                            is_int_country = False
                        if len(country) != 2 or is_int_country:
                            ResultSection("Invalid country code in certificate owner",
                                          parent=res_cert, heuristic=Heuristic(14))

                    if f != "CERT.RSA":
                        ResultSection(f"Certificate name not using conventional name: {f}",
                                      parent=res_cert, heuristic=Heuristic(15))

    if not has_cert:
        ResultSection("This APK is not signed", parent=result, heuristic=Heuristic(9))
def execute(self, request):
    """Main service entrypoint: run ViperMonkey (via a Python 2 wrapper) on
    the sample and report macro actions, IOCs, PowerShell and VBA built-ins.

    :param request: Service request for the submitted file
    """
    self.result = Result()
    request.result = self.result
    self.request = request
    self.ip_list = []
    self.url_list = []
    self.found_powershell = False
    self.file_hashes = []

    vmonkey_err = False
    actions = []
    external_functions = []
    tmp_iocs = []
    output_results = {}

    # Running ViperMonkey
    try:
        # NOTE(review): command is a single string run with shell=True;
        # file_path comes from the framework, but an argv list with
        # shell=False would be safer against shell metacharacters.
        cmd = " ".join([
            PYTHON2_INTERPRETER,
            os.path.join(os.path.dirname(__file__), 'vipermonkey_compat.py2'),
            request.file_path
        ])
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
        stdout, _ = p.communicate()

        # Read output: the wrapper emits one JSON object on its own line
        if stdout:
            for l in stdout.splitlines():
                if l.startswith(b"{") and l.endswith(b"}"):
                    try:
                        output_results = json.loads(l)
                    except UnicodeDecodeError:
                        output_results = json.loads(
                            l.decode("utf-8", "replace"))
                    break

            # Checking for tuple in case vmonkey return is None
            # If no macros found, return is [][], if error, return is None
            if type(output_results.get('vmonkey_values')) == dict:
                '''
                Structure of variable "actions" is as follows:
                [action, description, parameter]
                action: 'Found Entry Point', 'Execute Command', etc...
                parameter: Parameters for function
                description: 'Shell Function', etc...

                external_functions is a list of built-in VBA functions
                that were called
                '''
                actions = output_results['vmonkey_values']['actions']
                external_functions = output_results['vmonkey_values']['external_funcs']
                tmp_iocs = output_results['vmonkey_values']['tmp_iocs']
            else:
                vmonkey_err = True
        else:
            vmonkey_err = True
    except Exception:
        raise

    # Add vmonkey log as a supplemental file
    if 'stdout' in output_results:
        temp_log_copy = os.path.join(
            tempfile.gettempdir(), f'{request.sid}_vipermonkey_output.log')
        with open(temp_log_copy, "w") as temp_log_file:
            temp_log_file.write(output_results['stdout'])
        self.request.add_supplementary(temp_log_copy, 'vipermonkey_output.log',
                                       'ViperMonkey log output')

    if vmonkey_err is True:
        ResultSection(
            'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
            parent=self.result, heuristic=Heuristic(1))

    if len(actions) > 0:
        # Creating action section
        action_section = ResultSection('Recorded Actions:', parent=self.result)
        action_section.add_tag('technique.macro', 'Contains VBA Macro(s)')
        for action in actions:
            # Creating action sub-sections for each action
            cur_action = action[0]
            cur_description = action[2] if action[2] else cur_action

            # Entry point actions have an empty description field,
            # re-organize result section for this case
            if cur_action == 'Found Entry Point':
                sub_action_section = ResultSection('Found Entry Point',
                                                   parent=action_section)
                sub_action_section.add_line(action[1])
            else:
                # Action's description will be the sub-section name
                sub_action_section = ResultSection(cur_description,
                                                   parent=action_section)
                if cur_description == 'Shell function':
                    sub_action_section.set_heuristic(2)

                # Parameters are sometimes stored as a list, account for this
                if isinstance(action[1], list):
                    for item in action[1]:
                        # Parameters includes more than strings (booleans for example)
                        if isinstance(item, str):
                            # Check for PowerShell
                            self.extract_powershell(item, sub_action_section)
                    # Join list items into single string
                    param = ', '.join(str(a) for a in action[1])
                else:
                    param = action[1]
                    # Parameters includes more than strings (booleans for example)
                    if isinstance(param, str):
                        self.extract_powershell(param, sub_action_section)

                sub_action_section.add_line(f'Action: {cur_action}')
                sub_action_section.add_line(f'Parameters: {param}')

                # If decoded is true, possible base64 string has been found
                self.check_for_b64(param, sub_action_section)

                # Add urls/ips found in parameter to respective lists
                self.find_ip(param)

    # Check tmp_iocs
    res_temp_iocs = ResultSection('Runtime temporary IOCs')
    for ioc in tmp_iocs:
        self.extract_powershell(ioc, res_temp_iocs)
        self.check_for_b64(ioc, res_temp_iocs)
        self.find_ip(ioc)
    if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
        self.result.add_section(res_temp_iocs)

    # Add PowerShell score/tag if found
    if self.found_powershell:
        ResultSection('Discovered PowerShell code in file',
                      parent=self.result, heuristic=Heuristic(3))

    # Add url/ip tags
    self.add_ip_tags()

    # Create section for built-in VBA functions called
    if len(external_functions) > 0:
        vba_builtin_dict = {}
        dict_path = os.path.join(os.path.dirname(__file__), 'VBA_built_ins.txt')
        # File format: "name; description", '#' lines are comments
        with open(dict_path, 'r') as f:
            for line in f:
                line = line.strip()
                if re.search(r'^#', line):
                    continue
                if line:
                    line = line.split(';')
                    vba_builtin_dict[line[0].strip()] = line[1].strip()
        external_func_section = ResultSection('VBA functions called',
                                              body_format=BODY_FORMAT.MEMORY_DUMP,
                                              parent=self.result)
        for func in external_functions:
            if func in vba_builtin_dict:
                external_func_section.add_line(func + ': ' + vba_builtin_dict[func])
            else:
                external_func_section.add_line(func)
def find_network_indicators(apktool_out_dir: str, result: Result):
    """Grep the decompiled smali for string constants that look like network
    IOCs (URLs, domains, IPs, emails), filter out package names and known
    benign values, and report/tag up to 20 of each kind (heuristic 3).

    :param apktool_out_dir: Root of the apktool extraction
    :param result: Result object the sections are attached to
    """
    # Whitelist
    skip_list = [
        "android.intent",
        "com.google",
        "com.android",
    ]

    indicator_whitelist = [
        'google.to', 'google.ttl', 'google.delay', 'google_tagmanager.db',
        'gtm_urls.db', 'gtm.url', 'google_tagmanager.db', 'google_analytics_v4.db',
        'Theme.Dialog.Alert', 'popupLocationInfo.gravity', 'popupLocationInfo.displayId',
        'popupLocationInfo.left', 'popupLocationInfo.top', 'popupLocationInfo.right',
        'popupLocationInfo.bottom', 'googleads.g.doubleclick.net', 'ad.doubleclick.net',
        '.doubleclick.net', '.googleadservices.com', '.googlesyndication.com',
        'android.hardware.type.watch', 'mraid.js', 'google_inapp_purchase.db',
        'mobileads.google.com', 'mobileads.google.com', 'share_history.xml',
        'share_history.xml', 'activity_choser_model_history.xml',
        'FragmentPager.SavedState{', 'android.remoteinput.results', 'android.people',
        'android.picture', 'android.icon', 'android.text', 'android.title',
        'android.title.big', 'FragmentTabHost.SavedState{',
        'android.remoteinput.results', 'android.remoteinput.results',
        'android.remoteinput.results', 'libcore.icu.ICU',
    ]

    file_list = []

    # Indicators
    url_list = []
    domain_list = []
    ip_list = []
    email_list = []

    # Build dynamic whitelist: every smali package path becomes a dotted
    # prefix to skip (those strings are class references, not domains)
    smali_dir = os.path.join(apktool_out_dir, "smali")
    for root, dirs, files in os.walk(smali_dir):
        if not files:
            continue
        else:
            skip_list.append(root.replace(smali_dir + "/", "").replace("/", "."))
            for cdir in dirs:
                skip_list.append(os.path.join(root, cdir).replace(smali_dir + "/", "").replace("/", "."))

    # Asset file names are also excluded from the indicators
    asset_dir = os.path.join(apktool_out_dir, "assets")
    if os.path.exists(asset_dir):
        for root, dirs, files in os.walk(asset_dir):
            if not files:
                continue
            else:
                for asset_file in files:
                    file_list.append(asset_file)
    skip_list = list(set(skip_list))

    # Find indicators: grep for anything dotted that could be a hostname
    proc = Popen(['grep', '-ER',
                  r'(([[:alpha:]](-?[[:alnum:]])*)\.)*[[:alpha:]](-?[[:alnum:]])+\.[[:alpha:]]{2,}',
                  smali_dir], stdout=PIPE, stderr=PIPE)
    grep, _ = proc.communicate()
    for line in safe_str(grep).splitlines():
        file_path, line = line.split(":", 1)
        # Only consider string constants
        if "const-string" in line or "Ljava/lang/String;" in line:
            data = line.split("\"", 1)[1].split("\"")[0]
            data_low = data.lower()
            data_split = data.split(".")
            if data in file_list:
                continue
            elif data in indicator_whitelist:
                continue
            elif data.startswith("/"):
                # Path, not a hostname
                continue
            elif data_low.startswith("http://") or data_low.startswith('ftp://') or data_low.startswith('https://'):
                url_list.append(data)
            elif len(data_split[0]) < len(data_split[-1]) and len(data_split[-1]) > 3:
                # Last label longer than the first and > 3 chars: unlikely a TLD
                continue
            elif data.startswith('android.') and data_low != data:
                # Mixed-case android.* identifier, not a domain
                continue
            elif "/" in data and "." in data and data.index("/") < data.index("."):
                # Slash before the first dot: looks like a path fragment
                continue
            elif " " in data:
                continue
            elif data_split[0] in ['com', 'org', 'net', 'java']:
                # Reverse-domain package name
                continue
            elif data_split[-1].lower() in ['so', 'properties', 'zip', 'read', 'id',
                                            'store', 'name', 'author', 'sh', 'soccer',
                                            'fitness', 'news', 'video']:
                # Common file extensions / non-TLD suffixes
                continue
            elif data.endswith("."):
                continue
            else:
                do_skip = False
                for skip in skip_list:
                    if data.startswith(skip):
                        do_skip = True
                        break

                if do_skip:
                    continue

                data = data.strip(".")

                if is_valid_domain(data):
                    domain_list.append(data)
                elif is_valid_ip(data):
                    ip_list.append(data)
                elif is_valid_email(data):
                    email_list.append(data)

    # Also extract the host part of every URL found
    url_list = list(set(url_list))
    for url in url_list:
        dom_ip = url.split("//")[1].split("/")[0]
        if ":" in dom_ip:
            dom_ip = dom_ip.split(":")[0]

        if is_valid_ip(dom_ip):
            ip_list.append(dom_ip)
        elif is_valid_domain(dom_ip):
            domain_list.append(dom_ip)

    ip_list = list(set(ip_list))
    domain_list = list(set(domain_list))
    email_list = list(set(email_list))

    if url_list or ip_list or domain_list or email_list:
        res_net = ResultSection("Network indicator(s) found", parent=result,
                                heuristic=Heuristic(3))

        if url_list:
            res_url = ResultSection("Found urls in the decompiled code", parent=res_net)
            count = 0
            for url in url_list:
                count += 1
                if count <= 20:
                    res_url.add_line(url)
                res_url.add_tag('network.static.uri', url)
            if count > 20:
                res_url.add_line(f"and {count - 20} more...")

        if ip_list:
            res_ip = ResultSection("Found IPs in the decompiled code", parent=res_net)
            count = 0
            for ip in ip_list:
                count += 1
                if count <= 20:
                    res_ip.add_line(ip)
                res_ip.add_tag('network.static.ip', ip)
            if count > 20:
                res_ip.add_line(f"and {count - 20} more...")

        if domain_list:
            res_domain = ResultSection("Found domains in the decompiled code", parent=res_net)
            count = 0
            for domain in domain_list:
                count += 1
                if count <= 20:
                    res_domain.add_line(domain)
                res_domain.add_tag('network.static.domain', domain)
            if count > 20:
                res_domain.add_line(f"and {count - 20} more...")

        if email_list:
            res_email = ResultSection("Found email addresses in the decompiled code", parent=res_net)
            count = 0
            for email in email_list:
                count += 1
                if count <= 20:
                    res_email.add_line(email)
                res_email.add_tag('network.email.address', email)
            if count > 20:
                res_email.add_line(f"and {count - 20} more...")
def run_badging_analysis(self, apk_file: str, result: Result):
    """Parse `aapt dump badging` output and report APK metadata.

    Extracts package name/version, SDK levels, default label, launchable
    activity, libraries, permissions, provided components and features.
    Each value is tagged, and heuristics are raised for suspicious
    version codes (17), duplicated permissions (18), dangerous
    permissions (4) and unknown permissions (5).

    Args:
        apk_file: Path to the APK to inspect.
        result: Result object the generated section is attached to.
    """
    badging_args = ['d', 'badging', apk_file]
    badging, _ = self.run_appt(badging_args)
    if not badging:
        # aapt produced no output, nothing to report
        return
    res_badging = ResultSection("Android application details")
    libs = []
    permissions = []
    # BUGFIX: duplicates must be detected at parse time. The previous
    # check `len(set(permissions)) < len(permissions)` could never fire
    # because the list below is de-duplicated on insert.
    duplicated_permissions = False
    components = []
    features = []
    pkg_version = None
    for line in badging.splitlines():
        if line.startswith("package:"):
            pkg_name = line.split("name='")[1].split("'")[0]
            pkg_version = line.split("versionCode='")[1].split("'")[0]
            res_badging.add_line(f"Package: {pkg_name} v.{pkg_version}")
            res_badging.add_tag('file.apk.pkg_name', pkg_name)
            res_badging.add_tag('file.apk.app.version', pkg_version)
        if line.startswith("sdkVersion:"):
            # values look like sdkVersion:'21' -> strip wrapper quotes
            min_sdk = line.split(":'")[1][:-1]
            res_badging.add_line(f"Min SDK: {min_sdk}")
            res_badging.add_tag('file.apk.sdk.min', min_sdk)
        if line.startswith("targetSdkVersion:"):
            target_sdk = line.split(":'")[1][:-1]
            res_badging.add_line(f"Target SDK: {target_sdk}")
            res_badging.add_tag('file.apk.sdk.target', target_sdk)
        if line.startswith("application-label:"):
            label = line.split(":'")[1][:-1]
            res_badging.add_line(f"Default Label: {label}")
            res_badging.add_tag('file.apk.app.label', label)
        if line.startswith("launchable-activity:"):
            launch = line.split("name='")[1].split("'")[0]
            res_badging.add_line(f"Launchable activity: {launch}")
            res_badging.add_tag('file.apk.activity', launch)
        if line.startswith("uses-library-not-required:"):
            lib = line.split(":'")[1][:-1]
            if lib not in libs:
                libs.append(lib)
        if line.startswith("uses-permission:") or line.startswith("uses-implied-permission:"):
            perm = line.split("name='")[1].split("'")[0]
            if perm not in permissions:
                permissions.append(perm)
            else:
                # same permission declared more than once in the manifest
                duplicated_permissions = True
        if line.startswith("provides-component:"):
            component = line.split(":'")[1][:-1]
            if component not in components:
                components.append(component)
        if "uses-feature:" in line or "uses-implied-feature:" in line:
            feature = line.split("name='")[1].split("'")[0]
            if feature not in features:
                features.append(feature)

    if pkg_version is not None:
        pkg_version = int(pkg_version)
        if pkg_version < 15:
            ResultSection("Package version is suspiciously low", parent=res_badging,
                          heuristic=Heuristic(17))
        elif pkg_version > 999999999:
            ResultSection("Package version is suspiciously high", parent=res_badging,
                          heuristic=Heuristic(17))

    if libs:
        res_lib = ResultSection("Libraries used", parent=res_badging)
        for lib in libs:
            res_lib.add_line(lib)
            res_lib.add_tag('file.apk.used_library', lib)

    if permissions:
        res_permissions = ResultSection("Permissions used", parent=res_badging)
        dangerous_permissions = []
        unknown_permissions = []
        for perm in permissions:
            if perm in ALL_ANDROID_PERMISSIONS:
                if 'dangerous' in ALL_ANDROID_PERMISSIONS[perm]:
                    dangerous_permissions.append(perm)
                else:
                    res_permissions.add_line(perm)
                    res_permissions.add_tag('file.apk.permission', perm)
            else:
                unknown_permissions.append(perm)
        if duplicated_permissions:
            ResultSection("Some permissions are defined more then once", parent=res_badging,
                          heuristic=Heuristic(18))
        if dangerous_permissions:
            res_dangerous_perm = ResultSection("Dangerous permissions used", parent=res_badging,
                                               heuristic=Heuristic(4))
            for perm in dangerous_permissions:
                res_dangerous_perm.add_line(perm)
                res_dangerous_perm.add_tag('file.apk.permission', perm)
        if unknown_permissions:
            res_unknown_perm = ResultSection("Unknown permissions used", parent=res_badging,
                                             heuristic=Heuristic(5))
            for perm in unknown_permissions:
                res_unknown_perm.add_line(perm)
                res_unknown_perm.add_tag('file.apk.permission', perm)

    if features:
        res_features = ResultSection("Features used", parent=res_badging)
        for feature in features:
            res_features.add_line(feature)
            res_features.add_tag('file.apk.feature', feature)

    if components:
        res_components = ResultSection("Components provided", parent=res_badging)
        for component in components:
            res_components.add_line(component)
            res_components.add_tag('file.apk.provides_component', component)

    result.add_section(res_badging)
def execute(self, request):
    """Generate sample results exercising every result-section type.

    Demo/test service entry point. For the main file it builds one of
    every result-section type (text, graph, URL, memory dump, key/value,
    ordered key/value, JSON, process tree, table, multi, image), drops
    three embedded files and two supplementary files, and sets temporary
    submission data. For the known embedded-file hashes it instead
    returns either empty or randomized results so the report generator
    can be unit tested.

    Args:
        request: AL service request; `request.result` is set on return.
    """
    # Check if we're scanning one of the embedded files this service drops.
    # Two of them generate random results and one generates empty results.
    # A normal service would never do this; it exists only so unit tests can
    # exercise every feature of the report generator.
    if request.sha256 not in [
            'd729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
            '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
            'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
        # Main file results...

        # Create a result object where all the result sections will be saved to.
        # All section scores will be SUMed in the service result and the Result
        # classification will be the highest classification found in the sections.
        result = Result()

        # ==================================================================
        # Standard text section: BODY_FORMAT.TEXT - DEFAULT
        text_section = ResultTextSection('Example of a default section')
        # Lines can be added one at a time...
        text_section.add_line(get_random_phrase())
        # ...or in bulk from a list (random amount of random lines here)
        text_section.add_lines(
            [get_random_phrase() for _ in range(random.randint(1, 5))])
        # Tagging is used to quickly find defining information about a file
        text_section.add_tag("attribution.implant", "ResultSample")
        # A heuristic is needed for the section to affect the file score;
        # a signature name is associated with it at creation time
        text_section.set_heuristic(3, signature="sig_one")
        # Attack ids can be attached to a heuristic after it was defined
        text_section.heuristic.add_attack_id(
            random.choice(list(software_map.keys())))
        text_section.heuristic.add_attack_id(
            random.choice(list(attack_map.keys())))
        text_section.heuristic.add_attack_id(
            random.choice(list(group_map.keys())))
        text_section.heuristic.add_attack_id(
            random.choice(list(revoke_map.keys())))
        # Signatures can also be added after the fact; calling
        # add_signature_id twice for the same name accumulates its frequency
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=2)
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=3)
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_four", score=0)
        # Expected heuristic state for text_section:
        #   1. 1 attack ID: T1066
        #   2. 4 signatures: sig_one, sig_two, sig_three, sig_four
        #   3. cumulative frequencies: sig_one=1, sig_two=5, sig_three=2, sig_four=1
        #   4. score priority: signature_score_map > add_signature_id score >
        #      heuristic default score, so: sig_one=10 (default), sig_two=20
        #      (call), sig_three=30 (map), sig_four=40 (map wins over call)
        #   5. total section score: 1x10 + 5x20 + 2x30 + 1x40 = 210
        result.add_section(text_section)
        # A section already added to the result can still gain subsections
        ResultSection(
            "Example of sub-section without a body added later in processing",
            parent=text_section)

        # ==================================================================
        # Color map Section: BODY_FORMAT.GRAPH_DATA
        # Creates a color map bar using a minimum and maximum domain
        # (e.g. used to display entropy distribution in some services)
        cmap_min = 0
        cmap_max = 20
        cmap_values = [random.random() * cmap_max for _ in range(50)]
        # A section classification can be any valid classification for the system
        section_color_map = ResultGraphSection(
            "Example of colormap result section",
            classification=cl_engine.RESTRICTED)
        section_color_map.set_colormap(cmap_min, cmap_max, cmap_values)
        result.add_section(section_color_map)

        # ==================================================================
        # URL section: BODY_FORMAT.URL
        # Generates a list of clickable urls using a json encoded format
        random_host = get_random_host()
        url_section = ResultURLSection('Example of a simple url section')
        url_section.add_url(f"https://{random_host}/", name="Random url!")
        # URLs are important features, so tag them (a tag is a type + value)
        url_section.add_tag("network.static.domain", random_host)

        # A url list without names; the url link itself will be displayed
        hosts = [get_random_host() for _ in range(2)]
        # A heuristic can fire more than once without being tied to a signature
        url_heuristic = Heuristic(4, frequency=len(hosts))
        url_sub_section = ResultURLSection(
            'Example of a url sub-section with multiple links',
            heuristic=url_heuristic, classification=cl_engine.RESTRICTED)
        for host in hosts:
            url_sub_section.add_url(f"https://{host}/")
            url_sub_section.add_tag("network.static.domain", host)
        # Sections can be nested as deep as needed
        ips = [get_random_ip() for _ in range(3)]
        url_sub_sub_section = ResultURLSection(
            'Exemple of a two level deep sub-section')
        for ip in ips:
            url_sub_sub_section.add_url(f"https://{ip}/")
            url_sub_sub_section.add_tag("network.static.ip", ip)
        # url_sub_sub_section belongs under url_sub_section, not the main result
        url_sub_section.add_subsection(url_sub_sub_section)
        # Invalid sections are ignored (an error will appear in the logs);
        # their sub-sections are ignored too
        invalid_section = ResultSection("")
        ResultSection(
            "I won't make it to the report because my parent is invalid :(",
            parent=invalid_section)
        url_sub_section.add_subsection(invalid_section)
        url_section.add_subsection(url_sub_section)
        result.add_section(url_section)

        # ==================================================================
        # Memory dump section: BODY_FORMAT.MEMORY_DUMP
        # Dumps string content into a <pre/> html tag, preserving formatting
        data = hexdump(
            b"This is some random text that we will format as an hexdump and you'll see "
            b"that the hexdump formatting will be preserved by the memory dump section!")
        memdump_section = ResultMemoryDumpSection(
            'Example of a memory dump section', body=data)
        memdump_section.set_heuristic(random.randint(1, 4))
        result.add_section(memdump_section)

        # ==================================================================
        # KEY_VALUE section: key/value pairs displayed in the UI while staying
        # easy to parse for automated tools. Prefer this over a JSON body:
        # it renders correctly in the UI. Values may be str, int or bool.
        kv_section = ResultKeyValueSection(
            'Example of a KEY_VALUE section')
        # Items can be added individually...
        kv_section.set_item('key', "value")
        # ...or in bulk
        kv_section.update_items({
            "a_str": "Some string",
            "a_bool": False,
            "an_int": 102,
        })
        result.add_section(kv_section)

        # ==================================================================
        # ORDERED_KEY_VALUE section: same as KEY_VALUE but field order is
        # guaranteed to be preserved and key names may repeat
        oredered_kv_section = ResultOrderedKeyValueSection(
            'Example of an ORDERED_KEY_VALUE section')
        for x in range(random.randint(3, 6)):
            oredered_kv_section.add_item(f'key{x}', f"value{x}")
        result.add_section(oredered_kv_section)

        # ==================================================================
        # JSON section: tree view of JSON results using the admin JSON editor
        # (https://github.com/josdejong/jsoneditor). Use sparingly: important
        # results should get their own dedicated result sections.
        # The body argument must be a python dictionary.
        json_body = {
            "a_str": "Some string",
            "a_list": ["a", "b", "c"],
            "a_bool": False,
            "an_int": 102,
            "a_dict": {
                "list_of_dict": [{
                    "d1_key": "val",
                    "d1_key2": "val2"
                }, {
                    "d2_key": "val",
                    "d2_key2": "val2"
                }],
                "bool": True
            }
        }
        json_section = ResultJSONSection('Example of a JSON section')
        # The json result can be set to a specific value...
        json_section.set_json(json_body)
        # ...and specific parts can be updated after the fact
        json_section.update_json({
            'an_int': 1000,
            'updated_key': 'updated_value'
        })
        result.add_section(json_section)

        # ==================================================================
        # PROCESS_TREE section: nested dictionaries of processes, each of the
        # form {process_pid, process_name, command_line, signatures, children}
        # where children holds more dictionaries with the same structure
        process_tree_section = ResultProcessTreeSection(
            'Example of a PROCESS_TREE section')
        # ProcessItem builds the processes added to the result section
        evil_process = ProcessItem(123, "evil.exe", "c:\\evil.exe")
        evil_process_child_1 = ProcessItem(
            321, "takeovercomputer.exe",
            "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff")
        # Child processes can be attached to ProcessItem objects
        evil_process_child_1.add_child_process(
            ProcessItem(
                456,
                "evenworsethanbefore.exe",
                "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                signatures={
                    "one": 10,
                    "two": 10,
                    "three": 10
                }))
        evil_process_child_1.add_child_process(
            ProcessItem(234,
                        "badfile.exe",
                        "C:\\badfile.exe -k nothing_to_see_here",
                        signatures={
                            "one": 1000,
                            "two": 10,
                            "three": 10,
                            "four": 10,
                            "five": 10
                        }))
        # Signatures that hit on a process can be added after creation...
        evil_process_child_1.add_signature('one', 250)
        # ...or passed directly at creation
        evil_process_child_2 = ProcessItem(
            345, "benignexe.exe",
            "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
            signatures={"one": 2000})
        # Network, file and registry event counts can also be recorded
        evil_process_child_2.add_network_events(4)
        evil_process_child_2.add_file_events(7000)
        evil_process_child_2.add_registry_events(10)
        # A tree item can be flagged as safelisted
        benign_process = ProcessItem(678, "trustme.exe", "C:\\trustme.exe")
        benign_process.safelist()
        evil_process.add_child_process(evil_process_child_1)
        evil_process.add_child_process(evil_process_child_2)
        process_tree_section.add_process(evil_process)
        process_tree_section.add_process(
            ProcessItem(987, "runzeroday.exe",
                        "C:\\runzeroday.exe -f insert_bad_spelling"))
        process_tree_section.add_process(benign_process)
        result.add_section(process_tree_section)

        # ==================================================================
        # TABLE section: body is a list of dict rows; a value may itself be a
        # flat nested dictionary, rendered as a nested table within a cell
        table_section = ResultTableSection('Example of a TABLE section')
        # TableRow helps adding rows to the Table section
        table_section.add_row(
            TableRow(a_str="Some string1",
                     extra_column_here="confirmed",
                     a_bool=False,
                     an_int=101))
        table_section.add_row(
            TableRow(
                {
                    "a_str": "Some string2",
                    "a_bool": True,
                    "an_int": "to_be_overriden_by_kwargs"
                },
                an_int=102))
        table_section.add_row(
            TableRow(a_str="Some string3", a_bool=False, an_int=103))
        # Valid TableRow item values: str, int, bool, None, or dict of those
        table_section.add_row(
            TableRow(
                {
                    "a_str": "Some string4",
                    "a_bool": None,
                    "an_int": -1000000000000000000
                }, {
                    "extra_column_there": "confirmed",
                    "nested_key_value_pair": {
                        "a_str": "Some string3",
                        "a_bool": False,
                        "nested_kv_thats_too_deep": {
                            "a_str": "Some string3",
                            "a_bool": False,
                            "an_int": 103,
                        },
                    }
                }))
        result.add_section(table_section)

        # ==================================================================
        # Re-Submitting files to the system
        # Extracted files are resubmitted to the system for analysis.
        # This file will generate random results on the next run:
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(data.encode())
        request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")
        # Embedded files can also have their own classification!
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"CLASSIFIED!!!__" + data.encode())
        request.add_extracted(temp_path, "classified.doc", "Classified file ... don't look",
                              classification=cl_engine.RESTRICTED)
        # This file will generate empty results on the next run:
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"EMPTY")
        request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

        # ==================================================================
        # Supplementary files are saved on the datastore for future reference
        # but are not reprocessed
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(url_sub_section.body)
        request.add_supplementary(temp_path, "urls.json",
                                  "These are urls as a JSON file")
        # Like extracted files, more than one supplementary file can be added
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(json_body))
        request.add_supplementary(temp_path, "json_body.json",
                                  "This is the json_body as a JSON file")

        # ==================================================================
        # Zeroize on safe tags: the section score is set to zero if all of its
        # tags were safelisted by the safelisting engine
        zero_section = ResultSection('Example of zeroize-able section',
                                     zeroize_on_tag_safe=True)
        zero_section.set_heuristic(2)
        zero_section.add_line(
            "This section will have a zero score if all tags are safelisted.")
        zero_section.add_tag('network.static.ip', '127.0.0.1')
        result.add_section(zero_section)

        # ==================================================================
        # Auto-collapse: the section is collapsed when first displayed
        collapse_section = ResultSection(
            'Example of auto-collapse section', auto_collapse=True)
        collapse_section.set_heuristic(2)
        collapse_section.add_line(
            "This section was collapsed when first loaded in the UI")
        result.add_section(collapse_section)

        # ==================================================================
        # Image Section: displays images to the user
        image_section = ResultImageSection(request, 'Example of Image section')
        for x in range(6):
            image_section.add_image(f'data/000{x+1}.jpg', f'000{x+1}.jpg',
                                    f'ResultSample screenshot 000{x+1}',
                                    ocr_heuristic_id=6)
        result.add_section(image_section)

        # ==================================================================
        # Multi Section: multiple section types in the same result section
        multi_section = ResultMultiSection(
            'Example of Multi-typed section')
        multi_section.add_section_part(
            TextSectionBody(
                body="We have detected very high entropy multiple sections "
                     "of your file, this section is most-likely packed or "
                     "encrypted.\n\nHere are affected sections:"))
        section_count = random.randint(1, 4)
        for x in range(section_count):
            multi_section.add_section_part(
                KVSectionBody(section_name=f".UPX{x}",
                              offset=f'0x00{8+x}000',
                              size='4196 bytes'))
            graph_part = GraphSectionBody()
            graph_part.set_colormap(
                0, 8, [7 + random.random() for _ in range(20)])
            multi_section.add_section_part(graph_part)
            # no divider after the last simulated section
            if x != section_count - 1:
                multi_section.add_section_part(DividerSectionBody())
            multi_section.add_tag("file.pe.sections.name", f".UPX{x}")
        multi_section.set_heuristic(5)
        result.add_section(multi_section)

        # ==================================================================
        # Propagate temporary submission data to other services: a service can
        # set temporary data that other subscribing services can use
        request.temp_submission_data['kv_section'] = kv_section.body
        request.temp_submission_data[
            'process_tree_section'] = process_tree_section.body
        request.temp_submission_data['url_section'] = url_sub_section.body

        # Wrap-up: save the result object back into the request
        request.result = result

    # ==================================================================
    # Empty results file
    elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
        # Create an empty result object
        request.result = Result()

    # ==================================================================
    # Randomized results file
    else:
        # Completely randomize the results; content does not matter since all
        # result sections, tagging, heuristics and file upload functions were
        # already demonstrated above
        embedded_result = Result()

        # random number of sections
        for _ in range(1, 3):
            embedded_result.add_section(self._create_random_section())

        request.result = embedded_result
def hex_results(self, request, patterns):
    """Find and report long ascii hex strings.

    Scans the raw file contents for runs of 16+ hex byte pairs, asks
    `unhexlify_ascii` to decode each run, and reports any decoded IOC
    strings (plain and XOR-masked) as result sections.

    Args:
        request: AL request object with result section
        patterns: PatternMatch object
    """
    asciihex_file_found = False
    # key -> list of decoded results for plain ascii-hex hits
    asciihex_dict = {}
    # key (with 'BB_' prefix stripped) -> list of XOR-decoded results
    asciihex_bb_dict = {}
    hex_pat = re.compile(b'((?:[0-9a-fA-F]{2}[\r]?[\n]?){16,})')
    for hex_match in re.findall(hex_pat, request.file_contents):
        # a run may span lines; strip the line breaks before decoding
        hex_string = hex_match.replace(b'\r', b'').replace(b'\n', b'')
        afile_found, asciihex_results = self.unhexlify_ascii(
            request, hex_string, request.file_type, patterns)
        if afile_found:
            asciihex_file_found = True
        # NOTE(review): asciihex_results is iterated with .items() below, so a
        # non-empty result appears to be a dict; the != b"" guard presumably
        # matches a b'' sentinel returned by unhexlify_ascii when nothing
        # decodes — confirm against that helper.
        if asciihex_results != b"":
            for ask, asi in asciihex_results.items():
                if ask.startswith('BB_'):
                    # Add any xor'd content to its own result set
                    ask = ask.split('_', 1)[1]
                    if ask not in asciihex_bb_dict:
                        asciihex_bb_dict[ask] = []
                    asciihex_bb_dict[ask].append(asi)
                else:
                    if ask not in asciihex_dict:
                        asciihex_dict[ask] = []
                    asciihex_dict[ask].append(asi)
    # Report Ascii Hex Encoded Data:
    if asciihex_file_found:
        asciihex_emb_res = (ResultSection(
            "Found Large Ascii Hex Strings in Non-Executable:",
            body_format=BODY_FORMAT.MEMORY_DUMP,
            heuristic=Heuristic(7),
            parent=request.result))
        asciihex_emb_res.add_line(
            "Extracted possible ascii-hex object(s). See extracted files.")
    if len(asciihex_dict) > 0:
        # Different scores are used depending on whether the file is a document
        heuristic = Heuristic(8)
        if request.file_type.startswith("document"):
            heuristic = Heuristic(10)
        asciihex_res = (ResultSection("ASCII HEX DECODED IOC Strings:",
                                      body_format=BODY_FORMAT.MEMORY_DUMP,
                                      heuristic=heuristic,
                                      parent=request.result))
        # each value is a list of result groups, each group iterable itself
        for k, l in sorted(asciihex_dict.items()):
            for i in l:
                for ii in i:
                    asciihex_res.add_line(
                        f"Found {k.replace('_', ' ')} decoded HEX string: {ii}"
                    )
    if len(asciihex_bb_dict) > 0:
        asciihex_bb_res = (ResultSection(
            "ASCII HEX AND XOR DECODED IOC Strings:",
            heuristic=Heuristic(9),
            parent=request.result))
        # 1-based running index over every decoded xor result
        xindex = 0
        for k, l in sorted(asciihex_bb_dict.items()):
            for i in l:
                # NOTE(review): each entry appears to map an original hex
                # string (kk) to a (decoded, transform) pair (ii) — confirm
                # against unhexlify_ascii's return format.
                for kk, ii in i.items():
                    xindex += 1
                    asx_res = (ResultSection(f"Result {xindex}",
                                             parent=asciihex_bb_res))
                    asx_res.add_line(
                        f"Found {k.replace('_', ' ')} decoded HEX string, masked with "
                        f"transform {ii[1]}:")
                    asx_res.add_line("Decoded XOR string:")
                    asx_res.add_line(ii[0])
                    asx_res.add_line("Original ASCII HEX String:")
                    asx_res.add_line(kk)
                    asciihex_bb_res.add_tag(k, ii[0])
def hex_results(self, request: ServiceRequest, patterns: PatternMatch) -> None:
    """Find and report long ascii hex strings.

    Scans the raw file contents for runs of 16+ hex byte pairs, asks
    `unhexlify_ascii` to decode each run, and reports any decoded IOC
    strings (plain and XOR-masked) as result sections with the matching
    heuristics (7 for extracted objects, 8/10 for plain IOCs, 9 for
    XOR-decoded IOCs).

    Args:
        request: AL request object with result section
        patterns: PatternMatch object
    """
    asciihex_file_found = False
    # key -> set of decoded strings for plain ascii-hex hits
    asciihex_dict: Dict[str, Set[str]] = {}
    # key ('BB_' prefix stripped) -> set of (original, decoded, transform)
    asciihex_bb_dict: Dict[str, Set[Tuple[bytes, bytes, str]]] = {}
    hex_pat = re.compile(b'((?:[0-9a-fA-F]{2}[\r]?[\n]?){16,})')
    for hex_match in re.findall(hex_pat, request.file_contents):
        # a run may span lines; strip the line breaks before decoding
        hex_string = hex_match.replace(b'\r', b'').replace(b'\n', b'')
        afile_found, asciihex_results, xorhex_results = self.unhexlify_ascii(
            request, hex_string, request.file_type, patterns)
        if afile_found:
            asciihex_file_found = True
        for ascii_key, ascii_values in asciihex_results.items():
            asciihex_dict.setdefault(ascii_key, set())
            asciihex_dict[ascii_key].update(ascii_values)
        for xor_key, xor_results in xorhex_results.items():
            if xor_key.startswith('BB_'):
                # xor'd content goes to its own result set
                xor_key = xor_key.split('_', 1)[1]
                asciihex_bb_dict.setdefault(xor_key, set())
                asciihex_bb_dict[xor_key].add(xor_results)
            else:
                asciihex_dict.setdefault(xor_key, set())
                asciihex_dict[xor_key].add(safe_str(xor_results[1]))

    # Report Ascii Hex Encoded Data:
    if asciihex_file_found:
        asciihex_emb_res = (ResultSection(
            "Found Large Ascii Hex Strings in Non-Executable:",
            body_format=BODY_FORMAT.MEMORY_DUMP,
            heuristic=Heuristic(7),
            parent=request.result))
        asciihex_emb_res.add_line(
            "Extracted possible ascii-hex object(s). See extracted files.")

    if asciihex_dict:
        # Different scores are used depending on whether the file is a document
        asciihex_res = (ResultSection(
            "ASCII HEX DECODED IOC Strings:",
            body_format=BODY_FORMAT.MEMORY_DUMP,
            heuristic=Heuristic(
                10 if request.file_type.startswith("document") else 8),
            parent=request.result))
        for key, hex_list in sorted(asciihex_dict.items()):
            for h in hex_list:
                asciihex_res.add_line(
                    f"Found {key.replace('_', ' ')} decoded HEX string: {safe_str(h)}"
                )
                asciihex_res.add_tag(key, h)

    if asciihex_bb_dict:
        asciihex_bb_res = (ResultSection(
            "ASCII HEX AND XOR DECODED IOC Strings:",
            heuristic=Heuristic(9),
            parent=request.result))
        # BUGFIX: number every decoded result, starting at 1. The previous
        # enumerate() over the dict keys started at 0 and repeated the same
        # "Result N" title for every result sharing a key.
        xindex = 0
        for xkey, xset in sorted(asciihex_bb_dict.items()):
            for xresult in xset:
                xindex += 1
                data, match, transform = xresult
                asx_res = (ResultSection(f"Result {xindex}",
                                         parent=asciihex_bb_res))
                asx_res.add_line(
                    f"Found {xkey.replace('_', ' ')} decoded HEX string, masked with "
                    f"transform {safe_str(transform)}:")
                asx_res.add_line("Decoded XOR string:")
                asx_res.add_line(safe_str(match))
                asx_res.add_line("Original ASCII HEX String:")
                asx_res.add_line(safe_str(data))
                asciihex_bb_res.add_tag(xkey, match)
def execute(self, request):
    """Generate a demo result exercising every result-section type, tagging,
    heuristic and file-attachment feature of the report generator.

    Behaviour branches on the submitted file's SHA256: the main sample gets the
    full demo result, one known hash gets empty results, and any other
    (embedded) file gets randomized results.
    """
    # ==================================================================
    # Execute a request:
    #   Every time your service receives a new file to scan, the execute function is called
    #   This is where you should execute your processing code.
    #   For the purpose of this example, we will only generate results ...

    # You should run your code here...

    # ==================================================================
    # Check if we're scanning an embedded file
    #   This service always drop 3 embedded file which two generates random results and the other empty results
    #   We're making a check to see if we're scanning the embedded file.
    #   In a normal service this is not something you would do at all but since we are using this
    #   service in our unit test to test all features of our report generator, we have to do this
    if request.sha256 not in ['d729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                              '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                              'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
        # Main file results...

        # ==================================================================
        # Write the results:
        #   First, create a result object where all the result sections will be saved to
        result = Result()

        # ==================================================================
        # Standard text section: BODY_FORMAT.TEXT - DEFAULT
        #   Text sections basically just dumps the text to the screen...
        #     All sections scores will be SUMed in the service result
        #     The Result classification will be the highest classification found in the sections
        text_section = ResultSection('Example of a default section')
        # You can add lines to your section one at a time
        #   Here we will generate a random line
        text_section.add_line(get_random_phrase())
        # Or your can add them from a list
        #   Here we will generate random amount of random lines
        text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])
        # If the section needs to affect the score of the file you need to set a heuristics
        #   Here we will pick one at random
        #     In addition to add a heuristic, we will associated a signature with the heuristic,
        #     we're doing this by adding the signature name to the heuristic. (Here we generating a random name)
        text_section.set_heuristic(3, signature="sig_one")
        # You can attach attack ids to heuristics after they where defined
        text_section.heuristic.add_attack_id("T1066")
        # Same thing for the signatures, they can be added to heuristic after the fact and you can even say how
        # many time the signature fired by setting its frequency. If you call add_signature_id twice with the
        # same signature, this will effectively increase the frequency of the signature.
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=2)
        text_section.heuristic.add_signature_id("sig_two", score=20, frequency=3)
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_three")
        text_section.heuristic.add_signature_id("sig_four", score=0)
        # The heuristic for text_section should have the following properties
        #   1. 1 attack ID: T1066
        #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
        #   3. Signature frequencies are cumulative therefor they will be as follow:
        #      - sig_one = 1
        #      - sig_two = 5
        #      - sig_three = 2
        #      - sig_four = 1
        #   4. The score used by each heuristic is driven by the following rules: signature_score_map is higher
        #      priority, then score value for the add_signature_id is in second place and finally the default
        #      heuristic score is use. Therefor the score used to calculate the total score for the text_section is
        #      as follow:
        #      - sig_one: 10 -> heuristic default score
        #      - sig_two: 20 -> score provided by the function add_signature_id
        #      - sig_three: 30 -> score provided by the heuristic map
        #      - sig_four: 40 -> score provided by the heuristic map because it's higher priority than the
        #                        function score
        #   5. Total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210
        # Make sure you add your section to the result
        result.add_section(text_section)

        # ==================================================================
        # Color map Section: BODY_FORMAT.GRAPH_DATA
        #   Creates a color map bar using a minimum and maximum domain
        #   e.g. We are using this section to display the entropy distribution in some services
        cmap_min = 0
        cmap_max = 20
        color_map_data = {
            'type': 'colormap',
            'data': {
                'domain': [cmap_min, cmap_max],
                'values': [random.random() * cmap_max for _ in range(50)]
            }
        }
        # The classification of a section can be set to any valid classification for your system
        section_color_map = ResultSection("Example of colormap result section",
                                          body_format=BODY_FORMAT.GRAPH_DATA,
                                          body=json.dumps(color_map_data),
                                          classification=cl_engine.RESTRICTED)
        result.add_section(section_color_map)

        # ==================================================================
        # URL section: BODY_FORMAT.URL
        #   Generate a list of clickable urls using a json encoded format
        #     As you can see here, the body of the section can be set directly instead of line by line
        random_host = get_random_host()
        url_section = ResultSection('Example of a simple url section',
                                    body_format=BODY_FORMAT.URL,
                                    body=json.dumps({"name": "Random url!",
                                                     "url": f"https://{random_host}/"}))

        # Since urls are very important features we can tag those features in the system so they are easy to find
        #   Tags are defined by a type and a value
        url_section.add_tag("network.static.domain", random_host)

        # You may also want to provide a list of url!
        #   Also, No need to provide a name, the url link will be displayed
        host1 = get_random_host()
        host2 = get_random_host()
        ip1 = get_random_ip()
        ip2 = get_random_ip()
        ip3 = get_random_ip()
        urls = [
            {"url": f"https://{host1}/"},
            {"url": f"https://{host2}/"},
            {"url": f"https://{ip1}/"},
            {"url": f"https://{ip2}/"},
            {"url": f"https://{ip3}/"}]

        # A heuristic can fire more then once without being associated to a signature
        url_heuristic = Heuristic(4, frequency=len(urls))

        url_sub_section = ResultSection('Example of a url section with multiple links',
                                        body=json.dumps(urls),
                                        body_format=BODY_FORMAT.URL,
                                        heuristic=url_heuristic)
        url_sub_section.add_tag("network.static.ip", ip1)
        url_sub_section.add_tag("network.static.ip", ip2)
        url_sub_section.add_tag("network.static.ip", ip3)
        url_sub_section.add_tag("network.static.domain", host1)
        url_sub_section.add_tag("network.dynamic.domain", host2)
        # Since url_sub_section is a sub-section of url_section
        # we will add it as a sub-section of url_section not to the main result itself
        url_section.add_subsection(url_sub_section)
        result.add_section(url_section)

        # ==================================================================
        # Memory dump section: BODY_FORMAT.MEMORY_DUMP
        #   Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
        data = hexdump(b"This is some random text that we will format as an hexdump and you'll see "
                       b"that the hexdump formatting will be preserved by the memory dump section!")
        memdump_section = ResultSection('Example of a memory dump section',
                                        body_format=BODY_FORMAT.MEMORY_DUMP,
                                        body=data)
        memdump_section.set_heuristic(random.randint(1, 4))
        result.add_section(memdump_section)

        # ==================================================================
        # KEY_VALUE section:
        #   This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
        #   while also providing easy to parse data for auto mated tools.
        #   NB: You should definitely use this over a JSON body type since this one will be displayed correctly
        #       in the UI for the user
        #   The body argument must be a json dumps of a dictionary (only str, int, and booleans are allowed)
        kv_body = {
            "a_str": "Some string",
            "a_bool": False,
            "an_int": 102,
        }
        kv_section = ResultSection('Example of a KEY_VALUE section',
                                   body_format=BODY_FORMAT.KEY_VALUE,
                                   body=json.dumps(kv_body))
        result.add_section(kv_section)

        # ==================================================================
        # JSON section:
        #   Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
        #   to display a tree view of JSON results.
        #   NB: Use this sparingly! As a service developer you should do your best to include important
        #       results as their own result sections.
        #   The body argument must be a json dump of a python dictionary
        json_body = {
            "a_str": "Some string",
            "a_list": ["a", "b", "c"],
            "a_bool": False,
            "an_int": 102,
            "a_dict": {
                "list_of_dict": [
                    {"d1_key": "val", "d1_key2": "val2"},
                    {"d2_key": "val", "d2_key2": "val2"}
                ],
                "bool": True
            }
        }
        json_section = ResultSection('Example of a JSON section',
                                     body_format=BODY_FORMAT.JSON,
                                     body=json.dumps(json_body))
        result.add_section(json_section)

        # ==================================================================
        # PROCESS_TREE section:
        #   This section allows the service writer to list a bunch of dictionary objects that have nested lists
        #   of dictionaries to be displayed in the UI. Each dictionary object represents a process, and therefore
        #   each dictionary must have be of the following format:
        #   {
        #     "process_pid": int,
        #     "process_name": str,
        #     "command_line": str,
        #     "children": [] NB: This list either is empty or contains more dictionaries that have the same
        #                        structure
        #   }
        nc_body = [
            {
                "process_pid": 123,
                "process_name": "evil.exe",
                "command_line": "C:\\evil.exe",
                "signatures": {},
                "children": [
                    {
                        "process_pid": 321,
                        "process_name": "takeovercomputer.exe",
                        "command_line": "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff",
                        "signatures": {"one":250},
                        "children": [
                            {
                                "process_pid": 456,
                                "process_name": "evenworsethanbefore.exe",
                                "command_line": "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                                "signatures": {"one":10, "two":10, "three":10},
                                "children": []
                            },
                            {
                                "process_pid": 234,
                                "process_name": "badfile.exe",
                                "command_line": "C:\\badfile.exe -k nothing_to_see_here",
                                "signatures": {"one":1000, "two":10, "three":10, "four":10, "five":10},
                                "children": []
                            }
                        ]
                    },
                    {
                        "process_pid": 345,
                        "process_name": "benignexe.exe",
                        "command_line": "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                        "signatures": {"one": 2000},
                        "children": []
                    }
                ]
            },
            {
                "process_pid": 987,
                "process_name": "runzeroday.exe",
                "command_line": "C:\\runzeroday.exe -f insert_bad_spelling",
                "signatures": {},
                "children": []
            }
        ]
        nc_section = ResultSection('Example of a PROCESS_TREE section',
                                   body_format=BODY_FORMAT.PROCESS_TREE,
                                   body=json.dumps(nc_body))
        result.add_section(nc_section)

        # ==================================================================
        # TABLE section:
        #   This section allows the service writer to have their content displayed in a table format in the UI
        #   The body argument must be a list [] of dict {} objects. A dict object can have a key value pair
        #   where the value is a flat nested dictionary, and this nested dictionary will be displayed as a nested
        #   table within a cell.
        table_body = [
            {
                "a_str": "Some string1",
                "extra_column_here": "confirmed",
                "a_bool": False,
                "an_int": 101,
            },
            {
                "a_str": "Some string2",
                "a_bool": True,
                "an_int": 102,
            },
            {
                "a_str": "Some string3",
                "a_bool": False,
                "an_int": 103,
            },
            {
                "a_str": "Some string4",
                "a_bool": None,
                "an_int": -1000000000000000000,
                "extra_column_there": "confirmed",
                "nested_table": {
                    "a_str": "Some string3",
                    "a_bool": False,
                    "nested_table_thats_too_deep": {
                        "a_str": "Some string3",
                        "a_bool": False,
                        "an_int": 103,
                    },
                },
            },
        ]
        table_section = ResultSection('Example of a TABLE section',
                                      body_format=BODY_FORMAT.TABLE,
                                      body=json.dumps(table_body))
        result.add_section(table_section)

        # ==================================================================
        # Re-Submitting files to the system
        #   Adding extracted files will have them resubmitted to the system for analysis

        # This file will generate random results on the next run
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(data.encode())
        request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

        # Embedded files can also have their own classification!
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"CLASSIFIED!!!__"+data.encode())
        request.add_extracted(temp_path, "classified.doc", "Classified file ... don't look",
                              classification=cl_engine.RESTRICTED)

        # This file will generate empty results on the next run
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "wb") as myfile:
            myfile.write(b"EMPTY")
        request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

        # ==================================================================
        # Supplementary files
        #   Adding supplementary files will save them on the datastore for future
        #   reference but wont reprocess those files.
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(urls))
        request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")
        # like embedded files, you can add more then one supplementary files
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(json_body))
        request.add_supplementary(temp_path, "json_body.json", "This is the json_body as a JSON file")

        # ==================================================================
        # Wrap-up:
        #   Save your result object back into the request
        request.result = result

    # ==================================================================
    # Empty results file
    elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
        # Creating and empty result object
        request.result = Result()

    # ==================================================================
    # Randomized results file
    else:
        # For the randomized results file, we will completely randomize the results
        #   The content of those results do not matter since we've already showed you
        #   all the different result sections, tagging, heuristics and file upload functions
        embedded_result = Result()

        # random number of sections
        for _ in range(1, 3):
            embedded_result.add_section(self._create_random_section())

        request.result = embedded_result
def unicode_results(self, request: ServiceRequest, patterns: PatternMatch) -> Optional[ResultSection]:
    """
    Finds and report unicode encoded strings

    Args:
        request: AL request object with result section
        patterns: PatternMatch object

    Returns:
        The result section (with request.result as its parent) if one is created
    """
    # sha256 of decoded data -> (decoded bytes, encoded sample bytes)
    unicode_al_results: Dict[str, Tuple[bytes, bytes]] = {}
    # (sha256 of dropped/extracted data, encoding marker that matched)
    dropped_unicode: List[Tuple[str, str]] = []
    for hes in self.HEXENC_STRINGS:
        # Only attempt a decode if the encoding marker is followed by hex digits
        if re.search(
                re.escape(hes) + b'[A-Fa-f0-9]{2}',
                request.file_contents):
            dropped = self.decode_encoded_udata(request, hes,
                                                request.file_contents,
                                                unicode_al_results)
            for uhash in dropped:
                dropped_unicode.append((uhash, safe_str(hes)))

    # Report Unicode Encoded Data:
    # Heuristic fires once per dropped file; None when nothing was dropped
    unicode_heur = Heuristic(
        5, frequency=len(dropped_unicode)) if dropped_unicode else None
    unicode_emb_res = ResultSection(
        "Found Unicode-Like Strings in Non-Executable:",
        body_format=BODY_FORMAT.MEMORY_DUMP,
        heuristic=unicode_heur)
    for uhash, uenc in dropped_unicode:
        unicode_emb_res.add_line(
            f"Extracted over 50 bytes of possible embedded unicode with "
            f"{uenc} encoding. SHA256: {uhash}. See extracted files.")

    for unires_index, (sha256,
                       (decoded,
                        encoded)) in enumerate(unicode_al_results.items()):
        sub_uni_res = (ResultSection(f"Result {unires_index}",
                                     parent=unicode_emb_res))
        sub_uni_res.add_line(f'ENCODED TEXT SIZE: {len(decoded)}')
        sub_uni_res.add_line(
            f'ENCODED SAMPLE TEXT: {safe_str(encoded)}[........]')
        sub_uni_res.add_line(f'DECODED SHA256: {sha256}')
        subb_uni_res = (ResultSection("DECODED ASCII DUMP:",
                                      body_format=BODY_FORMAT.MEMORY_DUMP,
                                      parent=sub_uni_res))
        subb_uni_res.add_line('{}'.format(safe_str(decoded)))
        # Look for IOCs of interest
        hits = self.ioc_to_tag(decoded,
                               patterns,
                               sub_uni_res,
                               st_max_length=1000,
                               taglist=True)
        # IOC hits escalate the per-result heuristic from 4 to 6
        if hits:
            sub_uni_res.set_heuristic(6)
            subb_uni_res.add_line(
                "Suspicious string(s) found in decoded data.")
        else:
            sub_uni_res.set_heuristic(4)

    if unicode_al_results or dropped_unicode:
        request.result.add_section(unicode_emb_res)
        return unicode_emb_res
    return None
def execute(self, request):
    """
    Parse the submitted SWF file and populate request.result with header
    details, tag summaries, symbol listings, embedded binary data, large
    string buffers and suspicious compilation traits. Embedded payloads are
    extracted for further analysis.

    Fix vs. previous revision: the input file handle (`fh`) was shadowed by
    the `with open(...) as fh:` blocks used for writing extracted files, so
    the final `fh.close()` closed an already-closed output handle and the
    source SWF handle leaked (and leaked on any mid-analysis exception).
    The input file is now managed by a context manager and output handles
    use distinct names.

    :param request: AL service request; request.result is populated as a
                    side effect and may raise if the SWF cannot be parsed
    """
    request.result = Result()
    self.result = request.result
    file_path = request.file_path

    # Context manager guarantees the source handle is released on every path
    with open(file_path, 'rb') as fh:
        try:
            self.swf = SWF(fh)
            if self.swf is None:
                raise Exception("self.swf is None")
        except Exception as e:
            self.log.exception(
                f"Unable to parse file {request.sha256}: {str(e)}")
            raise

        # Per-run analysis state, filled in by the tag analyzers below
        self.tag_summary = defaultdict(list)
        self.symbols = {}
        self.binary_data = {}
        self.exported_assets = []
        self.big_buffers = set()
        self.has_product_info = False
        self.anti_decompilation = False
        self.recent_compile = False
        self.disasm_path = None

        # SWF header details
        header_subsection = ResultSection(title_text="SWF Header",
                                          parent=self.result)
        if self.swf.header.version:
            header_subsection.add_line("Version: %d" % self.swf.header.version)
            header_subsection.add_tag(tag_type="file.swf.header.version",
                                      value=str(self.swf.header.version))
        header_subsection.add_line("File length: %d" %
                                   self.swf.header.file_length)
        if self.swf.header.frame_size.__str__():
            header_subsection.add_line("Frame size: %s" %
                                       self.swf.header.frame_size.__str__())
            header_subsection.add_tag(
                tag_type="file.swf.header.frame.size",
                value=self.swf.header.frame_size.__str__())
        if self.swf.header.frame_rate:
            header_subsection.add_line("Frame rate: %d" %
                                       self.swf.header.frame_rate)
            header_subsection.add_tag(tag_type="file.swf.header.frame.rate",
                                      value=str(self.swf.header.frame_rate))
        if self.swf.header.frame_count:
            header_subsection.add_line("Frame count: %d" %
                                       self.swf.header.frame_count)
            header_subsection.add_tag(tag_type="file.swf.header.frame.count",
                                      value=str(self.swf.header.frame_count))

        # Parse Tags: run the per-type analyzer for each tag and fuzzy-hash
        # the sequence of tag types as a structural fingerprint
        tag_subsection = ResultSection(title_text="SWF Tags",
                                       parent=self.result)
        tag_types = []
        for tag in self.swf.tags:
            self.tag_analyzers.get(SWF_TAGS.get(tag.type), self._dummy)(tag)
            tag_types.append(str(tag.type))
        tag_list = ','.join(tag_types)
        tags_ssdeep = ssdeep.hash(tag_list)
        tag_subsection.add_tag(tag_type="file.swf.tags_ssdeep",
                               value=tags_ssdeep)
        # TODO: not sure we want to split those...
        # _, hash_one, hash_two = tags_ssdeep.split(':')
        # tag_subsection.add_tag(tag_type=TAG_TYPE.SWF_TAGS_SSDEEP, value=hash_one)
        # tag_subsection.add_tag(tag_type=TAG_TYPE.SWF_TAGS_SSDEEP, value=hash_two)

        # Script Overview: symbol 0 (if present) is the main entry symbol
        if len(self.symbols.keys()) > 0:
            root_symbol = 'unspecified'
            if 0 in self.symbols:
                root_symbol = self.symbols[0]
                self.symbols.pop(0)
            symbol_subsection = ResultSection(title_text="Symbol Summary",
                                              parent=self.result)
            symbol_subsection.add_line(f'Main: {root_symbol}')
            if len(self.symbols.keys()) > 0:
                for tag_id, name in sorted([(k, v)
                                            for k, v in self.symbols.items()]):
                    symbol_subsection.add_line(f'ID:{tag_id} - {name}')

        # Extract any binary data blobs embedded in the SWF
        if len(self.binary_data.keys()) > 0:
            binary_subsection = ResultSection(
                title_text="Attached Binary Data",
                heuristic=Heuristic(3),
                parent=self.result)
            for tag_id, tag_data in self.binary_data.items():
                tag_name = self.symbols.get(tag_id, 'unspecified')
                binary_subsection.add_line(f'ID:{tag_id} - {tag_name}')
                try:
                    binary_filename = hashlib.sha256(
                        tag_data).hexdigest() + '.attached_binary'
                    binary_path = os.path.join(self.working_directory,
                                               binary_filename)
                    # Distinct handle name — must not shadow the input `fh`
                    with open(binary_path, 'wb') as bin_fh:
                        bin_fh.write(tag_data)
                    request.add_extracted(
                        binary_path, f"{tag_name}_{tag_id}",
                        f"SWF Embedded Binary Data {str(tag_id)}")
                except Exception:
                    self.log.exception(
                        "Error submitting embedded binary data for swf:")

        # Summaries accumulated by the tag analyzers, one subsection per tag
        tags_subsection = ResultSection(title_text="Tags of Interest")
        for tag in sorted(self.tag_summary.keys()):
            body = []
            summaries = self.tag_summary[tag]
            for summary in summaries:
                summary_line = '\t'.join(summary)
                body.append(summary_line)
            if body:
                subtag_section = ResultSection(title_text=tag,
                                               parent=tags_subsection)
                subtag_section.add_lines(body)
        if len(tags_subsection.subsections) > 0:
            self.result.add_section(tags_subsection)

        # Large string buffers are suspicious (often shellcode/packed data)
        if len(self.big_buffers) > 0:
            bbs = ResultSection(title_text="Large String Buffers",
                                heuristic=Heuristic(1),
                                parent=self.result)
            for buf in self.big_buffers:
                if isinstance(buf, str):
                    buf = buf.encode()
                bbs.add_line("Found a %d byte string." % len(buf))
                buf_filename = ""
                try:
                    buf_filename = hashlib.sha256(
                        buf).hexdigest() + '.stringbuf'
                    buf_path = os.path.join(self.working_directory,
                                            buf_filename)
                    # Distinct handle name — must not shadow the input `fh`
                    with open(buf_path, 'wb') as buf_fh:
                        buf_fh.write(buf)
                    # NOTE(review): name/description argument order differs from
                    # the add_extracted call above — confirm intended
                    request.add_extracted(buf_path,
                                          "AVM2 Large String Buffer.",
                                          buf_filename)
                except Exception:
                    self.log.exception(
                        "Error submitting AVM2 String Buffer %s" %
                        buf_filename)

        if not self.has_product_info:
            self.log.debug("Missing product info.")
            no_info = ResultSection(title_text="Missing Product Information",
                                    heuristic=Heuristic(5),
                                    parent=self.result)
            no_info.add_line(
                "This SWF doesn't specify information about the product that created it."
            )

        if self.anti_decompilation:
            self.log.debug("Anti-disassembly techniques may be present.")
            no_dis = ResultSection(title_text="Incomplete Disassembly",
                                   heuristic=Heuristic(4),
                                   parent=self.result)
            no_dis.add_line(
                "This SWF may contain intentional corruption or obfuscation to prevent disassembly."
            )

        if self.recent_compile:
            recent_compile = ResultSection(title_text="Recent Compilation",
                                           heuristic=Heuristic(2),
                                           parent=self.result)
            recent_compile.add_line(
                "This SWF was compiled within the last 24 hours.")
def base64_results(self, request: ServiceRequest, patterns: PatternMatch) -> Optional[ResultSection]:
    """
    Finds and reports Base64 encoded text

    Args:
        request: AL request object with result section
        patterns: PatternMatch object

    Returns:
        The result section (with request.result as its parent) if one is created
    """
    b64_al_results = []
    # Normalized candidate strings already processed, to avoid duplicates
    b64_matches = set()

    # Base64 characters with possible space, newline characters and HTML line feeds (&#(XA|10);)
    for b64_match in re.findall(
            b'([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}'
            b'(?:&#[x1][A0];)?[\r]?[\n]?){2,})',
            request.file_contents):
        # Strip whitespace and the HTML line-feed entities matched by the
        # optional (?:&#[x1][A0];)? group of the pattern above
        b64_string = b64_match.replace(b'\n', b'').replace(b'\r', b'').replace(b' ', b'')\
            .replace(b'&#xA;', b'').replace(b'&#10;', b'')
        if b64_string in b64_matches:
            continue
        b64_matches.add(b64_string)
        uniq_char = set(b64_string)
        # Require some character diversity to skip low-entropy false positives
        if len(uniq_char) > 6:
            b64result, tags = self.b64(request, b64_string, patterns)
            if len(b64result) > 0:
                b64_al_results.append((b64result, tags))

    # UTF-16 strings
    for ust in strings.extract_unicode_strings(request.file_contents,
                                               n=self.st_min_length):
        for b64_match in re.findall(
                b'([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}[\r]?[\n]?){2,})',
                ust.s):
            b64_string = b64_match.replace(b'\n', b'').replace(
                b'\r', b'').replace(b' ', b'')
            uniq_char = set(b64_string)
            if len(uniq_char) > 6:
                b64result, tags = self.b64(request, b64_string, patterns)
                if len(b64result) > 0:
                    b64_al_results.append((b64result, tags))

    # Report B64 Results
    if len(b64_al_results) > 0:
        b64_ascii_content: List[bytes] = []
        b64_res = (ResultSection("Base64 Strings:",
                                 heuristic=Heuristic(1),
                                 parent=request.result))
        b64index = 0
        for b64dict, tags in b64_al_results:
            for ttype, values in tags.items():
                for v in values:
                    b64_res.add_tag(ttype, v)
            # b64l layout (from self.b64): [0]=size, [1]=sample text,
            # [2]=display dump, [3]=raw decoded bytes — TODO confirm
            for b64k, b64l in b64dict.items():
                b64index += 1
                sub_b64_res = (ResultSection(f"Result {b64index}",
                                             parent=b64_res))
                sub_b64_res.add_line(f'BASE64 TEXT SIZE: {b64l[0]}')
                sub_b64_res.add_line(
                    f'BASE64 SAMPLE TEXT: {safe_str(b64l[1])}[........]')
                sub_b64_res.add_line(f'DECODED SHA256: {b64k}')
                subb_b64_res = (ResultSection(
                    "DECODED ASCII DUMP:",
                    body_format=BODY_FORMAT.MEMORY_DUMP,
                    parent=sub_b64_res))
                subb_b64_res.add_line(safe_str(b64l[2]))
                # Placeholder bodies mean content went out as extracted files
                if b64l[2] not in [
                        b"[Possible file contents. See extracted files.]",
                        b"[IOCs discovered with other non-printable data. See extracted files.]"
                ]:
                    b64_ascii_content.append(b64l[3])
        # Write all non-extracted decoded b64 content to file
        if len(b64_ascii_content) > 0:
            all_b64 = b"\n".join(b64_ascii_content)
            b64_all_sha256 = hashlib.sha256(all_b64).hexdigest()
            self.extract_file(request, all_b64,
                              f"all_b64_{b64_all_sha256[:7]}.txt",
                              "all misc decoded b64 from sample")
        return b64_res
    return None
def execute(self, request: ServiceRequest) -> None:
    """Run ViperMonkey (via a Python 2 subprocess) against the submitted
    file, then build result sections for recorded macro actions, temporary
    IOCs, PowerShell, possible Base64 and called VBA built-ins.

    :param request: AL service request; request.result is populated as a
                    side effect
    """
    self.result = Result()
    request.result = self.result
    self.ip_list = []
    self.url_list = []
    self.found_powershell = False
    self.file_hashes = []

    vmonkey_err = False
    actions: List[str] = []
    external_functions: List[str] = []
    tmp_iocs: List[str] = []
    output_results: Dict[str, Any] = {}
    potential_base64: Set[str] = set()

    # Running ViperMonkey
    try:
        file_contents = request.file_contents
        input_file: str = request.file_path
        input_file_obj: Optional[IO] = None
        # Typical start to XML files
        if not file_contents.startswith(
                b"<?") and request.file_type == "code/xml":
            # Default encoding/decoding if BOM not found
            encoding: Optional[str] = None
            decoding: Optional[str] = None
            # Remove potential BOMs from contents
            if file_contents.startswith(BOM_UTF8):
                encoding = "utf-8"
                decoding = "utf-8-sig"
            elif file_contents.startswith(BOM_UTF16):
                encoding = "utf-16"
                decoding = "utf-16"
            if encoding and decoding:
                # Re-write the file without its BOM for ViperMonkey
                input_file_obj = tempfile.NamedTemporaryFile(
                    "w+", encoding=encoding)
                input_file_obj.write(
                    file_contents.decode(decoding, errors="ignore"))
                input_file = input_file_obj.name
            else:
                # If the file_type was detected as XML, it's probably buried within but not actually an XML file
                # Give no response as ViperMonkey can't process this kind of file
                return

        # NOTE(review): shell=True with a string built from file paths is a
        # shell-injection risk if any path can contain shell metacharacters;
        # prefer subprocess.run([...], shell=False) — confirm paths are
        # service-controlled before changing
        cmd = " ".join([
            PYTHON2_INTERPRETER,
            os.path.join(os.path.dirname(__file__),
                         "vipermonkey_compat.py2"),
            input_file,
            self.working_directory,
        ])
        p = subprocess.run(cmd, capture_output=True, shell=True)
        stdout = p.stdout

        # Close file
        if input_file_obj and os.path.exists(input_file_obj.name):
            input_file_obj.close()

        # Add artifacts
        artifact_dir = os.path.join(
            self.working_directory,
            os.path.basename(input_file) + "_artifacts")
        if os.path.exists(artifact_dir):
            for file in os.listdir(artifact_dir):
                try:
                    file_path = os.path.join(artifact_dir, file)
                    if os.path.isfile(file_path) and os.path.getsize(
                            file_path):
                        request.add_extracted(
                            file_path, file,
                            "File extracted by ViperMonkey during analysis"
                        )
                except os.error as e:
                    self.log.warning(e)

        # Read output: the wrapper emits a single JSON object line
        if stdout:
            for line in stdout.splitlines():
                if line.startswith(b"{") and line.endswith(b"}"):
                    try:
                        output_results = json.loads(line)
                    except UnicodeDecodeError:
                        output_results = json.loads(
                            line.decode("utf-8", "replace"))
                    break

            # Checking for tuple in case vmonkey return is None
            # If no macros found, return is [][][], if error, return is None
            # vmonkey_err can still happen if return is [][][], log as warning instead of error
            if isinstance(output_results.get("vmonkey_values"), dict):
                """
                Structure of variable "actions" is as follows:
                [action, parameters, description]
                action: 'Found Entry Point', 'Execute Command', etc...
                parameters: Parameters for function
                description: 'Shell Function', etc...
                external_functions is a list of built-in VBA functions that were called
                """
                actions = output_results["vmonkey_values"]["actions"]
                external_functions = output_results["vmonkey_values"][
                    "external_funcs"]
                tmp_iocs = output_results["vmonkey_values"]["tmp_iocs"]
                if output_results["vmonkey_err"]:
                    vmonkey_err = True
                    self.log.warning(output_results["vmonkey_err"])
            else:
                vmonkey_err = True
        else:
            vmonkey_err = True

    except Exception:
        self.log.exception(
            f"Vipermonkey failed to analyze file {request.sha256}")

    if actions:
        # Creating action section
        action_section = ResultSection("Recorded Actions:",
                                       parent=self.result)
        action_section.add_tag("technique.macro", "Contains VBA Macro(s)")
        # description -> its (reused) sub-section
        sub_action_sections: Dict[str, ResultSection] = {}
        for action, parameters, description in actions:
            # Creating action sub-sections for each action
            if not description:
                # For actions with no description, just use the type of action
                description = action

            if description not in sub_action_sections:
                # Action's description will be the sub-section name
                sub_action_section = ResultSection(description,
                                                   parent=action_section)
                sub_action_sections[description] = sub_action_section

                if description == "Shell function":
                    sub_action_section.set_heuristic(2)
            else:
                # Reuse existing section
                sub_action_section = sub_action_sections[description]
                if sub_action_section.heuristic:
                    sub_action_section.heuristic.increment_frequency()

            # Parameters are sometimes stored as a list, account for this
            if isinstance(parameters, list):
                for item in parameters:
                    # Parameters includes more than strings (booleans for example)
                    if isinstance(item, str):
                        # Check for PowerShell
                        self.extract_powershell(item, sub_action_section,
                                                request)
                # Join list items into single string
                param = ", ".join(str(p) for p in parameters)
            else:
                param = parameters
                # Parameters includes more than strings (booleans for example)
                if isinstance(param, str):
                    self.extract_powershell(param, sub_action_section,
                                            request)

            # If the description field was empty, re-organize result section for this case
            if description == action:
                sub_action_section.add_line(param)
            else:
                sub_action_section.add_line(
                    f"Action: {action}, Parameters: {param}")

            # Check later for base64
            potential_base64.add(param)

            # Add urls/ips found in parameter to respective lists
            self.find_ip(param)

    # Check tmp_iocs
    res_temp_iocs = ResultSection("Runtime temporary IOCs")
    for ioc in tmp_iocs:
        self.extract_powershell(ioc, res_temp_iocs, request)
        potential_base64.add(ioc)
        self.find_ip(ioc)

    if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
        self.result.add_section(res_temp_iocs)

    # Add PowerShell score/tag if found
    if self.found_powershell:
        ResultSection("Discovered PowerShell code in file",
                      parent=self.result,
                      heuristic=Heuristic(3))

    # Check parameters and temp_iocs for base64
    # frequency=0 so the heuristic only scores when check_for_b64 bumps it
    # — TODO confirm this is the intended scoring mechanism
    base64_section = ResultSection("Possible Base64 found",
                                   heuristic=Heuristic(5, frequency=0))
    for param in potential_base64:
        self.check_for_b64(param, base64_section, request,
                           request.file_contents)
    if base64_section.body:
        self.result.add_section(base64_section)

    # Add url/ip tags
    self.add_ip_tags()

    # Create section for built-in VBA functions called
    if len(external_functions) > 0:
        external_func_section = ResultSection(
            "VBA functions called",
            body_format=BODY_FORMAT.MEMORY_DUMP,
            parent=self.result)
        for func in external_functions:
            if func in vba_builtins:
                external_func_section.add_line(func + ": " +
                                               vba_builtins[func])
            else:
                external_func_section.add_line(func)

    # Add vmonkey log as a supplemental file if we have results
    if "stdout" in output_results and (vmonkey_err
                                       or request.result.sections):
        temp_log_copy = os.path.join(
            tempfile.gettempdir(), f"{request.sid}_vipermonkey_output.log")
        with open(temp_log_copy, "w") as temp_log_file:
            temp_log_file.write(output_results["stdout"])

        request.add_supplementary(temp_log_copy, "vipermonkey_output.log",
                                  "ViperMonkey log output")

    if vmonkey_err is True:
        ResultSection(
            'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
            parent=self.result,
            heuristic=Heuristic(1),
        )
def validate_certs(self, certs, cur_file, supplementary_files):
    """
    Analyse a certificate or certificate chain and tag the interesting fields.

    The start and end date, issuer, and owner are pulled from every certificate in the chain,
    several suspicious-certificate heuristics are raised as sub-sections, and the certificate
    file itself is added to the supplementary files when any heuristic fired.

    :param certs: the keytool -printcert string representation of a certificate/certificate chain
    :param cur_file: the file path of the certificate (to be used in supplementary_files).
                     NOTE(review): it is passed to ``.decode('utf-8')`` below, so it is presumably
                     a bytes path — confirm against the caller
    :param supplementary_files: the services supplementary files
    :return:
    """
    # Parse the printcert text into individual certificate objects
    certs = certificate_chain_from_printcert(certs)
    for cert in certs:
        # Base section holding the raw certificate dump; heuristic findings are attached
        # to it as sub-sections below
        res_cert = ResultSection("Certificate Analysis", body=safe_str(cert.raw),
                                 body_format=BODY_FORMAT.MEMORY_DUMP)
        res_cert.add_tag('cert.valid.start', cert.valid_from)
        res_cert.add_tag('cert.valid.end', cert.valid_to)
        res_cert.add_tag('cert.issuer', cert.issuer)
        res_cert.add_tag('cert.owner', cert.owner)

        # Validity strings end with the year; only the years are compared below,
        # not the full dates
        valid_from_splitted = cert.valid_from.split(" ")
        valid_from_year = int(valid_from_splitted[-1])
        valid_to_splitted = cert.valid_to.split(" ")
        valid_to_year = int(valid_to_splitted[-1])

        if cert.owner == cert.issuer:
            ResultSection("Certificate is self-signed", parent=res_cert,
                          heuristic=Heuristic(11))

        if not cert.country:
            ResultSection("Certificate owner has no country", parent=res_cert,
                          heuristic=Heuristic(12))

        if valid_from_year > valid_to_year:
            ResultSection("Certificate expires before validity date starts", parent=res_cert,
                          heuristic=Heuristic(15))

        if (valid_to_year - valid_from_year) > 30:
            ResultSection("Certificate valid more then 30 years", parent=res_cert,
                          heuristic=Heuristic(13))

        if cert.country:
            # A plausible ISO country code is exactly two non-numeric characters
            try:
                int(cert.country)
                is_int_country = True
            except Exception:
                is_int_country = False
            if len(cert.country) != 2 or is_int_country:
                ResultSection("Invalid country code in certificate owner", parent=res_cert,
                              heuristic=Heuristic(14))

        self.signature_block_certs.append(res_cert)

        # The certificate is only kept as a supplementary file when at least one
        # heuristic sub-section was added to it
        if len(res_cert.subsections) > 0:
            name = os.path.basename(cur_file)
            desc = f'JAR Signature Block: {name}'
            supplementary_files.append((cur_file.decode('utf-8'), name.decode('utf-8'), desc))
def execute(self, request):
    """
    Run the DeobsfuScripter de-obfuscation pipeline on a single file (sequential variant).

    The file contents are repeatedly run through a list of de-obfuscation techniques, one layer
    at a time, until no technique produces a new layer or the attempt limit is reached. The
    steps taken, the cleaned final layer, and any IOCs that only appear after de-obfuscation
    are reported on ``request.result``.

    :param request: the service request; results and extracted files are attached to it
    """
    # --- Setup ----------------------------------------------------------------------------------------------
    request.result = Result()
    patterns = PatternMatch()
    # Deep scans get many more passes before the final pass is forced
    if request.deep_scan:
        max_attempts = 100
    else:
        max_attempts = 10
    self.files_extracted = set()
    self.hashes = set()
    # (tag_type, value) pairs seen in the original file, used later to spot NEW IOCs
    before = set()

    # --- Pre-Processing --------------------------------------------------------------------------------------
    # Get all IOCs prior to de-obfuscation
    pat_values = patterns.ioc_match(request.file_contents, bogon_ip=True, just_network=False)
    if pat_values:
        # The original-file IOC section is only displayed on request; the `before`
        # set is populated either way
        if request.get_param('extract_original_iocs'):
            ioc_res = ResultSection("The following IOCs were found in the original file",
                                    parent=request.result,
                                    body_format=BODY_FORMAT.MEMORY_DUMP)
        else:
            ioc_res = None
        for k, val in pat_values.items():
            if val == "":
                # Empty match value: normalize to an ASCII byte string before tagging
                asc_asc = unicodedata.normalize('NFKC', val).encode('ascii', 'ignore')
                if ioc_res:
                    ioc_res.add_line(f"Found {k.upper().replace('.', ' ')}: {safe_str(asc_asc)}")
                    ioc_res.add_tag(k, asc_asc)
                before.add((k, asc_asc))
            else:
                for v in val:
                    if ioc_res:
                        ioc_res.add_line(f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}")
                        ioc_res.add_tag(k, v)
                    before.add((k, v))

    # --- Prepare Techniques ----------------------------------------------------------------------------------
    # (display name, callable) pairs; each callable returns new layer bytes or a falsy value
    techniques = [
        ('MSOffice Embedded script', self.msoffice_embedded_script_string),
        ('CHR and CHRB decode', self.chr_decode),
        ('String replace', self.string_replace),
        ('Powershell carets', self.powershell_carets),
        ('Array of strings', self.array_of_strings),
        ('Fake array vars', self.vars_of_fake_arrays),
        ('Reverse strings', self.str_reverse),
        ('B64 Decode', self.b64decode_str),
        ('Simple XOR function', self.simple_xor_function),
    ]
    # Added to `techniques` only once the first pass stops making progress
    second_pass = [('Concat strings', self.concat_strings),
                   ('MSWord macro vars', self.mswordmacro_vars),
                   ('Powershell vars', self.powershell_vars),
                   ('Charcode hex', self.charcode_hex)]
    # Run once at the very end, after everything else has stalled
    final_pass = [
        ('Charcode', self.charcode),
    ]
    code_extracts = [('.*html.*', "HTML scripts extraction", self.extract_htmlscript)]

    layers_list = []
    layer = request.file_contents

    # --- Stage 1: Script Extraction --------------------------------------------------------------------------
    # For matching file types, start from the extracted scripts instead of the raw file
    for pattern, name, func in code_extracts:
        if re.match(re.compile(pattern), request.task.file_type):
            extracted_parts = func(request.file_contents)
            layer = b"\n".join(extracted_parts).strip()
            layers_list.append((name, layer))
            break

    # --- Stage 2: Deobsfucation ------------------------------------------------------------------------------
    idx = 0
    first_pass_len = len(techniques)
    layers_count = len(layers_list)
    while True:
        if idx > max_attempts:
            # Attempt limit reached: run every remaining technique once, then stop
            final_pass.extend(techniques)
            for name, technique in final_pass:
                res = technique(layer)
                if res:
                    layers_list.append((name, res))
            break
        for name, technique in techniques:
            res = technique(layer)
            if res:
                layers_list.append((name, res))
                # Looks like it worked, restart with new layer
                layer = res
        # If the layers haven't changed in a passing, break
        if layers_count == len(layers_list):
            if len(techniques) != first_pass_len:
                # Second pass also made no progress: one last run of the final pass
                final_pass.extend(techniques)
                for name, technique in final_pass:
                    res = technique(layer)
                    if res:
                        layers_list.append((name, res))
                break
            else:
                # First pass stalled: add the second-pass techniques and try again
                for x in second_pass:
                    techniques.insert(0, x)
        layers_count = len(layers_list)
        idx += 1

    # --- Compiling results ----------------------------------------------------------------------------------
    if len(layers_list) > 0:
        extract_file = False
        num_layers = len(layers_list)
        heur_id = None
        # Compute heuristic: more de-obfuscation layers -> higher heuristic id
        if num_layers < 5:
            heur_id = 1
        elif num_layers < 10:
            heur_id = 2
        elif num_layers < 50:
            heur_id = 3
        elif num_layers < 100:
            heur_id = 4
        elif num_layers >= 100:
            heur_id = 5
        # Cleanup final layer
        clean = self.clean_up_final_layer(layers_list[-1][1])
        if clean != request.file_contents:
            # Check for new IOCs (only those not already seen in the original file)
            pat_values = patterns.ioc_match(clean, bogon_ip=True, just_network=False)
            diff_tags = {}
            for k, val in pat_values.items():
                if val == "":
                    asc_asc = unicodedata.normalize('NFKC', val).encode('ascii', 'ignore')
                    if (k, asc_asc) not in before:
                        diff_tags.setdefault(k, [])
                        diff_tags[k].append(asc_asc)
                else:
                    for v in val:
                        if (k, v) not in before:
                            diff_tags.setdefault(k, [])
                            diff_tags[k].append(v)
            # Extract the final layer for deep scans, for heavy obfuscation of a
            # non-trivial payload, or when new IOCs appeared
            if request.deep_scan or \
                    (len(clean) > 1000 and heur_id >= 4) or diff_tags:
                extract_file = True
            # Display obfuscation steps
            mres = ResultSection("De-obfuscation steps taken by DeobsfuScripter",
                                 parent=request.result)
            if heur_id:
                mres.set_heuristic(heur_id)
            lcount = Counter([x[0] for x in layers_list])
            for l, c in lcount.items():
                mres.add_line(f"{l}, {c} time(s).")
            # Display final layer: show fewer bytes inline when the full file is extracted
            byte_count = 5000
            if extract_file:
                # Save extracted file
                byte_count = 500
                fn = f"{request.file_name}_decoded_final"
                fp = os.path.join(self.working_directory, fn)
                with open(fp, 'wb') as dcf:
                    dcf.write(clean)
                    self.log.debug(f"Submitted dropped file for analysis: {fp}")
                request.add_extracted(fp, fn, "Final deobfuscation layer")
            ResultSection(f"First {byte_count} bytes of the final layer:",
                          body=safe_str(clean[:byte_count]),
                          body_format=BODY_FORMAT.MEMORY_DUMP,
                          parent=request.result)
            # Display new IOCs from final layer
            if len(diff_tags) > 0:
                ioc_new = ResultSection("New IOCs found after de-obfustcation",
                                        parent=request.result,
                                        body_format=BODY_FORMAT.MEMORY_DUMP)
                has_network_heur = False
                for ty, val in diff_tags.items():
                    for v in val:
                        # Network IOCs score a higher heuristic than other new IOCs
                        if "network" in ty:
                            has_network_heur = True
                        ioc_new.add_line(f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}")
                        ioc_new.add_tag(ty, v)
                if has_network_heur:
                    ioc_new.set_heuristic(7)
                else:
                    ioc_new.set_heuristic(6)
            # Report any files dropped by the techniques during de-obfuscation
            if len(self.files_extracted) > 0:
                ext_file_res = ResultSection("The following files were extracted during the deobfuscation",
                                             heuristic=Heuristic(8),
                                             parent=request.result)
                for f in self.files_extracted:
                    ext_file_res.add_line(os.path.basename(f))
                    request.add_extracted(f, os.path.basename(f),
                                          "File of interest deobfuscated from sample")
def execute(self, request: ServiceRequest) -> None:
    """
    Run the DeobsfuScripter de-obfuscation pipeline on a single file (threaded variant).

    The file contents are repeatedly run through a list of de-obfuscation techniques (each
    pass executed in parallel via a thread pool), one layer at a time, until no technique
    produces a new layer or the attempt limit is reached. The steps taken, the cleaned final
    layer, and any URIs that only appear after de-obfuscation are reported on
    ``request.result``.

    :param request: the service request; results and extracted files are attached to it
    """
    # --- Setup ----------------------------------------------------------------------------------------------
    request.result = Result()
    patterns = PatternMatch()
    # Deep scans get many more passes before the final pass is forced
    if request.deep_scan:
        max_attempts = 100
    else:
        max_attempts = 10
    self.files_extracted = set()
    self.hashes = set()

    # --- Pre-Processing --------------------------------------------------------------------------------------
    # Get all IOCs prior to de-obfuscation
    pat_values = patterns.ioc_match(request.file_contents, bogon_ip=True, just_network=False)
    if pat_values and request.get_param('extract_original_iocs'):
        ioc_res = ResultSection("The following IOCs were found in the original file",
                                parent=request.result,
                                body_format=BODY_FORMAT.MEMORY_DUMP)
        for k, val in pat_values.items():
            for v in val:
                if ioc_res:
                    ioc_res.add_line(f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}")
                    ioc_res.add_tag(k, v)

    # --- Prepare Techniques ----------------------------------------------------------------------------------
    # (display name, callable) pairs; each callable returns new layer bytes or a falsy value
    techniques = [
        ('MSOffice Embedded script', self.msoffice_embedded_script_string),
        ('CHR and CHRB decode', self.chr_decode),
        ('String replace', self.string_replace),
        ('Powershell carets', self.powershell_carets),
        ('Array of strings', self.array_of_strings),
        ('Fake array vars', self.vars_of_fake_arrays),
        ('Reverse strings', self.str_reverse),
        ('B64 Decode', self.b64decode_str),
        ('Simple XOR function', self.simple_xor_function),
    ]
    # Added to `techniques` only once the first pass stops making progress
    second_pass = [('Concat strings', self.concat_strings),
                   ('MSWord macro vars', self.mswordmacro_vars),
                   ('Powershell vars', self.powershell_vars),
                   ('Charcode hex', self.charcode_hex)]
    # Run once at the very end, after everything else has stalled
    final_pass = [
        ('Charcode', self.charcode),
    ]
    code_extracts = [('.*html.*', "HTML scripts extraction", self.extract_htmlscript)]

    layers_list: List[Tuple[str, bytes]] = []
    layer = request.file_contents

    # --- Stage 1: Script Extraction --------------------------------------------------------------------------
    # For matching file types, start from the extracted scripts instead of the raw file
    for pattern, name, func in code_extracts:
        if regex.match(regex.compile(pattern), request.task.file_type):
            extracted_parts = func(request.file_contents)
            layer = b"\n".join(extracted_parts).strip()
            layers_list.append((name, layer))
            break

    # --- Stage 2: Deobsfucation ------------------------------------------------------------------------------
    idx = 0
    first_pass_len = len(techniques)
    layers_count = len(layers_list)
    while True:
        if idx > max_attempts:
            # Attempt limit reached: run every remaining technique once, then stop
            final_pass.extend(techniques)
            for name, technique in final_pass:
                res = technique(layer)
                if res:
                    layers_list.append((name, res))
            break

        # Run all current techniques against the current layer in parallel
        with ThreadPoolExecutor() as executor:
            threads = [executor.submit(technique, layer)
                       for name, technique in techniques]
            results = [thread.result() for thread in threads]
            for i in range(len(results)):
                result = results[i]
                if result:
                    layers_list.append((techniques[i][0], result))
                    # Looks like it worked, restart with new layer
                    layer = result

        # If the layers haven't changed in a passing, break
        if layers_count == len(layers_list):
            if len(techniques) != first_pass_len:
                # Second pass also made no progress: one last run of the final pass
                final_pass.extend(techniques)
                with ThreadPoolExecutor() as executor:
                    threads = [executor.submit(technique, layer)
                               for name, technique in final_pass]
                    results = [thread.result() for thread in threads]
                    # BUGFIX: label each result with the final_pass technique that
                    # produced it. Previously this indexed `techniques[i][0]`, which
                    # mislabels layers (and can raise IndexError, since final_pass is
                    # longer than techniques here after the extend above).
                    for (name, _technique), result in zip(final_pass, results):
                        if result:
                            layers_list.append((name, result))
                break
            # First pass stalled: add the second-pass techniques and try again
            for x in second_pass:
                techniques.insert(0, x)
        layers_count = len(layers_list)
        idx += 1

    # --- Compiling results ----------------------------------------------------------------------------------
    if len(layers_list) > 0:
        extract_file = False
        num_layers = len(layers_list)

        # Compute heuristic: more de-obfuscation layers -> higher heuristic id
        if num_layers < 5:
            heur_id = 1
        elif num_layers < 10:
            heur_id = 2
        elif num_layers < 50:
            heur_id = 3
        elif num_layers < 100:
            heur_id = 4
        else:  # num_layers >= 100
            heur_id = 5

        # Cleanup final layer
        clean = self.clean_up_final_layer(layers_list[-1][1])
        if clean != request.file_contents:
            # Check for new IOCs
            pat_values = patterns.ioc_match(clean, bogon_ip=True, just_network=False)
            diff_tags: Dict[str, List[bytes]] = {}
            for uri in pat_values.get('network.static.uri', []):
                # Compare URIs without query string
                uri = uri.split(b'?', 1)[0]
                if uri not in request.file_contents:
                    diff_tags.setdefault('network.static.uri', [])
                    diff_tags['network.static.uri'].append(uri)

            # Extract the final layer for deep scans, for heavy obfuscation of a
            # non-trivial payload, or when new URIs appeared
            if request.deep_scan or (len(clean) > 1000 and heur_id >= 4) or diff_tags:
                extract_file = True

            # Display obfuscation steps
            mres = ResultSection("De-obfuscation steps taken by DeobsfuScripter",
                                 parent=request.result)
            if heur_id:
                mres.set_heuristic(heur_id)
            lcount = Counter([x[0] for x in layers_list])
            for l, c in lcount.items():
                mres.add_line(f"{l}, {c} time(s).")

            # Display final layer: show fewer bytes inline when the full file is extracted
            byte_count = 5000
            if extract_file:
                # Save extracted file
                byte_count = 500
                file_name = f"{os.path.basename(request.file_name)}_decoded_final"
                file_path = os.path.join(self.working_directory, file_name)
                # Ensure directory exists before write
                os.makedirs(os.path.dirname(file_path), exist_ok=True)
                with open(file_path, 'wb+') as f:
                    f.write(clean)
                    self.log.debug(f"Submitted dropped file for analysis: {file_path}")
                request.add_extracted(file_path, file_name, "Final deobfuscation layer")

            ResultSection(f"First {byte_count} bytes of the final layer:",
                          body=safe_str(clean[:byte_count]),
                          body_format=BODY_FORMAT.MEMORY_DUMP,
                          parent=request.result)

            # Display new IOCs from final layer
            if len(diff_tags) > 0:
                ioc_new = ResultSection("New IOCs found after de-obfustcation",
                                        parent=request.result,
                                        body_format=BODY_FORMAT.MEMORY_DUMP)
                has_network_heur = False
                for ty, val in diff_tags.items():
                    for v in val:
                        # Network IOCs score a higher heuristic than other new IOCs
                        if "network" in ty:
                            has_network_heur = True
                        ioc_new.add_line(f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}")
                        ioc_new.add_tag(ty, v)
                if has_network_heur:
                    ioc_new.set_heuristic(7)
                else:
                    ioc_new.set_heuristic(6)

            # Report any files dropped by the techniques during de-obfuscation
            if len(self.files_extracted) > 0:
                ext_file_res = ResultSection("The following files were extracted during the deobfuscation",
                                             heuristic=Heuristic(8),
                                             parent=request.result)
                for extracted in self.files_extracted:
                    file_name = os.path.basename(extracted)
                    ext_file_res.add_line(file_name)
                    request.add_extracted(extracted, file_name,
                                          "File of interest deobfuscated from sample")