def execute(self, request):
    result = Result()
    file_path = request.file_path
    shutil.copyfile(file_path, os.path.join(self.working_directory, "analyzed"))

    # Run StegExpose over the working directory and write a CSV report
    p1 = subprocess.Popen(
        "java -jar /var/lib/assemblyline/StegExpose/StegExpose.jar "
        + self.working_directory + " standard default "
        + os.path.join(self.working_directory, "report.csv"),
        shell=True)
    p1.wait()

    lsb_steg_results = self.read_csv(os.path.join(self.working_directory, "report.csv"))
    lsb_steg_results = self.beautify_dict(lsb_steg_results)

    kv_section = ResultSection("Result of the LSB steganalysis",
                               body_format=BODY_FORMAT.KEY_VALUE,
                               body=json.dumps(lsb_steg_results))
    result.add_section(kv_section)
    request.result = result
def execute(self, request):
    temp_filename = request.file_path

    # Filter out large documents
    if os.path.getsize(temp_filename) > self.max_pdf_size:
        file_res = Result()
        res = ResultSection(
            f"PDF Analysis of the file was skipped because the "
            f"file is too big (limit is {self.max_pdf_size / 1000 / 1000} MB).")
        file_res.add_section(res)
        request.result = file_res
        return

    filename = os.path.basename(temp_filename)
    with open(temp_filename, 'rb') as f:
        file_content = f.read()

    if b'<xdp:xdp' in file_content:
        self.find_xdp_embedded(filename, file_content, request)

    self.peepdf_analysis(temp_filename, file_content, request)
def test_reduce():
    from assemblyline_v4_service.common.section_reducer import reduce
    from assemblyline_v4_service.common.result import Result, ResultSection

    res = Result()
    result_section = ResultSection("blah")
    res.add_section(result_section)
    reduce(res)
    # Code coverage only
    assert True
def resubmit_dex2jar_output(self, apk_file: str, target: str, result: Result, request):
    dex = os.path.join(self.working_directory, "classes.dex")
    self.get_dex(apk_file, dex)
    if os.path.exists(dex):
        d2j = Popen([self.dex2jar, "--output", target, dex],
                    stdout=PIPE, stderr=PIPE)
        d2j.communicate()
        if os.path.exists(target):
            res_sec = ResultSection("Classes.dex file was recompiled as a JAR "
                                    "and re-submitted for analysis")
            res_sec.add_line(f"JAR file resubmitted as: {os.path.basename(target)}")
            request.add_extracted(target, os.path.basename(target), "Dex2Jar output JAR file")
            result.add_section(res_sec)
def execute(self, request: ServiceRequest) -> None:
    sha256 = request.sha256
    result = Result()

    # First, let's get the analysis metadata, if it exists on the system
    main_api_result = self._get_analysis_metadata(request.get_param('analysis_id'), sha256)

    if not main_api_result:
        self.log.debug(f"SHA256 {sha256} is not on the system.")
        request.result = result
        return

    if main_api_result.get("verdict") in Verdicts.NOT_SUPPORTED_VERDICTS.value:
        self.log.debug(f"Unsupported file type: {request.file_type}")
        request.result = result
        return
    elif main_api_result.get("verdict") == AnalysisStatusCode.FAILED.value:
        self.log.warning("The Intezer server is not feeling well :(")
        request.result = result
        return

    analysis_id = main_api_result["analysis_id"]

    # Set up the main result section
    main_kv_section = ResultKeyValueSection("IntezerStatic analysis report")
    processed_main_api_result = self._process_details(main_api_result.copy(),
                                                      UNINTERESTING_ANALYSIS_KEYS)
    main_kv_section.update_items(processed_main_api_result)
    if "family_name" in main_api_result:
        main_kv_section.add_tag("attribution.family", main_api_result["family_name"])

    # This file-verdict map will be used later on to assign heuristics to sub-analyses
    file_verdict_map = {}
    self._process_iocs(analysis_id, file_verdict_map, main_kv_section)
    if not self.config["is_on_premise"]:
        self._process_ttps(analysis_id, main_kv_section)
    self._handle_subanalyses(request, sha256, analysis_id, file_verdict_map, main_kv_section)

    # Setting the heuristic here to avoid FPs
    if main_kv_section.subsections:
        self._set_heuristic_by_verdict(main_kv_section, main_api_result["verdict"])

    if main_kv_section.subsections or main_kv_section.heuristic:
        result.add_section(main_kv_section)
    request.result = result
def test_execute(sample, metadefender_class_instance, mocker):
    from assemblyline_v4_service.common.task import Task
    from assemblyline_v4_service.common.result import Result
    from assemblyline.odm.messages.task import Task as ServiceTask
    from assemblyline_v4_service.common.request import ServiceRequest

    metadefender_class_instance.nodes["blah"] = {
        "engine_count": 1,
        "oldest_dat": 1,
        "newest_dat": 1
    }
    mocker.patch.object(metadefender_class_instance, "_get_version_map")
    metadefender_class_instance.start()

    service_task = ServiceTask(sample)
    task = Task(service_task)
    metadefender_class_instance._task = task
    service_request = ServiceRequest(task)

    mocker.patch.object(metadefender_class_instance, "scan_file")
    mocker.patch.object(metadefender_class_instance, "new_node")
    mocker.patch.object(metadefender_class_instance, "parse_results", return_value=Result())

    # Actually executing the sample
    metadefender_class_instance.execute(service_request)

    # For coverage
    metadefender_class_instance.config["max_node_time"] = 0
    metadefender_class_instance.execute(service_request)

    metadefender_class_instance.config["max_node_time"] = 1000
    metadefender_class_instance.config["min_node_time"] = 0
    metadefender_class_instance.execute(service_request)
def execute(self, request):
    result = Result()
    apk = request.file_path
    quark_out = os.path.join(self.working_directory, 'quark_out')
    quark_graph = os.path.join('/opt/al_service/', 'call_graph_image')

    if request.get_param('generate_graphs'):
        call(["quark", "-a", apk, "-g", "-s", "-o", quark_out,
              "-r", "/opt/al_support/quark-rules"])
    else:
        call(["quark", "-a", apk, "-o", quark_out,
              "-r", "/opt/al_support/quark-rules"])

    if os.path.exists(quark_out):
        self.run_analysis(quark_out, result)
        request.add_supplementary(quark_out, "quark_out",
                                  "These are quark results as a JSON file")

    if os.path.exists(quark_graph):
        for graph_file in os.listdir(quark_graph):
            if graph_file.endswith(".png"):
                request.add_supplementary(os.path.join(quark_graph, graph_file),
                                          graph_file,
                                          f"Call graph: {graph_file}")

    request.result = result
def gen_results(self, api_response):
    procr = self.upmal.process_results(api_response, self.upm)
    result = Result()
    service_task = ServiceTask(sample1)
    task = Task(service_task)
    request = ServiceRequest(task)
    self.upmal.generate_results(procr, result, api_response, request)
def execute(self, request):
    result = Result()
    request.result = result
    file_path = request.file_path
    password = request.get_param('password')
    start_point = request.get_param('start point')

    try:
        # First pass: extraction only
        data = process_file(file=file_path,
                            password=password,
                            noninteractive=True,
                            no_indent=True,
                            output_level=0,
                            return_deobfuscated=True,
                            extract_only=True)
        # Second pass: full deobfuscation
        data_deobfuscated = process_file(file=file_path,
                                         password=password,
                                         start_point=start_point,
                                         noninteractive=True,
                                         no_indent=True,
                                         output_level=0,
                                         output_formula_format='[[CELL-ADDR]]: [[INT-FORMULA]]',
                                         return_deobfuscated=True)
    except Exception as e:
        section = ResultSection('Failed to analyze', parent=request.result)
        section.add_line(str(e))
        if str(e).startswith('Failed to decrypt'):
            section.set_heuristic(6)
        return

    add_results(result, data, data_deobfuscated)
def test_parse_results(response, correct_res_secs, metadefender_class_instance):
    from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT

    metadefender_class_instance.blocklist = ["a"]
    metadefender_class_instance.sig_score_revision_map = {}
    metadefender_class_instance.kw_score_revision_map = {}
    metadefender_class_instance.current_node = "http://blah"
    metadefender_class_instance.nodes[metadefender_class_instance.current_node] = {
        "engine_map": {
            "z": {"version": "blah", "def_time": "blah"},
            "y": {"version": "blah", "def_time": "blah"}
        },
        "queue_times": [],
        "file_count": 0
    }

    correct_result = Result()
    for correct_res_sec in correct_res_secs:
        section = ResultSection(
            correct_res_sec["title_text"],
            body_format=BODY_FORMAT.TEXT if not correct_res_sec.get("body_format") else BODY_FORMAT.JSON,
            body=correct_res_sec.get("body"))
        for subsec in correct_res_sec.get("subsections", []):
            subsection = ResultSection(
                subsec["title_text"],
                body=subsec["body"],
                body_format=BODY_FORMAT.KEY_VALUE,
                tags=subsec.get("tags"),
            )
            if subsec.get("heuristic"):
                subsection.set_heuristic(subsec["heuristic"]["heur_id"])
                print(subsec["heuristic"]["signatures"])
                for key in subsec["heuristic"]["signatures"].keys():
                    subsection.heuristic.add_signature_id(key)
            section.add_subsection(subsection)
        correct_result.add_section(section)

    actual_result = metadefender_class_instance.parse_results(response)
    for index, section in enumerate(actual_result.sections):
        assert check_section_equality(section, correct_result.sections[index])
def execute(self, request: ServiceRequest) -> None: """ Main Module. See README for details.""" request.result = Result() patterns = PatternMatch() self.sample_type = request.file_type self.excess_extracted = 0 # Filters for submission modes. Listed in order of use. if request.deep_scan: # Maximum size of submitted file to run this service: max_size = 8000000 # String length maximum # Used in basic ASCII and UNICODE modules: max_length = 1000000 # String list maximum size # List produced by basic ASCII and UNICODE module results and will determine # if patterns.py will only evaluate network IOC patterns: st_max_size = 1000000 # BBcrack maximum size of submitted file to run module: bb_max_size = 200000 else: max_size = self.config.get('max_size', 3000000) max_length = self.config.get('max_length', 5000) st_max_size = self.config.get('st_max_size', 0) bb_max_size = self.config.get('bb_max_size', 85000) # Begin analysis if (len(request.file_contents) or 0) >= max_size or self.sample_type.startswith("archive/"): # No analysis is done if the file is an archive or too large return self.ascii_results(request, patterns, max_length, st_max_size) self.embedded_pe_results(request) # Possible encoded strings -- all sample types except code/* (code is handled by deobfuscripter service) if not self.sample_type.startswith('code'): self.base64_results(request, patterns) if (len(request.file_contents) or 0) < bb_max_size: self.bbcrack_results(request) # Other possible encoded strings -- all sample types but code and executables if not self.sample_type.split('/', 1)[0] in ['executable', 'code']: self.unicode_results(request, patterns) # Go over again, looking for long ASCII-HEX character strings if not self.sample_type.startswith('document/office'): self.hex_results(request, patterns) if self.excess_extracted: self.log.warning( f"Too many files extracted from {request.sha256}, " f"{self.excess_extracted} files were not extracted") request.result.add_section( ResultSection( f"Over extraction limit: " f"{self.excess_extracted} files were not extracted"))
def execute(self, request: ServiceRequest) -> None:
    result = Result()
    request.result = result

    # Get AV labels from previous services
    av_labels = request.task.tags.get('av.virus_name')
    if not av_labels:
        return

    # Extract AVclass tags
    av_tags = self._get_avclass_tags(request.md5, request.sha1, request.sha256, av_labels)
    if av_tags is None:
        return

    # Build results
    section = self._get_result_section(av_tags.family, av_tags.is_pup)
    for tag_section in self._get_category_sections(av_tags.tags):
        section.add_subsection(tag_section)

    result.add_section(section)
def execute(self, request):
    self.mwcp_report = cli.register()
    result = Result()

    # Run Ratdecoders
    output = cli.run_ratdecoders(request.file_path, self.mwcp_report)
    if isinstance(output, str):
        self.log.info(output)
        output = ""
    if isinstance(output, dict):
        self.log.info(output)
        for parser, fields in output.items():
            self.section_builder(parser, fields, result, "RATDecoder")

    # YARA externals must be dicts with key/value pairs being strings
    newtags = {}
    for k, v in request.task.tags.items():
        key = f"al_{k.replace('.', '_')}"
        values = [str(i) for i in v]
        newtags[key] = " | ".join(values)

    # Get matches for both file and tag parsers, dedup, then run
    parsers = cli.deduplicate(self.file_parsers, self.tag_parsers, request.file_path, newtags)
    output_fields = cli.run(parsers, request.file_path, self.mwcp_report)

    for parser, field_dict in output_fields.items():
        self.section_builder(parser, field_dict, result)
        if "outputfile" in field_dict:
            # The outputfile value is a list of lists containing the filename,
            # description and MD5 hash of each additional output file
            outputfiles = field_dict['outputfile']
            for output_list in outputfiles:
                output_filename = output_list[0]
                output_description = output_list[1]
                output_md5 = output_list[2]
                output_fullpath = os.path.join(os.getcwd(),
                                               output_md5[:5] + '_' + output_filename)
                request.add_supplementary(output_fullpath, output_filename, output_description)

    if output or output_fields:
        fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
        with os.fdopen(fd, "w") as myfile:
            myfile.write(json.dumps(output))
            myfile.write(json.dumps(output_fields))
        request.add_supplementary(temp_path, "output.json",
                                  "This is MWCP output as a JSON file")

    request.result = result
def execute(self, request): """Main Module. See README for details.""" result = Result() self.sha = request.sha256 local = request.file_path text_section = None kv_section = None extracted, metadata = self.dexray(request, local) num_extracted = len(request.extracted) if num_extracted != 0: text_section = ResultSection("DeXRAY found files:") for extracted in request.extracted: file_name = extracted.get('name') text_section.add_line( f"Resubmitted un-quarantined file as : {file_name}") if metadata: # Can contain live URLs to the original content source kv_section = ResultSection("DeXRAY Quarantine Metadata", body_format=BODY_FORMAT.JSON, body=json.dumps(metadata)) result.add_section(kv_section) for section in (text_section, kv_section): if section: result.add_section(section)
def parse_results(response: Dict[str, Any]) -> Result:
    res = Result()
    response = response['data']

    url_section = ResultSection('VirusTotal report permalink',
                                body_format=BODY_FORMAT.URL,
                                body=json.dumps({"url": response['links']['self']}))
    res.add_section(url_section)

    response = response['attributes']
    scans = response['last_analysis_results']
    av_hits = ResultSection('Anti-Virus Detections')
    av_hits.add_line(f'Found {response["last_analysis_stats"]["malicious"]} AV hit(s) '
                     f'from {len(scans)} scan(s)')
    for majorkey, subdict in sorted(scans.items()):
        if subdict['category'] == "malicious":
            virus_name = subdict['result']
            av_hit_section = AvHitSection(majorkey, virus_name)
            av_hit_section.set_heuristic(1, signature=f'{majorkey}.{virus_name}')
            av_hit_section.add_tag('av.virus_name', virus_name)
            av_hits.add_subsection(av_hit_section)
    res.add_section(av_hits)

    return res
def execute(self, request):
    result = Result()
    file = request.file_path
    with open(file, "rb") as f:
        file_content = f.read()

    content_list = autoit_ripper.extract(data=file_content)

    if content_list:
        content = content_list[0][1].decode("utf-8")
        text_section = ResultSection('[DUMP RESULT]')
        text_section.add_line(content)
        text_section.set_heuristic(1)
        result.add_section(text_section)

        script_path = os.path.join(self.working_directory, 'script.au3')
        with open(script_path, "w") as f:
            f.write(content)
        request.add_extracted(script_path, 'script.au3', 'This is the unpacked script')

    request.result = result
def _extract_result_from_matches(self, matches):
    """
    Iterate through the YARA match objects and send each to the parser.

    Args:
        matches: List of YARA rule Match objects.

    Returns:
        AL Result object.
    """
    result = Result()
    for match in matches:
        self._add_resultinfo_for_match(result, match)
    return result
def execute(self, request):
    result = Result()
    file_path = request.file_path

    # Run clamscan and collect its report from stdout; communicate() also
    # waits for the process, which avoids a pipe-buffer deadlock
    p1 = subprocess.Popen(
        ["clamscan", "-a", "-z", "--detect-pua", "--alert-macros", file_path],
        stdout=subprocess.PIPE)
    stdout = p1.communicate()[0].decode("utf-8")
    report = stdout.split("\n")
    report = list(filter(None, report))

    text_section = ResultSection("Successfully scanned the file")
    if "FOUND" in report[0]:
        text_section.set_heuristic(1)
    for line in report:
        text_section.add_line(line)

    result.add_section(text_section)
    request.result = result
def execute(self, request: ServiceRequest) -> None:
    result = Result()
    self.hits = {}  # clear the hits dict
    path = request.file_path
    file_name = request.file_name
    self.log.info(f" Executing {file_name}")

    self.log.info(f"Number of rules {len(self.sigma_parser.rules)}")
    self.sigma_parser.register_callback(self.sigma_hit)
    self.sigma_parser.check_logfile(path)
    if len(self.hits) > 0:
        hit_section = ResultSection('Events detected as suspicious')
        # Group alerts together by rule
        for rule_id, events in self.hits.items():
            title = self.sigma_parser.rules[rule_id].title
            section = SigmaHitSection(title, events)
            tags = self.sigma_parser.rules[rule_id].tags
            attack_id = None
            if tags:
                for tag in tags:
                    name = tag[7:]  # strip the "attack." prefix
                    # Technique, group or software IDs
                    if name.startswith(('t', 'g', 's')):
                        attack_id = name.upper()

            source = events[0]['signature_source']
            if attack_id:
                section.set_heuristic(get_heur_id(events[0]['score']),
                                      attack_id=attack_id,
                                      signature=f"{source}.{title}")
            else:
                section.set_heuristic(get_heur_id(events[0]['score']),
                                      signature=f"{source}.{title}")
            section.add_tag(f"file.rule.{source}", f"{source}.{title}")

            for event in events:
                # Add the event data as a subsection
                section.add_subsection(EventDataSection(event))
            hit_section.add_subsection(section)
        result.add_section(hit_section)
    request.result = result
def execute(self, request):
    result = Result()
    url = request.task.metadata.get('submitted_url')
    api_key = request.get_param("api_key")
    public = request.get_param("public")

    u = UrlScan(apikey=api_key, url=url, public=public)
    u.submit()

    # We need to wait for the API to process our request
    response = self.wait_processing(u)
    resp_json = response.json()

    # We get the response parts that we want and merge them all together
    report = {
        **resp_json["verdicts"]["overall"],
        **resp_json["lists"],
        **resp_json["page"]
    }

    # We convert the "certificates" section from a list of dictionaries
    # to a dictionary of lists
    certificates = report.pop("certificates")
    certificates = {k: [dic[k] for dic in certificates] for k in certificates[0]}

    # We add the converted section to the report
    report = {**report, **certificates}

    # We create the KEY_VALUE section to add the report to the result page
    kv_section = ResultSection("Urlscan.io report",
                               body_format=BODY_FORMAT.KEY_VALUE,
                               body=json.dumps(report))
    for domain in report["domains"]:
        kv_section.add_tag("network.static.domain", domain.strip())
    result.add_section(kv_section)

    # We get the preview of the website
    screenshot = u.getScreenshot()
    preview_path = os.path.join(self.working_directory, "preview.png")
    with open(preview_path, "wb") as ofile:
        ofile.write(screenshot)

    # Adding the preview on the result page
    url_section = ResultSection(
        'Urlscan.io website screenshot',
        body_format=BODY_FORMAT.URL,
        body=json.dumps({
            "name": "The preview is also available here!",
            "url": resp_json["task"]["screenshotURL"]
        }))
    result.add_section(url_section)
    request.add_extracted(preview_path, "preview.png",
                          "Here's the preview of the site")

    request.result = result
def execute(self, request):
    result = Result()
    request.set_service_context(self.get_tool_version())
    apk = request.file_path
    filename = os.path.basename(apk)
    d2j_out = os.path.join(self.working_directory, f'{filename}.jar')
    apktool_out = os.path.join(self.working_directory, f'{filename}_apktool')
    apktool_workdir = os.path.join(self.working_directory, f'{filename}_apktool_workdir')

    self.run_badging_analysis(apk, result)
    self.run_strings_analysis(apk, result)
    self.run_apktool(apk, apktool_out, apktool_workdir, result)
    if request.get_param('resubmit_apk_as_jar'):
        self.resubmit_dex2jar_output(apk, d2j_out, result, request)

    request.result = result
def check_file_name_anomalies(self, filename):
    """Filename anomalies detection"""
    is_double_ext, f_ext = self.fna_check_double_extension(filename)
    is_empty_filename = self.fna_check_empty_filename(filename, f_ext)
    too_many_whitespaces = self.fna_check_filename_ws(filename, f_ext)
    has_unicode_ext_hiding_ctrls = self.fna_check_unicode_bidir_ctrls(filename, f_ext)

    file_res = Result()
    if too_many_whitespaces or is_double_ext or has_unicode_ext_hiding_ctrls or is_empty_filename:
        res = ResultSection(title_text="File Name Anomalies", parent=file_res)

        # Tag the filename as it might be of interest
        res.add_tag("file.name.extracted", filename)

        # Remove Unicode controls, if any, for reporting
        fn_no_controls = "".join(
            c for c in filename
            if c not in ["\u202E", "\u202B", "\u202D", "\u202A", "\u200E", "\u200F"]
        )
        # Also add a line with the "actual" file name
        res.add_line(f"Actual file name: {wrap_bidir_unicode_string(fn_no_controls)}")

        if too_many_whitespaces:
            sec = ResultSection("Too many whitespaces", parent=res, heuristic=Heuristic(1))
            sec.add_tag("file.name.anomaly", "TOO_MANY_WHITESPACES")
            sec.add_tag("file.behavior", "File name has too many whitespaces")

        if is_double_ext:
            sec = ResultSection("Double file extension", parent=res, heuristic=Heuristic(2))
            sec.add_tag("file.name.anomaly", "DOUBLE_FILE_EXTENSION")
            sec.add_tag("file.behavior", "Double file extension")

        if has_unicode_ext_hiding_ctrls:
            sec = ResultSection("Hidden launchable file extension", parent=res,
                                heuristic=Heuristic(3))
            sec.add_tag("file.name.anomaly", "UNICODE_EXTENSION_HIDING")
            sec.add_tag("file.behavior", "Real file extension hidden using unicode trickery")

        if is_empty_filename:
            sec = ResultSection("Empty Filename", parent=res, heuristic=Heuristic(4))
            sec.add_tag("file.name.anomaly", "FILENAME_EMPTY_OR_ALL_SPACES")
            sec.add_tag("file.behavior", "File name is empty or all whitespaces")

    return file_res
def execute(self, request: ServiceRequest):
    try:
        self.client = Client(
            apikey=self.config.get("api_key", request.get_param("api_key")),
            proxy=self.config.get('proxy') or None)
    except Exception as e:
        self.log.error("No API key found for VirusTotal")
        raise e

    if request.task.metadata.get('submitted_url', None) and request.task.depth == 0:
        response = self.scan_url(request)
    else:
        response = self.scan_file(request)

    if response:
        result = self.parse_results(response)
        request.result = result
    else:
        request.result = Result()
def execute(self, request):
    # Result object
    result = Result()
    api_key = request.get_param("api_key")

    if self.prechecks(request, api_key):
        upm = unpacme.UnpacMe(api_key)
        record = upm.upload_file(request.file_path)
        if record['success']:
            analysis_results = self.wait_for_completion(upm, record)
            if analysis_results:
                presults = self.process_results(analysis_results, upm)
                result, request = self.generate_results(presults, result,
                                                        analysis_results, request)
        else:
            self.log.error(
                f"An error occurred while uploading the sample to UNPACME: {record['msg']}")

    request.result = result
def execute(self, request):
    qr = xqrcode.decode_from_file(request.file_path)
    if len(qr) > 0:
        result_url = qr[0]['data']
        result = Result()

        text_section = ResultSection('QR Code')
        text_section.add_line(result_url)
        result.add_section(text_section)

        url_section = ResultSection('URL extracted',
                                    body_format=BODY_FORMAT.URL,
                                    body=json.dumps({
                                        "name": "QR Code URL",
                                        "url": f"{result_url}"
                                    }))
        # Tag the decoded value as a URI (it is a full URL, not a bare domain)
        url_section.add_tag("network.static.uri", result_url)
        result.add_section(url_section)
        request.result = result
    else:
        request.result = Result()
def execute(self, request):
    # --- Setup ------------------------------------------------------------------------------------------
    request.result = Result()
    patterns = PatternMatch()

    if request.deep_scan:
        max_attempts = 100
    else:
        max_attempts = 10

    self.files_extracted = set()
    self.hashes = set()
    before = set()

    # --- Pre-Processing ---------------------------------------------------------------------------------
    # Get all IOCs prior to de-obfuscation
    pat_values = patterns.ioc_match(request.file_contents, bogon_ip=True, just_network=False)
    if pat_values:
        if request.get_param('extract_original_iocs'):
            ioc_res = ResultSection("The following IOCs were found in the original file",
                                    parent=request.result,
                                    body_format=BODY_FORMAT.MEMORY_DUMP)
        else:
            ioc_res = None
        for k, val in pat_values.items():
            if val == "":
                asc_asc = unicodedata.normalize('NFKC', val).encode('ascii', 'ignore')
                if ioc_res:
                    ioc_res.add_line(f"Found {k.upper().replace('.', ' ')}: {safe_str(asc_asc)}")
                    ioc_res.add_tag(k, asc_asc)
                before.add((k, asc_asc))
            else:
                for v in val:
                    if ioc_res:
                        ioc_res.add_line(f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}")
                        ioc_res.add_tag(k, v)
                    before.add((k, v))

    # --- Prepare Techniques -----------------------------------------------------------------------------
    techniques = [
        ('MSOffice Embedded script', self.msoffice_embedded_script_string),
        ('CHR and CHRB decode', self.chr_decode),
        ('String replace', self.string_replace),
        ('Powershell carets', self.powershell_carets),
        ('Array of strings', self.array_of_strings),
        ('Fake array vars', self.vars_of_fake_arrays),
        ('Reverse strings', self.str_reverse),
        ('B64 Decode', self.b64decode_str),
        ('Simple XOR function', self.simple_xor_function),
    ]
    second_pass = [
        ('Concat strings', self.concat_strings),
        ('MSWord macro vars', self.mswordmacro_vars),
        ('Powershell vars', self.powershell_vars),
        ('Charcode hex', self.charcode_hex),
    ]
    final_pass = [
        ('Charcode', self.charcode),
    ]
    code_extracts = [
        ('.*html.*', "HTML scripts extraction", self.extract_htmlscript),
    ]

    layers_list = []
    layer = request.file_contents

    # --- Stage 1: Script Extraction ---------------------------------------------------------------------
    for pattern, name, func in code_extracts:
        if re.match(re.compile(pattern), request.task.file_type):
            extracted_parts = func(request.file_contents)
            layer = b"\n".join(extracted_parts).strip()
            layers_list.append((name, layer))
            break

    # --- Stage 2: Deobfuscation -------------------------------------------------------------------------
    idx = 0
    first_pass_len = len(techniques)
    layers_count = len(layers_list)
    while True:
        if idx > max_attempts:
            final_pass.extend(techniques)
            for name, technique in final_pass:
                res = technique(layer)
                if res:
                    layers_list.append((name, res))
            break

        for name, technique in techniques:
            res = technique(layer)
            if res:
                layers_list.append((name, res))
                # Looks like it worked, restart with the new layer
                layer = res

        # If the layers haven't changed in a pass, break
        if layers_count == len(layers_list):
            if len(techniques) != first_pass_len:
                final_pass.extend(techniques)
                for name, technique in final_pass:
                    res = technique(layer)
                    if res:
                        layers_list.append((name, res))
                break
            else:
                for x in second_pass:
                    techniques.insert(0, x)
        layers_count = len(layers_list)
        idx += 1

    # --- Compiling results ------------------------------------------------------------------------------
    if len(layers_list) > 0:
        extract_file = False
        num_layers = len(layers_list)
        heur_id = None

        # Compute heuristic
        if num_layers < 5:
            heur_id = 1
        elif num_layers < 10:
            heur_id = 2
        elif num_layers < 50:
            heur_id = 3
        elif num_layers < 100:
            heur_id = 4
        else:  # num_layers >= 100
            heur_id = 5

        # Cleanup final layer
        clean = self.clean_up_final_layer(layers_list[-1][1])
        if clean != request.file_contents:
            # Check for new IOCs
            pat_values = patterns.ioc_match(clean, bogon_ip=True, just_network=False)
            diff_tags = {}
            for k, val in pat_values.items():
                if val == "":
                    asc_asc = unicodedata.normalize('NFKC', val).encode('ascii', 'ignore')
                    if (k, asc_asc) not in before:
                        diff_tags.setdefault(k, [])
                        diff_tags[k].append(asc_asc)
                else:
                    for v in val:
                        if (k, v) not in before:
                            diff_tags.setdefault(k, [])
                            diff_tags[k].append(v)

            if request.deep_scan or \
                    (len(clean) > 1000 and heur_id >= 4) or diff_tags:
                extract_file = True

            # Display obfuscation steps
            mres = ResultSection("De-obfuscation steps taken by DeobfuScripter",
                                 parent=request.result)
            if heur_id:
                mres.set_heuristic(heur_id)

            lcount = Counter([x[0] for x in layers_list])
            for name, count in lcount.items():
                mres.add_line(f"{name}, {count} time(s).")

            # Display final layer
            byte_count = 5000
            if extract_file:
                # Save the extracted file
                byte_count = 500
                fn = f"{request.file_name}_decoded_final"
                fp = os.path.join(self.working_directory, fn)
                with open(fp, 'wb') as dcf:
                    dcf.write(clean)
                    self.log.debug(f"Submitted dropped file for analysis: {fp}")
                request.add_extracted(fp, fn, "Final deobfuscation layer")

            ResultSection(f"First {byte_count} bytes of the final layer:",
                          body=safe_str(clean[:byte_count]),
                          body_format=BODY_FORMAT.MEMORY_DUMP,
                          parent=request.result)

            # Display new IOCs from the final layer
            if len(diff_tags) > 0:
                ioc_new = ResultSection("New IOCs found after de-obfuscation",
                                        parent=request.result,
                                        body_format=BODY_FORMAT.MEMORY_DUMP)
                has_network_heur = False
                for ty, val in diff_tags.items():
                    for v in val:
                        if "network" in ty:
                            has_network_heur = True
                        ioc_new.add_line(f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}")
                        ioc_new.add_tag(ty, v)

                if has_network_heur:
                    ioc_new.set_heuristic(7)
                else:
                    ioc_new.set_heuristic(6)

            if len(self.files_extracted) > 0:
                ext_file_res = ResultSection("The following files were extracted during the deobfuscation",
                                             heuristic=Heuristic(8),
                                             parent=request.result)
                for f in self.files_extracted:
                    ext_file_res.add_line(os.path.basename(f))
                    request.add_extracted(f, os.path.basename(f),
                                          "File of interest deobfuscated from sample")
def peepdf_analysis(self, temp_filename, file_content, request):
    file_res = Result()
    try:
        res_list = []
        f_list = []
        js_dump = []

        pdf_parser = PDFParser()
        ret, pdf_file = pdf_parser.parse(temp_filename, True, False, file_content)
        if ret == 0:
            stats_dict = pdf_file.getStats()

            if ", ".join(stats_dict['Errors']) == ("Bad PDF header, %%EOF not found, PDF sections not found, "
                                                   "No indirect objects found in the body"):
                # Not a PDF
                return

            json_body = dict(
                version=stats_dict['Version'],
                binary=stats_dict['Binary'],
                linearized=stats_dict['Linearized'],
                encrypted=stats_dict['Encrypted'],
            )

            if stats_dict['Encryption Algorithms']:
                temp = []
                for algorithmInfo in stats_dict['Encryption Algorithms']:
                    temp.append(f"{algorithmInfo[0]} {str(algorithmInfo[1])} bits")
                json_body["encryption_algorithms"] = temp

            json_body.update(dict(
                updates=stats_dict['Updates'],
                objects=stats_dict['Objects'],
                streams=stats_dict['Streams'],
                comments=stats_dict['Comments'],
                errors=", ".join(stats_dict['Errors']) if stats_dict['Errors'] else "None"
            ))
            res = ResultSection("PDF File Information",
                                body_format=BODY_FORMAT.KEY_VALUE,
                                body=json.dumps(json_body))

            for version in range(len(stats_dict['Versions'])):
                stats_version = stats_dict['Versions'][version]
                v_json_body = dict(
                    catalog=stats_version['Catalog'] or "no",
                    info=stats_version['Info'] or "no",
                    objects=self.list_first_x(stats_version['Objects'][1]),
                )

                if stats_version['Compressed Objects'] is not None:
                    v_json_body['compressed_objects'] = self.list_first_x(
                        stats_version['Compressed Objects'][1])

                if stats_version['Errors'] is not None:
                    v_json_body['errors'] = self.list_first_x(stats_version['Errors'][1])

                v_json_body['streams'] = self.list_first_x(stats_version['Streams'][1])

                if stats_version['Xref Streams'] is not None:
                    v_json_body['xref_streams'] = self.list_first_x(stats_version['Xref Streams'][1])

                if stats_version['Object Streams'] is not None:
                    v_json_body['object_streams'] = self.list_first_x(stats_version['Object Streams'][1])

                if int(stats_version['Streams'][0]) > 0:
                    v_json_body['encoded'] = self.list_first_x(stats_version['Encoded'][1])
                    if stats_version['Decoding Errors'] is not None:
                        v_json_body['decoding_errors'] = self.list_first_x(
                            stats_version['Decoding Errors'][1])

                if stats_version['Objects with JS code'] is not None:
                    v_json_body['objects_with_js_code'] = \
                        self.list_first_x(stats_version['Objects with JS code'][1])

                res_version = ResultSection(f"Version {str(version)}", parent=res,
                                            body_format=BODY_FORMAT.KEY_VALUE,
                                            body=json.dumps(v_json_body))

                actions = stats_version['Actions']
                events = stats_version['Events']
                vulns = stats_version['Vulns']
                elements = stats_version['Elements']
                is_suspicious = False
                if events is not None or actions is not None or vulns is not None or elements is not None:
                    res_suspicious = ResultSection('Suspicious elements', parent=res_version)
                    if events is not None:
                        for event in events:
                            res_suspicious.add_line(f"{event}: {self.list_first_x(events[event])}")
                        is_suspicious = True
                    if actions is not None:
                        for action in actions:
                            res_suspicious.add_line(f"{action}: {self.list_first_x(actions[action])}")
                        is_suspicious = True
                    if vulns is not None:
                        for vuln in vulns:
                            if vuln in vulnsDict:
                                temp = [vuln, ' (']
                                for vulnCVE in vulnsDict[vuln]:
                                    if len(temp) != 2:
                                        temp.append(',')
                                    vulnCVE = "".join(vulnCVE) if isinstance(vulnCVE, list) else vulnCVE
                                    temp.append(vulnCVE)
                                    cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                    if cve_found:
                                        res_suspicious.add_tag(
                                            'attribution.exploit',
                                            vulnCVE[cve_found.start():cve_found.end()])
                                        res_suspicious.add_tag(
                                            'file.behavior',
                                            vulnCVE[cve_found.start():cve_found.end()])
                                temp.append('): ')
                                temp.append(str(vulns[vuln]))
                                res_suspicious.add_line(temp)
                            else:
                                res_suspicious.add_line(f"{vuln}: {str(vulns[vuln])}")
                            is_suspicious = True
                    if elements is not None:
                        for element in elements:
                            if element in vulnsDict:
                                temp = [element, ' (']
                                for vulnCVE in vulnsDict[element]:
                                    if len(temp) != 2:
                                        temp.append(',')
                                    vulnCVE = "".join(vulnCVE) if isinstance(vulnCVE, list) else vulnCVE
                                    temp.append(vulnCVE)
                                    cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                    if cve_found:
                                        res_suspicious.add_tag(
                                            'attribution.exploit',
                                            vulnCVE[cve_found.start():cve_found.end()])
                                        res_suspicious.add_tag(
                                            'file.behavior',
                                            vulnCVE[cve_found.start():cve_found.end()])
                                temp.append('): ')
                                temp.append(str(elements[element]))
                                res_suspicious.add_line(temp)
                                is_suspicious = True
                            else:
                                res_suspicious.add_line(f"\t\t{element}: {str(elements[element])}")
                                is_suspicious = True
                    if is_suspicious:
                        res_suspicious.set_heuristic(8)

                urls = stats_version['URLs']
                if urls is not None:
                    res.add_line("")
                    res_url = ResultSection('Found URLs', parent=res)
                    for url in urls:
                        res_url.add_line(f"\t\t{url}")
                    res_url.set_heuristic(9)

                for obj in stats_version['Objects'][1]:
                    cur_obj = pdf_file.getObject(obj, version)

                    if cur_obj.containsJScode:
                        cur_res = ResultSection(f"Object [{obj} {version}] contains "
                                                f"{len(cur_obj.JSCode)} block(s) of JavaScript")
                        score_modifier = 0

                        js_idx = 0
                        for js in cur_obj.JSCode:
                            sub_res = ResultSection('Block of JavaScript', parent=cur_res)
                            js_idx += 1
                            js_score = 0
                            js_code, unescaped_bytes, _, _, _ = analyseJS(js)

                            js_dump += [x for x in js_code]

                            # Malicious characteristics
                            big_buffs = self.get_big_buffs("".join(js_code))
                            if len(big_buffs) > 0:
                                js_score += 500 * len(big_buffs)
                            has_eval, has_unescape = self.check_dangerous_func("".join(js_code))
                            if has_unescape:
                                js_score += 100
                            if has_eval:
                                js_score += 100

                            js_cmt = ""
                            if has_eval or has_unescape or len(big_buffs) > 0:
                                score_modifier += js_score
                                js_cmt = "Suspiciously malicious "
                                cur_res.add_tag('file.behavior', "Suspicious JavaScript in PDF")
                                sub_res.set_heuristic(7)
                            js_res = ResultSection(f"{js_cmt}JavaScript Code (block: {js_idx})",
                                                   parent=sub_res)

                            if js_score > 0:
                                temp_js_outname = f"object{obj}-{version}_{js_idx}.js"
                                temp_js_path = os.path.join(self.working_directory, temp_js_outname)
                                temp_js_bin = "".join(js_code).encode("utf-8")
                                with open(temp_js_path, "wb") as f:
                                    f.write(temp_js_bin)
                                f_list.append(temp_js_path)
                                js_res.add_line(f"The JavaScript block was saved as {temp_js_outname}")

                                if has_eval or has_unescape:
                                    analysis_res = ResultSection("[Suspicious Functions]", parent=js_res)
                                    if has_eval:
                                        analysis_res.add_line("eval: This JavaScript block uses the eval() "
                                                              "function, which is often used to launch "
                                                              "deobfuscated JavaScript code.")
                                        analysis_res.set_heuristic(3)
                                    if has_unescape:
                                        analysis_res.add_line("unescape: This JavaScript block uses the "
                                                              "unescape() function. It may be legitimate, "
                                                              "but it is definitely suspicious since malware "
                                                              "often uses it to deobfuscate code blocks.")
                                        analysis_res.set_heuristic(3)

                                buff_idx = 0
                                for buff in big_buffs:
                                    buff_idx += 1
                                    error, new_buff = unescape(buff)
                                    if error == 0:
                                        buff = new_buff

                                    if buff not in unescaped_bytes:
                                        temp_path_name = None
                                        if ";base64," in buff[:100] and "data:" in buff[:100]:
                                            temp_path_name = f"obj{obj}_unb64_{buff_idx}.buff"
                                            try:
                                                buff = b64decode(buff.split(";base64,")[1].strip())
                                                temp_path = os.path.join(self.working_directory,
                                                                         temp_path_name)
                                                with open(temp_path, "wb") as f:
                                                    f.write(buff)
                                                f_list.append(temp_path)
                                            except Exception:
                                                self.log.error("Found 'data:;base64,' buffer "
                                                               "but failed to base64 decode.")
                                                temp_path_name = None

                                        if temp_path_name is not None:
                                            buff_cond = f" and was resubmitted as {temp_path_name}"
                                        else:
                                            buff_cond = ""
                                        first_256 = buff[:256]
                                        if isinstance(first_256, str):
                                            first_256 = first_256.encode()
                                        buff_res = ResultSection(
                                            f"A {len(buff)} bytes buffer was found in the JavaScript "
                                            f"block{buff_cond}. Here are the first 256 bytes.",
                                            parent=js_res,
                                            body=hexdump(first_256),
                                            body_format=BODY_FORMAT.MEMORY_DUMP)
                                        buff_res.set_heuristic(2)

                            processed_sc = []
                            sc_idx = 0
                            for sc in unescaped_bytes:
                                if sc not in processed_sc:
                                    sc_idx += 1
                                    processed_sc.append(sc)
                                    try:
                                        # Python 3 equivalent of the old str.decode("hex")
                                        sc = bytes.fromhex(sc.decode())
                                    except Exception:
                                        pass

                                    shell_score = 500
                                    temp_path_name = f"obj{obj}_unescaped_{sc_idx}.buff"

                                    shell_res = ResultSection(
                                        f"Unknown unescaped {len(sc)} bytes JavaScript "
                                        f"buffer (id: {sc_idx}) was resubmitted as "
                                        f"{temp_path_name}. Here are the first 256 bytes.",
                                        parent=js_res)
                                    shell_res.set_body(hexdump(sc[:256]),
                                                       body_format=BODY_FORMAT.MEMORY_DUMP)

                                    temp_path = os.path.join(self.working_directory, temp_path_name)
                                    with open(temp_path, "wb") as f:
                                        f.write(sc)
                                    f_list.append(temp_path)

                                    cur_res.add_tag('file.behavior', "Unescaped JavaScript Buffer")
                                    shell_res.set_heuristic(6)
                                    score_modifier += shell_score

                        if score_modifier > 0:
                            res_list.append(cur_res)

                    elif cur_obj.type == "stream":
                        if cur_obj.isEncodedStream and cur_obj.filter is not None:
                            data = cur_obj.decodedStream
                            encoding = cur_obj.filter.value.replace("[", "").replace("]", "") \
                                                           .replace("/", "").strip()
                        else:
                            data = cur_obj.rawStream
                            encoding = None
                        val = cur_obj.rawValue
                        otype = cur_obj.elements.get("/Type", None)
                        sub_type = cur_obj.elements.get("/Subtype", None)
                        length = cur_obj.elements.get("/Length", None)

                        if otype:
                            otype = otype.value.replace("/", "").lower()
                        if sub_type:
                            sub_type = sub_type.value.replace("/", "").lower()
                        if length:
                            length = length.value

                        if otype == "embeddedfile":
                            if len(data) > 4096:
                                if encoding is not None:
                                    temp_encoding_str = f"_{encoding}"
                                else:
                                    temp_encoding_str = ""

                                cur_res = ResultSection(
                                    f'Embedded file found ({length} bytes) [obj: {obj} {version}] '
                                    f'and dumped for analysis '
                                    f'{f"(Type: {otype}) " if otype is not None else ""}'
                                    f'{f"(SubType: {sub_type}) " if sub_type is not None else ""}'
                                    f'{f"(Encoded with {encoding})" if encoding is not None else ""}'
                                )
                                temp_path_name = f"EmbeddedFile_{obj}{temp_encoding_str}.obj"
                                temp_path = os.path.join(self.working_directory, temp_path_name)
                                with open(temp_path, "wb") as f:
                                    f.write(data)
                                f_list.append(temp_path)
                                cur_res.add_line(f"The EmbeddedFile object was saved as {temp_path_name}")
                                res_list.append(cur_res)

                        elif otype not in BANNED_TYPES:
                            cur_res = ResultSection(
                                f'Unknown stream found [obj: {obj} {version}] '
                                f'{f"(Type: {otype}) " if otype is not None else ""}'
                                f'{f"(SubType: {sub_type}) " if sub_type is not None else ""}'
                                f'{f"(Encoded with {encoding})" if encoding is not None else ""}'
                            )
                            for line in val.splitlines():
                                cur_res.add_line(line)

                            emb_res = ResultSection('First 256 bytes', parent=cur_res)
                            first_256 = data[:256]
                            if isinstance(first_256, str):
                                first_256 = first_256.encode()
                            emb_res.set_body(hexdump(first_256), BODY_FORMAT.MEMORY_DUMP)
                            res_list.append(cur_res)

            file_res.add_section(res)

            for results in res_list:
                file_res.add_section(results)

            if js_dump:
                js_dump_res = ResultSection('Full JavaScript dump')

                temp_js_dump = "javascript_dump.js"
                temp_js_dump_path = os.path.join(self.working_directory, temp_js_dump)
                try:
                    temp_js_dump_bin = "\n\n----\n\n".join(js_dump).encode("utf-8")
                except UnicodeDecodeError:
                    temp_js_dump_bin = "\n\n----\n\n".join(js_dump)
                temp_js_dump_sha1 = hashlib.sha1(temp_js_dump_bin).hexdigest()
                with open(temp_js_dump_path, "wb") as f:
                    f.write(temp_js_dump_bin)
                    f.flush()
                f_list.append(temp_js_dump_path)

                js_dump_res.add_line(f"The JavaScript dump was saved as {temp_js_dump}")
                js_dump_res.add_line(f"The SHA-1 for the JavaScript dump is {temp_js_dump_sha1}")
                js_dump_res.add_tag('file.pdf.javascript.sha1', temp_js_dump_sha1)
                file_res.add_section(js_dump_res)

            for filename in f_list:
                request.add_extracted(filename, os.path.basename(filename),
                                      f"Dumped from {os.path.basename(temp_filename)}")

        else:
            res = ResultSection("ERROR: Could not parse file with PeePDF.")
            file_res.add_section(res)
    finally:
        request.result = file_res
        try:
            del pdf_file
        except Exception:
            pass
        try:
            del pdf_parser
        except Exception:
            pass
        gc.collect()
def execute(self, request):
    parser = eml_parser.eml_parser.EmlParser(include_raw_body=True,
                                             include_attachment_data=True)
    content_str = request.file_contents

    # Attempt conversion of potential Outlook file -> eml
    if request.file_type == "document/office/email":
        try:
            content_str = msg2eml(request.file_path).as_bytes()
        except Exception:
            # Try using mailparser to convert
            converted_path, _ = msgconvert(request.file_path)
            content_str = open(converted_path, "rb").read()

    header_agg = {
        "From": set(),
        "To": set(),
        "Cc": set(),
        "Sent": set(),
        "Reply-To": set(),
        "Date": set()
    }

    # Assume this is an email saved in HTML format
    if request.file_type == "code/html":
        parsed_html = BeautifulSoup(content_str, "lxml")
        valid_headers = ["To:", "Cc:", "Sent:", "From:", "Subject:", "Reply-To:"]

        if not parsed_html.body or not any(header in parsed_html.body.text for header in valid_headers):
            # We can assume this is just an HTML doc (or lacking body), one of which we can't process
            request.result = Result()
            return

        # Can't trust 'Date' to determine the difference between HTML docs vs HTML emails
        valid_headers.append("Date:")

        html_email = email.message_from_bytes(content_str)
        generator_metadata_content = ""
        for meta in parsed_html.find_all("meta"):
            if meta.attrs.get("name", None) == "Generator":
                generator_metadata_content = meta.attrs.get("content", "")
                break

        # Process HTML emails generated from Outlook
        if generator_metadata_content == "Microsoft Word 15":
            paragraphs = parsed_html.body.find_all("p")
            # Likely an email that was exported with the original email headers
            if paragraphs and any(header in paragraphs[0].text for header in valid_headers):
                for p in paragraphs:
                    if any(valid_header in p.text for valid_header in valid_headers):
                        h_key, h_value = p.text.replace("\xa0", "").replace("\r\n", " ").split(":", 1)
                        html_email[h_key] = h_value
                        # The subject line indicates the end of the email header
                        # and the beginning of the body
                        if "Subject" in p.text:
                            break
        # Process HTML emails from MS Exchange Server or missing top-level headers (aggregate headers)
        elif (generator_metadata_content == "Microsoft Word 15 (filtered medium)"
              or generator_metadata_content == "Microsoft Exchange Server"
              or generator_metadata_content == ""):
            subject = None
            for div in parsed_html.find_all("div"):
                # Header information within divs
                if (any(header in div.text for header in valid_headers)
                        and "WordSection1" not in div.attrs.get("class", [])):
                    # Usually expect headers to be \n separated in text output, but check first
                    if "\n" in div.text:
                        for h in div.text.split("\n"):
                            if any(header in h for header in valid_headers):
                                h_key, h_value = h.split(":", 1)

                                # Implying some malformed message got mixed with the headers
                                # of another message
                                if h_key not in valid_headers:
                                    for header in valid_headers:
                                        if header in h:
                                            h_key = header[:-1]

                                # Use the latest message's subject (this maintains FW, RE, etc.)
                                if h_key == "Subject" and not subject:
                                    subject = h_value
                                elif h_key != "Subject":
                                    header_agg[h_key].add(h_value)
                    # Document was probably not well formatted, so we'll use the headers
                    # as delimiters
                    else:
                        header_offset_map = {}
                        # Determine the position of each header
                        for header in list(header_agg.keys()) + ["Subject"]:
                            if header in div.text:
                                header_offset_map[div.text.index(header)] = header
                        # Use the positions and length of header names to determine an offset
                        for i in range(len(header_offset_map)):
                            sorted_keys = sorted(header_offset_map.keys())
                            header_name = header_offset_map[sorted_keys[i]]
                            offset = len(f"{header_name}: ") + sorted_keys[i]
                            value = (div.text[offset:sorted_keys[i + 1]]
                                     if i < len(header_offset_map) - 1
                                     else div.text[offset:])
                            if header_name == "Subject":
                                subject = value
                            else:
                                header_agg[header_name].add(value)

            # Assign aggregated info to the email object
            html_email["Subject"] = subject
            for key, value in header_agg.items():
                html_email[key] = "; ".join(value)
        content_str = html_email.as_bytes()

    parsed_eml = parser.decode_email_bytes(content_str)
    result = Result()
    header = parsed_eml["header"]

    if "from" in header or "to" in header:
        all_uri = set()
        body_words = set(extract_passwords(header["subject"]))
        for body_counter, body in enumerate(parsed_eml["body"]):
            body_text = BeautifulSoup(body["content"], "lxml").text
            body_words.update(extract_passwords(body_text))
            if request.get_param("extract_body_text"):
                fd, path = mkstemp()
                with open(path, "w") as f:
                    f.write(body["content"])
                os.close(fd)
                request.add_extracted(path, "body_" + str(body_counter), "Body text")
            if "uri" in body:
                for uri in body["uri"]:
                    all_uri.add(uri)
        # Words in the email body, used by Extract to guess passwords
        request.temp_submission_data["email_body"] = list(body_words)

        kv_section = ResultSection("Email Headers",
                                   body_format=BODY_FORMAT.KEY_VALUE,
                                   parent=result)

        # Basic tags
        from_addr = header["from"].strip() if header.get("from", None) else None
        if from_addr and re.match(EMAIL_REGEX, from_addr):
            kv_section.add_tag("network.email.address", from_addr)
        for to in header["to"]:
            if re.match(EMAIL_REGEX, to.strip()):
                kv_section.add_tag("network.email.address", to.strip())

        kv_section.add_tag("network.email.date", str(header["date"]).strip())

        subject = header["subject"].strip() if header.get("subject", None) else None
        if subject:
            kv_section.add_tag("network.email.subject", subject)

        # Add CCs to body and tags
        if "cc" in header:
            for cc in header["cc"]:
                if re.match(EMAIL_REGEX, cc.strip()):
                    kv_section.add_tag("network.email.address", cc.strip())

        # Add the Message ID to body and tags
        if "message-id" in header["header"]:
            kv_section.add_tag("network.email.msg_id",
                               header["header"]["message-id"][0].strip())

        # Add tags for received IPs
        if "received_ip" in header:
            for ip in header["received_ip"]:
                ip = ip.strip()
                try:
                    if isinstance(ip_address(ip), IPv4Address):
                        kv_section.add_tag("network.static.ip", ip)
                except ValueError:
                    pass

        # Add tags for received domains
        if "received_domain" in header:
            for dom in header["received_domain"]:
                kv_section.add_tag("network.static.domain", dom.strip())

        # If we've found URIs, add them to a section
        if len(all_uri) > 0:
            uri_section = ResultSection("URIs Found:", parent=result)
            for uri in all_uri:
                uri_section.add_line(uri)
                uri_section.add_tag("network.static.uri", uri.strip())
                parsed_url = urlparse(uri)
                if parsed_url.hostname and re.match(IP_ONLY_REGEX, parsed_url.hostname):
                    uri_section.add_tag("network.static.ip", parsed_url.hostname)
                else:
                    uri_section.add_tag("network.static.domain", parsed_url.hostname)

        # Bring all headers together...
        extra_header = header.pop("header", {})
        header.pop("received", None)
        header.update(extra_header)

        # Convert to common format
        header["date"] = [self.json_serial(header["date"])]

        # Replace with aggregated date(s) if any are available
        if header_agg["Date"]:
            # Replace
            if any(default_date in header["date"]
                   for default_date in ["1970-01-01T00:00:00",
                                        "Thu, 01 Jan 1970 00:00:00 +0000"]):
                header["date"] = list(header_agg["Date"])
            # Append
            else:
                header["date"] += list(header_agg["Date"])
            for date in header_agg["Date"]:
                kv_section.add_tag("network.email.date", str(date).strip())

        # Filter out useless headers from results
        self.log.debug(header.keys())
        for h in self.header_filter:
            header.pop(h, None)
        kv_section.set_body(json.dumps(header, default=self.json_serial))

        attachments_added = []
        if "attachment" in parsed_eml:
            attachments = parsed_eml["attachment"]
            for attachment in attachments:
                fd, path = mkstemp()
                with open(path, "wb") as f:
                    f.write(base64.b64decode(attachment["raw"]))
                os.close(fd)
                try:
                    if request.add_extracted(path, attachment["filename"], "Attachment",
                                             safelist_interface=self.api_interface):
                        attachments_added.append(attachment["filename"])
                except MaxExtractedExceeded:
                    self.log.warning(f"Extract limit reached on attachments: "
                                     f"{len(attachments) - len(attachments_added)} not added")
                    break
            if attachments_added:
                ResultSection("Extracted Attachments:",
                              body="\n".join(attachments_added),
                              parent=result)

        if request.get_param("save_emlparser_output"):
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            attachments = parsed_eml.get("attachment", [])
            # Remove raw attachments; all attachments up to MaxExtractedExceeded
            # were already extracted
            for attachment in attachments:
                _ = attachment.pop("raw", None)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(parsed_eml, default=self.json_serial))
            request.add_supplementary(temp_path, "parsing.json",
                                      "These are the raw results of running GOVCERT-LU's eml_parser")
    else:
        self.log.warning("emlParser could not parse EML; no useful information in result's headers")

    request.result = result
def run_badging_analysis(self, apk_file: str, result: Result):
    badging_args = ['d', 'badging', apk_file]
    badging, errors = self.run_appt(badging_args)
    if not badging:
        return

    res_badging = ResultSection("Android application details")
    libs = []
    permissions = []
    components = []
    features = []
    pkg_version = None
    for line in badging.splitlines():
        if line.startswith("package:"):
            pkg_name = line.split("name='")[1].split("'")[0]
            pkg_version = line.split("versionCode='")[1].split("'")[0]
            res_badging.add_line(f"Package: {pkg_name} v.{pkg_version}")
            res_badging.add_tag('file.apk.pkg_name', pkg_name)
            res_badging.add_tag('file.apk.app.version', pkg_version)

        if line.startswith("sdkVersion:"):
            min_sdk = line.split(":'")[1][:-1]
            res_badging.add_line(f"Min SDK: {min_sdk}")
            res_badging.add_tag('file.apk.sdk.min', min_sdk)

        if line.startswith("targetSdkVersion:"):
            target_sdk = line.split(":'")[1][:-1]
            res_badging.add_line(f"Target SDK: {target_sdk}")
            res_badging.add_tag('file.apk.sdk.target', target_sdk)

        if line.startswith("application-label:"):
            label = line.split(":'")[1][:-1]
            res_badging.add_line(f"Default Label: {label}")
            res_badging.add_tag('file.apk.app.label', label)

        if line.startswith("launchable-activity:"):
            launch = line.split("name='")[1].split("'")[0]
            res_badging.add_line(f"Launchable activity: {launch}")
            res_badging.add_tag('file.apk.activity', launch)

        if line.startswith("uses-library-not-required:"):
            lib = line.split(":'")[1][:-1]
            if lib not in libs:
                libs.append(lib)

        if line.startswith("uses-permission:") or line.startswith("uses-implied-permission:"):
            perm = line.split("name='")[1].split("'")[0]
            if perm not in permissions:
                permissions.append(perm)

        if line.startswith("provides-component:"):
            component = line.split(":'")[1][:-1]
            if component not in components:
                components.append(component)

        if "uses-feature:" in line or "uses-implied-feature:" in line:
            feature = line.split("name='")[1].split("'")[0]
            if feature not in features:
                features.append(feature)

    if pkg_version is not None:
        pkg_version = int(pkg_version)
        if pkg_version < 15:
            ResultSection("Package version is suspiciously low", parent=res_badging,
                          heuristic=Heuristic(17))
        elif pkg_version > 999999999:
            ResultSection("Package version is suspiciously high", parent=res_badging,
                          heuristic=Heuristic(17))

    if libs:
        res_lib = ResultSection("Libraries used", parent=res_badging)
        for lib in libs:
            res_lib.add_line(lib)
            res_lib.add_tag('file.apk.used_library', lib)

    if permissions:
        res_permissions = ResultSection("Permissions used", parent=res_badging)
        dangerous_permissions = []
        unknown_permissions = []
        for perm in permissions:
            if perm in ALL_ANDROID_PERMISSIONS:
                if 'dangerous' in ALL_ANDROID_PERMISSIONS[perm]:
                    dangerous_permissions.append(perm)
                else:
                    res_permissions.add_line(perm)
                    res_permissions.add_tag('file.apk.permission', perm)
            else:
                unknown_permissions.append(perm)

        if len(set(permissions)) < len(permissions):
            ResultSection("Some permissions are defined more than once", parent=res_badging,
                          heuristic=Heuristic(18))

        if dangerous_permissions:
            res_dangerous_perm = ResultSection("Dangerous permissions used", parent=res_badging,
                                               heuristic=Heuristic(4))
            for perm in dangerous_permissions:
                res_dangerous_perm.add_line(perm)
                res_dangerous_perm.add_tag('file.apk.permission', perm)

        if unknown_permissions:
            res_unknown_perm = ResultSection("Unknown permissions used", parent=res_badging,
                                             heuristic=Heuristic(5))
            for perm in unknown_permissions:
                res_unknown_perm.add_line(perm)
                res_unknown_perm.add_tag('file.apk.permission', perm)

    if features:
        res_features = ResultSection("Features used", parent=res_badging)
        for feature in features:
            res_features.add_line(feature)
            res_features.add_tag('file.apk.feature', feature)

    if components:
        res_components = ResultSection("Components provided", parent=res_badging)
        for component in components:
            res_components.add_line(component)
            res_components.add_tag('file.apk.provides_component', component)

    result.add_section(res_badging)
def execute(self, request): """Main Module. See README for details.""" max_size = self.config.get('MAX_PDF_SIZE', 3000000) request.result = result = Result() if (os.path.getsize(request.file_path) or 0) < max_size or request.deep_scan: path = request.file_path working_dir = self.working_directory # CALL PDFID and identify all suspicious keyword streams additional_keywords = self.config.get('ADDITIONAL_KEYS', []) heur = deepcopy(self.config.get('HEURISTICS', [])) all_errors = set() res_txt = "Main Document Results" res, contains_objstms, errors = self.analyze_pdf( request, res_txt, path, working_dir, heur, additional_keywords) result.add_section(res) for e in errors: all_errors.add(e) # ObjStms: Treat all ObjStms like a standalone PDF document if contains_objstms: objstm_files = self.analyze_objstm(path, working_dir, request.deep_scan) obj_cnt = 1 for osf in objstm_files: parent_obj = os.path.basename(osf).split("_")[1] res_txt = "ObjStream Object {0} from Parent Object {1}".format( obj_cnt, parent_obj) # It is going to look suspicious as the service created the PDF heur = [ x for x in heur if 'plugin_suspicious_properties' not in x and 'plugin_embeddedfile' not in x and 'plugin_nameobfuscation' not in x ] res, contains_objstms, errors = self.analyze_pdf( request, res_txt, osf, working_dir, heur, additional_keywords, get_malform=False) obj_cnt += 1 result.add_section(res) if len(all_errors) > 0: erres = ResultSection(title_text="Errors Analyzing PDF") for e in all_errors: erres.add_line(e) result.add_section(erres) else: section = ResultSection( "PDF Analysis of the file was skipped because the file is too big (limit is 3 MB)." ) section.set_heuristic(10) result.add_section(section)