def parse_alerts(self, alerts):
    res = Result()
    line_count = 0
    newline_count = 0
    content = ""
    yml_indicator = ""
    xml_hits = ResultSection(title_text='xml Malware Indicator Match')
    if os.stat("/opt/al/pkg/al_services/alsvc_beach/alerts_generated.txt").st_size == 0:
        # Result file is empty, nothing to report
        return res
    for line in alerts:
        # Otherwise we iterate through each line to read the required information
        if line != "\n":
            line_count += 1
            if line_count == 1:
                yml_indicator = line
            else:
                content += line + "\n"
        elif line_count == 0:
            newline_count += 1
        else:
            newline_count = 0
            xml_hits.add_section(XmlResultObject(yml_indicator, content, SCORE.VHIGH))
            content = ""
            line_count = 0
    res.add_result(xml_hits)
    return res
def rip_mhtml(self, data):
    if self.task.tag != 'document/office/mhtml':
        return

    mime_res = ResultSection(score=500, title_text="ActiveMime Document(s) in multipart/related")

    mhtml = email.message_from_string(data)
    # find all the attached files:
    for part in mhtml.walk():
        content_type = part.get_content_type()
        if content_type == "application/x-mso":
            part_data = part.get_payload(decode=True)
            if len(part_data) > 0x32 and part_data[:10].lower() == "activemime":
                try:
                    part_data = zlib.decompress(part_data[0x32:])  # Grab the zlib-compressed data
                    part_filename = part.get_filename(None) or hashlib.sha256(part_data).hexdigest()
                    part_path = os.path.join(self.working_directory, part_filename)
                    with open(part_path, 'w') as fh:
                        fh.write(part_data)
                    try:
                        mime_res.add_line(part_filename)
                        self.request.add_extracted(part_path, "ActiveMime x-mso from multipart/related.")
                    except Exception as e:
                        self.log.error("Error submitting extracted file: {}".format(e))
                except Exception as e:
                    self.log.debug("Could not decompress ActiveMime part: {}".format(e))

    if len(mime_res.body) > 0:
        self.ole_result.add_section(mime_res)
def run_pdfparser(filename, request):
    file_res = request.result
    oPDFParser = None
    try:
        oPDFParser = cPDFParser(filename, False, None)
        cntComment = 0
        cntXref = 0
        cntTrailer = 0
        cntStartXref = 0
        cntIndirectObject = 0
        dicObjectTypes = {}

        while True:
            pdf_obj = oPDFParser.GetObject()
            if pdf_obj is not None:
                if pdf_obj.type == PDF_ELEMENT_COMMENT:
                    cntComment += 1
                elif pdf_obj.type == PDF_ELEMENT_XREF:
                    cntXref += 1
                elif pdf_obj.type == PDF_ELEMENT_TRAILER:
                    cntTrailer += 1
                elif pdf_obj.type == PDF_ELEMENT_STARTXREF:
                    cntStartXref += 1
                elif pdf_obj.type == PDF_ELEMENT_INDIRECT_OBJECT:
                    cntIndirectObject += 1
                    obj_type = pdf_obj.GetType()
                    if obj_type not in dicObjectTypes:
                        dicObjectTypes[obj_type] = [pdf_obj.id]
                    else:
                        dicObjectTypes[obj_type].append(pdf_obj.id)
            else:
                break

        stats_output = 'Comment: %s\nXREF: %s\nTrailer: %s\nStartXref: %s\nIndirect pdf_obj: %s\n' % \
                       (cntComment, cntXref, cntTrailer, cntStartXref, cntIndirectObject)
        names = dicObjectTypes.keys()
        names.sort()
        for key in names:
            stats_output = "%s %s %d: %s\n" % (stats_output, key, len(dicObjectTypes[key]),
                                               ', '.join(map(lambda x: '%d' % x, dicObjectTypes[key])))

        stats_hash = hashlib.sha1(stats_output).hexdigest()
        file_res.add_tag(TAG_TYPE['PDF_STATS_SHA1'], stats_hash, TAG_WEIGHT['MED'])

        if file_res.score > 0 or request.deep_scan:
            res = ResultSection(SCORE['NULL'], "PDF-parser --stats output:",
                                body_format=TEXT_FORMAT.MEMORY_DUMP)
            for line in stats_output.splitlines():
                res.add_line(line)
            file_res.add_section(res)
    finally:
        if oPDFParser is not None:
            oPDFParser.CloseOpenFiles()
def lookup_upatre_downloader(response):
    result = response.get('upatre', None)
    if not result:
        return None

    result = result[0]
    r_section = ResultSection(title_text='Upatre activity')
    r_section.score = SCORE.VHIGH
    r_section.add_line('The file %s decodes to %s using XOR key %s'
                       % (result['firstSeen'], result['decrypted_md5'], result['decryption_key']))
    return r_section
def lookup_callouts(response):
    results = response.get('callout', None)
    if not results:
        return None, []

    tags = []
    r_section = ResultSection(title_text='Sandbox Call-Outs')
    r_section.score = SCORE.HIGH
    analyser = ''
    r_sub_section = None
    for result in results[:10]:
        if analyser != result['analyser']:
            title = '%s (Analysed on %s)' % (result['analyser'], result['date'])
            r_sub_section = ResultSection(title_text=title, parent=r_section)
            analyser = result['analyser']

        channel = result['request']
        if channel is not None:
            channel = "(%s)" % channel.split('~~')[0]
        else:
            channel = ""

        r_sub_section.add_line("{0:s}:{1:d}{2:s}".format(result['callout'], result['port'], channel))

        try:
            p1, p2, p3, p4 = result['callout'].split(".")
            if int(p1) <= 255 and int(p2) <= 255 and int(p3) <= 255 and int(p4) <= 255:
                tags.append(Tag(TAG_TYPE.NET_IP, result['callout'], TAG_WEIGHT.MED, context=Context.BEACONS))
        except ValueError:
            tags.append(Tag(TAG_TYPE.NET_DOMAIN_NAME, result['callout'], TAG_WEIGHT.MED, context=Context.BEACONS))

        if result['port'] != 0:
            tags.append(Tag(TAG_TYPE.NET_PORT, str(result['port']), TAG_WEIGHT.MED, context=Context.BEACONS))

    if len(results) > 10:
        r_section.add_line("And %s more..." % str(len(results) - 10))
    return r_section, tags
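# Note on the dotted-quad check above: it tags a negative octet such as
# "-1.2.3.4" as an IP, and a callout like "999.0.0.1" gets no tag at all
# (the range test fails without raising ValueError). A hypothetical
# tightening, not part of the original service, could lean on socket.inet_aton:
import socket

def looks_like_ipv4(value):
    """Return True only if value parses as a dotted-quad IPv4 address."""
    try:
        socket.inet_aton(value)
    except socket.error:
        return False
    # inet_aton also accepts short forms like "1.2.3", so require four parts
    return value.count('.') == 3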
def lookup_source(response):
    result = response.get('source', None)
    if not result:
        return None

    if result['count'] > 0:
        r_section = ResultSection(title_text='File Frequency')
        r_section.score = SCORE.NULL
        r_section.add_line('First Seen: %s' % result['first_seen'])
        r_section.add_line('Last Seen: %s' % result['last_seen'])
        r_section.add_line('Source Count: %d' % result['count'])
        return r_section
def execute(self, request):
    local = request.download()
    al_result = Result()
    command = self.construct_command(request)
    request.task.set_milestone("started", True)
    extract_section = ResultSection(SCORE.NULL, 'Extracted and Carved Files')
    for module in binwalk.scan(local, **command):
        section = ResultSection(SCORE.NULL, module.name, body_format=TEXT_FORMAT.MEMORY_DUMP)
        for result in module.results:
            section.add_line("0x%.8X : %s" % (result.offset, result.description))
            if result.file.path in module.extractor.output:
                output = module.extractor.output[result.file.path]
                if result.offset in output.carved:
                    extract_section.add_line("Carved data from offset 0x%X to %s"
                                             % (result.offset, output.carved[result.offset]))
                    file_name = output.carved[result.offset].split("/")[-1]
                    request.add_extracted(output.carved[result.offset], 'Carved File', file_name)
                if result.offset in output.extracted and \
                        len(output.extracted[result.offset].files) > 0:
                    path = output.extracted[result.offset].files[0]
                    extract = output.extracted[result.offset].command
                    extract_section.add_line("Extracted %d files from offset 0x%X to '%s' using '%s'"
                                             % (len(output.extracted[result.offset].files),
                                                result.offset, path, extract))
                    if os.path.isdir(path):
                        zip_file = zipfile.ZipFile("%s.zip" % path.split("/")[-1], 'w', zipfile.ZIP_DEFLATED)
                        self.zip_dir(path, zip_file)
                        zip_file.close()
                        request.add_supplementary(zip_file.filename, extract, zip_file.filename.split("/")[-1])
                    else:
                        request.add_extracted(path, extract, path.split("/")[-1])
        al_result.add_section(section)
    request.task.set_milestone("finished", True)
    al_result.add_section(extract_section)
    request.result = al_result
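# zip_dir is referenced above but not shown in this excerpt. A minimal sketch,
# assuming it recursively writes a directory tree into the already-open ZipFile
# (name and behaviour are assumptions, not the original implementation; relies
# on the module-level os import used above):
def zip_dir(self, path, zip_file):
    for root, _dirs, files in os.walk(path):
        for name in files:
            full_path = os.path.join(root, name)
            # store entries relative to the extracted directory root
            zip_file.write(full_path, os.path.relpath(full_path, path))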
def _report_embedded_xdp(self, file_res, chunk_number, binary, leftover):
    file_res.add_section(ResultSection(SCORE['INFO'],
                                       ["Found %s " % chunk_number, "Embedded PDF (in XDP)"]))
    file_res.add_tag(TAG_TYPE['FILE_SUMMARY'], "Embedded PDF (in XDP)", 10, 'IDENTIFICATION')
def execute(self, request):
    request.result = Result()
    temp_filename = request.download()
    filename = os.path.basename(temp_filename)

    with open(temp_filename, 'r') as f:
        file_content = f.read()

    if '<xdp:xdp' in file_content:
        self.find_xdp_embedded(filename, file_content, request)

    if len(file_content) < 3000000:
        pdf = PDFiD2String(PDF_iD(temp_filename, False, True, False), False)
        if pdf:
            if self.parse_pdfid(pdf, request):
                self.run_pdfparser(temp_filename, request)
    else:
        # A "file too big" error message would be better, but this will do for now.
        request.result.add_section(ResultSection(SCORE['NULL'],
                                                 "PDF Analysis of the file was skipped because "
                                                 "the file is too big (limit is 3 MB)."))
def execute(self, request):
    request.result = Result()
    self.file_res = request.result
    self.path = request.download()
    filename = os.path.basename(self.path)
    self.request = request
    self.pe_file = None
    self._sect_list = None
    self.entropy_warning = False
    self.unexpected_sname = []
    self.import_hash = None
    self.filename = filename
    self.print_slack = True
    self.patch_section = None
    self.filesize_from_peheader = -1

    with open(self.path, 'r') as f:
        file_content = f.read()

    try:
        self.pe_file = pefile.PE(data=file_content)
    except pefile.PEFormatError as e:
        if e.value != "DOS Header magic not found.":
            res = ResultSection(SCORE['HIGH'],
                                ["WARNING: this file looks like a PE but failed loading inside PE file. [",
                                 e.value, "]"])
            self.file_res.add_section(res)
            self.log.debug(e)
def parse_results(self, response):
    res = Result()
    response = response.get('results', response)

    if response is not None and response.get('response_code') == 204:
        message = "You exceeded the public API request rate limit (4 requests of any nature per minute)"
        raise VTException(message)
    elif response is not None and response.get('response_code') == 203:
        message = "You tried to perform calls to functions for which you require a Private API key."
        raise VTException(message)
    elif response is not None and response.get('response_code') == 1:
        av_hits = ResultSection(title_text='Anti-Virus Detections')
        url_section = ResultSection(SCORE.NULL, 'Virus total report permalink',
                                    self.SERVICE_CLASSIFICATION,
                                    body_format=TEXT_FORMAT.URL,
                                    body=json.dumps({"url": response.get('permalink')}))
        res.add_section(url_section)

        scans = response.get('scans', response)
        av_hits.add_line('Found %d AV hit(s) from %d scans.'
                         % (response.get('positives'), response.get('total')))
        for majorkey, subdict in sorted(scans.iteritems()):
            if subdict['detected']:
                virus_name = subdict['result']
                res.append_tag(VirusHitTag(virus_name, context="scanner:%s" % majorkey))
                av_hits.add_section(AvHitSection(majorkey, virus_name, SCORE.SURE))
        res.add_result(av_hits)

    return res
def parse_results(self, response):
    res = Result()
    response = response.get('results', response)

    if response is not None and response.get('response_code') == 1:
        av_hits = ResultSection(title_text='Anti-Virus Detections')
        url_section = ResultSection(SCORE.NULL, 'Virus total report permalink',
                                    self.SERVICE_CLASSIFICATION,
                                    body_format=TEXT_FORMAT.URL,
                                    body=json.dumps({"url": response.get('permalink')}))
        res.add_section(url_section)

        scans = response.get('scans', response)
        av_hits.add_line('Found %d AV hit(s) from %d scans.'
                         % (response.get('positives'), response.get('total')))
        for majorkey, subdict in sorted(scans.iteritems()):
            if subdict['detected']:
                virus_name = subdict['result']
                res.append_tag(VirusHitTag(virus_name, context="scanner:%s" % majorkey))
                av_hits.add_section(AvHitSection(majorkey, virus_name, SCORE.SURE))
        res.add_result(av_hits)

    return res
def create_macro_sections(self, request_hash):
    # noinspection PyBroadException
    try:
        filtered_macros = []
        if len(self.all_macros) > 0:
            # noinspection PyBroadException
            try:
                # first sort all analyzed macros by their relative score, highest first
                self.all_macros.sort(key=attrgetter('macro_score'), reverse=True)
                # then only keep, theoretically, the most interesting ones
                filtered_macros = self.all_macros[0:min(len(self.all_macros), self.MAX_MACRO_SECTIONS)]
            except:
                self.log.debug("Sort and filtering of macro scores failed, "
                               "reverting to full list of extracted macros")
                filtered_macros = self.all_macros
        else:
            self.ole_result.add_section(ResultSection(SCORE.NULL, "No interesting macros found."))

        for macro in filtered_macros:
            if macro.macro_score >= self.MIN_MACRO_SECTION_SCORE:
                self.ole_result.add_section(macro.macro_section)

        # Create extracted file for all VBA script.
        if len(self.all_vba) > 0:
            vba_file_path = ""
            all_vba = "\n".join(self.all_vba)
            vba_all_sha256 = hashlib.sha256(all_vba).hexdigest()
            if vba_all_sha256 == request_hash:
                return

            try:
                vba_file_path = os.path.join(self.working_directory, vba_all_sha256)
                with open(vba_file_path, 'w') as fh:
                    fh.write(all_vba)
                self.request.add_extracted(vba_file_path, "vba_code",
                                           "all_vba_%s.vba" % vba_all_sha256[:7])
            except Exception as e:
                self.log.error("Error while adding extracted"
                               " macro: {}: {}".format(vba_file_path, str(e)))
    except Exception as e:
        self.log.debug("OleVBA VBA_Parser.detect_vba_macros failed: {}".format(e))
        section = ResultSection(SCORE.NULL, "OleVBA : Error parsing macros: {}".format(e))
        self.ole_result.add_section(section)
def execute(self, request):
    if not self.rules:
        return

    self.task = request.task
    local_filename = request.download()

    yara_externals = {}
    for k, i in self.get_yara_externals.iteritems():
        # Check default request.task fields
        try:
            sval = self.task.get(i)
        except:
            sval = None
        if not sval:
            # Check metadata dictionary
            smeta = self.task.metadata
            if smeta:
                sval = smeta.get(i, None)
        if not sval:
            # Check params dictionary
            smeta = self.task.params
            if smeta:
                sval = smeta.get(i, None)
        # Create dummy value if item not found
        if not sval:
            sval = i
        yara_externals[k] = sval

    with self.initialization_lock:
        try:
            matches = self.rules.match(local_filename, externals=yara_externals)
            self.counters[RULE_HITS] += len(matches)
            request.result = self._extract_result_from_matches(matches)
        except Exception as e:
            if e.message != "internal error: 30":
                raise
            else:
                self.log.warning("Yara internal error 30 detected on submission {}".format(self.task.sid))
                section = ResultSection(title_text="Yara scan not completed.")
                section.add_line("File returned too many matches with current rule set and Yara exited.")
                result = Result()
                request.result = result
                result.add_result(section)
def execute(self, request):
    # We have the sha1 digest in the task object so there is no need to
    # fetch the sample for NSRL execution.
    cur_result = Result()
    try:
        dbresults = self.connection.query(request.sha1)
    except NSRLDatasource.DatabaseException:
        raise RecoverableError("Query failed")

    # If we found a result in the NSRL database, drop this task as we don't want to process it further.
    if dbresults:
        request.drop()
        benign = "This file was found in the NSRL database. It is not malware."
        res = ResultSection(title_text=benign)
        res.score = SCORE.NOT
        for dbresult in dbresults[:10]:
            res.add_line(dbresult[0] + " - %s (%s) - v: %s - by: %s [%s]"
                         % (dbresult[1], dbresult[2], dbresult[3], dbresult[4], dbresult[5]))

        if len(dbresults) > 10:
            res.add_line("And %s more..." % str(len(dbresults) - 10))

        cur_result.add_section(res)

    request.result = cur_result
def process_debug(debug, al_result, classification):
    failed = False
    if 'errors' in debug:
        error_res = ResultSection(title_text='Analysis Errors', classification=classification)
        for error in debug['errors']:
            err_str = str(error).lower()
            if err_str is not None and len(err_str) > 0:
                # Timeouts - ok, just means the process never exited
                # Start Error - probably a corrupt file..
                # Initialization Error - restart the docker container
                error_res.add_line(error)
                # err_str has been lowercased, so both substrings must be lowercase too
                if "analysis hit the critical timeout" not in err_str and \
                        "unable to execute the initial process" not in err_str:
                    raise RecoverableError("An error prevented cuckoo from "
                                           "generating complete results: %s" % safe_str(error))
        if len(error_res.body) > 0:
            al_result.add_section(error_res)
    return failed
def apply_parser(self, config_parser, request, hit, content):
    result = request.result
    # if the config_parser satisfies the prerequisite...
    if config_parser.accept(request, hit, content):
        # Attempt to parse config.
        parsed_configs = []
        try:
            parsed_configs = config_parser.parse(request, hit, content)
        except:  # pylint: disable=W0702
            self.log.exception("Parse failure:")

        failed = set()
        for parsed in parsed_configs:
            try:
                if type(parsed) == configparser.NullParsedConfig and parsed.name not in failed:
                    failed.add(parsed.name)
                    section = ResultSection(SCORE['LOW'],
                                            "Configuration identified for %s but "
                                            "was not successfully parsed!" % parsed.name,
                                            parsed.classification)
                else:
                    section = ResultSection(SCORE['SURE'],
                                            [parsed.name, " configuration successfully parsed."],
                                            parsed.classification)
                    result.add_tag(TAG_TYPE['FILE_CONFIG'], parsed.name, TAG_WEIGHT['HIGH'],
                                   classification=parsed.classification)
                # Add parsed config to the report.
                parsed.report(request, section, self)
                if section:
                    result.add_section(section)
            except:  # pylint: disable=W0702
                self.log.exception("Parse failure:")
def execute(self, request):
    result = Result()
    continue_after_extract = request.get_param('continue_after_extract')
    self._last_password = None
    local = request.download()
    password_protected = False
    white_listed = 0

    try:
        password_protected, white_listed = self.extract(request, local)
    except ExtractMaxExceeded as e:
        result.add_section(ResultSection(score=SCORE["NULL"], title_text=str(e)))
def execute(self, request):
    request.result = Result()
    uresult = self._unpack(request, ['upx'])
    if uresult.ok and uresult.localpath:
        request.add_extracted(uresult.localpath,
                              'Unpacked from %s' % request.srl,
                              display_name=uresult.displayname)
        request.result.add_section(ResultSection(SCORE.NULL,
                                                 "%s successfully unpacked!"
                                                 % (os.path.basename(uresult.displayname)),
                                                 self.SERVICE_CLASSIFICATION))
def execute(self, request):
    local = request.download()
    self.result = request.result

    # Start construction of CLI string
    local_dir = os.path.dirname(os.path.realpath(__file__)) + '/Manalyze/bin'
    os.chdir(local_dir)
    cmdLine = ['./manalyze', local, '-o', 'json', '-d', 'all', '--hashes']
    self.construct_plugins(cmdLine)

    try:
        result_section = self.parse(output=subprocess.check_output(cmdLine,
                                                                   preexec_fn=set_death_signal()))
    except:
        result_section = ResultSection(SCORE.NULL, "Summary")
        result_section.add_line(subprocess.check_output(cmdLine))
        result_section.add_line("JSON Decoding Failed!")
        raise

    result = Result()
    result.add_section(result_section)
    # result.add_section(test_section)
    request.result = result
def archive_with_executables(request, result):
    if len(request.extracted) == 1 and \
            os.path.splitext(request.extracted[0].display_name)[1].lower() in Extract.LAUNCHABLE_EXTENSIONS:
        new_section = ResultSection(SCORE.VHIGH,
                                    "Archive file with single executable inside. Potentially malicious...")
        result.add_section(new_section)
        result.add_tag(TAG_TYPE['FILE_SUMMARY'], "Archived Single Executable", TAG_WEIGHT['MED'])
    else:
        for extracted in request.extracted:
            if os.path.splitext(extracted.display_name)[1].lower() in Extract.LAUNCHABLE_EXTENSIONS:
                result.add_tag(TAG_TYPE['FILE_SUMMARY'], "Executable Content in Archive", TAG_WEIGHT['MED'])
                break
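# Extract.LAUNCHABLE_EXTENSIONS is defined elsewhere on the service class and
# is not part of this excerpt. Purely illustrative (assumed, not the original
# value): a collection of lower-case extensions that Windows will execute.
LAUNCHABLE_EXTENSIONS = ('.exe', '.dll', '.scr', '.com', '.bat', '.cmd', '.js', '.vbs', '.ps1')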
def parse_pdfid(self, pdfid_output, request):
    file_res = request.result
    res = ResultSection(SCORE['NULL'], "PDF_iD output:", body_format=TEXT_FORMAT.MEMORY_DUMP)
    for line in pdfid_output.splitlines():
        if not self.parse_line(line, file_res, res):
            return False
    self._report_section(file_res, res, request)
    return True
def check_for_indicators(self, filename):
    # noinspection PyBroadException
    try:
        ole_id = OleID(filename)
        indicators = ole_id.check()

        for indicator in indicators:
            # ignore these OleID indicators, they aren't all that useful
            if indicator.id in ("ole_format", "has_suminfo",):
                continue

            indicator_score = SCORE.LOW  # default to LOW

            if indicator.value is True:
                if indicator.id in ("word", "excel", "ppt", "visio"):
                    # good to know that the filetypes have been detected, but not a score-able offense
                    indicator_score = SCORE.NULL

                section = ResultSection(indicator_score, "OleID indicator : " + indicator.name)
                if indicator.description:
                    section.add_line(indicator.description)
                self.ole_result.add_section(section)
    except:
        self.log.debug("OleID analysis failed")
def check_for_macros(self, filename, file_contents, request_hash):
    # noinspection PyBroadException
    try:
        vba_parser = VBA_Parser(filename=filename, data=file_contents)
        try:
            if vba_parser.detect_vba_macros():
                self.ole_result.add_tag(TAG_TYPE.TECHNIQUE_MACROS,
                                        "Contains VBA Macro(s)",
                                        weight=TAG_WEIGHT.LOW,
                                        usage=TAG_USAGE.IDENTIFICATION)
                try:
                    for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_macros():
                        if vba_code.strip() == '':
                            continue
                        vba_code_sha256 = hashlib.sha256(vba_code).hexdigest()
                        if vba_code_sha256 == request_hash:
                            continue

                        self.all_vba.append(vba_code)
                        macro_section = self.macro_section_builder(vba_code)
                        toplevel_score = self.calculate_nested_scores(macro_section)

                        self.all_macros.append(Macro(vba_code, vba_code_sha256, macro_section, toplevel_score))
                except Exception as e:
                    self.log.debug("OleVBA VBA_Parser.extract_macros failed: {}".format(str(e)))
                    section = ResultSection(SCORE.NULL, "OleVBA : Error extracting macros")
                    self.ole_result.add_section(section)
        except Exception as e:
            self.log.debug("OleVBA VBA_Parser.detect_vba_macros failed: {}".format(e))
            section = ResultSection(SCORE.NULL, "OleVBA : Error parsing macros: {}".format(e))
            self.ole_result.add_section(section)
    except:
        self.log.debug("OleVBA VBA_Parser constructor failed, may not be a supported OLE document")
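# Macro is not defined in this excerpt. Given how it is constructed here and
# sorted with attrgetter('macro_score') in create_macro_sections above, a
# plausible sketch is a simple value container (an assumption, not the
# original class):
from collections import namedtuple

Macro = namedtuple('Macro', ['macro_code', 'macro_sha256', 'macro_section', 'macro_score'])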
def parse_results(self, response):
    res = Result()
    response = response.get('scan_results', response)
    virus_name = ""

    if response is not None and response.get('progress_percentage') == 100:
        hit = False
        av_hits = ResultSection(title_text='Anti-Virus Detections')

        scans = response.get('scan_details', response)
        for majorkey, subdict in sorted(scans.iteritems()):
            score = SCORE.NULL
            if subdict['scan_result_i'] == 1:
                virus_name = subdict['threat_found']
                if virus_name:
                    score = SCORE.SURE
            elif subdict['scan_result_i'] == 2:
                virus_name = subdict['threat_found']
                if virus_name:
                    score = SCORE.VHIGH

            if score:
                virus_name = virus_name.replace("a variant of ", "")
                engine = self.engine_map[self._format_engine_name(majorkey)]
                res.append_tag(VirusHitTag(virus_name, context="scanner:%s" % majorkey))
                av_hits.add_section(AvHitSection(majorkey, virus_name, engine, score))
                hit = True

        if hit:
            res.add_result(av_hits)

    return res
def parse(self, output=None):
    data = json.loads(str(output))
    parent_section = ResultSection(SCORE.NULL, "Manalyze Results:")
    for name, level2 in data.iteritems():  # Skip the first level (it's the filename)
        for key, value in level2.iteritems():
            section = ResultSection(SCORE.NULL, key)
            self.recurse_dict(value, section)
            if section.body.count("\n") > 25:
                section.body_format = TEXT_FORMAT.MEMORY_DUMP
            parent_section.add_section(section)
    return parent_section
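# recurse_dict is referenced above but not part of this excerpt. A minimal
# sketch, assuming it flattens the nested Manalyze JSON into indented lines
# on the ResultSection (names and formatting are assumptions):
def recurse_dict(self, data, section, depth=0):
    indent = "    " * depth
    if isinstance(data, dict):
        for key, value in sorted(data.items()):
            if isinstance(value, (dict, list)):
                section.add_line("%s%s:" % (indent, key))
                self.recurse_dict(value, section, depth + 1)
            else:
                section.add_line("%s%s: %s" % (indent, key, value))
    elif isinstance(data, list):
        for item in data:
            self.recurse_dict(item, section, depth)
    else:
        section.add_line("%s%s" % (indent, data))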
def execute(self, request):
    file_path = request.download()
    filename = os.path.basename(file_path)
    bndb = os.path.join(self.working_directory, "%s.bndb" % filename)
    disas = os.path.join(self.working_directory, filename)
    self.clean_structures()

    if request.tag.startswith("executable/windows/"):
        self.bv = binaryninja.BinaryViewType['PE'].open(file_path)
    else:
        return

    if self.bv is None:
        return

    result = Result()
    self.bv.update_analysis_and_wait()

    # Preparation
    self.linear_sweep()
    self.preprocess()
    self.symbol_usage()
    self.process_target_functions()

    # Check Signatures
    for sig in self.sigs:
        results = {}
        self.check_api_sig(sig, results)
        if len(results) > 0:
            for res in results:
                rn = "%s - %s" % (results[res].name.split("-A")[0], sig['name'])
                section = ResultSection(sig['score'], rn)
                if res in self.processed:
                    fn = "%s_%s" % (disas, rn.replace(" ", "_"))
                    with open(fn, "wb") as fp:
                        fp.write("\n".join("%s" % l for l in self.processed[res]))
                    request.add_supplementary(fn, "Linear Disassembly of Matched Function", rn + ".disas")
                results[res].name = rn
                result.add_section(section)

    # Finalize Results and Store BNDB
    self.bv.create_database(bndb)
    request.add_supplementary(bndb, "Binary Ninja DB", filename + ".bndb")
    section = ResultSection(self.apiscore, "Target Symbols X-refs")
    for sym in sorted(self.used_syms.items(), key=lambda x: x[1], reverse=True):
        section.add_line("%d\t%s" % (sym[1], sym[0]))
    result.add_section(section)
    request.result = result
    self.clean_structures()
def execute(self, request):
    request.result = Result()
    local_filename = request.download()
    with open(local_filename) as f:
        file_content = f.read()
    request.set_service_context(self._av_info)
    max_retry = 2
    done = False
    retry = 0

    while not done:
        # If this is a retry, sleep for a second
        if retry:
            # Sleep between 1 and 3 seconds times the number of retry
            time.sleep(retry * random.randrange(100, 300, 1) / float(100))

        output = self.icap.scan_data(file_content)
        ret = self.parse_results(output, request.result, local_filename)

        if ret in [201, 204]:
            done = True
        elif ret == 500:
            # Symantec often 500's on truncated zips and other formats. It tries to decompress/parse
            # them and can't proceed.
            request.result.add_section(ResultSection(SCORE.NULL, 'Symantec could not scan this file.'))
            done = True
        elif ret == 551:
            if retry == max_retry:
                raise Exception("[FAILED %s times] Resources unavailable" % max_retry)
            else:
                self.log.info("Resource unavailable... retrying")
                retry += 1
        elif ret == 558:
            raise Exception("Could not scan file, Symantec license is expired!")
        elif ret == 100:
            raise Exception("Could not find response from icap service, "
                            "response header %s" % output.partition("\r")[0])
        else:
            raise Exception("Unknown return code from symantec: %s" % ret)
    return
def execute(self, request):
    path = request.download()
    with open(path, 'rb') as fin:
        (entropy, part_entropies) = calculate_partition_entropy(fin)

    entropy_graph_data = {
        'type': 'colormap',
        'data': {
            'domain': [0, 8],
            'values': part_entropies
        }
    }

    section = ResultSection(SCORE.NULL,
                            'Entropy.\tEntire File: {}'.format(round(entropy, 3)),
                            self.SERVICE_CLASSIFICATION,
                            body_format=TEXT_FORMAT.GRAPH_DATA,
                            body=json.dumps(entropy_graph_data))
    result = Result()
    result.add_section(section)
    request.result = result
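# calculate_partition_entropy is not defined in this excerpt. A minimal
# sketch, assuming it returns the Shannon entropy of the whole file plus a
# list of per-partition entropies over equal-sized chunks (the partition
# count of 16 is an assumption); values land in [0, 8], matching the
# 'domain' of the colormap above:
import math

def shannon_entropy(data):
    """Shannon entropy of a byte string, in bits per byte (0.0 to 8.0)."""
    if not data:
        return 0.0
    counts = {}
    for byte in data:
        counts[byte] = counts.get(byte, 0) + 1
    length = float(len(data))
    return -sum((c / length) * math.log(c / length, 2) for c in counts.values())

def calculate_partition_entropy(fin, num_partitions=16):
    """Return (whole-file entropy, list of per-partition entropies)."""
    data = fin.read()
    size = max(1, len(data) // num_partitions)
    partitions = [data[i:i + size] for i in range(0, len(data), size)]
    return shannon_entropy(data), [shannon_entropy(p) for p in partitions]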
def execute(self, request):
    request.result = Result()
    temp_filename = request.download()

    # Filter out large documents
    if os.path.getsize(temp_filename) > self.max_pdf_size:
        request.result.add_section(ResultSection(SCORE['NULL'],
                                                 "PDF Analysis of the file was skipped because the "
                                                 "file is too big (limit is %i MB)."
                                                 % (self.max_pdf_size / 1000 / 1000)))
        return

    filename = os.path.basename(temp_filename)
    # noinspection PyUnusedLocal
    file_content = ''
    with open(temp_filename, 'r') as f:
        file_content = f.read()

    if '<xdp:xdp' in file_content:
        self.find_xdp_embedded(filename, file_content, request)

    self.peepdf_analysis(temp_filename, file_content, request)
def get_signify(file_handle, res, log=None):
    if log is None:
        log = logging.getLogger("get_signify")
    else:
        log = log.getChild("get_signify")

    # first, let's try parsing the file
    try:
        s_data = signed_pe.SignedPEFile(file_handle)
    except Exception:
        log.error("Error parsing. May not be a valid PE? Traceback: %s" % traceback.format_exc())
        return  # s_data is unbound past this point, so bail out

    # Now try checking for verification
    try:
        s_data.verify()
        # signature is verified
        res.add_section(ResultSection(SCORE.OK, "This file is signed"))
        res.report_heuristic(PEFile.AL_PEFile_002)
    except signify.exceptions.SignedPEParseError as e:
        if e.message == "The PE file does not contain a certificate table.":
            res.add_section(ResultSection(SCORE.NULL, "No file signature data found"))
        else:
            res.add_section(ResultSection(SCORE.NULL,
                                          "Unknown exception. Traceback: %s" % traceback.format_exc()))
    except signify.exceptions.AuthenticodeVerificationError as e:
        if e.message == "The expected hash does not match the digest in SpcInfo":
            # This sig has been copied from another program
            res.add_section(ResultSection(SCORE.HIGH, "The signature does not match the program data"))
            res.report_heuristic(PEFile.AL_PEFile_001)
        else:
            res.add_section(ResultSection(SCORE.NULL,
                                          "Unknown authenticode exception. Traceback: %s"
                                          % traceback.format_exc()))
    except signify.exceptions.VerificationError as e:
        if e.message.startswith("Chain verification from"):
            # probably self signed
            res.add_section(ResultSection(SCORE.MED, "File is self-signed"))
            res.report_heuristic(PEFile.AL_PEFile_003)
        else:
            res.add_section(ResultSection(SCORE.NULL,
                                          "Unknown exception. Traceback: %s" % traceback.format_exc()))

    # Now try to get certificate and signature data
    sig_datas = []
    try:
        sig_datas.extend([x for x in s_data.signed_datas])
    except:
        pass

    if len(sig_datas) > 0:
        # Now extract certificate data from the sig
        for s in sig_datas:
            # Extract signer info. This is probably the most useful?
            res.add_tag(TAG_TYPE.CERT_SERIAL_NO, str(s.signer_info.serial_number))
            res.add_tag(TAG_TYPE.CERT_ISSUER, s.signer_info.issuer_dn)

            # Get cert used for signing, then add valid from/to info
            for cert in [x for x in s.certificates if x.serial_number == s.signer_info.serial_number]:
                res.add_tag(TAG_TYPE.CERT_SUBJECT, cert.subject_dn)
                res.add_tag(TAG_TYPE.CERT_VALID_FROM, cert.valid_from.isoformat())
                res.add_tag(TAG_TYPE.CERT_VALID_TO, cert.valid_to.isoformat())

            for cert in s.certificates:
                cert_res = ResultSection(SCORE.NULL, "Certificate Information")
                # x509 CERTIFICATES
                # ('CERT_VERSION', 230),
                # ('CERT_SERIAL_NO', 231),
                # ('CERT_SIGNATURE_ALGO', 232),
                # ('CERT_ISSUER', 233),
                # ('CERT_VALID_FROM', 234),
                # ('CERT_VALID_TO', 235),
                # ('CERT_SUBJECT', 236),
                # ('CERT_KEY_USAGE', 237),
                # ('CERT_EXTENDED_KEY_USAGE', 238),
                # ('CERT_SUBJECT_ALT_NAME', 239),
                # ('CERT_THUMBPRINT', 240),
                # probably not worth doing tags for all this info?
                cert_res.add_lines(["CERT_VERSION: %d" % cert.version,
                                    "CERT_SERIAL_NO: %d" % cert.serial_number,
                                    "CERT_ISSUER: %s" % cert.issuer_dn,
                                    "CERT_SUBJECT: %s" % cert.subject_dn,
                                    "CERT_VALID_FROM: %s" % cert.valid_from.isoformat(),
                                    "CERT_VALID_TO: %s" % cert.valid_to.isoformat()])
                # cert_res.add_tag(TAG_TYPE.CERT_VERSION, str(cert.version))
                # cert_res.add_tag(TAG_TYPE.CERT_SERIAL_NO, str(cert.serial_number))
                # cert_res.add_tag(TAG_TYPE.CERT_ISSUER, cert.issuer_dn)
                # cert_res.add_tag(TAG_TYPE.CERT_VALID_FROM, cert.valid_from.isoformat())
                # cert_res.add_tag(TAG_TYPE.CERT_VALID_TO, cert.valid_to.isoformat())
                # cert_res.add_tag(TAG_TYPE.CERT_SUBJECT, cert.subject_dn)
                res.add_section(cert_res)
def get_pe_info(self, lcid):
    """Dumps the PE header as Results in the FileResult."""
    # PE Header
    pe_header_res = ResultSection(SCORE['NULL'], "PE: HEADER")

    # PE Header: Header Info
    pe_header_info_res = ResultSection(SCORE.NULL, "[HEADER INFO]", parent=pe_header_res)
    pe_header_info_res.add_line("Entry point address: 0x%08X"
                                % self.pe_file.OPTIONAL_HEADER.AddressOfEntryPoint)
    pe_header_info_res.add_line("Linker Version: %02d.%02d"
                                % (self.pe_file.OPTIONAL_HEADER.MajorLinkerVersion,
                                   self.pe_file.OPTIONAL_HEADER.MinorLinkerVersion))
    pe_header_info_res.add_line("OS Version: %02d.%02d"
                                % (self.pe_file.OPTIONAL_HEADER.MajorOperatingSystemVersion,
                                   self.pe_file.OPTIONAL_HEADER.MinorOperatingSystemVersion))
    pe_header_info_res.add_line(["Time Date Stamp: %s (" % time.ctime(self.pe_file.FILE_HEADER.TimeDateStamp),
                                 res_txt_tag(str(self.pe_file.FILE_HEADER.TimeDateStamp),
                                             TAG_TYPE['PE_LINK_TIME_STAMP']),
                                 ")"])
    try:
        pe_header_info_res.add_line("Machine Type: %s (%s)" % (
            hex(self.pe_file.FILE_HEADER.Machine),
            pefile.MACHINE_TYPE[self.pe_file.FILE_HEADER.Machine]))
    except KeyError:
        pass

    # PE Header: Rich Header
    # noinspection PyBroadException
    try:
        if self.pe_file.RICH_HEADER is not None:
            pe_rich_header_info = ResultSection(SCORE.NULL, "[RICH HEADER INFO]", parent=pe_header_res)
            values_list = self.pe_file.RICH_HEADER.values
            pe_rich_header_info.add_line("VC++ tools used:")
            for i in range(0, len(values_list) / 2):
                line = "Tool Id: %3d Version: %6d Times used: %3d" % (
                    values_list[2 * i] >> 16, values_list[2 * i] & 0xFFFF, values_list[2 * i + 1])
                pe_rich_header_info.add_line(line)
    except:
        self.log.exception("Unable to parse PE Rich Header")

    # PE Header: Data Directories
    pe_dd_res = ResultSection(SCORE.NULL, "[DATA DIRECTORY]", parent=pe_header_res)
    for data_directory in self.pe_file.OPTIONAL_HEADER.DATA_DIRECTORY:
        if data_directory.Size or data_directory.VirtualAddress:
            pe_dd_res.add_line("%s - va: 0x%08X - size: 0x%08X"
                               % (data_directory.name[len("IMAGE_DIRECTORY_ENTRY_"):],
                                  data_directory.VirtualAddress, data_directory.Size))

    # PE Header: Sections
    pe_sec_res = ResultSection(SCORE.NULL, "[SECTIONS]", parent=pe_header_res)
    self._init_section_list()
    try:
        for (sname, section, sec_md5, sec_entropy) in self._sect_list:
            txt = [sname,
                   " - Virtual: 0x%08X (0x%08X bytes) - Physical: 0x%08X (0x%08X bytes) - "
                   % (section.VirtualAddress, section.Misc_VirtualSize,
                      section.PointerToRawData, section.SizeOfRawData),
                   "hash:", res_txt_tag(sec_md5, TAG_TYPE['PE_SECTION_HASH']),
                   " - entropy:%f (min:0.0, Max=8.0)" % sec_entropy]
            # add a search tag for the Section Hash
            make_tag(self.file_res, 'PE_SECTION_HASH', sec_md5, 'HIGH', usage='CORRELATION')
            pe_sec_res.add_line(txt)
    except AttributeError:
        pass

    self.file_res.add_section(pe_header_res)

    # debug
    try:
        if self.pe_file.DebugTimeDateStamp:
            pe_debug_res = ResultSection(SCORE['NULL'], "PE: DEBUG")
            self.file_res.add_section(pe_debug_res)
            pe_debug_res.add_line("Time Date Stamp: %s" % time.ctime(self.pe_file.DebugTimeDateStamp))

            # When it is a unicode, we know we are coming from RSDS which is UTF-8;
            # otherwise, we come from NB10 and we need to guess the charset.
            if type(self.pe_file.pdb_filename) != unicode:
                char_enc_guessed = translate_str(self.pe_file.pdb_filename)
                pdb_filename = char_enc_guessed['converted']
            else:
                char_enc_guessed = {'confidence': 1.0, 'encoding': 'utf-8'}
                pdb_filename = self.pe_file.pdb_filename

            pe_debug_res.add_line(["PDB: '",
                                   res_txt_tag_charset(pdb_filename,
                                                       TAG_TYPE['PE_PDB_FILENAME'],
                                                       char_enc_guessed['encoding'],
                                                       char_enc_guessed['confidence']),
                                   "'"])
            # self.log.debug(u"\tPDB: %s" % pdb_filename)
    except AttributeError:
        pass

    # imports
    try:
        if hasattr(self.pe_file, 'DIRECTORY_ENTRY_IMPORT') and len(self.pe_file.DIRECTORY_ENTRY_IMPORT) > 0:
            pe_import_res = ResultSection(SCORE['NULL'], "PE: IMPORTS")
            self.file_res.add_section(pe_import_res)

            for entry in self.pe_file.DIRECTORY_ENTRY_IMPORT:
                pe_import_dll_res = ResultSection(SCORE.NULL, "[%s]" % entry.dll, parent=pe_import_res)
                first_element = True
                line = StringIO()
                for imp in entry.imports:
                    if first_element:
                        first_element = False
                    else:
                        line.write(", ")
                    if imp.name is None:
                        line.write(str(imp.ordinal))
                    else:
                        line.write(imp.name)
                pe_import_dll_res.add_line(line.getvalue())
        else:
            pe_import_res = ResultSection(SCORE['NULL'], "PE: NO IMPORTS DETECTED ")
            self.file_res.add_section(pe_import_res)
    except AttributeError:
        pass

    # exports
    try:
        if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp is not None:
            pe_export_res = ResultSection(SCORE['NULL'], "PE: EXPORTS")
            self.file_res.add_section(pe_export_res)

            # noinspection PyBroadException
            try:
                pe_export_res.add_line(["Module Name: ",
                                        res_txt_tag(safe_str(self.pe_file.ModuleName),
                                                    TAG_TYPE['PE_EXPORT_MODULE_NAME'])])
            except:
                pass

            if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp == 0:
                pe_export_res.add_line("Time Date Stamp: 0")
            else:
                pe_export_res.add_line("Time Date Stamp: %s"
                                       % time.ctime(self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp))

            first_element = True
            txt = []
            for exp in self.pe_file.DIRECTORY_ENTRY_EXPORT.symbols:
                if first_element:
                    first_element = False
                else:
                    txt.append(", ")
                txt.append(str(exp.ordinal))
                if exp.name is not None:
                    txt.append(": ")
                    txt.append(res_txt_tag(exp.name, TAG_TYPE['PE_EXPORT_FCT_NAME']))
            pe_export_res.add_line(txt)
    except AttributeError:
        pass

    # resources
    try:
        if len(self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries) > 0:
            pe_resource_res = ResultSection(SCORE['NULL'], "PE: RESOURCES")
            self.file_res.add_section(pe_resource_res)

            for res_entry in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                if res_entry.name is None:
                    # noinspection PyBroadException
                    try:
                        entry_name = pefile.RESOURCE_TYPE[res_entry.id]
                    except:  # pylint: disable-msg=W0702
                        # unfortunately this code was done before we started to really care about which
                        # exception to catch so, I actually don't really know at this point, would need
                        # to try it out :-\
                        entry_name = "UNKNOWN"
                else:
                    entry_name = res_entry.name

                for name_id in res_entry.directory.entries:
                    if name_id.name is None:
                        name_id.name = hex(name_id.id)
                    for language in name_id.directory.entries:
                        try:
                            language_desc = lcid[language.id]
                        except KeyError:
                            language_desc = 'Unknown language'

                        line = []
                        if res_entry.name is None:
                            line.append(entry_name)
                        else:
                            line.append(res_txt_tag(str(entry_name), TAG_TYPE['PE_RESOURCE_NAME']))
                        line.append(" " + str(name_id.name) + " ")
                        line.append("0x")
                        # this will add a link to search in AL for the value
                        line.append(res_txt_tag("%04X" % language.id, TAG_TYPE['PE_RESOURCE_LANGUAGE']))
                        line.append(" (%s)" % language_desc)
                        make_tag(self.file_res, 'PE_RESOURCE_LANGUAGE', language.id,
                                 weight='LOW', usage='IDENTIFICATION')

                        # get the size of the resource
                        res_size = language.data.struct.Size
                        line.append(" Size: 0x%x" % res_size)
                        pe_resource_res.add_line(line)
    except AttributeError:
        pass

    # Resources-VersionInfo
    try:
        if len(self.pe_file.FileInfo) > 2:
            pass
        for file_info in self.pe_file.FileInfo:
            if file_info.name == "StringFileInfo":
                if len(file_info.StringTable) > 0:
                    pe_resource_verinfo_res = ResultSection(SCORE['NULL'], "PE: RESOURCES-VersionInfo")
                    self.file_res.add_section(pe_resource_verinfo_res)
                    try:
                        if "LangID" in file_info.StringTable[0].entries:
                            lang_id = file_info.StringTable[0].get("LangID")
                            if not int(lang_id, 16) >> 16 == 0:
                                txt = ('LangId: ' + lang_id + " ("
                                       + lcid[int(lang_id, 16) >> 16] + ")")
                                pe_resource_verinfo_res.add_line(txt)
                            else:
                                txt = ('LangId: ' + lang_id + " (NEUTRAL)")
                                pe_resource_verinfo_res.add_line(txt)
                    except (ValueError, KeyError):
                        txt = ('LangId: %s is invalid' % lang_id)
                        pe_resource_verinfo_res.add_line(txt)

                    for entry in file_info.StringTable[0].entries.items():
                        txt = ['%s: ' % entry[0]]
                        if entry[0] == 'OriginalFilename':
                            txt.append(res_txt_tag(entry[1],
                                                   TAG_TYPE['PE_VERSION_INFO_ORIGINAL_FILENAME']))
                        elif entry[0] == 'FileDescription':
                            txt.append(res_txt_tag(entry[1],
                                                   TAG_TYPE['PE_VERSION_INFO_FILE_DESCRIPTION']))
                        else:
                            txt.append(entry[1])
                        pe_resource_verinfo_res.add_line(txt)
    except AttributeError:
        pass

    # Resources Strings
    try:
        BYTE = 1
        WORD = 2
        DWORD = 4
        DS_SETFONT = 0x40
        DIALOG_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
        DIALOG_ITEM_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
        DIALOGEX_LEAD = WORD + WORD + DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
        DIALOGEX_TRAIL = WORD + WORD + BYTE + BYTE
        DIALOGEX_ITEM_LEAD = DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + DWORD
        DIALOGEX_ITEM_TRAIL = WORD
        ITEM_TYPES = {0x80: "BUTTON", 0x81: "EDIT", 0x82: "STATIC", 0x83: "LIST BOX",
                      0x84: "SCROLL BAR", 0x85: "COMBO BOX"}

        if hasattr(self.pe_file, 'DIRECTORY_ENTRY_RESOURCE'):
            for dir_type in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                if dir_type.name is None:
                    if dir_type.id in pefile.RESOURCE_TYPE:
                        dir_type.name = pefile.RESOURCE_TYPE[dir_type.id]
                for nameID in dir_type.directory.entries:
                    if nameID.name is None:
                        nameID.name = hex(nameID.id)
                    for language in nameID.directory.entries:
                        strings = []
                        if str(dir_type.name) == "RT_DIALOG":
                            data_rva = language.data.struct.OffsetToData
                            size = language.data.struct.Size
                            data = self.pe_file.get_memory_mapped_image()[data_rva:data_rva + size]
                            offset = 0
                            if self.pe_file.get_word_at_rva(data_rva + offset) == 0x1 \
                                    and self.pe_file.get_word_at_rva(data_rva + offset + WORD) == 0xFFFF:
                                # Use Extended Dialog Parsing
                                # Remove leading bytes
                                offset += DIALOGEX_LEAD
                                if data[offset:offset + 2] == "\xFF\xFF":
                                    offset += DWORD
                                else:
                                    offset += WORD
                                if data[offset:offset + 2] == "\xFF\xFF":
                                    offset += DWORD
                                else:
                                    offset += WORD

                                # Get window title
                                window_title = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                if len(window_title) != 0:
                                    strings.append(("DIALOG_TITLE", window_title))
                                offset += len(window_title) * 2 + WORD

                                # Remove trailing bytes
                                offset += DIALOGEX_TRAIL
                                offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD

                                # alignment adjustment
                                if (offset % 4) != 0:
                                    offset += WORD

                                while True:
                                    if offset >= size:
                                        break
                                    offset += DIALOGEX_ITEM_LEAD

                                    # Get item type
                                    if self.pe_file.get_word_at_rva(data_rva + offset) == 0xFFFF:
                                        offset += WORD
                                        item_type = ITEM_TYPES[self.pe_file.get_word_at_rva(data_rva + offset)]
                                        offset += WORD
                                    else:
                                        item_type = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                        offset += len(item_type) * 2 + WORD

                                    # Get item text
                                    item_text = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                    if len(item_text) != 0:
                                        strings.append((item_type, item_text))
                                    offset += len(item_text) * 2 + WORD
                                    extra_bytes = self.pe_file.get_word_at_rva(data_rva + offset)
                                    offset += extra_bytes + DIALOGEX_ITEM_TRAIL

                                    # Alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD
                            else:
                                # TODO: Use Non extended Dialog Parsing
                                # Remove leading bytes
                                style = self.pe_file.get_word_at_rva(data_rva + offset)
                                offset += DIALOG_LEAD
                                if data[offset:offset + 2] == "\xFF\xFF":
                                    offset += DWORD
                                else:
                                    offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD
                                if data[offset:offset + 2] == "\xFF\xFF":
                                    offset += DWORD
                                else:
                                    offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD

                                # Get window title
                                window_title = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                if len(window_title) != 0:
                                    strings.append(("DIALOG_TITLE", window_title))
                                offset += len(window_title) * 2 + WORD
                                if (style & DS_SETFONT) != 0:
                                    offset += WORD
                                    offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD

                                # Alignment adjustment
                                if (offset % 4) != 0:
                                    offset += WORD

                                while True:
                                    if offset >= size:
                                        break
                                    offset += DIALOG_ITEM_LEAD

                                    # Get item type
                                    if self.pe_file.get_word_at_rva(data_rva + offset) == 0xFFFF:
                                        offset += WORD
                                        item_type = ITEM_TYPES[self.pe_file.get_word_at_rva(data_rva + offset)]
                                        offset += WORD
                                    else:
                                        item_type = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                        offset += len(item_type) * 2 + WORD

                                    # Get item text
                                    if self.pe_file.get_word_at_rva(data_rva + offset) == 0xFFFF:
                                        offset += DWORD
                                    else:
                                        item_text = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                        if len(item_text) != 0:
                                            strings.append((item_type, item_text))
                                        offset += len(item_text) * 2 + WORD
                                    extra_bytes = self.pe_file.get_word_at_rva(data_rva + offset)
                                    offset += extra_bytes + WORD

                                    # Alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD
                        elif str(dir_type.name) == "RT_STRING":
                            data_rva = language.data.struct.OffsetToData
                            size = language.data.struct.Size
                            data = self.pe_file.get_memory_mapped_image()[data_rva:data_rva + size]
                            offset = 0
                            while True:
                                if offset >= size:
                                    break
                                ustr_length = self.pe_file.get_word_from_data(data[offset:offset + 2], 0)
                                offset += 2
                                if ustr_length == 0:
                                    continue
                                ustr = self.pe_file.get_string_u_at_rva(data_rva + offset,
                                                                        max_length=ustr_length)
                                offset += ustr_length * 2
                                strings.append((None, ustr))

                        if len(strings) > 0:
                            success = False
                            try:
                                comment = "%s (id:%s - lang_id:0x%04X [%s])" % (
                                    str(dir_type.name), str(nameID.name), language.id, lcid[language.id])
                            except KeyError:
                                comment = "%s (id:%s - lang_id:0x%04X [Unknown language])" % (
                                    str(dir_type.name), str(nameID.name), language.id)
                            res = ResultSection(SCORE['NULL'], "PE: STRINGS - %s" % comment)
                            for idx in xrange(len(strings)):
                                # noinspection PyBroadException
                                try:
                                    tag_value = strings[idx][1]

                                    # The following line crashes chardet if a UPX-packed file
                                    # has packed the resources...
                                    chardet.detect(tag_value)  # TODO: Find a better way to do this

                                    tag_value = tag_value.replace('\r', ' ').replace('\n', ' ')
                                    if strings[idx][0] is not None:
                                        res.add_line([strings[idx][0], ": ",
                                                      res_txt_tag(tag_value, TAG_TYPE['FILE_STRING'])])
                                    else:
                                        res.add_line(res_txt_tag(tag_value, TAG_TYPE['FILE_STRING']))
                                    make_tag(self.file_res, 'FILE_STRING', tag_value,
                                             weight='NULL', usage='IDENTIFICATION')
                                    success = True
                                except:
                                    pass
                            if success:
                                self.file_res.add_section(res)
                            else:
                                pass
    except AttributeError as e:
        self.log.debug("\t Error parsing output: " + repr(e))
    except Exception as e:
        print e

    # print slack space if it exists
    if (self.print_slack and self.filesize_from_peheader > 0 and
            len(self.pe_file.__data__) > self.filesize_from_peheader):
        length_to_display = PEFILE_SLACK_LENGTH_TO_DISPLAY
        if length_to_display > 0:
            length_display_str = ""
            slack_size = len(self.pe_file.__data__) - self.filesize_from_peheader
            if slack_size > length_to_display:
                length_display_str = "- displaying first %d bytes" % length_to_display
            pe_slack_space_res = ResultSection(SCORE['NULL'],
                                               "PE: SLACK SPACE (The file contents after the PE file size ends) "
                                               "[%d bytes] %s"
                                               % (len(self.pe_file.__data__) - self.filesize_from_peheader,
                                                  length_display_str),
                                               body_format=TEXT_FORMAT['MEMORY_DUMP'])
            pe_slack_space_res.add_line(hexdump(
                self.pe_file.__data__[self.filesize_from_peheader:self.filesize_from_peheader + length_to_display]))
            self.file_res.add_section(pe_slack_space_res)

def _init_section_list(self):
    # Lazy init
    if self._sect_list is None:
        self._sect_list = []
        try:
            for section in self.pe_file.sections:
                zero_idx = section.Name.find(chr(0x0))
                if not zero_idx == -1:
                    sname = section.Name[:zero_idx]